/*
 * Copyright © 2009 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

/** @file gem_exec_whisper.c
 *
 * Pass around a value to write into a scratch buffer between lots of batches.
 */

#include "igt.h"
#include "igt_debugfs.h"
#include "igt_gpu_power.h"
#include "igt_gt.h"
#include "igt_rand.h"
#include "igt_sysfs.h"

#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)

#define LOCAL_I915_EXEC_BSD_SHIFT	(13)
#define LOCAL_I915_EXEC_BSD_MASK	(3 << LOCAL_I915_EXEC_BSD_SHIFT)

#define ENGINE_MASK	(I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

#define VERIFY 0

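/* Read back the scratch bo and check that every completed pass wrote its
 * pass number into the corresponding dword.
 */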
static void check_bo(int fd, uint32_t handle, int pass)
{
	uint32_t *map;

	igt_debug("Verifying result\n");
	map = gem_mmap__cpu(fd, handle, 0, 4096, PROT_READ);
	gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, 0);
	for (int i = 0; i < pass; i++)
		igt_assert_eq(map[i], i);
	munmap(map, 4096);
}

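/* With VERIFY enabled, read the relocated address back out of the batch and
 * check that it matches the presumed offset plus delta.
 */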
static void verify_reloc(int fd, uint32_t handle,
			 const struct drm_i915_gem_relocation_entry *reloc)
{
	if (VERIFY) {
		uint64_t target = 0;
		if (intel_gen(intel_get_drm_devid(fd)) >= 8)
			gem_read(fd, handle, reloc->offset, &target, 8);
		else
			gem_read(fd, handle, reloc->offset, &target, 4);
		igt_assert_eq_u64(target,
				  reloc->presumed_offset + reloc->delta);
	}
}

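/* Flags selecting which submission variants whisper() exercises. */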
#define CONTEXTS 0x1
#define FDS 0x2
#define INTERRUPTIBLE 0x4
#define CHAIN 0x8
#define FORKED 0x10
#define HANG 0x20
#define SYNC 0x40
#define PRIORITY 0x80
#define ALL 0x100
#define QUEUES 0x200

struct hang {
	struct drm_i915_gem_exec_object2 obj;
	struct drm_i915_gem_relocation_entry reloc;
	struct drm_i915_gem_execbuffer2 execbuf;
	int fd;
};

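/* Build a batch whose MI_BATCH_BUFFER_START points back into its own buffer
 * (via a self-relocation), so it never completes on its own and relies on the
 * GPU hang detection that igt_allow_hang() permits.
 */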
static void init_hang(struct hang *h)
{
	uint32_t *batch;
	int i, gen;

	h->fd = drm_open_driver(DRIVER_INTEL);
	igt_allow_hang(h->fd, 0, 0);

	gen = intel_gen(intel_get_drm_devid(h->fd));

	memset(&h->execbuf, 0, sizeof(h->execbuf));
	h->execbuf.buffers_ptr = to_user_pointer(&h->obj);
	h->execbuf.buffer_count = 1;

	memset(&h->obj, 0, sizeof(h->obj));
	h->obj.handle = gem_create(h->fd, 4096);

	h->obj.relocs_ptr = to_user_pointer(&h->reloc);
	h->obj.relocation_count = 1;
	memset(&h->reloc, 0, sizeof(h->reloc));

	batch = gem_mmap__cpu(h->fd, h->obj.handle, 0, 4096, PROT_WRITE);
	gem_set_domain(h->fd, h->obj.handle,
		       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);

	h->reloc.target_handle = h->obj.handle; /* recurse */
	h->reloc.presumed_offset = 0;
	h->reloc.offset = 5*sizeof(uint32_t);
	h->reloc.delta = 0;
	h->reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
	h->reloc.write_domain = 0;

	i = 0;
	batch[i++] = 0xffffffff;
	batch[i++] = 0xdeadbeef;
	batch[i++] = 0xc00fee00;
	batch[i++] = 0x00c00fee;
	batch[i] = MI_BATCH_BUFFER_START;
	if (gen >= 8) {
		batch[i] |= 1 << 8 | 1;
		batch[++i] = 0;
		batch[++i] = 0;
	} else if (gen >= 6) {
		batch[i] |= 1 << 8;
		batch[++i] = 0;
	} else {
		batch[i] |= 2 << 6;
		batch[++i] = 0;
		if (gen < 4) {
			batch[i] |= 1;
			h->reloc.delta = 1;
		}
	}
	munmap(batch, 4096);
}

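/* Submit the hanging batch to every engine under test; with SYNC, also wait
 * for the submission to complete (i.e. for the hang to be cleaned up).
 */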
static void submit_hang(struct hang *h, unsigned *engines, int nengine, unsigned flags)
{
	while (nengine--) {
		h->execbuf.flags &= ~ENGINE_MASK;
		h->execbuf.flags |= *engines++;
		gem_execbuf(h->fd, &h->execbuf);
	}
	if (flags & SYNC)
		gem_sync(h->fd, h->obj.handle);
}

static void fini_hang(struct hang *h)
{
	close(h->fd);
}

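/* Assign a uniformly random priority in [-512, 511] to the context. */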
static void ctx_set_random_priority(int fd, uint32_t ctx)
{
	int prio = hars_petruska_f54_1_random_unsafe_max(1024) - 512;
	gem_context_set_priority(fd, ctx, prio);
}

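/*
 * Whisper a value along a chain of 1024 batches: each link stores its
 * immediate dword into the immediate slot of the next link to run, until a
 * final store batch writes the value into the scratch bo at a per-pass
 * offset. Each link is submitted separately, optionally bouncing between
 * engines, fds, contexts or queues depending on @flags, so the kernel must
 * keep every relocation and presumed offset coherent for the value to arrive
 * intact.
 */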
static void whisper(int fd, unsigned engine, unsigned flags)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	const int gen = intel_gen(intel_get_drm_devid(fd));
	struct drm_i915_gem_exec_object2 batches[1024];
	struct drm_i915_gem_relocation_entry inter[1024];
	struct drm_i915_gem_relocation_entry reloc;
	struct drm_i915_gem_exec_object2 store, scratch;
	struct drm_i915_gem_exec_object2 tmp[2];
	struct drm_i915_gem_execbuffer2 execbuf;
	struct hang hang;
	int fds[64];
	uint32_t contexts[64];
	unsigned engines[16];
	unsigned nengine;
	uint32_t batch[16];
	unsigned int relocations = 0;
	unsigned int reloc_migrations = 0;
	unsigned int reloc_interruptions = 0;
	unsigned int eb_migrations = 0;
	struct gpu_power_sample sample[2];
	struct gpu_power power;
	uint64_t old_offset;
	int i, n, loc;
	int debugfs;
	int nchild;

	if (flags & PRIORITY) {
		igt_require(gem_scheduler_enabled(fd));
		igt_require(gem_scheduler_has_ctx_priority(fd));
	}

	debugfs = igt_debugfs_dir(fd);
	gpu_power_open(&power);

	nengine = 0;
	if (engine == ALL_ENGINES) {
		for_each_physical_engine(fd, engine) {
			if (gem_can_store_dword(fd, engine))
				engines[nengine++] = engine;
		}
	} else {
		igt_assert(!(flags & ALL));
		igt_require(gem_has_ring(fd, engine));
		igt_require(gem_can_store_dword(fd, engine));
		engines[nengine++] = engine;
	}
	igt_require(nengine);

	if (flags & FDS)
		igt_require(gen >= 6);

	if (flags & CONTEXTS)
		gem_require_contexts(fd);

	if (flags & QUEUES)
		igt_require(gem_has_queues(fd));

	if (flags & HANG)
		init_hang(&hang);

	nchild = 1;
	if (flags & FORKED)
		nchild *= sysconf(_SC_NPROCESSORS_ONLN);
	if (flags & ALL)
		nchild *= nengine;

	intel_detect_and_clear_missed_interrupts(fd);
	gpu_power_read(&power, &sample[0]);
	igt_fork(child, nchild) {
		unsigned int pass;

		if (flags & ALL) {
			engines[0] = engines[child % nengine];
			nengine = 1;
		}

		memset(&scratch, 0, sizeof(scratch));
		scratch.handle = gem_create(fd, 4096);
		scratch.flags = EXEC_OBJECT_WRITE;

		memset(&store, 0, sizeof(store));
		store.handle = gem_create(fd, 4096);
		store.relocs_ptr = to_user_pointer(&reloc);
		store.relocation_count = 1;

		memset(&reloc, 0, sizeof(reloc));
		reloc.offset = sizeof(uint32_t);
		if (gen < 8 && gen >= 4)
			reloc.offset += sizeof(uint32_t);
		loc = 8;
		if (gen >= 4)
			loc += 4;
		reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
		reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;

		{
			tmp[0] = scratch;
			tmp[1] = store;
			gem_write(fd, store.handle, 0, &bbe, sizeof(bbe));

			memset(&execbuf, 0, sizeof(execbuf));
			execbuf.buffers_ptr = to_user_pointer(tmp);
			execbuf.buffer_count = 2;
			execbuf.flags = LOCAL_I915_EXEC_HANDLE_LUT;
			execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
			if (gen < 6)
				execbuf.flags |= I915_EXEC_SECURE;
			igt_require(__gem_execbuf(fd, &execbuf) == 0);
			scratch = tmp[0];
			store = tmp[1];
		}

		i = 0;
		batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
		if (gen >= 8) {
			batch[++i] = store.offset + loc;
			batch[++i] = (store.offset + loc) >> 32;
		} else if (gen >= 4) {
			batch[++i] = 0;
			batch[++i] = store.offset + loc;
		} else {
			batch[i]--;
			batch[++i] = store.offset + loc;
		}
		batch[++i] = 0xc0ffee;
		igt_assert(loc == sizeof(uint32_t) * i);
		batch[++i] = MI_BATCH_BUFFER_END;

		if (flags & CONTEXTS) {
			for (n = 0; n < 64; n++)
				contexts[n] = gem_context_create(fd);
		}
		if (flags & QUEUES) {
			for (n = 0; n < 64; n++)
				contexts[n] = gem_queue_create(fd);
		}
		if (flags & FDS) {
			for (n = 0; n < 64; n++)
				fds[n] = drm_open_driver(DRIVER_INTEL);
		}

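		/* Create the 1024 links of the whisper chain, each a 4k batch
		 * initially containing just MI_BATCH_BUFFER_END, and submit
		 * them once so they are all bound and report an offset.
		 */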
		memset(batches, 0, sizeof(batches));
		for (n = 0; n < 1024; n++) {
			batches[n].handle = gem_create(fd, 4096);
			gem_write(fd, batches[n].handle, 0, &bbe, sizeof(bbe));
		}
		execbuf.buffers_ptr = to_user_pointer(batches);
		execbuf.buffer_count = 1024;
		gem_execbuf(fd, &execbuf);

		execbuf.buffers_ptr = to_user_pointer(tmp);
		execbuf.buffer_count = 2;

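		/* Prime each link: batch n stores its immediate dword into
		 * the immediate slot (offset loc) of the previous link, with
		 * batches[0] targeting the store batch itself.
		 */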
		old_offset = store.offset;
		for (n = 0; n < 1024; n++) {
			if (gen >= 8) {
				batch[1] = old_offset + loc;
				batch[2] = (old_offset + loc) >> 32;
			} else if (gen >= 4) {
				batch[2] = old_offset + loc;
			} else {
				batch[1] = old_offset + loc;
			}

			inter[n] = reloc;
			inter[n].presumed_offset = old_offset;
			inter[n].delta = loc;
			batches[n].relocs_ptr = to_user_pointer(&inter[n]);
			batches[n].relocation_count = 1;
			gem_write(fd, batches[n].handle, 0, batch, sizeof(batch));

			old_offset = batches[n].offset;
		}

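		/* Main loop: each pass seeds a value into the tail of the
		 * chain, whispers it link by link towards the store batch,
		 * and finally lets the store batch write it into the scratch
		 * bo at the per-pass offset.
		 */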
		igt_while_interruptible(flags & INTERRUPTIBLE) {
			pass = 0;
			igt_until_timeout(150) {
				uint64_t offset;

				if (flags & HANG)
					submit_hang(&hang, engines, nengine, flags);

				if (flags & CHAIN) {
					execbuf.flags &= ~ENGINE_MASK;
					execbuf.flags |= engines[rand() % nengine];
				}

				reloc.presumed_offset = scratch.offset;
				reloc.delta = 4*pass;
				offset = reloc.presumed_offset + reloc.delta;

				i = 0;
				if (gen >= 8) {
					batch[++i] = offset;
					batch[++i] = offset >> 32;
				} else if (gen >= 4) {
					batch[++i] = 0;
					batch[++i] = offset;
				} else {
					batch[++i] = offset;
				}
				batch[++i] = ~pass;
				gem_write(fd, store.handle, 0, batch, sizeof(batch));

				tmp[0] = scratch;
				igt_assert(tmp[0].flags & EXEC_OBJECT_WRITE);
				tmp[1] = store;
				verify_reloc(fd, store.handle, &reloc);
				execbuf.buffers_ptr = to_user_pointer(tmp);
				gem_execbuf(fd, &execbuf);
				igt_assert_eq_u64(reloc.presumed_offset, tmp[0].offset);
				if (flags & SYNC)
					gem_sync(fd, tmp[0].handle);
				scratch = tmp[0];

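				/* Seed the pass number into the last link and
				 * run the chain in reverse, each submission
				 * copying the value one link closer to the
				 * store batch.
				 */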
				gem_write(fd, batches[1023].handle, loc, &pass, sizeof(pass));
				for (n = 1024; --n >= 1; ) {
					uint32_t handle[2] = {};
					int this_fd = fd;

					execbuf.buffers_ptr = to_user_pointer(&batches[n-1]);
					reloc_migrations += batches[n-1].offset != inter[n].presumed_offset;
					batches[n-1].offset = inter[n].presumed_offset;
					old_offset = inter[n].presumed_offset;
					batches[n-1].relocation_count = 0;
					batches[n-1].flags |= EXEC_OBJECT_WRITE;
					verify_reloc(fd, batches[n].handle, &inter[n]);

					if (flags & FDS) {
						this_fd = fds[rand() % 64];
						handle[0] = batches[n-1].handle;
						handle[1] = batches[n].handle;
						batches[n-1].handle =
							gem_open(this_fd,
								 gem_flink(fd, handle[0]));
						batches[n].handle =
							gem_open(this_fd,
								 gem_flink(fd, handle[1]));
						if (flags & PRIORITY)
							ctx_set_random_priority(this_fd, 0);
					}

					if (!(flags & CHAIN)) {
						execbuf.flags &= ~ENGINE_MASK;
						execbuf.flags |= engines[rand() % nengine];
					}
					if (flags & (CONTEXTS | QUEUES)) {
						execbuf.rsvd1 = contexts[rand() % 64];
						if (flags & PRIORITY)
							ctx_set_random_priority(this_fd, execbuf.rsvd1);
					}

					gem_execbuf(this_fd, &execbuf);
					if (inter[n].presumed_offset == -1) {
						reloc_interruptions++;
						inter[n].presumed_offset = batches[n-1].offset;
					}
					igt_assert_eq_u64(inter[n].presumed_offset, batches[n-1].offset);

					if (flags & SYNC)
						gem_sync(this_fd, batches[n-1].handle);
					relocations += inter[n].presumed_offset != old_offset;

					batches[n-1].relocation_count = 1;
					batches[n-1].flags &= ~EXEC_OBJECT_WRITE;

					if (this_fd != fd) {
						gem_close(this_fd, batches[n-1].handle);
						batches[n-1].handle = handle[0];

						gem_close(this_fd, batches[n].handle);
						batches[n].handle = handle[1];
					}
				}
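
				/* Final hops on the default engine and context:
				 * batches[0] hands the value to the store
				 * batch, which then writes it into the scratch
				 * bo at 4*pass.
				 */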
				execbuf.flags &= ~ENGINE_MASK;
				execbuf.rsvd1 = 0;
				execbuf.buffers_ptr = to_user_pointer(&tmp);

				tmp[0] = tmp[1];
				tmp[0].relocation_count = 0;
				tmp[0].flags = EXEC_OBJECT_WRITE;
				reloc_migrations += tmp[0].offset != inter[0].presumed_offset;
				tmp[0].offset = inter[0].presumed_offset;
				old_offset = tmp[0].offset;
				tmp[1] = batches[0];
				verify_reloc(fd, batches[0].handle, &inter[0]);
				gem_execbuf(fd, &execbuf);
				if (inter[0].presumed_offset == -1) {
					reloc_interruptions++;
					inter[0].presumed_offset = tmp[0].offset;
				}
				igt_assert_eq_u64(inter[0].presumed_offset, tmp[0].offset);
				relocations += inter[0].presumed_offset != old_offset;
				batches[0] = tmp[1];

				tmp[1] = tmp[0];
				tmp[0] = scratch;
				igt_assert(tmp[0].flags & EXEC_OBJECT_WRITE);
				igt_assert_eq_u64(reloc.presumed_offset, tmp[0].offset);
				igt_assert(tmp[1].relocs_ptr == to_user_pointer(&reloc));
				tmp[1].relocation_count = 1;
				tmp[1].flags &= ~EXEC_OBJECT_WRITE;
				verify_reloc(fd, store.handle, &reloc);
				gem_execbuf(fd, &execbuf);
				eb_migrations += tmp[0].offset != scratch.offset;
				eb_migrations += tmp[1].offset != store.offset;
				igt_assert_eq_u64(reloc.presumed_offset, tmp[0].offset);
				if (flags & SYNC)
					gem_sync(fd, tmp[0].handle);

				store = tmp[1];
				scratch = tmp[0];

				if (++pass == 1024)
					break;
			}
			igt_debug("Completed %d/1024 passes\n", pass);
		}
		igt_info("Number of migrations for execbuf: %d\n", eb_migrations);
		igt_info("Number of migrations for reloc: %d, interrupted %d, patched %d\n", reloc_migrations, reloc_interruptions, relocations);

		check_bo(fd, scratch.handle, pass);
		gem_close(fd, scratch.handle);
		gem_close(fd, store.handle);

		if (flags & FDS) {
			for (n = 0; n < 64; n++)
				close(fds[n]);
		}
		if (flags & (CONTEXTS | QUEUES)) {
			for (n = 0; n < 64; n++)
				gem_context_destroy(fd, contexts[n]);
		}
		for (n = 0; n < 1024; n++)
			gem_close(fd, batches[n].handle);
	}

	igt_waitchildren();

	if (flags & HANG)
		fini_hang(&hang);
	else
		igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
	if (gpu_power_read(&power, &sample[1])) {
		igt_info("Total energy used: %.1fmJ\n",
			 gpu_power_J(&power, &sample[0], &sample[1]) * 1e3);
	}

	close(debugfs);
}

igt_main
{
	const struct mode {
		const char *name;
		unsigned flags;
	} modes[] = {
		{ "normal", 0 },
		{ "interruptible", INTERRUPTIBLE },
		{ "forked", FORKED },
		{ "sync", SYNC },
		{ "chain", CHAIN },
		{ "chain-forked", CHAIN | FORKED },
		{ "chain-interruptible", CHAIN | INTERRUPTIBLE },
		{ "chain-sync", CHAIN | SYNC },
		{ "fds", FDS },
		{ "fds-interruptible", FDS | INTERRUPTIBLE },
		{ "fds-forked", FDS | FORKED },
		{ "fds-priority", FDS | FORKED | PRIORITY },
		{ "fds-chain", FDS | CHAIN },
		{ "fds-sync", FDS | SYNC },
		{ "contexts", CONTEXTS },
		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE },
		{ "contexts-forked", CONTEXTS | FORKED },
		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
		{ "contexts-chain", CONTEXTS | CHAIN },
		{ "contexts-sync", CONTEXTS | SYNC },
		{ "queues", QUEUES },
		{ "queues-interruptible", QUEUES | INTERRUPTIBLE },
		{ "queues-forked", QUEUES | FORKED },
		{ "queues-priority", QUEUES | FORKED | PRIORITY },
		{ "queues-chain", QUEUES | CHAIN },
		{ "queues-sync", QUEUES | SYNC },
		{ NULL }
	};
	int fd = -1;

	igt_fixture {
		fd = drm_open_driver_master(DRIVER_INTEL);
		igt_require_gem(fd);
		igt_require(gem_can_store_dword(fd, 0));
		gem_submission_print_method(fd);

		igt_fork_hang_detector(fd);
	}

	for (const struct mode *m = modes; m->name; m++) {
		igt_subtest_f("%s", m->name)
			whisper(fd, ALL_ENGINES, m->flags);
		igt_subtest_f("%s-all", m->name)
			whisper(fd, ALL_ENGINES, m->flags | ALL);
	}

	for (const struct intel_execution_engine *e = intel_execution_engines;
	     e->name; e++) {
		for (const struct mode *m = modes; m->name; m++) {
			if (m->flags & CHAIN)
				continue;

			igt_subtest_f("%s-%s", e->name, m->name)
				whisper(fd, e->exec_id | e->flags, m->flags);
		}
	}

	igt_fixture {
		igt_stop_hang_detector();
	}

	igt_subtest_group {
		for (const struct mode *m = modes; m->name; m++) {
			if (m->flags & INTERRUPTIBLE)
				continue;
			igt_subtest_f("hang-%s", m->name)
				whisper(fd, ALL_ENGINES, m->flags | HANG);
		}
	}

	igt_fixture {
		close(fd);
	}
}