/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

/** @file gem_shrink.c
 *
 * Exercise the shrinker by overallocating GEM objects
 */

#include "igt.h"
#include "igt_gt.h"
#include "igt_debugfs.h"
#include "igt_sysfs.h"

#ifndef MADV_FREE
#define MADV_FREE 8
#endif

static unsigned int engines[16], nengine;

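/* Create an object and force its backing store to be allocated (by moving
 * it to the GTT domain), then mark it purgeable so the shrinker may reap
 * it under memory pressure.
 */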
static void get_pages(int fd, uint64_t alloc)
{
	uint32_t handle = gem_create(fd, alloc);
	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

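/* As get_pages, but set a write domain as well so the pages are marked
 * dirty before being made purgeable.
 */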
static void get_pages_dirty(int fd, uint64_t alloc)
{
	uint32_t handle = gem_create(fd, alloc);
	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

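/* Force the backing store to be allocated via the pwrite ioctl, issuing
 * one small write per page of the allocation, then mark the object
 * purgeable.
 */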
static void pwrite_(int fd, uint64_t alloc)
{
	uint32_t tmp;
	uint32_t handle = gem_create(fd, alloc);
	for (int page = 0; page < alloc>>12; page++)
		gem_write(fd, handle, (page + page % 4095) & ~3, &tmp, 4);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

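/* As pwrite_, but exercise the pread path instead. */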
static void pread_(int fd, uint64_t alloc)
{
	uint32_t tmp;
	uint32_t handle = gem_create(fd, alloc);
	for (int page = 0; page < alloc>>12; page++)
		gem_read(fd, handle, (page + page % 4095) & ~3, &tmp, 4);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

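/* Fault in the object through a GTT mmap, writing one dword per 4KiB page,
 * then mark it purgeable.
 */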
static void mmap_gtt(int fd, uint64_t alloc)
{
	uint32_t handle = gem_create(fd, alloc);
	uint32_t *ptr = gem_mmap__gtt(fd, handle, alloc, PROT_WRITE);
	for (int page = 0; page < alloc>>12; page++)
		ptr[page<<10] = 0;
	munmap(ptr, alloc);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

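/* As mmap_gtt, but fault the pages in through a CPU mmap of the shmem
 * backing store instead.
 */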
static void mmap_cpu(int fd, uint64_t alloc)
{
	uint32_t handle = gem_create(fd, alloc);
	uint32_t *ptr = gem_mmap__cpu(fd, handle, 0, alloc, PROT_WRITE);
	for (int page = 0; page < alloc>>12; page++)
		ptr[page<<10] = 0;
	munmap(ptr, alloc);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

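/* Instantiate the backing store via execbuf: submit the entire allocation
 * as a single batch containing just MI_BATCH_BUFFER_END.
 */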
static void execbuf1(int fd, uint64_t alloc)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 obj;
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(&obj, 0, sizeof(obj));
	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;

	obj.handle = gem_create(fd, alloc);
	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
	gem_execbuf(fd, &execbuf);
	gem_madvise(fd, obj.handle, I915_MADV_DONTNEED);
}

/* Since we want to trigger oom (SIGKILL), we don't want small allocations
 * to fail and generate a false error (SIGSEGV)! So we redirect allocations
 * through GEM objects, which should be much more likely to trigger oom. There
 * are still small allocations within the kernel, so there is still a small
 * chance of ENOMEM instead of a full oom.
 */
static void *__gem_calloc(int fd, size_t count, size_t size, uint64_t *out_size)
{
	uint32_t handle;
	uint64_t total;
	void *ptr;

	total = count * size;
	total = (total + 4095) & -4096;

	handle = gem_create(fd, total);
	ptr = gem_mmap__cpu(fd, handle, 0, total, PROT_WRITE);
	gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
	gem_close(fd, handle);

	*out_size = total;
	return ptr;
}

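/* Grow an execbuf working set one 1MiB object at a time, resubmitting the
 * whole array after each addition, then mark everything purgeable. The
 * object array itself lives in a purgeable GEM mapping (__gem_calloc);
 * note it is sized far more generously than the count+1 entries actually
 * used.
 */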
static void execbufN(int fd, uint64_t alloc)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_execbuffer2 execbuf;
	int count = alloc >> 20;
	uint64_t obj_size;

	obj = __gem_calloc(fd, alloc + 1, sizeof(*obj), &obj_size);
	memset(&execbuf, 0, sizeof(execbuf));

	obj[count].handle = gem_create(fd, 4096);
	gem_write(fd, obj[count].handle, 0, &bbe, sizeof(bbe));

	for (int i = 1; i <= count; i++) {
		int j = count - i;

		obj[j].handle = gem_create(fd, 1 << 20);
		execbuf.buffers_ptr = to_user_pointer(&obj[j]);
		execbuf.buffer_count = i + 1;
		gem_execbuf(fd, &execbuf);
	}

	for (int i = 0; i <= count; i++)
		gem_madvise(fd, obj[i].handle, I915_MADV_DONTNEED);
	munmap(obj, obj_size);
}

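/* As execbufN, but spread the submissions across all engines and mark each
 * new object as a GPU write, so the shrinker must also contend with
 * active, dirty objects.
 */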
static void execbufX(int fd, uint64_t alloc)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_execbuffer2 execbuf;
	int count = alloc >> 20;
	uint64_t obj_size;

	obj = __gem_calloc(fd, alloc + 1, sizeof(*obj), &obj_size);
	memset(&execbuf, 0, sizeof(execbuf));

	obj[count].handle = gem_create(fd, 4096);
	gem_write(fd, obj[count].handle, 0, &bbe, sizeof(bbe));

	for (int i = 1; i <= count; i++) {
		int j = count - i;

		obj[j+1].flags = 0;

		obj[j].handle = gem_create(fd, 1 << 20);
		obj[j].flags = EXEC_OBJECT_WRITE;

		execbuf.buffers_ptr = to_user_pointer(&obj[j]);
		execbuf.buffer_count = i + 1;
		execbuf.flags = engines[j % nengine];
		gem_execbuf(fd, &execbuf);
	}

	for (int i = 0; i <= count; i++)
		gem_madvise(fd, obj[i].handle, I915_MADV_DONTNEED);
	munmap(obj, obj_size);
}

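/* As execbufN, but inject a GPU hang before marking the objects purgeable,
 * so that shrinking overlaps with hang recovery.
 */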
static void hang(int fd, uint64_t alloc)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_execbuffer2 execbuf;
	int count = alloc >> 20;
	uint64_t obj_size;

	obj = __gem_calloc(fd, alloc + 1, sizeof(*obj), &obj_size);
	memset(&execbuf, 0, sizeof(execbuf));

	obj[count].handle = gem_create(fd, 4096);
	gem_write(fd, obj[count].handle, 0, &bbe, sizeof(bbe));

	for (int i = 1; i <= count; i++) {
		int j = count - i;

		obj[j].handle = gem_create(fd, 1 << 20);
		execbuf.buffers_ptr = to_user_pointer(&obj[j]);
		execbuf.buffer_count = i + 1;
		gem_execbuf(fd, &execbuf);
	}

	gem_close(fd, igt_hang_ring(fd, 0).spin->handle);
	for (int i = 0; i <= count; i++)
		gem_madvise(fd, obj[i].handle, I915_MADV_DONTNEED);
	munmap(obj, obj_size);
}

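/* Allocate via a userptr object wrapping fresh anonymous memory, fault it
 * in through set-domain (optionally as a GPU write with UDIRTY), then give
 * the pages back with MADV_FREE rather than gem_madvise().
 */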
static void userptr(int fd, uint64_t alloc, unsigned int flags)
#define UDIRTY (1 << 0)
{
	struct local_i915_gem_userptr userptr;
	void *ptr;

	igt_assert((alloc & 4095) == 0);

	ptr = mmap(NULL, alloc,
		   PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
		   -1, 0);
	igt_assert(ptr != (void *)-1);

	memset(&userptr, 0, sizeof(userptr));
	userptr.user_size = alloc;
	userptr.user_ptr = to_user_pointer(ptr);
	do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr);

	if (flags & UDIRTY)
		gem_set_domain(fd, userptr.handle,
			       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
	else
		gem_set_domain(fd, userptr.handle, I915_GEM_DOMAIN_GTT, 0);

	madvise(ptr, alloc, MADV_FREE);
}

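/* Check for userptr support by submitting a range that wraps the address
 * space: if the ioctl is available, it must reject the range with EFAULT.
 */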
static bool has_userptr(void)
{
	struct local_i915_gem_userptr userptr;
	int fd = drm_open_driver(DRIVER_INTEL);
	int err;

	memset(&userptr, 0, sizeof(userptr));
	userptr.user_size = 8192;
	userptr.user_ptr = -4096;

	err = 0;
	if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr))
		err = errno;

	close(fd);

	return err == EFAULT;
}

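/* Apply plain memory pressure: map anonymous pages, dirty every one of
 * them, and never unmap, leaking the memory for the lifetime of the child.
 */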
static void leak(int fd, uint64_t alloc)
{
	char *ptr;

	ptr = mmap(NULL, alloc, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE | MAP_POPULATE,
		   -1, 0);
	if (ptr == (char *)-1) /* never dereference MAP_FAILED */
		return;

	while (alloc) {
		alloc -= 4096;
		ptr[alloc] = 0;
	}
}

#define SOLO 1
#define USERPTR 2
#define USERPTR_DIRTY 4
#define OOM 8

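/* Fork a flotilla of children, each repeatedly invoking func() (plus any
 * requested userptr/oom background load), so that together they overcommit
 * memory with purgeable objects.
 */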
static void run_test(int nchildren, uint64_t alloc,
		     void (*func)(int, uint64_t), unsigned flags)
{
	const int timeout = flags & SOLO ? 1 : 20;

	/* Each pass consumes alloc bytes and doesn't drop its reference to
	 * the object (i.e. calls gem_madvise(DONTNEED) instead of
	 * gem_close()). After nchildren passes we expect each process to
	 * have enough objects to consume all of memory if left unchecked.
	 */

	if (flags & SOLO)
		nchildren = 1;

	/* Background load */
	if (flags & OOM) {
		igt_fork(child, nchildren) {
			igt_until_timeout(timeout) {
				int fd = drm_open_driver(DRIVER_INTEL);
				for (int pass = 0; pass < nchildren; pass++)
					leak(fd, alloc);
				close(fd);
			}
		}
	}

	if (flags & USERPTR) {
		igt_require(has_userptr());
		igt_fork(child, (nchildren + 1)/2) {
			igt_until_timeout(timeout) {
				int fd = drm_open_driver(DRIVER_INTEL);
				for (int pass = 0; pass < nchildren; pass++)
					userptr(fd, alloc, 0);
				close(fd);
			}
		}
		nchildren = (nchildren + 1)/2;
	}

	if (flags & USERPTR_DIRTY) {
		igt_require(has_userptr());
		igt_fork(child, (nchildren + 1)/2) {
			igt_until_timeout(timeout) {
				int fd = drm_open_driver(DRIVER_INTEL);
				for (int pass = 0; pass < nchildren; pass++)
					userptr(fd, alloc, UDIRTY);
				close(fd);
			}
		}
		nchildren = (nchildren + 1)/2;
	}

	/* Exercise major ioctls */
	igt_fork(child, nchildren) {
		igt_until_timeout(timeout) {
			int fd = drm_open_driver(DRIVER_INTEL);
			for (int pass = 0; pass < nchildren; pass++)
				func(fd, alloc);
			close(fd);
		}
	}
	igt_waitchildren();
}

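/* Drop bound/unbound objects via debugfs from every CPU while a client
 * continually submits and retires spin batches on one engine, exercising
 * reclaim against an active GPU client.
 */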
static void reclaim(unsigned engine, int timeout)
{
	const uint64_t timeout_100ms = 100000000LL;
	int fd = drm_open_driver(DRIVER_INTEL);
	int debugfs = igt_debugfs_dir(fd);
	igt_spin_t *spin;
	volatile uint32_t *shared;

	shared = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
	igt_assert(shared != MAP_FAILED);

	igt_fork(child, sysconf(_SC_NPROCESSORS_ONLN)) {
		do {
			igt_sysfs_printf(debugfs, "i915_drop_caches",
					 "%d", DROP_BOUND | DROP_UNBOUND);
		} while (!*shared);
	}

	spin = igt_spin_new(fd, .engine = engine);
	igt_until_timeout(timeout) {
		igt_spin_t *next = __igt_spin_new(fd, .engine = engine);

		igt_spin_set_timeout(spin, timeout_100ms);
		gem_sync(fd, spin->handle);

		igt_spin_free(fd, spin);
		spin = next;
	}
	igt_spin_free(fd, spin);

	*shared = 1;
	igt_waitchildren();

	munmap((void *)shared, 4096);
	close(debugfs);
	close(fd);
}

igt_main
{
	const struct test {
		const char *name;
		void (*func)(int, uint64_t);
	} tests[] = {
		{ "get-pages", get_pages },
		{ "get-pages-dirty", get_pages_dirty },
		{ "pwrite", pwrite_ },
		{ "pread", pread_ },
		{ "mmap-gtt", mmap_gtt },
		{ "mmap-cpu", mmap_cpu },
		{ "execbuf1", execbuf1 },
		{ "execbufN", execbufN },
		{ "execbufX", execbufX },
		{ "hang", hang },
		{ NULL },
	};
	const struct mode {
		const char *suffix;
		unsigned flags;
	} modes[] = {
		{ "-sanitycheck", SOLO },
		{ "", 0 },
		{ "-userptr", USERPTR },
		{ "-userptr-dirty", USERPTR | USERPTR_DIRTY },
		{ "-oom", USERPTR | OOM },
		{ NULL },
	};
	uint64_t alloc_size = 0;
	int num_processes = 0;

	igt_skip_on_simulation();

	igt_fixture {
		uint64_t mem_size = intel_get_total_ram_mb();
		unsigned int engine;
		int fd;

		fd = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(fd);

		/*
		 * Spawn enough processes to use all memory, but each only
		 * uses half the available mappable aperture (~128MiB).
		 * Individually the processes would be ok, but en masse
		 * we expect the shrinker to start purging objects,
		 * and possibly fail.
		 */
		alloc_size = gem_mappable_aperture_size() / 2;
		num_processes = 1 + (mem_size / (alloc_size >> 20));

		igt_info("Using %d processes and %'lluMiB per process\n",
			 num_processes, (long long)(alloc_size >> 20));

		intel_require_memory(num_processes, alloc_size,
				     CHECK_SWAP | CHECK_RAM);

		nengine = 0;
		for_each_engine(fd, engine)
			engines[nengine++] = engine;
		igt_require(nengine);

		close(fd);
	}

	igt_subtest("reclaim")
		reclaim(I915_EXEC_DEFAULT, 2);

	for (const struct test *t = tests; t->name; t++) {
		for (const struct mode *m = modes; m->suffix; m++) {
			igt_subtest_f("%s%s", t->name, m->suffix)
				run_test(num_processes, alloc_size,
					 t->func, m->flags);
		}
	}
}