/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

/** @file gem_shrink.c
 *
 * Exercise the shrinker by overallocating GEM objects
 */

#include "igt.h"
#include "igt_gt.h"
#include "igt_debugfs.h"
#include "igt_sysfs.h"

#ifndef MADV_FREE
#define MADV_FREE 8
#endif

static unsigned int engines[16], nengine;

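/*
 * Each helper below allocates @alloc bytes of backing storage through a
 * different ioctl path (set-domain, pwrite, pread, mmap, execbuf, ...) and
 * then marks the object DONTNEED instead of closing it, leaving purgeable
 * objects behind for the shrinker to reap under memory pressure.
 */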
static void get_pages(int fd, uint64_t alloc)
{
	uint32_t handle = gem_create(fd, alloc);
	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

static void get_pages_dirty(int fd, uint64_t alloc)
{
	uint32_t handle = gem_create(fd, alloc);
	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

static void pwrite_(int fd, uint64_t alloc)
{
	uint32_t tmp;
	uint32_t handle = gem_create(fd, alloc);
	for (int page = 0; page < alloc>>12; page++)
		gem_write(fd, handle, (page + page % 4095) & ~3, &tmp, 4);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

static void pread_(int fd, uint64_t alloc)
{
	uint32_t tmp;
	uint32_t handle = gem_create(fd, alloc);
	for (int page = 0; page < alloc>>12; page++)
		gem_read(fd, handle, (page + page % 4095) & ~3, &tmp, 4);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

static void mmap_gtt(int fd, uint64_t alloc)
{
	uint32_t handle = gem_create(fd, alloc);
	uint32_t *ptr = gem_mmap__gtt(fd, handle, alloc, PROT_WRITE);
	for (int page = 0; page < alloc>>12; page++)
		ptr[page<<10] = 0;
	munmap(ptr, alloc);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

static void mmap_cpu(int fd, uint64_t alloc)
{
	uint32_t handle = gem_create(fd, alloc);
	uint32_t *ptr = gem_mmap__cpu(fd, handle, 0, alloc, PROT_WRITE);
	for (int page = 0; page < alloc>>12; page++)
		ptr[page<<10] = 0;
	munmap(ptr, alloc);
	gem_madvise(fd, handle, I915_MADV_DONTNEED);
}

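/* Submit a single batch object of @alloc bytes, then mark it purgeable. */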
static void execbuf1(int fd, uint64_t alloc)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 obj;
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(&obj, 0, sizeof(obj));
	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;

	obj.handle = gem_create(fd, alloc);
	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
	gem_execbuf(fd, &execbuf);
	gem_madvise(fd, obj.handle, I915_MADV_DONTNEED);
}

/* Since we want to trigger oom (SIGKILL), we don't want small allocations
 * to fail and generate a false error (SIGSEGV)! So we redirect allocations
 * through GEM objects, which should be much more likely to trigger oom. There
 * are still small allocations within the kernel, so still a small chance of
 * ENOMEM instead of a full oom.
 */
static void *__gem_calloc(int fd, size_t count, size_t size, uint64_t *out_size)
{
	uint32_t handle;
	uint64_t total;
	void *ptr;

	total = count * size;
	total = (total + 4095) & -4096;

	handle = gem_create(fd, total);
	ptr = gem_mmap__cpu(fd, handle, 0, total, PROT_WRITE);
	gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
	gem_close(fd, handle);

	*out_size = total;
	return ptr;
}

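/*
 * Build up an execbuf with one 1MiB object per MiB of @alloc plus a trivial
 * batch, resubmitting with the growing object list after each new object,
 * then mark everything purgeable.
 */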
static void execbufN(int fd, uint64_t alloc)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_execbuffer2 execbuf;
	int count = alloc >> 20;
	uint64_t obj_size;

	obj = __gem_calloc(fd, alloc + 1, sizeof(*obj), &obj_size);
	memset(&execbuf, 0, sizeof(execbuf));

	obj[count].handle = gem_create(fd, 4096);
	gem_write(fd, obj[count].handle, 0, &bbe, sizeof(bbe));

	for (int i = 1; i <= count; i++) {
		int j = count - i;

		obj[j].handle = gem_create(fd, 1 << 20);
		execbuf.buffers_ptr = to_user_pointer(&obj[j]);
		execbuf.buffer_count = i + 1;
		gem_execbuf(fd, &execbuf);
	}

	for (int i = 0; i <= count; i++)
		gem_madvise(fd, obj[i].handle, I915_MADV_DONTNEED);
	munmap(obj, obj_size);
}

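/*
 * As execbufN, but mark each new object as a GPU write target and spread the
 * submissions across all available engines.
 */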
static void execbufX(int fd, uint64_t alloc)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_execbuffer2 execbuf;
	int count = alloc >> 20;
	uint64_t obj_size;

	obj = __gem_calloc(fd, alloc + 1, sizeof(*obj), &obj_size);
	memset(&execbuf, 0, sizeof(execbuf));

	obj[count].handle = gem_create(fd, 4096);
	gem_write(fd, obj[count].handle, 0, &bbe, sizeof(bbe));

	for (int i = 1; i <= count; i++) {
		int j = count - i;

		obj[j+1].flags = 0;

		obj[j].handle = gem_create(fd, 1 << 20);
		obj[j].flags = EXEC_OBJECT_WRITE;

		execbuf.buffers_ptr = to_user_pointer(&obj[j]);
		execbuf.buffer_count = i + 1;
		execbuf.flags = engines[j % nengine];
		gem_execbuf(fd, &execbuf);
	}

	for (int i = 0; i <= count; i++)
		gem_madvise(fd, obj[i].handle, I915_MADV_DONTNEED);
	munmap(obj, obj_size);
}

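/*
 * As execbufN, but inject a GPU hang before marking the objects purgeable.
 */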
static void hang(int fd, uint64_t alloc)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_execbuffer2 execbuf;
	int count = alloc >> 20;
	uint64_t obj_size;

	obj = __gem_calloc(fd, alloc + 1, sizeof(*obj), &obj_size);
	memset(&execbuf, 0, sizeof(execbuf));

	obj[count].handle = gem_create(fd, 4096);
	gem_write(fd, obj[count].handle, 0, &bbe, sizeof(bbe));

	for (int i = 1; i <= count; i++) {
		int j = count - i;

		obj[j].handle = gem_create(fd, 1 << 20);
		execbuf.buffers_ptr = to_user_pointer(&obj[j]);
		execbuf.buffer_count = i + 1;
		gem_execbuf(fd, &execbuf);
	}

	gem_close(fd, igt_hang_ring(fd, 0).spin->handle);
	for (int i = 0; i <= count; i++)
		gem_madvise(fd, obj[i].handle, I915_MADV_DONTNEED);
	munmap(obj, obj_size);
}

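/*
 * Allocate @alloc bytes of anonymous memory, wrap it in a userptr object,
 * move it to the GTT domain (writing to it if UDIRTY is set), then mark the
 * anonymous pages MADV_FREE so they may be lazily reclaimed.
 */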
static void userptr(int fd, uint64_t alloc, unsigned int flags)
#define UDIRTY (1 << 0)
{
	struct local_i915_gem_userptr userptr;
	void *ptr;

	igt_assert((alloc & 4095) == 0);

	ptr = mmap(NULL, alloc,
		   PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
		   -1, 0);
	igt_assert(ptr != (void *)-1);

	memset(&userptr, 0, sizeof(userptr));
	userptr.user_size = alloc;
	userptr.user_ptr = to_user_pointer(ptr);
	do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr);

	if (flags & UDIRTY)
		gem_set_domain(fd, userptr.handle,
			       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
	else
		gem_set_domain(fd, userptr.handle, I915_GEM_DOMAIN_GTT, 0);

	madvise(ptr, alloc, MADV_FREE);
}

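/*
 * Probe for userptr support by passing an obviously invalid user address:
 * a kernel with userptr rejects it with EFAULT, while a kernel lacking the
 * ioctl fails with a different errno.
 */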
static bool has_userptr(void)
{
	struct local_i915_gem_userptr userptr;
	int fd = drm_open_driver(DRIVER_INTEL);
	int err;

	memset(&userptr, 0, sizeof(userptr));
	userptr.user_size = 8192;
	userptr.user_ptr = -4096;

	err = 0;
	if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr))
		err = errno;

	close(fd);

	return err == EFAULT;
}

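/*
 * Background memory hog for the oom mode: grab @alloc bytes of anonymous
 * memory, touch every page, and never free it, so each pass adds real
 * memory pressure.
 */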
static void leak(int fd, uint64_t alloc)
{
	char *ptr;

	ptr = mmap(NULL, alloc, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE | MAP_POPULATE,
		   -1, 0);
	if (ptr == (char *)-1)
		return;

	/* Touch every page in case MAP_POPULATE did not fault them all in */
	while (alloc) {
		alloc -= 4096;
		ptr[alloc] = 0;
	}
}

#define SOLO 1
#define USERPTR 2
#define USERPTR_DIRTY 4
#define OOM 8

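/*
 * Fork nchildren processes, each repeatedly reopening the driver and running
 * func() nchildren times per open, optionally with extra userptr and/or
 * anonymous-memory hogs forked alongside as background pressure.
 */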
static void run_test(int nchildren, uint64_t alloc,
		     void (*func)(int, uint64_t), unsigned flags)
{
	const int timeout = flags & SOLO ? 1 : 20;

	/* Each pass consumes alloc bytes and doesn't drop
	 * its reference to the object (i.e. calls
	 * gem_madvise(DONTNEED) instead of gem_close()).
	 * After nchildren passes we expect each process
	 * to have enough objects to consume all of memory
	 * if left unchecked.
	 */

	if (flags & SOLO)
		nchildren = 1;

	/* Background load */
	if (flags & OOM) {
		igt_fork(child, nchildren) {
			igt_until_timeout(timeout) {
				int fd = drm_open_driver(DRIVER_INTEL);
				for (int pass = 0; pass < nchildren; pass++)
					leak(fd, alloc);
				close(fd);
			}
		}
	}

	if (flags & USERPTR) {
		igt_require(has_userptr());
		igt_fork(child, (nchildren + 1)/2) {
			igt_until_timeout(timeout) {
				int fd = drm_open_driver(DRIVER_INTEL);
				for (int pass = 0; pass < nchildren; pass++)
					userptr(fd, alloc, 0);
				close(fd);
			}
		}
		nchildren = (nchildren + 1)/2;
	}

	if (flags & USERPTR_DIRTY) {
		igt_require(has_userptr());
		igt_fork(child, (nchildren + 1)/2) {
			igt_until_timeout(timeout) {
				int fd = drm_open_driver(DRIVER_INTEL);
				for (int pass = 0; pass < nchildren; pass++)
					userptr(fd, alloc, UDIRTY);
				close(fd);
			}
		}
		nchildren = (nchildren + 1)/2;
	}

	/* Exercise major ioctls */
	igt_fork(child, nchildren) {
		igt_until_timeout(timeout) {
			int fd = drm_open_driver(DRIVER_INTEL);
			for (int pass = 0; pass < nchildren; pass++)
				func(fd, alloc);
			close(fd);
		}
	}
	igt_waitchildren();
}

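/*
 * Hammer debugfs i915_drop_caches (DROP_BOUND | DROP_UNBOUND) from one child
 * per CPU while the parent keeps an engine busy with back-to-back spin
 * batches, so reclaim constantly races against active objects.
 */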
static void reclaim(unsigned engine, int timeout)
{
	const uint64_t timeout_100ms = 100000000LL;
	int fd = drm_open_driver(DRIVER_INTEL);
	int debugfs = igt_debugfs_dir(fd);
	igt_spin_t *spin;
	volatile uint32_t *shared;

	shared = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
	igt_assert(shared != MAP_FAILED);

	igt_fork(child, sysconf(_SC_NPROCESSORS_ONLN)) {
		do {
			igt_sysfs_printf(debugfs, "i915_drop_caches",
					 "%d", DROP_BOUND | DROP_UNBOUND);
		} while (!*shared);
	}

	spin = igt_spin_new(fd, .engine = engine);
	igt_until_timeout(timeout) {
		igt_spin_t *next = __igt_spin_new(fd, .engine = engine);

		igt_spin_set_timeout(spin, timeout_100ms);
		gem_sync(fd, spin->handle);

		igt_spin_free(fd, spin);
		spin = next;
	}
	igt_spin_free(fd, spin);

	*shared = 1;
	igt_waitchildren();

	munmap((void *)shared, 4096);
	close(debugfs);
	close(fd);
}

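/*
 * The subtest matrix pairs each allocator (get-pages, pwrite, mmap, execbuf,
 * hang, ...) with each mode: a single-process sanity check, plain
 * multi-process pressure, and variants with userptr, dirty userptr or an
 * anonymous-memory oom load running in parallel.
 */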
igt_main
{
	const struct test {
		const char *name;
		void (*func)(int, uint64_t);
	} tests[] = {
		{ "get-pages", get_pages },
		{ "get-pages-dirty", get_pages_dirty },
		{ "pwrite", pwrite_ },
		{ "pread", pread_ },
		{ "mmap-gtt", mmap_gtt },
		{ "mmap-cpu", mmap_cpu },
		{ "execbuf1", execbuf1 },
		{ "execbufN", execbufN },
		{ "execbufX", execbufX },
		{ "hang", hang },
		{ NULL },
	};
	const struct mode {
		const char *suffix;
		unsigned flags;
	} modes[] = {
		{ "-sanitycheck", SOLO },
		{ "", 0 },
		{ "-userptr", USERPTR },
		{ "-userptr-dirty", USERPTR | USERPTR_DIRTY },
		{ "-oom", USERPTR | OOM },
		{ NULL },
	};
	uint64_t alloc_size = 0;
	int num_processes = 0;

	igt_skip_on_simulation();

	igt_fixture {
		uint64_t mem_size = intel_get_total_ram_mb();
		unsigned int engine;
		int fd;

		fd = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(fd);

		/*
		 * Spawn enough processes to use all memory, but each one
		 * only uses half of the mappable aperture (~128MiB).
		 * Individually the processes would be ok, but en masse
		 * we expect the shrinker to start purging objects,
		 * and possibly fail.
		 */
		alloc_size = gem_mappable_aperture_size() / 2;
		num_processes = 1 + (mem_size / (alloc_size >> 20));

		igt_info("Using %d processes and %'lluMiB per process\n",
			 num_processes, (long long)(alloc_size >> 20));

		intel_require_memory(num_processes, alloc_size,
				     CHECK_SWAP | CHECK_RAM);

		nengine = 0;
		for_each_engine(fd, engine)
			engines[nengine++] = engine;
		igt_require(nengine);

		close(fd);
	}

	igt_subtest("reclaim")
		reclaim(I915_EXEC_DEFAULT, 2);

	for (const struct test *t = tests; t->name; t++) {
		for (const struct mode *m = modes; m->suffix; m++) {
			igt_subtest_f("%s%s", t->name, m->suffix)
				run_test(num_processes, alloc_size,
					 t->func, m->flags);
		}
	}
}