1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Chris Wilson <chris@chris-wilson.co.uk>
25  *
26  */
27 
28 #include "config.h"
29 
30 #include "igt.h"
31 #include <unistd.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <fcntl.h>
36 #include <inttypes.h>
37 #include <errno.h>
38 #include <sys/time.h>
39 #include <pthread.h>
40 #include "drm.h"
41 #include "i915_drm.h"
42 
43 #define OBJECT_SIZE (1024*1024) /* restricted to 1MiB alignment on i915 fences */
44 
/*
 * Return the interval from *start to *end in seconds, combining the whole
 * seconds with the microsecond remainder as a fractional part.
 */
static double elapsed(const struct timeval *start,
		      const struct timeval *end)
{
	const double whole = end->tv_sec - start->tv_sec;
	const double frac = 1e-6 * (end->tv_usec - start->tv_usec);

	return whole + frac;
}
50 
performance(void)51 static void performance(void)
52 {
53 	int n, loop, count;
54 	int fd, num_fences;
55 	double linear[2], tiled[2];
56 
57 	fd = drm_open_driver(DRIVER_INTEL);
58 
59 	num_fences = gem_available_fences(fd);
60 	igt_require(num_fences > 0);
61 
62 	for (count = 2; count < 4*num_fences; count *= 2) {
63 		struct timeval start, end;
64 		uint32_t handle[count];
65 		void *ptr[count];
66 
67 		for (n = 0; n < count; n++) {
68 			handle[n] = gem_create(fd, OBJECT_SIZE);
69 			ptr[n] = gem_mmap__gtt(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE);
70 		}
71 
72 		gettimeofday(&start, NULL);
73 		for (loop = 0; loop < 1024; loop++) {
74 			for (n = 0; n < count; n++)
75 				memset(ptr[n], 0, OBJECT_SIZE);
76 		}
77 		gettimeofday(&end, NULL);
78 
79 		linear[count != 2] = count * loop / elapsed(&start, &end);
80 		igt_info("Upload rate for %d linear surfaces:	%7.3fMiB/s\n", count, linear[count != 2]);
81 
82 		for (n = 0; n < count; n++)
83 			gem_set_tiling(fd, handle[n], I915_TILING_X, 1024);
84 
85 		gettimeofday(&start, NULL);
86 		for (loop = 0; loop < 1024; loop++) {
87 			for (n = 0; n < count; n++)
88 				memset(ptr[n], 0, OBJECT_SIZE);
89 		}
90 		gettimeofday(&end, NULL);
91 
92 		tiled[count != 2] = count * loop / elapsed(&start, &end);
93 		igt_info("Upload rate for %d tiled surfaces:	%7.3fMiB/s\n", count, tiled[count != 2]);
94 
95 		for (n = 0; n < count; n++) {
96 			munmap(ptr[n], OBJECT_SIZE);
97 			gem_close(fd, handle[n]);
98 		}
99 
100 	}
101 
102 	errno = 0;
103 	igt_assert(linear[1] > 0.75 * linear[0]);
104 	igt_assert(tiled[1] > 0.75 * tiled[0]);
105 }
106 
/*
 * Per-thread state passed to read_thread_performance() and
 * write_thread_performance() as the pthread closure.
 */
struct thread_performance {
	pthread_t thread;	/* filled in by pthread_create() */
	int id;			/* index assigned by the spawning loop */
	int count;		/* number of entries in ptr[] */
	int direction;		/* READ or WRITE */
	int loops;		/* number of 4096-byte pages to touch */
	void **ptr;		/* array of mappings the thread picks from */
};
112 
read_thread_performance(void * closure)113 static void *read_thread_performance(void *closure)
114 {
115 	struct thread_performance *t = closure;
116 	uint32_t x = 0;
117 	int n, m;
118 
119 	for (n = 0; n < t->loops; n++) {
120 		uint32_t *src = t->ptr[rand() % t->count];
121 		src += (rand() % 256) * 4096 / 4;
122 		for (m = 0; m < 4096/4; m++)
123 			x += src[m];
124 	}
125 
126 	return (void *)(uintptr_t)x;
127 }
128 
write_thread_performance(void * closure)129 static void *write_thread_performance(void *closure)
130 {
131 	struct thread_performance *t = closure;
132 	int n;
133 
134 	for (n = 0; n < t->loops; n++) {
135 		uint32_t *dst = t->ptr[rand() % t->count];
136 		dst += (rand() % 256) * 4096 / 4;
137 		memset(dst, 0, 4096);
138 	}
139 
140 	return NULL;
141 }
142 
#define READ (1<<0)
#define WRITE (1<<1)
/*
 * Map a READ/WRITE bitmask to the label used in the rate reports:
 * READ -> "Download", WRITE -> "Upload", both -> "Combined",
 * anything else -> "Unknown".
 */
static const char *direction_string(unsigned mask)
{
	static const char *const labels[] = {
		[READ] = "Download",
		[WRITE] = "Upload",
		[READ | WRITE] = "Combined",
	};
	const char *label = NULL;

	if (mask < sizeof(labels) / sizeof(labels[0]))
		label = labels[mask];

	/* Index 0 has no entry and out-of-range masks have no slot at all. */
	return label ? label : "Unknown";
}
thread_performance(unsigned mask)154 static void thread_performance(unsigned mask)
155 {
156 	const int loops = 4096;
157 	int n, count;
158 	int fd, num_fences;
159 	double linear[2], tiled[2];
160 
161 	fd = drm_open_driver(DRIVER_INTEL);
162 
163 	num_fences = gem_available_fences(fd);
164 	igt_require(num_fences > 0);
165 
166 	for (count = 2; count < 4*num_fences; count *= 2) {
167 		const int nthreads = (mask & READ ? count : 0) + (mask & WRITE ? count : 0);
168 		struct timeval start, end;
169 		struct thread_performance readers[count];
170 		struct thread_performance writers[count];
171 		uint32_t handle[count];
172 		void *ptr[count];
173 
174 		for (n = 0; n < count; n++) {
175 			handle[n] = gem_create(fd, OBJECT_SIZE);
176 			ptr[n] = gem_mmap__gtt(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE);
177 
178 			if (mask & READ) {
179 				readers[n].id = n;
180 				readers[n].direction = READ;
181 				readers[n].ptr = ptr;
182 				readers[n].count = count;
183 				readers[n].loops = loops;
184 			}
185 
186 			if (mask & WRITE) {
187 				writers[n].id = count - n - 1;
188 				writers[n].direction = WRITE;
189 				writers[n].ptr = ptr;
190 				writers[n].count = count;
191 				writers[n].loops = loops;
192 			}
193 		}
194 
195 		gettimeofday(&start, NULL);
196 		for (n = 0; n < count; n++) {
197 			if (mask & READ)
198 				pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]);
199 			if (mask & WRITE)
200 				pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]);
201 		}
202 		for (n = 0; n < count; n++) {
203 			if (mask & READ)
204 				pthread_join(readers[n].thread, NULL);
205 			if (mask & WRITE)
206 				pthread_join(writers[n].thread, NULL);
207 		}
208 		gettimeofday(&end, NULL);
209 
210 		linear[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
211 		igt_info("%s rate for %d linear surfaces, %d threads:	%7.3fMiB/s\n", direction_string(mask), count, nthreads, linear[count != 2]);
212 
213 		for (n = 0; n < count; n++)
214 			gem_set_tiling(fd, handle[n], I915_TILING_X, 1024);
215 
216 		gettimeofday(&start, NULL);
217 		for (n = 0; n < count; n++) {
218 			if (mask & READ)
219 				pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]);
220 			if (mask & WRITE)
221 				pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]);
222 		}
223 		for (n = 0; n < count; n++) {
224 			if (mask & READ)
225 				pthread_join(readers[n].thread, NULL);
226 			if (mask & WRITE)
227 				pthread_join(writers[n].thread, NULL);
228 		}
229 		gettimeofday(&end, NULL);
230 
231 		tiled[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
232 		igt_info("%s rate for %d tiled surfaces, %d threads:	%7.3fMiB/s\n", direction_string(mask), count, nthreads, tiled[count != 2]);
233 
234 		for (n = 0; n < count; n++) {
235 			munmap(ptr[n], OBJECT_SIZE);
236 			gem_close(fd, handle[n]);
237 		}
238 	}
239 
240 	errno = 0;
241 	igt_assert(linear[1] > 0.75 * linear[0]);
242 	igt_assert(tiled[1] > 0.75 * tiled[0]);
243 }
244 
/*
 * Per-thread state passed to no_contention() and wc_mmap() as the pthread
 * closure.
 */
struct thread_contention {
	pthread_t thread;	/* filled in by pthread_create() */
	uint32_t handle;	/* GEM object this thread maps and unmaps */
	int loops;		/* number of map/write/unmap cycles */
	int fd;			/* DRM fd the handle belongs to */
};
no_contention(void * closure)250 static void *no_contention(void *closure)
251 {
252 	struct thread_contention *t = closure;
253 	int n;
254 
255 	for (n = 0; n < t->loops; n++) {
256 		uint32_t *ptr = gem_mmap__gtt(t->fd, t->handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
257 		memset(ptr + (rand() % 256) * 4096 / 4, 0, 4096);
258 		munmap(ptr, OBJECT_SIZE);
259 	}
260 
261 	return NULL;
262 }
263 
wc_mmap(void * closure)264 static void *wc_mmap(void *closure)
265 {
266 	struct thread_contention *t = closure;
267 	int n;
268 
269 	for (n = 0; n < t->loops; n++) {
270 		uint32_t *ptr = gem_mmap__wc(t->fd, t->handle, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);
271 		memset(ptr + (rand() % 256) * 4096 / 4, 0, 4096);
272 		munmap(ptr, OBJECT_SIZE);
273 	}
274 
275 	return NULL;
276 }
277 
thread_contention(void)278 static void thread_contention(void)
279 {
280 	const int loops = 4096;
281 	int n, count;
282 	int fd, num_fences;
283 	double linear[2], tiled[2];
284 
285 	fd = drm_open_driver(DRIVER_INTEL);
286 
287 	num_fences = gem_available_fences(fd);
288 	igt_require(num_fences > 0);
289 
290 	for (count = 1; count < 4*num_fences; count *= 2) {
291 		struct timeval start, end;
292 		struct thread_contention threads[count];
293 
294 		for (n = 0; n < count; n++) {
295 			threads[n].handle = gem_create(fd, OBJECT_SIZE);
296 			threads[n].loops = loops;
297 			threads[n].fd = fd;
298 		}
299 
300 		gettimeofday(&start, NULL);
301 		for (n = 0; n < count; n++)
302 			pthread_create(&threads[n].thread, NULL, no_contention, &threads[n]);
303 		for (n = 0; n < count; n++)
304 			pthread_join(threads[n].thread, NULL);
305 		gettimeofday(&end, NULL);
306 
307 		linear[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
308 		igt_info("Contended upload rate for %d linear threads:	%7.3fMiB/s\n", count, linear[count != 2]);
309 
310 		for (n = 0; n < count; n++)
311 			gem_set_tiling(fd, threads[n].handle, I915_TILING_X, 1024);
312 
313 		gettimeofday(&start, NULL);
314 		for (n = 0; n < count; n++)
315 			pthread_create(&threads[n].thread, NULL, no_contention, &threads[n]);
316 		for (n = 0; n < count; n++)
317 			pthread_join(threads[n].thread, NULL);
318 		gettimeofday(&end, NULL);
319 
320 		tiled[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
321 		igt_info("Contended upload rate for %d tiled threads:	%7.3fMiB/s\n", count, tiled[count != 2]);
322 
323 		for (n = 0; n < count; n++) {
324 			gem_close(fd, threads[n].handle);
325 		}
326 	}
327 
328 	errno = 0;
329 	igt_assert(linear[1] > 0.75 * linear[0]);
330 	igt_assert(tiled[1] > 0.75 * tiled[0]);
331 }
332 
wc_contention(void)333 static void wc_contention(void)
334 {
335 	const int loops = 4096;
336 	int n, count;
337 	int fd, num_fences;
338 	double linear[2], tiled[2];
339 
340 	fd = drm_open_driver(DRIVER_INTEL);
341 	gem_require_mmap_wc(fd);
342 
343 	num_fences = gem_available_fences(fd);
344 	igt_require(num_fences > 0);
345 
346 	for (count = 1; count < 4*num_fences; count *= 2) {
347 		struct timeval start, end;
348 		struct thread_contention threads[count];
349 
350 		for (n = 0; n < count; n++) {
351 			threads[n].handle = gem_create(fd, OBJECT_SIZE);
352 			threads[n].loops = loops;
353 			threads[n].fd = fd;
354 		}
355 
356 		gettimeofday(&start, NULL);
357 		for (n = 0; n < count; n++)
358 			pthread_create(&threads[n].thread, NULL, wc_mmap, &threads[n]);
359 		for (n = 0; n < count; n++)
360 			pthread_join(threads[n].thread, NULL);
361 		gettimeofday(&end, NULL);
362 
363 		linear[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
364 		igt_info("Contended upload rate for %d linear threads/wc:	%7.3fMiB/s\n", count, linear[count != 2]);
365 
366 		for (n = 0; n < count; n++)
367 			gem_set_tiling(fd, threads[n].handle, I915_TILING_X, 1024);
368 
369 		gettimeofday(&start, NULL);
370 		for (n = 0; n < count; n++)
371 			pthread_create(&threads[n].thread, NULL, wc_mmap, &threads[n]);
372 		for (n = 0; n < count; n++)
373 			pthread_join(threads[n].thread, NULL);
374 		gettimeofday(&end, NULL);
375 
376 		tiled[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
377 		igt_info("Contended upload rate for %d tiled threads/wc:	%7.3fMiB/s\n", count, tiled[count != 2]);
378 
379 		for (n = 0; n < count; n++) {
380 			gem_close(fd, threads[n].handle);
381 		}
382 	}
383 
384 	errno = 0;
385 	igt_assert(linear[1] > 0.75 * linear[0]);
386 	igt_assert(tiled[1] > 0.75 * tiled[0]);
387 }
388 
389 igt_main
390 {
391 	igt_skip_on_simulation();
392 
393 	igt_subtest("performance")
394 		performance();
395 	igt_subtest("thread-contention")
396 		thread_contention();
397 	igt_subtest("wc-contention")
398 		wc_contention();
399 	igt_subtest("thread-performance-read")
400 		thread_performance(READ);
401 	igt_subtest("thread-performance-write")
402 		thread_performance(WRITE);
403 	igt_subtest("thread-performance-both")
404 		thread_performance(READ | WRITE);
405 }
406