1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Chris Wilson <chris@chris-wilson.co.uk>
25  *
26  */
27 
28 /* Exercises the basic execbuffer using the handle LUT interface */
29 
30 #include "igt.h"
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <fcntl.h>
35 #include <inttypes.h>
36 #include <errno.h>
37 #include <sys/stat.h>
38 #include <sys/time.h>
39 #include "drm.h"
40 
41 IGT_TEST_DESCRIPTION("Exercises the basic execbuffer using the handle LUT"
42 		     " interface.");
43 
/* Not referenced by any code visible in this file. */
#define BATCH_SIZE			(1024 * 1024)

/*
 * Locally defined execbuffer2 flag bits that this test ORs into
 * drm_i915_gem_execbuffer2.flags.
 * NOTE(review): presumably these mirror the kernel's I915_EXEC_NO_RELOC and
 * I915_EXEC_HANDLE_LUT -- confirm against the i915 uAPI header.
 */
#define LOCAL_I915_EXEC_NO_RELOC	(1 << 11)
#define LOCAL_I915_EXEC_HANDLE_LUT	(1 << 12)

/*
 * Upper bounds for the sweep: number of target objects per execbuf and
 * number of relocation entries attached to the batch.  Both are swept in
 * powers of two starting from 1.
 */
#define MAX_NUM_EXEC			2048
#define MAX_NUM_RELOC			4096

/* Per-pass behaviour bits, combined in the pass table of the test body. */
#define SKIP_RELOC	(1 << 0)	/* do not reset presumed_offset (or cycle the batch) between execbufs */
#define NO_RELOC	(1 << 1)	/* additionally submit with LOCAL_I915_EXEC_NO_RELOC */
#define CYCLE_BATCH	(1 << 2)	/* rotate the batch handle through the spare batch objects */
#define FAULT		(1 << 3)	/* disable prefault and remap the relocation buffer before every execbuf */
56 
57 int target[MAX_NUM_RELOC];
58 struct drm_i915_gem_exec_object2 gem_exec[MAX_NUM_EXEC+1];
59 struct drm_i915_gem_relocation_entry mem_reloc[MAX_NUM_RELOC];
60 
/* Generator state; the fixed seed makes every run draw the same sequence. */
static uint32_t state = 0x12345678;

/*
 * Advance the 32-bit generator by one step and return the new state:
 *
 *   state = (state ^ rotl(state, 5) ^ rotl(state, 24)) + 0x37798849
 *
 * All arithmetic is modulo 2^32.
 */
static uint32_t
hars_petruska_f54_1_random(void)
{
	const uint32_t rot5 = (state << 5) | (state >> (32 - 5));
	const uint32_t rot24 = (state << 24) | (state >> (32 - 24));

	state = (state ^ rot5 ^ rot24) + 0x37798849;
	return state;
}
70 
has_exec_lut(int fd)71 static int has_exec_lut(int fd)
72 {
73 	struct drm_i915_gem_execbuffer2 execbuf;
74 
75 	memset(&execbuf, 0, sizeof(execbuf));
76 	execbuf.buffers_ptr = to_user_pointer((gem_exec + MAX_NUM_EXEC));
77 	execbuf.buffer_count = 1;
78 	execbuf.flags = LOCAL_I915_EXEC_HANDLE_LUT;
79 
80 	return __gem_execbuf(fd, &execbuf) == 0;
81 }
82 
83 #define ELAPSED(a,b) (1e6*((b)->tv_sec - (a)->tv_sec) + ((b)->tv_usec - (a)->tv_usec))
84 igt_simple_main
85 {
86 	uint32_t batch[2] = {MI_BATCH_BUFFER_END};
87 	uint32_t cycle[16];
88 	int fd, n, m, count, c;
89 	const struct {
90 		const char *name;
91 		unsigned int flags;
92 	} pass[] = {
93 		{ .name = "relocation", .flags = 0 },
94 		{ .name = "cycle-relocation", .flags = CYCLE_BATCH },
95 		{ .name = "fault-relocation", .flags = FAULT },
96 		{ .name = "skip-relocs", .flags = SKIP_RELOC },
97 		{ .name = "no-relocs", .flags = SKIP_RELOC | NO_RELOC },
98 		{ .name = NULL },
99 	}, *p;
100 	struct drm_i915_gem_relocation_entry *reloc;
101 	uint32_t reloc_handle;
102 	int size;
103 
104 	igt_skip_on_simulation();
105 
106 	fd = drm_open_driver(DRIVER_INTEL);
107 	igt_require_gem(fd);
108 
109 	memset(gem_exec, 0, sizeof(gem_exec));
110 	for (n = 0; n < MAX_NUM_EXEC; n++)
111 		gem_exec[n].handle = gem_create(fd, 4096);
112 
113 	for (n = 0; n < 16; n++) {
114 		cycle[n] = gem_create(fd, 4096);
115 		gem_write(fd, cycle[n], 0, batch, sizeof(batch));
116 	}
117 	gem_exec[MAX_NUM_EXEC].handle = cycle[0];
118 
119 	memset(mem_reloc, 0, sizeof(mem_reloc));
120 	for (n = 0; n < MAX_NUM_RELOC; n++) {
121 		mem_reloc[n].offset = 1024;
122 		mem_reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
123 	}
124 
125 	size = ALIGN(sizeof(mem_reloc), 4096);
126 	reloc_handle = gem_create(fd, size);
127 	reloc = gem_mmap__cpu(fd, reloc_handle, 0, size, PROT_READ | PROT_WRITE);
128 	for (n = 0; n < MAX_NUM_RELOC; n++) {
129 		reloc[n].offset = 1024;
130 		reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
131 	}
132 	munmap(reloc, size);
133 
134 	igt_require(has_exec_lut(fd));
135 
136 	for (p = pass; p->name != NULL; p++) {
137 		if (p->flags & FAULT)
138 			igt_disable_prefault();
139 		for (n = 1; n <= MAX_NUM_EXEC; n *= 2) {
140 			double elapsed[16][2];
141 			double s_x, s_y, s_xx, s_xy;
142 			double A, B;
143 			int i, j;
144 
145 			for (i = 0, m = 1; m <= MAX_NUM_RELOC; m *= 2, i++) {
146 				struct drm_i915_gem_execbuffer2 execbuf;
147 				struct drm_i915_gem_exec_object2 *objects;
148 				struct timeval start, end;
149 
150 				if (p->flags & FAULT)
151 					reloc = __gem_mmap__cpu(fd, reloc_handle, 0, size, PROT_READ | PROT_WRITE);
152 				else
153 					reloc = mem_reloc;
154 
155 				gem_exec[MAX_NUM_EXEC].relocation_count = m;
156 				gem_exec[MAX_NUM_EXEC].relocs_ptr = to_user_pointer(reloc);
157 				objects = gem_exec + MAX_NUM_EXEC - n;
158 
159 				memset(&execbuf, 0, sizeof(execbuf));
160 				execbuf.buffers_ptr = to_user_pointer(objects);
161 				execbuf.buffer_count = n + 1;
162 				execbuf.flags = LOCAL_I915_EXEC_HANDLE_LUT;
163 				if (p->flags & NO_RELOC)
164 					execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
165 
166 				for (j = 0; j < m; j++) {
167 					target[j] = hars_petruska_f54_1_random() % n;
168 					reloc[j].target_handle = target[j];
169 					reloc[j].presumed_offset = -1;
170 				}
171 
172 				gem_execbuf(fd,&execbuf);
173 				gettimeofday(&start, NULL);
174 				for (count = 0; count < 1000; count++) {
175 					if ((p->flags & SKIP_RELOC) == 0) {
176 						for (j = 0; j < m; j++)
177 							reloc[j].presumed_offset = -1;
178 						if (p->flags & CYCLE_BATCH) {
179 							c = (c + 1) % 16;
180 							gem_exec[MAX_NUM_EXEC].handle = cycle[c];
181 						}
182 					}
183 					if (p->flags & FAULT) {
184 						munmap(reloc, size);
185 						reloc = __gem_mmap__cpu(fd, reloc_handle, 0, size, PROT_READ | PROT_WRITE);
186 						gem_exec[MAX_NUM_EXEC].relocs_ptr = to_user_pointer(reloc);
187 					}
188 					gem_execbuf(fd, &execbuf);
189 				}
190 				gettimeofday(&end, NULL);
191 				c = 16;
192 				do
193 					gem_sync(fd, cycle[--c]);
194 				while (c != 0);
195 				gem_exec[MAX_NUM_EXEC].handle = cycle[c];
196 				elapsed[i][1] = ELAPSED(&start, &end);
197 
198 				execbuf.flags &= ~LOCAL_I915_EXEC_HANDLE_LUT;
199 				for (j = 0; j < m; j++)
200 					reloc[j].target_handle = objects[target[j]].handle;
201 
202 				gem_execbuf(fd,&execbuf);
203 				gettimeofday(&start, NULL);
204 				for (count = 0; count < 1000; count++) {
205 					if ((p->flags & SKIP_RELOC) == 0) {
206 						for (j = 0; j < m; j++)
207 							reloc[j].presumed_offset = -1;
208 						if (p->flags & CYCLE_BATCH) {
209 							c = (c + 1) % 16;
210 							gem_exec[MAX_NUM_EXEC].handle = cycle[c];
211 						}
212 					}
213 					if (p->flags & FAULT) {
214 						munmap(reloc, size);
215 						reloc = __gem_mmap__cpu(fd, reloc_handle, 0, size, PROT_READ | PROT_WRITE);
216 						gem_exec[MAX_NUM_EXEC].relocs_ptr = to_user_pointer(reloc);
217 					}
218 					gem_execbuf(fd, &execbuf);
219 				}
220 				gettimeofday(&end, NULL);
221 				c = 16;
222 				do
223 					gem_sync(fd, cycle[--c]);
224 				while (c != 0);
225 				gem_exec[MAX_NUM_EXEC].handle = cycle[c];
226 				elapsed[i][0] = ELAPSED(&start, &end);
227 
228 				if (p->flags & FAULT)
229 					munmap(reloc, size);
230 			}
231 
232 			igt_info("%s: buffers=%4d:", p->name, n);
233 
234 			s_x = s_y = s_xx = s_xy = 0;
235 			for (j = 0; j < i; j++) {
236 				int k = 1 << j;
237 				s_x += k;
238 				s_y += elapsed[j][0];
239 				s_xx += k * k;
240 				s_xy += k * elapsed[j][0];
241 			}
242 			B = (s_xy - s_x * s_y / j) / (s_xx - s_x * s_x / j);
243 			A = s_y / j - B * s_x / j;
244 			igt_info(" old=%7.0f + %.1f*reloc,", A, B);
245 
246 			s_x = s_y = s_xx = s_xy = 0;
247 			for (j = 0; j < i; j++) {
248 				int k = 1 << j;
249 				s_x += k;
250 				s_y += elapsed[j][1];
251 				s_xx += k * k;
252 				s_xy += k * elapsed[j][1];
253 			}
254 			B = (s_xy - s_x * s_y / j) / (s_xx - s_x * s_x / j);
255 			A = s_y / j - B * s_x / j;
256 			igt_info(" lut=%7.0f + %.1f*reloc (ns)", A, B);
257 
258 			igt_info("\n");
259 		}
260 		if (p->flags & FAULT)
261 			igt_enable_prefault();
262 	}
263 }
264