/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *  Zhenyu Wang <zhenyuw@linux.intel.com>
 *  Dominik Zeromski <dominik.zeromski@intel.com>
 */

#include <intel_bufmgr.h>
#include <i915_drm.h>

#include "intel_reg.h"
#include "drmtest.h"

#include "gpgpu_fill.h"
#include "gpu_cmds.h"

/* lib/i915/shaders/gpgpu/gpgpu_fill.gxa */
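/*
 * Each row below is one 128-bit (four dword) Gen EU instruction. The arrays
 * are, to the best of this comment's knowledge, the same fill kernel
 * assembled for the respective gens; only the binaries differ.
 */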
static const uint32_t gen7_gpgpu_kernel[][4] = {
	{ 0x00400001, 0x20200231, 0x00000020, 0x00000000 },
	{ 0x00000041, 0x20400c21, 0x00000004, 0x00000010 },
	{ 0x00000001, 0x20440021, 0x00000018, 0x00000000 },
	{ 0x00600001, 0x20800021, 0x008d0000, 0x00000000 },
	{ 0x00200001, 0x20800021, 0x00450040, 0x00000000 },
	{ 0x00000001, 0x20880061, 0x00000000, 0x0000000f },
	{ 0x00800001, 0x20a00021, 0x00000020, 0x00000000 },
	{ 0x05800031, 0x24001ca8, 0x00000080, 0x060a8000 },
	{ 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 },
	{ 0x07800031, 0x20001ca8, 0x00000e00, 0x82000010 },
};

static const uint32_t gen8_gpgpu_kernel[][4] = {
	{ 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
	{ 0x00000041, 0x20400208, 0x06000004, 0x00000010 },
	{ 0x00000001, 0x20440208, 0x00000018, 0x00000000 },
	{ 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
	{ 0x00200001, 0x20800208, 0x00450040, 0x00000000 },
	{ 0x00000001, 0x20880608, 0x00000000, 0x0000000f },
	{ 0x00800001, 0x20a00208, 0x00000020, 0x00000000 },
	{ 0x0c800031, 0x24000a40, 0x0e000080, 0x060a8000 },
	{ 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
};

static const uint32_t gen9_gpgpu_kernel[][4] = {
	{ 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
	{ 0x00000041, 0x20400208, 0x06000004, 0x00000010 },
	{ 0x00000001, 0x20440208, 0x00000018, 0x00000000 },
	{ 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
	{ 0x00200001, 0x20800208, 0x00450040, 0x00000000 },
	{ 0x00000001, 0x20880608, 0x00000000, 0x0000000f },
	{ 0x00800001, 0x20a00208, 0x00000020, 0x00000000 },
	{ 0x0c800031, 0x24000a40, 0x06000080, 0x060a8000 },
	{ 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
	{ 0x07800031, 0x20000a40, 0x06000e00, 0x82000010 },
};

static const uint32_t gen11_gpgpu_kernel[][4] = {
	{ 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
	{ 0x00000009, 0x20400208, 0x06000004, 0x00000004 },
	{ 0x00000001, 0x20440208, 0x00000018, 0x00000000 },
	{ 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
	{ 0x00200001, 0x20800208, 0x00450040, 0x00000000 },
	{ 0x00000001, 0x20880608, 0x00000000, 0x0000000f },
	{ 0x00800001, 0x20a00208, 0x00000020, 0x00000000 },
	{ 0x0c800031, 0x24000a40, 0x06000080, 0x040a8000 },
	{ 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
	{ 0x07800031, 0x20000a40, 0x06000e00, 0x82000010 },
};

/*
 * This sets up the GPGPU pipeline. The 4KiB batch buffer is split in two:
 * batch commands are written from offset 0 upwards, state from
 * BATCH_STATE_SPLIT upwards.
 *
 * +---------------+ <---- 4096
 * |       ^       |
 * |       |       |
 * |    various    |
 * |      state    |
 * |       |       |
 * |_______|_______| <---- 2048 + ?
 * |       ^       |
 * |       |       |
 * |     batch     |
 * |    commands   |
 * |       |       |
 * |       |       |
 * +---------------+ <---- 0 + ?
 *
 */

#define BATCH_STATE_SPLIT 2048
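/*
 * Commands must end below BATCH_STATE_SPLIT and state must fit between
 * BATCH_STATE_SPLIT and the 4KiB mark; each fill function below asserts
 * both bounds before submitting the batch.
 */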
/* VFE STATE params */
#define THREADS 1
#define GEN7_GPGPU_URB_ENTRIES 0
#define GEN8_GPGPU_URB_ENTRIES 1
#define GPGPU_URB_SIZE 0
#define GPGPU_CURBE_SIZE 1
#define GEN7_VFE_STATE_GPGPU_MODE 1

void
gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
		    const struct igt_buf *dst,
		    unsigned int x, unsigned int y,
		    unsigned int width, unsigned int height,
		    uint8_t color)
{
	uint32_t curbe_buffer, interface_descriptor;
	uint32_t batch_end;

	intel_batchbuffer_flush(batch);

	/* set up state in the top half of the buffer */
	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];

	/*
	 * The const buffer needs to be filled for every thread, but as we
	 * have just one thread per thread group, a single CURBE entry is
	 * enough. Each thread simply uses its thread group ID to compute
	 * the buffer offset.
	 */
	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);

	interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
				gen7_gpgpu_kernel, sizeof(gen7_gpgpu_kernel));

	igt_assert(batch->ptr < &batch->buffer[4095]);

	batch->ptr = batch->buffer;

	/* GPGPU pipeline */
	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);

	gen7_emit_state_base_address(batch);
	gen7_emit_vfe_state(batch, THREADS, GEN7_GPGPU_URB_ENTRIES,
			    GPGPU_URB_SIZE, GPGPU_CURBE_SIZE,
			    GEN7_VFE_STATE_GPGPU_MODE);
	gen7_emit_curbe_load(batch, curbe_buffer);
	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
	gen7_emit_gpgpu_walk(batch, x, y, width, height);

	OUT_BATCH(MI_BATCH_BUFFER_END);

	batch_end = intel_batchbuffer_align(batch, 8);
	igt_assert(batch_end < BATCH_STATE_SPLIT);

	gen7_render_flush(batch, batch_end);
	intel_batchbuffer_reset(batch);
}

void
gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
		    const struct igt_buf *dst,
		    unsigned int x, unsigned int y,
		    unsigned int width, unsigned int height,
		    uint8_t color)
{
	uint32_t curbe_buffer, interface_descriptor;
	uint32_t batch_end;

	intel_batchbuffer_flush(batch);

	/* set up state in the top half of the buffer */
	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];

	/*
	 * The const buffer needs to be filled for every thread, but as we
	 * have just one thread per thread group, a single CURBE entry is
	 * enough. Each thread simply uses its thread group ID to compute
	 * the buffer offset.
	 */
	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);

	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
				gen8_gpgpu_kernel, sizeof(gen8_gpgpu_kernel));

	igt_assert(batch->ptr < &batch->buffer[4095]);

	batch->ptr = batch->buffer;

	/* GPGPU pipeline */
	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);

	gen8_emit_state_base_address(batch);
	gen8_emit_vfe_state(batch, THREADS, GEN8_GPGPU_URB_ENTRIES,
			    GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
	gen7_emit_curbe_load(batch, curbe_buffer);
	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
	gen8_emit_gpgpu_walk(batch, x, y, width, height);

	OUT_BATCH(MI_BATCH_BUFFER_END);

	batch_end = intel_batchbuffer_align(batch, 8);
	igt_assert(batch_end < BATCH_STATE_SPLIT);

	gen7_render_flush(batch, batch_end);
	intel_batchbuffer_reset(batch);
}

static void
__gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
		      const struct igt_buf *dst,
		      unsigned int x, unsigned int y,
		      unsigned int width, unsigned int height,
		      uint8_t color, const uint32_t kernel[][4],
		      size_t kernel_size)
{
	uint32_t curbe_buffer, interface_descriptor;
	uint32_t batch_end;

	intel_batchbuffer_flush(batch);

	/* set up state in the top half of the buffer */
	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];

	/*
	 * The const buffer needs to be filled for every thread, but as we
	 * have just one thread per thread group, a single CURBE entry is
	 * enough. Each thread simply uses its thread group ID to compute
	 * the buffer offset.
	 */
	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);

	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
				kernel, kernel_size);

	igt_assert(batch->ptr < &batch->buffer[4095]);

	batch->ptr = batch->buffer;

	/* GPGPU pipeline */
	OUT_BATCH(GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
		  PIPELINE_SELECT_GPGPU);

	gen9_emit_state_base_address(batch);
	gen8_emit_vfe_state(batch, THREADS, GEN8_GPGPU_URB_ENTRIES,
			    GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
	gen7_emit_curbe_load(batch, curbe_buffer);
	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
	gen8_emit_gpgpu_walk(batch, x, y, width, height);

	OUT_BATCH(MI_BATCH_BUFFER_END);

	batch_end = intel_batchbuffer_align(batch, 8);
	igt_assert(batch_end < BATCH_STATE_SPLIT);

	gen7_render_flush(batch, batch_end);
	intel_batchbuffer_reset(batch);
}
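/*
 * gen9 and gen11 share the emission code above; the two entry points below
 * differ only in which kernel binary they pass in.
 */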
void gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
			 const struct igt_buf *dst,
			 unsigned int x, unsigned int y,
			 unsigned int width, unsigned int height,
			 uint8_t color)
{
	__gen9_gpgpu_fillfunc(batch, dst, x, y, width, height, color,
			      gen9_gpgpu_kernel, sizeof(gen9_gpgpu_kernel));
}

void gen11_gpgpu_fillfunc(struct intel_batchbuffer *batch,
			  const struct igt_buf *dst,
			  unsigned int x, unsigned int y,
			  unsigned int width, unsigned int height,
			  uint8_t color)
{
	__gen9_gpgpu_fillfunc(batch, dst, x, y, width, height, color,
			      gen11_gpgpu_kernel, sizeof(gen11_gpgpu_kernel));
}
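
/*
 * Usage sketch: a test would normally pick the per-gen variant rather than
 * call one of these directly. This assumes IGT's igt_get_gpgpu_fillfunc()
 * helper and an already-initialized batch and igt_buf; the names below are
 * illustrative, not part of this file.
 *
 *	igt_fillfunc_t fill = igt_get_gpgpu_fillfunc(intel_get_drm_devid(fd));
 *
 *	if (fill)
 *		fill(batch, &buf, 0, 0, buf.width, buf.height, 0x55);
 */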