1 /**************************************************************************
2  *
3  * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include <inttypes.h>
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <assert.h>
33 
34 #include "drm.h"
35 #include "drmtest.h"
36 #include "intel_batchbuffer.h"
37 #include "intel_bufmgr.h"
38 #include "intel_chipset.h"
39 #include "intel_reg.h"
40 #include "rendercopy.h"
41 #include "media_fill.h"
42 #include "ioctl_wrappers.h"
43 #include "media_spin.h"
44 #include "gpgpu_fill.h"
45 
46 #include <i915_drm.h>
47 
48 /**
49  * SECTION:intel_batchbuffer
50  * @short_description: Batchbuffer and blitter support
51  * @title: Batch Buffer
52  * @include: igt.h
53  *
54  * This library provides some basic support for batchbuffers and using the
55  * blitter engine based upon libdrm. A new batchbuffer is allocated with
56  * intel_batchbuffer_alloc() and for simple blitter commands submitted with
57  * intel_batchbuffer_flush().
58  *
59  * It also provides some convenient macros to easily emit commands into
60  * batchbuffers. All those macros presume that a pointer to a #intel_batchbuffer
61  * structure called batch is in scope. The basic macros are #BEGIN_BATCH,
62  * #OUT_BATCH, #OUT_RELOC and #ADVANCE_BATCH.
63  *
64  * Note that this library's header pulls in the [i-g-t core](igt-gpu-tools-i-g-t-core.html)
65  * library as a dependency.
66  */
67 
68 /**
69  * intel_batchbuffer_align:
70  * @batch: batchbuffer object
71  * @align: value in bytes to which we want to align
72  *
73  * Aligns the current in-batch offset to the given value.
74  *
75  * Returns: Batchbuffer offset aligned to the given value.
76  */
77 uint32_t
intel_batchbuffer_align(struct intel_batchbuffer * batch,uint32_t align)78 intel_batchbuffer_align(struct intel_batchbuffer *batch, uint32_t align)
79 {
80 	uint32_t offset = batch->ptr - batch->buffer;
81 
82 	offset = ALIGN(offset, align);
83 	batch->ptr = batch->buffer + offset;
84 	return offset;
85 }
86 
87 /**
88  * intel_batchbuffer_subdata_alloc:
89  * @batch: batchbuffer object
90  * @size: amount of bytes need to allocate
91  * @align: value in bytes to which we want to align
92  *
93  * Verify if sufficient @size within @batch is available to deny overflow.
94  * Then allocate @size bytes within @batch.
95  *
96  * Returns: Offset within @batch between allocated subdata and base of @batch.
97  */
98 void *
intel_batchbuffer_subdata_alloc(struct intel_batchbuffer * batch,uint32_t size,uint32_t align)99 intel_batchbuffer_subdata_alloc(struct intel_batchbuffer *batch, uint32_t size,
100 				uint32_t align)
101 {
102 	uint32_t offset = intel_batchbuffer_align(batch, align);
103 
104 	igt_assert(size <= intel_batchbuffer_space(batch));
105 
106 	batch->ptr += size;
107 	return memset(batch->buffer + offset, 0, size);
108 }
109 
110 /**
111  * intel_batchbuffer_subdata_offset:
112  * @batch: batchbuffer object
113  * @ptr: pointer to given data
114  *
115  * Returns: Offset within @batch between @ptr and base of @batch.
116  */
117 uint32_t
intel_batchbuffer_subdata_offset(struct intel_batchbuffer * batch,void * ptr)118 intel_batchbuffer_subdata_offset(struct intel_batchbuffer *batch, void *ptr)
119 {
120 	return (uint8_t *)ptr - batch->buffer;
121 }
122 
123 /**
124  * intel_batchbuffer_reset:
125  * @batch: batchbuffer object
126  *
127  * Resets @batch by allocating a new gem buffer object as backing storage.
128  */
129 void
intel_batchbuffer_reset(struct intel_batchbuffer * batch)130 intel_batchbuffer_reset(struct intel_batchbuffer *batch)
131 {
132 	if (batch->bo != NULL) {
133 		drm_intel_bo_unreference(batch->bo);
134 		batch->bo = NULL;
135 	}
136 
137 	batch->bo = drm_intel_bo_alloc(batch->bufmgr, "batchbuffer",
138 				       BATCH_SZ, 4096);
139 
140 	memset(batch->buffer, 0, sizeof(batch->buffer));
141 	batch->ctx = NULL;
142 
143 	batch->ptr = batch->buffer;
144 	batch->end = NULL;
145 }
146 
147 /**
148  * intel_batchbuffer_alloc:
149  * @bufmgr: libdrm buffer manager
150  * @devid: pci device id of the drm device
151  *
152  * Allocates a new batchbuffer object. @devid must be supplied since libdrm
153  * doesn't expose it directly.
154  *
155  * Returns: The allocated and initialized batchbuffer object.
156  */
157 struct intel_batchbuffer *
intel_batchbuffer_alloc(drm_intel_bufmgr * bufmgr,uint32_t devid)158 intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr, uint32_t devid)
159 {
160 	struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);
161 
162 	batch->bufmgr = bufmgr;
163 	batch->devid = devid;
164 	batch->gen = intel_gen(devid);
165 	intel_batchbuffer_reset(batch);
166 
167 	return batch;
168 }
169 
170 /**
171  * intel_batchbuffer_free:
172  * @batch: batchbuffer object
173  *
174  * Releases all resource of the batchbuffer object @batch.
175  */
176 void
intel_batchbuffer_free(struct intel_batchbuffer * batch)177 intel_batchbuffer_free(struct intel_batchbuffer *batch)
178 {
179 	drm_intel_bo_unreference(batch->bo);
180 	batch->bo = NULL;
181 	free(batch);
182 }
183 
184 #define CMD_POLY_STIPPLE_OFFSET       0x7906
185 
186 static unsigned int
flush_on_ring_common(struct intel_batchbuffer * batch,int ring)187 flush_on_ring_common(struct intel_batchbuffer *batch, int ring)
188 {
189 	unsigned int used = batch->ptr - batch->buffer;
190 
191 	if (used == 0)
192 		return 0;
193 
194 	if (IS_GEN5(batch->devid)) {
195 		/* emit gen5 w/a without batch space checks - we reserve that
196 		 * already. */
197 		*(uint32_t *) (batch->ptr) = CMD_POLY_STIPPLE_OFFSET << 16;
198 		batch->ptr += 4;
199 		*(uint32_t *) (batch->ptr) = 0;
200 		batch->ptr += 4;
201 	}
202 
203 	/* Round batchbuffer usage to 2 DWORDs. */
204 	if ((used & 4) == 0) {
205 		*(uint32_t *) (batch->ptr) = 0; /* noop */
206 		batch->ptr += 4;
207 	}
208 
209 	/* Mark the end of the buffer. */
210 	*(uint32_t *)(batch->ptr) = MI_BATCH_BUFFER_END; /* noop */
211 	batch->ptr += 4;
212 	return batch->ptr - batch->buffer;
213 }
214 
215 /**
216  * intel_batchbuffer_flush_on_ring:
217  * @batch: batchbuffer object
218  * @ring: execbuf ring flag
219  *
220  * Submits the batch for execution on @ring.
221  */
222 void
intel_batchbuffer_flush_on_ring(struct intel_batchbuffer * batch,int ring)223 intel_batchbuffer_flush_on_ring(struct intel_batchbuffer *batch, int ring)
224 {
225 	unsigned int used = flush_on_ring_common(batch, ring);
226 	drm_intel_context *ctx;
227 
228 	if (used == 0)
229 		return;
230 
231 	do_or_die(drm_intel_bo_subdata(batch->bo, 0, used, batch->buffer));
232 
233 	batch->ptr = NULL;
234 
235 	/* XXX bad kernel API */
236 	ctx = batch->ctx;
237 	if (ring != I915_EXEC_RENDER)
238 		ctx = NULL;
239 	do_or_die(drm_intel_gem_bo_context_exec(batch->bo, ctx, used, ring));
240 
241 	intel_batchbuffer_reset(batch);
242 }
243 
244 void
intel_batchbuffer_set_context(struct intel_batchbuffer * batch,drm_intel_context * context)245 intel_batchbuffer_set_context(struct intel_batchbuffer *batch,
246 				     drm_intel_context *context)
247 {
248 	batch->ctx = context;
249 }
250 
251 /**
252  * intel_batchbuffer_flush_with_context:
253  * @batch: batchbuffer object
254  * @context: libdrm hardware context object
255  *
256  * Submits the batch for execution on the render engine with the supplied
257  * hardware context.
258  */
259 void
intel_batchbuffer_flush_with_context(struct intel_batchbuffer * batch,drm_intel_context * context)260 intel_batchbuffer_flush_with_context(struct intel_batchbuffer *batch,
261 				     drm_intel_context *context)
262 {
263 	int ret;
264 	unsigned int used = flush_on_ring_common(batch, I915_EXEC_RENDER);
265 
266 	if (used == 0)
267 		return;
268 
269 	ret = drm_intel_bo_subdata(batch->bo, 0, used, batch->buffer);
270 	igt_assert(ret == 0);
271 
272 	batch->ptr = NULL;
273 
274 	ret = drm_intel_gem_bo_context_exec(batch->bo, context, used,
275 					    I915_EXEC_RENDER);
276 	igt_assert(ret == 0);
277 
278 	intel_batchbuffer_reset(batch);
279 }
280 
281 /**
282  * intel_batchbuffer_flush:
283  * @batch: batchbuffer object
284  *
285  * Submits the batch for execution on the blitter engine, selecting the right
286  * ring depending upon the hardware platform.
287  */
288 void
intel_batchbuffer_flush(struct intel_batchbuffer * batch)289 intel_batchbuffer_flush(struct intel_batchbuffer *batch)
290 {
291 	int ring = 0;
292 	if (HAS_BLT_RING(batch->devid))
293 		ring = I915_EXEC_BLT;
294 	intel_batchbuffer_flush_on_ring(batch, ring);
295 }
296 
297 
298 /**
299  * intel_batchbuffer_emit_reloc:
300  * @batch: batchbuffer object
301  * @buffer: relocation target libdrm buffer object
302  * @delta: delta value to add to @buffer's gpu address
303  * @read_domains: gem domain bits for the relocation
304  * @write_domain: gem domain bit for the relocation
305  * @fenced: whether this gpu access requires fences
306  *
307  * Emits both a libdrm relocation entry pointing at @buffer and the pre-computed
308  * DWORD of @batch's presumed gpu address plus the supplied @delta into @batch.
309  *
310  * Note that @fenced is only relevant if @buffer is actually tiled.
311  *
312  * This is the only way buffers get added to the validate list.
313  */
314 void
intel_batchbuffer_emit_reloc(struct intel_batchbuffer * batch,drm_intel_bo * buffer,uint64_t delta,uint32_t read_domains,uint32_t write_domain,int fenced)315 intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
316                              drm_intel_bo *buffer, uint64_t delta,
317 			     uint32_t read_domains, uint32_t write_domain,
318 			     int fenced)
319 {
320 	uint64_t offset;
321 	int ret;
322 
323 	if (batch->ptr - batch->buffer > BATCH_SZ)
324 		igt_info("bad relocation ptr %p map %p offset %d size %d\n",
325 			 batch->ptr, batch->buffer,
326 			 (int)(batch->ptr - batch->buffer), BATCH_SZ);
327 
328 	if (fenced)
329 		ret = drm_intel_bo_emit_reloc_fence(batch->bo, batch->ptr - batch->buffer,
330 						    buffer, delta,
331 						    read_domains, write_domain);
332 	else
333 		ret = drm_intel_bo_emit_reloc(batch->bo, batch->ptr - batch->buffer,
334 					      buffer, delta,
335 					      read_domains, write_domain);
336 
337 	offset = buffer->offset64;
338 	offset += delta;
339 	intel_batchbuffer_emit_dword(batch, offset);
340 	if (batch->gen >= 8)
341 		intel_batchbuffer_emit_dword(batch, offset >> 32);
342 	igt_assert(ret == 0);
343 }
344 
/**
 * intel_batchbuffer_copy_data:
 * @batch: batchbuffer object
 * @data: pointer to the data to write into the batchbuffer
 * @bytes: number of bytes to write into the batchbuffer
 * @align: value in bytes to which we want to align
 *
 * Reserves @bytes bytes at an @align-aligned position in @batch with
 * intel_batchbuffer_subdata_alloc() and copies @data into them. @bytes must
 * be DWORD aligned, i.e. a multiple of 4; this is asserted. Running out of
 * batch space trips the assertion in intel_batchbuffer_subdata_alloc().
 *
 * Returns: Offset of the copied data from the start of the batch.
 */
uint32_t
intel_batchbuffer_copy_data(struct intel_batchbuffer *batch,
			    const void *data, unsigned int bytes,
			    uint32_t align)
{
	void *dst;

	igt_assert((bytes & 3) == 0);

	dst = intel_batchbuffer_subdata_alloc(batch, bytes, align);
	memcpy(dst, data, bytes);

	return intel_batchbuffer_subdata_offset(batch, dst);
}
372 
/*
 * Asserts that (x) lies in [0, 1 << 15). Note that (x) is evaluated twice,
 * so it must not have side effects.
 */
#define CHECK_RANGE(x) \
	do { \
		igt_assert_lte(0, (x)); \
		igt_assert_lt((x), (1 << 15)); \
	} while (0)
377 
378 /**
379  * intel_blt_copy:
380  * @batch: batchbuffer object
381  * @src_bo: source libdrm buffer object
382  * @src_x1: source pixel x-coordination
383  * @src_y1: source pixel y-coordination
384  * @src_pitch: @src_bo's pitch in bytes
385  * @dst_bo: destination libdrm buffer object
386  * @dst_x1: destination pixel x-coordination
387  * @dst_y1: destination pixel y-coordination
388  * @dst_pitch: @dst_bo's pitch in bytes
389  * @width: width of the copied rectangle
390  * @height: height of the copied rectangle
391  * @bpp: bits per pixel
392  *
393  * This emits a 2D copy operation using blitter commands into the supplied batch
394  * buffer object.
395  */
396 void
intel_blt_copy(struct intel_batchbuffer * batch,drm_intel_bo * src_bo,int src_x1,int src_y1,int src_pitch,drm_intel_bo * dst_bo,int dst_x1,int dst_y1,int dst_pitch,int width,int height,int bpp)397 intel_blt_copy(struct intel_batchbuffer *batch,
398 	       drm_intel_bo *src_bo, int src_x1, int src_y1, int src_pitch,
399 	       drm_intel_bo *dst_bo, int dst_x1, int dst_y1, int dst_pitch,
400 	       int width, int height, int bpp)
401 {
402 	const int gen = batch->gen;
403 	uint32_t src_tiling, dst_tiling, swizzle;
404 	uint32_t cmd_bits = 0;
405 	uint32_t br13_bits;
406 
407 	igt_assert(bpp*(src_x1 + width) <= 8*src_pitch);
408 	igt_assert(bpp*(dst_x1 + width) <= 8*dst_pitch);
409 	igt_assert(src_pitch * (src_y1 + height) <= src_bo->size);
410 	igt_assert(dst_pitch * (dst_y1 + height) <= dst_bo->size);
411 
412 	drm_intel_bo_get_tiling(src_bo, &src_tiling, &swizzle);
413 	drm_intel_bo_get_tiling(dst_bo, &dst_tiling, &swizzle);
414 
415 	if (gen >= 4 && src_tiling != I915_TILING_NONE) {
416 		src_pitch /= 4;
417 		cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
418 	}
419 
420 	if (gen >= 4 && dst_tiling != I915_TILING_NONE) {
421 		dst_pitch /= 4;
422 		cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
423 	}
424 
425 	CHECK_RANGE(src_x1); CHECK_RANGE(src_y1);
426 	CHECK_RANGE(dst_x1); CHECK_RANGE(dst_y1);
427 	CHECK_RANGE(width); CHECK_RANGE(height);
428 	CHECK_RANGE(src_x1 + width); CHECK_RANGE(src_y1 + height);
429 	CHECK_RANGE(dst_x1 + width); CHECK_RANGE(dst_y1 + height);
430 	CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch);
431 
432 	br13_bits = 0;
433 	switch (bpp) {
434 	case 8:
435 		break;
436 	case 16:		/* supporting only RGB565, not ARGB1555 */
437 		br13_bits |= 1 << 24;
438 		break;
439 	case 32:
440 		br13_bits |= 3 << 24;
441 		cmd_bits |= XY_SRC_COPY_BLT_WRITE_ALPHA |
442 			    XY_SRC_COPY_BLT_WRITE_RGB;
443 		break;
444 	default:
445 		igt_fail(IGT_EXIT_FAILURE);
446 	}
447 
448 	BLIT_COPY_BATCH_START(cmd_bits);
449 	OUT_BATCH((br13_bits) |
450 		  (0xcc << 16) | /* copy ROP */
451 		  dst_pitch);
452 	OUT_BATCH((dst_y1 << 16) | dst_x1); /* dst x1,y1 */
453 	OUT_BATCH(((dst_y1 + height) << 16) | (dst_x1 + width)); /* dst x2,y2 */
454 	OUT_RELOC_FENCED(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
455 	OUT_BATCH((src_y1 << 16) | src_x1); /* src x1,y1 */
456 	OUT_BATCH(src_pitch);
457 	OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
458 	ADVANCE_BATCH();
459 
460 #define CMD_POLY_STIPPLE_OFFSET       0x7906
461 	if (gen == 5) {
462 		BEGIN_BATCH(2, 0);
463 		OUT_BATCH(CMD_POLY_STIPPLE_OFFSET << 16);
464 		OUT_BATCH(0);
465 		ADVANCE_BATCH();
466 	}
467 
468 	if (gen >= 6 && src_bo == dst_bo) {
469 		BEGIN_BATCH(3, 0);
470 		OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
471 		OUT_BATCH(0);
472 		OUT_BATCH(0);
473 		ADVANCE_BATCH();
474 	}
475 
476 	intel_batchbuffer_flush(batch);
477 }
478 
479 /**
480  * intel_copy_bo:
481  * @batch: batchbuffer object
482  * @src_bo: source libdrm buffer object
483  * @dst_bo: destination libdrm buffer object
484  * @size: size of the copy range in bytes
485  *
486  * This emits a copy operation using blitter commands into the supplied batch
487  * buffer object. A total of @size bytes from the start of @src_bo is copied
488  * over to @dst_bo. Note that @size must be page-aligned.
489  */
490 void
intel_copy_bo(struct intel_batchbuffer * batch,drm_intel_bo * dst_bo,drm_intel_bo * src_bo,long int size)491 intel_copy_bo(struct intel_batchbuffer *batch,
492 	      drm_intel_bo *dst_bo, drm_intel_bo *src_bo,
493 	      long int size)
494 {
495 	igt_assert(size % 4096 == 0);
496 
497 	intel_blt_copy(batch,
498 		       src_bo, 0, 0, 4096,
499 		       dst_bo, 0, 0, 4096,
500 		       4096/4, size/4096, 32);
501 }
502 
503 /**
504  * igt_buf_width:
505  * @buf: the i-g-t buffer object
506  *
507  * Computes the width in 32-bit pixels of the given buffer.
508  *
509  * Returns:
510  * The width of the buffer.
511  */
igt_buf_width(const struct igt_buf * buf)512 unsigned igt_buf_width(const struct igt_buf *buf)
513 {
514 	return buf->stride/(buf->bpp / 8);
515 }
516 
517 /**
518  * igt_buf_height:
519  * @buf: the i-g-t buffer object
520  *
521  * Computes the height in 32-bit pixels of the given buffer.
522  *
523  * Returns:
524  * The height of the buffer.
525  */
igt_buf_height(const struct igt_buf * buf)526 unsigned igt_buf_height(const struct igt_buf *buf)
527 {
528 	return buf->size/buf->stride;
529 }
530 
531 /*
532  * pitches are in bytes if the surfaces are linear, number of dwords
533  * otherwise
534  */
fast_copy_pitch(unsigned int stride,unsigned int tiling)535 static uint32_t fast_copy_pitch(unsigned int stride, unsigned int tiling)
536 {
537 	if (tiling != I915_TILING_NONE)
538 		return stride / 4;
539 	else
540 		return stride;
541 }
542 
fast_copy_dword0(unsigned int src_tiling,unsigned int dst_tiling)543 static uint32_t fast_copy_dword0(unsigned int src_tiling,
544 				 unsigned int dst_tiling)
545 {
546 	uint32_t dword0 = 0;
547 
548 	dword0 |= XY_FAST_COPY_BLT;
549 
550 	switch (src_tiling) {
551 	case I915_TILING_X:
552 		dword0 |= XY_FAST_COPY_SRC_TILING_X;
553 		break;
554 	case I915_TILING_Y:
555 	case I915_TILING_Yf:
556 		dword0 |= XY_FAST_COPY_SRC_TILING_Yb_Yf;
557 		break;
558 	case I915_TILING_Ys:
559 		dword0 |= XY_FAST_COPY_SRC_TILING_Ys;
560 		break;
561 	case I915_TILING_NONE:
562 	default:
563 		break;
564 	}
565 
566 	switch (dst_tiling) {
567 	case I915_TILING_X:
568 		dword0 |= XY_FAST_COPY_DST_TILING_X;
569 		break;
570 	case I915_TILING_Y:
571 	case I915_TILING_Yf:
572 		dword0 |= XY_FAST_COPY_DST_TILING_Yb_Yf;
573 		break;
574 	case I915_TILING_Ys:
575 		dword0 |= XY_FAST_COPY_DST_TILING_Ys;
576 		break;
577 	case I915_TILING_NONE:
578 	default:
579 		break;
580 	}
581 
582 	return dword0;
583 }
584 
fast_copy_dword1(unsigned int src_tiling,unsigned int dst_tiling,int bpp)585 static uint32_t fast_copy_dword1(unsigned int src_tiling,
586 				 unsigned int dst_tiling,
587 				 int bpp)
588 {
589 	uint32_t dword1 = 0;
590 
591 	if (src_tiling == I915_TILING_Yf)
592 		dword1 |= XY_FAST_COPY_SRC_TILING_Yf;
593 	if (dst_tiling == I915_TILING_Yf)
594 		dword1 |= XY_FAST_COPY_DST_TILING_Yf;
595 
596 	switch (bpp) {
597 	case 8:
598 		dword1 |= XY_FAST_COPY_COLOR_DEPTH_8;
599 		break;
600 	case 16:
601 		dword1 |= XY_FAST_COPY_COLOR_DEPTH_16;
602 		break;
603 	case 32:
604 		dword1 |= XY_FAST_COPY_COLOR_DEPTH_32;
605 		break;
606 	case 64:
607 		dword1 |= XY_FAST_COPY_COLOR_DEPTH_64;
608 		break;
609 	case 128:
610 		dword1 |= XY_FAST_COPY_COLOR_DEPTH_128;
611 		break;
612 	default:
613 		igt_assert(0);
614 	}
615 
616 	return dword1;
617 }
618 
619 static void
fill_relocation(struct drm_i915_gem_relocation_entry * reloc,uint32_t gem_handle,uint32_t delta,uint32_t offset,uint32_t read_domains,uint32_t write_domains)620 fill_relocation(struct drm_i915_gem_relocation_entry *reloc,
621 		uint32_t gem_handle, uint32_t delta, /* in bytes */
622 		uint32_t offset, /* in dwords */
623 		uint32_t read_domains, uint32_t write_domains)
624 {
625 	reloc->target_handle = gem_handle;
626 	reloc->delta = delta;
627 	reloc->offset = offset * sizeof(uint32_t);
628 	reloc->presumed_offset = 0;
629 	reloc->read_domains = read_domains;
630 	reloc->write_domain = write_domains;
631 }
632 
633 static void
fill_object(struct drm_i915_gem_exec_object2 * obj,uint32_t gem_handle,struct drm_i915_gem_relocation_entry * relocs,uint32_t count)634 fill_object(struct drm_i915_gem_exec_object2 *obj, uint32_t gem_handle,
635 	    struct drm_i915_gem_relocation_entry *relocs, uint32_t count)
636 {
637 	memset(obj, 0, sizeof(*obj));
638 	obj->handle = gem_handle;
639 	obj->relocation_count = count;
640 	obj->relocs_ptr = to_user_pointer(relocs);
641 }
642 
exec_blit(int fd,struct drm_i915_gem_exec_object2 * objs,uint32_t count,uint32_t batch_len)643 static void exec_blit(int fd,
644 		      struct drm_i915_gem_exec_object2 *objs, uint32_t count,
645 		      uint32_t batch_len /* in dwords */)
646 {
647 	struct drm_i915_gem_execbuffer2 exec;
648 
649 	exec.buffers_ptr = to_user_pointer(objs);
650 	exec.buffer_count = count;
651 	exec.batch_start_offset = 0;
652 	exec.batch_len = batch_len * 4;
653 	exec.DR1 = exec.DR4 = 0;
654 	exec.num_cliprects = 0;
655 	exec.cliprects_ptr = 0;
656 	exec.flags = I915_EXEC_BLT;
657 	i915_execbuffer2_set_context_id(exec, 0);
658 	exec.rsvd2 = 0;
659 
660 	gem_execbuf(fd, &exec);
661 }
662 
663 /**
664  * igt_blitter_fast_copy__raw:
665  * @fd: file descriptor of the i915 driver
666  * @src_handle: GEM handle of the source buffer
667  * @src_delta: offset into the source GEM bo, in bytes
668  * @src_stride: Stride (in bytes) of the source buffer
669  * @src_tiling: Tiling mode of the source buffer
670  * @src_x: X coordinate of the source region to copy
671  * @src_y: Y coordinate of the source region to copy
672  * @width: Width of the region to copy
673  * @height: Height of the region to copy
674  * @bpp: source and destination bits per pixel
675  * @dst_handle: GEM handle of the destination buffer
676  * @dst_delta: offset into the destination GEM bo, in bytes
677  * @dst_stride: Stride (in bytes) of the destination buffer
678  * @dst_tiling: Tiling mode of the destination buffer
679  * @dst_x: X coordinate of destination
680  * @dst_y: Y coordinate of destination
681  *
682  * Like igt_blitter_fast_copy(), but talking to the kernel directly.
683  */
igt_blitter_fast_copy__raw(int fd,uint32_t src_handle,unsigned int src_delta,unsigned int src_stride,unsigned int src_tiling,unsigned int src_x,unsigned src_y,unsigned int width,unsigned int height,int bpp,uint32_t dst_handle,unsigned dst_delta,unsigned int dst_stride,unsigned int dst_tiling,unsigned int dst_x,unsigned dst_y)684 void igt_blitter_fast_copy__raw(int fd,
685 				/* src */
686 				uint32_t src_handle,
687 				unsigned int src_delta,
688 				unsigned int src_stride,
689 				unsigned int src_tiling,
690 				unsigned int src_x, unsigned src_y,
691 
692 				/* size */
693 				unsigned int width, unsigned int height,
694 
695 				/* bpp */
696 				int bpp,
697 
698 				/* dst */
699 				uint32_t dst_handle,
700 				unsigned dst_delta,
701 				unsigned int dst_stride,
702 				unsigned int dst_tiling,
703 				unsigned int dst_x, unsigned dst_y)
704 {
705 	uint32_t batch[12];
706 	struct drm_i915_gem_exec_object2 objs[3];
707 	struct drm_i915_gem_relocation_entry relocs[2];
708 	uint32_t batch_handle;
709 	uint32_t dword0, dword1;
710 	uint32_t src_pitch, dst_pitch;
711 	int i = 0;
712 
713 	src_pitch = fast_copy_pitch(src_stride, src_tiling);
714 	dst_pitch = fast_copy_pitch(dst_stride, dst_tiling);
715 	dword0 = fast_copy_dword0(src_tiling, dst_tiling);
716 	dword1 = fast_copy_dword1(src_tiling, dst_tiling, bpp);
717 
718 	CHECK_RANGE(src_x); CHECK_RANGE(src_y);
719 	CHECK_RANGE(dst_x); CHECK_RANGE(dst_y);
720 	CHECK_RANGE(width); CHECK_RANGE(height);
721 	CHECK_RANGE(src_x + width); CHECK_RANGE(src_y + height);
722 	CHECK_RANGE(dst_x + width); CHECK_RANGE(dst_y + height);
723 	CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch);
724 
725 	batch[i++] = dword0;
726 	batch[i++] = dword1 | dst_pitch;
727 	batch[i++] = (dst_y << 16) | dst_x; /* dst x1,y1 */
728 	batch[i++] = ((dst_y + height) << 16) | (dst_x + width); /* dst x2,y2 */
729 	batch[i++] = dst_delta; /* dst address lower bits */
730 	batch[i++] = 0;	/* dst address upper bits */
731 	batch[i++] = (src_y << 16) | src_x; /* src x1,y1 */
732 	batch[i++] = src_pitch;
733 	batch[i++] = src_delta; /* src address lower bits */
734 	batch[i++] = 0;	/* src address upper bits */
735 	batch[i++] = MI_BATCH_BUFFER_END;
736 	batch[i++] = MI_NOOP;
737 
738 	igt_assert(i == ARRAY_SIZE(batch));
739 
740 	batch_handle = gem_create(fd, 4096);
741 	gem_write(fd, batch_handle, 0, batch, sizeof(batch));
742 
743 	fill_relocation(&relocs[0], dst_handle, dst_delta, 4,
744 			I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
745 	fill_relocation(&relocs[1], src_handle, src_delta, 8, I915_GEM_DOMAIN_RENDER, 0);
746 
747 	fill_object(&objs[0], dst_handle, NULL, 0);
748 	fill_object(&objs[1], src_handle, NULL, 0);
749 	fill_object(&objs[2], batch_handle, relocs, 2);
750 
751 	exec_blit(fd, objs, 3, ARRAY_SIZE(batch));
752 
753 	gem_close(fd, batch_handle);
754 }
755 
756 /**
757  * igt_blitter_fast_copy:
758  * @batch: batchbuffer object
759  * @src: source i-g-t buffer object
760  * @src_delta: offset into the source i-g-t bo
761  * @src_x: source pixel x-coordination
762  * @src_y: source pixel y-coordination
763  * @width: width of the copied rectangle
764  * @height: height of the copied rectangle
765  * @dst: destination i-g-t buffer object
766  * @dst_delta: offset into the destination i-g-t bo
767  * @dst_x: destination pixel x-coordination
768  * @dst_y: destination pixel y-coordination
769  *
770  * Copy @src into @dst using the gen9 fast copy blitter command.
771  *
772  * The source and destination surfaces cannot overlap.
773  */
igt_blitter_fast_copy(struct intel_batchbuffer * batch,const struct igt_buf * src,unsigned src_delta,unsigned src_x,unsigned src_y,unsigned width,unsigned height,int bpp,const struct igt_buf * dst,unsigned dst_delta,unsigned dst_x,unsigned dst_y)774 void igt_blitter_fast_copy(struct intel_batchbuffer *batch,
775 			   const struct igt_buf *src, unsigned src_delta,
776 			   unsigned src_x, unsigned src_y,
777 			   unsigned width, unsigned height,
778 			   int bpp,
779 			   const struct igt_buf *dst, unsigned dst_delta,
780 			   unsigned dst_x, unsigned dst_y)
781 {
782 	uint32_t src_pitch, dst_pitch;
783 	uint32_t dword0, dword1;
784 
785 	igt_assert(src->bpp == dst->bpp);
786 
787 	src_pitch = fast_copy_pitch(src->stride, src->tiling);
788 	dst_pitch = fast_copy_pitch(dst->stride, src->tiling);
789 	dword0 = fast_copy_dword0(src->tiling, dst->tiling);
790 	dword1 = fast_copy_dword1(src->tiling, dst->tiling, dst->bpp);
791 
792 	CHECK_RANGE(src_x); CHECK_RANGE(src_y);
793 	CHECK_RANGE(dst_x); CHECK_RANGE(dst_y);
794 	CHECK_RANGE(width); CHECK_RANGE(height);
795 	CHECK_RANGE(src_x + width); CHECK_RANGE(src_y + height);
796 	CHECK_RANGE(dst_x + width); CHECK_RANGE(dst_y + height);
797 	CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch);
798 
799 	BEGIN_BATCH(10, 2);
800 	OUT_BATCH(dword0);
801 	OUT_BATCH(dword1 | dst_pitch);
802 	OUT_BATCH((dst_y << 16) | dst_x); /* dst x1,y1 */
803 	OUT_BATCH(((dst_y + height) << 16) | (dst_x + width)); /* dst x2,y2 */
804 	OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, dst_delta);
805 	OUT_BATCH(0);	/* dst address upper bits */
806 	OUT_BATCH((src_y << 16) | src_x); /* src x1,y1 */
807 	OUT_BATCH(src_pitch);
808 	OUT_RELOC(src->bo, I915_GEM_DOMAIN_RENDER, 0, src_delta);
809 	OUT_BATCH(0);	/* src address upper bits */
810 	ADVANCE_BATCH();
811 
812 	intel_batchbuffer_flush(batch);
813 }
814 
815 #undef CHECK_RANGE
816 
817 /**
818  * igt_get_render_copyfunc:
819  * @devid: pci device id
820  *
821  * Returns:
822  *
823  * The platform-specific render copy function pointer for the device
824  * specified with @devid. Will return NULL when no render copy function is
825  * implemented.
826  */
igt_get_render_copyfunc(int devid)827 igt_render_copyfunc_t igt_get_render_copyfunc(int devid)
828 {
829 	igt_render_copyfunc_t copy = NULL;
830 
831 	if (IS_GEN2(devid))
832 		copy = gen2_render_copyfunc;
833 	else if (IS_GEN3(devid))
834 		copy = gen3_render_copyfunc;
835 	else if (IS_GEN4(devid) || IS_GEN5(devid))
836 		copy = gen4_render_copyfunc;
837 	else if (IS_GEN6(devid))
838 		copy = gen6_render_copyfunc;
839 	else if (IS_GEN7(devid))
840 		copy = gen7_render_copyfunc;
841 	else if (IS_GEN8(devid))
842 		copy = gen8_render_copyfunc;
843 	else if (IS_GEN9(devid) || IS_GEN10(devid))
844 		copy = gen9_render_copyfunc;
845 	else if (IS_GEN11(devid))
846 		copy = gen11_render_copyfunc;
847 
848 	return copy;
849 }
850 
851 /**
852  * igt_get_media_fillfunc:
853  * @devid: pci device id
854  *
855  * Returns:
856  *
857  * The platform-specific media fill function pointer for the device specified
858  * with @devid. Will return NULL when no media fill function is implemented.
859  */
igt_get_media_fillfunc(int devid)860 igt_fillfunc_t igt_get_media_fillfunc(int devid)
861 {
862 	igt_fillfunc_t fill = NULL;
863 
864 	if (IS_GEN9(devid) || IS_GEN10(devid) || IS_GEN11(devid))
865 		fill = gen9_media_fillfunc;
866 	else if (IS_GEN8(devid))
867 		fill = gen8_media_fillfunc;
868 	else if (IS_GEN7(devid))
869 		fill = gen7_media_fillfunc;
870 
871 	return fill;
872 }
873 
igt_get_media_vme_func(int devid)874 igt_vme_func_t igt_get_media_vme_func(int devid)
875 {
876 	igt_vme_func_t fill = NULL;
877 
878 	if (IS_GEN11(devid))
879 		fill = gen11_media_vme_func;
880 
881 	return fill;
882 }
883 /**
884  * igt_get_gpgpu_fillfunc:
885  * @devid: pci device id
886  *
887  * Returns:
888  *
889  * The platform-specific gpgpu fill function pointer for the device specified
890  * with @devid. Will return NULL when no gpgpu fill function is implemented.
891  */
igt_get_gpgpu_fillfunc(int devid)892 igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid)
893 {
894 	igt_fillfunc_t fill = NULL;
895 
896 	if (IS_GEN7(devid))
897 		fill = gen7_gpgpu_fillfunc;
898 	else if (IS_BROADWELL(devid))
899 		fill = gen8_gpgpu_fillfunc;
900 	else if (IS_GEN9(devid) || IS_GEN10(devid))
901 		fill = gen9_gpgpu_fillfunc;
902 	else if (IS_GEN11(devid))
903 		fill = gen11_gpgpu_fillfunc;
904 
905 	return fill;
906 }
907 
908 /**
909  * igt_get_media_spinfunc:
910  * @devid: pci device id
911  *
912  * Returns:
913  *
914  * The platform-specific media spin function pointer for the device specified
915  * with @devid. Will return NULL when no media spin function is implemented.
916  */
igt_get_media_spinfunc(int devid)917 igt_media_spinfunc_t igt_get_media_spinfunc(int devid)
918 {
919 	igt_media_spinfunc_t spin = NULL;
920 
921 	if (IS_GEN9(devid))
922 		spin = gen9_media_spinfunc;
923 	else if (IS_GEN8(devid))
924 		spin = gen8_media_spinfunc;
925 
926 	return spin;
927 }
928