1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22 */
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <unistd.h>
27 #ifdef HAVE_ALLOCA_H
28 # include <alloca.h>
29 #endif
30 #include <sys/wait.h>
31 
32 #include "CUnit/Basic.h"
33 
34 #include "amdgpu_test.h"
35 #include "amdgpu_drm.h"
36 
37 static  amdgpu_device_handle device_handle;
38 static  uint32_t  major_version;
39 static  uint32_t  minor_version;
40 static  uint32_t  family_id;
41 
42 static void amdgpu_query_info_test(void);
43 static void amdgpu_command_submission_gfx(void);
44 static void amdgpu_command_submission_compute(void);
45 static void amdgpu_command_submission_multi_fence(void);
46 static void amdgpu_command_submission_sdma(void);
47 static void amdgpu_userptr_test(void);
48 static void amdgpu_semaphore_test(void);
49 static void amdgpu_sync_dependency_test(void);
50 static void amdgpu_bo_eviction_test(void);
51 
52 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
53 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
54 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
55 static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
56 				       unsigned ip_type,
57 				       int instance, int pm4_dw, uint32_t *pm4_src,
58 				       int res_cnt, amdgpu_bo_handle *resources,
59 				       struct amdgpu_cs_ib_info *ib_info,
60 				       struct amdgpu_cs_request *ibs_request);
61 
62 CU_TestInfo basic_tests[] = {
63 	{ "Query Info Test",  amdgpu_query_info_test },
64 	{ "Userptr Test",  amdgpu_userptr_test },
65 	{ "bo eviction Test",  amdgpu_bo_eviction_test },
66 	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
67 	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
68 	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
69 	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
70 	{ "SW semaphore Test",  amdgpu_semaphore_test },
71 	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
72 	CU_TEST_INFO_NULL,
73 };
/* 8 KiB. */
#define BUFFER_SIZE (8 * 1024)

/* SDMA packet header: the opcode occupies the low byte of the first dword. */
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */

/*
 * Build an SDMA header dword: op in bits 7:0, sub_op in bits 15:8 and
 * e in bits 31:16.  The fields are converted to unsigned before shifting
 * so that e >= 0x8000 does not shift a signed int into its sign bit
 * (undefined behaviour) and the result never sign-extends when widened.
 */
#define SDMA_PACKET(op, sub_op, e)	((((unsigned)(e) & 0xFFFF) << 16) |	\
					(((unsigned)(sub_op) & 0xFF) << 8) |	\
					(((unsigned)(op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1
	/* Misspelled legacy name, kept so existing users keep compiling. */
#       define SDMA_WRTIE_SUB_OPCODE_TILED                SDMA_WRITE_SUB_OPCODE_TILED

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

/* Single-dword NOP packets for the GFX/compute and SDMA engines. */
#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
96 
/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

/* Field extractors for a PM4 header dword h. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)

/*
 * Header builders.  All fields are shifted as unsigned values: the packet
 * type lives in bits 31:30, and shifting the plain int 3 left by 30 would
 * overflow a signed int (undefined behaviour, and in practice a negative
 * value that sign-extends when stored in a 64-bit variable).
 */
#define PACKET0(reg, n)	(((unsigned)PACKET_TYPE0 << 30) |		\
			 ((unsigned)(reg) & 0xFFFF) |			\
			 (((unsigned)(n) & 0x3FFF) << 16))
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

/* NOTE(review): REG_SET is not defined in the visible part of this file. */
#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((unsigned)(op) & 0xFF) << 8) |		\
			 (((unsigned)(n) & 0x3FFF) << 16))
119 
/* Packet 3 types */
#define	PACKET3_NOP					0x10

/*
 * WRITE_DATA control-word fields.  Field values are shifted as unsigned so
 * that WRITE_DATA_ENGINE_SEL(2), which sets bit 31, does not overflow a
 * signed int.
 */
#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((unsigned)(x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((unsigned)(x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((unsigned)(x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */
143 
#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
		/* Bit 31: must be an unsigned shift, 1 << 31 overflows a signed int. */
#              define PACKET3_DMA_DATA_CP_SYNC     (1u << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
207 
/*
 * SI-family SDMA header: op in bits 31:28, b in bit 26, t in bit 23,
 * s in bit 22 and cnt in bits 19:0.  The opcode is shifted as unsigned
 * because every opcode >= 8 (e.g. SDMA_NOP_SI, SDMA_OPCODE_CONSTANT_FILL_SI)
 * sets bit 31, which would overflow a signed int.
 */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((unsigned)(op) & 0xF) << 28) |	\
						(((unsigned)(b) & 0x1) << 26) |		\
						(((unsigned)(t) & 0x1) << 23) |		\
						(((unsigned)(s) & 0x1) << 22) |		\
						(((unsigned)(cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
		/* Bit 31: must be an unsigned shift, 1 << 31 overflows a signed int. */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1u << 31)
234 
235 
/* CONTEXT_CONTROL packet and its single-bit control fields. */
#define PKT3_CONTEXT_CONTROL			0x28
#define CONTEXT_CONTROL_LOAD_ENABLE(x)		(((unsigned)(x) & 0x1) << 31)
#define CONTEXT_CONTROL_LOAD_CE_RAM(x)		(((unsigned)(x) & 0x1) << 28)
#define CONTEXT_CONTROL_SHADOW_ENABLE(x)	(((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE			0x12

#define PKT3_SET_SH_REG				0x76
#define PACKET3_SET_SH_REG_START		0x00002c00

#define PACKET3_DISPATCH_DIRECT			0x15


/* gfx 8 compute register offsets */
#define mmCOMPUTE_PGM_LO		0x2e0c
#define mmCOMPUTE_PGM_RSRC1		0x2e12
#define mmCOMPUTE_TMPRING_SIZE		0x2e18
#define mmCOMPUTE_USER_DATA_0		0x2e40
#define mmCOMPUTE_USER_DATA_1		0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS	0x2e15
#define mmCOMPUTE_NUM_THREAD_X		0x2e07
257 
258 
259 
/*
 * Reverse the byte order of a 32-bit value.
 *
 * The argument is parenthesized and converted to uint32_t so that compound
 * expressions (e.g. SWAP_32(a | b)) and signed arguments are handled
 * correctly.  Note that num is evaluated four times: do not pass an
 * expression with side effects.
 */
#define SWAP_32(num) ((((uint32_t)(num) & 0xff000000) >> 24) | \
		      (((uint32_t)(num) & 0x0000ff00) << 8) | \
		      (((uint32_t)(num) & 0x00ff0000) >> 8) | \
		      (((uint32_t)(num) & 0x000000ff) << 24))
264 
265 
266 /* Shader code
267  * void main()
268 {
269 
270 	float x = some_input;
271 		for (unsigned i = 0; i < 1000000; i++)
272   	x = sin(x);
273 
274 	u[0] = 42u;
275 }
276 */
277 
278 static  uint32_t shader_bin[] = {
279 	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
280 	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
281 	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
282 	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
283 };
284 
285 #define CODE_OFFSET 512
286 #define DATA_OFFSET 1024
287 
288 
suite_basic_tests_init(void)289 int suite_basic_tests_init(void)
290 {
291 	struct amdgpu_gpu_info gpu_info = {0};
292 	int r;
293 
294 	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
295 				   &minor_version, &device_handle);
296 
297 	if (r) {
298 		if ((r == -EACCES) && (errno == EACCES))
299 			printf("\n\nError:%s. "
300 				"Hint:Try to run this test program as root.",
301 				strerror(errno));
302 		return CUE_SINIT_FAILED;
303 	}
304 
305 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
306 	if (r)
307 		return CUE_SINIT_FAILED;
308 
309 	family_id = gpu_info.family_id;
310 
311 	return CUE_SUCCESS;
312 }
313 
suite_basic_tests_clean(void)314 int suite_basic_tests_clean(void)
315 {
316 	int r = amdgpu_device_deinitialize(device_handle);
317 
318 	if (r == 0)
319 		return CUE_SUCCESS;
320 	else
321 		return CUE_SCLEAN_FAILED;
322 }
323 
amdgpu_query_info_test(void)324 static void amdgpu_query_info_test(void)
325 {
326 	struct amdgpu_gpu_info gpu_info = {0};
327 	uint32_t version, feature;
328 	int r;
329 
330 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
331 	CU_ASSERT_EQUAL(r, 0);
332 
333 	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
334 					  0, &version, &feature);
335 	CU_ASSERT_EQUAL(r, 0);
336 }
337 
amdgpu_command_submission_gfx_separate_ibs(void)338 static void amdgpu_command_submission_gfx_separate_ibs(void)
339 {
340 	amdgpu_context_handle context_handle;
341 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
342 	void *ib_result_cpu, *ib_result_ce_cpu;
343 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
344 	struct amdgpu_cs_request ibs_request = {0};
345 	struct amdgpu_cs_ib_info ib_info[2];
346 	struct amdgpu_cs_fence fence_status = {0};
347 	uint32_t *ptr;
348 	uint32_t expired;
349 	amdgpu_bo_list_handle bo_list;
350 	amdgpu_va_handle va_handle, va_handle_ce;
351 	int r, i = 0;
352 
353 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
354 	CU_ASSERT_EQUAL(r, 0);
355 
356 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
357 				    AMDGPU_GEM_DOMAIN_GTT, 0,
358 				    &ib_result_handle, &ib_result_cpu,
359 				    &ib_result_mc_address, &va_handle);
360 	CU_ASSERT_EQUAL(r, 0);
361 
362 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
363 				    AMDGPU_GEM_DOMAIN_GTT, 0,
364 				    &ib_result_ce_handle, &ib_result_ce_cpu,
365 				    &ib_result_ce_mc_address, &va_handle_ce);
366 	CU_ASSERT_EQUAL(r, 0);
367 
368 	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
369 			       ib_result_ce_handle, &bo_list);
370 	CU_ASSERT_EQUAL(r, 0);
371 
372 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
373 
374 	/* IT_SET_CE_DE_COUNTERS */
375 	ptr = ib_result_ce_cpu;
376 	if (family_id != AMDGPU_FAMILY_SI) {
377 		ptr[i++] = 0xc0008900;
378 		ptr[i++] = 0;
379 	}
380 	ptr[i++] = 0xc0008400;
381 	ptr[i++] = 1;
382 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
383 	ib_info[0].size = i;
384 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
385 
386 	/* IT_WAIT_ON_CE_COUNTER */
387 	ptr = ib_result_cpu;
388 	ptr[0] = 0xc0008600;
389 	ptr[1] = 0x00000001;
390 	ib_info[1].ib_mc_address = ib_result_mc_address;
391 	ib_info[1].size = 2;
392 
393 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
394 	ibs_request.number_of_ibs = 2;
395 	ibs_request.ibs = ib_info;
396 	ibs_request.resources = bo_list;
397 	ibs_request.fence_info.handle = NULL;
398 
399 	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
400 
401 	CU_ASSERT_EQUAL(r, 0);
402 
403 	fence_status.context = context_handle;
404 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
405 	fence_status.ip_instance = 0;
406 	fence_status.fence = ibs_request.seq_no;
407 
408 	r = amdgpu_cs_query_fence_status(&fence_status,
409 					 AMDGPU_TIMEOUT_INFINITE,
410 					 0, &expired);
411 	CU_ASSERT_EQUAL(r, 0);
412 
413 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
414 				     ib_result_mc_address, 4096);
415 	CU_ASSERT_EQUAL(r, 0);
416 
417 	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
418 				     ib_result_ce_mc_address, 4096);
419 	CU_ASSERT_EQUAL(r, 0);
420 
421 	r = amdgpu_bo_list_destroy(bo_list);
422 	CU_ASSERT_EQUAL(r, 0);
423 
424 	r = amdgpu_cs_ctx_free(context_handle);
425 	CU_ASSERT_EQUAL(r, 0);
426 
427 }
428 
amdgpu_command_submission_gfx_shared_ib(void)429 static void amdgpu_command_submission_gfx_shared_ib(void)
430 {
431 	amdgpu_context_handle context_handle;
432 	amdgpu_bo_handle ib_result_handle;
433 	void *ib_result_cpu;
434 	uint64_t ib_result_mc_address;
435 	struct amdgpu_cs_request ibs_request = {0};
436 	struct amdgpu_cs_ib_info ib_info[2];
437 	struct amdgpu_cs_fence fence_status = {0};
438 	uint32_t *ptr;
439 	uint32_t expired;
440 	amdgpu_bo_list_handle bo_list;
441 	amdgpu_va_handle va_handle;
442 	int r, i = 0;
443 
444 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
445 	CU_ASSERT_EQUAL(r, 0);
446 
447 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
448 				    AMDGPU_GEM_DOMAIN_GTT, 0,
449 				    &ib_result_handle, &ib_result_cpu,
450 				    &ib_result_mc_address, &va_handle);
451 	CU_ASSERT_EQUAL(r, 0);
452 
453 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
454 			       &bo_list);
455 	CU_ASSERT_EQUAL(r, 0);
456 
457 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
458 
459 	/* IT_SET_CE_DE_COUNTERS */
460 	ptr = ib_result_cpu;
461 	if (family_id != AMDGPU_FAMILY_SI) {
462 		ptr[i++] = 0xc0008900;
463 		ptr[i++] = 0;
464 	}
465 	ptr[i++] = 0xc0008400;
466 	ptr[i++] = 1;
467 	ib_info[0].ib_mc_address = ib_result_mc_address;
468 	ib_info[0].size = i;
469 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
470 
471 	ptr = (uint32_t *)ib_result_cpu + 4;
472 	ptr[0] = 0xc0008600;
473 	ptr[1] = 0x00000001;
474 	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
475 	ib_info[1].size = 2;
476 
477 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
478 	ibs_request.number_of_ibs = 2;
479 	ibs_request.ibs = ib_info;
480 	ibs_request.resources = bo_list;
481 	ibs_request.fence_info.handle = NULL;
482 
483 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
484 
485 	CU_ASSERT_EQUAL(r, 0);
486 
487 	fence_status.context = context_handle;
488 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
489 	fence_status.ip_instance = 0;
490 	fence_status.fence = ibs_request.seq_no;
491 
492 	r = amdgpu_cs_query_fence_status(&fence_status,
493 					 AMDGPU_TIMEOUT_INFINITE,
494 					 0, &expired);
495 	CU_ASSERT_EQUAL(r, 0);
496 
497 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
498 				     ib_result_mc_address, 4096);
499 	CU_ASSERT_EQUAL(r, 0);
500 
501 	r = amdgpu_bo_list_destroy(bo_list);
502 	CU_ASSERT_EQUAL(r, 0);
503 
504 	r = amdgpu_cs_ctx_free(context_handle);
505 	CU_ASSERT_EQUAL(r, 0);
506 }
507 
amdgpu_command_submission_gfx_cp_write_data(void)508 static void amdgpu_command_submission_gfx_cp_write_data(void)
509 {
510 	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
511 }
512 
amdgpu_command_submission_gfx_cp_const_fill(void)513 static void amdgpu_command_submission_gfx_cp_const_fill(void)
514 {
515 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
516 }
517 
amdgpu_command_submission_gfx_cp_copy_data(void)518 static void amdgpu_command_submission_gfx_cp_copy_data(void)
519 {
520 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
521 }
522 
amdgpu_bo_eviction_test(void)523 static void amdgpu_bo_eviction_test(void)
524 {
525 	const int sdma_write_length = 1024;
526 	const int pm4_dw = 256;
527 	amdgpu_context_handle context_handle;
528 	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
529 	amdgpu_bo_handle *resources;
530 	uint32_t *pm4;
531 	struct amdgpu_cs_ib_info *ib_info;
532 	struct amdgpu_cs_request *ibs_request;
533 	uint64_t bo1_mc, bo2_mc;
534 	volatile unsigned char *bo1_cpu, *bo2_cpu;
535 	int i, j, r, loop1, loop2;
536 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
537 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
538 	struct amdgpu_heap_info vram_info, gtt_info;
539 
540 	pm4 = calloc(pm4_dw, sizeof(*pm4));
541 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
542 
543 	ib_info = calloc(1, sizeof(*ib_info));
544 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
545 
546 	ibs_request = calloc(1, sizeof(*ibs_request));
547 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
548 
549 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
550 	CU_ASSERT_EQUAL(r, 0);
551 
552 	/* prepare resource */
553 	resources = calloc(4, sizeof(amdgpu_bo_handle));
554 	CU_ASSERT_NOT_EQUAL(resources, NULL);
555 
556 	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
557 				   0, &vram_info);
558 	CU_ASSERT_EQUAL(r, 0);
559 
560 	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
561 				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
562 	CU_ASSERT_EQUAL(r, 0);
563 	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
564 				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
565 	CU_ASSERT_EQUAL(r, 0);
566 
567 	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
568 				   0, &gtt_info);
569 	CU_ASSERT_EQUAL(r, 0);
570 
571 	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
572 				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
573 	CU_ASSERT_EQUAL(r, 0);
574 	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
575 				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
576 	CU_ASSERT_EQUAL(r, 0);
577 
578 
579 
580 	loop1 = loop2 = 0;
581 	/* run 9 circle to test all mapping combination */
582 	while(loop1 < 2) {
583 		while(loop2 < 2) {
584 			/* allocate UC bo1for sDMA use */
585 			r = amdgpu_bo_alloc_and_map(device_handle,
586 						    sdma_write_length, 4096,
587 						    AMDGPU_GEM_DOMAIN_GTT,
588 						    gtt_flags[loop1], &bo1,
589 						    (void**)&bo1_cpu, &bo1_mc,
590 						    &bo1_va_handle);
591 			CU_ASSERT_EQUAL(r, 0);
592 
593 			/* set bo1 */
594 			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
595 
596 			/* allocate UC bo2 for sDMA use */
597 			r = amdgpu_bo_alloc_and_map(device_handle,
598 						    sdma_write_length, 4096,
599 						    AMDGPU_GEM_DOMAIN_GTT,
600 						    gtt_flags[loop2], &bo2,
601 						    (void**)&bo2_cpu, &bo2_mc,
602 						    &bo2_va_handle);
603 			CU_ASSERT_EQUAL(r, 0);
604 
605 			/* clear bo2 */
606 			memset((void*)bo2_cpu, 0, sdma_write_length);
607 
608 			resources[0] = bo1;
609 			resources[1] = bo2;
610 			resources[2] = vram_max[loop2];
611 			resources[3] = gtt_max[loop2];
612 
613 			/* fulfill PM4: test DMA copy linear */
614 			i = j = 0;
615 			if (family_id == AMDGPU_FAMILY_SI) {
616 				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
617 							  sdma_write_length);
618 				pm4[i++] = 0xffffffff & bo2_mc;
619 				pm4[i++] = 0xffffffff & bo1_mc;
620 				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
621 				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
622 			} else {
623 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
624 				if (family_id >= AMDGPU_FAMILY_AI)
625 					pm4[i++] = sdma_write_length - 1;
626 				else
627 					pm4[i++] = sdma_write_length;
628 				pm4[i++] = 0;
629 				pm4[i++] = 0xffffffff & bo1_mc;
630 				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
631 				pm4[i++] = 0xffffffff & bo2_mc;
632 				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
633 			}
634 
635 			amdgpu_test_exec_cs_helper(context_handle,
636 						   AMDGPU_HW_IP_DMA, 0,
637 						   i, pm4,
638 						   4, resources,
639 						   ib_info, ibs_request);
640 
641 			/* verify if SDMA test result meets with expected */
642 			i = 0;
643 			while(i < sdma_write_length) {
644 				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
645 			}
646 			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
647 						     sdma_write_length);
648 			CU_ASSERT_EQUAL(r, 0);
649 			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
650 						     sdma_write_length);
651 			CU_ASSERT_EQUAL(r, 0);
652 			loop2++;
653 		}
654 		loop2 = 0;
655 		loop1++;
656 	}
657 	amdgpu_bo_free(vram_max[0]);
658 	amdgpu_bo_free(vram_max[1]);
659 	amdgpu_bo_free(gtt_max[0]);
660 	amdgpu_bo_free(gtt_max[1]);
661 	/* clean resources */
662 	free(resources);
663 	free(ibs_request);
664 	free(ib_info);
665 	free(pm4);
666 
667 	/* end of test */
668 	r = amdgpu_cs_ctx_free(context_handle);
669 	CU_ASSERT_EQUAL(r, 0);
670 }
671 
672 
amdgpu_command_submission_gfx(void)673 static void amdgpu_command_submission_gfx(void)
674 {
675 	/* write data using the CP */
676 	amdgpu_command_submission_gfx_cp_write_data();
677 	/* const fill using the CP */
678 	amdgpu_command_submission_gfx_cp_const_fill();
679 	/* copy data using the CP */
680 	amdgpu_command_submission_gfx_cp_copy_data();
681 	/* separate IB buffers for multi-IB submission */
682 	amdgpu_command_submission_gfx_separate_ibs();
683 	/* shared IB buffer for multi-IB submission */
684 	amdgpu_command_submission_gfx_shared_ib();
685 }
686 
amdgpu_semaphore_test(void)687 static void amdgpu_semaphore_test(void)
688 {
689 	amdgpu_context_handle context_handle[2];
690 	amdgpu_semaphore_handle sem;
691 	amdgpu_bo_handle ib_result_handle[2];
692 	void *ib_result_cpu[2];
693 	uint64_t ib_result_mc_address[2];
694 	struct amdgpu_cs_request ibs_request[2] = {0};
695 	struct amdgpu_cs_ib_info ib_info[2] = {0};
696 	struct amdgpu_cs_fence fence_status = {0};
697 	uint32_t *ptr;
698 	uint32_t expired;
699 	uint32_t sdma_nop, gfx_nop;
700 	amdgpu_bo_list_handle bo_list[2];
701 	amdgpu_va_handle va_handle[2];
702 	int r, i;
703 
704 	if (family_id == AMDGPU_FAMILY_SI) {
705 		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
706 		gfx_nop = GFX_COMPUTE_NOP_SI;
707 	} else {
708 		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
709 		gfx_nop = GFX_COMPUTE_NOP;
710 	}
711 
712 	r = amdgpu_cs_create_semaphore(&sem);
713 	CU_ASSERT_EQUAL(r, 0);
714 	for (i = 0; i < 2; i++) {
715 		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
716 		CU_ASSERT_EQUAL(r, 0);
717 
718 		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
719 					    AMDGPU_GEM_DOMAIN_GTT, 0,
720 					    &ib_result_handle[i], &ib_result_cpu[i],
721 					    &ib_result_mc_address[i], &va_handle[i]);
722 		CU_ASSERT_EQUAL(r, 0);
723 
724 		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
725 				       NULL, &bo_list[i]);
726 		CU_ASSERT_EQUAL(r, 0);
727 	}
728 
729 	/* 1. same context different engine */
730 	ptr = ib_result_cpu[0];
731 	ptr[0] = sdma_nop;
732 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
733 	ib_info[0].size = 1;
734 
735 	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
736 	ibs_request[0].number_of_ibs = 1;
737 	ibs_request[0].ibs = &ib_info[0];
738 	ibs_request[0].resources = bo_list[0];
739 	ibs_request[0].fence_info.handle = NULL;
740 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
741 	CU_ASSERT_EQUAL(r, 0);
742 	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
743 	CU_ASSERT_EQUAL(r, 0);
744 
745 	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
746 	CU_ASSERT_EQUAL(r, 0);
747 	ptr = ib_result_cpu[1];
748 	ptr[0] = gfx_nop;
749 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
750 	ib_info[1].size = 1;
751 
752 	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
753 	ibs_request[1].number_of_ibs = 1;
754 	ibs_request[1].ibs = &ib_info[1];
755 	ibs_request[1].resources = bo_list[1];
756 	ibs_request[1].fence_info.handle = NULL;
757 
758 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
759 	CU_ASSERT_EQUAL(r, 0);
760 
761 	fence_status.context = context_handle[0];
762 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
763 	fence_status.ip_instance = 0;
764 	fence_status.fence = ibs_request[1].seq_no;
765 	r = amdgpu_cs_query_fence_status(&fence_status,
766 					 500000000, 0, &expired);
767 	CU_ASSERT_EQUAL(r, 0);
768 	CU_ASSERT_EQUAL(expired, true);
769 
770 	/* 2. same engine different context */
771 	ptr = ib_result_cpu[0];
772 	ptr[0] = gfx_nop;
773 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
774 	ib_info[0].size = 1;
775 
776 	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
777 	ibs_request[0].number_of_ibs = 1;
778 	ibs_request[0].ibs = &ib_info[0];
779 	ibs_request[0].resources = bo_list[0];
780 	ibs_request[0].fence_info.handle = NULL;
781 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
782 	CU_ASSERT_EQUAL(r, 0);
783 	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
784 	CU_ASSERT_EQUAL(r, 0);
785 
786 	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
787 	CU_ASSERT_EQUAL(r, 0);
788 	ptr = ib_result_cpu[1];
789 	ptr[0] = gfx_nop;
790 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
791 	ib_info[1].size = 1;
792 
793 	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
794 	ibs_request[1].number_of_ibs = 1;
795 	ibs_request[1].ibs = &ib_info[1];
796 	ibs_request[1].resources = bo_list[1];
797 	ibs_request[1].fence_info.handle = NULL;
798 	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
799 
800 	CU_ASSERT_EQUAL(r, 0);
801 
802 	fence_status.context = context_handle[1];
803 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
804 	fence_status.ip_instance = 0;
805 	fence_status.fence = ibs_request[1].seq_no;
806 	r = amdgpu_cs_query_fence_status(&fence_status,
807 					 500000000, 0, &expired);
808 	CU_ASSERT_EQUAL(r, 0);
809 	CU_ASSERT_EQUAL(expired, true);
810 
811 	for (i = 0; i < 2; i++) {
812 		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
813 					     ib_result_mc_address[i], 4096);
814 		CU_ASSERT_EQUAL(r, 0);
815 
816 		r = amdgpu_bo_list_destroy(bo_list[i]);
817 		CU_ASSERT_EQUAL(r, 0);
818 
819 		r = amdgpu_cs_ctx_free(context_handle[i]);
820 		CU_ASSERT_EQUAL(r, 0);
821 	}
822 
823 	r = amdgpu_cs_destroy_semaphore(sem);
824 	CU_ASSERT_EQUAL(r, 0);
825 }
826 
amdgpu_command_submission_compute_nop(void)827 static void amdgpu_command_submission_compute_nop(void)
828 {
829 	amdgpu_context_handle context_handle;
830 	amdgpu_bo_handle ib_result_handle;
831 	void *ib_result_cpu;
832 	uint64_t ib_result_mc_address;
833 	struct amdgpu_cs_request ibs_request;
834 	struct amdgpu_cs_ib_info ib_info;
835 	struct amdgpu_cs_fence fence_status;
836 	uint32_t *ptr;
837 	uint32_t expired;
838 	int r, instance;
839 	amdgpu_bo_list_handle bo_list;
840 	amdgpu_va_handle va_handle;
841 	struct drm_amdgpu_info_hw_ip info;
842 
843 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
844 	CU_ASSERT_EQUAL(r, 0);
845 
846 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
847 	CU_ASSERT_EQUAL(r, 0);
848 
849 	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
850 		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
851 					    AMDGPU_GEM_DOMAIN_GTT, 0,
852 					    &ib_result_handle, &ib_result_cpu,
853 					    &ib_result_mc_address, &va_handle);
854 		CU_ASSERT_EQUAL(r, 0);
855 
856 		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
857 				       &bo_list);
858 		CU_ASSERT_EQUAL(r, 0);
859 
860 		ptr = ib_result_cpu;
861 		memset(ptr, 0, 16);
862 		ptr[0]=PACKET3(PACKET3_NOP, 14);
863 
864 		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
865 		ib_info.ib_mc_address = ib_result_mc_address;
866 		ib_info.size = 16;
867 
868 		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
869 		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
870 		ibs_request.ring = instance;
871 		ibs_request.number_of_ibs = 1;
872 		ibs_request.ibs = &ib_info;
873 		ibs_request.resources = bo_list;
874 		ibs_request.fence_info.handle = NULL;
875 
876 		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
877 		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
878 		CU_ASSERT_EQUAL(r, 0);
879 
880 		fence_status.context = context_handle;
881 		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
882 		fence_status.ip_instance = 0;
883 		fence_status.ring = instance;
884 		fence_status.fence = ibs_request.seq_no;
885 
886 		r = amdgpu_cs_query_fence_status(&fence_status,
887 						 AMDGPU_TIMEOUT_INFINITE,
888 						 0, &expired);
889 		CU_ASSERT_EQUAL(r, 0);
890 
891 		r = amdgpu_bo_list_destroy(bo_list);
892 		CU_ASSERT_EQUAL(r, 0);
893 
894 		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
895 					     ib_result_mc_address, 4096);
896 		CU_ASSERT_EQUAL(r, 0);
897 	}
898 
899 	r = amdgpu_cs_ctx_free(context_handle);
900 	CU_ASSERT_EQUAL(r, 0);
901 }
902 
amdgpu_command_submission_compute_cp_write_data(void)903 static void amdgpu_command_submission_compute_cp_write_data(void)
904 {
905 	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
906 }
907 
amdgpu_command_submission_compute_cp_const_fill(void)908 static void amdgpu_command_submission_compute_cp_const_fill(void)
909 {
910 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
911 }
912 
amdgpu_command_submission_compute_cp_copy_data(void)913 static void amdgpu_command_submission_compute_cp_copy_data(void)
914 {
915 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
916 }
917 
/*
 * Compute command-submission test entry point: runs the write-data,
 * constant-fill, copy-data and nop sub-tests one after another.
 */
static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}
929 
930 /*
931  * caller need create/release:
932  * pm4_src, resources, ib_info, and ibs_request
933  * submit command stream described in ibs_request and wait for this IB accomplished
934  */
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,unsigned ip_type,int instance,int pm4_dw,uint32_t * pm4_src,int res_cnt,amdgpu_bo_handle * resources,struct amdgpu_cs_ib_info * ib_info,struct amdgpu_cs_request * ibs_request)935 static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
936 				       unsigned ip_type,
937 				       int instance, int pm4_dw, uint32_t *pm4_src,
938 				       int res_cnt, amdgpu_bo_handle *resources,
939 				       struct amdgpu_cs_ib_info *ib_info,
940 				       struct amdgpu_cs_request *ibs_request)
941 {
942 	int r;
943 	uint32_t expired;
944 	uint32_t *ring_ptr;
945 	amdgpu_bo_handle ib_result_handle;
946 	void *ib_result_cpu;
947 	uint64_t ib_result_mc_address;
948 	struct amdgpu_cs_fence fence_status = {0};
949 	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
950 	amdgpu_va_handle va_handle;
951 
952 	/* prepare CS */
953 	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
954 	CU_ASSERT_NOT_EQUAL(resources, NULL);
955 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
956 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
957 	CU_ASSERT_TRUE(pm4_dw <= 1024);
958 
959 	/* allocate IB */
960 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
961 				    AMDGPU_GEM_DOMAIN_GTT, 0,
962 				    &ib_result_handle, &ib_result_cpu,
963 				    &ib_result_mc_address, &va_handle);
964 	CU_ASSERT_EQUAL(r, 0);
965 
966 	/* copy PM4 packet to ring from caller */
967 	ring_ptr = ib_result_cpu;
968 	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
969 
970 	ib_info->ib_mc_address = ib_result_mc_address;
971 	ib_info->size = pm4_dw;
972 
973 	ibs_request->ip_type = ip_type;
974 	ibs_request->ring = instance;
975 	ibs_request->number_of_ibs = 1;
976 	ibs_request->ibs = ib_info;
977 	ibs_request->fence_info.handle = NULL;
978 
979 	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
980 	all_res[res_cnt] = ib_result_handle;
981 
982 	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
983 				  NULL, &ibs_request->resources);
984 	CU_ASSERT_EQUAL(r, 0);
985 
986 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
987 
988 	/* submit CS */
989 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
990 	CU_ASSERT_EQUAL(r, 0);
991 
992 	r = amdgpu_bo_list_destroy(ibs_request->resources);
993 	CU_ASSERT_EQUAL(r, 0);
994 
995 	fence_status.ip_type = ip_type;
996 	fence_status.ip_instance = 0;
997 	fence_status.ring = ibs_request->ring;
998 	fence_status.context = context_handle;
999 	fence_status.fence = ibs_request->seq_no;
1000 
1001 	/* wait for IB accomplished */
1002 	r = amdgpu_cs_query_fence_status(&fence_status,
1003 					 AMDGPU_TIMEOUT_INFINITE,
1004 					 0, &expired);
1005 	CU_ASSERT_EQUAL(r, 0);
1006 	CU_ASSERT_EQUAL(expired, true);
1007 
1008 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1009 				     ib_result_mc_address, 4096);
1010 	CU_ASSERT_EQUAL(r, 0);
1011 }
1012 
amdgpu_command_submission_write_linear_helper(unsigned ip_type)1013 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
1014 {
1015 	const int sdma_write_length = 128;
1016 	const int pm4_dw = 256;
1017 	amdgpu_context_handle context_handle;
1018 	amdgpu_bo_handle bo;
1019 	amdgpu_bo_handle *resources;
1020 	uint32_t *pm4;
1021 	struct amdgpu_cs_ib_info *ib_info;
1022 	struct amdgpu_cs_request *ibs_request;
1023 	uint64_t bo_mc;
1024 	volatile uint32_t *bo_cpu;
1025 	int i, j, r, loop, ring_id;
1026 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1027 	amdgpu_va_handle va_handle;
1028 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1029 
1030 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1031 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1032 
1033 	ib_info = calloc(1, sizeof(*ib_info));
1034 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1035 
1036 	ibs_request = calloc(1, sizeof(*ibs_request));
1037 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1038 
1039 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1040 	CU_ASSERT_EQUAL(r, 0);
1041 
1042 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1043 	CU_ASSERT_EQUAL(r, 0);
1044 
1045 	/* prepare resource */
1046 	resources = calloc(1, sizeof(amdgpu_bo_handle));
1047 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1048 
1049 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1050 		loop = 0;
1051 		while(loop < 2) {
1052 			/* allocate UC bo for sDMA use */
1053 			r = amdgpu_bo_alloc_and_map(device_handle,
1054 						    sdma_write_length * sizeof(uint32_t),
1055 						    4096, AMDGPU_GEM_DOMAIN_GTT,
1056 						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1057 						    &bo_mc, &va_handle);
1058 			CU_ASSERT_EQUAL(r, 0);
1059 
1060 			/* clear bo */
1061 			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
1062 
1063 			resources[0] = bo;
1064 
1065 			/* fulfill PM4: test DMA write-linear */
1066 			i = j = 0;
1067 			if (ip_type == AMDGPU_HW_IP_DMA) {
1068 				if (family_id == AMDGPU_FAMILY_SI)
1069 					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1070 								  sdma_write_length);
1071 				else
1072 					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1073 							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
1074 				pm4[i++] = 0xffffffff & bo_mc;
1075 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1076 				if (family_id >= AMDGPU_FAMILY_AI)
1077 					pm4[i++] = sdma_write_length - 1;
1078 				else if (family_id != AMDGPU_FAMILY_SI)
1079 					pm4[i++] = sdma_write_length;
1080 				while(j++ < sdma_write_length)
1081 					pm4[i++] = 0xdeadbeaf;
1082 			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1083 				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1084 				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1085 				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1086 				pm4[i++] = 0xfffffffc & bo_mc;
1087 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1088 				while(j++ < sdma_write_length)
1089 					pm4[i++] = 0xdeadbeaf;
1090 			}
1091 
1092 			amdgpu_test_exec_cs_helper(context_handle,
1093 						   ip_type, ring_id,
1094 						   i, pm4,
1095 						   1, resources,
1096 						   ib_info, ibs_request);
1097 
1098 			/* verify if SDMA test result meets with expected */
1099 			i = 0;
1100 			while(i < sdma_write_length) {
1101 				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1102 			}
1103 
1104 			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1105 						     sdma_write_length * sizeof(uint32_t));
1106 			CU_ASSERT_EQUAL(r, 0);
1107 			loop++;
1108 		}
1109 	}
1110 	/* clean resources */
1111 	free(resources);
1112 	free(ibs_request);
1113 	free(ib_info);
1114 	free(pm4);
1115 
1116 	/* end of test */
1117 	r = amdgpu_cs_ctx_free(context_handle);
1118 	CU_ASSERT_EQUAL(r, 0);
1119 }
1120 
/* Run the shared write-linear test against the DMA IP block. */
static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}
1125 
amdgpu_command_submission_const_fill_helper(unsigned ip_type)1126 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1127 {
1128 	const int sdma_write_length = 1024 * 1024;
1129 	const int pm4_dw = 256;
1130 	amdgpu_context_handle context_handle;
1131 	amdgpu_bo_handle bo;
1132 	amdgpu_bo_handle *resources;
1133 	uint32_t *pm4;
1134 	struct amdgpu_cs_ib_info *ib_info;
1135 	struct amdgpu_cs_request *ibs_request;
1136 	uint64_t bo_mc;
1137 	volatile uint32_t *bo_cpu;
1138 	int i, j, r, loop, ring_id;
1139 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1140 	amdgpu_va_handle va_handle;
1141 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1142 
1143 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1144 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1145 
1146 	ib_info = calloc(1, sizeof(*ib_info));
1147 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1148 
1149 	ibs_request = calloc(1, sizeof(*ibs_request));
1150 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1151 
1152 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1153 	CU_ASSERT_EQUAL(r, 0);
1154 
1155 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1156 	CU_ASSERT_EQUAL(r, 0);
1157 
1158 	/* prepare resource */
1159 	resources = calloc(1, sizeof(amdgpu_bo_handle));
1160 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1161 
1162 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1163 		loop = 0;
1164 		while(loop < 2) {
1165 			/* allocate UC bo for sDMA use */
1166 			r = amdgpu_bo_alloc_and_map(device_handle,
1167 						    sdma_write_length, 4096,
1168 						    AMDGPU_GEM_DOMAIN_GTT,
1169 						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1170 						    &bo_mc, &va_handle);
1171 			CU_ASSERT_EQUAL(r, 0);
1172 
1173 			/* clear bo */
1174 			memset((void*)bo_cpu, 0, sdma_write_length);
1175 
1176 			resources[0] = bo;
1177 
1178 			/* fulfill PM4: test DMA const fill */
1179 			i = j = 0;
1180 			if (ip_type == AMDGPU_HW_IP_DMA) {
1181 				if (family_id == AMDGPU_FAMILY_SI) {
1182 					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1183 								  0, 0, 0,
1184 								  sdma_write_length / 4);
1185 					pm4[i++] = 0xfffffffc & bo_mc;
1186 					pm4[i++] = 0xdeadbeaf;
1187 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1188 				} else {
1189 					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1190 							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1191 					pm4[i++] = 0xffffffff & bo_mc;
1192 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1193 					pm4[i++] = 0xdeadbeaf;
1194 					if (family_id >= AMDGPU_FAMILY_AI)
1195 						pm4[i++] = sdma_write_length - 1;
1196 					else
1197 						pm4[i++] = sdma_write_length;
1198 				}
1199 			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1200 				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1201 				if (family_id == AMDGPU_FAMILY_SI) {
1202 					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1203 					pm4[i++] = 0xdeadbeaf;
1204 					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1205 						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1206 						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1207 						   PACKET3_DMA_DATA_SI_CP_SYNC;
1208 					pm4[i++] = 0xffffffff & bo_mc;
1209 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1210 					pm4[i++] = sdma_write_length;
1211 				} else {
1212 					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1213 					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1214 						   PACKET3_DMA_DATA_DST_SEL(0) |
1215 						   PACKET3_DMA_DATA_SRC_SEL(2) |
1216 						   PACKET3_DMA_DATA_CP_SYNC;
1217 					pm4[i++] = 0xdeadbeaf;
1218 					pm4[i++] = 0;
1219 					pm4[i++] = 0xfffffffc & bo_mc;
1220 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1221 					pm4[i++] = sdma_write_length;
1222 				}
1223 			}
1224 
1225 			amdgpu_test_exec_cs_helper(context_handle,
1226 						   ip_type, ring_id,
1227 						   i, pm4,
1228 						   1, resources,
1229 						   ib_info, ibs_request);
1230 
1231 			/* verify if SDMA test result meets with expected */
1232 			i = 0;
1233 			while(i < (sdma_write_length / 4)) {
1234 				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1235 			}
1236 
1237 			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1238 						     sdma_write_length);
1239 			CU_ASSERT_EQUAL(r, 0);
1240 			loop++;
1241 		}
1242 	}
1243 	/* clean resources */
1244 	free(resources);
1245 	free(ibs_request);
1246 	free(ib_info);
1247 	free(pm4);
1248 
1249 	/* end of test */
1250 	r = amdgpu_cs_ctx_free(context_handle);
1251 	CU_ASSERT_EQUAL(r, 0);
1252 }
1253 
/* Run the shared constant-fill test against the DMA IP block. */
static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
1258 
amdgpu_command_submission_copy_linear_helper(unsigned ip_type)1259 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1260 {
1261 	const int sdma_write_length = 1024;
1262 	const int pm4_dw = 256;
1263 	amdgpu_context_handle context_handle;
1264 	amdgpu_bo_handle bo1, bo2;
1265 	amdgpu_bo_handle *resources;
1266 	uint32_t *pm4;
1267 	struct amdgpu_cs_ib_info *ib_info;
1268 	struct amdgpu_cs_request *ibs_request;
1269 	uint64_t bo1_mc, bo2_mc;
1270 	volatile unsigned char *bo1_cpu, *bo2_cpu;
1271 	int i, j, r, loop1, loop2, ring_id;
1272 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1273 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1274 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1275 
1276 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1277 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1278 
1279 	ib_info = calloc(1, sizeof(*ib_info));
1280 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1281 
1282 	ibs_request = calloc(1, sizeof(*ibs_request));
1283 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1284 
1285 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1286 	CU_ASSERT_EQUAL(r, 0);
1287 
1288 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1289 	CU_ASSERT_EQUAL(r, 0);
1290 
1291 	/* prepare resource */
1292 	resources = calloc(2, sizeof(amdgpu_bo_handle));
1293 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1294 
1295 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1296 		loop1 = loop2 = 0;
1297 		/* run 9 circle to test all mapping combination */
1298 		while(loop1 < 2) {
1299 			while(loop2 < 2) {
1300 				/* allocate UC bo1for sDMA use */
1301 				r = amdgpu_bo_alloc_and_map(device_handle,
1302 							    sdma_write_length, 4096,
1303 							    AMDGPU_GEM_DOMAIN_GTT,
1304 							    gtt_flags[loop1], &bo1,
1305 							    (void**)&bo1_cpu, &bo1_mc,
1306 							    &bo1_va_handle);
1307 				CU_ASSERT_EQUAL(r, 0);
1308 
1309 				/* set bo1 */
1310 				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1311 
1312 				/* allocate UC bo2 for sDMA use */
1313 				r = amdgpu_bo_alloc_and_map(device_handle,
1314 							    sdma_write_length, 4096,
1315 							    AMDGPU_GEM_DOMAIN_GTT,
1316 							    gtt_flags[loop2], &bo2,
1317 							    (void**)&bo2_cpu, &bo2_mc,
1318 							    &bo2_va_handle);
1319 				CU_ASSERT_EQUAL(r, 0);
1320 
1321 				/* clear bo2 */
1322 				memset((void*)bo2_cpu, 0, sdma_write_length);
1323 
1324 				resources[0] = bo1;
1325 				resources[1] = bo2;
1326 
1327 				/* fulfill PM4: test DMA copy linear */
1328 				i = j = 0;
1329 				if (ip_type == AMDGPU_HW_IP_DMA) {
1330 					if (family_id == AMDGPU_FAMILY_SI) {
1331 						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1332 									  0, 0, 0,
1333 									  sdma_write_length);
1334 						pm4[i++] = 0xffffffff & bo2_mc;
1335 						pm4[i++] = 0xffffffff & bo1_mc;
1336 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1337 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1338 					} else {
1339 						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1340 								       SDMA_COPY_SUB_OPCODE_LINEAR,
1341 								       0);
1342 						if (family_id >= AMDGPU_FAMILY_AI)
1343 							pm4[i++] = sdma_write_length - 1;
1344 						else
1345 							pm4[i++] = sdma_write_length;
1346 						pm4[i++] = 0;
1347 						pm4[i++] = 0xffffffff & bo1_mc;
1348 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1349 						pm4[i++] = 0xffffffff & bo2_mc;
1350 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1351 					}
1352 				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1353 					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1354 					if (family_id == AMDGPU_FAMILY_SI) {
1355 						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1356 						pm4[i++] = 0xfffffffc & bo1_mc;
1357 						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1358 							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1359 							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1360 							   PACKET3_DMA_DATA_SI_CP_SYNC |
1361 							   (0xffff00000000 & bo1_mc) >> 32;
1362 						pm4[i++] = 0xfffffffc & bo2_mc;
1363 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1364 						pm4[i++] = sdma_write_length;
1365 					} else {
1366 						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1367 						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1368 							   PACKET3_DMA_DATA_DST_SEL(0) |
1369 							   PACKET3_DMA_DATA_SRC_SEL(0) |
1370 							   PACKET3_DMA_DATA_CP_SYNC;
1371 						pm4[i++] = 0xfffffffc & bo1_mc;
1372 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1373 						pm4[i++] = 0xfffffffc & bo2_mc;
1374 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1375 						pm4[i++] = sdma_write_length;
1376 					}
1377 				}
1378 
1379 				amdgpu_test_exec_cs_helper(context_handle,
1380 							   ip_type, ring_id,
1381 							   i, pm4,
1382 							   2, resources,
1383 							   ib_info, ibs_request);
1384 
1385 				/* verify if SDMA test result meets with expected */
1386 				i = 0;
1387 				while(i < sdma_write_length) {
1388 					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1389 				}
1390 				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1391 							     sdma_write_length);
1392 				CU_ASSERT_EQUAL(r, 0);
1393 				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1394 							     sdma_write_length);
1395 				CU_ASSERT_EQUAL(r, 0);
1396 				loop2++;
1397 			}
1398 			loop1++;
1399 		}
1400 	}
1401 	/* clean resources */
1402 	free(resources);
1403 	free(ibs_request);
1404 	free(ib_info);
1405 	free(pm4);
1406 
1407 	/* end of test */
1408 	r = amdgpu_cs_ctx_free(context_handle);
1409 	CU_ASSERT_EQUAL(r, 0);
1410 }
1411 
/* Run the shared copy-linear test against the DMA IP block. */
static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}
1416 
/*
 * SDMA command-submission test entry point: runs the write-linear,
 * constant-fill and copy-linear sub-tests one after another.
 */
static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}
1423 
amdgpu_command_submission_multi_fence_wait_all(bool wait_all)1424 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1425 {
1426 	amdgpu_context_handle context_handle;
1427 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1428 	void *ib_result_cpu, *ib_result_ce_cpu;
1429 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1430 	struct amdgpu_cs_request ibs_request[2] = {0};
1431 	struct amdgpu_cs_ib_info ib_info[2];
1432 	struct amdgpu_cs_fence fence_status[2] = {0};
1433 	uint32_t *ptr;
1434 	uint32_t expired;
1435 	amdgpu_bo_list_handle bo_list;
1436 	amdgpu_va_handle va_handle, va_handle_ce;
1437 	int r;
1438 	int i = 0, ib_cs_num = 2;
1439 
1440 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1441 	CU_ASSERT_EQUAL(r, 0);
1442 
1443 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1444 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1445 				    &ib_result_handle, &ib_result_cpu,
1446 				    &ib_result_mc_address, &va_handle);
1447 	CU_ASSERT_EQUAL(r, 0);
1448 
1449 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1450 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1451 				    &ib_result_ce_handle, &ib_result_ce_cpu,
1452 				    &ib_result_ce_mc_address, &va_handle_ce);
1453 	CU_ASSERT_EQUAL(r, 0);
1454 
1455 	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1456 			       ib_result_ce_handle, &bo_list);
1457 	CU_ASSERT_EQUAL(r, 0);
1458 
1459 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1460 
1461 	/* IT_SET_CE_DE_COUNTERS */
1462 	ptr = ib_result_ce_cpu;
1463 	if (family_id != AMDGPU_FAMILY_SI) {
1464 		ptr[i++] = 0xc0008900;
1465 		ptr[i++] = 0;
1466 	}
1467 	ptr[i++] = 0xc0008400;
1468 	ptr[i++] = 1;
1469 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1470 	ib_info[0].size = i;
1471 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1472 
1473 	/* IT_WAIT_ON_CE_COUNTER */
1474 	ptr = ib_result_cpu;
1475 	ptr[0] = 0xc0008600;
1476 	ptr[1] = 0x00000001;
1477 	ib_info[1].ib_mc_address = ib_result_mc_address;
1478 	ib_info[1].size = 2;
1479 
1480 	for (i = 0; i < ib_cs_num; i++) {
1481 		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1482 		ibs_request[i].number_of_ibs = 2;
1483 		ibs_request[i].ibs = ib_info;
1484 		ibs_request[i].resources = bo_list;
1485 		ibs_request[i].fence_info.handle = NULL;
1486 	}
1487 
1488 	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
1489 
1490 	CU_ASSERT_EQUAL(r, 0);
1491 
1492 	for (i = 0; i < ib_cs_num; i++) {
1493 		fence_status[i].context = context_handle;
1494 		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1495 		fence_status[i].fence = ibs_request[i].seq_no;
1496 	}
1497 
1498 	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1499 				AMDGPU_TIMEOUT_INFINITE,
1500 				&expired, NULL);
1501 	CU_ASSERT_EQUAL(r, 0);
1502 
1503 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1504 				     ib_result_mc_address, 4096);
1505 	CU_ASSERT_EQUAL(r, 0);
1506 
1507 	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1508 				     ib_result_ce_mc_address, 4096);
1509 	CU_ASSERT_EQUAL(r, 0);
1510 
1511 	r = amdgpu_bo_list_destroy(bo_list);
1512 	CU_ASSERT_EQUAL(r, 0);
1513 
1514 	r = amdgpu_cs_ctx_free(context_handle);
1515 	CU_ASSERT_EQUAL(r, 0);
1516 }
1517 
/*
 * Multi-fence test entry point: runs the two-request submission once with
 * wait_all set and once with it cleared.
 */
static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}
1523 
amdgpu_userptr_test(void)1524 static void amdgpu_userptr_test(void)
1525 {
1526 	int i, r, j;
1527 	uint32_t *pm4 = NULL;
1528 	uint64_t bo_mc;
1529 	void *ptr = NULL;
1530 	int pm4_dw = 256;
1531 	int sdma_write_length = 4;
1532 	amdgpu_bo_handle handle;
1533 	amdgpu_context_handle context_handle;
1534 	struct amdgpu_cs_ib_info *ib_info;
1535 	struct amdgpu_cs_request *ibs_request;
1536 	amdgpu_bo_handle buf_handle;
1537 	amdgpu_va_handle va_handle;
1538 
1539 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1540 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1541 
1542 	ib_info = calloc(1, sizeof(*ib_info));
1543 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1544 
1545 	ibs_request = calloc(1, sizeof(*ibs_request));
1546 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1547 
1548 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1549 	CU_ASSERT_EQUAL(r, 0);
1550 
1551 	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
1552 	CU_ASSERT_NOT_EQUAL(ptr, NULL);
1553 	memset(ptr, 0, BUFFER_SIZE);
1554 
1555 	r = amdgpu_create_bo_from_user_mem(device_handle,
1556 					   ptr, BUFFER_SIZE, &buf_handle);
1557 	CU_ASSERT_EQUAL(r, 0);
1558 
1559 	r = amdgpu_va_range_alloc(device_handle,
1560 				  amdgpu_gpu_va_range_general,
1561 				  BUFFER_SIZE, 1, 0, &bo_mc,
1562 				  &va_handle, 0);
1563 	CU_ASSERT_EQUAL(r, 0);
1564 
1565 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
1566 	CU_ASSERT_EQUAL(r, 0);
1567 
1568 	handle = buf_handle;
1569 
1570 	j = i = 0;
1571 
1572 	if (family_id == AMDGPU_FAMILY_SI)
1573 		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1574 				sdma_write_length);
1575 	else
1576 		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1577 				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
1578 	pm4[i++] = 0xffffffff & bo_mc;
1579 	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1580 	if (family_id >= AMDGPU_FAMILY_AI)
1581 		pm4[i++] = sdma_write_length - 1;
1582 	else if (family_id != AMDGPU_FAMILY_SI)
1583 		pm4[i++] = sdma_write_length;
1584 
1585 	while (j++ < sdma_write_length)
1586 		pm4[i++] = 0xdeadbeaf;
1587 
1588 	if (!fork()) {
1589 		pm4[0] = 0x0;
1590 		exit(0);
1591 	}
1592 
1593 	amdgpu_test_exec_cs_helper(context_handle,
1594 				   AMDGPU_HW_IP_DMA, 0,
1595 				   i, pm4,
1596 				   1, &handle,
1597 				   ib_info, ibs_request);
1598 	i = 0;
1599 	while (i < sdma_write_length) {
1600 		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
1601 	}
1602 	free(ibs_request);
1603 	free(ib_info);
1604 	free(pm4);
1605 
1606 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
1607 	CU_ASSERT_EQUAL(r, 0);
1608 	r = amdgpu_va_range_free(va_handle);
1609 	CU_ASSERT_EQUAL(r, 0);
1610 	r = amdgpu_bo_free(buf_handle);
1611 	CU_ASSERT_EQUAL(r, 0);
1612 	free(ptr);
1613 
1614 	r = amdgpu_cs_ctx_free(context_handle);
1615 	CU_ASSERT_EQUAL(r, 0);
1616 
1617 	wait(NULL);
1618 }
1619 
amdgpu_sync_dependency_test(void)1620 static void amdgpu_sync_dependency_test(void)
1621 {
1622 	amdgpu_context_handle context_handle[2];
1623 	amdgpu_bo_handle ib_result_handle;
1624 	void *ib_result_cpu;
1625 	uint64_t ib_result_mc_address;
1626 	struct amdgpu_cs_request ibs_request;
1627 	struct amdgpu_cs_ib_info ib_info;
1628 	struct amdgpu_cs_fence fence_status;
1629 	uint32_t expired;
1630 	int i, j, r;
1631 	amdgpu_bo_list_handle bo_list;
1632 	amdgpu_va_handle va_handle;
1633 	static uint32_t *ptr;
1634 	uint64_t seq_no;
1635 
1636 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
1637 	CU_ASSERT_EQUAL(r, 0);
1638 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
1639 	CU_ASSERT_EQUAL(r, 0);
1640 
1641 	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
1642 			AMDGPU_GEM_DOMAIN_GTT, 0,
1643 						    &ib_result_handle, &ib_result_cpu,
1644 						    &ib_result_mc_address, &va_handle);
1645 	CU_ASSERT_EQUAL(r, 0);
1646 
1647 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1648 			       &bo_list);
1649 	CU_ASSERT_EQUAL(r, 0);
1650 
1651 	ptr = ib_result_cpu;
1652 	i = 0;
1653 
1654 	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
1655 
1656 	/* Dispatch minimal init config and verify it's executed */
1657 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
1658 	ptr[i++] = 0x80000000;
1659 	ptr[i++] = 0x80000000;
1660 
1661 	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
1662 	ptr[i++] = 0x80000000;
1663 
1664 
1665 	/* Program compute regs */
1666 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1667 	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1668 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
1669 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
1670 
1671 
1672 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1673 	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
1674 	/*
1675 	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
1676 	                                      SGPRS = 1
1677 	                                      PRIORITY = 0
1678 	                                      FLOAT_MODE = 192 (0xc0)
1679 	                                      PRIV = 0
1680 	                                      DX10_CLAMP = 1
1681 	                                      DEBUG_MODE = 0
1682 	                                      IEEE_MODE = 0
1683 	                                      BULKY = 0
1684 	                                      CDBG_USER = 0
1685 	 *
1686 	 */
1687 	ptr[i++] = 0x002c0040;
1688 
1689 
1690 	/*
1691 	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
1692 	                                      USER_SGPR = 8
1693 	                                      TRAP_PRESENT = 0
1694 	                                      TGID_X_EN = 0
1695 	                                      TGID_Y_EN = 0
1696 	                                      TGID_Z_EN = 0
1697 	                                      TG_SIZE_EN = 0
1698 	                                      TIDIG_COMP_CNT = 0
1699 	                                      EXCP_EN_MSB = 0
1700 	                                      LDS_SIZE = 0
1701 	                                      EXCP_EN = 0
1702 	 *
1703 	 */
1704 	ptr[i++] = 0x00000010;
1705 
1706 
1707 /*
1708  * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
1709                                          WAVESIZE = 0
1710  *
1711  */
1712 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
1713 	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
1714 	ptr[i++] = 0x00000100;
1715 
1716 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1717 	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
1718 	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
1719 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
1720 
1721 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
1722 	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
1723 	ptr[i++] = 0;
1724 
1725 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
1726 	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
1727 	ptr[i++] = 1;
1728 	ptr[i++] = 1;
1729 	ptr[i++] = 1;
1730 
1731 
1732 	/* Dispatch */
1733 	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1734 	ptr[i++] = 1;
1735 	ptr[i++] = 1;
1736 	ptr[i++] = 1;
1737 	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
1738 
1739 
1740 	while (i & 7)
1741 		ptr[i++] =  0xffff1000; /* type3 nop packet */
1742 
1743 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1744 	ib_info.ib_mc_address = ib_result_mc_address;
1745 	ib_info.size = i;
1746 
1747 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1748 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
1749 	ibs_request.ring = 0;
1750 	ibs_request.number_of_ibs = 1;
1751 	ibs_request.ibs = &ib_info;
1752 	ibs_request.resources = bo_list;
1753 	ibs_request.fence_info.handle = NULL;
1754 
1755 	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
1756 	CU_ASSERT_EQUAL(r, 0);
1757 	seq_no = ibs_request.seq_no;
1758 
1759 
1760 
1761 	/* Prepare second command with dependency on the first */
1762 	j = i;
1763 	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
1764 	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1765 	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
1766 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
1767 	ptr[i++] = 99;
1768 
1769 	while (i & 7)
1770 		ptr[i++] =  0xffff1000; /* type3 nop packet */
1771 
1772 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1773 	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
1774 	ib_info.size = i - j;
1775 
1776 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1777 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
1778 	ibs_request.ring = 0;
1779 	ibs_request.number_of_ibs = 1;
1780 	ibs_request.ibs = &ib_info;
1781 	ibs_request.resources = bo_list;
1782 	ibs_request.fence_info.handle = NULL;
1783 
1784 	ibs_request.number_of_dependencies = 1;
1785 
1786 	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
1787 	ibs_request.dependencies[0].context = context_handle[1];
1788 	ibs_request.dependencies[0].ip_instance = 0;
1789 	ibs_request.dependencies[0].ring = 0;
1790 	ibs_request.dependencies[0].fence = seq_no;
1791 
1792 
1793 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
1794 	CU_ASSERT_EQUAL(r, 0);
1795 
1796 
1797 	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1798 	fence_status.context = context_handle[0];
1799 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
1800 	fence_status.ip_instance = 0;
1801 	fence_status.ring = 0;
1802 	fence_status.fence = ibs_request.seq_no;
1803 
1804 	r = amdgpu_cs_query_fence_status(&fence_status,
1805 		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
1806 	CU_ASSERT_EQUAL(r, 0);
1807 
1808 	/* Expect the second command to wait for shader to complete */
1809 	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
1810 
1811 	r = amdgpu_bo_list_destroy(bo_list);
1812 	CU_ASSERT_EQUAL(r, 0);
1813 
1814 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1815 				     ib_result_mc_address, 4096);
1816 	CU_ASSERT_EQUAL(r, 0);
1817 
1818 	r = amdgpu_cs_ctx_free(context_handle[0]);
1819 	CU_ASSERT_EQUAL(r, 0);
1820 	r = amdgpu_cs_ctx_free(context_handle[1]);
1821 	CU_ASSERT_EQUAL(r, 0);
1822 
1823 	free(ibs_request.dependencies);
1824 }
1825