1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22 */
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <unistd.h>
27 #include <sys/types.h>
28 #ifdef MAJOR_IN_SYSMACROS
29 #include <sys/sysmacros.h>
30 #endif
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #if HAVE_ALLOCA_H
34 # include <alloca.h>
35 #endif
36 #include <sys/wait.h>
37 
38 #include "CUnit/Basic.h"
39 
40 #include "amdgpu_test.h"
41 #include "amdgpu_drm.h"
42 #include "amdgpu_internal.h"
43 #include "util_math.h"
44 
/* Device under test; filled in by amdgpu_device_initialize(). */
static  amdgpu_device_handle device_handle;
/* Major/minor version values reported through amdgpu_device_initialize(). */
static  uint32_t  major_version;
static  uint32_t  minor_version;
/* Copy of amdgpu_gpu_info.family_id, stored by suite_basic_tests_init(). */
static  uint32_t  family_id;
49 
/* Test entry points; each one is registered in basic_tests[] below. */
static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);

/*
 * Shared helpers, parameterised by the hardware IP type (the GFX wrappers
 * in this file pass AMDGPU_HW_IP_GFX).
 */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);
73 
/*
 * CUnit test table for this suite: { display name, test function } pairs,
 * terminated by CU_TEST_INFO_NULL.  The display names are also used as
 * lookup keys by suite_basic_tests_enable() when it deactivates tests.
 */
CU_TestInfo basic_tests[] = {
	{ "Query Info Test",  amdgpu_query_info_test },
	{ "Userptr Test",  amdgpu_userptr_test },
	{ "bo eviction Test",  amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test",  amdgpu_semaphore_test },
	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
	{ "Draw Test",  amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	CU_TEST_INFO_NULL,
};
/*
 * Test buffer size: 8 KiB or the system page size, combined with MAX2()
 * (presumably "the larger of the two" -- MAX2 comes from util_math.h).
 */
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
/* Opcode field of an SDMA packet header: bits [7:0]. */
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
/*
 * Build an SDMA packet header dword:
 * op in bits [7:0], sub_op in bits [15:8], e in bits [31:16].
 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
/* NOTE(review): "WRTIE" looks like a misspelling of "WRITE"; the name is
 * left untouched because it may be referenced outside this view. */
#       define SDMA_WRTIE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

#define	SDMA_OPCODE_ATOMIC				  10
#		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
		/* 0 - single_pass_atomic.
		 * 1 - loop_until_compare_satisfied.
		 */
#		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
		/* 0 - non-TMZ.
		 * 1 - TMZ.
		 */
#		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
		 * same as Packet 3
		 */

/* NOP dwords for the GFX/compute rings and for SDMA. */
#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
126 
/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

/* Field extractors for a PM4 packet header dword. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
/* Type-0 header: register offset in bits [15:0], count in bits [29:16]. */
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 (((n) & 0x3FFF) << 16))
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

/* REG_SET() is not defined in this part of the file. */
#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/*
 * Type-3 header: opcode in bits [15:8], count in bits [29:16], type in
 * bits [31:30].  The type is shifted as unsigned because 3 << 30 does not
 * fit in a signed int.
 */
#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |				\
			 (((n) & 0x3FFF) << 16))
/*
 * Type-3 header with bit 1 set.  The whole expansion is parenthesised so
 * the macro can be used inside larger expressions (e.g. followed by '&',
 * '==' or a cast) without the trailing "| (1 << 1)" re-associating.
 */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
150 
/* Packet 3 types */
#define	PACKET3_NOP					0x10

/* WRITE_DATA opcode and the bit fields of its control dword. */
#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
/* NOTE(review): with a signed int argument, x == 2 shifts into the sign
 * bit; callers may want to pass an unsigned value. */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

/* ATOMIC_MEM opcode and the bit fields of its control dword. */
#define	PACKET3_ATOMIC_MEM				0x1E
#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
            /* 0 - single_pass_atomic.
             * 1 - loop_until_compare_satisfied.
             */
/* NOTE(review): "CACHEPOLICAY" looks like a misspelling of "CACHEPOLICY";
 * the name is kept because it may be referenced outside this view. */
#define     ATOMIC_MEM_CACHEPOLICAY(x)          ((x) << 25)
            /* 0 - lru.
             * 1 - stream.
             */
#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
            /* 0 - micro_engine.
             */
188 
#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
/* Bit 31: shifted as unsigned, since 1 << 31 overflows a signed int and
 * would sign-extend when widened to 64 bits. */
#              define PACKET3_DMA_DATA_CP_SYNC     (1u << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
252 
/*
 * SI-family SDMA packet header: op in bits [31:28], b in bit 26, t in
 * bit 23, s in bit 22 and cnt in bits [19:0].  The opcode is shifted as
 * unsigned because opcodes >= 8 (e.g. SDMA_NOP_SI) would otherwise
 * overflow a signed int.
 */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((unsigned)(op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
/* Bit 31: unsigned shift, as 1 << 31 overflows a signed int. */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1u << 31)
279 
280 
/* CONTEXT_CONTROL opcode and single-bit fields of its payload dwords. */
#define PKT3_CONTEXT_CONTROL                   0x28
/* NOTE(review): LOAD_ENABLE and SHADOW_ENABLE both map to bit 31;
 * presumably they belong to different dwords of the packet -- confirm. */
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

/* Further type-3 packet opcodes. */
#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_EVENT_WRITE				0x46
#define PACKET3_ACQUIRE_MEM				0x58
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_DRAW_INDEX_AUTO				0x2D
/* gfx 8 */
/* Compute register offsets. */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
305 
306 
307 
/*
 * Reverse the byte order of a 32-bit value.
 *
 * The argument is parenthesised and converted to uint32_t before masking,
 * so compound arguments (e.g. "a | b") expand correctly and shifting a
 * low byte >= 0x80 into bits [31:24] cannot overflow a signed int.  The
 * result is still an integer constant expression when the argument is one,
 * so the macro remains usable in static initialisers.
 *
 * Note: the argument is evaluated four times; avoid side effects.
 */
#define SWAP_32(num) ((((uint32_t)(num) & 0xff000000u) >> 24) | \
		      (((uint32_t)(num) & 0x0000ff00u) << 8) | \
		      (((uint32_t)(num) & 0x00ff0000u) >> 8) | \
		      (((uint32_t)(num) & 0x000000ffu) << 24))
312 
313 
/*
 * Shader code (presumably the source the binary below was built from):
 *
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

/* Shader words; each literal is byte-swapped through SWAP_32(). */
static  uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};
332 
/* Offsets used by code outside this view; presumably byte offsets of the
 * code and data areas inside a shared buffer -- TODO confirm. */
#define CODE_OFFSET 512
#define DATA_OFFSET 1024

/* Selects which compute-shader variant a test uses. */
enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};
342 
/* Compute shader words for the gfx9 buffer-clear test. */
static const uint32_t bufferclear_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
    0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
    0xBF810000
};

/* { register offset, value } pairs accompanying the buffer-clear shader. */
static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

/* Number of rows in bufferclear_cs_shader_registers_gfx9; keep in sync. */
static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

/* Compute shader words for the gfx9 buffer-copy test (the same eight
 * words as memcpy_cs_hang_slow_ai_codes further down). */
static const uint32_t buffercopy_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
    0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
};
363 
/*
 * Pre-built gfx9 command stream.  Most entries start with a 0xc0NN6900 or
 * 0xc0NN7900 header dword; per the PACKET3() layout and the opcode defines
 * above, those are type-3 SET_CONTEXT_REG (0x69) / SET_UCONFIG_REG (0x79)
 * packets, each followed by a register offset and its value(s).
 * NOTE(review): where this stream is emitted is not visible in this view.
 */
static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};
386 
/* Selects which pixel-shader variant a test uses. */
enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};
393 
/* Pixel shader words for the PS_CONST case on gfx9. */
static const uint32_t ps_const_shader_gfx9[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
    0xC4001C0F, 0x00000100, 0xBF810000
};

/* Words per patch entry; equals the innermost dimension of the table below. */
static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

/*
 * Ten alternative 6-word sequences for the patch location of
 * ps_const_shader_gfx9.  Entry 4 equals the words the shader already
 * holds at that location.
 * NOTE(review): presumably one entry is copied over the shader at the
 * patch offset; the consumer is not visible here.
 */
static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
    }
};

/* Patch location inside ps_const_shader_gfx9, as a word index. */
static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
    0x00000004
};

/* Row count of the *_sh_registers_gfx9 tables. */
static const uint32_t ps_num_sh_registers_gfx9 = 2;

/* { register offset, value } pairs for the PS_CONST shader. */
static const uint32_t ps_const_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

/* Row count of the *_context_reg_gfx9 tables. */
static const uint32_t ps_num_context_registers_gfx9 = 7;

/* { context register offset, value } pairs for the PS_CONST shader. */
static const uint32_t ps_const_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};
438 
/* Pixel shader words for the PS_TEX case on gfx9. */
static const uint32_t ps_tex_shader_gfx9[] = {
    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
    0x00000100, 0xBF810000
};

/* Patch location inside ps_tex_shader_gfx9, as a word index. */
static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
    0x0000000B
};

/* Words per patch entry; equals the innermost dimension of the table below. */
static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

/*
 * Ten alternative 6-word sequences for the patch location of
 * ps_tex_shader_gfx9.  Entry 4 equals the words the shader already
 * holds at that location.
 * NOTE(review): presumably one entry is copied over the shader at the
 * patch offset; the consumer is not visible here.
 */
static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
    }
};

/* { register offset, value } pairs for the PS_TEX shader. */
static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

/* { context register offset, value } pairs for the PS_TEX shader. */
static const uint32_t ps_tex_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
};
481 
/* Shader words for gfx9; going by the name, a vertex shader that emits a
 * rectangle's position and texture coordinates -- TODO confirm. */
static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
    0xC400020F, 0x05060403, 0xBF810000
};
492 
/*
 * Pre-built gfx9 command stream made of 0xc0NN6900 header dwords (type-3
 * SET_CONTEXT_REG per the PACKET3() layout above), each followed by a
 * register offset and its value(s).
 * NOTE(review): where this stream is emitted is not visible in this view.
 */
static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};
503 
/*
 * Pixel shader words for the hang test variant (see PS_HANG).
 * NOTE(review): the first word is 0xFFFFFFFF, presumably a deliberately
 * invalid instruction that provokes the hang -- confirm.
 * The array has external linkage (not static).
 */
unsigned int memcpy_ps_hang[] = {
        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
        0xF800180F, 0x03020100, 0xBF810000
};
511 
/*
 * Describes a shader binary split into three consecutive parts.  In every
 * instance defined in this part of the file, header_length + body_length +
 * foot_length equals the number of 32-bit words in the array that
 * 'shader' points to.
 * NOTE(review): presumably the body part is repeated to lengthen the
 * shader for the "slow hang" tests -- the consumer is not visible here.
 */
struct amdgpu_test_shader {
	uint32_t *shader;		/* first word of the shader binary */
	uint32_t header_length;		/* words in the leading part */
	uint32_t body_length;		/* words in the middle part */
	uint32_t foot_length;		/* words in the trailing part */
};
518 
/* Compute shader words for the slow-hang test, "ai" variant (8 words). */
unsigned int memcpy_cs_hang_slow_ai_codes[] = {
    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

/* Descriptor for the array above: header 4 + body 3 + foot 1 = 8 words. */
struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
        memcpy_cs_hang_slow_ai_codes,
        4,
        3,
        1
};

/* Compute shader words for the slow-hang test, "rv" variant (8 words). */
unsigned int memcpy_cs_hang_slow_rv_codes[] = {
    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

/* Descriptor for the array above: header 4 + body 3 + foot 1 = 8 words. */
struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
        memcpy_cs_hang_slow_rv_codes,
        4,
        3,
        1
};

/* Pixel shader words for the slow-hang test, "ai" variant (18 words). */
unsigned int memcpy_ps_hang_slow_ai_codes[] = {
        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
        0x03020100, 0xbf810000
};

/* Descriptor for the array above: header 7 + body 2 + foot 9 = 18 words. */
struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
        memcpy_ps_hang_slow_ai_codes,
        7,
        2,
        9
};
557 
/*
 * Allocate a buffer object, reserve a GPU virtual address range for it,
 * map the buffer into that range and map it for CPU access.
 *
 * @dev:           device to allocate on
 * @size:          requested allocation size in bytes
 * @alignment:     physical and virtual address alignment
 * @heap:          preferred heap (AMDGPU_GEM_DOMAIN_*)
 * @alloc_flags:   flags for the allocation request
 * @mapping_flags: extra AMDGPU_VM_* flags, OR-ed with
 *                 READABLE | WRITEABLE | EXECUTABLE
 * @bo:            out: buffer object handle
 * @cpu:           out: CPU mapping of the buffer
 * @mc_address:    out: GPU virtual address of the mapping
 * @va_handle:     out: handle of the reserved VA range
 *
 * Returns 0 on success, otherwise the error code of the step that failed.
 * On failure every resource acquired so far is released again and @bo,
 * @mc_address and @va_handle are left untouched.
 */
int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
			unsigned alignment, unsigned heap, uint64_t alloc_flags,
			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
			uint64_t *mc_address,
			amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {0};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	/* The GPU mapping covers whole pages. */
	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
				   AMDGPU_VM_PAGE_READABLE |
				   AMDGPU_VM_PAGE_WRITEABLE |
				   AMDGPU_VM_PAGE_EXECUTABLE |
				   mapping_flags,
				   AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

	/*
	 * Unwind in reverse order of acquisition.  Each label undoes only
	 * the steps that succeeded before the failing one.
	 */
 error_cpu_map:
	/* CPU mapping failed: the GPU mapping exists and must be removed. */
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

 error_va_map:
	/* GPU mapping failed or was just removed: give back the VA range. */
	amdgpu_va_range_free(handle);

 error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
615 
616 
617 
suite_basic_tests_enable(void)618 CU_BOOL suite_basic_tests_enable(void)
619 {
620 	uint32_t asic_id;
621 
622 	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
623 					     &minor_version, &device_handle))
624 		return CU_FALSE;
625 
626 	asic_id = device_handle->info.asic_id;
627 
628 	if (amdgpu_device_deinitialize(device_handle))
629 		return CU_FALSE;
630 
631 	/* disable gfx engine basic test cases for Arturus due to no CPG */
632 	if (asic_is_arcturus(asic_id)) {
633 		if (amdgpu_set_test_active("Basic Tests",
634 					"Command submission Test (GFX)",
635 					CU_FALSE))
636 			fprintf(stderr, "test deactivation failed - %s\n",
637 				CU_get_error_msg());
638 
639 		if (amdgpu_set_test_active("Basic Tests",
640 					"Command submission Test (Multi-Fence)",
641 					CU_FALSE))
642 			fprintf(stderr, "test deactivation failed - %s\n",
643 				CU_get_error_msg());
644 
645 		if (amdgpu_set_test_active("Basic Tests",
646 					"Sync dependency Test",
647 					CU_FALSE))
648 			fprintf(stderr, "test deactivation failed - %s\n",
649 				CU_get_error_msg());
650 	}
651 
652 	return CU_TRUE;
653 }
654 
suite_basic_tests_init(void)655 int suite_basic_tests_init(void)
656 {
657 	struct amdgpu_gpu_info gpu_info = {0};
658 	int r;
659 
660 	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
661 				   &minor_version, &device_handle);
662 
663 	if (r) {
664 		if ((r == -EACCES) && (errno == EACCES))
665 			printf("\n\nError:%s. "
666 				"Hint:Try to run this test program as root.",
667 				strerror(errno));
668 		return CUE_SINIT_FAILED;
669 	}
670 
671 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
672 	if (r)
673 		return CUE_SINIT_FAILED;
674 
675 	family_id = gpu_info.family_id;
676 
677 	return CUE_SUCCESS;
678 }
679 
suite_basic_tests_clean(void)680 int suite_basic_tests_clean(void)
681 {
682 	int r = amdgpu_device_deinitialize(device_handle);
683 
684 	if (r == 0)
685 		return CUE_SUCCESS;
686 	else
687 		return CUE_SCLEAN_FAILED;
688 }
689 
amdgpu_query_info_test(void)690 static void amdgpu_query_info_test(void)
691 {
692 	struct amdgpu_gpu_info gpu_info = {0};
693 	uint32_t version, feature;
694 	int r;
695 
696 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
697 	CU_ASSERT_EQUAL(r, 0);
698 
699 	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
700 					  0, &version, &feature);
701 	CU_ASSERT_EQUAL(r, 0);
702 }
703 
amdgpu_command_submission_gfx_separate_ibs(void)704 static void amdgpu_command_submission_gfx_separate_ibs(void)
705 {
706 	amdgpu_context_handle context_handle;
707 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
708 	void *ib_result_cpu, *ib_result_ce_cpu;
709 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
710 	struct amdgpu_cs_request ibs_request = {0};
711 	struct amdgpu_cs_ib_info ib_info[2];
712 	struct amdgpu_cs_fence fence_status = {0};
713 	uint32_t *ptr;
714 	uint32_t expired;
715 	amdgpu_bo_list_handle bo_list;
716 	amdgpu_va_handle va_handle, va_handle_ce;
717 	int r, i = 0;
718 
719 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
720 	CU_ASSERT_EQUAL(r, 0);
721 
722 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
723 				    AMDGPU_GEM_DOMAIN_GTT, 0,
724 				    &ib_result_handle, &ib_result_cpu,
725 				    &ib_result_mc_address, &va_handle);
726 	CU_ASSERT_EQUAL(r, 0);
727 
728 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
729 				    AMDGPU_GEM_DOMAIN_GTT, 0,
730 				    &ib_result_ce_handle, &ib_result_ce_cpu,
731 				    &ib_result_ce_mc_address, &va_handle_ce);
732 	CU_ASSERT_EQUAL(r, 0);
733 
734 	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
735 			       ib_result_ce_handle, &bo_list);
736 	CU_ASSERT_EQUAL(r, 0);
737 
738 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
739 
740 	/* IT_SET_CE_DE_COUNTERS */
741 	ptr = ib_result_ce_cpu;
742 	if (family_id != AMDGPU_FAMILY_SI) {
743 		ptr[i++] = 0xc0008900;
744 		ptr[i++] = 0;
745 	}
746 	ptr[i++] = 0xc0008400;
747 	ptr[i++] = 1;
748 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
749 	ib_info[0].size = i;
750 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
751 
752 	/* IT_WAIT_ON_CE_COUNTER */
753 	ptr = ib_result_cpu;
754 	ptr[0] = 0xc0008600;
755 	ptr[1] = 0x00000001;
756 	ib_info[1].ib_mc_address = ib_result_mc_address;
757 	ib_info[1].size = 2;
758 
759 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
760 	ibs_request.number_of_ibs = 2;
761 	ibs_request.ibs = ib_info;
762 	ibs_request.resources = bo_list;
763 	ibs_request.fence_info.handle = NULL;
764 
765 	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
766 
767 	CU_ASSERT_EQUAL(r, 0);
768 
769 	fence_status.context = context_handle;
770 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
771 	fence_status.ip_instance = 0;
772 	fence_status.fence = ibs_request.seq_no;
773 
774 	r = amdgpu_cs_query_fence_status(&fence_status,
775 					 AMDGPU_TIMEOUT_INFINITE,
776 					 0, &expired);
777 	CU_ASSERT_EQUAL(r, 0);
778 
779 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
780 				     ib_result_mc_address, 4096);
781 	CU_ASSERT_EQUAL(r, 0);
782 
783 	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
784 				     ib_result_ce_mc_address, 4096);
785 	CU_ASSERT_EQUAL(r, 0);
786 
787 	r = amdgpu_bo_list_destroy(bo_list);
788 	CU_ASSERT_EQUAL(r, 0);
789 
790 	r = amdgpu_cs_ctx_free(context_handle);
791 	CU_ASSERT_EQUAL(r, 0);
792 
793 }
794 
amdgpu_command_submission_gfx_shared_ib(void)795 static void amdgpu_command_submission_gfx_shared_ib(void)
796 {
797 	amdgpu_context_handle context_handle;
798 	amdgpu_bo_handle ib_result_handle;
799 	void *ib_result_cpu;
800 	uint64_t ib_result_mc_address;
801 	struct amdgpu_cs_request ibs_request = {0};
802 	struct amdgpu_cs_ib_info ib_info[2];
803 	struct amdgpu_cs_fence fence_status = {0};
804 	uint32_t *ptr;
805 	uint32_t expired;
806 	amdgpu_bo_list_handle bo_list;
807 	amdgpu_va_handle va_handle;
808 	int r, i = 0;
809 
810 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
811 	CU_ASSERT_EQUAL(r, 0);
812 
813 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
814 				    AMDGPU_GEM_DOMAIN_GTT, 0,
815 				    &ib_result_handle, &ib_result_cpu,
816 				    &ib_result_mc_address, &va_handle);
817 	CU_ASSERT_EQUAL(r, 0);
818 
819 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
820 			       &bo_list);
821 	CU_ASSERT_EQUAL(r, 0);
822 
823 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
824 
825 	/* IT_SET_CE_DE_COUNTERS */
826 	ptr = ib_result_cpu;
827 	if (family_id != AMDGPU_FAMILY_SI) {
828 		ptr[i++] = 0xc0008900;
829 		ptr[i++] = 0;
830 	}
831 	ptr[i++] = 0xc0008400;
832 	ptr[i++] = 1;
833 	ib_info[0].ib_mc_address = ib_result_mc_address;
834 	ib_info[0].size = i;
835 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
836 
837 	ptr = (uint32_t *)ib_result_cpu + 4;
838 	ptr[0] = 0xc0008600;
839 	ptr[1] = 0x00000001;
840 	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
841 	ib_info[1].size = 2;
842 
843 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
844 	ibs_request.number_of_ibs = 2;
845 	ibs_request.ibs = ib_info;
846 	ibs_request.resources = bo_list;
847 	ibs_request.fence_info.handle = NULL;
848 
849 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
850 
851 	CU_ASSERT_EQUAL(r, 0);
852 
853 	fence_status.context = context_handle;
854 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
855 	fence_status.ip_instance = 0;
856 	fence_status.fence = ibs_request.seq_no;
857 
858 	r = amdgpu_cs_query_fence_status(&fence_status,
859 					 AMDGPU_TIMEOUT_INFINITE,
860 					 0, &expired);
861 	CU_ASSERT_EQUAL(r, 0);
862 
863 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
864 				     ib_result_mc_address, 4096);
865 	CU_ASSERT_EQUAL(r, 0);
866 
867 	r = amdgpu_bo_list_destroy(bo_list);
868 	CU_ASSERT_EQUAL(r, 0);
869 
870 	r = amdgpu_cs_ctx_free(context_handle);
871 	CU_ASSERT_EQUAL(r, 0);
872 }
873 
/* Run the shared write-linear helper with ip_type AMDGPU_HW_IP_GFX. */
static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}
878 
amdgpu_command_submission_gfx_cp_const_fill(void)879 static void amdgpu_command_submission_gfx_cp_const_fill(void)
880 {
881 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
882 }
883 
amdgpu_command_submission_gfx_cp_copy_data(void)884 static void amdgpu_command_submission_gfx_cp_copy_data(void)
885 {
886 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
887 }
888 
amdgpu_bo_eviction_test(void)889 static void amdgpu_bo_eviction_test(void)
890 {
891 	const int sdma_write_length = 1024;
892 	const int pm4_dw = 256;
893 	amdgpu_context_handle context_handle;
894 	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
895 	amdgpu_bo_handle *resources;
896 	uint32_t *pm4;
897 	struct amdgpu_cs_ib_info *ib_info;
898 	struct amdgpu_cs_request *ibs_request;
899 	uint64_t bo1_mc, bo2_mc;
900 	volatile unsigned char *bo1_cpu, *bo2_cpu;
901 	int i, j, r, loop1, loop2;
902 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
903 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
904 	struct amdgpu_heap_info vram_info, gtt_info;
905 
906 	pm4 = calloc(pm4_dw, sizeof(*pm4));
907 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
908 
909 	ib_info = calloc(1, sizeof(*ib_info));
910 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
911 
912 	ibs_request = calloc(1, sizeof(*ibs_request));
913 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
914 
915 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
916 	CU_ASSERT_EQUAL(r, 0);
917 
918 	/* prepare resource */
919 	resources = calloc(4, sizeof(amdgpu_bo_handle));
920 	CU_ASSERT_NOT_EQUAL(resources, NULL);
921 
922 	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
923 				   0, &vram_info);
924 	CU_ASSERT_EQUAL(r, 0);
925 
926 	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
927 				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
928 	CU_ASSERT_EQUAL(r, 0);
929 	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
930 				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
931 	CU_ASSERT_EQUAL(r, 0);
932 
933 	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
934 				   0, &gtt_info);
935 	CU_ASSERT_EQUAL(r, 0);
936 
937 	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
938 				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
939 	CU_ASSERT_EQUAL(r, 0);
940 	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
941 				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
942 	CU_ASSERT_EQUAL(r, 0);
943 
944 
945 
946 	loop1 = loop2 = 0;
947 	/* run 9 circle to test all mapping combination */
948 	while(loop1 < 2) {
949 		while(loop2 < 2) {
950 			/* allocate UC bo1for sDMA use */
951 			r = amdgpu_bo_alloc_and_map(device_handle,
952 						    sdma_write_length, 4096,
953 						    AMDGPU_GEM_DOMAIN_GTT,
954 						    gtt_flags[loop1], &bo1,
955 						    (void**)&bo1_cpu, &bo1_mc,
956 						    &bo1_va_handle);
957 			CU_ASSERT_EQUAL(r, 0);
958 
959 			/* set bo1 */
960 			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
961 
962 			/* allocate UC bo2 for sDMA use */
963 			r = amdgpu_bo_alloc_and_map(device_handle,
964 						    sdma_write_length, 4096,
965 						    AMDGPU_GEM_DOMAIN_GTT,
966 						    gtt_flags[loop2], &bo2,
967 						    (void**)&bo2_cpu, &bo2_mc,
968 						    &bo2_va_handle);
969 			CU_ASSERT_EQUAL(r, 0);
970 
971 			/* clear bo2 */
972 			memset((void*)bo2_cpu, 0, sdma_write_length);
973 
974 			resources[0] = bo1;
975 			resources[1] = bo2;
976 			resources[2] = vram_max[loop2];
977 			resources[3] = gtt_max[loop2];
978 
979 			/* fulfill PM4: test DMA copy linear */
980 			i = j = 0;
981 			if (family_id == AMDGPU_FAMILY_SI) {
982 				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
983 							  sdma_write_length);
984 				pm4[i++] = 0xffffffff & bo2_mc;
985 				pm4[i++] = 0xffffffff & bo1_mc;
986 				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
987 				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
988 			} else {
989 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
990 				if (family_id >= AMDGPU_FAMILY_AI)
991 					pm4[i++] = sdma_write_length - 1;
992 				else
993 					pm4[i++] = sdma_write_length;
994 				pm4[i++] = 0;
995 				pm4[i++] = 0xffffffff & bo1_mc;
996 				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
997 				pm4[i++] = 0xffffffff & bo2_mc;
998 				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
999 			}
1000 
1001 			amdgpu_test_exec_cs_helper(context_handle,
1002 						   AMDGPU_HW_IP_DMA, 0,
1003 						   i, pm4,
1004 						   4, resources,
1005 						   ib_info, ibs_request);
1006 
1007 			/* verify if SDMA test result meets with expected */
1008 			i = 0;
1009 			while(i < sdma_write_length) {
1010 				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1011 			}
1012 			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1013 						     sdma_write_length);
1014 			CU_ASSERT_EQUAL(r, 0);
1015 			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1016 						     sdma_write_length);
1017 			CU_ASSERT_EQUAL(r, 0);
1018 			loop2++;
1019 		}
1020 		loop2 = 0;
1021 		loop1++;
1022 	}
1023 	amdgpu_bo_free(vram_max[0]);
1024 	amdgpu_bo_free(vram_max[1]);
1025 	amdgpu_bo_free(gtt_max[0]);
1026 	amdgpu_bo_free(gtt_max[1]);
1027 	/* clean resources */
1028 	free(resources);
1029 	free(ibs_request);
1030 	free(ib_info);
1031 	free(pm4);
1032 
1033 	/* end of test */
1034 	r = amdgpu_cs_ctx_free(context_handle);
1035 	CU_ASSERT_EQUAL(r, 0);
1036 }
1037 
1038 
amdgpu_command_submission_gfx(void)1039 static void amdgpu_command_submission_gfx(void)
1040 {
1041 	/* write data using the CP */
1042 	amdgpu_command_submission_gfx_cp_write_data();
1043 	/* const fill using the CP */
1044 	amdgpu_command_submission_gfx_cp_const_fill();
1045 	/* copy data using the CP */
1046 	amdgpu_command_submission_gfx_cp_copy_data();
1047 	/* separate IB buffers for multi-IB submission */
1048 	amdgpu_command_submission_gfx_separate_ibs();
1049 	/* shared IB buffer for multi-IB submission */
1050 	amdgpu_command_submission_gfx_shared_ib();
1051 }
1052 
amdgpu_semaphore_test(void)1053 static void amdgpu_semaphore_test(void)
1054 {
1055 	amdgpu_context_handle context_handle[2];
1056 	amdgpu_semaphore_handle sem;
1057 	amdgpu_bo_handle ib_result_handle[2];
1058 	void *ib_result_cpu[2];
1059 	uint64_t ib_result_mc_address[2];
1060 	struct amdgpu_cs_request ibs_request[2] = {0};
1061 	struct amdgpu_cs_ib_info ib_info[2] = {0};
1062 	struct amdgpu_cs_fence fence_status = {0};
1063 	uint32_t *ptr;
1064 	uint32_t expired;
1065 	uint32_t sdma_nop, gfx_nop;
1066 	amdgpu_bo_list_handle bo_list[2];
1067 	amdgpu_va_handle va_handle[2];
1068 	int r, i;
1069 
1070 	if (family_id == AMDGPU_FAMILY_SI) {
1071 		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1072 		gfx_nop = GFX_COMPUTE_NOP_SI;
1073 	} else {
1074 		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1075 		gfx_nop = GFX_COMPUTE_NOP;
1076 	}
1077 
1078 	r = amdgpu_cs_create_semaphore(&sem);
1079 	CU_ASSERT_EQUAL(r, 0);
1080 	for (i = 0; i < 2; i++) {
1081 		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
1082 		CU_ASSERT_EQUAL(r, 0);
1083 
1084 		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1085 					    AMDGPU_GEM_DOMAIN_GTT, 0,
1086 					    &ib_result_handle[i], &ib_result_cpu[i],
1087 					    &ib_result_mc_address[i], &va_handle[i]);
1088 		CU_ASSERT_EQUAL(r, 0);
1089 
1090 		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
1091 				       NULL, &bo_list[i]);
1092 		CU_ASSERT_EQUAL(r, 0);
1093 	}
1094 
1095 	/* 1. same context different engine */
1096 	ptr = ib_result_cpu[0];
1097 	ptr[0] = sdma_nop;
1098 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
1099 	ib_info[0].size = 1;
1100 
1101 	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
1102 	ibs_request[0].number_of_ibs = 1;
1103 	ibs_request[0].ibs = &ib_info[0];
1104 	ibs_request[0].resources = bo_list[0];
1105 	ibs_request[0].fence_info.handle = NULL;
1106 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1107 	CU_ASSERT_EQUAL(r, 0);
1108 	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
1109 	CU_ASSERT_EQUAL(r, 0);
1110 
1111 	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
1112 	CU_ASSERT_EQUAL(r, 0);
1113 	ptr = ib_result_cpu[1];
1114 	ptr[0] = gfx_nop;
1115 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
1116 	ib_info[1].size = 1;
1117 
1118 	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
1119 	ibs_request[1].number_of_ibs = 1;
1120 	ibs_request[1].ibs = &ib_info[1];
1121 	ibs_request[1].resources = bo_list[1];
1122 	ibs_request[1].fence_info.handle = NULL;
1123 
1124 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
1125 	CU_ASSERT_EQUAL(r, 0);
1126 
1127 	fence_status.context = context_handle[0];
1128 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
1129 	fence_status.ip_instance = 0;
1130 	fence_status.fence = ibs_request[1].seq_no;
1131 	r = amdgpu_cs_query_fence_status(&fence_status,
1132 					 500000000, 0, &expired);
1133 	CU_ASSERT_EQUAL(r, 0);
1134 	CU_ASSERT_EQUAL(expired, true);
1135 
1136 	/* 2. same engine different context */
1137 	ptr = ib_result_cpu[0];
1138 	ptr[0] = gfx_nop;
1139 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
1140 	ib_info[0].size = 1;
1141 
1142 	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
1143 	ibs_request[0].number_of_ibs = 1;
1144 	ibs_request[0].ibs = &ib_info[0];
1145 	ibs_request[0].resources = bo_list[0];
1146 	ibs_request[0].fence_info.handle = NULL;
1147 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1148 	CU_ASSERT_EQUAL(r, 0);
1149 	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
1150 	CU_ASSERT_EQUAL(r, 0);
1151 
1152 	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
1153 	CU_ASSERT_EQUAL(r, 0);
1154 	ptr = ib_result_cpu[1];
1155 	ptr[0] = gfx_nop;
1156 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
1157 	ib_info[1].size = 1;
1158 
1159 	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
1160 	ibs_request[1].number_of_ibs = 1;
1161 	ibs_request[1].ibs = &ib_info[1];
1162 	ibs_request[1].resources = bo_list[1];
1163 	ibs_request[1].fence_info.handle = NULL;
1164 	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
1165 
1166 	CU_ASSERT_EQUAL(r, 0);
1167 
1168 	fence_status.context = context_handle[1];
1169 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
1170 	fence_status.ip_instance = 0;
1171 	fence_status.fence = ibs_request[1].seq_no;
1172 	r = amdgpu_cs_query_fence_status(&fence_status,
1173 					 500000000, 0, &expired);
1174 	CU_ASSERT_EQUAL(r, 0);
1175 	CU_ASSERT_EQUAL(expired, true);
1176 
1177 	for (i = 0; i < 2; i++) {
1178 		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
1179 					     ib_result_mc_address[i], 4096);
1180 		CU_ASSERT_EQUAL(r, 0);
1181 
1182 		r = amdgpu_bo_list_destroy(bo_list[i]);
1183 		CU_ASSERT_EQUAL(r, 0);
1184 
1185 		r = amdgpu_cs_ctx_free(context_handle[i]);
1186 		CU_ASSERT_EQUAL(r, 0);
1187 	}
1188 
1189 	r = amdgpu_cs_destroy_semaphore(sem);
1190 	CU_ASSERT_EQUAL(r, 0);
1191 }
1192 
amdgpu_command_submission_compute_nop(void)1193 static void amdgpu_command_submission_compute_nop(void)
1194 {
1195 	amdgpu_context_handle context_handle;
1196 	amdgpu_bo_handle ib_result_handle;
1197 	void *ib_result_cpu;
1198 	uint64_t ib_result_mc_address;
1199 	struct amdgpu_cs_request ibs_request;
1200 	struct amdgpu_cs_ib_info ib_info;
1201 	struct amdgpu_cs_fence fence_status;
1202 	uint32_t *ptr;
1203 	uint32_t expired;
1204 	int r, instance;
1205 	amdgpu_bo_list_handle bo_list;
1206 	amdgpu_va_handle va_handle;
1207 	struct drm_amdgpu_info_hw_ip info;
1208 
1209 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1210 	CU_ASSERT_EQUAL(r, 0);
1211 
1212 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1213 	CU_ASSERT_EQUAL(r, 0);
1214 
1215 	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
1216 		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1217 					    AMDGPU_GEM_DOMAIN_GTT, 0,
1218 					    &ib_result_handle, &ib_result_cpu,
1219 					    &ib_result_mc_address, &va_handle);
1220 		CU_ASSERT_EQUAL(r, 0);
1221 
1222 		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1223 				       &bo_list);
1224 		CU_ASSERT_EQUAL(r, 0);
1225 
1226 		ptr = ib_result_cpu;
1227 		memset(ptr, 0, 16);
1228 		ptr[0]=PACKET3(PACKET3_NOP, 14);
1229 
1230 		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1231 		ib_info.ib_mc_address = ib_result_mc_address;
1232 		ib_info.size = 16;
1233 
1234 		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1235 		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
1236 		ibs_request.ring = instance;
1237 		ibs_request.number_of_ibs = 1;
1238 		ibs_request.ibs = &ib_info;
1239 		ibs_request.resources = bo_list;
1240 		ibs_request.fence_info.handle = NULL;
1241 
1242 		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1243 		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
1244 		CU_ASSERT_EQUAL(r, 0);
1245 
1246 		fence_status.context = context_handle;
1247 		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
1248 		fence_status.ip_instance = 0;
1249 		fence_status.ring = instance;
1250 		fence_status.fence = ibs_request.seq_no;
1251 
1252 		r = amdgpu_cs_query_fence_status(&fence_status,
1253 						 AMDGPU_TIMEOUT_INFINITE,
1254 						 0, &expired);
1255 		CU_ASSERT_EQUAL(r, 0);
1256 
1257 		r = amdgpu_bo_list_destroy(bo_list);
1258 		CU_ASSERT_EQUAL(r, 0);
1259 
1260 		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1261 					     ib_result_mc_address, 4096);
1262 		CU_ASSERT_EQUAL(r, 0);
1263 	}
1264 
1265 	r = amdgpu_cs_ctx_free(context_handle);
1266 	CU_ASSERT_EQUAL(r, 0);
1267 }
1268 
amdgpu_command_submission_compute_cp_write_data(void)1269 static void amdgpu_command_submission_compute_cp_write_data(void)
1270 {
1271 	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
1272 }
1273 
amdgpu_command_submission_compute_cp_const_fill(void)1274 static void amdgpu_command_submission_compute_cp_const_fill(void)
1275 {
1276 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
1277 }
1278 
amdgpu_command_submission_compute_cp_copy_data(void)1279 static void amdgpu_command_submission_compute_cp_copy_data(void)
1280 {
1281 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
1282 }
1283 
amdgpu_command_submission_compute(void)1284 static void amdgpu_command_submission_compute(void)
1285 {
1286 	/* write data using the CP */
1287 	amdgpu_command_submission_compute_cp_write_data();
1288 	/* const fill using the CP */
1289 	amdgpu_command_submission_compute_cp_const_fill();
1290 	/* copy data using the CP */
1291 	amdgpu_command_submission_compute_cp_copy_data();
1292 	/* nop test */
1293 	amdgpu_command_submission_compute_nop();
1294 }
1295 
1296 /*
1297  * caller need create/release:
1298  * pm4_src, resources, ib_info, and ibs_request
1299  * submit command stream described in ibs_request and wait for this IB accomplished
1300  */
1301 void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,amdgpu_context_handle context_handle,unsigned ip_type,int instance,int pm4_dw,uint32_t * pm4_src,int res_cnt,amdgpu_bo_handle * resources,struct amdgpu_cs_ib_info * ib_info,struct amdgpu_cs_request * ibs_request,bool secure)1302 amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
1303 			       amdgpu_context_handle context_handle,
1304 			       unsigned ip_type, int instance, int pm4_dw,
1305 			       uint32_t *pm4_src, int res_cnt,
1306 			       amdgpu_bo_handle *resources,
1307 			       struct amdgpu_cs_ib_info *ib_info,
1308 			       struct amdgpu_cs_request *ibs_request,
1309 			       bool secure)
1310 {
1311 	int r;
1312 	uint32_t expired;
1313 	uint32_t *ring_ptr;
1314 	amdgpu_bo_handle ib_result_handle;
1315 	void *ib_result_cpu;
1316 	uint64_t ib_result_mc_address;
1317 	struct amdgpu_cs_fence fence_status = {0};
1318 	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
1319 	amdgpu_va_handle va_handle;
1320 
1321 	/* prepare CS */
1322 	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
1323 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1324 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1325 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1326 	CU_ASSERT_TRUE(pm4_dw <= 1024);
1327 
1328 	/* allocate IB */
1329 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1330 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1331 				    &ib_result_handle, &ib_result_cpu,
1332 				    &ib_result_mc_address, &va_handle);
1333 	CU_ASSERT_EQUAL(r, 0);
1334 
1335 	/* copy PM4 packet to ring from caller */
1336 	ring_ptr = ib_result_cpu;
1337 	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
1338 
1339 	ib_info->ib_mc_address = ib_result_mc_address;
1340 	ib_info->size = pm4_dw;
1341 	if (secure)
1342 		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
1343 
1344 	ibs_request->ip_type = ip_type;
1345 	ibs_request->ring = instance;
1346 	ibs_request->number_of_ibs = 1;
1347 	ibs_request->ibs = ib_info;
1348 	ibs_request->fence_info.handle = NULL;
1349 
1350 	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
1351 	all_res[res_cnt] = ib_result_handle;
1352 
1353 	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
1354 				  NULL, &ibs_request->resources);
1355 	CU_ASSERT_EQUAL(r, 0);
1356 
1357 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1358 
1359 	/* submit CS */
1360 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
1361 	CU_ASSERT_EQUAL(r, 0);
1362 
1363 	r = amdgpu_bo_list_destroy(ibs_request->resources);
1364 	CU_ASSERT_EQUAL(r, 0);
1365 
1366 	fence_status.ip_type = ip_type;
1367 	fence_status.ip_instance = 0;
1368 	fence_status.ring = ibs_request->ring;
1369 	fence_status.context = context_handle;
1370 	fence_status.fence = ibs_request->seq_no;
1371 
1372 	/* wait for IB accomplished */
1373 	r = amdgpu_cs_query_fence_status(&fence_status,
1374 					 AMDGPU_TIMEOUT_INFINITE,
1375 					 0, &expired);
1376 	CU_ASSERT_EQUAL(r, 0);
1377 	CU_ASSERT_EQUAL(expired, true);
1378 
1379 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1380 				     ib_result_mc_address, 4096);
1381 	CU_ASSERT_EQUAL(r, 0);
1382 }
1383 
1384 static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,unsigned ip_type,int instance,int pm4_dw,uint32_t * pm4_src,int res_cnt,amdgpu_bo_handle * resources,struct amdgpu_cs_ib_info * ib_info,struct amdgpu_cs_request * ibs_request)1385 amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
1386 			   unsigned ip_type, int instance, int pm4_dw,
1387 			   uint32_t *pm4_src, int res_cnt,
1388 			   amdgpu_bo_handle *resources,
1389 			   struct amdgpu_cs_ib_info *ib_info,
1390 			   struct amdgpu_cs_request *ibs_request)
1391 {
1392 	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
1393 				       ip_type, instance, pm4_dw, pm4_src,
1394 				       res_cnt, resources, ib_info,
1395 				       ibs_request, false);
1396 }
1397 
1398 void
amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle device,unsigned ip_type,bool secure)1399 amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
1400 							  device, unsigned
1401 							  ip_type, bool secure)
1402 {
1403 	const int sdma_write_length = 128;
1404 	const int pm4_dw = 256;
1405 	amdgpu_context_handle context_handle;
1406 	amdgpu_bo_handle bo;
1407 	amdgpu_bo_handle *resources;
1408 	uint32_t *pm4;
1409 	struct amdgpu_cs_ib_info *ib_info;
1410 	struct amdgpu_cs_request *ibs_request;
1411 	uint64_t bo_mc;
1412 	volatile uint32_t *bo_cpu;
1413 	uint32_t bo_cpu_origin;
1414 	int i, j, r, loop, ring_id;
1415 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1416 	amdgpu_va_handle va_handle;
1417 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1418 
1419 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1420 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1421 
1422 	ib_info = calloc(1, sizeof(*ib_info));
1423 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1424 
1425 	ibs_request = calloc(1, sizeof(*ibs_request));
1426 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1427 
1428 	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
1429 	CU_ASSERT_EQUAL(r, 0);
1430 
1431 	for (i = 0; secure && (i < 2); i++)
1432 		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
1433 
1434 	r = amdgpu_cs_ctx_create(device, &context_handle);
1435 
1436 	CU_ASSERT_EQUAL(r, 0);
1437 
1438 	/* prepare resource */
1439 	resources = calloc(1, sizeof(amdgpu_bo_handle));
1440 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1441 
1442 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1443 		loop = 0;
1444 		while(loop < 2) {
1445 			/* allocate UC bo for sDMA use */
1446 			r = amdgpu_bo_alloc_and_map(device,
1447 						    sdma_write_length * sizeof(uint32_t),
1448 						    4096, AMDGPU_GEM_DOMAIN_GTT,
1449 						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1450 						    &bo_mc, &va_handle);
1451 			CU_ASSERT_EQUAL(r, 0);
1452 
1453 			/* clear bo */
1454 			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
1455 
1456 			resources[0] = bo;
1457 
1458 			/* fulfill PM4: test DMA write-linear */
1459 			i = j = 0;
1460 			if (ip_type == AMDGPU_HW_IP_DMA) {
1461 				if (family_id == AMDGPU_FAMILY_SI)
1462 					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1463 								  sdma_write_length);
1464 				else
1465 					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1466 							       SDMA_WRITE_SUB_OPCODE_LINEAR,
1467 							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
1468 				pm4[i++] = 0xfffffffc & bo_mc;
1469 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1470 				if (family_id >= AMDGPU_FAMILY_AI)
1471 					pm4[i++] = sdma_write_length - 1;
1472 				else if (family_id != AMDGPU_FAMILY_SI)
1473 					pm4[i++] = sdma_write_length;
1474 				while(j++ < sdma_write_length)
1475 					pm4[i++] = 0xdeadbeaf;
1476 			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1477 				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1478 				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1479 				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1480 				pm4[i++] = 0xfffffffc & bo_mc;
1481 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1482 				while(j++ < sdma_write_length)
1483 					pm4[i++] = 0xdeadbeaf;
1484 			}
1485 
1486 			amdgpu_test_exec_cs_helper_raw(device, context_handle,
1487 						       ip_type, ring_id, i, pm4,
1488 						       1, resources, ib_info,
1489 						       ibs_request, secure);
1490 
1491 			/* verify if SDMA test result meets with expected */
1492 			i = 0;
1493 			if (!secure) {
1494 				while(i < sdma_write_length) {
1495 					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1496 				}
1497 			} else if (ip_type == AMDGPU_HW_IP_GFX) {
1498 				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1499 				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
1500 				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
1501 				 * command, 1-loop_until_compare_satisfied.
1502 				 * single_pass_atomic, 0-lru
1503 				 * engine_sel, 0-micro_engine
1504 				 */
1505 				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
1506 							ATOMIC_MEM_COMMAND(1) |
1507 							ATOMIC_MEM_CACHEPOLICAY(0) |
1508 							ATOMIC_MEM_ENGINESEL(0));
1509 				pm4[i++] = 0xfffffffc & bo_mc;
1510 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1511 				pm4[i++] = 0x12345678;
1512 				pm4[i++] = 0x0;
1513 				pm4[i++] = 0xdeadbeaf;
1514 				pm4[i++] = 0x0;
1515 				pm4[i++] = 0x100;
1516 				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1517 							ip_type, ring_id, i, pm4,
1518 							1, resources, ib_info,
1519 							ibs_request, true);
1520 			} else if (ip_type == AMDGPU_HW_IP_DMA) {
1521 				/* restore the bo_cpu to compare */
1522 				bo_cpu_origin = bo_cpu[0];
1523 				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1524 				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
1525 				 * loop, 1-loop_until_compare_satisfied.
1526 				 * single_pass_atomic, 0-lru
1527 				 */
1528 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1529 							       0,
1530 							       SDMA_ATOMIC_LOOP(1) |
1531 							       SDMA_ATOMIC_TMZ(1) |
1532 							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1533 				pm4[i++] = 0xfffffffc & bo_mc;
1534 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1535 				pm4[i++] = 0x12345678;
1536 				pm4[i++] = 0x0;
1537 				pm4[i++] = 0xdeadbeaf;
1538 				pm4[i++] = 0x0;
1539 				pm4[i++] = 0x100;
1540 				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1541 							ip_type, ring_id, i, pm4,
1542 							1, resources, ib_info,
1543 							ibs_request, true);
1544 				/* DMA's atomic behavir is unlike GFX
1545 				 * If the comparing data is not equal to destination data,
1546 				 * For GFX, loop again till gfx timeout(system hang).
1547 				 * For DMA, loop again till timer expired and then send interrupt.
1548 				 * So testcase can't use interrupt mechanism.
1549 				 * We take another way to verify. When the comparing data is not
1550 				 * equal to destination data, overwrite the source data to the destination
1551 				 * buffer. Otherwise, original destination data unchanged.
1552 				 * So if the bo_cpu data is overwritten, the result is passed.
1553 				 */
1554 				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1555 
1556 				/* compare again for the case of dest_data != cmp_data */
1557 				i = 0;
1558 				/* restore again, here dest_data should be */
1559 				bo_cpu_origin = bo_cpu[0];
1560 				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1561 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1562 							       0,
1563 							       SDMA_ATOMIC_LOOP(1) |
1564 							       SDMA_ATOMIC_TMZ(1) |
1565 							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1566 				pm4[i++] = 0xfffffffc & bo_mc;
1567 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1568 				pm4[i++] = 0x87654321;
1569 				pm4[i++] = 0x0;
1570 				pm4[i++] = 0xdeadbeaf;
1571 				pm4[i++] = 0x0;
1572 				pm4[i++] = 0x100;
1573 				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1574 							ip_type, ring_id, i, pm4,
1575 							1, resources, ib_info,
1576 							ibs_request, true);
1577 				/* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/
1578 				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1579 			}
1580 
1581 			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1582 						     sdma_write_length * sizeof(uint32_t));
1583 			CU_ASSERT_EQUAL(r, 0);
1584 			loop++;
1585 		}
1586 	}
1587 	/* clean resources */
1588 	free(resources);
1589 	free(ibs_request);
1590 	free(ib_info);
1591 	free(pm4);
1592 
1593 	/* end of test */
1594 	r = amdgpu_cs_ctx_free(context_handle);
1595 	CU_ASSERT_EQUAL(r, 0);
1596 }
1597 
amdgpu_command_submission_write_linear_helper(unsigned ip_type)1598 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
1599 {
1600 	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
1601 								  ip_type,
1602 								  false);
1603 }
1604 
amdgpu_command_submission_sdma_write_linear(void)1605 static void amdgpu_command_submission_sdma_write_linear(void)
1606 {
1607 	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
1608 }
1609 
amdgpu_command_submission_const_fill_helper(unsigned ip_type)1610 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1611 {
1612 	const int sdma_write_length = 1024 * 1024;
1613 	const int pm4_dw = 256;
1614 	amdgpu_context_handle context_handle;
1615 	amdgpu_bo_handle bo;
1616 	amdgpu_bo_handle *resources;
1617 	uint32_t *pm4;
1618 	struct amdgpu_cs_ib_info *ib_info;
1619 	struct amdgpu_cs_request *ibs_request;
1620 	uint64_t bo_mc;
1621 	volatile uint32_t *bo_cpu;
1622 	int i, j, r, loop, ring_id;
1623 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1624 	amdgpu_va_handle va_handle;
1625 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1626 
1627 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1628 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1629 
1630 	ib_info = calloc(1, sizeof(*ib_info));
1631 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1632 
1633 	ibs_request = calloc(1, sizeof(*ibs_request));
1634 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1635 
1636 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1637 	CU_ASSERT_EQUAL(r, 0);
1638 
1639 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1640 	CU_ASSERT_EQUAL(r, 0);
1641 
1642 	/* prepare resource */
1643 	resources = calloc(1, sizeof(amdgpu_bo_handle));
1644 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1645 
1646 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1647 		loop = 0;
1648 		while(loop < 2) {
1649 			/* allocate UC bo for sDMA use */
1650 			r = amdgpu_bo_alloc_and_map(device_handle,
1651 						    sdma_write_length, 4096,
1652 						    AMDGPU_GEM_DOMAIN_GTT,
1653 						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1654 						    &bo_mc, &va_handle);
1655 			CU_ASSERT_EQUAL(r, 0);
1656 
1657 			/* clear bo */
1658 			memset((void*)bo_cpu, 0, sdma_write_length);
1659 
1660 			resources[0] = bo;
1661 
1662 			/* fulfill PM4: test DMA const fill */
1663 			i = j = 0;
1664 			if (ip_type == AMDGPU_HW_IP_DMA) {
1665 				if (family_id == AMDGPU_FAMILY_SI) {
1666 					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1667 								  0, 0, 0,
1668 								  sdma_write_length / 4);
1669 					pm4[i++] = 0xfffffffc & bo_mc;
1670 					pm4[i++] = 0xdeadbeaf;
1671 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1672 				} else {
1673 					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1674 							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1675 					pm4[i++] = 0xffffffff & bo_mc;
1676 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1677 					pm4[i++] = 0xdeadbeaf;
1678 					if (family_id >= AMDGPU_FAMILY_AI)
1679 						pm4[i++] = sdma_write_length - 1;
1680 					else
1681 						pm4[i++] = sdma_write_length;
1682 				}
1683 			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1684 				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1685 				if (family_id == AMDGPU_FAMILY_SI) {
1686 					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1687 					pm4[i++] = 0xdeadbeaf;
1688 					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1689 						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1690 						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1691 						   PACKET3_DMA_DATA_SI_CP_SYNC;
1692 					pm4[i++] = 0xffffffff & bo_mc;
1693 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1694 					pm4[i++] = sdma_write_length;
1695 				} else {
1696 					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1697 					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1698 						   PACKET3_DMA_DATA_DST_SEL(0) |
1699 						   PACKET3_DMA_DATA_SRC_SEL(2) |
1700 						   PACKET3_DMA_DATA_CP_SYNC;
1701 					pm4[i++] = 0xdeadbeaf;
1702 					pm4[i++] = 0;
1703 					pm4[i++] = 0xfffffffc & bo_mc;
1704 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1705 					pm4[i++] = sdma_write_length;
1706 				}
1707 			}
1708 
1709 			amdgpu_test_exec_cs_helper(context_handle,
1710 						   ip_type, ring_id,
1711 						   i, pm4,
1712 						   1, resources,
1713 						   ib_info, ibs_request);
1714 
1715 			/* verify if SDMA test result meets with expected */
1716 			i = 0;
1717 			while(i < (sdma_write_length / 4)) {
1718 				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1719 			}
1720 
1721 			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1722 						     sdma_write_length);
1723 			CU_ASSERT_EQUAL(r, 0);
1724 			loop++;
1725 		}
1726 	}
1727 	/* clean resources */
1728 	free(resources);
1729 	free(ibs_request);
1730 	free(ib_info);
1731 	free(pm4);
1732 
1733 	/* end of test */
1734 	r = amdgpu_cs_ctx_free(context_handle);
1735 	CU_ASSERT_EQUAL(r, 0);
1736 }
1737 
amdgpu_command_submission_sdma_const_fill(void)1738 static void amdgpu_command_submission_sdma_const_fill(void)
1739 {
1740 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1741 }
1742 
amdgpu_command_submission_copy_linear_helper(unsigned ip_type)1743 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1744 {
1745 	const int sdma_write_length = 1024;
1746 	const int pm4_dw = 256;
1747 	amdgpu_context_handle context_handle;
1748 	amdgpu_bo_handle bo1, bo2;
1749 	amdgpu_bo_handle *resources;
1750 	uint32_t *pm4;
1751 	struct amdgpu_cs_ib_info *ib_info;
1752 	struct amdgpu_cs_request *ibs_request;
1753 	uint64_t bo1_mc, bo2_mc;
1754 	volatile unsigned char *bo1_cpu, *bo2_cpu;
1755 	int i, j, r, loop1, loop2, ring_id;
1756 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1757 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1758 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1759 
1760 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1761 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1762 
1763 	ib_info = calloc(1, sizeof(*ib_info));
1764 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1765 
1766 	ibs_request = calloc(1, sizeof(*ibs_request));
1767 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1768 
1769 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1770 	CU_ASSERT_EQUAL(r, 0);
1771 
1772 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1773 	CU_ASSERT_EQUAL(r, 0);
1774 
1775 	/* prepare resource */
1776 	resources = calloc(2, sizeof(amdgpu_bo_handle));
1777 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1778 
1779 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1780 		loop1 = loop2 = 0;
1781 		/* run 9 circle to test all mapping combination */
1782 		while(loop1 < 2) {
1783 			while(loop2 < 2) {
1784 				/* allocate UC bo1for sDMA use */
1785 				r = amdgpu_bo_alloc_and_map(device_handle,
1786 							    sdma_write_length, 4096,
1787 							    AMDGPU_GEM_DOMAIN_GTT,
1788 							    gtt_flags[loop1], &bo1,
1789 							    (void**)&bo1_cpu, &bo1_mc,
1790 							    &bo1_va_handle);
1791 				CU_ASSERT_EQUAL(r, 0);
1792 
1793 				/* set bo1 */
1794 				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1795 
1796 				/* allocate UC bo2 for sDMA use */
1797 				r = amdgpu_bo_alloc_and_map(device_handle,
1798 							    sdma_write_length, 4096,
1799 							    AMDGPU_GEM_DOMAIN_GTT,
1800 							    gtt_flags[loop2], &bo2,
1801 							    (void**)&bo2_cpu, &bo2_mc,
1802 							    &bo2_va_handle);
1803 				CU_ASSERT_EQUAL(r, 0);
1804 
1805 				/* clear bo2 */
1806 				memset((void*)bo2_cpu, 0, sdma_write_length);
1807 
1808 				resources[0] = bo1;
1809 				resources[1] = bo2;
1810 
1811 				/* fulfill PM4: test DMA copy linear */
1812 				i = j = 0;
1813 				if (ip_type == AMDGPU_HW_IP_DMA) {
1814 					if (family_id == AMDGPU_FAMILY_SI) {
1815 						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1816 									  0, 0, 0,
1817 									  sdma_write_length);
1818 						pm4[i++] = 0xffffffff & bo2_mc;
1819 						pm4[i++] = 0xffffffff & bo1_mc;
1820 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1821 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1822 					} else {
1823 						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1824 								       SDMA_COPY_SUB_OPCODE_LINEAR,
1825 								       0);
1826 						if (family_id >= AMDGPU_FAMILY_AI)
1827 							pm4[i++] = sdma_write_length - 1;
1828 						else
1829 							pm4[i++] = sdma_write_length;
1830 						pm4[i++] = 0;
1831 						pm4[i++] = 0xffffffff & bo1_mc;
1832 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1833 						pm4[i++] = 0xffffffff & bo2_mc;
1834 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1835 					}
1836 				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1837 					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1838 					if (family_id == AMDGPU_FAMILY_SI) {
1839 						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1840 						pm4[i++] = 0xfffffffc & bo1_mc;
1841 						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1842 							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1843 							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1844 							   PACKET3_DMA_DATA_SI_CP_SYNC |
1845 							   (0xffff00000000 & bo1_mc) >> 32;
1846 						pm4[i++] = 0xfffffffc & bo2_mc;
1847 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1848 						pm4[i++] = sdma_write_length;
1849 					} else {
1850 						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1851 						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1852 							   PACKET3_DMA_DATA_DST_SEL(0) |
1853 							   PACKET3_DMA_DATA_SRC_SEL(0) |
1854 							   PACKET3_DMA_DATA_CP_SYNC;
1855 						pm4[i++] = 0xfffffffc & bo1_mc;
1856 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1857 						pm4[i++] = 0xfffffffc & bo2_mc;
1858 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1859 						pm4[i++] = sdma_write_length;
1860 					}
1861 				}
1862 
1863 				amdgpu_test_exec_cs_helper(context_handle,
1864 							   ip_type, ring_id,
1865 							   i, pm4,
1866 							   2, resources,
1867 							   ib_info, ibs_request);
1868 
1869 				/* verify if SDMA test result meets with expected */
1870 				i = 0;
1871 				while(i < sdma_write_length) {
1872 					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1873 				}
1874 				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1875 							     sdma_write_length);
1876 				CU_ASSERT_EQUAL(r, 0);
1877 				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1878 							     sdma_write_length);
1879 				CU_ASSERT_EQUAL(r, 0);
1880 				loop2++;
1881 			}
1882 			loop1++;
1883 		}
1884 	}
1885 	/* clean resources */
1886 	free(resources);
1887 	free(ibs_request);
1888 	free(ib_info);
1889 	free(pm4);
1890 
1891 	/* end of test */
1892 	r = amdgpu_cs_ctx_free(context_handle);
1893 	CU_ASSERT_EQUAL(r, 0);
1894 }
1895 
/*
 * Test entry point: invoke amdgpu_command_submission_copy_linear_helper()
 * with the DMA IP type (AMDGPU_HW_IP_DMA).
 */
static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}
1900 
/*
 * Run the three SDMA command submission sub-tests in sequence:
 * write linear, const fill, then copy linear.
 */
static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}
1907 
/*
 * Submit two GFX requests in a single amdgpu_cs_submit() call and wait on
 * their fences with amdgpu_cs_wait_fences().
 *
 * Both requests reference the same pair of IBs: ib_info[0], flagged
 * AMDGPU_IB_FLAG_CE and built in its own 4096-byte GTT buffer, and
 * ib_info[1], built in a second 4096-byte GTT buffer.
 *
 * @wait_all: forwarded unchanged to amdgpu_cs_wait_fences().
 */
static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* buffer holding the second (non-CE) IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* buffer holding the CE IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	/* one BO list covering both IB buffers, shared by both requests */
	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	/* the first two dwords are only emitted on non-SI families */
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	/* i is the number of dwords written above (2 on SI, 4 otherwise) */
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	/* both requests are identical and point at the same two IBs */
	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);

	CU_ASSERT_EQUAL(r, 0);

	/* build one fence descriptor per request from its seq_no */
	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

	/* only the return code is checked; 'expired' is not inspected */
	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				AMDGPU_TIMEOUT_INFINITE,
				&expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2001 
/*
 * Run the multi-fence submission test twice: first with wait_all set to
 * true, then with wait_all set to false.
 */
static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}
2007 
amdgpu_userptr_test(void)2008 static void amdgpu_userptr_test(void)
2009 {
2010 	int i, r, j;
2011 	uint32_t *pm4 = NULL;
2012 	uint64_t bo_mc;
2013 	void *ptr = NULL;
2014 	int pm4_dw = 256;
2015 	int sdma_write_length = 4;
2016 	amdgpu_bo_handle handle;
2017 	amdgpu_context_handle context_handle;
2018 	struct amdgpu_cs_ib_info *ib_info;
2019 	struct amdgpu_cs_request *ibs_request;
2020 	amdgpu_bo_handle buf_handle;
2021 	amdgpu_va_handle va_handle;
2022 
2023 	pm4 = calloc(pm4_dw, sizeof(*pm4));
2024 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
2025 
2026 	ib_info = calloc(1, sizeof(*ib_info));
2027 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2028 
2029 	ibs_request = calloc(1, sizeof(*ibs_request));
2030 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2031 
2032 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2033 	CU_ASSERT_EQUAL(r, 0);
2034 
2035 	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2036 	CU_ASSERT_NOT_EQUAL(ptr, NULL);
2037 	memset(ptr, 0, BUFFER_SIZE);
2038 
2039 	r = amdgpu_create_bo_from_user_mem(device_handle,
2040 					   ptr, BUFFER_SIZE, &buf_handle);
2041 	CU_ASSERT_EQUAL(r, 0);
2042 
2043 	r = amdgpu_va_range_alloc(device_handle,
2044 				  amdgpu_gpu_va_range_general,
2045 				  BUFFER_SIZE, 1, 0, &bo_mc,
2046 				  &va_handle, 0);
2047 	CU_ASSERT_EQUAL(r, 0);
2048 
2049 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2050 	CU_ASSERT_EQUAL(r, 0);
2051 
2052 	handle = buf_handle;
2053 
2054 	j = i = 0;
2055 
2056 	if (family_id == AMDGPU_FAMILY_SI)
2057 		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2058 				sdma_write_length);
2059 	else
2060 		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2061 				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2062 	pm4[i++] = 0xffffffff & bo_mc;
2063 	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2064 	if (family_id >= AMDGPU_FAMILY_AI)
2065 		pm4[i++] = sdma_write_length - 1;
2066 	else if (family_id != AMDGPU_FAMILY_SI)
2067 		pm4[i++] = sdma_write_length;
2068 
2069 	while (j++ < sdma_write_length)
2070 		pm4[i++] = 0xdeadbeaf;
2071 
2072 	if (!fork()) {
2073 		pm4[0] = 0x0;
2074 		exit(0);
2075 	}
2076 
2077 	amdgpu_test_exec_cs_helper(context_handle,
2078 				   AMDGPU_HW_IP_DMA, 0,
2079 				   i, pm4,
2080 				   1, &handle,
2081 				   ib_info, ibs_request);
2082 	i = 0;
2083 	while (i < sdma_write_length) {
2084 		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2085 	}
2086 	free(ibs_request);
2087 	free(ib_info);
2088 	free(pm4);
2089 
2090 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2091 	CU_ASSERT_EQUAL(r, 0);
2092 	r = amdgpu_va_range_free(va_handle);
2093 	CU_ASSERT_EQUAL(r, 0);
2094 	r = amdgpu_bo_free(buf_handle);
2095 	CU_ASSERT_EQUAL(r, 0);
2096 	free(ptr);
2097 
2098 	r = amdgpu_cs_ctx_free(context_handle);
2099 	CU_ASSERT_EQUAL(r, 0);
2100 
2101 	wait(NULL);
2102 }
2103 
amdgpu_sync_dependency_test(void)2104 static void amdgpu_sync_dependency_test(void)
2105 {
2106 	amdgpu_context_handle context_handle[2];
2107 	amdgpu_bo_handle ib_result_handle;
2108 	void *ib_result_cpu;
2109 	uint64_t ib_result_mc_address;
2110 	struct amdgpu_cs_request ibs_request;
2111 	struct amdgpu_cs_ib_info ib_info;
2112 	struct amdgpu_cs_fence fence_status;
2113 	uint32_t expired;
2114 	int i, j, r;
2115 	amdgpu_bo_list_handle bo_list;
2116 	amdgpu_va_handle va_handle;
2117 	static uint32_t *ptr;
2118 	uint64_t seq_no;
2119 
2120 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2121 	CU_ASSERT_EQUAL(r, 0);
2122 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2123 	CU_ASSERT_EQUAL(r, 0);
2124 
2125 	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2126 			AMDGPU_GEM_DOMAIN_GTT, 0,
2127 						    &ib_result_handle, &ib_result_cpu,
2128 						    &ib_result_mc_address, &va_handle);
2129 	CU_ASSERT_EQUAL(r, 0);
2130 
2131 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2132 			       &bo_list);
2133 	CU_ASSERT_EQUAL(r, 0);
2134 
2135 	ptr = ib_result_cpu;
2136 	i = 0;
2137 
2138 	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
2139 
2140 	/* Dispatch minimal init config and verify it's executed */
2141 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2142 	ptr[i++] = 0x80000000;
2143 	ptr[i++] = 0x80000000;
2144 
2145 	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2146 	ptr[i++] = 0x80000000;
2147 
2148 
2149 	/* Program compute regs */
2150 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2151 	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2152 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2153 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2154 
2155 
2156 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2157 	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2158 	/*
2159 	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
2160 	                                      SGPRS = 1
2161 	                                      PRIORITY = 0
2162 	                                      FLOAT_MODE = 192 (0xc0)
2163 	                                      PRIV = 0
2164 	                                      DX10_CLAMP = 1
2165 	                                      DEBUG_MODE = 0
2166 	                                      IEEE_MODE = 0
2167 	                                      BULKY = 0
2168 	                                      CDBG_USER = 0
2169 	 *
2170 	 */
2171 	ptr[i++] = 0x002c0040;
2172 
2173 
2174 	/*
2175 	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2176 	                                      USER_SGPR = 8
2177 	                                      TRAP_PRESENT = 0
2178 	                                      TGID_X_EN = 0
2179 	                                      TGID_Y_EN = 0
2180 	                                      TGID_Z_EN = 0
2181 	                                      TG_SIZE_EN = 0
2182 	                                      TIDIG_COMP_CNT = 0
2183 	                                      EXCP_EN_MSB = 0
2184 	                                      LDS_SIZE = 0
2185 	                                      EXCP_EN = 0
2186 	 *
2187 	 */
2188 	ptr[i++] = 0x00000010;
2189 
2190 
2191 /*
2192  * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2193                                          WAVESIZE = 0
2194  *
2195  */
2196 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2197 	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2198 	ptr[i++] = 0x00000100;
2199 
2200 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2201 	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2202 	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2203 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2204 
2205 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2206 	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2207 	ptr[i++] = 0;
2208 
2209 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2210 	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2211 	ptr[i++] = 1;
2212 	ptr[i++] = 1;
2213 	ptr[i++] = 1;
2214 
2215 
2216 	/* Dispatch */
2217 	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2218 	ptr[i++] = 1;
2219 	ptr[i++] = 1;
2220 	ptr[i++] = 1;
2221 	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2222 
2223 
2224 	while (i & 7)
2225 		ptr[i++] =  0xffff1000; /* type3 nop packet */
2226 
2227 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2228 	ib_info.ib_mc_address = ib_result_mc_address;
2229 	ib_info.size = i;
2230 
2231 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2232 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2233 	ibs_request.ring = 0;
2234 	ibs_request.number_of_ibs = 1;
2235 	ibs_request.ibs = &ib_info;
2236 	ibs_request.resources = bo_list;
2237 	ibs_request.fence_info.handle = NULL;
2238 
2239 	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
2240 	CU_ASSERT_EQUAL(r, 0);
2241 	seq_no = ibs_request.seq_no;
2242 
2243 
2244 
2245 	/* Prepare second command with dependency on the first */
2246 	j = i;
2247 	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2248 	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2249 	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2250 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2251 	ptr[i++] = 99;
2252 
2253 	while (i & 7)
2254 		ptr[i++] =  0xffff1000; /* type3 nop packet */
2255 
2256 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2257 	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2258 	ib_info.size = i - j;
2259 
2260 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2261 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2262 	ibs_request.ring = 0;
2263 	ibs_request.number_of_ibs = 1;
2264 	ibs_request.ibs = &ib_info;
2265 	ibs_request.resources = bo_list;
2266 	ibs_request.fence_info.handle = NULL;
2267 
2268 	ibs_request.number_of_dependencies = 1;
2269 
2270 	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2271 	ibs_request.dependencies[0].context = context_handle[1];
2272 	ibs_request.dependencies[0].ip_instance = 0;
2273 	ibs_request.dependencies[0].ring = 0;
2274 	ibs_request.dependencies[0].fence = seq_no;
2275 
2276 
2277 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
2278 	CU_ASSERT_EQUAL(r, 0);
2279 
2280 
2281 	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2282 	fence_status.context = context_handle[0];
2283 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2284 	fence_status.ip_instance = 0;
2285 	fence_status.ring = 0;
2286 	fence_status.fence = ibs_request.seq_no;
2287 
2288 	r = amdgpu_cs_query_fence_status(&fence_status,
2289 		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
2290 	CU_ASSERT_EQUAL(r, 0);
2291 
2292 	/* Expect the second command to wait for shader to complete */
2293 	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2294 
2295 	r = amdgpu_bo_list_destroy(bo_list);
2296 	CU_ASSERT_EQUAL(r, 0);
2297 
2298 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2299 				     ib_result_mc_address, 4096);
2300 	CU_ASSERT_EQUAL(r, 0);
2301 
2302 	r = amdgpu_cs_ctx_free(context_handle[0]);
2303 	CU_ASSERT_EQUAL(r, 0);
2304 	r = amdgpu_cs_ctx_free(context_handle[1]);
2305 	CU_ASSERT_EQUAL(r, 0);
2306 
2307 	free(ibs_request.dependencies);
2308 }
2309 
amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t * ptr,int family)2310 static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2311 {
2312 	struct amdgpu_test_shader *shader;
2313 	int i, loop = 0x10000;
2314 
2315 	switch (family) {
2316 		case AMDGPU_FAMILY_AI:
2317 			shader = &memcpy_cs_hang_slow_ai;
2318 			break;
2319 		case AMDGPU_FAMILY_RV:
2320 			shader = &memcpy_cs_hang_slow_rv;
2321 			break;
2322 		default:
2323 			return -1;
2324 			break;
2325 	}
2326 
2327 	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2328 
2329 	for (i = 0; i < loop; i++)
2330 		memcpy(ptr + shader->header_length + shader->body_length * i,
2331 			shader->shader + shader->header_length,
2332 			shader->body_length * sizeof(uint32_t));
2333 
2334 	memcpy(ptr + shader->header_length + shader->body_length * loop,
2335 		shader->shader + shader->header_length + shader->body_length,
2336 		shader->foot_length * sizeof(uint32_t));
2337 
2338 	return 0;
2339 }
2340 
amdgpu_dispatch_load_cs_shader(uint8_t * ptr,int cs_type)2341 static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2342 					   int cs_type)
2343 {
2344 	uint32_t shader_size;
2345 	const uint32_t *shader;
2346 
2347 	switch (cs_type) {
2348 		case CS_BUFFERCLEAR:
2349 			shader = bufferclear_cs_shader_gfx9;
2350 			shader_size = sizeof(bufferclear_cs_shader_gfx9);
2351 			break;
2352 		case CS_BUFFERCOPY:
2353 			shader = buffercopy_cs_shader_gfx9;
2354 			shader_size = sizeof(buffercopy_cs_shader_gfx9);
2355 			break;
2356 		case CS_HANG:
2357 			shader = memcpy_ps_hang;
2358 			shader_size = sizeof(memcpy_ps_hang);
2359 			break;
2360 		default:
2361 			return -1;
2362 			break;
2363 	}
2364 
2365 	memcpy(ptr, shader, shader_size);
2366 	return 0;
2367 }
2368 
amdgpu_dispatch_init(uint32_t * ptr,uint32_t ip_type)2369 static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2370 {
2371 	int i = 0;
2372 
2373 	/* Write context control and load shadowing register if necessary */
2374 	if (ip_type == AMDGPU_HW_IP_GFX) {
2375 		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2376 		ptr[i++] = 0x80000000;
2377 		ptr[i++] = 0x80000000;
2378 	}
2379 
2380 	/* Issue commands to set default compute state. */
2381 	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2382 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2383 	ptr[i++] = 0x204;
2384 	i += 3;
2385 
2386 	/* clear mmCOMPUTE_TMPRING_SIZE */
2387 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2388 	ptr[i++] = 0x218;
2389 	ptr[i++] = 0;
2390 
2391 	return i;
2392 }
2393 
amdgpu_dispatch_write_cumask(uint32_t * ptr)2394 static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2395 {
2396 	int i = 0;
2397 
2398 	/*  Issue commands to set cu mask used in current dispatch */
2399 	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2400 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2401 	ptr[i++] = 0x216;
2402 	ptr[i++] = 0xffffffff;
2403 	ptr[i++] = 0xffffffff;
2404 	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2405 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2406 	ptr[i++] = 0x219;
2407 	ptr[i++] = 0xffffffff;
2408 	ptr[i++] = 0xffffffff;
2409 
2410 	return i;
2411 }
2412 
amdgpu_dispatch_write2hw(uint32_t * ptr,uint64_t shader_addr)2413 static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2414 {
2415 	int i, j;
2416 
2417 	i = 0;
2418 
2419 	/* Writes shader state to HW */
2420 	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2421 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2422 	ptr[i++] = 0x20c;
2423 	ptr[i++] = (shader_addr >> 8);
2424 	ptr[i++] = (shader_addr >> 40);
2425 	/* write sh regs*/
2426 	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2427 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2428 		/* - Gfx9ShRegBase */
2429 		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2430 		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2431 	}
2432 
2433 	return i;
2434 }
2435 
/*
 * Memset-by-compute-dispatch test.
 *
 * Loads the CS_BUFFERCLEAR shader into a VRAM buffer, builds a command
 * stream in a GTT buffer that points the shader at a 16384-byte VRAM
 * destination buffer with 0x22222222 as fill constants, submits it on
 * @ip_type / @ring, waits for the fence and asserts that every byte of the
 * destination reads back as 0x22.
 *
 * @device_handle: device to run on (shadows the file-scope device_handle).
 * @ip_type:       AMDGPU_HW_IP_* type to submit to.
 * @ring:          ring index to submit to.
 */
static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
					 uint32_t ip_type,
					 uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer (GTT), zeroed before the command stream is built */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
					AMDGPU_GEM_DOMAIN_GTT, 0,
					&bo_cmd, (void **)&ptr_cmd,
					&mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* shader buffer (VRAM) */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
	CU_ASSERT_EQUAL(r, 0);

	/* destination buffer (VRAM) that the shader is expected to fill */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* i counts dwords written to ptr_cmd; each helper returns its count */
	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/*  Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	/* low 32 bits of the destination address (implicit truncation) */
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Sets a range of pixel shader constants */
	/* four dwords of 0x22222222: the byte pattern checked below is 0x22 */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* pad the IB to a multiple of 8 dwords */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader;
	resources[2] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	/* the BO list is destroyed right after submission, before the wait */
	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memset test result meets with expected */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2577 
amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,uint32_t ip_type,uint32_t ring,int hang)2578 static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2579 					uint32_t ip_type,
2580 					uint32_t ring,
2581 					int hang)
2582 {
2583 	amdgpu_context_handle context_handle;
2584 	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2585 	volatile unsigned char *ptr_dst;
2586 	void *ptr_shader;
2587 	unsigned char *ptr_src;
2588 	uint32_t *ptr_cmd;
2589 	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2590 	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2591 	int i, r;
2592 	int bo_dst_size = 16384;
2593 	int bo_shader_size = 4096;
2594 	int bo_cmd_size = 4096;
2595 	struct amdgpu_cs_request ibs_request = {0};
2596 	struct amdgpu_cs_ib_info ib_info= {0};
2597 	uint32_t expired, hang_state, hangs;
2598 	enum cs_type cs_type;
2599 	amdgpu_bo_list_handle bo_list;
2600 	struct amdgpu_cs_fence fence_status = {0};
2601 
2602 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2603 	CU_ASSERT_EQUAL(r, 0);
2604 
2605 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2606 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2607 				    &bo_cmd, (void **)&ptr_cmd,
2608 				    &mc_address_cmd, &va_cmd);
2609 	CU_ASSERT_EQUAL(r, 0);
2610 	memset(ptr_cmd, 0, bo_cmd_size);
2611 
2612 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2613 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2614 					&bo_shader, &ptr_shader,
2615 					&mc_address_shader, &va_shader);
2616 	CU_ASSERT_EQUAL(r, 0);
2617 	memset(ptr_shader, 0, bo_shader_size);
2618 
2619 	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
2620 	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
2621 	CU_ASSERT_EQUAL(r, 0);
2622 
2623 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2624 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2625 					&bo_src, (void **)&ptr_src,
2626 					&mc_address_src, &va_src);
2627 	CU_ASSERT_EQUAL(r, 0);
2628 
2629 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2630 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2631 					&bo_dst, (void **)&ptr_dst,
2632 					&mc_address_dst, &va_dst);
2633 	CU_ASSERT_EQUAL(r, 0);
2634 
2635 	memset(ptr_src, 0x55, bo_dst_size);
2636 
2637 	i = 0;
2638 	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2639 
2640 	/*  Issue commands to set cu mask used in current dispatch */
2641 	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2642 
2643 	/* Writes shader state to HW */
2644 	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2645 
2646 	/* Write constant data */
2647 	/* Writes the texture resource constants data to the SGPRs */
2648 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2649 	ptr_cmd[i++] = 0x240;
2650 	ptr_cmd[i++] = mc_address_src;
2651 	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2652 	ptr_cmd[i++] = 0x400;
2653 	ptr_cmd[i++] = 0x74fac;
2654 
2655 	/* Writes the UAV constant data to the SGPRs. */
2656 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2657 	ptr_cmd[i++] = 0x244;
2658 	ptr_cmd[i++] = mc_address_dst;
2659 	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2660 	ptr_cmd[i++] = 0x400;
2661 	ptr_cmd[i++] = 0x74fac;
2662 
2663 	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2664 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2665 	ptr_cmd[i++] = 0x215;
2666 	ptr_cmd[i++] = 0;
2667 
2668 	/* dispatch direct command */
2669 	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2670 	ptr_cmd[i++] = 0x10;
2671 	ptr_cmd[i++] = 1;
2672 	ptr_cmd[i++] = 1;
2673 	ptr_cmd[i++] = 1;
2674 
2675 	while (i & 7)
2676 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2677 
2678 	resources[0] = bo_shader;
2679 	resources[1] = bo_src;
2680 	resources[2] = bo_dst;
2681 	resources[3] = bo_cmd;
2682 	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2683 	CU_ASSERT_EQUAL(r, 0);
2684 
2685 	ib_info.ib_mc_address = mc_address_cmd;
2686 	ib_info.size = i;
2687 	ibs_request.ip_type = ip_type;
2688 	ibs_request.ring = ring;
2689 	ibs_request.resources = bo_list;
2690 	ibs_request.number_of_ibs = 1;
2691 	ibs_request.ibs = &ib_info;
2692 	ibs_request.fence_info.handle = NULL;
2693 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2694 	CU_ASSERT_EQUAL(r, 0);
2695 
2696 	fence_status.ip_type = ip_type;
2697 	fence_status.ip_instance = 0;
2698 	fence_status.ring = ring;
2699 	fence_status.context = context_handle;
2700 	fence_status.fence = ibs_request.seq_no;
2701 
2702 	/* wait for IB accomplished */
2703 	r = amdgpu_cs_query_fence_status(&fence_status,
2704 					 AMDGPU_TIMEOUT_INFINITE,
2705 					 0, &expired);
2706 
2707 	if (!hang) {
2708 		CU_ASSERT_EQUAL(r, 0);
2709 		CU_ASSERT_EQUAL(expired, true);
2710 
2711 		/* verify if memcpy test result meets with expected */
2712 		i = 0;
2713 		while(i < bo_dst_size) {
2714 			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
2715 			i++;
2716 		}
2717 	} else {
2718 		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2719 		CU_ASSERT_EQUAL(r, 0);
2720 		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2721 	}
2722 
2723 	r = amdgpu_bo_list_destroy(bo_list);
2724 	CU_ASSERT_EQUAL(r, 0);
2725 
2726 	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2727 	CU_ASSERT_EQUAL(r, 0);
2728 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2729 	CU_ASSERT_EQUAL(r, 0);
2730 
2731 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2732 	CU_ASSERT_EQUAL(r, 0);
2733 
2734 	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2735 	CU_ASSERT_EQUAL(r, 0);
2736 
2737 	r = amdgpu_cs_ctx_free(context_handle);
2738 	CU_ASSERT_EQUAL(r, 0);
2739 }
2740 
amdgpu_compute_dispatch_test(void)2741 static void amdgpu_compute_dispatch_test(void)
2742 {
2743 	int r;
2744 	struct drm_amdgpu_info_hw_ip info;
2745 	uint32_t ring_id;
2746 
2747 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2748 	CU_ASSERT_EQUAL(r, 0);
2749 	if (!info.available_rings)
2750 		printf("SKIP ... as there's no compute ring\n");
2751 
2752 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2753 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2754 		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
2755 	}
2756 }
2757 
amdgpu_gfx_dispatch_test(void)2758 static void amdgpu_gfx_dispatch_test(void)
2759 {
2760 	int r;
2761 	struct drm_amdgpu_info_hw_ip info;
2762 	uint32_t ring_id;
2763 
2764 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2765 	CU_ASSERT_EQUAL(r, 0);
2766 	if (!info.available_rings)
2767 		printf("SKIP ... as there's no graphics ring\n");
2768 
2769 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2770 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2771 		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
2772 	}
2773 }
2774 
/*
 * For each available ring of ip_type, run a normal memcpy dispatch, then a
 * dispatch with the hang flag set, then a normal memcpy dispatch again.
 *
 * device_handle: device the tests run on.
 * ip_type:       IP block whose rings are exercised.
 */
void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
	int r;
	/* Zero-initialised so a failed query is seen as "no rings", not garbage. */
	struct drm_amdgpu_info_hw_ip info = {0};
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no ring for ip %d\n", ip_type);

	/*
	 * Shift an unsigned 1 and stop before bit 32: shifting a signed int
	 * into or past its sign bit is undefined behaviour.
	 */
	for (ring_id = 0;
	     ring_id < 32 && ((1u << ring_id) & info.available_rings);
	     ring_id++) {
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
	}
}
2792 
/*
 * Submit a dispatch that runs the shader written by
 * amdgpu_dispatch_load_cs_shader_hang_slow() and check that the context
 * reports AMDGPU_CTX_UNKNOWN_RESET afterwards.
 *
 * device_handle: device used for every allocation and query below.
 * ip_type:       IP block the IB is submitted to.
 * ring:          ring index within that IP block.
 *
 * The context, the BO list and all four buffers created here are released
 * before returning.
 */
static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
						  uint32_t ip_type, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst; /* mapped but never read in this function */
	void *ptr_shader;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 0x4000000; /* 64 MiB, used for both source and destination */
	int bo_shader_size = 0x400000; /* 4 MiB */
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs, expired;
	struct amdgpu_gpu_info gpu_info = {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	/* family_id from this query selects the shader variant below */
	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer (GTT), zero-filled before packets are written */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* shader buffer (VRAM), zero-filled before the shader is loaded */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	/* source and destination buffers share the same size */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* fill the source with a known byte pattern */
	memset(ptr_src, 0x55, bo_dst_size);

	/* i counts the dwords written to ptr_cmd so far */
	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/* Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the texture resource constants data to the SGPRs */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_src;
	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400000;
	ptr_cmd[i++] = 0x74fac;

	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400000;
	ptr_cmd[i++] = 0x74fac;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10000;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* pad the IB to a multiple of 8 dwords */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	/* every buffer the IB touches goes into the BO list */
	resources[0] = bo_shader;
	resources[1] = bo_src;
	resources[2] = bo_dst;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i; /* size is given in dwords */
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/*
	 * wait for IB accomplished; the return value and 'expired' are not
	 * checked here, r is overwritten by the reset-state query below
	 */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	/* the context is expected to report a reset after this submission */
	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	/* release everything created above */
	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2943 
/*
 * For each available ring of ip_type, run a normal memcpy dispatch, then the
 * "hang slow" dispatch test, then a normal memcpy dispatch again.
 *
 * device_handle: device the tests run on.
 * ip_type:       IP block whose rings are exercised.
 */
void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
	int r;
	/* Zero-initialised so a failed query is seen as "no rings", not garbage. */
	struct drm_amdgpu_info_hw_ip info = {0};
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no ring for ip %d\n", ip_type);

	/*
	 * Shift an unsigned 1 and stop before bit 32: shifting a signed int
	 * into or past its sign bit is undefined behaviour.
	 */
	for (ring_id = 0;
	     ring_id < 32 && ((1u << ring_id) & info.available_rings);
	     ring_id++) {
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
	}
}
2961 
amdgpu_draw_load_ps_shader_hang_slow(uint32_t * ptr,int family)2962 static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
2963 {
2964 	struct amdgpu_test_shader *shader;
2965 	int i, loop = 0x40000;
2966 
2967 	switch (family) {
2968 		case AMDGPU_FAMILY_AI:
2969 		case AMDGPU_FAMILY_RV:
2970 			shader = &memcpy_ps_hang_slow_ai;
2971 			break;
2972 		default:
2973 			return -1;
2974 			break;
2975 	}
2976 
2977 	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2978 
2979 	for (i = 0; i < loop; i++)
2980 		memcpy(ptr + shader->header_length + shader->body_length * i,
2981 			shader->shader + shader->header_length,
2982 			shader->body_length * sizeof(uint32_t));
2983 
2984 	memcpy(ptr + shader->header_length + shader->body_length * loop,
2985 		shader->shader + shader->header_length + shader->body_length,
2986 		shader->foot_length * sizeof(uint32_t));
2987 
2988 	return 0;
2989 }
2990 
amdgpu_draw_load_ps_shader(uint8_t * ptr,int ps_type)2991 static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
2992 {
2993 	int i;
2994 	uint32_t shader_offset= 256;
2995 	uint32_t mem_offset, patch_code_offset;
2996 	uint32_t shader_size, patchinfo_code_size;
2997 	const uint32_t *shader;
2998 	const uint32_t *patchinfo_code;
2999 	const uint32_t *patchcode_offset;
3000 
3001 	switch (ps_type) {
3002 		case PS_CONST:
3003 			shader = ps_const_shader_gfx9;
3004 			shader_size = sizeof(ps_const_shader_gfx9);
3005 			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
3006 			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
3007 			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
3008 			break;
3009 		case PS_TEX:
3010 			shader = ps_tex_shader_gfx9;
3011 			shader_size = sizeof(ps_tex_shader_gfx9);
3012 			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
3013 			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
3014 			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
3015 			break;
3016 		case PS_HANG:
3017 			shader = memcpy_ps_hang;
3018 			shader_size = sizeof(memcpy_ps_hang);
3019 
3020 			memcpy(ptr, shader, shader_size);
3021 			return 0;
3022 		default:
3023 			return -1;
3024 			break;
3025 	}
3026 
3027 	/* write main shader program */
3028 	for (i = 0 ; i < 10; i++) {
3029 		mem_offset = i * shader_offset;
3030 		memcpy(ptr + mem_offset, shader, shader_size);
3031 	}
3032 
3033 	/* overwrite patch codes */
3034 	for (i = 0 ; i < 10; i++) {
3035 		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
3036 		patch_code_offset = i * patchinfo_code_size;
3037 		memcpy(ptr + mem_offset,
3038 			patchinfo_code + patch_code_offset,
3039 			patchinfo_code_size * sizeof(uint32_t));
3040 	}
3041 
3042 	return 0;
3043 }
3044 
3045 /* load RectPosTexFast_VS */
amdgpu_draw_load_vs_shader(uint8_t * ptr)3046 static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
3047 {
3048 	const uint32_t *shader;
3049 	uint32_t shader_size;
3050 
3051 	shader = vs_RectPosTexFast_shader_gfx9;
3052 	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
3053 
3054 	memcpy(ptr, shader, shader_size);
3055 
3056 	return 0;
3057 }
3058 
amdgpu_draw_init(uint32_t * ptr)3059 static int amdgpu_draw_init(uint32_t *ptr)
3060 {
3061 	int i = 0;
3062 	const uint32_t *preamblecache_ptr;
3063 	uint32_t preamblecache_size;
3064 
3065 	/* Write context control and load shadowing register if necessary */
3066 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
3067 	ptr[i++] = 0x80000000;
3068 	ptr[i++] = 0x80000000;
3069 
3070 	preamblecache_ptr = preamblecache_gfx9;
3071 	preamblecache_size = sizeof(preamblecache_gfx9);
3072 
3073 	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
3074 	return i + preamblecache_size/sizeof(uint32_t);
3075 }
3076 
amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t * ptr,uint64_t dst_addr,int hang_slow)3077 static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
3078 							 uint64_t dst_addr,
3079 							 int hang_slow)
3080 {
3081 	int i = 0;
3082 
3083 	/* setup color buffer */
3084 	/* offset   reg
3085 	   0xA318   CB_COLOR0_BASE
3086 	   0xA319   CB_COLOR0_BASE_EXT
3087 	   0xA31A   CB_COLOR0_ATTRIB2
3088 	   0xA31B   CB_COLOR0_VIEW
3089 	   0xA31C   CB_COLOR0_INFO
3090 	   0xA31D   CB_COLOR0_ATTRIB
3091 	   0xA31E   CB_COLOR0_DCC_CONTROL
3092 	   0xA31F   CB_COLOR0_CMASK
3093 	   0xA320   CB_COLOR0_CMASK_BASE_EXT
3094 	   0xA321   CB_COLOR0_FMASK
3095 	   0xA322   CB_COLOR0_FMASK_BASE_EXT
3096 	   0xA323   CB_COLOR0_CLEAR_WORD0
3097 	   0xA324   CB_COLOR0_CLEAR_WORD1
3098 	   0xA325   CB_COLOR0_DCC_BASE
3099 	   0xA326   CB_COLOR0_DCC_BASE_EXT */
3100 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
3101 	ptr[i++] = 0x318;
3102 	ptr[i++] = dst_addr >> 8;
3103 	ptr[i++] = dst_addr >> 40;
3104 	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
3105 	ptr[i++] = 0;
3106 	ptr[i++] = 0x50438;
3107 	ptr[i++] = 0x10140000;
3108 	i += 9;
3109 
3110 	/* mmCB_MRT0_EPITCH */
3111 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3112 	ptr[i++] = 0x1e8;
3113 	ptr[i++] = hang_slow ? 0x7ff : 0x1f;
3114 
3115 	/* 0xA32B   CB_COLOR1_BASE */
3116 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3117 	ptr[i++] = 0x32b;
3118 	ptr[i++] = 0;
3119 
3120 	/* 0xA33A   CB_COLOR1_BASE */
3121 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3122 	ptr[i++] = 0x33a;
3123 	ptr[i++] = 0;
3124 
3125 	/* SPI_SHADER_COL_FORMAT */
3126 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3127 	ptr[i++] = 0x1c5;
3128 	ptr[i++] = 9;
3129 
3130 	/* Setup depth buffer */
3131 	/* mmDB_Z_INFO */
3132 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3133 	ptr[i++] = 0xe;
3134 	i += 2;
3135 
3136 	return i;
3137 }
3138 
amdgpu_draw_setup_and_write_drawblt_state(uint32_t * ptr,int hang_slow)3139 static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
3140 {
3141 	int i = 0;
3142 	const uint32_t *cached_cmd_ptr;
3143 	uint32_t cached_cmd_size;
3144 
3145 	/* mmPA_SC_TILE_STEERING_OVERRIDE */
3146 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3147 	ptr[i++] = 0xd7;
3148 	ptr[i++] = 0;
3149 
3150 	ptr[i++] = 0xffff1000;
3151 	ptr[i++] = 0xc0021000;
3152 
3153 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3154 	ptr[i++] = 0xd7;
3155 	ptr[i++] = 1;
3156 
3157 	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
3158 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
3159 	ptr[i++] = 0x2fe;
3160 	i += 16;
3161 
3162 	/* mmPA_SC_CENTROID_PRIORITY_0 */
3163 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3164 	ptr[i++] = 0x2f5;
3165 	i += 2;
3166 
3167 	cached_cmd_ptr = cached_cmd_gfx9;
3168 	cached_cmd_size = sizeof(cached_cmd_gfx9);
3169 
3170 	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
3171 	if (hang_slow)
3172 		*(ptr + i + 12) = 0x8000800;
3173 	i += cached_cmd_size/sizeof(uint32_t);
3174 
3175 	return i;
3176 }
3177 
amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t * ptr,int ps_type,uint64_t shader_addr,int hang_slow)3178 static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
3179 						  int ps_type,
3180 						  uint64_t shader_addr,
3181 						  int hang_slow)
3182 {
3183 	int i = 0;
3184 
3185 	/* mmPA_CL_VS_OUT_CNTL */
3186 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3187 	ptr[i++] = 0x207;
3188 	ptr[i++] = 0;
3189 
3190 	/* mmSPI_SHADER_PGM_RSRC3_VS */
3191 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3192 	ptr[i++] = 0x46;
3193 	ptr[i++] = 0xffff;
3194 
3195 	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
3196 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
3197 	ptr[i++] = 0x48;
3198 	ptr[i++] = shader_addr >> 8;
3199 	ptr[i++] = shader_addr >> 40;
3200 
3201 	/* mmSPI_SHADER_PGM_RSRC1_VS */
3202 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3203 	ptr[i++] = 0x4a;
3204 	ptr[i++] = 0xc0081;
3205 	/* mmSPI_SHADER_PGM_RSRC2_VS */
3206 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3207 	ptr[i++] = 0x4b;
3208 	ptr[i++] = 0x18;
3209 
3210 	/* mmSPI_VS_OUT_CONFIG */
3211 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3212 	ptr[i++] = 0x1b1;
3213 	ptr[i++] = 2;
3214 
3215 	/* mmSPI_SHADER_POS_FORMAT */
3216 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3217 	ptr[i++] = 0x1c3;
3218 	ptr[i++] = 4;
3219 
3220 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3221 	ptr[i++] = 0x4c;
3222 	i += 2;
3223 	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3224 	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3225 
3226 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3227 	ptr[i++] = 0x50;
3228 	i += 2;
3229 	if (ps_type == PS_CONST) {
3230 		i += 2;
3231 	} else if (ps_type == PS_TEX) {
3232 		ptr[i++] = 0x3f800000;
3233 		ptr[i++] = 0x3f800000;
3234 	}
3235 
3236 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3237 	ptr[i++] = 0x54;
3238 	i += 4;
3239 
3240 	return i;
3241 }
3242 
amdgpu_draw_ps_write2hw(uint32_t * ptr,int ps_type,uint64_t shader_addr)3243 static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
3244 				   int ps_type,
3245 				   uint64_t shader_addr)
3246 {
3247 	int i, j;
3248 	const uint32_t *sh_registers;
3249 	const uint32_t *context_registers;
3250 	uint32_t num_sh_reg, num_context_reg;
3251 
3252 	if (ps_type == PS_CONST) {
3253 		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
3254 		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
3255 		num_sh_reg = ps_num_sh_registers_gfx9;
3256 		num_context_reg = ps_num_context_registers_gfx9;
3257 	} else if (ps_type == PS_TEX) {
3258 		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
3259 		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
3260 		num_sh_reg = ps_num_sh_registers_gfx9;
3261 		num_context_reg = ps_num_context_registers_gfx9;
3262 	}
3263 
3264 	i = 0;
3265 
3266 	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
3267 	   0x2c08   SPI_SHADER_PGM_LO_PS
3268 	   0x2c09   SPI_SHADER_PGM_HI_PS */
3269 	shader_addr += 256 * 9;
3270 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
3271 	ptr[i++] = 0x7;
3272 	ptr[i++] = 0xffff;
3273 	ptr[i++] = shader_addr >> 8;
3274 	ptr[i++] = shader_addr >> 40;
3275 
3276 	for (j = 0; j < num_sh_reg; j++) {
3277 		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3278 		ptr[i++] = sh_registers[j * 2] - 0x2c00;
3279 		ptr[i++] = sh_registers[j * 2 + 1];
3280 	}
3281 
3282 	for (j = 0; j < num_context_reg; j++) {
3283 		if (context_registers[j * 2] != 0xA1C5) {
3284 			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3285 			ptr[i++] = context_registers[j * 2] - 0xa000;
3286 			ptr[i++] = context_registers[j * 2 + 1];
3287 		}
3288 
3289 		if (context_registers[j * 2] == 0xA1B4) {
3290 			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3291 			ptr[i++] = 0x1b3;
3292 			ptr[i++] = 2;
3293 		}
3294 	}
3295 
3296 	return i;
3297 }
3298 
amdgpu_draw_draw(uint32_t * ptr)3299 static int amdgpu_draw_draw(uint32_t *ptr)
3300 {
3301 	int i = 0;
3302 
3303 	/* mmIA_MULTI_VGT_PARAM */
3304 	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3305 	ptr[i++] = 0x40000258;
3306 	ptr[i++] = 0xd00ff;
3307 
3308 	/* mmVGT_PRIMITIVE_TYPE */
3309 	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3310 	ptr[i++] = 0x10000242;
3311 	ptr[i++] = 0x11;
3312 
3313 	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
3314 	ptr[i++] = 3;
3315 	ptr[i++] = 2;
3316 
3317 	return i;
3318 }
3319 
/*
 * Fill a freshly allocated 16 KiB VRAM buffer with 0x33 by submitting a draw
 * on the GFX IP, then verify every byte.
 *
 * device_handle:        device used for allocations and submission.
 * bo_shader_ps/vs:      buffers holding the pixel / vertex shader; they are
 *                       only added to the BO list here and are not freed by
 *                       this function.
 * mc_address_shader_ps: GPU address of the pixel shader buffer.
 * mc_address_shader_vs: GPU address of the vertex shader buffer.
 * ring_id:              GFX ring to submit on.
 *
 * The context, command buffer and destination buffer created here are
 * released before returning.
 */
void amdgpu_memset_draw(amdgpu_device_handle device_handle,
			amdgpu_bo_handle bo_shader_ps,
			amdgpu_bo_handle bo_shader_vs,
			uint64_t mc_address_shader_ps,
			uint64_t mc_address_shader_vs,
			uint32_t ring_id)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_cmd;
	amdgpu_va_handle va_dst, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer (GTT); zero-filled, so dwords the builders skip stay 0 */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
					AMDGPU_GEM_DOMAIN_GTT, 0,
					&bo_cmd, (void **)&ptr_cmd,
					&mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* destination buffer (VRAM); not initialised by the CPU */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* i counts the dwords written to ptr_cmd so far */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);

	/* four SH registers from 0xc, each set to the 0x33 fill pattern */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB to a multiple of 8 dwords */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	/* every buffer the IB touches goes into the BO list */
	resources[0] = bo_dst;
	resources[1] = bo_shader_ps;
	resources[2] = bo_shader_vs;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i; /* size is given in dwords */
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring_id;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	/* the list is destroyed right after submission, before the fence wait */
	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring_id;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memset test result meets with expected */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
	}

	/* release what this function allocated; shader buffers belong to the caller */
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3432 
/*
 * Memset-by-draw test for one GFX ring.
 *
 * Allocates one 4 KiB VRAM buffer each for the pixel and vertex shader,
 * clears them, loads the PS_CONST pixel shader and the vertex shader, runs
 * amdgpu_memset_draw() on the given ring and frees both buffers again.
 */
static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
				    uint32_t ring)
{
	const int shader_bo_bytes = 4096;
	amdgpu_bo_handle ps_bo, vs_bo;
	amdgpu_va_handle ps_va, vs_va;
	uint64_t ps_gpu_addr, vs_gpu_addr;
	void *ps_cpu;
	void *vs_cpu;
	int r;

	/* pixel shader buffer */
	r = amdgpu_bo_alloc_and_map(device_handle, shader_bo_bytes, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &ps_bo, &ps_cpu,
				    &ps_gpu_addr, &ps_va);
	CU_ASSERT_EQUAL(r, 0);
	memset(ps_cpu, 0, shader_bo_bytes);

	/* vertex shader buffer */
	r = amdgpu_bo_alloc_and_map(device_handle, shader_bo_bytes, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &vs_bo, &vs_cpu,
				    &vs_gpu_addr, &vs_va);
	CU_ASSERT_EQUAL(r, 0);
	memset(vs_cpu, 0, shader_bo_bytes);

	/* fill both buffers with their shader code */
	r = amdgpu_draw_load_ps_shader(ps_cpu, PS_CONST);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(vs_cpu);
	CU_ASSERT_EQUAL(r, 0);

	/* the draw helper builds, submits and verifies the actual memset */
	amdgpu_memset_draw(device_handle, ps_bo, vs_bo,
			   ps_gpu_addr, vs_gpu_addr, ring);

	/* tear down in allocation order */
	r = amdgpu_bo_unmap_and_free(ps_bo, ps_va, ps_gpu_addr, shader_bo_bytes);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(vs_bo, vs_va, vs_gpu_addr, shader_bo_bytes);
	CU_ASSERT_EQUAL(r, 0);
}
3473 
/*
 * Copy a 16 KiB VRAM buffer filled with 0x55 to a second buffer by
 * submitting a draw on the GFX IP, then check the outcome.
 *
 * device_handle:        device used for allocations and submission.
 * bo_shader_ps/vs:      buffers holding the pixel / vertex shader; they are
 *                       only added to the BO list here and are not freed by
 *                       this function.
 * mc_address_shader_ps: GPU address of the pixel shader buffer.
 * mc_address_shader_vs: GPU address of the vertex shader buffer.
 * ring:                 GFX ring to submit on.
 * hang:                 zero: assert the fence signalled and compare the
 *                       destination with the source byte by byte.
 *                       non-zero: assert the context reports
 *                       AMDGPU_CTX_UNKNOWN_RESET instead.
 *
 * The command stream is built the same way for both values of hang; only
 * the verification differs.
 */
static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring, int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384; /* size of both source and destination */
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer (GTT); zero-filled, so dwords the builders skip stay 0 */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* fill the source with a known byte pattern */
	memset(ptr_src, 0x55, bo_size);

	/* i counts the dwords written to ptr_cmd so far */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	/* eight SH registers from 0xc carrying the source address; last three skipped */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x7c01f;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0x3e000;
	i += 3;

	/* four SH registers from 0x14; only the first is written, the rest skipped */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	/* context register 0x191 set to 0 */
	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB to a multiple of 8 dwords */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	/* every buffer the IB touches goes into the BO list */
	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i; /* size is given in dwords */
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished; the result is only asserted when !hang */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify if memcpy test result meets with expected */
		i = 0;
		while(i < bo_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		/* hang case: the context is expected to report a reset */
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	/* release what this function allocated; shader buffers belong to the caller */
	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3615 
/*
 * Run the draw-based memcpy test on one ring.
 *
 * Allocates and zeroes one 4 KiB VRAM buffer each for the pixel shader and
 * the vertex shader, loads the shader code into them and passes both buffers
 * to amdgpu_memcpy_draw(), which performs the submission and the checks.
 *
 * @device_handle: device the shader buffers are allocated on
 * @ring:          ring index, forwarded unchanged to amdgpu_memcpy_draw()
 * @hang:          non-zero selects the PS_HANG pixel shader instead of PS_TEX;
 *                 also forwarded unchanged to amdgpu_memcpy_draw()
 *
 * Nothing is returned; failures are recorded through CUnit assertions.
 */
void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
			     int hang)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int bo_shader_size = 4096;
	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
	int r;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	/*
	 * CU_ASSERT_EQUAL only records the failure and carries on, so bail
	 * out explicitly instead of writing through an unset pointer.
	 */
	if (r)
		return;
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto free_shader_ps;
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
			mc_address_shader_ps, mc_address_shader_vs, ring, hang);

	/* Release in reverse order of allocation. */
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs,
				     mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

free_shader_ps:
	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps,
				     mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}
3657 
amdgpu_draw_test(void)3658 static void amdgpu_draw_test(void)
3659 {
3660 	int r;
3661 	struct drm_amdgpu_info_hw_ip info;
3662 	uint32_t ring_id;
3663 
3664 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
3665 	CU_ASSERT_EQUAL(r, 0);
3666 	if (!info.available_rings)
3667 		printf("SKIP ... as there's no graphics ring\n");
3668 
3669 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3670 		amdgpu_memset_draw_test(device_handle, ring_id);
3671 		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
3672 	}
3673 }
3674 
/*
 * Submit a draw-based copy that uses the "hang slow" pixel shader and check
 * that the context reports AMDGPU_CTX_UNKNOWN_RESET afterwards.
 *
 * The function creates its own context, a 4 KiB GTT command buffer, a 4 MiB
 * pixel-shader buffer, a 4 KiB vertex-shader buffer and two 64 MiB VRAM
 * buffers (source filled with 0x55, destination left untouched by the CPU),
 * builds one indirect buffer, submits it on the GFX IP, waits for its fence
 * and then queries the context reset state.
 *
 * @device_handle: device everything is allocated on
 * @ring:          GFX ring the indirect buffer is submitted to
 *
 * Nothing is returned; failures are recorded through CUnit assertions. When
 * an allocation fails the remaining steps are skipped and whatever has been
 * set up so far is released.
 */
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;
	int bo_shader_ps_size = 0x400000;
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	/* Initialised so a failed query never leads to comparing garbage. */
	uint32_t hang_state = 0, hangs = 0, expired = 0;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	/*
	 * CU_ASSERT_EQUAL records a failure but does not leave the function,
	 * so every step that produces a handle or pointer used later is
	 * followed by an explicit bail-out.
	 */
	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		return;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto free_ctx;
	/* The command stream below relies on skipped dwords being zero. */
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto free_cmd;
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto free_shader_ps;
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto free_shader_vs;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto free_src;

	memset(ptr_src, 0x55, bo_size);

	/* Build the indirect buffer; i counts dwords written so far. */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
							mc_address_shader_vs, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	/*
	 * SH register block starting at offset 0xc that carries the source
	 * buffer address. NOTE(review): presumably the texture resource
	 * descriptor for the copy source - confirm against the register spec.
	 * The last three payload dwords are skipped and stay zero.
	 */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x1ffc7ff;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0xffe000;
	i += 3;

	/* SH register block at offset 0x14; trailing three dwords stay zero. */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	/* Context register 0x191 is written with zero. */
	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* Pad the buffer to a multiple of eight dwords. */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto free_dst;

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/*
	 * Wait for the IB to finish. The result is deliberately not asserted:
	 * this submission is expected to end in a reset, which is what the
	 * reset-state check below verifies.
	 */
	(void)amdgpu_cs_query_fence_status(&fence_status,
					   AMDGPU_TIMEOUT_INFINITE,
					   0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	/* Unwind in reverse order of acquisition. */
free_dst:
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
free_src:
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
free_shader_vs:
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs,
				     mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);
free_shader_ps:
	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps,
				     mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
free_cmd:
	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);
free_ctx:
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3836 
amdgpu_gpu_reset_test(void)3837 static void amdgpu_gpu_reset_test(void)
3838 {
3839 	int r;
3840 	char debugfs_path[256], tmp[10];
3841 	int fd;
3842 	struct stat sbuf;
3843 	amdgpu_context_handle context_handle;
3844 	uint32_t hang_state, hangs;
3845 
3846 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3847 	CU_ASSERT_EQUAL(r, 0);
3848 
3849 	r = fstat(drm_amdgpu[0], &sbuf);
3850 	CU_ASSERT_EQUAL(r, 0);
3851 
3852 	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
3853 	fd = open(debugfs_path, O_RDONLY);
3854 	CU_ASSERT(fd >= 0);
3855 
3856 	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
3857 	CU_ASSERT(r > 0);
3858 
3859 	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3860 	CU_ASSERT_EQUAL(r, 0);
3861 	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3862 
3863 	close(fd);
3864 	r = amdgpu_cs_ctx_free(context_handle);
3865 	CU_ASSERT_EQUAL(r, 0);
3866 
3867 	amdgpu_compute_dispatch_test();
3868 	amdgpu_gfx_dispatch_test();
3869 }
3870