1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22 */
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #ifdef HAVE_ALLOCA_H
32 # include <alloca.h>
33 #endif
34 
35 #include "CUnit/Basic.h"
36 
37 #include "amdgpu_test.h"
38 #include "amdgpu_drm.h"
39 
40 static  amdgpu_device_handle device_handle;
41 static  uint32_t  major_version;
42 static  uint32_t  minor_version;
43 
44 static void amdgpu_query_info_test(void);
45 static void amdgpu_memory_alloc(void);
46 static void amdgpu_command_submission_gfx(void);
47 static void amdgpu_command_submission_compute(void);
48 static void amdgpu_command_submission_sdma(void);
49 static void amdgpu_userptr_test(void);
50 
51 CU_TestInfo basic_tests[] = {
52 	{ "Query Info Test",  amdgpu_query_info_test },
53 	{ "Memory alloc Test",  amdgpu_memory_alloc },
54 	{ "Userptr Test",  amdgpu_userptr_test },
55 	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
56 	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
57 	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
58 	CU_TEST_INFO_NULL,
59 };
/* Size in bytes of the user memory buffer wrapped by the userptr test. */
#define BUFFER_SIZE (8 * 1024)

/* Opcode field of an SDMA packet header: mask 0xFF at shift 0. */
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)

#define SDMA_OPCODE_CONSTANT_FILL  11
	/* Fill size selector:
	 * 0 = byte fill
	 * 2 = DW fill
	 */
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)

/*
 * Build the first dword of an SDMA packet:
 *   bits  7:0  = op, bits 15:8 = sub_op, bits 31:16 = e (extra bits).
 *
 * Every operand is converted to uint32_t before shifting. Without that, an
 * `e` value with bit 15 set (e.g. SDMA_CONSTANT_FILL_EXTRA_SIZE(2) == 0x8000)
 * would be shifted into the sign bit of a signed int, which is undefined
 * behaviour in C.
 */
#define SDMA_PACKET(op, sub_op, e)	((((uint32_t)(e) & 0xFFFF) << 16) |	\
					(((uint32_t)(sub_op) & 0xFF) << 8) |	\
					(((uint32_t)(op) & 0xFF) << 0))

#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1
	/* Historical misspelling, kept so existing users keep compiling. */
#       define SDMA_WRTIE_SUB_OPCODE_TILED                SDMA_WRITE_SUB_OPCODE_TILED

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
79 
suite_basic_tests_init(void)80 int suite_basic_tests_init(void)
81 {
82 	int r;
83 
84 	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
85 				   &minor_version, &device_handle);
86 
87 	if (r == 0)
88 		return CUE_SUCCESS;
89 	else
90 		return CUE_SINIT_FAILED;
91 }
92 
suite_basic_tests_clean(void)93 int suite_basic_tests_clean(void)
94 {
95 	int r = amdgpu_device_deinitialize(device_handle);
96 
97 	if (r == 0)
98 		return CUE_SUCCESS;
99 	else
100 		return CUE_SCLEAN_FAILED;
101 }
102 
amdgpu_query_info_test(void)103 static void amdgpu_query_info_test(void)
104 {
105 	struct amdgpu_gpu_info gpu_info = {0};
106 	uint32_t version, feature;
107 	int r;
108 
109 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
110 	CU_ASSERT_EQUAL(r, 0);
111 
112 	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
113 					  0, &version, &feature);
114 	CU_ASSERT_EQUAL(r, 0);
115 }
116 
amdgpu_memory_alloc(void)117 static void amdgpu_memory_alloc(void)
118 {
119 	amdgpu_bo_handle bo;
120 	amdgpu_va_handle va_handle;
121 	uint64_t bo_mc;
122 	int r;
123 
124 	/* Test visible VRAM */
125 	bo = gpu_mem_alloc(device_handle,
126 			4096, 4096,
127 			AMDGPU_GEM_DOMAIN_VRAM,
128 			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
129 			&bo_mc, &va_handle);
130 
131 	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
132 	CU_ASSERT_EQUAL(r, 0);
133 
134 	/* Test invisible VRAM */
135 	bo = gpu_mem_alloc(device_handle,
136 			4096, 4096,
137 			AMDGPU_GEM_DOMAIN_VRAM,
138 			AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
139 			&bo_mc, &va_handle);
140 
141 	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
142 	CU_ASSERT_EQUAL(r, 0);
143 
144 	/* Test GART Cacheable */
145 	bo = gpu_mem_alloc(device_handle,
146 			4096, 4096,
147 			AMDGPU_GEM_DOMAIN_GTT,
148 			0, &bo_mc, &va_handle);
149 
150 	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
151 	CU_ASSERT_EQUAL(r, 0);
152 
153 	/* Test GART USWC */
154 	bo = gpu_mem_alloc(device_handle,
155 			4096, 4096,
156 			AMDGPU_GEM_DOMAIN_GTT,
157 			AMDGPU_GEM_CREATE_CPU_GTT_USWC,
158 			&bo_mc, &va_handle);
159 
160 	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
161 	CU_ASSERT_EQUAL(r, 0);
162 }
163 
amdgpu_command_submission_gfx_separate_ibs(void)164 static void amdgpu_command_submission_gfx_separate_ibs(void)
165 {
166 	amdgpu_context_handle context_handle;
167 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
168 	void *ib_result_cpu, *ib_result_ce_cpu;
169 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
170 	struct amdgpu_cs_request ibs_request = {0};
171 	struct amdgpu_cs_ib_info ib_info[2];
172 	struct amdgpu_cs_fence fence_status = {0};
173 	uint32_t *ptr;
174 	uint32_t expired;
175 	amdgpu_bo_list_handle bo_list;
176 	amdgpu_va_handle va_handle, va_handle_ce;
177 	int r;
178 
179 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
180 	CU_ASSERT_EQUAL(r, 0);
181 
182 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
183 				    AMDGPU_GEM_DOMAIN_GTT, 0,
184 				    &ib_result_handle, &ib_result_cpu,
185 				    &ib_result_mc_address, &va_handle);
186 	CU_ASSERT_EQUAL(r, 0);
187 
188 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
189 				    AMDGPU_GEM_DOMAIN_GTT, 0,
190 				    &ib_result_ce_handle, &ib_result_ce_cpu,
191 				    &ib_result_ce_mc_address, &va_handle_ce);
192 	CU_ASSERT_EQUAL(r, 0);
193 
194 	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
195 			       ib_result_ce_handle, &bo_list);
196 	CU_ASSERT_EQUAL(r, 0);
197 
198 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
199 
200 	/* IT_SET_CE_DE_COUNTERS */
201 	ptr = ib_result_ce_cpu;
202 	ptr[0] = 0xc0008900;
203 	ptr[1] = 0;
204 	ptr[2] = 0xc0008400;
205 	ptr[3] = 1;
206 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
207 	ib_info[0].size = 4;
208 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
209 
210 	/* IT_WAIT_ON_CE_COUNTER */
211 	ptr = ib_result_cpu;
212 	ptr[0] = 0xc0008600;
213 	ptr[1] = 0x00000001;
214 	ib_info[1].ib_mc_address = ib_result_mc_address;
215 	ib_info[1].size = 2;
216 
217 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
218 	ibs_request.number_of_ibs = 2;
219 	ibs_request.ibs = ib_info;
220 	ibs_request.resources = bo_list;
221 	ibs_request.fence_info.handle = NULL;
222 
223 	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
224 
225 	CU_ASSERT_EQUAL(r, 0);
226 
227 	fence_status.context = context_handle;
228 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
229 	fence_status.fence = ibs_request.seq_no;
230 
231 	r = amdgpu_cs_query_fence_status(&fence_status,
232 					 AMDGPU_TIMEOUT_INFINITE,
233 					 0, &expired);
234 	CU_ASSERT_EQUAL(r, 0);
235 
236 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
237 				     ib_result_mc_address, 4096);
238 	CU_ASSERT_EQUAL(r, 0);
239 
240 	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
241 				     ib_result_ce_mc_address, 4096);
242 	CU_ASSERT_EQUAL(r, 0);
243 
244 	r = amdgpu_bo_list_destroy(bo_list);
245 	CU_ASSERT_EQUAL(r, 0);
246 
247 	r = amdgpu_cs_ctx_free(context_handle);
248 	CU_ASSERT_EQUAL(r, 0);
249 
250 }
251 
amdgpu_command_submission_gfx_shared_ib(void)252 static void amdgpu_command_submission_gfx_shared_ib(void)
253 {
254 	amdgpu_context_handle context_handle;
255 	amdgpu_bo_handle ib_result_handle;
256 	void *ib_result_cpu;
257 	uint64_t ib_result_mc_address;
258 	struct amdgpu_cs_request ibs_request = {0};
259 	struct amdgpu_cs_ib_info ib_info[2];
260 	struct amdgpu_cs_fence fence_status = {0};
261 	uint32_t *ptr;
262 	uint32_t expired;
263 	amdgpu_bo_list_handle bo_list;
264 	amdgpu_va_handle va_handle;
265 	int r;
266 
267 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
268 	CU_ASSERT_EQUAL(r, 0);
269 
270 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
271 				    AMDGPU_GEM_DOMAIN_GTT, 0,
272 				    &ib_result_handle, &ib_result_cpu,
273 				    &ib_result_mc_address, &va_handle);
274 	CU_ASSERT_EQUAL(r, 0);
275 
276 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
277 			       &bo_list);
278 	CU_ASSERT_EQUAL(r, 0);
279 
280 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
281 
282 	/* IT_SET_CE_DE_COUNTERS */
283 	ptr = ib_result_cpu;
284 	ptr[0] = 0xc0008900;
285 	ptr[1] = 0;
286 	ptr[2] = 0xc0008400;
287 	ptr[3] = 1;
288 	ib_info[0].ib_mc_address = ib_result_mc_address;
289 	ib_info[0].size = 4;
290 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
291 
292 	ptr = (uint32_t *)ib_result_cpu + 4;
293 	ptr[0] = 0xc0008600;
294 	ptr[1] = 0x00000001;
295 	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
296 	ib_info[1].size = 2;
297 
298 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
299 	ibs_request.number_of_ibs = 2;
300 	ibs_request.ibs = ib_info;
301 	ibs_request.resources = bo_list;
302 	ibs_request.fence_info.handle = NULL;
303 
304 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
305 
306 	CU_ASSERT_EQUAL(r, 0);
307 
308 	fence_status.context = context_handle;
309 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
310 	fence_status.fence = ibs_request.seq_no;
311 
312 	r = amdgpu_cs_query_fence_status(&fence_status,
313 					 AMDGPU_TIMEOUT_INFINITE,
314 					 0, &expired);
315 	CU_ASSERT_EQUAL(r, 0);
316 
317 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
318 				     ib_result_mc_address, 4096);
319 	CU_ASSERT_EQUAL(r, 0);
320 
321 	r = amdgpu_bo_list_destroy(bo_list);
322 	CU_ASSERT_EQUAL(r, 0);
323 
324 	r = amdgpu_cs_ctx_free(context_handle);
325 	CU_ASSERT_EQUAL(r, 0);
326 }
327 
/*
 * GFX command submission test: runs both multi-IB variants back to back,
 * first with one buffer per IB, then with both IBs sharing one buffer.
 */
static void amdgpu_command_submission_gfx(void)
{
	/* multi-IB submission, each IB in its own buffer */
	amdgpu_command_submission_gfx_separate_ibs();

	/* multi-IB submission, both IBs in a single buffer */
	amdgpu_command_submission_gfx_shared_ib();
}
335 
amdgpu_command_submission_compute(void)336 static void amdgpu_command_submission_compute(void)
337 {
338 	amdgpu_context_handle context_handle;
339 	amdgpu_bo_handle ib_result_handle;
340 	void *ib_result_cpu;
341 	uint64_t ib_result_mc_address;
342 	struct amdgpu_cs_request ibs_request;
343 	struct amdgpu_cs_ib_info ib_info;
344 	struct amdgpu_cs_fence fence_status;
345 	uint32_t *ptr;
346 	uint32_t expired;
347 	int i, r, instance;
348 	amdgpu_bo_list_handle bo_list;
349 	amdgpu_va_handle va_handle;
350 
351 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
352 	CU_ASSERT_EQUAL(r, 0);
353 
354 	for (instance = 0; instance < 8; instance++) {
355 		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
356 					    AMDGPU_GEM_DOMAIN_GTT, 0,
357 					    &ib_result_handle, &ib_result_cpu,
358 					    &ib_result_mc_address, &va_handle);
359 		CU_ASSERT_EQUAL(r, 0);
360 
361 		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
362 				       &bo_list);
363 		CU_ASSERT_EQUAL(r, 0);
364 
365 		ptr = ib_result_cpu;
366 		for (i = 0; i < 16; ++i)
367 			ptr[i] = 0xffff1000;
368 
369 		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
370 		ib_info.ib_mc_address = ib_result_mc_address;
371 		ib_info.size = 16;
372 
373 		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
374 		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
375 		ibs_request.ring = instance;
376 		ibs_request.number_of_ibs = 1;
377 		ibs_request.ibs = &ib_info;
378 		ibs_request.resources = bo_list;
379 		ibs_request.fence_info.handle = NULL;
380 
381 		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
382 		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
383 		CU_ASSERT_EQUAL(r, 0);
384 
385 		fence_status.context = context_handle;
386 		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
387 		fence_status.ring = instance;
388 		fence_status.fence = ibs_request.seq_no;
389 
390 		r = amdgpu_cs_query_fence_status(&fence_status,
391 						 AMDGPU_TIMEOUT_INFINITE,
392 						 0, &expired);
393 		CU_ASSERT_EQUAL(r, 0);
394 
395 		r = amdgpu_bo_list_destroy(bo_list);
396 		CU_ASSERT_EQUAL(r, 0);
397 
398 		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
399 					     ib_result_mc_address, 4096);
400 		CU_ASSERT_EQUAL(r, 0);
401 	}
402 
403 	r = amdgpu_cs_ctx_free(context_handle);
404 	CU_ASSERT_EQUAL(r, 0);
405 }
406 
407 /*
408  * caller need create/release:
409  * pm4_src, resources, ib_info, and ibs_request
410  * submit command stream described in ibs_request and wait for this IB accomplished
411  */
amdgpu_sdma_test_exec_cs(amdgpu_context_handle context_handle,int instance,int pm4_dw,uint32_t * pm4_src,int res_cnt,amdgpu_bo_handle * resources,struct amdgpu_cs_ib_info * ib_info,struct amdgpu_cs_request * ibs_request)412 static void amdgpu_sdma_test_exec_cs(amdgpu_context_handle context_handle,
413 				 int instance, int pm4_dw, uint32_t *pm4_src,
414 				 int res_cnt, amdgpu_bo_handle *resources,
415 				 struct amdgpu_cs_ib_info *ib_info,
416 				 struct amdgpu_cs_request *ibs_request)
417 {
418 	int r;
419 	uint32_t expired;
420 	uint32_t *ring_ptr;
421 	amdgpu_bo_handle ib_result_handle;
422 	void *ib_result_cpu;
423 	uint64_t ib_result_mc_address;
424 	struct amdgpu_cs_fence fence_status = {0};
425 	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
426 	amdgpu_va_handle va_handle;
427 
428 	/* prepare CS */
429 	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
430 	CU_ASSERT_NOT_EQUAL(resources, NULL);
431 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
432 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
433 	CU_ASSERT_TRUE(pm4_dw <= 1024);
434 
435 	/* allocate IB */
436 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
437 				    AMDGPU_GEM_DOMAIN_GTT, 0,
438 				    &ib_result_handle, &ib_result_cpu,
439 				    &ib_result_mc_address, &va_handle);
440 	CU_ASSERT_EQUAL(r, 0);
441 
442 	/* copy PM4 packet to ring from caller */
443 	ring_ptr = ib_result_cpu;
444 	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
445 
446 	ib_info->ib_mc_address = ib_result_mc_address;
447 	ib_info->size = pm4_dw;
448 
449 	ibs_request->ip_type = AMDGPU_HW_IP_DMA;
450 	ibs_request->ring = instance;
451 	ibs_request->number_of_ibs = 1;
452 	ibs_request->ibs = ib_info;
453 	ibs_request->fence_info.handle = NULL;
454 
455 	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
456 	all_res[res_cnt] = ib_result_handle;
457 
458 	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
459 				  NULL, &ibs_request->resources);
460 	CU_ASSERT_EQUAL(r, 0);
461 
462 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
463 
464 	/* submit CS */
465 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
466 	CU_ASSERT_EQUAL(r, 0);
467 
468 	r = amdgpu_bo_list_destroy(ibs_request->resources);
469 	CU_ASSERT_EQUAL(r, 0);
470 
471 	fence_status.ip_type = AMDGPU_HW_IP_DMA;
472 	fence_status.ring = ibs_request->ring;
473 	fence_status.context = context_handle;
474 	fence_status.fence = ibs_request->seq_no;
475 
476 	/* wait for IB accomplished */
477 	r = amdgpu_cs_query_fence_status(&fence_status,
478 					 AMDGPU_TIMEOUT_INFINITE,
479 					 0, &expired);
480 	CU_ASSERT_EQUAL(r, 0);
481 	CU_ASSERT_EQUAL(expired, true);
482 
483 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
484 				     ib_result_mc_address, 4096);
485 	CU_ASSERT_EQUAL(r, 0);
486 }
487 
amdgpu_command_submission_sdma_write_linear(void)488 static void amdgpu_command_submission_sdma_write_linear(void)
489 {
490 	const int sdma_write_length = 128;
491 	const int pm4_dw = 256;
492 	amdgpu_context_handle context_handle;
493 	amdgpu_bo_handle bo;
494 	amdgpu_bo_handle *resources;
495 	uint32_t *pm4;
496 	struct amdgpu_cs_ib_info *ib_info;
497 	struct amdgpu_cs_request *ibs_request;
498 	uint64_t bo_mc;
499 	volatile uint32_t *bo_cpu;
500 	int i, j, r, loop;
501 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
502 	amdgpu_va_handle va_handle;
503 
504 	pm4 = calloc(pm4_dw, sizeof(*pm4));
505 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
506 
507 	ib_info = calloc(1, sizeof(*ib_info));
508 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
509 
510 	ibs_request = calloc(1, sizeof(*ibs_request));
511 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
512 
513 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
514 	CU_ASSERT_EQUAL(r, 0);
515 
516 	/* prepare resource */
517 	resources = calloc(1, sizeof(amdgpu_bo_handle));
518 	CU_ASSERT_NOT_EQUAL(resources, NULL);
519 
520 	loop = 0;
521 	while(loop < 2) {
522 		/* allocate UC bo for sDMA use */
523 		r = amdgpu_bo_alloc_and_map(device_handle,
524 					    sdma_write_length * sizeof(uint32_t),
525 					    4096, AMDGPU_GEM_DOMAIN_GTT,
526 					    gtt_flags[loop], &bo, (void**)&bo_cpu,
527 					    &bo_mc, &va_handle);
528 		CU_ASSERT_EQUAL(r, 0);
529 
530 		/* clear bo */
531 		memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
532 
533 
534 		resources[0] = bo;
535 
536 		/* fullfill PM4: test DMA write-linear */
537 		i = j = 0;
538 		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
539 				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
540 		pm4[i++] = 0xffffffff & bo_mc;
541 		pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
542 		pm4[i++] = sdma_write_length;
543 		while(j++ < sdma_write_length)
544 			pm4[i++] = 0xdeadbeaf;
545 
546 		amdgpu_sdma_test_exec_cs(context_handle, 0,
547 					i, pm4,
548 					1, resources,
549 					ib_info, ibs_request);
550 
551 		/* verify if SDMA test result meets with expected */
552 		i = 0;
553 		while(i < sdma_write_length) {
554 			CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
555 		}
556 
557 		r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
558 					     sdma_write_length * sizeof(uint32_t));
559 		CU_ASSERT_EQUAL(r, 0);
560 		loop++;
561 	}
562 	/* clean resources */
563 	free(resources);
564 	free(ibs_request);
565 	free(ib_info);
566 	free(pm4);
567 
568 	/* end of test */
569 	r = amdgpu_cs_ctx_free(context_handle);
570 	CU_ASSERT_EQUAL(r, 0);
571 }
572 
amdgpu_command_submission_sdma_const_fill(void)573 static void amdgpu_command_submission_sdma_const_fill(void)
574 {
575 	const int sdma_write_length = 1024 * 1024;
576 	const int pm4_dw = 256;
577 	amdgpu_context_handle context_handle;
578 	amdgpu_bo_handle bo;
579 	amdgpu_bo_handle *resources;
580 	uint32_t *pm4;
581 	struct amdgpu_cs_ib_info *ib_info;
582 	struct amdgpu_cs_request *ibs_request;
583 	uint64_t bo_mc;
584 	volatile uint32_t *bo_cpu;
585 	int i, j, r, loop;
586 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
587 	amdgpu_va_handle va_handle;
588 
589 	pm4 = calloc(pm4_dw, sizeof(*pm4));
590 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
591 
592 	ib_info = calloc(1, sizeof(*ib_info));
593 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
594 
595 	ibs_request = calloc(1, sizeof(*ibs_request));
596 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
597 
598 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
599 	CU_ASSERT_EQUAL(r, 0);
600 
601 	/* prepare resource */
602 	resources = calloc(1, sizeof(amdgpu_bo_handle));
603 	CU_ASSERT_NOT_EQUAL(resources, NULL);
604 
605 	loop = 0;
606 	while(loop < 2) {
607 		/* allocate UC bo for sDMA use */
608 		r = amdgpu_bo_alloc_and_map(device_handle,
609 					    sdma_write_length, 4096,
610 					    AMDGPU_GEM_DOMAIN_GTT,
611 					    gtt_flags[loop], &bo, (void**)&bo_cpu,
612 					    &bo_mc, &va_handle);
613 		CU_ASSERT_EQUAL(r, 0);
614 
615 		/* clear bo */
616 		memset((void*)bo_cpu, 0, sdma_write_length);
617 
618 		resources[0] = bo;
619 
620 		/* fullfill PM4: test DMA const fill */
621 		i = j = 0;
622 		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
623 				   SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
624 		pm4[i++] = 0xffffffff & bo_mc;
625 		pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
626 		pm4[i++] = 0xdeadbeaf;
627 		pm4[i++] = sdma_write_length;
628 
629 		amdgpu_sdma_test_exec_cs(context_handle, 0,
630 					i, pm4,
631 					1, resources,
632 					ib_info, ibs_request);
633 
634 		/* verify if SDMA test result meets with expected */
635 		i = 0;
636 		while(i < (sdma_write_length / 4)) {
637 			CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
638 		}
639 
640 		r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
641 					     sdma_write_length);
642 		CU_ASSERT_EQUAL(r, 0);
643 		loop++;
644 	}
645 	/* clean resources */
646 	free(resources);
647 	free(ibs_request);
648 	free(ib_info);
649 	free(pm4);
650 
651 	/* end of test */
652 	r = amdgpu_cs_ctx_free(context_handle);
653 	CU_ASSERT_EQUAL(r, 0);
654 }
655 
amdgpu_command_submission_sdma_copy_linear(void)656 static void amdgpu_command_submission_sdma_copy_linear(void)
657 {
658 	const int sdma_write_length = 1024;
659 	const int pm4_dw = 256;
660 	amdgpu_context_handle context_handle;
661 	amdgpu_bo_handle bo1, bo2;
662 	amdgpu_bo_handle *resources;
663 	uint32_t *pm4;
664 	struct amdgpu_cs_ib_info *ib_info;
665 	struct amdgpu_cs_request *ibs_request;
666 	uint64_t bo1_mc, bo2_mc;
667 	volatile unsigned char *bo1_cpu, *bo2_cpu;
668 	int i, j, r, loop1, loop2;
669 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
670 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
671 
672 	pm4 = calloc(pm4_dw, sizeof(*pm4));
673 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
674 
675 	ib_info = calloc(1, sizeof(*ib_info));
676 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
677 
678 	ibs_request = calloc(1, sizeof(*ibs_request));
679 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
680 
681 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
682 	CU_ASSERT_EQUAL(r, 0);
683 
684 	/* prepare resource */
685 	resources = calloc(2, sizeof(amdgpu_bo_handle));
686 	CU_ASSERT_NOT_EQUAL(resources, NULL);
687 
688 	loop1 = loop2 = 0;
689 	/* run 9 circle to test all mapping combination */
690 	while(loop1 < 2) {
691 		while(loop2 < 2) {
692 			/* allocate UC bo1for sDMA use */
693 			r = amdgpu_bo_alloc_and_map(device_handle,
694 						    sdma_write_length, 4096,
695 						    AMDGPU_GEM_DOMAIN_GTT,
696 						    gtt_flags[loop1], &bo1,
697 						    (void**)&bo1_cpu, &bo1_mc,
698 						    &bo1_va_handle);
699 			CU_ASSERT_EQUAL(r, 0);
700 
701 			/* set bo1 */
702 			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
703 
704 			/* allocate UC bo2 for sDMA use */
705 			r = amdgpu_bo_alloc_and_map(device_handle,
706 						    sdma_write_length, 4096,
707 						    AMDGPU_GEM_DOMAIN_GTT,
708 						    gtt_flags[loop2], &bo2,
709 						    (void**)&bo2_cpu, &bo2_mc,
710 						    &bo2_va_handle);
711 			CU_ASSERT_EQUAL(r, 0);
712 
713 			/* clear bo2 */
714 			memset((void*)bo2_cpu, 0, sdma_write_length);
715 
716 			resources[0] = bo1;
717 			resources[1] = bo2;
718 
719 			/* fullfill PM4: test DMA copy linear */
720 			i = j = 0;
721 			pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
722 			pm4[i++] = sdma_write_length;
723 			pm4[i++] = 0;
724 			pm4[i++] = 0xffffffff & bo1_mc;
725 			pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
726 			pm4[i++] = 0xffffffff & bo2_mc;
727 			pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
728 
729 
730 			amdgpu_sdma_test_exec_cs(context_handle, 0,
731 						i, pm4,
732 						2, resources,
733 						ib_info, ibs_request);
734 
735 			/* verify if SDMA test result meets with expected */
736 			i = 0;
737 			while(i < sdma_write_length) {
738 				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
739 			}
740 			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
741 						     sdma_write_length);
742 			CU_ASSERT_EQUAL(r, 0);
743 			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
744 						     sdma_write_length);
745 			CU_ASSERT_EQUAL(r, 0);
746 			loop2++;
747 		}
748 		loop1++;
749 	}
750 	/* clean resources */
751 	free(resources);
752 	free(ibs_request);
753 	free(ib_info);
754 	free(pm4);
755 
756 	/* end of test */
757 	r = amdgpu_cs_ctx_free(context_handle);
758 	CU_ASSERT_EQUAL(r, 0);
759 }
760 
/*
 * SDMA command submission test: runs the three SDMA sub-tests in sequence
 * (write-linear, constant-fill, copy-linear).
 */
static void amdgpu_command_submission_sdma(void)
{
	/* write a pattern carried inside the packet */
	amdgpu_command_submission_sdma_write_linear();

	/* fill a buffer with a constant */
	amdgpu_command_submission_sdma_const_fill();

	/* copy one buffer into another */
	amdgpu_command_submission_sdma_copy_linear();
}
767 
amdgpu_userptr_test(void)768 static void amdgpu_userptr_test(void)
769 {
770 	int i, r, j;
771 	uint32_t *pm4 = NULL;
772 	uint64_t bo_mc;
773 	void *ptr = NULL;
774 	int pm4_dw = 256;
775 	int sdma_write_length = 4;
776 	amdgpu_bo_handle handle;
777 	amdgpu_context_handle context_handle;
778 	struct amdgpu_cs_ib_info *ib_info;
779 	struct amdgpu_cs_request *ibs_request;
780 	amdgpu_bo_handle buf_handle;
781 	amdgpu_va_handle va_handle;
782 
783 	pm4 = calloc(pm4_dw, sizeof(*pm4));
784 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
785 
786 	ib_info = calloc(1, sizeof(*ib_info));
787 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
788 
789 	ibs_request = calloc(1, sizeof(*ibs_request));
790 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
791 
792 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
793 	CU_ASSERT_EQUAL(r, 0);
794 
795 	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
796 	CU_ASSERT_NOT_EQUAL(ptr, NULL);
797 	memset(ptr, 0, BUFFER_SIZE);
798 
799 	r = amdgpu_create_bo_from_user_mem(device_handle,
800 					   ptr, BUFFER_SIZE, &buf_handle);
801 	CU_ASSERT_EQUAL(r, 0);
802 
803 	r = amdgpu_va_range_alloc(device_handle,
804 				  amdgpu_gpu_va_range_general,
805 				  BUFFER_SIZE, 1, 0, &bo_mc,
806 				  &va_handle, 0);
807 	CU_ASSERT_EQUAL(r, 0);
808 
809 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
810 	CU_ASSERT_EQUAL(r, 0);
811 
812 	handle = buf_handle;
813 
814 	j = i = 0;
815 	pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
816 			       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
817 	pm4[i++] = 0xffffffff & bo_mc;
818 	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
819 	pm4[i++] = sdma_write_length;
820 
821 	while (j++ < sdma_write_length)
822 		pm4[i++] = 0xdeadbeaf;
823 
824 	amdgpu_sdma_test_exec_cs(context_handle, 0,
825 				 i, pm4,
826 				 1, &handle,
827 				 ib_info, ibs_request);
828 	i = 0;
829 	while (i < sdma_write_length) {
830 		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
831 	}
832 	free(ibs_request);
833 	free(ib_info);
834 	free(pm4);
835 
836 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
837 	CU_ASSERT_EQUAL(r, 0);
838 	r = amdgpu_va_range_free(va_handle);
839 	CU_ASSERT_EQUAL(r, 0);
840 	r = amdgpu_bo_free(buf_handle);
841 	CU_ASSERT_EQUAL(r, 0);
842 	free(ptr);
843 
844 	r = amdgpu_cs_ctx_free(context_handle);
845 	CU_ASSERT_EQUAL(r, 0);
846 }
847