1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22 */
23 
#include "CUnit/Basic.h"
#include "xf86drm.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include <pthread.h>
#include <unistd.h>
31 
32 static  amdgpu_device_handle device_handle;
33 static  uint32_t  major_version;
34 static  uint32_t  minor_version;
35 
36 static void amdgpu_syncobj_timeline_test(void);
37 
suite_syncobj_timeline_tests_enable(void)38 CU_BOOL suite_syncobj_timeline_tests_enable(void)
39 {
40 	int r;
41 	uint64_t cap = 0;
42 
43 	r = drmGetCap(drm_amdgpu[0], DRM_CAP_SYNCOBJ_TIMELINE, &cap);
44 	if (r || cap == 0)
45 		return CU_FALSE;
46 
47 	return CU_TRUE;
48 }
49 
suite_syncobj_timeline_tests_init(void)50 int suite_syncobj_timeline_tests_init(void)
51 {
52 	int r;
53 
54 	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
55 				   &minor_version, &device_handle);
56 
57 	if (r) {
58 		if ((r == -EACCES) && (errno == EACCES))
59 			printf("\n\nError:%s. "
60 				"Hint:Try to run this test program as root.",
61 				strerror(errno));
62 		return CUE_SINIT_FAILED;
63 	}
64 
65 	return CUE_SUCCESS;
66 }
67 
suite_syncobj_timeline_tests_clean(void)68 int suite_syncobj_timeline_tests_clean(void)
69 {
70 	int r = amdgpu_device_deinitialize(device_handle);
71 
72 	if (r == 0)
73 		return CUE_SUCCESS;
74 	else
75 		return CUE_SCLEAN_FAILED;
76 }
77 
78 
79 CU_TestInfo syncobj_timeline_tests[] = {
80 	{ "syncobj timeline test",  amdgpu_syncobj_timeline_test },
81 	CU_TEST_INFO_NULL,
82 };
83 
/*
 * Dword values used to fill the test IB: GFX_COMPUTE_NOP when the IB is
 * submitted to AMDGPU_HW_IP_GFX, SDMA_NOP when it goes to AMDGPU_HW_IP_DMA.
 */
#define GFX_COMPUTE_NOP		0xffff1000
#define SDMA_NOP		0x0
syncobj_command_submission_helper(uint32_t syncobj_handle,bool wait_or_signal,uint64_t point)86 static int syncobj_command_submission_helper(uint32_t syncobj_handle, bool
87 					     wait_or_signal, uint64_t point)
88 {
89 	amdgpu_context_handle context_handle;
90 	amdgpu_bo_handle ib_result_handle;
91 	void *ib_result_cpu;
92 	uint64_t ib_result_mc_address;
93 	struct drm_amdgpu_cs_chunk chunks[2];
94 	struct drm_amdgpu_cs_chunk_data chunk_data;
95 	struct drm_amdgpu_cs_chunk_syncobj syncobj_data;
96 	struct amdgpu_cs_fence fence_status;
97 	amdgpu_bo_list_handle bo_list;
98 	amdgpu_va_handle va_handle;
99 	uint32_t expired;
100 	int i, r;
101 	uint64_t seq_no;
102 	static uint32_t *ptr;
103 
104 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
105 	CU_ASSERT_EQUAL(r, 0);
106 
107 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
108 				    AMDGPU_GEM_DOMAIN_GTT, 0,
109 				    &ib_result_handle, &ib_result_cpu,
110 				    &ib_result_mc_address, &va_handle);
111 	CU_ASSERT_EQUAL(r, 0);
112 
113 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
114 			       &bo_list);
115 	CU_ASSERT_EQUAL(r, 0);
116 
117 	ptr = ib_result_cpu;
118 
119 	for (i = 0; i < 16; ++i)
120 		ptr[i] = wait_or_signal ? GFX_COMPUTE_NOP: SDMA_NOP;
121 
122 	chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
123 	chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
124 	chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data;
125 	chunk_data.ib_data._pad = 0;
126 	chunk_data.ib_data.va_start = ib_result_mc_address;
127 	chunk_data.ib_data.ib_bytes = 16 * 4;
128 	chunk_data.ib_data.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX :
129 		AMDGPU_HW_IP_DMA;
130 	chunk_data.ib_data.ip_instance = 0;
131 	chunk_data.ib_data.ring = 0;
132 	chunk_data.ib_data.flags = 0;
133 
134 	chunks[1].chunk_id = wait_or_signal ?
135 		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT :
136 		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
137 	chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4;
138 	chunks[1].chunk_data = (uint64_t)(uintptr_t)&syncobj_data;
139 	syncobj_data.handle = syncobj_handle;
140 	syncobj_data.point = point;
141 	syncobj_data.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
142 
143 	r = amdgpu_cs_submit_raw(device_handle,
144 				 context_handle,
145 				 bo_list,
146 				 2,
147 				 chunks,
148 				 &seq_no);
149 	CU_ASSERT_EQUAL(r, 0);
150 
151 
152 	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
153 	fence_status.context = context_handle;
154 	fence_status.ip_type = wait_or_signal ? AMDGPU_HW_IP_GFX:
155 		AMDGPU_HW_IP_DMA;
156 	fence_status.ip_instance = 0;
157 	fence_status.ring = 0;
158 	fence_status.fence = seq_no;
159 
160 	r = amdgpu_cs_query_fence_status(&fence_status,
161 			AMDGPU_TIMEOUT_INFINITE,0, &expired);
162 	CU_ASSERT_EQUAL(r, 0);
163 
164 	r = amdgpu_bo_list_destroy(bo_list);
165 	CU_ASSERT_EQUAL(r, 0);
166 
167 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
168 				     ib_result_mc_address, 4096);
169 	CU_ASSERT_EQUAL(r, 0);
170 
171 	r = amdgpu_cs_ctx_free(context_handle);
172 	CU_ASSERT_EQUAL(r, 0);
173 
174 	return r;
175 }
176 
/*
 * Argument bundle handed to the syncobj_wait() / syncobj_signal() thread
 * entry points.
 */
struct syncobj_point {
	uint32_t syncobj_handle;	/* syncobj to operate on */
	uint64_t point;			/* timeline point to wait on or signal */
};
181 
syncobj_wait(void * data)182 static void *syncobj_wait(void *data)
183 {
184 	struct syncobj_point *sp = (struct syncobj_point *)data;
185 	int r;
186 
187 	r = syncobj_command_submission_helper(sp->syncobj_handle, true,
188 					      sp->point);
189 	CU_ASSERT_EQUAL(r, 0);
190 
191 	return (void *)(long)r;
192 }
193 
syncobj_signal(void * data)194 static void *syncobj_signal(void *data)
195 {
196 	struct syncobj_point *sp = (struct syncobj_point *)data;
197 	int r;
198 
199 	r = syncobj_command_submission_helper(sp->syncobj_handle, false,
200 					      sp->point);
201 	CU_ASSERT_EQUAL(r, 0);
202 
203 	return (void *)(long)r;
204 }
205 
amdgpu_syncobj_timeline_test(void)206 static void amdgpu_syncobj_timeline_test(void)
207 {
208 	static pthread_t wait_thread;
209 	static pthread_t signal_thread;
210 	static pthread_t c_thread;
211 	struct syncobj_point sp1, sp2, sp3;
212 	uint32_t syncobj_handle;
213 	uint64_t payload;
214 	uint64_t wait_point, signal_point;
215 	uint64_t timeout;
216 	struct timespec tp;
217 	int r, sync_fd;
218 	void *tmp;
219 
220 	r =  amdgpu_cs_create_syncobj2(device_handle, 0, &syncobj_handle);
221 	CU_ASSERT_EQUAL(r, 0);
222 
223 	// wait on point 5
224 	sp1.syncobj_handle = syncobj_handle;
225 	sp1.point = 5;
226 	r = pthread_create(&wait_thread, NULL, syncobj_wait, &sp1);
227 	CU_ASSERT_EQUAL(r, 0);
228 
229 	// signal on point 10
230 	sp2.syncobj_handle = syncobj_handle;
231 	sp2.point = 10;
232 	r = pthread_create(&signal_thread, NULL, syncobj_signal, &sp2);
233 	CU_ASSERT_EQUAL(r, 0);
234 
235 	r = pthread_join(wait_thread, &tmp);
236 	CU_ASSERT_EQUAL(r, 0);
237 	CU_ASSERT_EQUAL(tmp, 0);
238 
239 	r = pthread_join(signal_thread, &tmp);
240 	CU_ASSERT_EQUAL(r, 0);
241 	CU_ASSERT_EQUAL(tmp, 0);
242 
243 	//query timeline payload
244 	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
245 				    &payload, 1);
246 	CU_ASSERT_EQUAL(r, 0);
247 	CU_ASSERT_EQUAL(payload, 10);
248 
249 	//signal on point 16
250 	sp3.syncobj_handle = syncobj_handle;
251 	sp3.point = 16;
252 	r = pthread_create(&c_thread, NULL, syncobj_signal, &sp3);
253 	CU_ASSERT_EQUAL(r, 0);
254 	//CPU wait on point 16
255 	wait_point = 16;
256 	timeout = 0;
257 	clock_gettime(CLOCK_MONOTONIC, &tp);
258 	timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
259 	timeout += 0x10000000000; //10s
260 	r = amdgpu_cs_syncobj_timeline_wait(device_handle, &syncobj_handle,
261 					    &wait_point, 1, timeout,
262 					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
263 					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
264 					    NULL);
265 
266 	CU_ASSERT_EQUAL(r, 0);
267 	r = pthread_join(c_thread, &tmp);
268 	CU_ASSERT_EQUAL(r, 0);
269 	CU_ASSERT_EQUAL(tmp, 0);
270 
271 	// export point 16 and import to point 18
272 	r = amdgpu_cs_syncobj_export_sync_file2(device_handle, syncobj_handle,
273 						16,
274 						DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
275 						&sync_fd);
276 	CU_ASSERT_EQUAL(r, 0);
277 	r = amdgpu_cs_syncobj_import_sync_file2(device_handle, syncobj_handle,
278 						18, sync_fd);
279 	CU_ASSERT_EQUAL(r, 0);
280 	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
281 				    &payload, 1);
282 	CU_ASSERT_EQUAL(r, 0);
283 	CU_ASSERT_EQUAL(payload, 18);
284 
285 	// CPU signal on point 20
286 	signal_point = 20;
287 	r = amdgpu_cs_syncobj_timeline_signal(device_handle, &syncobj_handle,
288 					      &signal_point, 1);
289 	CU_ASSERT_EQUAL(r, 0);
290 	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
291 				    &payload, 1);
292 	CU_ASSERT_EQUAL(r, 0);
293 	CU_ASSERT_EQUAL(payload, 20);
294 
295 	r = amdgpu_cs_destroy_syncobj(device_handle, syncobj_handle);
296 	CU_ASSERT_EQUAL(r, 0);
297 
298 }
299