1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <errno.h>
28 #include <pthread.h>
29 #include <sched.h>
30 #include <sys/ioctl.h>
31 #ifdef HAVE_ALLOCA_H
32 # include <alloca.h>
33 #endif
34 
35 #include "xf86drm.h"
36 #include "amdgpu_drm.h"
37 #include "amdgpu_internal.h"
38 
39 static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem);
40 static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);
41 
42 /**
43  * Create command submission context
44  *
45  * \param   dev      - \c [in] Device handle. See #amdgpu_device_initialize()
46  * \param   priority - \c [in] Context creation flags. See AMDGPU_CTX_PRIORITY_*
47  * \param   context  - \c [out] GPU Context handle
48  *
49  * \return  0 on success otherwise POSIX Error code
50 */
amdgpu_cs_ctx_create2(amdgpu_device_handle dev,uint32_t priority,amdgpu_context_handle * context)51 int amdgpu_cs_ctx_create2(amdgpu_device_handle dev, uint32_t priority,
52 							amdgpu_context_handle *context)
53 {
54 	struct amdgpu_context *gpu_context;
55 	union drm_amdgpu_ctx args;
56 	int i, j, k;
57 	int r;
58 
59 	if (!dev || !context)
60 		return -EINVAL;
61 
62 	gpu_context = calloc(1, sizeof(struct amdgpu_context));
63 	if (!gpu_context)
64 		return -ENOMEM;
65 
66 	gpu_context->dev = dev;
67 
68 	r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
69 	if (r)
70 		goto error;
71 
72 	/* Create the context */
73 	memset(&args, 0, sizeof(args));
74 	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
75 	args.in.priority = priority;
76 
77 	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
78 	if (r)
79 		goto error;
80 
81 	gpu_context->id = args.out.alloc.ctx_id;
82 	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
83 		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++)
84 			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++)
85 				list_inithead(&gpu_context->sem_list[i][j][k]);
86 	*context = (amdgpu_context_handle)gpu_context;
87 
88 	return 0;
89 
90 error:
91 	pthread_mutex_destroy(&gpu_context->sequence_mutex);
92 	free(gpu_context);
93 	return r;
94 }
95 
/**
 * Create command submission context with normal priority
 *
 * Convenience wrapper that forwards to amdgpu_cs_ctx_create2() with
 * AMDGPU_CTX_PRIORITY_NORMAL.
 *
 * \param   dev     - \c [in] Device handle
 * \param   context - \c [out] GPU Context handle
 *
 * \return  whatever amdgpu_cs_ctx_create2() returns
*/
int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
			 amdgpu_context_handle *context)
{
	const uint32_t default_priority = AMDGPU_CTX_PRIORITY_NORMAL;

	return amdgpu_cs_ctx_create2(dev, default_priority, context);
}
101 
102 /**
103  * Release command submission context
104  *
105  * \param   dev - \c [in] amdgpu device handle
106  * \param   context - \c [in] amdgpu context handle
107  *
108  * \return  0 on success otherwise POSIX Error code
109 */
amdgpu_cs_ctx_free(amdgpu_context_handle context)110 int amdgpu_cs_ctx_free(amdgpu_context_handle context)
111 {
112 	union drm_amdgpu_ctx args;
113 	int i, j, k;
114 	int r;
115 
116 	if (!context)
117 		return -EINVAL;
118 
119 	pthread_mutex_destroy(&context->sequence_mutex);
120 
121 	/* now deal with kernel side */
122 	memset(&args, 0, sizeof(args));
123 	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
124 	args.in.ctx_id = context->id;
125 	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
126 				&args, sizeof(args));
127 	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
128 		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++) {
129 			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++) {
130 				amdgpu_semaphore_handle sem;
131 				LIST_FOR_EACH_ENTRY(sem, &context->sem_list[i][j][k], list) {
132 					list_del(&sem->list);
133 					amdgpu_cs_reset_sem(sem);
134 					amdgpu_cs_unreference_sem(sem);
135 				}
136 			}
137 		}
138 	}
139 	free(context);
140 
141 	return r;
142 }
143 
/**
 * Query the reset state of a context
 *
 * Issues an AMDGPU_CTX_OP_QUERY_STATE request for the context and, when the
 * request succeeds, copies the reported reset status and hang count to the
 * caller. The outputs are left untouched on failure.
 *
 * \param   context - \c [in] GPU Context handle
 * \param   state   - \c [out] reset status reported by the kernel
 * \param   hangs   - \c [out] hang count reported by the kernel
 *
 * \return  -EINVAL if \p context is NULL, otherwise the result of
 *          drmCommandWriteRead()
*/
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
				uint32_t *state, uint32_t *hangs)
{
	union drm_amdgpu_ctx args;
	int ret;

	if (context == NULL)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.ctx_id = context->id;
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE;

	ret = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				  &args, sizeof(args));
	if (ret)
		return ret;

	*state = args.out.state.reset_status;
	*hangs = args.out.state.hangs;
	return ret;
}
164 
165 /**
166  * Submit command to kernel DRM
167  * \param   dev - \c [in]  Device handle
168  * \param   context - \c [in]  GPU Context
169  * \param   ibs_request - \c [in]  Pointer to submission requests
170  * \param   fence - \c [out] return fence for this submission
171  *
172  * \return  0 on success otherwise POSIX Error code
173  * \sa amdgpu_cs_submit()
174 */
amdgpu_cs_submit_one(amdgpu_context_handle context,struct amdgpu_cs_request * ibs_request)175 static int amdgpu_cs_submit_one(amdgpu_context_handle context,
176 				struct amdgpu_cs_request *ibs_request)
177 {
178 	union drm_amdgpu_cs cs;
179 	uint64_t *chunk_array;
180 	struct drm_amdgpu_cs_chunk *chunks;
181 	struct drm_amdgpu_cs_chunk_data *chunk_data;
182 	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
183 	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
184 	struct list_head *sem_list;
185 	amdgpu_semaphore_handle sem, tmp;
186 	uint32_t i, size, sem_count = 0;
187 	bool user_fence;
188 	int r = 0;
189 
190 	if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
191 		return -EINVAL;
192 	if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
193 		return -EINVAL;
194 	if (ibs_request->number_of_ibs == 0) {
195 		ibs_request->seq_no = AMDGPU_NULL_SUBMIT_SEQ;
196 		return 0;
197 	}
198 	user_fence = (ibs_request->fence_info.handle != NULL);
199 
200 	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;
201 
202 	chunk_array = alloca(sizeof(uint64_t) * size);
203 	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
204 
205 	size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);
206 
207 	chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);
208 
209 	memset(&cs, 0, sizeof(cs));
210 	cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
211 	cs.in.ctx_id = context->id;
212 	if (ibs_request->resources)
213 		cs.in.bo_list_handle = ibs_request->resources->handle;
214 	cs.in.num_chunks = ibs_request->number_of_ibs;
215 	/* IB chunks */
216 	for (i = 0; i < ibs_request->number_of_ibs; i++) {
217 		struct amdgpu_cs_ib_info *ib;
218 		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
219 		chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
220 		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
221 		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
222 
223 		ib = &ibs_request->ibs[i];
224 
225 		chunk_data[i].ib_data._pad = 0;
226 		chunk_data[i].ib_data.va_start = ib->ib_mc_address;
227 		chunk_data[i].ib_data.ib_bytes = ib->size * 4;
228 		chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
229 		chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
230 		chunk_data[i].ib_data.ring = ibs_request->ring;
231 		chunk_data[i].ib_data.flags = ib->flags;
232 	}
233 
234 	pthread_mutex_lock(&context->sequence_mutex);
235 
236 	if (user_fence) {
237 		i = cs.in.num_chunks++;
238 
239 		/* fence chunk */
240 		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
241 		chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
242 		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
243 		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
244 
245 		/* fence bo handle */
246 		chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
247 		/* offset */
248 		chunk_data[i].fence_data.offset =
249 			ibs_request->fence_info.offset * sizeof(uint64_t);
250 	}
251 
252 	if (ibs_request->number_of_dependencies) {
253 		dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) *
254 			ibs_request->number_of_dependencies);
255 		if (!dependencies) {
256 			r = -ENOMEM;
257 			goto error_unlock;
258 		}
259 
260 		for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
261 			struct amdgpu_cs_fence *info = &ibs_request->dependencies[i];
262 			struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
263 			dep->ip_type = info->ip_type;
264 			dep->ip_instance = info->ip_instance;
265 			dep->ring = info->ring;
266 			dep->ctx_id = info->context->id;
267 			dep->handle = info->fence;
268 		}
269 
270 		i = cs.in.num_chunks++;
271 
272 		/* dependencies chunk */
273 		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
274 		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
275 		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
276 			* ibs_request->number_of_dependencies;
277 		chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
278 	}
279 
280 	sem_list = &context->sem_list[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring];
281 	LIST_FOR_EACH_ENTRY(sem, sem_list, list)
282 		sem_count++;
283 	if (sem_count) {
284 		sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_count);
285 		if (!sem_dependencies) {
286 			r = -ENOMEM;
287 			goto error_unlock;
288 		}
289 		sem_count = 0;
290 		LIST_FOR_EACH_ENTRY_SAFE(sem, tmp, sem_list, list) {
291 			struct amdgpu_cs_fence *info = &sem->signal_fence;
292 			struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
293 			dep->ip_type = info->ip_type;
294 			dep->ip_instance = info->ip_instance;
295 			dep->ring = info->ring;
296 			dep->ctx_id = info->context->id;
297 			dep->handle = info->fence;
298 
299 			list_del(&sem->list);
300 			amdgpu_cs_reset_sem(sem);
301 			amdgpu_cs_unreference_sem(sem);
302 		}
303 		i = cs.in.num_chunks++;
304 
305 		/* dependencies chunk */
306 		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
307 		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
308 		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
309 		chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
310 	}
311 
312 	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
313 				&cs, sizeof(cs));
314 	if (r)
315 		goto error_unlock;
316 
317 	ibs_request->seq_no = cs.out.handle;
318 	context->last_seq[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring] = ibs_request->seq_no;
319 error_unlock:
320 	pthread_mutex_unlock(&context->sequence_mutex);
321 	free(dependencies);
322 	free(sem_dependencies);
323 	return r;
324 }
325 
/**
 * Submit an array of command submission requests
 *
 * Requests are submitted one after another through amdgpu_cs_submit_one();
 * submission stops at the first request that fails.
 *
 * \param   context - \c [in] GPU Context
 * \param   flags - \c [in] not used by this function
 * \param   ibs_request - \c [in/out] array of submission requests
 * \param   number_of_requests - \c [in] number of entries in \p ibs_request
 *
 * \return  -EINVAL if \p context or \p ibs_request is NULL, 0 if every
 *          request was submitted, otherwise the error of the first
 *          failing request
*/
int amdgpu_cs_submit(amdgpu_context_handle context,
		     uint64_t flags,
		     struct amdgpu_cs_request *ibs_request,
		     uint32_t number_of_requests)
{
	uint32_t idx;
	int ret = 0;

	if (context == NULL || ibs_request == NULL)
		return -EINVAL;

	for (idx = 0; idx < number_of_requests && !ret; idx++)
		ret = amdgpu_cs_submit_one(context, &ibs_request[idx]);

	return ret;
}
347 
348 /**
349  * Calculate absolute timeout.
350  *
351  * \param   timeout - \c [in] timeout in nanoseconds.
352  *
353  * \return  absolute timeout in nanoseconds
354 */
amdgpu_cs_calculate_timeout(uint64_t timeout)355 drm_private uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
356 {
357 	int r;
358 
359 	if (timeout != AMDGPU_TIMEOUT_INFINITE) {
360 		struct timespec current;
361 		uint64_t current_ns;
362 		r = clock_gettime(CLOCK_MONOTONIC, &current);
363 		if (r) {
364 			fprintf(stderr, "clock_gettime() returned error (%d)!", errno);
365 			return AMDGPU_TIMEOUT_INFINITE;
366 		}
367 
368 		current_ns = ((uint64_t)current.tv_sec) * 1000000000ull;
369 		current_ns += current.tv_nsec;
370 		timeout += current_ns;
371 		if (timeout < current_ns)
372 			timeout = AMDGPU_TIMEOUT_INFINITE;
373 	}
374 	return timeout;
375 }
376 
/**
 * Wait for a command submission through DRM_IOCTL_AMDGPU_WAIT_CS
 *
 * \param   context - \c [in] GPU Context the submission belongs to
 * \param   ip - \c [in] hardware IP type
 * \param   ip_instance - \c [in] hardware IP instance
 * \param   ring - \c [in] ring index
 * \param   handle - \c [in] handle passed to the kernel as the submission
 *                   to wait for
 * \param   timeout_ns - \c [in] timeout in nanoseconds; used as is when
 *                       AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE is set in
 *                       \p flags, otherwise converted with
 *                       amdgpu_cs_calculate_timeout()
 * \param   flags - \c [in] AMDGPU_QUERY_FENCE_* flags
 * \param   busy - \c [out] status reported by the kernel, written only
 *                 on success
 *
 * \return  0 on success, -errno if the ioctl fails
*/
static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
				unsigned ip,
				unsigned ip_instance,
				uint32_t ring,
				uint64_t handle,
				uint64_t timeout_ns,
				uint64_t flags,
				bool *busy)
{
	amdgpu_device_handle dev = context->dev;
	union drm_amdgpu_wait_cs args;
	uint64_t deadline;

	if (flags & AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE)
		deadline = timeout_ns;
	else
		deadline = amdgpu_cs_calculate_timeout(timeout_ns);

	memset(&args, 0, sizeof(args));
	args.in.ctx_id = context->id;
	args.in.ip_type = ip;
	args.in.ip_instance = ip_instance;
	args.in.ring = ring;
	args.in.handle = handle;
	args.in.timeout = deadline;

	if (drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args))
		return -errno;

	*busy = args.out.status;
	return 0;
}
409 
amdgpu_cs_query_fence_status(struct amdgpu_cs_fence * fence,uint64_t timeout_ns,uint64_t flags,uint32_t * expired)410 int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
411 				 uint64_t timeout_ns,
412 				 uint64_t flags,
413 				 uint32_t *expired)
414 {
415 	bool busy = true;
416 	int r;
417 
418 	if (!fence || !expired || !fence->context)
419 		return -EINVAL;
420 	if (fence->ip_type >= AMDGPU_HW_IP_NUM)
421 		return -EINVAL;
422 	if (fence->ring >= AMDGPU_CS_MAX_RINGS)
423 		return -EINVAL;
424 	if (fence->fence == AMDGPU_NULL_SUBMIT_SEQ) {
425 		*expired = true;
426 		return 0;
427 	}
428 
429 	*expired = false;
430 
431 	r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
432 				fence->ip_instance, fence->ring,
433 			       	fence->fence, timeout_ns, flags, &busy);
434 
435 	if (!r && !busy)
436 		*expired = true;
437 
438 	return r;
439 }
440 
amdgpu_ioctl_wait_fences(struct amdgpu_cs_fence * fences,uint32_t fence_count,bool wait_all,uint64_t timeout_ns,uint32_t * status,uint32_t * first)441 static int amdgpu_ioctl_wait_fences(struct amdgpu_cs_fence *fences,
442 				    uint32_t fence_count,
443 				    bool wait_all,
444 				    uint64_t timeout_ns,
445 				    uint32_t *status,
446 				    uint32_t *first)
447 {
448 	struct drm_amdgpu_fence *drm_fences;
449 	amdgpu_device_handle dev = fences[0].context->dev;
450 	union drm_amdgpu_wait_fences args;
451 	int r;
452 	uint32_t i;
453 
454 	drm_fences = alloca(sizeof(struct drm_amdgpu_fence) * fence_count);
455 	for (i = 0; i < fence_count; i++) {
456 		drm_fences[i].ctx_id = fences[i].context->id;
457 		drm_fences[i].ip_type = fences[i].ip_type;
458 		drm_fences[i].ip_instance = fences[i].ip_instance;
459 		drm_fences[i].ring = fences[i].ring;
460 		drm_fences[i].seq_no = fences[i].fence;
461 	}
462 
463 	memset(&args, 0, sizeof(args));
464 	args.in.fences = (uint64_t)(uintptr_t)drm_fences;
465 	args.in.fence_count = fence_count;
466 	args.in.wait_all = wait_all;
467 	args.in.timeout_ns = amdgpu_cs_calculate_timeout(timeout_ns);
468 
469 	r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_FENCES, &args);
470 	if (r)
471 		return -errno;
472 
473 	*status = args.out.status;
474 
475 	if (first)
476 		*first = args.out.first_signaled;
477 
478 	return 0;
479 }
480 
amdgpu_cs_wait_fences(struct amdgpu_cs_fence * fences,uint32_t fence_count,bool wait_all,uint64_t timeout_ns,uint32_t * status,uint32_t * first)481 int amdgpu_cs_wait_fences(struct amdgpu_cs_fence *fences,
482 			  uint32_t fence_count,
483 			  bool wait_all,
484 			  uint64_t timeout_ns,
485 			  uint32_t *status,
486 			  uint32_t *first)
487 {
488 	uint32_t i;
489 
490 	/* Sanity check */
491 	if (!fences || !status || !fence_count)
492 		return -EINVAL;
493 
494 	for (i = 0; i < fence_count; i++) {
495 		if (NULL == fences[i].context)
496 			return -EINVAL;
497 		if (fences[i].ip_type >= AMDGPU_HW_IP_NUM)
498 			return -EINVAL;
499 		if (fences[i].ring >= AMDGPU_CS_MAX_RINGS)
500 			return -EINVAL;
501 	}
502 
503 	*status = 0;
504 
505 	return amdgpu_ioctl_wait_fences(fences, fence_count, wait_all,
506 					timeout_ns, status, first);
507 }
508 
/**
 * Create a semaphore
 *
 * Allocates a zero-initialized semaphore object with a reference count
 * of one.
 *
 * \param   sem - \c [out] semaphore handle
 *
 * \return  0 on success, -EINVAL if \p sem is NULL, -ENOMEM if the
 *          allocation fails
*/
int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
{
	struct amdgpu_semaphore *new_sem;

	if (sem == NULL)
		return -EINVAL;

	new_sem = calloc(1, sizeof(struct amdgpu_semaphore));
	if (new_sem == NULL)
		return -ENOMEM;

	atomic_set(&new_sem->refcount, 1);
	*sem = new_sem;

	return 0;
}
525 
/**
 * Signal a semaphore
 *
 * Records, as the semaphore's signal fence, the last sequence number
 * submitted on the given ring of the context, and takes an additional
 * reference on the semaphore.
 *
 * \param   ctx - \c [in] GPU Context
 * \param   ip_type - \c [in] hardware IP type
 * \param   ip_instance - \c [in] hardware IP instance
 * \param   ring - \c [in] ring index
 * \param   sem - \c [in] semaphore handle
 *
 * \return  0 on success; -EINVAL if \p ctx or \p sem is NULL, if
 *          ip_type/ip_instance/ring is out of range, or if the semaphore
 *          already carries a signal fence
*/
int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
			       uint32_t ip_type,
			       uint32_t ip_instance,
			       uint32_t ring,
			       amdgpu_semaphore_handle sem)
{
	if (!ctx || !sem)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	/*
	 * ip_instance indexes ctx->last_seq below, so it must be bounded too.
	 * NOTE(review): assumes last_seq has the same dimensions as
	 * sem_list - confirm in amdgpu_internal.h.
	 */
	if (ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	/* sem has been signaled */
	if (sem->signal_fence.context)
		return -EINVAL;
	pthread_mutex_lock(&ctx->sequence_mutex);
	sem->signal_fence.context = ctx;
	sem->signal_fence.ip_type = ip_type;
	sem->signal_fence.ip_instance = ip_instance;
	sem->signal_fence.ring = ring;
	sem->signal_fence.fence = ctx->last_seq[ip_type][ip_instance][ring];
	update_references(NULL, &sem->refcount);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}
551 
/**
 * Wait on a semaphore
 *
 * Queues the semaphore on the wait list of the given ring of the context.
 * The list is consumed by the next submission to that ring.
 *
 * \param   ctx - \c [in] GPU Context
 * \param   ip_type - \c [in] hardware IP type
 * \param   ip_instance - \c [in] hardware IP instance
 * \param   ring - \c [in] ring index
 * \param   sem - \c [in] semaphore handle; must have been signaled
 *
 * \return  0 on success; -EINVAL if \p ctx or \p sem is NULL, if
 *          ip_type/ip_instance/ring is out of range, or if the semaphore
 *          has not been signaled yet
*/
int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
			     uint32_t ip_type,
			     uint32_t ip_instance,
			     uint32_t ring,
			     amdgpu_semaphore_handle sem)
{
	if (!ctx || !sem)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	/* ip_instance indexes ctx->sem_list below, so it must be bounded too. */
	if (ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	/* must signal first */
	if (!sem->signal_fence.context)
		return -EINVAL;

	pthread_mutex_lock(&ctx->sequence_mutex);
	list_add(&sem->list, &ctx->sem_list[ip_type][ip_instance][ring]);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}
573 
/**
 * Clear the signal fence of a semaphore so that it can be signaled again.
 *
 * \param   sem - \c [in] semaphore handle
 *
 * \return  0 on success, -EINVAL if \p sem is NULL or carries no signal
 *          fence
*/
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem)
{
	if (sem == NULL)
		return -EINVAL;
	if (sem->signal_fence.context == NULL)
		return -EINVAL;

	sem->signal_fence.fence = 0;
	sem->signal_fence.ring = 0;
	sem->signal_fence.ip_instance = 0;
	sem->signal_fence.ip_type = 0;
	sem->signal_fence.context = NULL;

	return 0;
}
587 
/**
 * Drop one reference to a semaphore and free it when update_references()
 * reports that it should be released.
 *
 * \param   sem - \c [in] semaphore handle
 *
 * \return  0 on success, -EINVAL if \p sem is NULL
*/
static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem)
{
	if (sem == NULL)
		return -EINVAL;

	if (!update_references(&sem->refcount, NULL))
		return 0;

	free(sem);
	return 0;
}
597 
/**
 * Destroy a semaphore
 *
 * Drops the caller's reference through amdgpu_cs_unreference_sem().
 *
 * \param   sem - \c [in] semaphore handle
 *
 * \return  0 on success, -EINVAL if \p sem is NULL
*/
int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
{
	int ret = amdgpu_cs_unreference_sem(sem);

	return ret;
}
602 
/**
 * Create a sync object with flags
 *
 * Wrapper around drmSyncobjCreate() on the device's file descriptor.
 *
 * \param   dev - \c [in] Device handle
 * \param   flags - \c [in] flags forwarded to drmSyncobjCreate()
 * \param   handle - \c [out] forwarded to drmSyncobjCreate()
 *
 * \return  -EINVAL if \p dev is NULL, otherwise the result of
 *          drmSyncobjCreate()
*/
int amdgpu_cs_create_syncobj2(amdgpu_device_handle dev,
			      uint32_t  flags,
			      uint32_t *handle)
{
	if (!dev)
		return -EINVAL;

	return drmSyncobjCreate(dev->fd, flags, handle);
}
612 
/**
 * Create a sync object without flags
 *
 * Wrapper around drmSyncobjCreate() with flags set to 0.
 *
 * \param   dev - \c [in] Device handle
 * \param   handle - \c [out] forwarded to drmSyncobjCreate()
 *
 * \return  -EINVAL if \p dev is NULL, otherwise the result of
 *          drmSyncobjCreate()
*/
int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
			     uint32_t *handle)
{
	if (!dev)
		return -EINVAL;

	return drmSyncobjCreate(dev->fd, 0, handle);
}
621 
/**
 * Destroy a sync object
 *
 * Wrapper around drmSyncobjDestroy() on the device's file descriptor.
 *
 * \param   dev - \c [in] Device handle
 * \param   handle - \c [in] sync object handle
 *
 * \return  -EINVAL if \p dev is NULL, otherwise the result of
 *          drmSyncobjDestroy()
*/
int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
			      uint32_t handle)
{
	if (!dev)
		return -EINVAL;

	return drmSyncobjDestroy(dev->fd, handle);
}
630 
/**
 * Reset sync objects
 *
 * Wrapper around drmSyncobjReset() on the device's file descriptor.
 *
 * \param   dev - \c [in] Device handle
 * \param   syncobjs - \c [in] array of sync object handles
 * \param   syncobj_count - \c [in] number of entries in \p syncobjs
 *
 * \return  -EINVAL if \p dev is NULL, otherwise the result of
 *          drmSyncobjReset()
*/
int amdgpu_cs_syncobj_reset(amdgpu_device_handle dev,
			    const uint32_t *syncobjs, uint32_t syncobj_count)
{
	if (!dev)
		return -EINVAL;

	return drmSyncobjReset(dev->fd, syncobjs, syncobj_count);
}
639 
/**
 * Signal sync objects
 *
 * Wrapper around drmSyncobjSignal() on the device's file descriptor.
 *
 * \param   dev - \c [in] Device handle
 * \param   syncobjs - \c [in] array of sync object handles
 * \param   syncobj_count - \c [in] number of entries in \p syncobjs
 *
 * \return  -EINVAL if \p dev is NULL, otherwise the result of
 *          drmSyncobjSignal()
*/
int amdgpu_cs_syncobj_signal(amdgpu_device_handle dev,
			     const uint32_t *syncobjs, uint32_t syncobj_count)
{
	if (!dev)
		return -EINVAL;

	return drmSyncobjSignal(dev->fd, syncobjs, syncobj_count);
}
648 
/**
 * Wait on sync objects
 *
 * Wrapper around drmSyncobjWait() on the device's file descriptor; all
 * other arguments are forwarded unchanged.
 *
 * \param   dev - \c [in] Device handle
 * \param   handles - \c [in] array of sync object handles
 * \param   num_handles - \c [in] number of entries in \p handles
 * \param   timeout_nsec - \c [in] timeout forwarded to drmSyncobjWait()
 * \param   flags - \c [in] flags forwarded to drmSyncobjWait()
 * \param   first_signaled - \c [out] forwarded to drmSyncobjWait()
 *
 * \return  -EINVAL if \p dev is NULL, otherwise the result of
 *          drmSyncobjWait()
*/
int amdgpu_cs_syncobj_wait(amdgpu_device_handle dev,
			   uint32_t *handles, unsigned num_handles,
			   int64_t timeout_nsec, unsigned flags,
			   uint32_t *first_signaled)
{
	if (!dev)
		return -EINVAL;

	return drmSyncobjWait(dev->fd, handles, num_handles,
			      timeout_nsec, flags, first_signaled);
}
660 
/**
 * Export a sync object as a file descriptor
 *
 * Wrapper around drmSyncobjHandleToFD() on the device's file descriptor.
 *
 * \param   dev - \c [in] Device handle
 * \param   handle - \c [in] sync object handle
 * \param   shared_fd - \c [out] forwarded to drmSyncobjHandleToFD()
 *
 * \return  -EINVAL if \p dev is NULL, otherwise the result of
 *          drmSyncobjHandleToFD()
*/
int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
			     uint32_t handle,
			     int *shared_fd)
{
	if (!dev)
		return -EINVAL;

	return drmSyncobjHandleToFD(dev->fd, handle, shared_fd);
}
670 
/**
 * Import a sync object from a file descriptor
 *
 * Wrapper around drmSyncobjFDToHandle() on the device's file descriptor.
 *
 * \param   dev - \c [in] Device handle
 * \param   shared_fd - \c [in] file descriptor to import
 * \param   handle - \c [out] forwarded to drmSyncobjFDToHandle()
 *
 * \return  -EINVAL if \p dev is NULL, otherwise the result of
 *          drmSyncobjFDToHandle()
*/
int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
			     int shared_fd,
			     uint32_t *handle)
{
	if (!dev)
		return -EINVAL;

	return drmSyncobjFDToHandle(dev->fd, shared_fd, handle);
}
680 
/**
 * Export a sync object to a sync file
 *
 * Wrapper around drmSyncobjExportSyncFile() on the device's file
 * descriptor.
 *
 * \param   dev - \c [in] Device handle
 * \param   syncobj - \c [in] sync object handle
 * \param   sync_file_fd - \c [out] forwarded to drmSyncobjExportSyncFile()
 *
 * \return  -EINVAL if \p dev is NULL, otherwise the result of
 *          drmSyncobjExportSyncFile()
*/
int amdgpu_cs_syncobj_export_sync_file(amdgpu_device_handle dev,
				       uint32_t syncobj,
				       int *sync_file_fd)
{
	if (!dev)
		return -EINVAL;

	return drmSyncobjExportSyncFile(dev->fd, syncobj, sync_file_fd);
}
690 
/**
 * Import a sync file into a sync object
 *
 * Wrapper around drmSyncobjImportSyncFile() on the device's file
 * descriptor.
 *
 * \param   dev - \c [in] Device handle
 * \param   syncobj - \c [in] sync object handle
 * \param   sync_file_fd - \c [in] sync file descriptor to import
 *
 * \return  -EINVAL if \p dev is NULL, otherwise the result of
 *          drmSyncobjImportSyncFile()
*/
int amdgpu_cs_syncobj_import_sync_file(amdgpu_device_handle dev,
				       uint32_t syncobj,
				       int sync_file_fd)
{
	if (!dev)
		return -EINVAL;

	return drmSyncobjImportSyncFile(dev->fd, syncobj, sync_file_fd);
}
700 
/**
 * Submit caller-built chunks to the kernel
 *
 * Builds the array of chunk pointers expected by DRM_AMDGPU_CS on the
 * stack and issues the request.
 *
 * \param   dev - \c [in] Device handle
 * \param   context - \c [in] GPU Context
 * \param   bo_list_handle - \c [in] buffer list handle; may be NULL, in
 *                           which case handle 0 is passed to the kernel
 * \param   num_chunks - \c [in] number of entries in \p chunks; must be
 *                       greater than zero
 * \param   chunks - \c [in] array of chunks to submit
 * \param   seq_no - \c [out] handle returned by the kernel for the
 *                   submission; may be NULL
 *
 * \return  0 on success; -EINVAL if \p dev, \p context or \p chunks is NULL
 *          or \p num_chunks is not positive; otherwise the result of
 *          drmCommandWriteRead()
*/
int amdgpu_cs_submit_raw(amdgpu_device_handle dev,
			 amdgpu_context_handle context,
			 amdgpu_bo_list_handle bo_list_handle,
			 int num_chunks,
			 struct drm_amdgpu_cs_chunk *chunks,
			 uint64_t *seq_no)
{
	union drm_amdgpu_cs cs;
	uint64_t *chunk_array;
	int i, r;

	if (!dev || !context || !chunks)
		return -EINVAL;
	/*
	 * A negative count would be converted to a huge size for alloca()
	 * below, so reject it together with zero.
	 */
	if (num_chunks <= 0)
		return -EINVAL;

	memset(&cs, 0, sizeof(cs));
	chunk_array = alloca(sizeof(uint64_t) * num_chunks);
	for (i = 0; i < num_chunks; i++)
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
	cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
	cs.in.ctx_id = context->id;
	cs.in.bo_list_handle = bo_list_handle ? bo_list_handle->handle : 0;
	cs.in.num_chunks = num_chunks;
	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS,
				&cs, sizeof(cs));
	if (r)
		return r;

	if (seq_no)
		*seq_no = cs.out.handle;
	return 0;
}
731 
amdgpu_cs_chunk_fence_info_to_data(struct amdgpu_cs_fence_info * fence_info,struct drm_amdgpu_cs_chunk_data * data)732 void amdgpu_cs_chunk_fence_info_to_data(struct amdgpu_cs_fence_info *fence_info,
733 					struct drm_amdgpu_cs_chunk_data *data)
734 {
735 	data->fence_data.handle = fence_info->handle->handle;
736 	data->fence_data.offset = fence_info->offset * sizeof(uint64_t);
737 }
738 
amdgpu_cs_chunk_fence_to_dep(struct amdgpu_cs_fence * fence,struct drm_amdgpu_cs_chunk_dep * dep)739 void amdgpu_cs_chunk_fence_to_dep(struct amdgpu_cs_fence *fence,
740 				  struct drm_amdgpu_cs_chunk_dep *dep)
741 {
742 	dep->ip_type = fence->ip_type;
743 	dep->ip_instance = fence->ip_instance;
744 	dep->ring = fence->ring;
745 	dep->ctx_id = fence->context->id;
746 	dep->handle = fence->fence;
747 }
748 
/**
 * Convert a fence into a kernel handle
 *
 * Issues the DRM_AMDGPU_FENCE_TO_HANDLE request for the fence.
 *
 * \param   dev - \c [in] Device handle
 * \param   fence - \c [in] fence to convert; its context must be set
 * \param   what - \c [in] forwarded to the kernel as the "what" field
 * \param   out_handle - \c [out] handle returned by the kernel, written
 *                       only on success
 *
 * \return  the result of drmCommandWriteRead()
*/
int amdgpu_cs_fence_to_handle(amdgpu_device_handle dev,
			      struct amdgpu_cs_fence *fence,
			      uint32_t what,
			      uint32_t *out_handle)
{
	union drm_amdgpu_fence_to_handle fth;
	int ret;

	memset(&fth, 0, sizeof(fth));
	fth.in.what = what;
	fth.in.fence.seq_no = fence->fence;
	fth.in.fence.ring = fence->ring;
	fth.in.fence.ip_instance = fence->ip_instance;
	fth.in.fence.ip_type = fence->ip_type;
	fth.in.fence.ctx_id = fence->context->id;

	ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_FENCE_TO_HANDLE,
				  &fth, sizeof(fth));
	if (ret)
		return ret;

	*out_handle = fth.out.handle;
	return ret;
}
771