/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <sys/ioctl.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif

#include "xf86drm.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"

static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem);
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);

/**
 * Create command submission context
 *
 * \param   dev - \c [in] amdgpu device handle
 * \param   context - \c [out] amdgpu context handle
 *
 * \return  0 on success otherwise POSIX Error code
*/
int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
			 amdgpu_context_handle *context)
{
	struct amdgpu_context *gpu_context;
	union drm_amdgpu_ctx args;
	int i, j, k;
	int r;

	if (NULL == dev)
		return -EINVAL;
	if (NULL == context)
		return -EINVAL;

	gpu_context = calloc(1, sizeof(struct amdgpu_context));
	if (NULL == gpu_context)
		return -ENOMEM;

	gpu_context->dev = dev;

	r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
	if (r)
		goto error;

	/* Create the context */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
	if (r)
		goto error;

	gpu_context->id = args.out.alloc.ctx_id;
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++)
			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++)
				list_inithead(&gpu_context->sem_list[i][j][k]);
	*context = (amdgpu_context_handle)gpu_context;

	return 0;

error:
	pthread_mutex_destroy(&gpu_context->sequence_mutex);
	free(gpu_context);
	return r;
}

/**
 * Release command submission context
 *
 * \param   context - \c [in] amdgpu context handle
 *
 * \return  0 on success otherwise POSIX Error code
*/
int amdgpu_cs_ctx_free(amdgpu_context_handle context)
{
	union drm_amdgpu_ctx args;
	int i, j, k;
	int r;

	if (NULL == context)
		return -EINVAL;

	pthread_mutex_destroy(&context->sequence_mutex);

	/* now deal with kernel side */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++) {
			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++) {
				amdgpu_semaphore_handle sem, tmp;
				/* unreferencing may free the entry, so use the safe iterator */
				LIST_FOR_EACH_ENTRY_SAFE(sem, tmp, &context->sem_list[i][j][k], list) {
					list_del(&sem->list);
					amdgpu_cs_reset_sem(sem);
					amdgpu_cs_unreference_sem(sem);
				}
			}
		}
	}
	free(context);

	return r;
}

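/**
 * Query reset state of the context
 *
 * \param   context - \c [in] amdgpu context handle
 * \param   state - \c [out] reset status reported by the kernel
 * \param   hangs - \c [out] number of hangs reported by the kernel
 *
 * \return  0 on success otherwise POSIX Error code
*/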
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
				uint32_t *state, uint32_t *hangs)
{
	union drm_amdgpu_ctx args;
	int r;

	if (!context)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	if (!r) {
		*state = args.out.state.reset_status;
		*hangs = args.out.state.hangs;
	}
	return r;
}

/**
 * Submit command to kernel DRM
 *
 * \param   context - \c [in]  GPU Context
 * \param   ibs_request - \c [in]  Pointer to a single submission request;
 *          the resulting fence sequence number is written to
 *          ibs_request->seq_no
 *
 * \return  0 on success otherwise POSIX Error code
 * \sa amdgpu_cs_submit()
*/
static int amdgpu_cs_submit_one(amdgpu_context_handle context,
				struct amdgpu_cs_request *ibs_request)
{
	union drm_amdgpu_cs cs;
	uint64_t *chunk_array;
	struct drm_amdgpu_cs_chunk *chunks;
	struct drm_amdgpu_cs_chunk_data *chunk_data;
	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
	struct list_head *sem_list;
	amdgpu_semaphore_handle sem, tmp;
	uint32_t i, size, sem_count = 0;
	bool user_fence;
	int r = 0;

	if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
		return -EINVAL;
	if (ibs_request->number_of_ibs == 0) {
		ibs_request->seq_no = AMDGPU_NULL_SUBMIT_SEQ;
		return 0;
	}
	user_fence = (ibs_request->fence_info.handle != NULL);

	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;

	chunk_array = alloca(sizeof(uint64_t) * size);
	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);

	size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);

	chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);

	memset(&cs, 0, sizeof(cs));
	cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
	cs.in.ctx_id = context->id;
	if (ibs_request->resources)
		cs.in.bo_list_handle = ibs_request->resources->handle;
	cs.in.num_chunks = ibs_request->number_of_ibs;
	/* IB chunks */
	for (i = 0; i < ibs_request->number_of_ibs; i++) {
		struct amdgpu_cs_ib_info *ib;
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		ib = &ibs_request->ibs[i];

		chunk_data[i].ib_data._pad = 0;
		chunk_data[i].ib_data.va_start = ib->ib_mc_address;
		chunk_data[i].ib_data.ib_bytes = ib->size * 4;
		chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
		chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
		chunk_data[i].ib_data.ring = ibs_request->ring;
		chunk_data[i].ib_data.flags = ib->flags;
	}

	pthread_mutex_lock(&context->sequence_mutex);

	if (user_fence) {
		i = cs.in.num_chunks++;

		/* fence chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		/* fence bo handle */
		chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
		/* offset */
		chunk_data[i].fence_data.offset =
			ibs_request->fence_info.offset * sizeof(uint64_t);
	}

	if (ibs_request->number_of_dependencies) {
		dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) *
			ibs_request->number_of_dependencies);
		if (!dependencies) {
			r = -ENOMEM;
			goto error_unlock;
		}

		for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
			struct amdgpu_cs_fence *info = &ibs_request->dependencies[i];
			struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;
		}

		i = cs.in.num_chunks++;

		/* dependencies chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
			* ibs_request->number_of_dependencies;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
	}

	sem_list = &context->sem_list[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring];
	LIST_FOR_EACH_ENTRY(sem, sem_list, list)
		sem_count++;
	if (sem_count) {
		sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_count);
		if (!sem_dependencies) {
			r = -ENOMEM;
			goto error_unlock;
		}
		sem_count = 0;
		LIST_FOR_EACH_ENTRY_SAFE(sem, tmp, sem_list, list) {
			struct amdgpu_cs_fence *info = &sem->signal_fence;
			struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;

			list_del(&sem->list);
			amdgpu_cs_reset_sem(sem);
			amdgpu_cs_unreference_sem(sem);
		}
		i = cs.in.num_chunks++;

		/* semaphore dependencies chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
	}

	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
				&cs, sizeof(cs));
	if (r)
		goto error_unlock;

	ibs_request->seq_no = cs.out.handle;
	context->last_seq[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring] = ibs_request->seq_no;
error_unlock:
	pthread_mutex_unlock(&context->sequence_mutex);
	free(dependencies);
	free(sem_dependencies);
	return r;
}

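/**
 * Submit an array of command submission requests
 *
 * \param   context - \c [in] GPU Context
 * \param   flags - \c [in] submission flags (not used by this function)
 * \param   ibs_request - \c [in] array of submission requests
 * \param   number_of_requests - \c [in] number of requests in the array
 *
 * \return  0 on success otherwise POSIX Error code
 * \sa amdgpu_cs_submit_one()
*/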
int amdgpu_cs_submit(amdgpu_context_handle context,
		     uint64_t flags,
		     struct amdgpu_cs_request *ibs_request,
		     uint32_t number_of_requests)
{
	uint32_t i;
	int r;

	if (NULL == context)
		return -EINVAL;
	if (NULL == ibs_request)
		return -EINVAL;

	r = 0;
	for (i = 0; i < number_of_requests; i++) {
		r = amdgpu_cs_submit_one(context, ibs_request);
		if (r)
			break;
		ibs_request++;
	}

	return r;
}

/**
 * Calculate absolute timeout.
 *
 * \param   timeout - \c [in] timeout in nanoseconds.
 *
 * \return  absolute timeout in nanoseconds
*/
drm_private uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
{
	int r;

	if (timeout != AMDGPU_TIMEOUT_INFINITE) {
		struct timespec current;
		uint64_t current_ns;
		r = clock_gettime(CLOCK_MONOTONIC, &current);
		if (r) {
			fprintf(stderr, "clock_gettime() returned error (%d)!\n", errno);
			return AMDGPU_TIMEOUT_INFINITE;
		}

		current_ns = ((uint64_t)current.tv_sec) * 1000000000ull;
		current_ns += current.tv_nsec;
		timeout += current_ns;
		if (timeout < current_ns)
			timeout = AMDGPU_TIMEOUT_INFINITE;
	}
	return timeout;
}

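/**
 * Wait for a command submission via the WAIT_CS ioctl
 *
 * \param   context - \c [in] GPU Context
 * \param   ip - \c [in] hardware IP block type
 * \param   ip_instance - \c [in] index of the IP block instance
 * \param   ring - \c [in] ring index
 * \param   handle - \c [in] sequence number of the submission to wait for
 * \param   timeout_ns - \c [in] timeout in nanoseconds
 * \param   flags - \c [in] AMDGPU_QUERY_FENCE_* flags
 * \param   busy - \c [out] set to the kernel's busy status, i.e. true if
 *          the submission has not completed yet
 *
 * \return  0 on success otherwise negative errno
*/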
static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
				unsigned ip,
				unsigned ip_instance,
				uint32_t ring,
				uint64_t handle,
				uint64_t timeout_ns,
				uint64_t flags,
				bool *busy)
{
	amdgpu_device_handle dev = context->dev;
	union drm_amdgpu_wait_cs args;
	int r;

	memset(&args, 0, sizeof(args));
	args.in.handle = handle;
	args.in.ip_type = ip;
	args.in.ip_instance = ip_instance;
	args.in.ring = ring;
	args.in.ctx_id = context->id;

	if (flags & AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE)
		args.in.timeout = timeout_ns;
	else
		args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);

	r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
	if (r)
		return -errno;

	*busy = args.out.status;
	return 0;
}

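/**
 * Query status of a command submission fence
 *
 * \param   fence - \c [in] fence to query
 * \param   timeout_ns - \c [in] how long to wait for the fence
 * \param   flags - \c [in] AMDGPU_QUERY_FENCE_* flags
 * \param   expired - \c [out] set to true if the fence has signaled
 *
 * \return  0 on success otherwise POSIX Error code
*/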
int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
				 uint64_t timeout_ns,
				 uint64_t flags,
				 uint32_t *expired)
{
	bool busy = true;
	int r;

	if (NULL == fence)
		return -EINVAL;
	if (NULL == expired)
		return -EINVAL;
	if (NULL == fence->context)
		return -EINVAL;
	if (fence->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (fence->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (fence->fence == AMDGPU_NULL_SUBMIT_SEQ) {
		*expired = true;
		return 0;
	}

	*expired = false;

	r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
				 fence->ip_instance, fence->ring,
				 fence->fence, timeout_ns, flags, &busy);

	if (!r && !busy)
		*expired = true;

	return r;
}

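/**
 * Create a semaphore with an initial reference count of one
 *
 * \param   sem - \c [out] new semaphore handle
 *
 * \return  0 on success otherwise POSIX Error code
*/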
int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
{
	struct amdgpu_semaphore *gpu_semaphore;

	if (NULL == sem)
		return -EINVAL;

	gpu_semaphore = calloc(1, sizeof(struct amdgpu_semaphore));
	if (NULL == gpu_semaphore)
		return -ENOMEM;

	atomic_set(&gpu_semaphore->refcount, 1);
	*sem = gpu_semaphore;

	return 0;
}

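/**
 * Signal a semaphore from the last submission on the given ring
 *
 * The semaphore records the context's last sequence number for the ring
 * as its signal fence and gains an extra reference.
 *
 * \param   ctx - \c [in] context that signals the semaphore
 * \param   ip_type - \c [in] hardware IP block type
 * \param   ip_instance - \c [in] index of the IP block instance
 * \param   ring - \c [in] ring index
 * \param   sem - \c [in] semaphore to signal
 *
 * \return  0 on success otherwise POSIX Error code
*/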
int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
			       uint32_t ip_type,
			       uint32_t ip_instance,
			       uint32_t ring,
			       amdgpu_semaphore_handle sem)
{
	if (NULL == ctx)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (NULL == sem)
		return -EINVAL;
	/* sem has been signaled */
	if (sem->signal_fence.context)
		return -EINVAL;
	pthread_mutex_lock(&ctx->sequence_mutex);
	sem->signal_fence.context = ctx;
	sem->signal_fence.ip_type = ip_type;
	sem->signal_fence.ip_instance = ip_instance;
	sem->signal_fence.ring = ring;
	sem->signal_fence.fence = ctx->last_seq[ip_type][ip_instance][ring];
	update_references(NULL, &sem->refcount);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}

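/**
 * Queue a semaphore wait on the given ring
 *
 * The semaphore must have been signaled first; it is added to the ring's
 * semaphore list and turned into a dependency by the next submission on
 * that ring.
 *
 * \param   ctx - \c [in] context whose submissions should wait
 * \param   ip_type - \c [in] hardware IP block type
 * \param   ip_instance - \c [in] index of the IP block instance
 * \param   ring - \c [in] ring index
 * \param   sem - \c [in] semaphore to wait on
 *
 * \return  0 on success otherwise POSIX Error code
*/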
int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
			     uint32_t ip_type,
			     uint32_t ip_instance,
			     uint32_t ring,
			     amdgpu_semaphore_handle sem)
{
	if (NULL == ctx)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (NULL == sem)
		return -EINVAL;
	/* must signal first */
	if (NULL == sem->signal_fence.context)
		return -EINVAL;

	pthread_mutex_lock(&ctx->sequence_mutex);
	list_add(&sem->list, &ctx->sem_list[ip_type][ip_instance][ring]);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}

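/**
 * Clear the signal fence of a semaphore so it can be signaled again
 *
 * \param   sem - \c [in] semaphore to reset
 *
 * \return  0 on success otherwise POSIX Error code
*/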
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem)
{
	if (NULL == sem)
		return -EINVAL;
	if (NULL == sem->signal_fence.context)
		return -EINVAL;

	sem->signal_fence.context = NULL;
	sem->signal_fence.ip_type = 0;
	sem->signal_fence.ip_instance = 0;
	sem->signal_fence.ring = 0;
	sem->signal_fence.fence = 0;

	return 0;
}

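/**
 * Drop one reference to a semaphore, freeing it when the count reaches zero
 *
 * \param   sem - \c [in] semaphore to unreference
 *
 * \return  0 on success otherwise POSIX Error code
*/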
static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem)
{
	if (NULL == sem)
		return -EINVAL;

	if (update_references(&sem->refcount, NULL))
		free(sem);
	return 0;
}

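/**
 * Destroy a semaphore by dropping the caller's reference
 *
 * \param   sem - \c [in] semaphore to destroy
 *
 * \return  0 on success otherwise POSIX Error code
*/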
int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
{
	return amdgpu_cs_unreference_sem(sem);
}