1 /**************************************************************************
2 
3 Copyright (C) 2004 Nicolai Haehnle.
4 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
5 
6 The Weather Channel (TM) funded Tungsten Graphics to develop the
7 initial release of the Radeon 8500 driver under the XFree86 license.
8 This notice must be preserved.
9 
10 All Rights Reserved.
11 
12 Permission is hereby granted, free of charge, to any person obtaining a
13 copy of this software and associated documentation files (the "Software"),
14 to deal in the Software without restriction, including without limitation
15 on the rights to use, copy, modify, merge, publish, distribute, sub
16 license, and/or sell copies of the Software, and to permit persons to whom
17 the Software is furnished to do so, subject to the following conditions:
18 
19 The above copyright notice and this permission notice (including the next
20 paragraph) shall be included in all copies or substantial portions of the
21 Software.
22 
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
27 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29 USE OR OTHER DEALINGS IN THE SOFTWARE.
30 
31 **************************************************************************/
32 
33 #include <errno.h>
34 #include "radeon_common.h"
35 #include "radeon_fog.h"
36 #include "main/simple_list.h"
37 
/*
 * COPY_DWORDS(dst, src, nr): copy nr 32-bit words from src to dst.
 *
 * dst must be a modifiable pointer lvalue: both variants update it (the
 * portable one advances it by nr elements, the x86 one writes back the
 * destination index register after the string move).  src is not modified.
 */
#if defined(USE_X86_ASM)
/*
 * "rep movsl" reads and writes memory that is not named by any operand,
 * so a "memory" clobber is declared to keep the compiler from caching or
 * reordering accesses to the copied ranges across the asm statement.
 */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src)				\
			      : "memory" );				\
} while (0)
#else
/*
 * Portable fallback.  Every argument is parenthesized and nr is evaluated
 * exactly once, so expressions such as "c ? 4 : 0" behave as expected.
 */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
   const int copy_nr_ = (nr);						\
   int copy_j_;								\
   for ( copy_j_ = 0 ; copy_j_ < copy_nr_ ; copy_j_++ )		\
      (dst)[copy_j_] = ((const int *)(src))[copy_j_];			\
   (dst) += copy_nr_;							\
} while (0)
#endif
57 
radeonEmitVec4(uint32_t * out,const GLvoid * data,int stride,int count)58 void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
59 {
60 	int i;
61 
62 	if (RADEON_DEBUG & RADEON_VERTS)
63 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
64 			__FUNCTION__, count, stride, (void *)out, (void *)data);
65 
66 	if (stride == 4)
67 		COPY_DWORDS(out, data, count);
68 	else
69 		for (i = 0; i < count; i++) {
70 			out[0] = *(int *)data;
71 			out++;
72 			data += stride;
73 		}
74 }
75 
radeonEmitVec8(uint32_t * out,const GLvoid * data,int stride,int count)76 void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
77 {
78 	int i;
79 
80 	if (RADEON_DEBUG & RADEON_VERTS)
81 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
82 			__FUNCTION__, count, stride, (void *)out, (void *)data);
83 
84 	if (stride == 8)
85 		COPY_DWORDS(out, data, count * 2);
86 	else
87 		for (i = 0; i < count; i++) {
88 			out[0] = *(int *)data;
89 			out[1] = *(int *)(data + 4);
90 			out += 2;
91 			data += stride;
92 		}
93 }
94 
radeonEmitVec12(uint32_t * out,const GLvoid * data,int stride,int count)95 void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
96 {
97 	int i;
98 
99 	if (RADEON_DEBUG & RADEON_VERTS)
100 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
101 			__FUNCTION__, count, stride, (void *)out, (void *)data);
102 
103 	if (stride == 12) {
104 		COPY_DWORDS(out, data, count * 3);
105     }
106 	else
107 		for (i = 0; i < count; i++) {
108 			out[0] = *(int *)data;
109 			out[1] = *(int *)(data + 4);
110 			out[2] = *(int *)(data + 8);
111 			out += 3;
112 			data += stride;
113 		}
114 }
115 
radeonEmitVec16(uint32_t * out,const GLvoid * data,int stride,int count)116 void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
117 {
118 	int i;
119 
120 	if (RADEON_DEBUG & RADEON_VERTS)
121 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
122 			__FUNCTION__, count, stride, (void *)out, (void *)data);
123 
124 	if (stride == 16)
125 		COPY_DWORDS(out, data, count * 4);
126 	else
127 		for (i = 0; i < count; i++) {
128 			out[0] = *(int *)data;
129 			out[1] = *(int *)(data + 4);
130 			out[2] = *(int *)(data + 8);
131 			out[3] = *(int *)(data + 12);
132 			out += 4;
133 			data += stride;
134 		}
135 }
136 
rcommon_emit_vector(struct gl_context * ctx,struct radeon_aos * aos,const GLvoid * data,int size,int stride,int count)137 void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
138 			 const GLvoid * data, int size, int stride, int count)
139 {
140 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
141 	uint32_t *out;
142 
143 	if (stride == 0) {
144 		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
145 		count = 1;
146 		aos->stride = 0;
147 	} else {
148 		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
149 		aos->stride = size;
150 	}
151 
152 	aos->components = size;
153 	aos->count = count;
154 
155 	radeon_bo_map(aos->bo, 1);
156 	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
157 	switch (size) {
158 	case 1: radeonEmitVec4(out, data, stride, count); break;
159 	case 2: radeonEmitVec8(out, data, stride, count); break;
160 	case 3: radeonEmitVec12(out, data, stride, count); break;
161 	case 4: radeonEmitVec16(out, data, stride, count); break;
162 	default:
163 		assert(0);
164 		break;
165 	}
166 	radeon_bo_unmap(aos->bo);
167 }
168 
/**
 * Allocate DMA space for a fog array and fill it with one value per element,
 * each obtained by passing the source float through
 * radeonComputeFogBlendFactor().
 *
 * \param ctx    GL context the radeon context is derived from.
 * \param aos    descriptor that receives bo, offset, stride, components
 *               and count.
 * \param data   address of the first source float.
 * \param stride distance in bytes between consecutive source floats; 0 means
 *               only a single element is emitted and aos->stride is 0.
 * \param count  number of elements (forced to 1 when stride is 0).
 */
void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
			 GLvoid *data, int stride, int count)
{
	int i;
	float *out;
	int size = 1;	/* one dword per element */
	/* Byte cursor: arithmetic on a void pointer is not standard C. */
	const char *src = (const char *)data;
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d\n",
			__FUNCTION__, count, stride);

	if (stride == 0) {
		radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	/* Emit the data */
	radeon_bo_map(aos->bo, 1);
	out = (float*)((char*)aos->bo->ptr + aos->offset);
	for (i = 0; i < count; i++) {
		out[0] = radeonComputeFogBlendFactor( ctx, *(const GLfloat *)src );
		out++;
		src += stride;
	}
	radeon_bo_unmap(aos->bo);
}
203 
/**
 * Reset the DMA bookkeeping of a context: the free, wait and reserved
 * lists become empty and the minimum buffer size is MAX_DMA_BUF_SZ.
 */
void radeon_init_dma(radeonContextPtr rmesa)
{
	struct radeon_dma *dma = &rmesa->dma;

	dma->minimum_size = MAX_DMA_BUF_SZ;

	make_empty_list(&dma->reserved);
	make_empty_list(&dma->wait);
	make_empty_list(&dma->free);
}
211 
/**
 * Put a buffer able to hold size bytes at the head of dma.reserved and
 * map it, restarting the current offsets at 0.
 *
 * The buffer is taken from the tail of dma.free when that entry is large
 * enough; otherwise a new one of dma.minimum_size bytes is opened, flushing
 * the command buffer and retrying for as long as the open fails.
 *
 * \param rmesa radeon context.
 * \param size  number of bytes the caller needs.
 */
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;
	/* Raise the minimum buffer size to at least the requested size,
	   rounded up to the next multiple of 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	/* minimum_size is printed with %zu, as in radeonReleaseDmaRegions. */
	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zu\n",
			__FUNCTION__, size, rmesa->dma.minimum_size);

	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			/* Flush the command buffer and try again. */
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers from the end of the list so that
		   unused buffers collect at the beginning of the list, from
		   where they are freed later. */
		dma_bo = last_elem(&rmesa->dma.free);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr,"failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer has been flushed during the space check,
		   which emptied the reserved list. */
		/* NOTE(review): dma_bo is reused as-is here; if the flush moved
		   it to another list or freed it, re-opening and re-inserting it
		   would be unsafe -- confirm against the flush path. */
		goto again_alloc;
	}
	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
261 
262 /* Allocates a region from rmesa->dma.current.  If there isn't enough
263  * space in current, grab a new buffer (and discard what was left of current)
264  */
radeonAllocDmaRegion(radeonContextPtr rmesa,struct radeon_bo ** pbo,int * poffset,int bytes,int alignment)265 void radeonAllocDmaRegion(radeonContextPtr rmesa,
266 			  struct radeon_bo **pbo, int *poffset,
267 			  int bytes, int alignment)
268 {
269 	if (RADEON_DEBUG & RADEON_IOCTL)
270 		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
271 
272 	if (rmesa->dma.flush)
273 		rmesa->dma.flush(rmesa->glCtx);
274 
275 	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
276 
277 	alignment--;
278 	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
279 
280 	if (is_empty_list(&rmesa->dma.reserved)
281 		|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
282 		radeonRefillCurrentDmaRegion(rmesa, bytes);
283 
284 	*poffset = rmesa->dma.current_used;
285 	*pbo = first_elem(&rmesa->dma.reserved)->bo;
286 	radeon_bo_ref(*pbo);
287 
288 	/* Always align to at least 16 bytes */
289 	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
290 	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
291 
292 	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
293 }
294 
/**
 * Destroy every entry of the free, wait and reserved lists: each entry's
 * buffer reference is dropped, the entry is unlinked and its memory freed.
 */
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *entry;
	struct radeon_dma_bo *next;

	if (RADEON_DEBUG & RADEON_DMA) {
		fprintf(stderr, "%s\n", __FUNCTION__);
	}

	/* Buffers available for reuse. */
	foreach_s(entry, next, &rmesa->dma.free) {
		radeon_bo_unref(entry->bo);
		remove_from_list(entry);
		FREE(entry);
	}

	/* Buffers on the wait list. */
	foreach_s(entry, next, &rmesa->dma.wait) {
		radeon_bo_unref(entry->bo);
		remove_from_list(entry);
		FREE(entry);
	}

	/* Buffers currently reserved. */
	foreach_s(entry, next, &rmesa->dma.reserved) {
		radeon_bo_unref(entry->bo);
		remove_from_list(entry);
		FREE(entry);
	}
}
320 
/**
 * Give back the last return_bytes bytes of the current DMA buffer by
 * moving current_used (and current_vertexptr with it) backwards.
 * Does nothing when no buffer is reserved.
 */
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	struct radeon_dma *dma = &rmesa->dma;

	if (!is_empty_list(&dma->reserved)) {
		if (RADEON_DEBUG & RADEON_IOCTL) {
			fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
		}

		dma->current_used -= return_bytes;
		dma->current_vertexptr = dma->current_used;
	}
}
331 
/**
 * Report whether a buffer object may be treated as idle.
 *
 * \return 0 when radeon_bo_is_busy() answers -EBUSY, 1 for any other
 *         result.  A result of -EINVAL additionally triggers a one-time
 *         warning that the busy query is unsupported.
 */
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t busy_domain;	/* filled in by the query, not used here */
	const int status = radeon_bo_is_busy(bo, &busy_domain);

	if (status == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			"This may cause small performance drop for you.\n");
	}

	if (status == -EBUSY) {
		return 0;
	}
	return 1;
}
342 
radeonReleaseDmaRegions(radeonContextPtr rmesa)343 void radeonReleaseDmaRegions(radeonContextPtr rmesa)
344 {
345 	struct radeon_dma_bo *dma_bo;
346 	struct radeon_dma_bo *temp;
347 	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
348 	const int time = rmesa->dma.free.expire_counter;
349 
350 	if (RADEON_DEBUG & RADEON_DMA) {
351 		size_t free = 0,
352 		       wait = 0,
353 		       reserved = 0;
354 		foreach(dma_bo, &rmesa->dma.free)
355 			++free;
356 
357 		foreach(dma_bo, &rmesa->dma.wait)
358 			++wait;
359 
360 		foreach(dma_bo, &rmesa->dma.reserved)
361 			++reserved;
362 
363 		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
364 		      __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
365 	}
366 
367 	/* move waiting bos to free list.
368 	   wait list provides gpu time to handle data before reuse */
369 	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
370 		if (dma_bo->expire_counter == time) {
371 			WARN_ONCE("Leaking dma buffer object!\n");
372 			radeon_bo_unref(dma_bo->bo);
373 			remove_from_list(dma_bo);
374 			FREE(dma_bo);
375 			continue;
376 		}
377 		/* free objects that are too small to be used because of large request */
378 		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
379 		   radeon_bo_unref(dma_bo->bo);
380 		   remove_from_list(dma_bo);
381 		   FREE(dma_bo);
382 		   continue;
383 		}
384 		if (!radeon_bo_is_idle(dma_bo->bo)) {
385 			break;
386 		}
387 		remove_from_list(dma_bo);
388 		dma_bo->expire_counter = expire_at;
389 		insert_at_tail(&rmesa->dma.free, dma_bo);
390 	}
391 
392 	/* move reserved to wait list */
393 	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
394 		radeon_bo_unmap(dma_bo->bo);
395 		/* free objects that are too small to be used because of large request */
396 		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
397 		   radeon_bo_unref(dma_bo->bo);
398 		   remove_from_list(dma_bo);
399 		   FREE(dma_bo);
400 		   continue;
401 		}
402 		remove_from_list(dma_bo);
403 		dma_bo->expire_counter = expire_at;
404 		insert_at_tail(&rmesa->dma.wait, dma_bo);
405 	}
406 
407 	/* free bos that have been unused for some time */
408 	foreach_s(dma_bo, temp, &rmesa->dma.free) {
409 		if (dma_bo->expire_counter != time)
410 			break;
411 		remove_from_list(dma_bo);
412 	        radeon_bo_unref(dma_bo->bo);
413 		FREE(dma_bo);
414 	}
415 
416 }
417 
418 
419 /* Flush vertices in the current dma region.
420  */
rcommon_flush_last_swtcl_prim(struct gl_context * ctx)421 void rcommon_flush_last_swtcl_prim( struct gl_context *ctx  )
422 {
423 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
424 	struct radeon_dma *dma = &rmesa->dma;
425 
426 	if (RADEON_DEBUG & RADEON_IOCTL)
427 		fprintf(stderr, "%s\n", __FUNCTION__);
428 	dma->flush = NULL;
429 
430 	radeon_bo_unmap(rmesa->swtcl.bo);
431 
432 	if (!is_empty_list(&dma->reserved)) {
433 	    GLuint current_offset = dma->current_used;
434 
435 	    assert (dma->current_used +
436 		    rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
437 		    dma->current_vertexptr);
438 
439 	    if (dma->current_used != dma->current_vertexptr) {
440 		    dma->current_used = dma->current_vertexptr;
441 
442 		    rmesa->vtbl.swtcl_flush(ctx, current_offset);
443 	    }
444 	    rmesa->swtcl.numverts = 0;
445 	}
446 	radeon_bo_unref(rmesa->swtcl.bo);
447 	rmesa->swtcl.bo = NULL;
448 }
449 /* Alloc space in the current dma region.
450  */
451 void *
rcommonAllocDmaLowVerts(radeonContextPtr rmesa,int nverts,int vsize)452 rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
453 {
454 	GLuint bytes = vsize * nverts;
455 	void *head;
456 	if (RADEON_DEBUG & RADEON_IOCTL)
457 		fprintf(stderr, "%s\n", __FUNCTION__);
458 
459 	if(is_empty_list(&rmesa->dma.reserved)
460 	      ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
461 		if (rmesa->dma.flush) {
462 			rmesa->dma.flush(rmesa->glCtx);
463 		}
464 
465                 radeonRefillCurrentDmaRegion(rmesa, bytes);
466 
467 		return NULL;
468 	}
469 
470         if (!rmesa->dma.flush) {
471 		/* if cmdbuf flushed DMA restart */
472                 rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
473                 rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
474         }
475 
476 	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
477         ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
478         ASSERT( rmesa->dma.current_used +
479                 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
480                 rmesa->dma.current_vertexptr );
481 
482 	if (!rmesa->swtcl.bo) {
483 		rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
484 		radeon_bo_ref(rmesa->swtcl.bo);
485 		radeon_bo_map(rmesa->swtcl.bo, 1);
486 	}
487 
488 	head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
489 	rmesa->dma.current_vertexptr += bytes;
490 	rmesa->swtcl.numverts += nverts;
491 	return head;
492 }
493 
/**
 * Drop the buffer references held by the context's tcl.aos entries.
 *
 * Any pending dma.flush callback is run first.  newinputs is not used.
 */
void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs )
{
   radeonContextPtr radeon = RADEON_CONTEXT( ctx );
   int idx;

   if (RADEON_DEBUG & RADEON_IOCTL) {
      fprintf(stderr, "%s\n", __FUNCTION__);
   }

   if (radeon->dma.flush) {
      radeon->dma.flush(radeon->glCtx);
   }

   for (idx = 0; idx < radeon->tcl.aos_count; idx++) {
      /* Entries without a buffer have nothing to release. */
      if (!radeon->tcl.aos[idx].bo) {
         continue;
      }
      radeon_bo_unref(radeon->tcl.aos[idx].bo);
      radeon->tcl.aos[idx].bo = NULL;
   }
}
512