1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
5
6 The Weather Channel (TM) funded Tungsten Graphics to develop the
7 initial release of the Radeon 8500 driver under the XFree86 license.
8 This notice must be preserved.
9
10 All Rights Reserved.
11
12 Permission is hereby granted, free of charge, to any person obtaining a
13 copy of this software and associated documentation files (the "Software"),
14 to deal in the Software without restriction, including without limitation
15 on the rights to use, copy, modify, merge, publish, distribute, sub
16 license, and/or sell copies of the Software, and to permit persons to whom
17 the Software is furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice (including the next
20 paragraph) shall be included in all copies or substantial portions of the
21 Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
27 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29 USE OR OTHER DEALINGS IN THE SOFTWARE.
30
31 **************************************************************************/
32
33 #include <errno.h>
34 #include "radeon_common.h"
35 #include "radeon_fog.h"
36 #include "util/simple_list.h"
37
/*
 * COPY_DWORDS(dst, src, nr)
 *
 * Copy nr 32-bit words from src to dst.
 *
 * Both variants leave dst pointing just past the words that were written:
 * the asm variant binds dst to the "D" output operand of "rep ; movsl",
 * the C variant does "dst += nr".  dst therefore has to be a modifiable
 * lvalue.  src itself is not modified by either variant.
 *
 * The C fallback expands nr more than once (loop condition and final
 * increment) and does not parenthesise its arguments, so pass simple
 * expressions without side effects.  It reads the source through an
 * "int *" cast, so src may be a void pointer.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif
57
radeonEmitVec4(uint32_t * out,const GLvoid * data,int stride,int count)58 void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
59 {
60 int i;
61
62 if (RADEON_DEBUG & RADEON_VERTS)
63 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
64 __func__, count, stride, (void *)out, (void *)data);
65
66 if (stride == 4)
67 COPY_DWORDS(out, data, count);
68 else
69 for (i = 0; i < count; i++) {
70 out[0] = *(int *)data;
71 out++;
72 data += stride;
73 }
74 }
75
radeonEmitVec8(uint32_t * out,const GLvoid * data,int stride,int count)76 void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
77 {
78 int i;
79
80 if (RADEON_DEBUG & RADEON_VERTS)
81 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
82 __func__, count, stride, (void *)out, (void *)data);
83
84 if (stride == 8)
85 COPY_DWORDS(out, data, count * 2);
86 else
87 for (i = 0; i < count; i++) {
88 out[0] = *(int *)data;
89 out[1] = *(int *)(data + 4);
90 out += 2;
91 data += stride;
92 }
93 }
94
radeonEmitVec12(uint32_t * out,const GLvoid * data,int stride,int count)95 void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
96 {
97 int i;
98
99 if (RADEON_DEBUG & RADEON_VERTS)
100 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
101 __func__, count, stride, (void *)out, (void *)data);
102
103 if (stride == 12) {
104 COPY_DWORDS(out, data, count * 3);
105 }
106 else
107 for (i = 0; i < count; i++) {
108 out[0] = *(int *)data;
109 out[1] = *(int *)(data + 4);
110 out[2] = *(int *)(data + 8);
111 out += 3;
112 data += stride;
113 }
114 }
115
radeonEmitVec16(uint32_t * out,const GLvoid * data,int stride,int count)116 void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
117 {
118 int i;
119
120 if (RADEON_DEBUG & RADEON_VERTS)
121 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
122 __func__, count, stride, (void *)out, (void *)data);
123
124 if (stride == 16)
125 COPY_DWORDS(out, data, count * 4);
126 else
127 for (i = 0; i < count; i++) {
128 out[0] = *(int *)data;
129 out[1] = *(int *)(data + 4);
130 out[2] = *(int *)(data + 8);
131 out[3] = *(int *)(data + 12);
132 out += 4;
133 data += stride;
134 }
135 }
136
rcommon_emit_vector(struct gl_context * ctx,struct radeon_aos * aos,const GLvoid * data,int size,int stride,int count)137 void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
138 const GLvoid * data, int size, int stride, int count)
139 {
140 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
141 uint32_t *out;
142
143 if (stride == 0) {
144 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
145 count = 1;
146 aos->stride = 0;
147 } else {
148 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
149 aos->stride = size;
150 }
151
152 aos->components = size;
153 aos->count = count;
154
155 radeon_bo_map(aos->bo, 1);
156 out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
157 switch (size) {
158 case 1: radeonEmitVec4(out, data, stride, count); break;
159 case 2: radeonEmitVec8(out, data, stride, count); break;
160 case 3: radeonEmitVec12(out, data, stride, count); break;
161 case 4: radeonEmitVec16(out, data, stride, count); break;
162 default:
163 assert(0);
164 break;
165 }
166 radeon_bo_unmap(aos->bo);
167 }
168
/**
 * Upload a fog coordinate array, converting every value with
 * radeonComputeFogBlendFactor() on the way.
 *
 * ctx    - GL context; used to find the radeon context and passed on to
 *          radeonComputeFogBlendFactor()
 * aos    - filled in with the buffer object, offset, stride, component
 *          count (always 1) and element count of the uploaded data
 * data   - address of the first source value, read as GLfloat
 * stride - source stride in bytes; 0 means a single value is uploaded
 *          and aos->stride is set to 0
 * count  - number of values (ignored when stride is 0)
 */
void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
			 GLvoid *data, int stride, int count)
{
	int i;
	float *out;
	const char *src;
	int size = 1;
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d\n",
			__func__, count, stride);

	if (stride == 0) {
		radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	/* Emit the data */
	radeon_bo_map(aos->bo, 1);
	out = (float*)((char*)aos->bo->ptr + aos->offset);

	/* Walk the source through a byte pointer: arithmetic on a void
	 * pointer is a GNU extension rather than ISO C. */
	src = (const char *)data;
	for (i = 0; i < count; i++) {
		out[0] = radeonComputeFogBlendFactor( ctx, *(const GLfloat *)src );
		out++;
		src += stride;
	}
	radeon_bo_unmap(aos->bo);
}
203
/**
 * Initialise the DMA bookkeeping of a context: the free, wait and
 * reserved buffer lists start out empty and the minimum buffer size
 * starts at MAX_DMA_BUF_SZ.
 */
void radeon_init_dma(radeonContextPtr rmesa)
{
	struct radeon_dma *dma = &rmesa->dma;

	dma->minimum_size = MAX_DMA_BUF_SZ;

	make_empty_list(&dma->reserved);
	make_empty_list(&dma->wait);
	make_empty_list(&dma->free);
}
211
/**
 * Make a fresh buffer the current DMA buffer.
 *
 * A buffer is taken from the tail of the free list when that buffer is at
 * least size bytes large; otherwise a new one of dma.minimum_size bytes is
 * opened.  The chosen buffer is put at the head of the reserved list, the
 * current_used / current_vertexptr cursors are reset to 0 and the buffer
 * is mapped.
 *
 * rmesa - radeon context
 * size  - number of bytes the caller is about to allocate; raises
 *         dma.minimum_size (rounded up to 16 bytes) when it is larger
 */
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;

	/* we set minimum sizes to at least requested size
	   aligned to next 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	/* %zu matches how this field is printed elsewhere in the file;
	 * the former %Zi is a non-standard spelling. */
	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zu\n",
		     __func__, size, rmesa->dma.minimum_size);

	do {
		if (is_empty_list(&rmesa->dma.free)
		    || last_elem(&rmesa->dma.free)->bo->size < size) {
			dma_bo = CALLOC_STRUCT(radeon_dma_bo);
			assert(dma_bo);

			/* Keep flushing the command buffer and retrying
			 * until the buffer object can be opened.  dma_bo is
			 * not on any list yet, so retrying with it is safe. */
			for (;;) {
				dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
							    0, rmesa->dma.minimum_size, 4,
							    RADEON_GEM_DOMAIN_GTT, 0);
				if (dma_bo->bo)
					break;
				rcommonFlushCmdBuf(rmesa, __func__);
			}
			insert_at_head(&rmesa->dma.reserved, dma_bo);
		} else {
			/* We push and pop buffers from end of list so we can keep
			   counter on unused buffers for later freeing them from
			   begin of list */
			dma_bo = last_elem(&rmesa->dma.free);
			remove_from_list(dma_bo);
			insert_at_head(&rmesa->dma.reserved, dma_bo);
		}

		rmesa->dma.current_used = 0;
		rmesa->dma.current_vertexptr = 0;

		if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
						  first_elem(&rmesa->dma.reserved)->bo,
						  RADEON_GEM_DOMAIN_GTT, 0))
			fprintf(stderr,"failure to revalidate BOs - badness\n");

		/* An empty reserved list here means the buffer queued above
		 * has been taken off it again (the command buffer was
		 * flushed during revalidation).  The code in this file that
		 * empties the reserved list either moves its entries to the
		 * wait list or frees them, so dma_bo must not be touched or
		 * re-inserted: pick a buffer from scratch instead. */
	} while (is_empty_list(&rmesa->dma.reserved));

	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
261
262 /* Allocates a region from rmesa->dma.current. If there isn't enough
263 * space in current, grab a new buffer (and discard what was left of current)
264 */
radeonAllocDmaRegion(radeonContextPtr rmesa,struct radeon_bo ** pbo,int * poffset,int bytes,int alignment)265 void radeonAllocDmaRegion(radeonContextPtr rmesa,
266 struct radeon_bo **pbo, int *poffset,
267 int bytes, int alignment)
268 {
269 if (RADEON_DEBUG & RADEON_IOCTL)
270 fprintf(stderr, "%s %d\n", __func__, bytes);
271
272 if (rmesa->dma.flush)
273 rmesa->dma.flush(&rmesa->glCtx);
274
275 assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
276
277 alignment--;
278 rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
279
280 if (is_empty_list(&rmesa->dma.reserved)
281 || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
282 radeonRefillCurrentDmaRegion(rmesa, bytes);
283
284 *poffset = rmesa->dma.current_used;
285 *pbo = first_elem(&rmesa->dma.reserved)->bo;
286 radeon_bo_ref(*pbo);
287
288 /* Always align to at least 16 bytes */
289 rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
290 rmesa->dma.current_vertexptr = rmesa->dma.current_used;
291
292 assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
293 }
294
/**
 * Release every DMA buffer the context still tracks.
 *
 * The free, wait and reserved lists are emptied in that order; each entry
 * is unlinked, its buffer object reference is dropped and the entry
 * itself is freed.
 */
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *entry;

	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __func__);

	/* Each loop pops the head entry until its list is empty. */
	while (!is_empty_list(&rmesa->dma.free)) {
		entry = first_elem(&rmesa->dma.free);
		remove_from_list(entry);
		radeon_bo_unref(entry->bo);
		free(entry);
	}

	while (!is_empty_list(&rmesa->dma.wait)) {
		entry = first_elem(&rmesa->dma.wait);
		remove_from_list(entry);
		radeon_bo_unref(entry->bo);
		free(entry);
	}

	while (!is_empty_list(&rmesa->dma.reserved)) {
		entry = first_elem(&rmesa->dma.reserved);
		remove_from_list(entry);
		radeon_bo_unref(entry->bo);
		free(entry);
	}
}
320
/**
 * Give back the last return_bytes bytes of the current DMA buffer by
 * moving both allocation cursors backwards.  Does nothing when no buffer
 * is reserved.
 */
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (!is_empty_list(&rmesa->dma.reserved)) {
		if (RADEON_DEBUG & RADEON_IOCTL)
			fprintf(stderr, "%s %d\n", __func__, return_bytes);

		rmesa->dma.current_used -= return_bytes;
		rmesa->dma.current_vertexptr = rmesa->dma.current_used;
	}
}
331
/**
 * Report whether a buffer object may be treated as idle.
 *
 * Returns 0 only when radeon_bo_is_busy() answers -EBUSY; every other
 * result, including errors, counts as idle.  An -EINVAL result
 * additionally triggers a one-time warning that the busy query is not
 * supported.
 */
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t busy_domain;
	const int status = radeon_bo_is_busy(bo, &busy_domain);

	if (status == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			  "This may cause small performance drop for you.\n");
	}

	return status != -EBUSY;
}
342
/**
 * Age the DMA buffer lists by one step.
 *
 * The expire counter kept in the free list head is incremented, then:
 *  - wait list: entries whose expiry equals the new counter value, or
 *    whose buffer is smaller than dma.minimum_size, are destroyed; the
 *    remaining ones move to the tail of the free list until the first
 *    buffer that is still busy is met;
 *  - reserved list: every buffer is unmapped, then either destroyed (too
 *    small) or moved to the tail of the wait list;
 *  - free list: leading entries whose expiry equals the new counter value
 *    are destroyed.
 * Entries that change list get a new expiry of counter + DMA_BO_FREE_TIME.
 */
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *entry;
	struct radeon_dma_bo *next;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int now = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t n_free = 0;
		size_t n_wait = 0;
		size_t n_reserved = 0;

		foreach(entry, &rmesa->dma.free)
			++n_free;

		foreach(entry, &rmesa->dma.wait)
			++n_wait;

		foreach(entry, &rmesa->dma.reserved)
			++n_reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
			__func__, n_free, n_wait, n_reserved, rmesa->dma.minimum_size);
	}

	/* Waiting buffers become free again once idle; the wait list gives
	 * the GPU time to consume the data before the buffer is reused. */
	foreach_s(entry, next, &rmesa->dma.wait) {
		const int expired = (entry->expire_counter == now);

		if (expired) {
			WARN_ONCE("Leaking dma buffer object!\n");
		}

		/* Expired buffers, and buffers too small for the current
		 * minimum size, are destroyed rather than recycled. */
		if (expired || entry->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(entry->bo);
			remove_from_list(entry);
			free(entry);
			continue;
		}

		if (!radeon_bo_is_idle(entry->bo)) {
			break;
		}

		remove_from_list(entry);
		entry->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, entry);
	}

	/* Reserved buffers are unmapped and queued on the wait list. */
	foreach_s(entry, next, &rmesa->dma.reserved) {
		radeon_bo_unmap(entry->bo);

		if (entry->bo->size < rmesa->dma.minimum_size) {
			/* Too small to be useful after a large request. */
			radeon_bo_unref(entry->bo);
			remove_from_list(entry);
			free(entry);
		} else {
			remove_from_list(entry);
			entry->expire_counter = expire_at;
			insert_at_tail(&rmesa->dma.wait, entry);
		}
	}

	/* Destroy free buffers that have gone unused for long enough. */
	foreach_s(entry, next, &rmesa->dma.free) {
		if (entry->expire_counter != now)
			break;

		remove_from_list(entry);
		radeon_bo_unref(entry->bo);
		free(entry);
	}
}
417
418
419 /* Flush vertices in the current dma region.
420 */
rcommon_flush_last_swtcl_prim(struct gl_context * ctx)421 void rcommon_flush_last_swtcl_prim( struct gl_context *ctx )
422 {
423 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
424 struct radeon_dma *dma = &rmesa->dma;
425
426 if (RADEON_DEBUG & RADEON_IOCTL)
427 fprintf(stderr, "%s\n", __func__);
428 dma->flush = NULL;
429
430 radeon_bo_unmap(rmesa->swtcl.bo);
431
432 if (!is_empty_list(&dma->reserved)) {
433 GLuint current_offset = dma->current_used;
434
435 assert (dma->current_used +
436 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
437 dma->current_vertexptr);
438
439 if (dma->current_used != dma->current_vertexptr) {
440 dma->current_used = dma->current_vertexptr;
441
442 rmesa->vtbl.swtcl_flush(ctx, current_offset);
443 }
444 rmesa->swtcl.numverts = 0;
445 }
446 radeon_bo_unref(rmesa->swtcl.bo);
447 rmesa->swtcl.bo = NULL;
448 }
449 /* Alloc space in the current dma region.
450 */
451 void *
rcommonAllocDmaLowVerts(radeonContextPtr rmesa,int nverts,int vsize)452 rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
453 {
454 GLuint bytes = vsize * nverts;
455 void *head;
456 if (RADEON_DEBUG & RADEON_IOCTL)
457 fprintf(stderr, "%s\n", __func__);
458
459 if(is_empty_list(&rmesa->dma.reserved)
460 ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
461 if (rmesa->dma.flush) {
462 rmesa->dma.flush(&rmesa->glCtx);
463 }
464
465 radeonRefillCurrentDmaRegion(rmesa, bytes);
466
467 return NULL;
468 }
469
470 if (!rmesa->dma.flush) {
471 /* if cmdbuf flushed DMA restart */
472 rmesa->glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES;
473 rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
474 }
475
476 assert( vsize == rmesa->swtcl.vertex_size * 4 );
477 assert( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
478 assert( rmesa->dma.current_used +
479 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
480 rmesa->dma.current_vertexptr );
481
482 if (!rmesa->swtcl.bo) {
483 rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
484 radeon_bo_ref(rmesa->swtcl.bo);
485 radeon_bo_map(rmesa->swtcl.bo, 1);
486 }
487
488 head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
489 rmesa->dma.current_vertexptr += bytes;
490 rmesa->swtcl.numverts += nverts;
491 return head;
492 }
493
/**
 * Drop the buffer object references held by the TCL attribute arrays.
 *
 * Any pending dma.flush callback is run first; afterwards every
 * tcl.aos[] slot below tcl.aos_count that holds a buffer object is
 * unreferenced and cleared.  newinputs is not used.
 */
void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs )
{
	radeonContextPtr radeon = RADEON_CONTEXT( ctx );
	int slot;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __func__);

	if (radeon->dma.flush) {
		radeon->dma.flush(&radeon->glCtx);
	}

	for (slot = 0; slot < radeon->tcl.aos_count; slot++) {
		if (!radeon->tcl.aos[slot].bo)
			continue;

		radeon_bo_unref(radeon->tcl.aos[slot].bo);
		radeon->tcl.aos[slot].bo = NULL;
	}
}
512