1 /**************************************************************************
2 
3 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
4                      VMware, Inc.
5 
6 All Rights Reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15 
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 
28 **************************************************************************/
29 
30 /*
31  * Authors:
32  *   Keith Whitwell <keithw@vmware.com>
33  */
34 
35 #include "main/glheader.h"
36 #include "main/imports.h"
37 #include "main/mtypes.h"
38 #include "main/light.h"
39 #include "main/enums.h"
40 #include "main/state.h"
41 
42 #include "util/macros.h"
43 
44 #include "vbo/vbo.h"
45 #include "tnl/tnl.h"
46 #include "tnl/t_pipeline.h"
47 
48 #include "radeon_common.h"
49 #include "radeon_context.h"
50 #include "radeon_state.h"
51 #include "radeon_ioctl.h"
52 #include "radeon_tcl.h"
53 #include "radeon_swtcl.h"
54 #include "radeon_maos.h"
55 #include "radeon_common_context.h"
56 
57 
58 
59 /*
60  * Render unclipped vertex buffers by emitting vertices directly to
61  * dma buffers.  Use strip/fan hardware primitives where possible.
62  * Try to simulate missing primitives with indexed vertices.
63  */
/* Primitive types flagged as available. */
#define HAVE_POINTS      1
#define HAVE_LINES       1
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES   1
#define HAVE_TRI_STRIPS  1
#define HAVE_TRI_FANS    1
#define HAVE_POLYGONS    1

/* Primitive types flagged as unavailable. */
#define HAVE_LINE_LOOP   0
#define HAVE_QUADS       0
#define HAVE_QUAD_STRIPS 0

/* Indexed (element) rendering is flagged as available. */
#define HAVE_ELTS        1
76 
/* Hardware primitive-type code used for each GL primitive. */
#define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
#define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
#define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
#define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
#define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
#define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN

/* Polygons share the triangle-fan code. */
#define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN

/* No hardware code is assigned to these. */
#define HW_LINE_LOOP        0
#define HW_TRIANGLE_STRIP_1 0
#define HW_QUADS            0
#define HW_QUAD_STRIP       0
89 
90 static GLboolean discrete_prim[0x10] = {
91    0,				/* 0 none */
92    1,				/* 1 points */
93    1,				/* 2 lines */
94    0,				/* 3 line_strip */
95    1,				/* 4 tri_list */
96    0,				/* 5 tri_fan */
97    0,				/* 6 tri_type2 */
98    1,				/* 7 rect list (unused) */
99    1,				/* 8 3vert point */
100    1,				/* 9 3vert line */
101    0,
102    0,
103    0,
104    0,
105    0,
106    0,
107 };
108 
109 
/* Glue macros for the render template included further down in this file
 * (presumably consumed by tnl_dd/t_dd_dmatmp2.h -- confirm against that
 * header).
 */
#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
#define ELT_TYPE  GLushort

/* Select an indexed primitive.  The arguments are parenthesized so that a
 * compound hw_prim expression combines correctly with the index-walk flag.
 */
#define ELT_INIT(prim, hw_prim) \
   radeonTclPrimitive( ctx, (prim), (hw_prim) | RADEON_CP_VC_CNTL_PRIM_WALK_IND )

#define GET_MESA_ELTS() rmesa->tcl.Elts


/* Don't really know how many elts will fit in what's left of cmdbuf,
 * as there is state to emit, etc:
 */

/* Testing on isosurf shows a maximum around here.  Don't know if it's
 * the card or driver or kernel module that is causing the behaviour.
 */
#define GET_MAX_HW_ELTS() 300
127 
128 
/* Flag the `lin` state atom as changed and emit state right away. */
#define RESET_STIPPLE() do {				\
   RADEON_STATECHANGE( rmesa, lin );			\
   radeonEmitState( &rmesa->radeon );			\
} while (0)

/* Clear (mode == 0) or set (mode != 0) the auto-reset bit of the line
 * pattern word, then emit state right away.
 */
#define AUTO_STIPPLE( mode )  do {			\
   RADEON_STATECHANGE( rmesa, lin );			\
   if (!(mode))						\
      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=		\
	 ~RADEON_LINE_PATTERN_AUTO_RESET;		\
   else							\
      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |=		\
	 RADEON_LINE_PATTERN_AUTO_RESET;		\
   radeonEmitState( &rmesa->radeon );			\
} while (0)
144 
145 
146 
147 #define ALLOC_ELTS(nr)	radeonAllocElts( rmesa, nr )
148 
radeonAllocElts(r100ContextPtr rmesa,GLuint nr)149 static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
150 {
151       if (rmesa->radeon.dma.flush)
152 	 rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
153 
154       radeonEmitAOS( rmesa,
155 		     rmesa->radeon.tcl.aos_count, 0 );
156 
157       return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
158 				       rmesa->tcl.hw_primitive, nr );
159 }
160 
/* Closing an element run does nothing at run time: the RADEON_NEWPRIM
 * call is compiled but sits behind `if (0)`.  The do/while wrapper makes
 * the macro a single statement, so it cannot capture an `else` that
 * follows its use.
 */
#define CLOSE_ELTS() do { if (0) RADEON_NEWPRIM( rmesa ); } while (0)
162 
163 
164 
165 /* TODO: Try to extend existing primitive if both are identical,
166  * discrete and there are no intervening state changes.  (Somewhat
167  * duplicates changes to DrawArrays code)
168  */
/* Emit one non-indexed primitive.
 *
 * Selects the hardware primitive, emits the vertex arrays starting at
 * `start`, and then issues a vertex-buffer primitive.  The vertex count
 * passed on is `count - start`, i.e. `count` acts as the end of the range.
 */
static void radeonEmitPrim( struct gl_context *ctx,
			    GLenum prim,
			    GLuint hwprim,
			    GLuint start,
			    GLuint count )
{
   r100ContextPtr rmesa = R100_CONTEXT( ctx );
   const GLuint nr_verts = count - start;

   radeonTclPrimitive( ctx, prim, hwprim );

   radeonEmitAOS( rmesa, rmesa->radeon.tcl.aos_count, start );

   /* Why couldn't this packet have taken an offset param?
    */
   radeonEmitVbufPrim( rmesa,
		       rmesa->tcl.vertex_format,
		       rmesa->tcl.hw_primitive,
		       nr_verts );
}

/* Template hook; the (void) cast references rmesa without using it. */
#define EMIT_PRIM( ctx, prim, hwprim, start, count ) do {       \
   (void) rmesa;                                                \
   radeonEmitPrim( ctx, prim, hwprim, start, count );           \
} while (0)
193 
#define MAX_CONVERSION_SIZE 40

/* Try & join small primitives.
 *
 * True when NR is below 20, or when NR is below 40 and the current
 * hardware primitive already equals PRIM with the index-walk and
 * TCL-enable flags set.  PRIM is parenthesized so a compound argument
 * cannot change how the flags are combined.
 *
 * NOTE(review): the literal 40 looks like it mirrors MAX_CONVERSION_SIZE
 * -- confirm before tying the two together.
 */
#if 0
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
#else
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )			\
  ((NR) < 20 ||							\
   ((NR) < 40 &&						\
    rmesa->tcl.hw_primitive == ((PRIM) |			\
			    RADEON_CP_VC_CNTL_PRIM_WALK_IND |	\
			    RADEON_CP_VC_CNTL_TCL_ENABLE)))
#endif
208 
209 #ifdef MESA_BIG_ENDIAN
210 /* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
211 #define EMIT_ELT(dest, offset, x) do {				\
212 	int off = offset + ( ( (uintptr_t)dest & 0x2 ) >> 1 );	\
213 	GLushort *des = (GLushort *)( (uintptr_t)dest & ~0x2 );	\
214 	(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); 	\
215 	(void)rmesa; } while (0)
216 #else
217 #define EMIT_ELT(dest, offset, x) do {				\
218 	(dest)[offset] = (GLushort) (x);			\
219 	(void)rmesa; } while (0)
220 #endif
221 
222 #define EMIT_TWO_ELTS(dest, offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
223 
224 
225 
226 #define TAG(x) tcl_##x
227 #include "tnl_dd/t_dd_dmatmp2.h"
228 
229 /**********************************************************************/
230 /*                          External entrypoints                     */
231 /**********************************************************************/
232 
/* Render vertices [first, last) without indices.  The handler is taken
 * from tcl_render_tab_verts, selected by the primitive-mode bits of
 * `flags`; the full `flags` word is passed through to it.
 */
void radeonEmitPrimitive( struct gl_context *ctx,
			  GLuint first,
			  GLuint last,
			  GLuint flags )
{
   const GLuint mode = flags & PRIM_MODE_MASK;

   tcl_render_tab_verts[mode]( ctx, first, last, flags );
}
240 
/* Render the range [first, last) through the element (indexed) path.
 * The handler is taken from tcl_render_tab_elts, selected by the
 * primitive-mode bits of `flags`; the full `flags` word is passed through.
 */
void radeonEmitEltPrimitive( struct gl_context *ctx,
			     GLuint first,
			     GLuint last,
			     GLuint flags )
{
   const GLuint mode = flags & PRIM_MODE_MASK;

   tcl_render_tab_elts[mode]( ctx, first, last, flags );
}
248 
/* Select the hardware primitive for subsequent TCL rendering.
 *
 * - Prepares for rendering and validates GL state if any is pending.
 * - Starts a new primitive unless the requested code (with TCL enabled)
 *   matches the current one and its discrete_prim[] entry is nonzero.
 * - Updates the flat-shade vertex selection in SE_CNTL: flat-shaded
 *   GL_POLYGON uses RADEON_FLAT_SHADE_VTX_0, everything else uses
 *   RADEON_FLAT_SHADE_VTX_LAST.
 */
void radeonTclPrimitive( struct gl_context *ctx,
			 GLenum prim,
			 int hw_prim )
{
   r100ContextPtr rmesa = R100_CONTEXT(ctx);
   const GLuint wanted_prim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
   GLuint wanted_se_cntl;

   radeon_prepare_render(&rmesa->radeon);
   if (rmesa->radeon.NewGLState)
      radeonValidateState( ctx );

   if (!discrete_prim[hw_prim & 0xf] ||
       wanted_prim != rmesa->tcl.hw_primitive) {
      RADEON_NEWPRIM( rmesa );
      rmesa->tcl.hw_primitive = wanted_prim;
   }

   wanted_se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL] & ~RADEON_FLAT_SHADE_VTX_LAST;
   wanted_se_cntl |= (prim == GL_POLYGON && ctx->Light.ShadeModel == GL_FLAT)
      ? RADEON_FLAT_SHADE_VTX_0
      : RADEON_FLAT_SHADE_VTX_LAST;

   /* Only touch the state atom when the word really changes. */
   if (rmesa->hw.set.cmd[SET_SE_CNTL] != wanted_se_cntl) {
      RADEON_STATECHANGE( rmesa, set );
      rmesa->hw.set.cmd[SET_SE_CNTL] = wanted_se_cntl;
   }
}
280 
281 /**
282  * Predict total emit size for next rendering operation so there is no flush in middle of rendering
283  * Prediction has to aim towards the best possible value that is worse than worst case scenario
284  */
radeonEnsureEmitSize(struct gl_context * ctx,GLuint inputs)285 static GLuint radeonEnsureEmitSize( struct gl_context * ctx , GLuint inputs )
286 {
287   r100ContextPtr rmesa = R100_CONTEXT(ctx);
288   TNLcontext *tnl = TNL_CONTEXT(ctx);
289   struct vertex_buffer *VB = &tnl->vb;
290   GLuint space_required;
291   GLuint state_size;
292   GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */
293   int i;
294   /* list of flags that are allocating aos object */
295   const GLuint flags_to_check[] = {
296     VERT_BIT_NORMAL,
297     VERT_BIT_COLOR0,
298     VERT_BIT_COLOR1,
299     VERT_BIT_FOG
300   };
301   /* predict number of aos to emit */
302   for (i=0; i < ARRAY_SIZE(flags_to_check); ++i)
303   {
304     if (inputs & flags_to_check[i])
305       ++nr_aos;
306   }
307   for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
308   {
309     if (inputs & VERT_BIT_TEX(i))
310       ++nr_aos;
311   }
312 
313   {
314     /* count the prediction for state size */
315     space_required = 0;
316     state_size = radeonCountStateEmitSize( &rmesa->radeon );
317     /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
318     if (!rmesa->hw.tcl.dirty)
319       state_size += rmesa->hw.tcl.check( &rmesa->radeon.glCtx, &rmesa->hw.tcl );
320     /* predict size for elements */
321     for (i = 0; i < VB->PrimitiveCount; ++i)
322     {
323       /* If primitive.count is less than MAX_CONVERSION_SIZE
324 	 rendering code may decide convert to elts.
325 	 In that case we have to make pessimistic prediction.
326 	 and use larger of 2 paths. */
327       const GLuint elts = ELTS_BUFSZ(nr_aos);
328       const GLuint index = INDEX_BUFSZ;
329       const GLuint vbuf = VBUF_BUFSZ;
330       if (!VB->Primitive[i].count)
331 	continue;
332       if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
333 	  || vbuf > index + elts)
334 	space_required += vbuf;
335       else
336 	space_required += index + elts;
337       space_required += VB->Primitive[i].count * 3;
338       space_required += AOS_BUFSZ(nr_aos);
339     }
340     space_required += SCISSOR_BUFSZ;
341   }
342   /* flush the buffer in case we need more than is left. */
343   if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __func__))
344     return space_required + radeonCountStateEmitSize( &rmesa->radeon );
345   else
346     return space_required + state_size;
347 }
348 
349 /**********************************************************************/
350 /*                          Render pipeline stage                     */
351 /**********************************************************************/
352 
353 
354 /* TCL render.
355  */
radeon_run_tcl_render(struct gl_context * ctx,struct tnl_pipeline_stage * stage)356 static GLboolean radeon_run_tcl_render( struct gl_context *ctx,
357 					struct tnl_pipeline_stage *stage )
358 {
359    r100ContextPtr rmesa = R100_CONTEXT(ctx);
360    TNLcontext *tnl = TNL_CONTEXT(ctx);
361    struct vertex_buffer *VB = &tnl->vb;
362    GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
363    GLuint i;
364    GLuint emit_end;
365 
366    /* TODO: separate this from the swtnl pipeline
367     */
368    if (rmesa->radeon.TclFallback)
369       return GL_TRUE;	/* fallback to software t&l */
370 
371    if (VB->Count == 0)
372       return GL_FALSE;
373 
374    /* NOTE: inputs != tnl->render_inputs - these are the untransformed
375     * inputs.
376     */
377    if (ctx->Light.Enabled) {
378       inputs |= VERT_BIT_NORMAL;
379    }
380 
381    if (_mesa_need_secondary_color(ctx)) {
382       inputs |= VERT_BIT_COLOR1;
383    }
384 
385    if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
386       inputs |= VERT_BIT_FOG;
387    }
388 
389    for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
390       if (ctx->Texture.Unit[i]._Current) {
391       /* TODO: probably should not emit texture coords when texgen is enabled */
392 	 if (rmesa->TexGenNeedNormals[i]) {
393 	    inputs |= VERT_BIT_NORMAL;
394 	 }
395 	 inputs |= VERT_BIT_TEX(i);
396       }
397    }
398 
399    radeonReleaseArrays( ctx, ~0 );
400    emit_end = radeonEnsureEmitSize( ctx, inputs )
401      + rmesa->radeon.cmdbuf.cs->cdw;
402    radeonEmitArrays( ctx, inputs );
403 
404    rmesa->tcl.Elts = VB->Elts;
405 
406    for (i = 0 ; i < VB->PrimitiveCount ; i++)
407    {
408       GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
409       GLuint start = VB->Primitive[i].start;
410       GLuint length = VB->Primitive[i].count;
411 
412       if (!length)
413 	 continue;
414 
415       if (rmesa->tcl.Elts)
416 	 radeonEmitEltPrimitive( ctx, start, start+length, prim );
417       else
418 	 radeonEmitPrimitive( ctx, start, start+length, prim );
419    }
420 
421    if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
422       WARN_ONCE("Rendering was %d commands larger than predicted size."
423 	  " We might overflow  command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
424 
425    return GL_FALSE;		/* finished the pipe */
426 }
427 
428 
429 
430 /* Initial state for tcl stage.
431  */
432 const struct tnl_pipeline_stage _radeon_tcl_stage =
433 {
434    "radeon render",
435    NULL,
436    NULL,
437    NULL,
438    NULL,
439    radeon_run_tcl_render	/* run */
440 };
441 
442 
443 
444 /**********************************************************************/
445 /*                 Validate state at pipeline start                   */
446 /**********************************************************************/
447 
448 
449 /*-----------------------------------------------------------------------
450  * Manage TCL fallbacks
451  */
452 
453 
transition_to_swtnl(struct gl_context * ctx)454 static void transition_to_swtnl( struct gl_context *ctx )
455 {
456    r100ContextPtr rmesa = R100_CONTEXT(ctx);
457    TNLcontext *tnl = TNL_CONTEXT(ctx);
458    GLuint se_cntl;
459 
460    RADEON_NEWPRIM( rmesa );
461    rmesa->swtcl.vertex_format = 0;
462 
463    radeonChooseVertexState( ctx );
464    radeonChooseRenderState( ctx );
465 
466    _tnl_validate_shine_tables( ctx );
467 
468    tnl->Driver.NotifyMaterialChange =
469       _tnl_validate_shine_tables;
470 
471    radeonReleaseArrays( ctx, ~0 );
472 
473    se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
474    se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
475 
476    if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
477       RADEON_STATECHANGE( rmesa, set );
478       rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
479    }
480 }
481 
482 
transition_to_hwtnl(struct gl_context * ctx)483 static void transition_to_hwtnl( struct gl_context *ctx )
484 {
485    r100ContextPtr rmesa = R100_CONTEXT(ctx);
486    TNLcontext *tnl = TNL_CONTEXT(ctx);
487    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
488 
489    se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
490 		     RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
491 		     RADEON_VTX_W0_IS_NOT_1_OVER_W0);
492    se_coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;
493 
494    if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
495       RADEON_STATECHANGE( rmesa, set );
496       rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
497       _tnl_need_projected_coords( ctx, GL_FALSE );
498    }
499 
500    radeonUpdateMaterial( ctx );
501 
502    tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
503 
504    if ( rmesa->radeon.dma.flush )
505       rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
506 
507    rmesa->radeon.dma.flush = NULL;
508    rmesa->swtcl.vertex_format = 0;
509 
510    //   if (rmesa->swtcl.indexed_verts.buf)
511    //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
512    //			      __func__ );
513 
514    if (RADEON_DEBUG & RADEON_FALLBACKS)
515       fprintf(stderr, "Radeon end tcl fallback\n");
516 }
517 
518 static char *fallbackStrings[] = {
519    "Rasterization fallback",
520    "Unfilled triangles",
521    "Twosided lighting, differing materials",
522    "Materials in VB (maybe between begin/end)",
523    "Texgen unit 0",
524    "Texgen unit 1",
525    "Texgen unit 2",
526    "User disable",
527    "Fogcoord with separate specular lighting"
528 };
529 
530 
getFallbackString(GLuint bit)531 static char *getFallbackString(GLuint bit)
532 {
533    int i = 0;
534    while (bit > 1) {
535       i++;
536       bit >>= 1;
537    }
538    return fallbackStrings[i];
539 }
540 
541 
542 
/* Set (mode != 0) or clear (mode == 0) one TCL fallback bit.
 *
 * Moving from "no fallbacks" to "some fallback" switches to software
 * t&l; clearing the only bit that was set switches back to hardware t&l.
 * Each transition is logged when RADEON_FALLBACKS debugging is enabled.
 */
void radeonTclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode )
{
   r100ContextPtr rmesa = R100_CONTEXT(ctx);
   const GLuint before = rmesa->radeon.TclFallback;

   if (!mode) {
      rmesa->radeon.TclFallback &= ~bit;

      /* Only the removal of the last remaining bit ends the fallback. */
      if (before != bit)
	 return;

      if (RADEON_DEBUG & RADEON_FALLBACKS)
	 fprintf(stderr, "Radeon end tcl fallback %s\n",
		 getFallbackString( bit ));
      transition_to_hwtnl( ctx );
      return;
   }

   rmesa->radeon.TclFallback |= bit;

   /* Only the first bit to be set begins the fallback. */
   if (before != 0)
      return;

   if (RADEON_DEBUG & RADEON_FALLBACKS)
      fprintf(stderr, "Radeon begin tcl fallback %s\n",
	      getFallbackString( bit ));
   transition_to_swtnl( ctx );
}
567