1 /*
2 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
3 
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15 
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 */
28 
29 /*
30  * Authors:
31  *   Keith Whitwell <keith@tungstengraphics.com>
32  */
33 
34 #include "main/glheader.h"
35 #include "main/imports.h"
36 #include "main/enums.h"
37 #include "main/colormac.h"
38 #include "main/api_arrayelt.h"
39 
40 #include "swrast/swrast.h"
41 #include "vbo/vbo.h"
42 #include "tnl/t_pipeline.h"
43 #include "swrast_setup/swrast_setup.h"
44 
45 #include "radeon_common.h"
46 #include "radeon_mipmap_tree.h"
47 #include "r200_context.h"
48 #include "r200_ioctl.h"
49 #include "r200_state.h"
50 #include "radeon_queryobj.h"
51 
52 #include "xmlpool.h"
53 
54 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
55  * 1.3 cmdbuffers allow all previous state to be updated as well as
56  * the tcl scalar and vector areas.
57  */
58 static struct {
59 	int start;
60 	int len;
61 	const char *name;
62 } packet[RADEON_MAX_STATE_PACKETS] = {
63 	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
64 	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
65 	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
66 	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
67 	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
68 	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
69 	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
70 	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
71 	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
72 	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
73 	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
74 	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
75 	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
76 	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
77 	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
78 	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
79 	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
80 	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
81 	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
82 	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
83 	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
84 		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
85 	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
86 	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
87 	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
88 	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
89 	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
90 	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
91 	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
92 	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
93 	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
94 	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
95 	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
96 	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
97 	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
98 	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
99 	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
100 	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
101 	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
102 	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
103 	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
104 	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
105 	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
106 	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
107 	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
108 	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
109 	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
110 	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
111 	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
112 	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
113 	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
114 	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
115 	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
116 	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
117 	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
118 	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
119 	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
120 	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
121 	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
122 	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
123 	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
124 	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
125 	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
126 		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
127 	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
128 	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
129 	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
130 	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
131 	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
132 	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
133 	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
134 	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
135 	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
136 	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
137 	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
138 	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
139 	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
140 	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
141 	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
142 	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
143 	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
144 	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
145 	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
146 	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
147 	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
148 	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
149 	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
150 	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
151 	{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},     /* 85 */
152 	{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
153 	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
154 	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
155 	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
156 	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
157 	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
158 	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
159 	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
160 	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
161 };
162 
163 /* =============================================================
164  * State initialization
165  */
/**
 * Build the CP_PACKET0 header word for one entry of the packet table.
 *
 * \param rmesa  not used by this function.
 * \param id     index into packet[].
 * \return CP_PACKET0() of the entry's start register and its length
 *         minus one.
 */
static int cmdpkt( r200ContextPtr rmesa, int id )
{
   const int first_reg = packet[id].start;
   const int count_minus_one = packet[id].len - 1;

   (void) rmesa;
   return CP_PACKET0(first_reg, count_minus_one);
}
170 
cmdvec(int offset,int stride,int count)171 static int cmdvec( int offset, int stride, int count )
172 {
173    drm_radeon_cmd_header_t h;
174    h.i = 0;
175    h.vectors.cmd_type = RADEON_CMD_VECTORS;
176    h.vectors.offset = offset;
177    h.vectors.stride = stride;
178    h.vectors.count = count;
179    return h.i;
180 }
181 
182 /* warning: the count here is divided by 4 compared to other cmds
183    (so it doesn't exceed the char size)! */
cmdveclinear(int offset,int count)184 static int cmdveclinear( int offset, int count )
185 {
186    drm_radeon_cmd_header_t h;
187    h.i = 0;
188    h.veclinear.cmd_type = RADEON_CMD_VECLINEAR;
189    h.veclinear.addr_lo = offset & 0xff;
190    h.veclinear.addr_hi = (offset & 0xff00) >> 8;
191    h.veclinear.count = count;
192    return h.i;
193 }
194 
cmdscl(int offset,int stride,int count)195 static int cmdscl( int offset, int stride, int count )
196 {
197    drm_radeon_cmd_header_t h;
198    h.i = 0;
199    h.scalars.cmd_type = RADEON_CMD_SCALARS;
200    h.scalars.offset = offset;
201    h.scalars.stride = stride;
202    h.scalars.count = count;
203    return h.i;
204 }
205 
cmdscl2(int offset,int stride,int count)206 static int cmdscl2( int offset, int stride, int count )
207 {
208    drm_radeon_cmd_header_t h;
209    h.i = 0;
210    h.scalars.cmd_type = RADEON_CMD_SCALARS2;
211    h.scalars.offset = offset - 0x100;
212    h.scalars.stride = stride;
213    h.scalars.count = count;
214    return h.i;
215 }
216 
/**
 * Check functions are used to check if state is active.
 * If it is active the check function returns the maximum emit size.
 *
 * CHECK( NM, FLAG, ADD ) defines check_NM(ctx, atom), which returns
 * atom->cmd_size + ADD when FLAG is non-zero and 0 otherwise.
 * FLAG is evaluated inside the function and may refer to ctx, atom
 * and rmesa.
 */
#define CHECK( NM, FLAG, ADD )						\
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom) \
{									\
   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
   (void) rmesa;							\
   if (!(FLAG))								\
      return 0;								\
   return atom->cmd_size + (ADD);					\
}
228 
/* TCL_CHECK( NM, FLAG, ADD ) defines check_NM(ctx, atom).  The state is
 * reported inactive (0) when TclFallback is set or a vertex program is
 * enabled; otherwise the result is atom->cmd_size + ADD if FLAG is
 * non-zero and 0 if not.  FLAG is only evaluated after the first two
 * tests have passed, and may refer to ctx, atom and rmesa.
 */
#define TCL_CHECK( NM, FLAG, ADD )					\
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom) \
{									\
   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
   if (rmesa->radeon.TclFallback || ctx->VertexProgram._Enabled)	\
      return 0;								\
   if (!(FLAG))								\
      return 0;								\
   return atom->cmd_size + (ADD);					\
}
235 
/* TCL_OR_VP_CHECK( NM, FLAG, ADD ) defines check_NM(ctx, atom).  The
 * state is reported inactive (0) when TclFallback is set; otherwise the
 * result is atom->cmd_size + ADD if FLAG is non-zero and 0 if not.
 * Unlike TCL_CHECK, the vertex program enable is not consulted.
 */
#define TCL_OR_VP_CHECK( NM, FLAG, ADD )				\
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom ) \
{									\
   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
   if (rmesa->radeon.TclFallback)					\
      return 0;								\
   if (!(FLAG))								\
      return 0;								\
   return atom->cmd_size + (ADD);					\
}
242 
/* VP_CHECK( NM, FLAG, ADD ) defines check_NM(ctx, atom).  The state is
 * reported inactive (0) when TclFallback is set or no vertex program is
 * enabled; otherwise the result is atom->cmd_size + ADD if FLAG is
 * non-zero and 0 if not.  FLAG is only evaluated once a vertex program
 * is known to be enabled, and may refer to ctx, atom and rmesa.
 */
#define VP_CHECK( NM, FLAG, ADD )					\
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom ) \
{									\
   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
   if (rmesa->radeon.TclFallback || !ctx->VertexProgram._Enabled)	\
      return 0;								\
   if (!(FLAG))								\
      return 0;								\
   return atom->cmd_size + (ADD);					\
}
250 
251 CHECK( always, GL_TRUE, 0 )
252 CHECK( always_add4, GL_TRUE, 4 )
253 CHECK( never, GL_FALSE, 0 )
254 CHECK( tex_any, ctx->Texture._EnabledUnits, 0 )
255 CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled), 0 );
256 CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled, 0 )
257    CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 )
258 CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)), 0 )
259 CHECK( afs, ctx->ATIFragmentShader._Enabled, 0 )
260 CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 3 + 3*5 - CUBE_STATE_SIZE )
261 CHECK( tex_cube_cs, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 2 + 4*5 - CUBE_STATE_SIZE )
262 TCL_CHECK( tcl_fog_add4, ctx->Fog.Enabled, 4 )
263 TCL_CHECK( tcl, GL_TRUE, 0 )
264 TCL_CHECK( tcl_add8, GL_TRUE, 8 )
265 TCL_CHECK( tcl_add4, GL_TRUE, 4 )
266 TCL_CHECK( tcl_tex_add4, rmesa->state.texture.unit[atom->idx].unitneeded, 4 )
267 TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 )
268 TCL_CHECK( tcl_lighting_add6, ctx->Light.Enabled, 6 )
269 TCL_CHECK( tcl_light_add6, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 6 )
270 TCL_OR_VP_CHECK( tcl_ucp_add4, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 4 )
271 TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE, 0 )
272 TCL_OR_VP_CHECK( tcl_or_vp_add2, GL_TRUE, 2 )
273 VP_CHECK( tcl_vp, GL_TRUE, 0 )
274 VP_CHECK( tcl_vp_add4, GL_TRUE, 4 )
275 VP_CHECK( tcl_vp_size_add4, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 4 )
276 VP_CHECK( tcl_vpp_size_add4, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 4 )
277 
/* Emit a vector upload into the current batch.
 *
 * hdr  - header word as produced by cmdvec(); its offset, stride and
 *        count fields are decoded here.
 * data - pointer to the dwords to upload (count of them are written).
 *
 * Writes a state flush, the vector index register and then the data
 * register followed by the table.  Must be used between a
 * BEGIN_BATCH* / END_BATCH pair.
 */
#define OUT_VEC(hdr, data) do {						\
    drm_radeon_cmd_header_t _hdr;					\
    _hdr.i = (hdr);							\
    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
    OUT_BATCH(0);							\
    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
    OUT_BATCH(_hdr.vectors.offset |					\
              (_hdr.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG,		\
                             _hdr.vectors.count - 1));			\
    OUT_BATCH_TABLE((data), _hdr.vectors.count);			\
  } while(0)
288 
/* Emit a linear vector upload as a complete batch section.
 *
 * hdr  - header word as produced by cmdveclinear(); the start address
 *        is rebuilt from addr_lo/addr_hi and the size is count * 4.
 * data - pointer to the dwords to upload.
 *
 * Unlike the other OUT_* macros this one opens and closes the batch
 * itself, and it reads a variable named `dwords` from the calling
 * scope for the batch size.  Nothing at all is emitted when the
 * decoded size is zero.
 */
#define OUT_VECLINEAR(hdr, data) do {					\
    drm_radeon_cmd_header_t _hdr;					\
    uint32_t _start, _sz;						\
    _hdr.i = (hdr);							\
    _start = _hdr.veclinear.addr_lo | (_hdr.veclinear.addr_hi << 8);	\
    _sz = _hdr.veclinear.count * 4;					\
    if (_sz) {								\
       BEGIN_BATCH_NO_AUTOSTATE(dwords);				\
       OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
       OUT_BATCH(0);							\
       OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
       OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
       OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1)); \
       OUT_BATCH_TABLE((data), _sz);					\
       END_BATCH();							\
    }									\
  } while(0)
306 
/* Emit a scalar upload into the current batch.
 *
 * hdr  - header word as produced by cmdscl(); its offset, stride and
 *        count fields are decoded here.
 * data - pointer to the dwords to upload (count of them are written).
 *
 * Must be used between a BEGIN_BATCH* / END_BATCH pair.
 */
#define OUT_SCL(hdr, data) do {						\
    drm_radeon_cmd_header_t _hdr;					\
    _hdr.i = (hdr);							\
    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
    OUT_BATCH((_hdr.scalars.offset) |					\
              (_hdr.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG,		\
                             _hdr.scalars.count - 1));			\
    OUT_BATCH_TABLE((data), _hdr.scalars.count);			\
  } while(0)
315 
/* Emit a scalar upload whose header was built by cmdscl2().
 *
 * Identical to OUT_SCL except that 0x100 is added to the decoded
 * offset, undoing the bias cmdscl2() applied when packing it.
 *
 * Must be used between a BEGIN_BATCH* / END_BATCH pair.
 */
#define OUT_SCL2(hdr, data) do {					\
    drm_radeon_cmd_header_t _hdr;					\
    _hdr.i = (hdr);							\
    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
    OUT_BATCH((_hdr.scalars.offset + 0x100) |				\
              (_hdr.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG,		\
                             _hdr.scalars.count - 1));			\
    OUT_BATCH_TABLE((data), _hdr.scalars.count);			\
  } while(0)
check_rrb(struct gl_context * ctx,struct radeon_state_atom * atom)324 static int check_rrb(struct gl_context *ctx, struct radeon_state_atom *atom)
325 {
326    r200ContextPtr r200 = R200_CONTEXT(ctx);
327    struct radeon_renderbuffer *rrb;
328    rrb = radeon_get_colorbuffer(&r200->radeon);
329    if (!rrb || !rrb->bo)
330       return 0;
331    return atom->cmd_size;
332 }
333 
check_polygon_stipple(struct gl_context * ctx,struct radeon_state_atom * atom)334 static int check_polygon_stipple(struct gl_context *ctx,
335 		struct radeon_state_atom *atom)
336 {
337    r200ContextPtr r200 = R200_CONTEXT(ctx);
338    if (r200->hw.set.cmd[SET_RE_CNTL] & R200_STIPPLE_ENABLE)
339 	   return atom->cmd_size;
340    return 0;
341 }
342 
mtl_emit(struct gl_context * ctx,struct radeon_state_atom * atom)343 static void mtl_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
344 {
345    r200ContextPtr r200 = R200_CONTEXT(ctx);
346    BATCH_LOCALS(&r200->radeon);
347    uint32_t dwords = atom->check(ctx, atom);
348 
349    BEGIN_BATCH_NO_AUTOSTATE(dwords);
350    OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1));
351    OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18));
352    END_BATCH();
353 }
354 
lit_emit(struct gl_context * ctx,struct radeon_state_atom * atom)355 static void lit_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
356 {
357    r200ContextPtr r200 = R200_CONTEXT(ctx);
358    BATCH_LOCALS(&r200->radeon);
359    uint32_t dwords = atom->check(ctx, atom);
360 
361    BEGIN_BATCH_NO_AUTOSTATE(dwords);
362    OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
363    OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
364    END_BATCH();
365 }
366 
ptp_emit(struct gl_context * ctx,struct radeon_state_atom * atom)367 static void ptp_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
368 {
369    r200ContextPtr r200 = R200_CONTEXT(ctx);
370    BATCH_LOCALS(&r200->radeon);
371    uint32_t dwords = atom->check(ctx, atom);
372 
373    BEGIN_BATCH_NO_AUTOSTATE(dwords);
374    OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1);
375    OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1);
376    END_BATCH();
377 }
378 
veclinear_emit(struct gl_context * ctx,struct radeon_state_atom * atom)379 static void veclinear_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
380 {
381    r200ContextPtr r200 = R200_CONTEXT(ctx);
382    BATCH_LOCALS(&r200->radeon);
383    uint32_t dwords = atom->check(ctx, atom);
384 
385    OUT_VECLINEAR(atom->cmd[0], atom->cmd+1);
386 }
387 
scl_emit(struct gl_context * ctx,struct radeon_state_atom * atom)388 static void scl_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
389 {
390    r200ContextPtr r200 = R200_CONTEXT(ctx);
391    BATCH_LOCALS(&r200->radeon);
392    uint32_t dwords = atom->check(ctx, atom);
393 
394    BEGIN_BATCH_NO_AUTOSTATE(dwords);
395    OUT_SCL(atom->cmd[0], atom->cmd+1);
396    END_BATCH();
397 }
398 
399 
vec_emit(struct gl_context * ctx,struct radeon_state_atom * atom)400 static void vec_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
401 {
402    r200ContextPtr r200 = R200_CONTEXT(ctx);
403    BATCH_LOCALS(&r200->radeon);
404    uint32_t dwords = atom->check(ctx, atom);
405 
406    BEGIN_BATCH_NO_AUTOSTATE(dwords);
407    OUT_VEC(atom->cmd[0], atom->cmd+1);
408    END_BATCH();
409 }
410 
check_always_ctx(struct gl_context * ctx,struct radeon_state_atom * atom)411 static int check_always_ctx( struct gl_context *ctx, struct radeon_state_atom *atom)
412 {
413    r200ContextPtr r200 = R200_CONTEXT(ctx);
414    struct radeon_renderbuffer *rrb, *drb;
415    uint32_t dwords;
416 
417    rrb = radeon_get_colorbuffer(&r200->radeon);
418    if (!rrb || !rrb->bo) {
419       return 0;
420    }
421 
422    drb = radeon_get_depthbuffer(&r200->radeon);
423 
424    dwords = 10;
425    if (drb)
426      dwords += 6;
427    if (rrb)
428      dwords += 8;
429    if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
430      dwords += 4;
431 
432 
433    return dwords;
434 }
435 
ctx_emit_cs(struct gl_context * ctx,struct radeon_state_atom * atom)436 static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
437 {
438    r200ContextPtr r200 = R200_CONTEXT(ctx);
439    BATCH_LOCALS(&r200->radeon);
440    struct radeon_renderbuffer *rrb, *drb;
441    uint32_t cbpitch = 0;
442    uint32_t zbpitch = 0;
443    uint32_t dwords = atom->check(ctx, atom);
444    uint32_t depth_fmt;
445 
446    rrb = radeon_get_colorbuffer(&r200->radeon);
447    if (!rrb || !rrb->bo) {
448       return;
449    }
450 
451    atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
452    if (rrb->cpp == 4)
453 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
454    else switch (rrb->base.Base.Format) {
455    case MESA_FORMAT_RGB565:
456 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
457 	break;
458    case MESA_FORMAT_ARGB4444:
459 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
460 	break;
461    case MESA_FORMAT_ARGB1555:
462 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
463 	break;
464    default:
465 	_mesa_problem(ctx, "Unexpected format in ctx_emit_cs");
466    }
467 
468    cbpitch = (rrb->pitch / rrb->cpp);
469    if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
470        cbpitch |= R200_COLOR_TILE_ENABLE;
471    if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
472        cbpitch |= R200_COLOR_MICROTILE_ENABLE;
473 
474 
475    drb = radeon_get_depthbuffer(&r200->radeon);
476    if (drb) {
477      zbpitch = (drb->pitch / drb->cpp);
478      if (drb->cpp == 4)
479         depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
480      else
481         depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
482      atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
483      atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
484    }
485 
486    /* output the first 7 bytes of context */
487    BEGIN_BATCH_NO_AUTOSTATE(dwords);
488 
489    /* In the CS case we need to split this up */
490    OUT_BATCH(CP_PACKET0(packet[0].start, 3));
491    OUT_BATCH_TABLE((atom->cmd + 1), 4);
492 
493    if (drb) {
494      OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
495      OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
496 
497      OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
498      OUT_BATCH(zbpitch);
499    }
500 
501    OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
502    OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
503    OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
504    OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
505    OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
506 
507 
508    if (rrb) {
509      OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
510      OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
511 
512      OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
513      OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
514    }
515 
516    if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
517      OUT_BATCH_TABLE((atom->cmd + 14), 4);
518    }
519 
520    END_BATCH();
521 }
522 
get_tex_mm_size(struct gl_context * ctx,struct radeon_state_atom * atom)523 static int get_tex_mm_size(struct gl_context* ctx, struct radeon_state_atom *atom)
524 {
525    r200ContextPtr r200 = R200_CONTEXT(ctx);
526    uint32_t dwords = atom->cmd_size + 2;
527    int hastexture = 1;
528    int i = atom->idx;
529    radeonTexObj *t = r200->state.texture.unit[i].texobj;
530    if (!t)
531 	hastexture = 0;
532    else {
533 	if (!t->mt && !t->bo)
534 		hastexture = 0;
535    }
536 
537    if (!hastexture)
538      dwords -= 4;
539    return dwords;
540 }
541 
check_tex_pair_mm(struct gl_context * ctx,struct radeon_state_atom * atom)542 static int check_tex_pair_mm(struct gl_context* ctx, struct radeon_state_atom *atom)
543 {
544    r200ContextPtr r200 = R200_CONTEXT(ctx);
545    /** XOR is bit flip operation so use it for finding pair */
546    if (!(r200->state.texture.unit[atom->idx].unitneeded | r200->state.texture.unit[atom->idx ^ 1].unitneeded))
547      return 0;
548 
549    return get_tex_mm_size(ctx, atom);
550 }
551 
check_tex_mm(struct gl_context * ctx,struct radeon_state_atom * atom)552 static int check_tex_mm(struct gl_context* ctx, struct radeon_state_atom *atom)
553 {
554    r200ContextPtr r200 = R200_CONTEXT(ctx);
555    if (!(r200->state.texture.unit[atom->idx].unitneeded))
556      return 0;
557 
558    return get_tex_mm_size(ctx, atom);
559 }
560 
561 
tex_emit_mm(struct gl_context * ctx,struct radeon_state_atom * atom)562 static void tex_emit_mm(struct gl_context *ctx, struct radeon_state_atom *atom)
563 {
564    r200ContextPtr r200 = R200_CONTEXT(ctx);
565    BATCH_LOCALS(&r200->radeon);
566    uint32_t dwords = atom->check(ctx, atom);
567    int i = atom->idx;
568    radeonTexObj *t = r200->state.texture.unit[i].texobj;
569 
570    if (!r200->state.texture.unit[i].unitneeded && !(dwords <= atom->cmd_size))
571         dwords -= 4;
572    BEGIN_BATCH_NO_AUTOSTATE(dwords);
573 
574    OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (32 * i), 7));
575    OUT_BATCH_TABLE((atom->cmd + 1), 8);
576 
577    if (dwords > atom->cmd_size) {
578      OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0));
579      if (t->mt && !t->image_override) {
580         OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
581 		  RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
582       } else {
583 	if (t->bo)
584             OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
585                             RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
586       }
587    }
588    END_BATCH();
589 }
590 
cube_emit_cs(struct gl_context * ctx,struct radeon_state_atom * atom)591 static void cube_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
592 {
593    r200ContextPtr r200 = R200_CONTEXT(ctx);
594    BATCH_LOCALS(&r200->radeon);
595    uint32_t dwords = atom->check(ctx, atom);
596    int i = atom->idx, j;
597    radeonTexObj *t = r200->state.texture.unit[i].texobj;
598    radeon_mipmap_level *lvl;
599    if (!(t && !t->image_override))
600      dwords = 2;
601 
602    BEGIN_BATCH_NO_AUTOSTATE(dwords);
603    OUT_BATCH_TABLE(atom->cmd, 2);
604 
605    if (t && !t->image_override) {
606      lvl = &t->mt->levels[0];
607      for (j = 1; j <= 5; j++) {
608        OUT_BATCH(CP_PACKET0(R200_PP_CUBIC_OFFSET_F1_0 + (24*i) + (4 * (j-1)), 0));
609        OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
610 			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
611      }
612    }
613    END_BATCH();
614 }
615 
616 /* Initialize the context's hardware state.
617  */
r200InitState(r200ContextPtr rmesa)618 void r200InitState( r200ContextPtr rmesa )
619 {
620    struct gl_context *ctx = rmesa->radeon.glCtx;
621    GLuint i;
622 
623    rmesa->radeon.Fallback = 0;
624 
625    rmesa->radeon.hw.max_state_size = 0;
626 
627 #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX )				\
628    do {								\
629       rmesa->hw.ATOM.cmd_size = SZ;				\
630       rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
631       rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
632       rmesa->hw.ATOM.name = NM;					\
633       rmesa->hw.ATOM.idx = IDX;					\
634       if (check_##CHK != check_never) {				\
635          rmesa->hw.ATOM.check = check_##CHK;			\
636          rmesa->radeon.hw.max_state_size += SZ * sizeof(int);	\
637       } else {							\
638          rmesa->hw.ATOM.check = NULL;				\
639       }								\
640       rmesa->hw.ATOM.dirty = GL_FALSE;				\
641    } while (0)
642 
643 
644    /* Allocate state buffers:
645     */
646    ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
647 
648    rmesa->hw.ctx.emit = ctx_emit_cs;
649    rmesa->hw.ctx.check = check_always_ctx;
650    ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
651    ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
652    ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
653    ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
654    ALLOC_STATE( vtx, always, VTX_STATE_SIZE, "VTX/vertex", 0 );
655    ALLOC_STATE( vap, always, VAP_STATE_SIZE, "VAP/vap", 0 );
656    ALLOC_STATE( vte, always, VTE_STATE_SIZE, "VTE/vte", 0 );
657    ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
658    ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
659    ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
660    ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
661    {
662       int state_size = TEX_STATE_SIZE_NEWDRM;
663       if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
664          /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
665          ALLOC_STATE( tex[0], tex_pair_mm, state_size, "TEX/tex-0", 0 );
666          ALLOC_STATE( tex[1], tex_pair_mm, state_size, "TEX/tex-1", 1 );
667          ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
668       }
669       else {
670          ALLOC_STATE( tex[0], tex_mm, state_size, "TEX/tex-0", 0 );
671          ALLOC_STATE( tex[1], tex_mm, state_size, "TEX/tex-1", 1 );
672          ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
673       }
674       ALLOC_STATE( tex[2], tex_mm, state_size, "TEX/tex-2", 2 );
675       ALLOC_STATE( tex[3], tex_mm, state_size, "TEX/tex-3", 3 );
676       ALLOC_STATE( tex[4], tex_mm, state_size, "TEX/tex-4", 4 );
677       ALLOC_STATE( tex[5], tex_mm, state_size, "TEX/tex-5", 5 );
678       ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 );
679       ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
680       ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
681    }
682 
683    ALLOC_STATE( stp, polygon_stipple, STP_STATE_SIZE, "STP/stp", 0 );
684 
685    for (i = 0; i < 6; i++)
686       rmesa->hw.tex[i].emit = tex_emit_mm;
687    ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
688    ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
689    ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 );
690    ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
691    ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
692    ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
693    for (i = 0; i < 6; i++) {
694       rmesa->hw.cube[i].emit = cube_emit_cs;
695       rmesa->hw.cube[i].check = check_tex_cube_cs;
696    }
697 
698    ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
699    ALLOC_STATE( vpi[0], tcl_vp_add4, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
700    ALLOC_STATE( vpi[1], tcl_vp_size_add4, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
701    ALLOC_STATE( vpp[0], tcl_vp_add4, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
702    ALLOC_STATE( vpp[1], tcl_vpp_size_add4, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
703 
704    /* FIXME: this atom has two commands, we need only one (ucp_vert_blend) for vp */
705    ALLOC_STATE( tcl, tcl_or_vp, TCL_STATE_SIZE, "TCL/tcl", 0 );
706    ALLOC_STATE( msl, tcl, MSL_STATE_SIZE, "MSL/matrix-select", 0 );
707    ALLOC_STATE( tcg, tcl, TCG_STATE_SIZE, "TCG/texcoordgen", 0 );
708    ALLOC_STATE( mtl[0], tcl_lighting_add6, MTL_STATE_SIZE, "MTL0/material0", 0 );
709    ALLOC_STATE( mtl[1], tcl_lighting_add6, MTL_STATE_SIZE, "MTL1/material1", 1 );
710    ALLOC_STATE( grd, tcl_or_vp_add2, GRD_STATE_SIZE, "GRD/guard-band", 0 );
711    ALLOC_STATE( fog, tcl_fog_add4, FOG_STATE_SIZE, "FOG/fog", 0 );
712    ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 0 );
713    ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
714    ALLOC_STATE( mat[R200_MTX_MV], tcl_add4, MAT_STATE_SIZE, "MAT/modelview", 0 );
715    ALLOC_STATE( mat[R200_MTX_IMV], tcl_add4, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
716    ALLOC_STATE( mat[R200_MTX_MVP], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 0 );
717    ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat0", 0 );
718    ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 );
719    ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat2", 2 );
720    ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat3", 3 );
721    ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat4", 4 );
722    ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat5", 5 );
723    ALLOC_STATE( ucp[0], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
724    ALLOC_STATE( ucp[1], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
725    ALLOC_STATE( ucp[2], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
726    ALLOC_STATE( ucp[3], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
727    ALLOC_STATE( ucp[4], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
728    ALLOC_STATE( ucp[5], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
729    ALLOC_STATE( lit[0], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-0", 0 );
730    ALLOC_STATE( lit[1], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-1", 1 );
731    ALLOC_STATE( lit[2], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-2", 2 );
732    ALLOC_STATE( lit[3], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-3", 3 );
733    ALLOC_STATE( lit[4], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-4", 4 );
734    ALLOC_STATE( lit[5], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-5", 5 );
735    ALLOC_STATE( lit[6], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-6", 6 );
736    ALLOC_STATE( lit[7], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-7", 7 );
737    ALLOC_STATE( sci, rrb, SCI_STATE_SIZE, "SCI/scissor", 0 );
738    ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
739    ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 );
740    ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 );
741    ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
742    ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 );
743    ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 );
744    ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
745    ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
746    ALLOC_STATE( ptp, tcl_add8, PTP_STATE_SIZE, "PTP/pointparams", 0 );
747 
748    r200SetUpAtomList( rmesa );
749 
750    /* Fill in the packet headers:
751     */
752    rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
753    rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
754    rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
755    rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(rmesa, R200_EMIT_RB3D_BLENDCOLOR);
756    rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
757    rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
758    rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
759    rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
760    rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
761    rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
762    rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CNTL_X);
763    rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(rmesa, R200_EMIT_RB3D_DEPTHXY_OFFSET);
764    rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(rmesa, R200_EMIT_RE_AUX_SCISSOR_CNTL);
765    rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(rmesa, R200_EMIT_SE_VAP_CNTL_STATUS);
766    rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(rmesa, R200_EMIT_RE_POINTSIZE);
767    rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(rmesa, R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
768    rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TAM_DEBUG3);
769    rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(rmesa, R200_EMIT_TFACTOR_0);
770    rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(rmesa, R200_EMIT_ATF_TFACTOR);
771    rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_0);
772    rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
773    rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_1);
774    rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
775    rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_2);
776    rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
777    rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_3);
778    rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
779    rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_4);
780    rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
781    rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_5);
782    rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
783    rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_0);
784    rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_1);
785    rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_PVS_CNTL);
786    rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_0);
787    rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_0);
788    rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_1);
789    rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_1);
790    rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_2);
791    rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_2);
792    rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_3);
793    rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_3);
794    rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_4);
795    rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_4);
796    rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_5);
797    rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_5);
798    rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_0);
799    rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_1);
800    rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_2);
801    rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_3);
802    rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_4);
803    rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_5);
804    rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
805    rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
806    rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(rmesa, R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
807    rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(rmesa, R200_EMIT_TEX_PROC_CTL_2);
808    rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(rmesa, R200_EMIT_MATRIX_SELECT_0);
809    rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_CTL);
810    rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTX_FMT_0);
811    rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(rmesa, R200_EMIT_OUTPUT_VTX_COMP_SEL);
812    rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(rmesa, R200_EMIT_SE_VTX_STATE_CNTL);
813    rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL);
814    rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL);
815    rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL);
816 
817    rmesa->hw.sci.cmd[SCI_CMD_1] = CP_PACKET0(R200_RE_TOP_LEFT, 0);
818    rmesa->hw.sci.cmd[SCI_CMD_2] = CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0);
819 
820    rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
821    rmesa->hw.stp.cmd[STP_DATA_0] = 0;
822    rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
823 
824    rmesa->hw.mtl[0].emit = mtl_emit;
825    rmesa->hw.mtl[1].emit = mtl_emit;
826 
827    rmesa->hw.vpi[0].emit = veclinear_emit;
828    rmesa->hw.vpi[1].emit = veclinear_emit;
829    rmesa->hw.vpp[0].emit = veclinear_emit;
830    rmesa->hw.vpp[1].emit = veclinear_emit;
831 
832    rmesa->hw.grd.emit = scl_emit;
833    rmesa->hw.fog.emit = vec_emit;
834    rmesa->hw.glt.emit = vec_emit;
835    rmesa->hw.eye.emit = vec_emit;
836 
837    for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++)
838       rmesa->hw.mat[i].emit = vec_emit;
839 
840    for (i = 0; i < 8; i++)
841       rmesa->hw.lit[i].emit = lit_emit;
842 
843    for (i = 0; i < 6; i++)
844       rmesa->hw.ucp[i].emit = vec_emit;
845 
846    rmesa->hw.ptp.emit = ptp_emit;
847 
848    rmesa->hw.mtl[0].cmd[MTL_CMD_0] =
849       cmdvec( R200_VS_MAT_0_EMISS, 1, 16 );
850    rmesa->hw.mtl[0].cmd[MTL_CMD_1] =
851       cmdscl2( R200_SS_MAT_0_SHININESS, 1, 1 );
852    rmesa->hw.mtl[1].cmd[MTL_CMD_0] =
853       cmdvec( R200_VS_MAT_1_EMISS, 1, 16 );
854    rmesa->hw.mtl[1].cmd[MTL_CMD_1] =
855       cmdscl2( R200_SS_MAT_1_SHININESS, 1, 1 );
856 
857    rmesa->hw.vpi[0].cmd[VPI_CMD_0] =
858       cmdveclinear( R200_PVS_PROG0, 64 );
859    rmesa->hw.vpi[1].cmd[VPI_CMD_0] =
860       cmdveclinear( R200_PVS_PROG1, 64 );
861    rmesa->hw.vpp[0].cmd[VPP_CMD_0] =
862       cmdveclinear( R200_PVS_PARAM0, 96 );
863    rmesa->hw.vpp[1].cmd[VPP_CMD_0] =
864       cmdveclinear( R200_PVS_PARAM1, 96 );
865 
866    rmesa->hw.grd.cmd[GRD_CMD_0] =
867       cmdscl( R200_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
868    rmesa->hw.fog.cmd[FOG_CMD_0] =
869       cmdvec( R200_VS_FOG_PARAM_ADDR, 1, 4 );
870    rmesa->hw.glt.cmd[GLT_CMD_0] =
871       cmdvec( R200_VS_GLOBAL_AMBIENT_ADDR, 1, 4 );
872    rmesa->hw.eye.cmd[EYE_CMD_0] =
873       cmdvec( R200_VS_EYE_VECTOR_ADDR, 1, 4 );
874 
875    rmesa->hw.mat[R200_MTX_MV].cmd[MAT_CMD_0] =
876       cmdvec( R200_VS_MATRIX_0_MV, 1, 16);
877    rmesa->hw.mat[R200_MTX_IMV].cmd[MAT_CMD_0] =
878       cmdvec( R200_VS_MATRIX_1_INV_MV, 1, 16);
879    rmesa->hw.mat[R200_MTX_MVP].cmd[MAT_CMD_0] =
880       cmdvec( R200_VS_MATRIX_2_MVP, 1, 16);
881    rmesa->hw.mat[R200_MTX_TEX0].cmd[MAT_CMD_0] =
882       cmdvec( R200_VS_MATRIX_3_TEX0, 1, 16);
883    rmesa->hw.mat[R200_MTX_TEX1].cmd[MAT_CMD_0] =
884       cmdvec( R200_VS_MATRIX_4_TEX1, 1, 16);
885    rmesa->hw.mat[R200_MTX_TEX2].cmd[MAT_CMD_0] =
886       cmdvec( R200_VS_MATRIX_5_TEX2, 1, 16);
887    rmesa->hw.mat[R200_MTX_TEX3].cmd[MAT_CMD_0] =
888       cmdvec( R200_VS_MATRIX_6_TEX3, 1, 16);
889    rmesa->hw.mat[R200_MTX_TEX4].cmd[MAT_CMD_0] =
890       cmdvec( R200_VS_MATRIX_7_TEX4, 1, 16);
891    rmesa->hw.mat[R200_MTX_TEX5].cmd[MAT_CMD_0] =
892       cmdvec( R200_VS_MATRIX_8_TEX5, 1, 16);
893 
894    for (i = 0 ; i < 8; i++) {
895       rmesa->hw.lit[i].cmd[LIT_CMD_0] =
896 	 cmdvec( R200_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 );
897       rmesa->hw.lit[i].cmd[LIT_CMD_1] =
898 	 cmdscl( R200_SS_LIGHT_DCD_ADDR + i, 8, 7 );
899    }
900 
901    for (i = 0 ; i < 6; i++) {
902       rmesa->hw.ucp[i].cmd[UCP_CMD_0] =
903 	 cmdvec( R200_VS_UCP_ADDR + i, 1, 4 );
904    }
905 
906    rmesa->hw.ptp.cmd[PTP_CMD_0] =
907       cmdvec( R200_VS_PNT_SPRITE_VPORT_SCALE, 1, 4 );
908    rmesa->hw.ptp.cmd[PTP_CMD_1] =
909       cmdvec( R200_VS_PNT_SPRITE_ATT_CONST, 1, 12 );
910 
911    /* Initial Hardware state:
912     */
913    rmesa->hw.ctx.cmd[CTX_PP_MISC] = (R200_ALPHA_TEST_PASS
914 				     /* | R200_RIGHT_HAND_CUBE_OGL*/);
915 
916    rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (R200_FOG_VERTEX |
917 					  R200_FOG_USE_SPEC_ALPHA);
918 
919    rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000;
920 
921    rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
922 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
923 				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
924 
925    rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000;
926    rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
927 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
928 				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
929    rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
930 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
931 				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
932 
933    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
934       rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation;
935 
936    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] =
937       ((rmesa->radeon.radeonScreen->depthPitch &
938 	R200_DEPTHPITCH_MASK) |
939        R200_DEPTH_ENDIAN_NO_SWAP);
940 
941    if (rmesa->using_hyperz)
942       rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ;
943 
944    rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (R200_Z_TEST_LESS |
945 					       R200_STENCIL_TEST_ALWAYS |
946 					       R200_STENCIL_FAIL_KEEP |
947 					       R200_STENCIL_ZPASS_KEEP |
948 					       R200_STENCIL_ZFAIL_KEEP |
949 					       R200_Z_WRITE_ENABLE);
950 
951    if (rmesa->using_hyperz) {
952       rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE |
953 						  R200_Z_DECOMPRESSION_ENABLE;
954 /*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
955 	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
956    }
957 
958    rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE
959  				     | R200_TEX_BLEND_0_ENABLE);
960 
961    switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
962    case DRI_CONF_DITHER_XERRORDIFFRESET:
963       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT;
964       break;
965    case DRI_CONF_DITHER_ORDERED:
966       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE;
967       break;
968    }
969    if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
970 	DRI_CONF_ROUND_ROUND )
971       rmesa->radeon.state.color.roundEnable = R200_ROUND_ENABLE;
972    else
973       rmesa->radeon.state.color.roundEnable = 0;
974    if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
975 	DRI_CONF_COLOR_REDUCTION_DITHER )
976       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
977    else
978       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
979 
980    rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK *
981 			driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality");
982    rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0;
983 
984    rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW |
985 				     R200_BFACE_SOLID |
986 				     R200_FFACE_SOLID |
987 				     R200_FLAT_SHADE_VTX_LAST |
988 				     R200_DIFFUSE_SHADE_GOURAUD |
989 				     R200_ALPHA_SHADE_GOURAUD |
990 				     R200_SPECULAR_SHADE_GOURAUD |
991 				     R200_FOG_SHADE_GOURAUD |
992 				     R200_DISC_FOG_SHADE_GOURAUD |
993 				     R200_VTX_PIX_CENTER_OGL |
994 				     R200_ROUND_MODE_TRUNC |
995 				     R200_ROUND_PREC_8TH_PIX);
996 
997    rmesa->hw.set.cmd[SET_RE_CNTL] = (R200_PERSPECTIVE_ENABLE |
998 				     R200_SCISSOR_ENABLE);
999 
1000    rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((1 << 16) | 0xffff);
1001 
1002    rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] =
1003       ((0 << R200_LINE_CURRENT_PTR_SHIFT) |
1004        (1 << R200_LINE_CURRENT_COUNT_SHIFT));
1005 
1006    rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4);
1007 
1008    rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] =
1009       ((0x00 << R200_STENCIL_REF_SHIFT) |
1010        (0xff << R200_STENCIL_MASK_SHIFT) |
1011        (0xff << R200_STENCIL_WRITEMASK_SHIFT));
1012 
1013    rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = R200_ROP_COPY;
1014    rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff;
1015 
1016    rmesa->hw.tam.cmd[TAM_DEBUG3] = 0;
1017 
1018    rmesa->hw.msc.cmd[MSC_RE_MISC] =
1019       ((0 << R200_STIPPLE_X_OFFSET_SHIFT) |
1020        (0 << R200_STIPPLE_Y_OFFSET_SHIFT) |
1021        R200_STIPPLE_BIG_BIT_ORDER);
1022 
1023 
1024    rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
1025    rmesa->hw.cst.cmd[CST_RB3D_DEPTHXY_OFFSET] = 0;
1026    rmesa->hw.cst.cmd[CST_RE_AUX_SCISSOR_CNTL] = 0x0;
1027    rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] =
1028 #ifdef MESA_BIG_ENDIAN
1029 						R200_VC_32BIT_SWAP;
1030 #else
1031 						R200_VC_NO_SWAP;
1032 #endif
1033 
1034    if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
1035       /* Bypass TCL */
1036       rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8);
1037    }
1038 
1039    rmesa->hw.cst.cmd[CST_RE_POINTSIZE] =
1040       (((GLuint)(ctx->Const.MaxPointSize * 16.0)) << R200_MAXPOINTSIZE_SHIFT) | 0x10;
1041    rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_0] =
1042       (0x0 << R200_VERTEX_POSITION_ADDR__SHIFT);
1043    rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_1] =
1044       (0x02 << R200_VTX_COLOR_0_ADDR__SHIFT) |
1045       (0x03 << R200_VTX_COLOR_1_ADDR__SHIFT);
1046    rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_2] =
1047       (0x06 << R200_VTX_TEX_0_ADDR__SHIFT) |
1048       (0x07 << R200_VTX_TEX_1_ADDR__SHIFT) |
1049       (0x08 << R200_VTX_TEX_2_ADDR__SHIFT) |
1050       (0x09 << R200_VTX_TEX_3_ADDR__SHIFT);
1051    rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_3] =
1052       (0x0A << R200_VTX_TEX_4_ADDR__SHIFT) |
1053       (0x0B << R200_VTX_TEX_5_ADDR__SHIFT);
1054 
1055 
1056    rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = 0x00000000;
1057    rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000;
1058    rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = 0x00000000;
1059    rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000;
1060    rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = 0x00000000;
1061    rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000;
1062 
1063    for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) {
1064       rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] = R200_BORDER_MODE_OGL;
1065       rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT] =
1066          ((i << R200_TXFORMAT_ST_ROUTE_SHIFT) |  /* <-- note i */
1067           (2 << R200_TXFORMAT_WIDTH_SHIFT) |
1068           (2 << R200_TXFORMAT_HEIGHT_SHIFT));
1069       rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
1070       rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
1071          (/* R200_TEXCOORD_PROJ | */
1072           R200_LOD_BIAS_CORRECTION);	/* Small default bias */
1073       rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
1074 	     rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1075       rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
1076       rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
1077 
1078       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
1079       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =
1080          rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1081       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] =
1082          rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1083       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] =
1084          rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1085       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] =
1086          rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1087       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] =
1088          rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1089 
1090       rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] =
1091          (R200_TXC_ARG_A_ZERO |
1092           R200_TXC_ARG_B_ZERO |
1093           R200_TXC_ARG_C_DIFFUSE_COLOR |
1094           R200_TXC_OP_MADD);
1095 
1096       rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND2] =
1097          ((i << R200_TXC_TFACTOR_SEL_SHIFT) |
1098           R200_TXC_SCALE_1X |
1099           R200_TXC_CLAMP_0_1 |
1100           R200_TXC_OUTPUT_REG_R0);
1101 
1102       rmesa->hw.pix[i].cmd[PIX_PP_TXABLEND] =
1103          (R200_TXA_ARG_A_ZERO |
1104           R200_TXA_ARG_B_ZERO |
1105           R200_TXA_ARG_C_DIFFUSE_ALPHA |
1106           R200_TXA_OP_MADD);
1107 
1108       rmesa->hw.pix[i].cmd[PIX_PP_TXABLEND2] =
1109          ((i << R200_TXA_TFACTOR_SEL_SHIFT) |
1110           R200_TXA_SCALE_1X |
1111           R200_TXA_CLAMP_0_1 |
1112           R200_TXA_OUTPUT_REG_R0);
1113    }
1114 
1115    rmesa->hw.tf.cmd[TF_TFACTOR_0] = 0;
1116    rmesa->hw.tf.cmd[TF_TFACTOR_1] = 0;
1117    rmesa->hw.tf.cmd[TF_TFACTOR_2] = 0;
1118    rmesa->hw.tf.cmd[TF_TFACTOR_3] = 0;
1119    rmesa->hw.tf.cmd[TF_TFACTOR_4] = 0;
1120    rmesa->hw.tf.cmd[TF_TFACTOR_5] = 0;
1121 
1122    rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] =
1123       (R200_VAP_TCL_ENABLE |
1124        (0x9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT));
1125 
1126    rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] =
1127       (R200_VPORT_X_SCALE_ENA |
1128        R200_VPORT_Y_SCALE_ENA |
1129        R200_VPORT_Z_SCALE_ENA |
1130        R200_VPORT_X_OFFSET_ENA |
1131        R200_VPORT_Y_OFFSET_ENA |
1132        R200_VPORT_Z_OFFSET_ENA |
1133 /* FIXME: Turn on for tex rect only */
1134        R200_VTX_ST_DENORMALIZED |
1135        R200_VTX_W0_FMT);
1136 
1137 
1138    rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = 0;
1139    rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = 0;
1140    rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] =
1141       ((R200_VTX_Z0 | R200_VTX_W0 |
1142        (R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)));
1143    rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] = 0;
1144    rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = (R200_OUTPUT_XYZW);
1145    rmesa->hw.vtx.cmd[VTX_STATE_CNTL] = R200_VSC_UPDATE_USER_COLOR_0_ENABLE;
1146 
1147 
1148    /* Matrix selection */
1149    rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_0] =
1150       (R200_MTX_MV << R200_MODELVIEW_0_SHIFT);
1151 
1152    rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_1] =
1153        (R200_MTX_IMV << R200_IT_MODELVIEW_0_SHIFT);
1154 
1155    rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_2] =
1156       (R200_MTX_MVP << R200_MODELPROJECT_0_SHIFT);
1157 
1158    rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_3] =
1159       ((R200_MTX_TEX0 << R200_TEXMAT_0_SHIFT) |
1160        (R200_MTX_TEX1 << R200_TEXMAT_1_SHIFT) |
1161        (R200_MTX_TEX2 << R200_TEXMAT_2_SHIFT) |
1162        (R200_MTX_TEX3 << R200_TEXMAT_3_SHIFT));
1163 
1164    rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_4] =
1165       ((R200_MTX_TEX4 << R200_TEXMAT_4_SHIFT) |
1166        (R200_MTX_TEX5 << R200_TEXMAT_5_SHIFT));
1167 
1168 
1169    /* General TCL state */
1170    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] =
1171       (R200_SPECULAR_LIGHTS |
1172        R200_DIFFUSE_SPECULAR_COMBINE |
1173        R200_LOCAL_LIGHT_VEC_GL |
1174        R200_LM0_SOURCE_MATERIAL_0 << R200_FRONT_SHININESS_SOURCE_SHIFT |
1175        R200_LM0_SOURCE_MATERIAL_1 << R200_BACK_SHININESS_SOURCE_SHIFT);
1176 
1177    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] =
1178       ((R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
1179        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
1180        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
1181        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT) |
1182        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_EMISSIVE_SOURCE_SHIFT) |
1183        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_AMBIENT_SOURCE_SHIFT) |
1184        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_DIFFUSE_SOURCE_SHIFT) |
1185        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_SPECULAR_SOURCE_SHIFT));
1186 
1187    rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_0] = 0; /* filled in via callbacks */
1188    rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_1] = 0;
1189    rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_2] = 0;
1190    rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_3] = 0;
1191 
1192    rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] =
1193       (R200_UCP_IN_CLIP_SPACE |
1194        R200_CULL_FRONT_IS_CCW);
1195 
1196    /* Texgen/Texmat state */
1197    rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] = 0x00ffffff;
1198    rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_3] =
1199       ((0 << R200_TEXGEN_0_INPUT_TEX_SHIFT) |
1200        (1 << R200_TEXGEN_1_INPUT_TEX_SHIFT) |
1201        (2 << R200_TEXGEN_2_INPUT_TEX_SHIFT) |
1202        (3 << R200_TEXGEN_3_INPUT_TEX_SHIFT) |
1203        (4 << R200_TEXGEN_4_INPUT_TEX_SHIFT) |
1204        (5 << R200_TEXGEN_5_INPUT_TEX_SHIFT));
1205    rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = 0;
1206    rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] =
1207       ((0 << R200_TEXGEN_0_INPUT_SHIFT) |
1208        (1 << R200_TEXGEN_1_INPUT_SHIFT) |
1209        (2 << R200_TEXGEN_2_INPUT_SHIFT) |
1210        (3 << R200_TEXGEN_3_INPUT_SHIFT) |
1211        (4 << R200_TEXGEN_4_INPUT_SHIFT) |
1212        (5 << R200_TEXGEN_5_INPUT_SHIFT));
1213    rmesa->hw.tcg.cmd[TCG_TEX_CYL_WRAP_CTL] = 0;
1214 
1215 
1216    for (i = 0 ; i < 8; i++) {
1217       struct gl_light *l = &ctx->Light.Light[i];
1218       GLenum p = GL_LIGHT0 + i;
1219       *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX;
1220 
1221       ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient );
1222       ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse );
1223       ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular );
1224       ctx->Driver.Lightfv( ctx, p, GL_POSITION, NULL );
1225       ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, NULL );
1226       ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent );
1227       ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff );
1228       ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION,
1229 			   &l->ConstantAttenuation );
1230       ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION,
1231 			   &l->LinearAttenuation );
1232       ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION,
1233 			   &l->QuadraticAttenuation );
1234       *(float *)&(rmesa->hw.lit[i].cmd[LIT_ATTEN_XXX]) = 0.0;
1235    }
1236 
1237    ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT,
1238 			     ctx->Light.Model.Ambient );
1239 
1240    TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
1241 
1242    for (i = 0 ; i < 6; i++) {
1243       ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL );
1244    }
1245 
1246    ctx->Driver.Fogfv( ctx, GL_FOG_MODE, NULL );
1247    ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
1248    ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
1249    ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
1250    ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
1251    ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL );
1252 
1253    rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE;
1254    rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE;
1255    rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE;
1256    rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE;
1257 
1258    rmesa->hw.eye.cmd[EYE_X] = 0;
1259    rmesa->hw.eye.cmd[EYE_Y] = 0;
1260    rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
1261    rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
1262 
1263    rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] =
1264       R200_PS_SE_SEL_STATE | R200_PS_MULT_CONST;
1265 
1266    /* ptp_eye is presumably used to calculate the attenuation wrt a different
1267       location? In any case, since point attenuation triggers _needeyecoords,
1268       it is constant. Probably ignored as long as R200_PS_USE_MODEL_EYE_VEC
1269       isn't set */
1270    rmesa->hw.ptp.cmd[PTP_EYE_X] = 0;
1271    rmesa->hw.ptp.cmd[PTP_EYE_Y] = 0;
1272    rmesa->hw.ptp.cmd[PTP_EYE_Z] = IEEE_ONE | 0x80000000; /* -1.0 */
1273    rmesa->hw.ptp.cmd[PTP_EYE_3] = 0;
1274    /* no idea what the ptp_vport_scale values are good for, except the
1275       PTSIZE one - hopefully doesn't matter */
1276    rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_0] = IEEE_ONE;
1277    rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_1] = IEEE_ONE;
1278    rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_PTSIZE] = IEEE_ONE;
1279    rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_3] = IEEE_ONE;
1280    rmesa->hw.ptp.cmd[PTP_ATT_CONST_QUAD] = 0;
1281    rmesa->hw.ptp.cmd[PTP_ATT_CONST_LIN] = 0;
1282    rmesa->hw.ptp.cmd[PTP_ATT_CONST_CON] = IEEE_ONE;
1283    rmesa->hw.ptp.cmd[PTP_ATT_CONST_3] = 0;
1284    rmesa->hw.ptp.cmd[PTP_CLAMP_MIN] = IEEE_ONE;
1285    rmesa->hw.ptp.cmd[PTP_CLAMP_MAX] = 0x44ffe000; /* 2047 */
1286    rmesa->hw.ptp.cmd[PTP_CLAMP_2] = 0;
1287    rmesa->hw.ptp.cmd[PTP_CLAMP_3] = 0;
1288 
1289    r200LightingSpaceChange( ctx );
1290 
1291    radeon_init_query_stateobj(&rmesa->radeon, R200_QUERYOBJ_CMDSIZE);
1292    rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_CMD_0] = CP_PACKET0(RADEON_RB3D_ZPASS_DATA, 0);
1293    rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_DATA_0] = 0;
1294 
1295    rmesa->radeon.hw.all_dirty = GL_TRUE;
1296 
1297    rcommonInitCmdBuf(&rmesa->radeon);
1298 }
1299