1 /*
2 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
3 
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15 
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 */
28 
29 /*
30  * Authors:
31  *   Keith Whitwell <keithw@vmware.com>
32  */
33 
34 #include "main/glheader.h"
35 #include "main/imports.h"
36 #include "main/enums.h"
37 #include "main/api_arrayelt.h"
38 #include "main/state.h"
39 
40 #include "swrast/swrast.h"
41 #include "vbo/vbo.h"
42 #include "tnl/t_pipeline.h"
43 #include "swrast_setup/swrast_setup.h"
44 
45 #include "radeon_common.h"
46 #include "radeon_mipmap_tree.h"
47 #include "r200_context.h"
48 #include "r200_ioctl.h"
49 #include "r200_state.h"
50 #include "radeon_queryobj.h"
51 
52 #include "util/xmlpool.h"
53 
/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 *
 * Each table entry holds:
 *   start - value handed to CP_PACKET0() as the packet's register,
 *   len   - entry length; cmdpkt() passes (len - 1) to CP_PACKET0(),
 *   name  - label string (not referenced by the code visible here).
 *
 * cmdpkt() indexes this table directly by packet id, so the order of the
 * entries is significant.  The numeric comments on a few entries give
 * that entry's index in the table.
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	/* From here on some labels differ from the start register macro. */
	{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},     /* 85 */
	{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
162 
163 /* =============================================================
164  * State initialization
165  */
/**
 * Build the packet header dword for state packet \p id.
 *
 * Entry \p id of the packet[] table supplies the register (start) and
 * the count (len - 1) handed to CP_PACKET0().  \p rmesa is not used, and
 * \p id is not range checked here.
 */
static int cmdpkt( r200ContextPtr rmesa, int id )
{
   const int first_reg = packet[id].start;
   const int count_minus_one = packet[id].len - 1;

   return CP_PACKET0(first_reg, count_minus_one);
}
170 
cmdvec(int offset,int stride,int count)171 static int cmdvec( int offset, int stride, int count )
172 {
173    drm_radeon_cmd_header_t h;
174    h.i = 0;
175    h.vectors.cmd_type = RADEON_CMD_VECTORS;
176    h.vectors.offset = offset;
177    h.vectors.stride = stride;
178    h.vectors.count = count;
179    return h.i;
180 }
181 
182 /* warning: the count here is divided by 4 compared to other cmds
183    (so it doesn't exceed the char size)! */
cmdveclinear(int offset,int count)184 static int cmdveclinear( int offset, int count )
185 {
186    drm_radeon_cmd_header_t h;
187    h.i = 0;
188    h.veclinear.cmd_type = RADEON_CMD_VECLINEAR;
189    h.veclinear.addr_lo = offset & 0xff;
190    h.veclinear.addr_hi = (offset & 0xff00) >> 8;
191    h.veclinear.count = count;
192    return h.i;
193 }
194 
cmdscl(int offset,int stride,int count)195 static int cmdscl( int offset, int stride, int count )
196 {
197    drm_radeon_cmd_header_t h;
198    h.i = 0;
199    h.scalars.cmd_type = RADEON_CMD_SCALARS;
200    h.scalars.offset = offset;
201    h.scalars.stride = stride;
202    h.scalars.count = count;
203    return h.i;
204 }
205 
cmdscl2(int offset,int stride,int count)206 static int cmdscl2( int offset, int stride, int count )
207 {
208    drm_radeon_cmd_header_t h;
209    h.i = 0;
210    h.scalars.cmd_type = RADEON_CMD_SCALARS2;
211    h.scalars.offset = offset - 0x100;
212    h.scalars.stride = stride;
213    h.scalars.count = count;
214    return h.i;
215 }
216 
/**
 * Check functions are used to check if state is active.
 * If it is active check function returns maximum emit size.
 *
 * CHECK defines check_<NM>(): it returns atom->cmd_size + ADD when FLAG
 * evaluates true and 0 otherwise.  FLAG may refer to ctx, rmesa and atom.
 */
#define CHECK( NM, FLAG, ADD )						\
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom) \
{									\
   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
   (void) rmesa;							\
   if (!(FLAG))								\
      return 0;								\
   return atom->cmd_size + (ADD);					\
}
228 
/* Like CHECK, but the state is reported inactive (0) whenever TCL is in
 * fallback or an ARB vertex program is enabled; FLAG is only evaluated
 * when neither is the case.
 */
#define TCL_CHECK( NM, FLAG, ADD )					\
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom) \
{									\
   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
   if (rmesa->radeon.TclFallback)					\
      return 0;								\
   if (_mesa_arb_vertex_program_enabled(ctx))				\
      return 0;								\
   if (!(FLAG))								\
      return 0;								\
   return atom->cmd_size + (ADD);					\
}
235 
/* Like CHECK, but the state is reported inactive (0) whenever TCL is in
 * fallback.  Whether a vertex program is enabled does not matter here.
 */
#define TCL_OR_VP_CHECK( NM, FLAG, ADD )				\
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom ) \
{									\
   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
   if (rmesa->radeon.TclFallback)					\
      return 0;								\
   if (!(FLAG))								\
      return 0;								\
   return atom->cmd_size + (ADD);					\
}
242 
/* Like CHECK, but the state is only active when TCL is not in fallback
 * and an ARB vertex program is enabled; FLAG is evaluated only after
 * both of those tests have passed.
 */
#define VP_CHECK( NM, FLAG, ADD )					\
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom ) \
{									\
   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
   if (rmesa->radeon.TclFallback)					\
      return 0;								\
   if (!_mesa_arb_vertex_program_enabled(ctx))				\
      return 0;								\
   if (!(FLAG))								\
      return 0;								\
   return atom->cmd_size + (ADD);					\
}
250 
251 CHECK( always, GL_TRUE, 0 )
252 CHECK( always_add4, GL_TRUE, 4 )
253 CHECK( never, GL_FALSE, 0 )
254 CHECK( tex_any, ctx->Texture._MaxEnabledTexImageUnit != -1, 0 )
255 CHECK( tf, (ctx->Texture._MaxEnabledTexImageUnit != -1 && !_mesa_ati_fragment_shader_enabled(ctx)), 0 );
256 CHECK( pix_zero, !_mesa_ati_fragment_shader_enabled(ctx), 0 )
257 CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !_mesa_ati_fragment_shader_enabled(ctx)), 0 )
258 CHECK( afs_pass1, (_mesa_ati_fragment_shader_enabled(ctx) && (ctx->ATIFragmentShader.Current->NumPasses > 1)), 0 )
259 CHECK( afs, _mesa_ati_fragment_shader_enabled(ctx), 0 )
260 CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 3 + 3*5 - CUBE_STATE_SIZE )
261 CHECK( tex_cube_cs, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 2 + 4*5 - CUBE_STATE_SIZE )
262 TCL_CHECK( tcl_fog_add4, ctx->Fog.Enabled, 4 )
263 TCL_CHECK( tcl, GL_TRUE, 0 )
264 TCL_CHECK( tcl_add8, GL_TRUE, 8 )
265 TCL_CHECK( tcl_add4, GL_TRUE, 4 )
266 TCL_CHECK( tcl_tex_add4, rmesa->state.texture.unit[atom->idx].unitneeded, 4 )
267 TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 )
268 TCL_CHECK( tcl_lighting_add6, ctx->Light.Enabled, 6 )
269 TCL_CHECK( tcl_light_add6, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 6 )
270 TCL_OR_VP_CHECK( tcl_ucp_add4, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 4 )
271 TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE, 0 )
272 TCL_OR_VP_CHECK( tcl_or_vp_add2, GL_TRUE, 2 )
273 VP_CHECK( tcl_vp, GL_TRUE, 0 )
274 VP_CHECK( tcl_vp_add4, GL_TRUE, 4 )
275 VP_CHECK( tcl_vp_size_add4, ctx->VertexProgram.Current->arb.NumNativeInstructions > 64, 4 )
276 VP_CHECK( tcl_vpp_size_add4, ctx->VertexProgram.Current->arb.NumNativeParameters > 96, 4 )
277 
/* Emit a vector upload described by the RADEON_CMD_VECTORS header word
 * 'hdr' (see cmdvec()): a TCL state flush, the vector index register
 * loaded with offset | stride, then 'count' entries of 'data' written to
 * the vector data register.  That is five OUT_BATCH words followed by an
 * OUT_BATCH_TABLE of 'count' entries; the caller owns the enclosing
 * BEGIN_BATCH/END_BATCH.
 */
#define OUT_VEC(hdr, data) do {						\
    drm_radeon_cmd_header_t vec_hdr;					\
    int vec_count;							\
    vec_hdr.i = hdr;							\
    vec_count = vec_hdr.vectors.count;					\
    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
    OUT_BATCH(0);							\
    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
    OUT_BATCH(vec_hdr.vectors.offset |					\
	      (vec_hdr.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, vec_count - 1)); \
    OUT_BATCH_TABLE((data), vec_count);					\
  } while(0)
288 
/* Emit a linear vector upload described by the RADEON_CMD_VECLINEAR
 * header word 'hdr' (see cmdveclinear()).  The start address is rebuilt
 * from addr_lo/addr_hi and the stored count is multiplied by 4 to get
 * the number of entries written.  Nothing at all is emitted when that
 * number is 0.
 *
 * Unlike the other OUT_* helpers this macro opens and closes its own
 * batch, and it relies on a variable named 'dwords' being in scope at
 * the point of use for the BEGIN_BATCH size.
 */
#define OUT_VECLINEAR(hdr, data) do {					\
    drm_radeon_cmd_header_t lin_hdr;					\
    uint32_t lin_first, lin_len;					\
    lin_hdr.i = hdr;							\
    lin_first = lin_hdr.veclinear.addr_lo |				\
		(lin_hdr.veclinear.addr_hi << 8);			\
    lin_len = lin_hdr.veclinear.count * 4;				\
    if (lin_len) {							\
	BEGIN_BATCH(dwords);						\
	OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
	OUT_BATCH(0);							\
	OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
	OUT_BATCH(lin_first | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
	OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, lin_len - 1)); \
	OUT_BATCH_TABLE((data), lin_len);				\
	END_BATCH();							\
    }									\
  } while(0)
306 
/* Emit a scalar upload described by the RADEON_CMD_SCALARS header word
 * 'hdr' (see cmdscl()): the scalar index register loaded with
 * offset | stride, then 'count' entries of 'data' written to the scalar
 * data register.  That is three OUT_BATCH words followed by an
 * OUT_BATCH_TABLE of 'count' entries; the caller owns the enclosing
 * BEGIN_BATCH/END_BATCH.
 */
#define OUT_SCL(hdr, data) do {						\
    drm_radeon_cmd_header_t scl_hdr;					\
    int scl_count;							\
    scl_hdr.i = hdr;							\
    scl_count = scl_hdr.scalars.count;					\
    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
    OUT_BATCH((scl_hdr.scalars.offset) |				\
	      (scl_hdr.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, scl_count - 1)); \
    OUT_BATCH_TABLE((data), scl_count);					\
  } while(0)
315 
/* Same as OUT_SCL, for a header built by cmdscl2(): the 0x100 that
 * cmdscl2() subtracted from the offset is added back before the value is
 * written to the scalar index register.
 */
#define OUT_SCL2(hdr, data) do {					\
    drm_radeon_cmd_header_t scl_hdr;					\
    int scl_count;							\
    scl_hdr.i = hdr;							\
    scl_count = scl_hdr.scalars.count;					\
    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
    OUT_BATCH((scl_hdr.scalars.offset + 0x100) |			\
	      (scl_hdr.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, scl_count - 1)); \
    OUT_BATCH_TABLE((data), scl_count);					\
  } while(0)
check_rrb(struct gl_context * ctx,struct radeon_state_atom * atom)324 static int check_rrb(struct gl_context *ctx, struct radeon_state_atom *atom)
325 {
326    r200ContextPtr r200 = R200_CONTEXT(ctx);
327    struct radeon_renderbuffer *rrb;
328    rrb = radeon_get_colorbuffer(&r200->radeon);
329    if (!rrb || !rrb->bo)
330       return 0;
331    return atom->cmd_size;
332 }
333 
check_polygon_stipple(struct gl_context * ctx,struct radeon_state_atom * atom)334 static int check_polygon_stipple(struct gl_context *ctx,
335 		struct radeon_state_atom *atom)
336 {
337    r200ContextPtr r200 = R200_CONTEXT(ctx);
338    if (r200->hw.set.cmd[SET_RE_CNTL] & R200_STIPPLE_ENABLE)
339 	   return atom->cmd_size;
340    return 0;
341 }
342 
mtl_emit(struct gl_context * ctx,struct radeon_state_atom * atom)343 static void mtl_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
344 {
345    r200ContextPtr r200 = R200_CONTEXT(ctx);
346    BATCH_LOCALS(&r200->radeon);
347    uint32_t dwords = atom->check(ctx, atom);
348 
349    BEGIN_BATCH(dwords);
350    OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1));
351    OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18));
352    END_BATCH();
353 }
354 
lit_emit(struct gl_context * ctx,struct radeon_state_atom * atom)355 static void lit_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
356 {
357    r200ContextPtr r200 = R200_CONTEXT(ctx);
358    BATCH_LOCALS(&r200->radeon);
359    uint32_t dwords = atom->check(ctx, atom);
360 
361    BEGIN_BATCH(dwords);
362    OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
363    OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
364    END_BATCH();
365 }
366 
ptp_emit(struct gl_context * ctx,struct radeon_state_atom * atom)367 static void ptp_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
368 {
369    r200ContextPtr r200 = R200_CONTEXT(ctx);
370    BATCH_LOCALS(&r200->radeon);
371    uint32_t dwords = atom->check(ctx, atom);
372 
373    BEGIN_BATCH(dwords);
374    OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1);
375    OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1);
376    END_BATCH();
377 }
378 
veclinear_emit(struct gl_context * ctx,struct radeon_state_atom * atom)379 static void veclinear_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
380 {
381    r200ContextPtr r200 = R200_CONTEXT(ctx);
382    BATCH_LOCALS(&r200->radeon);
383    uint32_t dwords = atom->check(ctx, atom);
384 
385    OUT_VECLINEAR(atom->cmd[0], atom->cmd+1);
386 }
387 
scl_emit(struct gl_context * ctx,struct radeon_state_atom * atom)388 static void scl_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
389 {
390    r200ContextPtr r200 = R200_CONTEXT(ctx);
391    BATCH_LOCALS(&r200->radeon);
392    uint32_t dwords = atom->check(ctx, atom);
393 
394    BEGIN_BATCH(dwords);
395    OUT_SCL(atom->cmd[0], atom->cmd+1);
396    END_BATCH();
397 }
398 
399 
vec_emit(struct gl_context * ctx,struct radeon_state_atom * atom)400 static void vec_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
401 {
402    r200ContextPtr r200 = R200_CONTEXT(ctx);
403    BATCH_LOCALS(&r200->radeon);
404    uint32_t dwords = atom->check(ctx, atom);
405 
406    BEGIN_BATCH(dwords);
407    OUT_VEC(atom->cmd[0], atom->cmd+1);
408    END_BATCH();
409 }
410 
check_always_ctx(struct gl_context * ctx,struct radeon_state_atom * atom)411 static int check_always_ctx( struct gl_context *ctx, struct radeon_state_atom *atom)
412 {
413    r200ContextPtr r200 = R200_CONTEXT(ctx);
414    struct radeon_renderbuffer *rrb, *drb;
415    uint32_t dwords;
416 
417    rrb = radeon_get_colorbuffer(&r200->radeon);
418    if (!rrb || !rrb->bo) {
419       return 0;
420    }
421 
422    drb = radeon_get_depthbuffer(&r200->radeon);
423 
424    dwords = 10;
425    if (drb)
426      dwords += 6;
427    if (rrb)
428      dwords += 8;
429    if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
430      dwords += 4;
431 
432 
433    return dwords;
434 }
435 
/**
 * Emit the context atom.
 *
 * Does nothing when there is no color renderbuffer or it has no buffer
 * object.  Otherwise it first patches the color format bits of
 * cmd[CTX_RB3D_CNTL] and (when a depth buffer exists) the depth format
 * bits of cmd[CTX_RB3D_ZSTENCILCNTL] in place, then emits the state as
 * several separate packets with relocations for the depth and color
 * buffers.
 *
 * The batch size comes from atom->check(); the pieces emitted below
 * mirror the terms summed in check_always_ctx() (presumably each
 * OUT_BATCH_RELOC accounts for 3 dwords -- confirm against its
 * definition).
 */
static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
{
   r200ContextPtr r200 = R200_CONTEXT(ctx);
   BATCH_LOCALS(&r200->radeon);
   struct radeon_renderbuffer *rrb, *drb;
   uint32_t cbpitch = 0;
   uint32_t zbpitch = 0;
   uint32_t dwords = atom->check(ctx, atom);
   uint32_t depth_fmt;

   rrb = radeon_get_colorbuffer(&r200->radeon);
   if (!rrb || !rrb->bo) {
      return;
   }

   /* Clear the 4-bit field at bit 10 of RB3D_CNTL, then set it from the
    * color buffer: 4 bytes per pixel is always ARGB8888, otherwise the
    * Mesa format decides. */
   atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
   if (rrb->cpp == 4)
	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
   else switch (rrb->base.Base.Format) {
   case MESA_FORMAT_B5G6R5_UNORM:
   case MESA_FORMAT_R5G6B5_UNORM:
	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
	break;
   case MESA_FORMAT_B4G4R4A4_UNORM:
   case MESA_FORMAT_A4R4G4B4_UNORM:
	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
	break;
   case MESA_FORMAT_B5G5R5A1_UNORM:
   case MESA_FORMAT_A1R5G5B5_UNORM:
	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
	break;
   default:
	/* No format bits are set in this case; only a problem is reported. */
	_mesa_problem(ctx, "Unexpected format in ctx_emit_cs");
   }

   /* Color pitch in pixels, with the tiling enables OR'ed in. */
   cbpitch = (rrb->pitch / rrb->cpp);
   if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
       cbpitch |= R200_COLOR_TILE_ENABLE;
   if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
       cbpitch |= R200_COLOR_MICROTILE_ENABLE;


   drb = radeon_get_depthbuffer(&r200->radeon);
   if (drb) {
     /* Depth pitch in pixels; 4 bytes per pixel selects the 24-bit
      * format, anything else the 16-bit one. */
     zbpitch = (drb->pitch / drb->cpp);
     if (drb->cpp == 4)
        depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
     else
        depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
   }

   /* Emit the context state; 'dwords' was computed by atom->check(). */
   BEGIN_BATCH(dwords);

   /* In the CS case we need to split this up */
   /* First packet: header for packet[0] with count 3, then cmd[1..4]. */
   OUT_BATCH(CP_PACKET0(packet[0].start, 3));
   OUT_BATCH_TABLE((atom->cmd + 1), 4);

   if (drb) {
     /* Depth buffer address (as a relocation) and pitch. */
     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);

     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
     OUT_BATCH(zbpitch);
   }

   /* These are emitted whether or not a depth buffer exists. */
   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
   OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);


   /* rrb is always non-NULL here because of the early return above. */
   if (rrb) {
     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
     OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);

     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
     OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
   }

   /* The larger (NEWDRM) atom carries 4 more words starting at cmd[14];
    * they are copied out verbatim. */
   if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
     OUT_BATCH_TABLE((atom->cmd + 14), 4);
   }

   END_BATCH();
}
525 
get_tex_mm_size(struct gl_context * ctx,struct radeon_state_atom * atom)526 static int get_tex_mm_size(struct gl_context* ctx, struct radeon_state_atom *atom)
527 {
528    r200ContextPtr r200 = R200_CONTEXT(ctx);
529    uint32_t dwords = atom->cmd_size + 2;
530    int hastexture = 1;
531    int i = atom->idx;
532    radeonTexObj *t = r200->state.texture.unit[i].texobj;
533    if (!t)
534 	hastexture = 0;
535    else {
536 	if (!t->mt && !t->bo)
537 		hastexture = 0;
538    }
539 
540    if (!hastexture)
541      dwords -= 4;
542    return dwords;
543 }
544 
check_tex_pair_mm(struct gl_context * ctx,struct radeon_state_atom * atom)545 static int check_tex_pair_mm(struct gl_context* ctx, struct radeon_state_atom *atom)
546 {
547    r200ContextPtr r200 = R200_CONTEXT(ctx);
548    /** XOR is bit flip operation so use it for finding pair */
549    if (!(r200->state.texture.unit[atom->idx].unitneeded | r200->state.texture.unit[atom->idx ^ 1].unitneeded))
550      return 0;
551 
552    return get_tex_mm_size(ctx, atom);
553 }
554 
check_tex_mm(struct gl_context * ctx,struct radeon_state_atom * atom)555 static int check_tex_mm(struct gl_context* ctx, struct radeon_state_atom *atom)
556 {
557    r200ContextPtr r200 = R200_CONTEXT(ctx);
558    if (!(r200->state.texture.unit[atom->idx].unitneeded))
559      return 0;
560 
561    return get_tex_mm_size(ctx, atom);
562 }
563 
564 
tex_emit_mm(struct gl_context * ctx,struct radeon_state_atom * atom)565 static void tex_emit_mm(struct gl_context *ctx, struct radeon_state_atom *atom)
566 {
567    r200ContextPtr r200 = R200_CONTEXT(ctx);
568    BATCH_LOCALS(&r200->radeon);
569    uint32_t dwords = atom->check(ctx, atom);
570    int i = atom->idx;
571    radeonTexObj *t = r200->state.texture.unit[i].texobj;
572 
573    if (!r200->state.texture.unit[i].unitneeded && !(dwords <= atom->cmd_size))
574         dwords -= 4;
575    BEGIN_BATCH(dwords);
576 
577    OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (32 * i), 7));
578    OUT_BATCH_TABLE((atom->cmd + 1), 8);
579 
580    if (dwords > atom->cmd_size) {
581      OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0));
582      if (t->mt && !t->image_override) {
583         OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
584 		  RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
585       } else {
586 	if (t->bo)
587             OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
588                             RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
589       }
590    }
591    END_BATCH();
592 }
593 
cube_emit_cs(struct gl_context * ctx,struct radeon_state_atom * atom)594 static void cube_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
595 {
596    r200ContextPtr r200 = R200_CONTEXT(ctx);
597    BATCH_LOCALS(&r200->radeon);
598    uint32_t dwords = atom->check(ctx, atom);
599    int i = atom->idx, j;
600    radeonTexObj *t = r200->state.texture.unit[i].texobj;
601    radeon_mipmap_level *lvl;
602    if (!(t && !t->image_override))
603      dwords = 2;
604 
605    BEGIN_BATCH(dwords);
606    OUT_BATCH_TABLE(atom->cmd, 2);
607 
608    if (t && !t->image_override) {
609      lvl = &t->mt->levels[0];
610      for (j = 1; j <= 5; j++) {
611        OUT_BATCH(CP_PACKET0(R200_PP_CUBIC_OFFSET_F1_0 + (24*i) + (4 * (j-1)), 0));
612        OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
613 			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
614      }
615    }
616    END_BATCH();
617 }
618 
619 /* Initialize the context's hardware state.
620  */
r200InitState(r200ContextPtr rmesa)621 void r200InitState( r200ContextPtr rmesa )
622 {
623    struct gl_context *ctx = &rmesa->radeon.glCtx;
624    GLuint i;
625 
626    rmesa->radeon.Fallback = 0;
627 
628    rmesa->radeon.hw.max_state_size = 0;
629 
630 #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX )				\
631    do {								\
632       rmesa->hw.ATOM.cmd_size = SZ;				\
633       rmesa->hw.ATOM.cmd = (GLuint *) calloc(SZ, sizeof(int));          \
634       rmesa->hw.ATOM.lastcmd = (GLuint *) calloc(SZ, sizeof(int));	\
635       rmesa->hw.ATOM.name = NM;					\
636       rmesa->hw.ATOM.idx = IDX;					\
637       if (check_##CHK != check_never) {				\
638          rmesa->hw.ATOM.check = check_##CHK;			\
639          rmesa->radeon.hw.max_state_size += SZ * sizeof(int);	\
640       } else {							\
641          rmesa->hw.ATOM.check = NULL;				\
642       }								\
643       rmesa->hw.ATOM.dirty = GL_FALSE;				\
644    } while (0)
645 
646 
647    /* Allocate state buffers:
648     */
649    ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
650 
651    rmesa->hw.ctx.emit = ctx_emit_cs;
652    rmesa->hw.ctx.check = check_always_ctx;
653    ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
654    ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
655    ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
656    ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
657    ALLOC_STATE( vtx, always, VTX_STATE_SIZE, "VTX/vertex", 0 );
658    ALLOC_STATE( vap, always, VAP_STATE_SIZE, "VAP/vap", 0 );
659    ALLOC_STATE( vte, always, VTE_STATE_SIZE, "VTE/vte", 0 );
660    ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
661    ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
662    ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
663    ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
664    {
665       int state_size = TEX_STATE_SIZE_NEWDRM;
666       if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
667          /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
668          ALLOC_STATE( tex[0], tex_pair_mm, state_size, "TEX/tex-0", 0 );
669          ALLOC_STATE( tex[1], tex_pair_mm, state_size, "TEX/tex-1", 1 );
670          ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
671       }
672       else {
673          ALLOC_STATE( tex[0], tex_mm, state_size, "TEX/tex-0", 0 );
674          ALLOC_STATE( tex[1], tex_mm, state_size, "TEX/tex-1", 1 );
675          ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
676       }
677       ALLOC_STATE( tex[2], tex_mm, state_size, "TEX/tex-2", 2 );
678       ALLOC_STATE( tex[3], tex_mm, state_size, "TEX/tex-3", 3 );
679       ALLOC_STATE( tex[4], tex_mm, state_size, "TEX/tex-4", 4 );
680       ALLOC_STATE( tex[5], tex_mm, state_size, "TEX/tex-5", 5 );
681       ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 );
682       ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
683       ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
684    }
685 
686    ALLOC_STATE( stp, polygon_stipple, STP_STATE_SIZE, "STP/stp", 0 );
687 
688    for (i = 0; i < 6; i++)
689       rmesa->hw.tex[i].emit = tex_emit_mm;
690    ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
691    ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
692    ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 );
693    ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
694    ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
695    ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
696    for (i = 0; i < 6; i++) {
697       rmesa->hw.cube[i].emit = cube_emit_cs;
698       rmesa->hw.cube[i].check = check_tex_cube_cs;
699    }
700 
701    ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
702    ALLOC_STATE( vpi[0], tcl_vp_add4, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
703    ALLOC_STATE( vpi[1], tcl_vp_size_add4, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
704    ALLOC_STATE( vpp[0], tcl_vp_add4, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
705    ALLOC_STATE( vpp[1], tcl_vpp_size_add4, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
706 
707    /* FIXME: this atom has two commands, we need only one (ucp_vert_blend) for vp */
708    ALLOC_STATE( tcl, tcl_or_vp, TCL_STATE_SIZE, "TCL/tcl", 0 );
709    ALLOC_STATE( msl, tcl, MSL_STATE_SIZE, "MSL/matrix-select", 0 );
710    ALLOC_STATE( tcg, tcl, TCG_STATE_SIZE, "TCG/texcoordgen", 0 );
711    ALLOC_STATE( mtl[0], tcl_lighting_add6, MTL_STATE_SIZE, "MTL0/material0", 0 );
712    ALLOC_STATE( mtl[1], tcl_lighting_add6, MTL_STATE_SIZE, "MTL1/material1", 1 );
713    ALLOC_STATE( grd, tcl_or_vp_add2, GRD_STATE_SIZE, "GRD/guard-band", 0 );
714    ALLOC_STATE( fog, tcl_fog_add4, FOG_STATE_SIZE, "FOG/fog", 0 );
715    ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 0 );
716    ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
717    ALLOC_STATE( mat[R200_MTX_MV], tcl_add4, MAT_STATE_SIZE, "MAT/modelview", 0 );
718    ALLOC_STATE( mat[R200_MTX_IMV], tcl_add4, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
719    ALLOC_STATE( mat[R200_MTX_MVP], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 0 );
720    ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat0", 0 );
721    ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 );
722    ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat2", 2 );
723    ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat3", 3 );
724    ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat4", 4 );
725    ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat5", 5 );
726    ALLOC_STATE( ucp[0], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
727    ALLOC_STATE( ucp[1], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
728    ALLOC_STATE( ucp[2], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
729    ALLOC_STATE( ucp[3], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
730    ALLOC_STATE( ucp[4], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
731    ALLOC_STATE( ucp[5], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
732    ALLOC_STATE( lit[0], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-0", 0 );
733    ALLOC_STATE( lit[1], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-1", 1 );
734    ALLOC_STATE( lit[2], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-2", 2 );
735    ALLOC_STATE( lit[3], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-3", 3 );
736    ALLOC_STATE( lit[4], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-4", 4 );
737    ALLOC_STATE( lit[5], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-5", 5 );
738    ALLOC_STATE( lit[6], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-6", 6 );
739    ALLOC_STATE( lit[7], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-7", 7 );
740    ALLOC_STATE( sci, rrb, SCI_STATE_SIZE, "SCI/scissor", 0 );
741    ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
742    ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 );
743    ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 );
744    ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
745    ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 );
746    ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 );
747    ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
748    ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
749    ALLOC_STATE( ptp, tcl_add8, PTP_STATE_SIZE, "PTP/pointparams", 0 );
750 
751    r200SetUpAtomList( rmesa );
752 
753    /* Fill in the packet headers:
754     */
755    rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
756    rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
757    rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
758    rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(rmesa, R200_EMIT_RB3D_BLENDCOLOR);
759    rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
760    rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
761    rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
762    rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
763    rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
764    rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
765    rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CNTL_X);
766    rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(rmesa, R200_EMIT_RB3D_DEPTHXY_OFFSET);
767    rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(rmesa, R200_EMIT_RE_AUX_SCISSOR_CNTL);
768    rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(rmesa, R200_EMIT_SE_VAP_CNTL_STATUS);
769    rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(rmesa, R200_EMIT_RE_POINTSIZE);
770    rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(rmesa, R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
771    rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TAM_DEBUG3);
772    rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(rmesa, R200_EMIT_TFACTOR_0);
773    rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(rmesa, R200_EMIT_ATF_TFACTOR);
774    rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_0);
775    rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
776    rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_1);
777    rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
778    rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_2);
779    rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
780    rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_3);
781    rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
782    rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_4);
783    rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
784    rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_5);
785    rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
786    rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_0);
787    rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_1);
788    rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_PVS_CNTL);
789    rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_0);
790    rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_0);
791    rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_1);
792    rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_1);
793    rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_2);
794    rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_2);
795    rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_3);
796    rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_3);
797    rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_4);
798    rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_4);
799    rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_5);
800    rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_5);
801    rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_0);
802    rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_1);
803    rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_2);
804    rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_3);
805    rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_4);
806    rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_5);
807    rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
808    rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
809    rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(rmesa, R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
810    rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(rmesa, R200_EMIT_TEX_PROC_CTL_2);
811    rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(rmesa, R200_EMIT_MATRIX_SELECT_0);
812    rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_CTL);
813    rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTX_FMT_0);
814    rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(rmesa, R200_EMIT_OUTPUT_VTX_COMP_SEL);
815    rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(rmesa, R200_EMIT_SE_VTX_STATE_CNTL);
816    rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL);
817    rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL);
818    rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL);
819 
820    rmesa->hw.sci.cmd[SCI_CMD_1] = CP_PACKET0(R200_RE_TOP_LEFT, 0);
821    rmesa->hw.sci.cmd[SCI_CMD_2] = CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0);
822 
823    rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
824    rmesa->hw.stp.cmd[STP_DATA_0] = 0;
825    rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
826 
827    rmesa->hw.mtl[0].emit = mtl_emit;
828    rmesa->hw.mtl[1].emit = mtl_emit;
829 
830    rmesa->hw.vpi[0].emit = veclinear_emit;
831    rmesa->hw.vpi[1].emit = veclinear_emit;
832    rmesa->hw.vpp[0].emit = veclinear_emit;
833    rmesa->hw.vpp[1].emit = veclinear_emit;
834 
835    rmesa->hw.grd.emit = scl_emit;
836    rmesa->hw.fog.emit = vec_emit;
837    rmesa->hw.glt.emit = vec_emit;
838    rmesa->hw.eye.emit = vec_emit;
839 
840    for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++)
841       rmesa->hw.mat[i].emit = vec_emit;
842 
843    for (i = 0; i < 8; i++)
844       rmesa->hw.lit[i].emit = lit_emit;
845 
846    for (i = 0; i < 6; i++)
847       rmesa->hw.ucp[i].emit = vec_emit;
848 
849    rmesa->hw.ptp.emit = ptp_emit;
850 
851    rmesa->hw.mtl[0].cmd[MTL_CMD_0] =
852       cmdvec( R200_VS_MAT_0_EMISS, 1, 16 );
853    rmesa->hw.mtl[0].cmd[MTL_CMD_1] =
854       cmdscl2( R200_SS_MAT_0_SHININESS, 1, 1 );
855    rmesa->hw.mtl[1].cmd[MTL_CMD_0] =
856       cmdvec( R200_VS_MAT_1_EMISS, 1, 16 );
857    rmesa->hw.mtl[1].cmd[MTL_CMD_1] =
858       cmdscl2( R200_SS_MAT_1_SHININESS, 1, 1 );
859 
860    rmesa->hw.vpi[0].cmd[VPI_CMD_0] =
861       cmdveclinear( R200_PVS_PROG0, 64 );
862    rmesa->hw.vpi[1].cmd[VPI_CMD_0] =
863       cmdveclinear( R200_PVS_PROG1, 64 );
864    rmesa->hw.vpp[0].cmd[VPP_CMD_0] =
865       cmdveclinear( R200_PVS_PARAM0, 96 );
866    rmesa->hw.vpp[1].cmd[VPP_CMD_0] =
867       cmdveclinear( R200_PVS_PARAM1, 96 );
868 
869    rmesa->hw.grd.cmd[GRD_CMD_0] =
870       cmdscl( R200_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
871    rmesa->hw.fog.cmd[FOG_CMD_0] =
872       cmdvec( R200_VS_FOG_PARAM_ADDR, 1, 4 );
873    rmesa->hw.glt.cmd[GLT_CMD_0] =
874       cmdvec( R200_VS_GLOBAL_AMBIENT_ADDR, 1, 4 );
875    rmesa->hw.eye.cmd[EYE_CMD_0] =
876       cmdvec( R200_VS_EYE_VECTOR_ADDR, 1, 4 );
877 
878    rmesa->hw.mat[R200_MTX_MV].cmd[MAT_CMD_0] =
879       cmdvec( R200_VS_MATRIX_0_MV, 1, 16);
880    rmesa->hw.mat[R200_MTX_IMV].cmd[MAT_CMD_0] =
881       cmdvec( R200_VS_MATRIX_1_INV_MV, 1, 16);
882    rmesa->hw.mat[R200_MTX_MVP].cmd[MAT_CMD_0] =
883       cmdvec( R200_VS_MATRIX_2_MVP, 1, 16);
884    rmesa->hw.mat[R200_MTX_TEX0].cmd[MAT_CMD_0] =
885       cmdvec( R200_VS_MATRIX_3_TEX0, 1, 16);
886    rmesa->hw.mat[R200_MTX_TEX1].cmd[MAT_CMD_0] =
887       cmdvec( R200_VS_MATRIX_4_TEX1, 1, 16);
888    rmesa->hw.mat[R200_MTX_TEX2].cmd[MAT_CMD_0] =
889       cmdvec( R200_VS_MATRIX_5_TEX2, 1, 16);
890    rmesa->hw.mat[R200_MTX_TEX3].cmd[MAT_CMD_0] =
891       cmdvec( R200_VS_MATRIX_6_TEX3, 1, 16);
892    rmesa->hw.mat[R200_MTX_TEX4].cmd[MAT_CMD_0] =
893       cmdvec( R200_VS_MATRIX_7_TEX4, 1, 16);
894    rmesa->hw.mat[R200_MTX_TEX5].cmd[MAT_CMD_0] =
895       cmdvec( R200_VS_MATRIX_8_TEX5, 1, 16);
896 
897    for (i = 0 ; i < 8; i++) {
898       rmesa->hw.lit[i].cmd[LIT_CMD_0] =
899 	 cmdvec( R200_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 );
900       rmesa->hw.lit[i].cmd[LIT_CMD_1] =
901 	 cmdscl( R200_SS_LIGHT_DCD_ADDR + i, 8, 7 );
902    }
903 
904    for (i = 0 ; i < 6; i++) {
905       rmesa->hw.ucp[i].cmd[UCP_CMD_0] =
906 	 cmdvec( R200_VS_UCP_ADDR + i, 1, 4 );
907    }
908 
909    rmesa->hw.ptp.cmd[PTP_CMD_0] =
910       cmdvec( R200_VS_PNT_SPRITE_VPORT_SCALE, 1, 4 );
911    rmesa->hw.ptp.cmd[PTP_CMD_1] =
912       cmdvec( R200_VS_PNT_SPRITE_ATT_CONST, 1, 12 );
913 
   /* Initial hardware state:
915     */
916    rmesa->hw.ctx.cmd[CTX_PP_MISC] = (R200_ALPHA_TEST_PASS
917 				     /* | R200_RIGHT_HAND_CUBE_OGL*/);
918 
919    rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (R200_FOG_VERTEX |
920 					  R200_FOG_USE_SPEC_ALPHA);
921 
922    rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000;
923 
924    rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
925 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
926 				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
927 
928    rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000;
929    rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
930 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
931 				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
932    rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
933 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
934 				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
935 
936    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
937       rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation;
938 
939    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] =
940       ((rmesa->radeon.radeonScreen->depthPitch &
941 	R200_DEPTHPITCH_MASK) |
942        R200_DEPTH_ENDIAN_NO_SWAP);
943 
944    if (rmesa->using_hyperz)
945       rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ;
946 
947    rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (R200_Z_TEST_LESS |
948 					       R200_STENCIL_TEST_ALWAYS |
949 					       R200_STENCIL_FAIL_KEEP |
950 					       R200_STENCIL_ZPASS_KEEP |
951 					       R200_STENCIL_ZFAIL_KEEP |
952 					       R200_Z_WRITE_ENABLE);
953 
954    if (rmesa->using_hyperz) {
955       rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE |
956 						  R200_Z_DECOMPRESSION_ENABLE;
957 /*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
958 	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
959    }
960 
961    rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE
962  				     | R200_TEX_BLEND_0_ENABLE);
963 
964    switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
965    case DRI_CONF_DITHER_XERRORDIFFRESET:
966       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT;
967       break;
968    case DRI_CONF_DITHER_ORDERED:
969       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE;
970       break;
971    }
972    if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
973 	DRI_CONF_ROUND_ROUND )
974       rmesa->radeon.state.color.roundEnable = R200_ROUND_ENABLE;
975    else
976       rmesa->radeon.state.color.roundEnable = 0;
977    if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
978 	DRI_CONF_COLOR_REDUCTION_DITHER )
979       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
980    else
981       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
982 
983    rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK *
984 			driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality");
985    rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0;
986 
987    rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW |
988 				     R200_BFACE_SOLID |
989 				     R200_FFACE_SOLID |
990 				     R200_FLAT_SHADE_VTX_LAST |
991 				     R200_DIFFUSE_SHADE_GOURAUD |
992 				     R200_ALPHA_SHADE_GOURAUD |
993 				     R200_SPECULAR_SHADE_GOURAUD |
994 				     R200_FOG_SHADE_GOURAUD |
995 				     R200_DISC_FOG_SHADE_GOURAUD |
996 				     R200_VTX_PIX_CENTER_OGL |
997 				     R200_ROUND_MODE_TRUNC |
998 				     R200_ROUND_PREC_8TH_PIX);
999 
1000    rmesa->hw.set.cmd[SET_RE_CNTL] = (R200_PERSPECTIVE_ENABLE |
1001 				     R200_SCISSOR_ENABLE);
1002 
1003    rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((1 << 16) | 0xffff);
1004 
1005    rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] =
1006       ((0 << R200_LINE_CURRENT_PTR_SHIFT) |
1007        (1 << R200_LINE_CURRENT_COUNT_SHIFT));
1008 
1009    rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4);
1010 
1011    rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] =
1012       ((0x00 << R200_STENCIL_REF_SHIFT) |
1013        (0xff << R200_STENCIL_MASK_SHIFT) |
1014        (0xff << R200_STENCIL_WRITEMASK_SHIFT));
1015 
1016    rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = R200_ROP_COPY;
1017    rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff;
1018 
1019    rmesa->hw.tam.cmd[TAM_DEBUG3] = 0;
1020 
1021    rmesa->hw.msc.cmd[MSC_RE_MISC] =
1022       ((0 << R200_STIPPLE_X_OFFSET_SHIFT) |
1023        (0 << R200_STIPPLE_Y_OFFSET_SHIFT) |
1024        R200_STIPPLE_BIG_BIT_ORDER);
1025 
1026 
1027    rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
1028    rmesa->hw.cst.cmd[CST_RB3D_DEPTHXY_OFFSET] = 0;
1029    rmesa->hw.cst.cmd[CST_RE_AUX_SCISSOR_CNTL] = 0x0;
1030    rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] =
1031 #ifdef MESA_BIG_ENDIAN
1032 						R200_VC_32BIT_SWAP;
1033 #else
1034 						R200_VC_NO_SWAP;
1035 #endif
1036 
1037    if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
1038       /* Bypass TCL */
1039       rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8);
1040    }
1041 
1042    rmesa->hw.cst.cmd[CST_RE_POINTSIZE] =
1043       (((GLuint)(ctx->Const.MaxPointSize * 16.0)) << R200_MAXPOINTSIZE_SHIFT) | 0x10;
1044    rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_0] =
1045       (0x0 << R200_VERTEX_POSITION_ADDR__SHIFT);
1046    rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_1] =
1047       (0x02 << R200_VTX_COLOR_0_ADDR__SHIFT) |
1048       (0x03 << R200_VTX_COLOR_1_ADDR__SHIFT);
1049    rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_2] =
1050       (0x06 << R200_VTX_TEX_0_ADDR__SHIFT) |
1051       (0x07 << R200_VTX_TEX_1_ADDR__SHIFT) |
1052       (0x08 << R200_VTX_TEX_2_ADDR__SHIFT) |
1053       (0x09 << R200_VTX_TEX_3_ADDR__SHIFT);
1054    rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_3] =
1055       (0x0A << R200_VTX_TEX_4_ADDR__SHIFT) |
1056       (0x0B << R200_VTX_TEX_5_ADDR__SHIFT);
1057 
1058 
1059    rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = 0x00000000;
1060    rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000;
1061    rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = 0x00000000;
1062    rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000;
1063    rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = 0x00000000;
1064    rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000;
1065 
1066    for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) {
1067       rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] = R200_BORDER_MODE_OGL;
1068       rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT] =
1069          ((i << R200_TXFORMAT_ST_ROUTE_SHIFT) |  /* <-- note i */
1070           (2 << R200_TXFORMAT_WIDTH_SHIFT) |
1071           (2 << R200_TXFORMAT_HEIGHT_SHIFT));
1072       rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
1073       rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
1074          (/* R200_TEXCOORD_PROJ | */
1075           R200_LOD_BIAS_CORRECTION);	/* Small default bias */
1076       rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
1077 	     rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1078       rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
1079       rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
1080 
1081       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
1082       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =
1083          rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1084       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] =
1085          rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1086       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] =
1087          rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1088       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] =
1089          rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1090       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] =
1091          rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
1092 
1093       rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] =
1094          (R200_TXC_ARG_A_ZERO |
1095           R200_TXC_ARG_B_ZERO |
1096           R200_TXC_ARG_C_DIFFUSE_COLOR |
1097           R200_TXC_OP_MADD);
1098 
1099       rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND2] =
1100          ((i << R200_TXC_TFACTOR_SEL_SHIFT) |
1101           R200_TXC_SCALE_1X |
1102           R200_TXC_CLAMP_0_1 |
1103           R200_TXC_OUTPUT_REG_R0);
1104 
1105       rmesa->hw.pix[i].cmd[PIX_PP_TXABLEND] =
1106          (R200_TXA_ARG_A_ZERO |
1107           R200_TXA_ARG_B_ZERO |
1108           R200_TXA_ARG_C_DIFFUSE_ALPHA |
1109           R200_TXA_OP_MADD);
1110 
1111       rmesa->hw.pix[i].cmd[PIX_PP_TXABLEND2] =
1112          ((i << R200_TXA_TFACTOR_SEL_SHIFT) |
1113           R200_TXA_SCALE_1X |
1114           R200_TXA_CLAMP_0_1 |
1115           R200_TXA_OUTPUT_REG_R0);
1116    }
1117 
1118    rmesa->hw.tf.cmd[TF_TFACTOR_0] = 0;
1119    rmesa->hw.tf.cmd[TF_TFACTOR_1] = 0;
1120    rmesa->hw.tf.cmd[TF_TFACTOR_2] = 0;
1121    rmesa->hw.tf.cmd[TF_TFACTOR_3] = 0;
1122    rmesa->hw.tf.cmd[TF_TFACTOR_4] = 0;
1123    rmesa->hw.tf.cmd[TF_TFACTOR_5] = 0;
1124 
1125    rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] =
1126       (R200_VAP_TCL_ENABLE |
1127        (0x9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT));
1128 
1129    rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] =
1130       (R200_VPORT_X_SCALE_ENA |
1131        R200_VPORT_Y_SCALE_ENA |
1132        R200_VPORT_Z_SCALE_ENA |
1133        R200_VPORT_X_OFFSET_ENA |
1134        R200_VPORT_Y_OFFSET_ENA |
1135        R200_VPORT_Z_OFFSET_ENA |
1136 /* FIXME: Turn on for tex rect only */
1137        R200_VTX_ST_DENORMALIZED |
1138        R200_VTX_W0_FMT);
1139 
1140 
1141    rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = 0;
1142    rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = 0;
1143    rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] =
1144       ((R200_VTX_Z0 | R200_VTX_W0 |
1145        (R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)));
1146    rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] = 0;
1147    rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = (R200_OUTPUT_XYZW);
1148    rmesa->hw.vtx.cmd[VTX_STATE_CNTL] = R200_VSC_UPDATE_USER_COLOR_0_ENABLE;
1149 
1150 
1151    /* Matrix selection */
1152    rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_0] =
1153       (R200_MTX_MV << R200_MODELVIEW_0_SHIFT);
1154 
1155    rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_1] =
1156        (R200_MTX_IMV << R200_IT_MODELVIEW_0_SHIFT);
1157 
1158    rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_2] =
1159       (R200_MTX_MVP << R200_MODELPROJECT_0_SHIFT);
1160 
1161    rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_3] =
1162       ((R200_MTX_TEX0 << R200_TEXMAT_0_SHIFT) |
1163        (R200_MTX_TEX1 << R200_TEXMAT_1_SHIFT) |
1164        (R200_MTX_TEX2 << R200_TEXMAT_2_SHIFT) |
1165        (R200_MTX_TEX3 << R200_TEXMAT_3_SHIFT));
1166 
1167    rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_4] =
1168       ((R200_MTX_TEX4 << R200_TEXMAT_4_SHIFT) |
1169        (R200_MTX_TEX5 << R200_TEXMAT_5_SHIFT));
1170 
1171 
1172    /* General TCL state */
1173    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] =
1174       (R200_SPECULAR_LIGHTS |
1175        R200_DIFFUSE_SPECULAR_COMBINE |
1176        R200_LOCAL_LIGHT_VEC_GL |
1177        R200_LM0_SOURCE_MATERIAL_0 << R200_FRONT_SHININESS_SOURCE_SHIFT |
1178        R200_LM0_SOURCE_MATERIAL_1 << R200_BACK_SHININESS_SOURCE_SHIFT);
1179 
1180    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] =
1181       ((R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
1182        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
1183        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
1184        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT) |
1185        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_EMISSIVE_SOURCE_SHIFT) |
1186        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_AMBIENT_SOURCE_SHIFT) |
1187        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_DIFFUSE_SOURCE_SHIFT) |
1188        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_SPECULAR_SOURCE_SHIFT));
1189 
1190    rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_0] = 0; /* filled in via callbacks */
1191    rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_1] = 0;
1192    rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_2] = 0;
1193    rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_3] = 0;
1194 
1195    rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] =
1196       (R200_UCP_IN_CLIP_SPACE |
1197        R200_CULL_FRONT_IS_CCW);
1198 
1199    /* Texgen/Texmat state */
1200    rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] = 0x00ffffff;
1201    rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_3] =
1202       ((0 << R200_TEXGEN_0_INPUT_TEX_SHIFT) |
1203        (1 << R200_TEXGEN_1_INPUT_TEX_SHIFT) |
1204        (2 << R200_TEXGEN_2_INPUT_TEX_SHIFT) |
1205        (3 << R200_TEXGEN_3_INPUT_TEX_SHIFT) |
1206        (4 << R200_TEXGEN_4_INPUT_TEX_SHIFT) |
1207        (5 << R200_TEXGEN_5_INPUT_TEX_SHIFT));
1208    rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = 0;
1209    rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] =
1210       ((0 << R200_TEXGEN_0_INPUT_SHIFT) |
1211        (1 << R200_TEXGEN_1_INPUT_SHIFT) |
1212        (2 << R200_TEXGEN_2_INPUT_SHIFT) |
1213        (3 << R200_TEXGEN_3_INPUT_SHIFT) |
1214        (4 << R200_TEXGEN_4_INPUT_SHIFT) |
1215        (5 << R200_TEXGEN_5_INPUT_SHIFT));
1216    rmesa->hw.tcg.cmd[TCG_TEX_CYL_WRAP_CTL] = 0;
1217 
1218 
1219    for (i = 0 ; i < 8; i++) {
1220       struct gl_light *l = &ctx->Light.Light[i];
1221       GLenum p = GL_LIGHT0 + i;
1222       *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX;
1223 
1224       ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient );
1225       ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse );
1226       ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular );
1227       ctx->Driver.Lightfv( ctx, p, GL_POSITION, NULL );
1228       ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, NULL );
1229       ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent );
1230       ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff );
1231       ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION,
1232 			   &l->ConstantAttenuation );
1233       ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION,
1234 			   &l->LinearAttenuation );
1235       ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION,
1236 			   &l->QuadraticAttenuation );
1237       *(float *)&(rmesa->hw.lit[i].cmd[LIT_ATTEN_XXX]) = 0.0;
1238    }
1239 
1240    ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT,
1241 			     ctx->Light.Model.Ambient );
1242 
1243    TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
1244 
1245    for (i = 0 ; i < 6; i++) {
1246       ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL );
1247    }
1248 
1249    ctx->Driver.Fogfv( ctx, GL_FOG_MODE, NULL );
1250    ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
1251    ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
1252    ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
1253    ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
1254    ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL );
1255 
1256    rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE;
1257    rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE;
1258    rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE;
1259    rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE;
1260 
1261    rmesa->hw.eye.cmd[EYE_X] = 0;
1262    rmesa->hw.eye.cmd[EYE_Y] = 0;
1263    rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
1264    rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
1265 
1266    rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] =
1267       R200_PS_SE_SEL_STATE | R200_PS_MULT_CONST;
1268 
1269    /* ptp_eye is presumably used to calculate the attenuation wrt a different
1270       location? In any case, since point attenuation triggers _needeyecoords,
1271       it is constant. Probably ignored as long as R200_PS_USE_MODEL_EYE_VEC
1272       isn't set */
1273    rmesa->hw.ptp.cmd[PTP_EYE_X] = 0;
1274    rmesa->hw.ptp.cmd[PTP_EYE_Y] = 0;
1275    rmesa->hw.ptp.cmd[PTP_EYE_Z] = IEEE_ONE | 0x80000000; /* -1.0 */
1276    rmesa->hw.ptp.cmd[PTP_EYE_3] = 0;
1277    /* no idea what the ptp_vport_scale values are good for, except the
1278       PTSIZE one - hopefully doesn't matter */
1279    rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_0] = IEEE_ONE;
1280    rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_1] = IEEE_ONE;
1281    rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_PTSIZE] = IEEE_ONE;
1282    rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_3] = IEEE_ONE;
1283    rmesa->hw.ptp.cmd[PTP_ATT_CONST_QUAD] = 0;
1284    rmesa->hw.ptp.cmd[PTP_ATT_CONST_LIN] = 0;
1285    rmesa->hw.ptp.cmd[PTP_ATT_CONST_CON] = IEEE_ONE;
1286    rmesa->hw.ptp.cmd[PTP_ATT_CONST_3] = 0;
1287    rmesa->hw.ptp.cmd[PTP_CLAMP_MIN] = IEEE_ONE;
1288    rmesa->hw.ptp.cmd[PTP_CLAMP_MAX] = 0x44ffe000; /* 2047 */
1289    rmesa->hw.ptp.cmd[PTP_CLAMP_2] = 0;
1290    rmesa->hw.ptp.cmd[PTP_CLAMP_3] = 0;
1291 
1292    r200LightingSpaceChange( ctx );
1293 
1294    radeon_init_query_stateobj(&rmesa->radeon, R200_QUERYOBJ_CMDSIZE);
1295    rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_CMD_0] = CP_PACKET0(RADEON_RB3D_ZPASS_DATA, 0);
1296    rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_DATA_0] = 0;
1297 
1298    rmesa->radeon.hw.all_dirty = GL_TRUE;
1299 
1300    rcommonInitCmdBuf(&rmesa->radeon);
1301 }
1302