1 /**************************************************************************
2 
3 Copyright (C) 2005 Aapo Tahkola.
4 
5 All Rights Reserved.
6 
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26 **************************************************************************/
27 
28 /*
29  * Authors:
30  *   Aapo Tahkola <aet@rasterburn.org>
31  *   Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32  */
33 
34 #include "main/errors.h"
35 #include "main/glheader.h"
36 #include "main/macros.h"
37 #include "main/enums.h"
38 #include "program/program.h"
39 #include "program/prog_instruction.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_statevars.h"
42 #include "program/programopt.h"
43 #include "tnl/tnl.h"
44 
45 #include "r200_context.h"
46 #include "r200_vertprog.h"
47 #include "r200_ioctl.h"
48 #include "r200_tcl.h"
49 
/*
 * Compile-time sanity check: t_swizzle() and t_dst_mask() hand Mesa's
 * SWIZZLE_* and WRITEMASK_* values to the hardware encoding unchanged, so
 * each Mesa constant must be numerically equal to its VSF_* counterpart.
 * The build is stopped if any pair ever diverges.
 */
#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
    SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
    SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
    SWIZZLE_W != VSF_IN_COMPONENT_W || \
    SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
    SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
    WRITEMASK_X != VSF_FLAG_X || \
    WRITEMASK_Y != VSF_FLAG_Y || \
    WRITEMASK_Z != VSF_FLAG_Z || \
    WRITEMASK_W != VSF_FLAG_W
#error Cannot change these!
#endif
62 
63 #define SCALAR_FLAG (1<<31)
64 #define FLAG_MASK (1<<31)
65 #define OP_MASK (0xf)  /* we are unlikely to have more than 15 */
66 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
67 
68 static struct{
69    char *name;
70    int opcode;
71    unsigned long ip; /* number of input operands and flags */
72 }op_names[]={
73    OPN(ABS, 1),
74    OPN(ADD, 2),
75    OPN(ARL, 1|SCALAR_FLAG),
76    OPN(DP3, 2),
77    OPN(DP4, 2),
78    OPN(DPH, 2),
79    OPN(DST, 2),
80    OPN(EX2, 1|SCALAR_FLAG),
81    OPN(EXP, 1|SCALAR_FLAG),
82    OPN(FLR, 1),
83    OPN(FRC, 1),
84    OPN(LG2, 1|SCALAR_FLAG),
85    OPN(LIT, 1),
86    OPN(LOG, 1|SCALAR_FLAG),
87    OPN(MAD, 3),
88    OPN(MAX, 2),
89    OPN(MIN, 2),
90    OPN(MOV, 1),
91    OPN(MUL, 2),
92    OPN(POW, 2|SCALAR_FLAG),
93    OPN(RCP, 1|SCALAR_FLAG),
94    OPN(RSQ, 1|SCALAR_FLAG),
95    OPN(SGE, 2),
96    OPN(SLT, 2),
97    OPN(SUB, 2),
98    OPN(SWZ, 1),
99    OPN(XPD, 2),
100    OPN(END, 0),
101 };
102 #undef OPN
103 
r200VertexProgUpdateParams(struct gl_context * ctx,struct r200_vertex_program * vp)104 static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_vertex_program *vp)
105 {
106    r200ContextPtr rmesa = R200_CONTEXT( ctx );
107    GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
108    int pi;
109    struct gl_program *mesa_vp = &vp->mesa_program;
110    struct gl_program_parameter_list *paramList;
111    drm_radeon_cmd_header_t tmp;
112 
113    R200_STATECHANGE( rmesa, vpp[0] );
114    R200_STATECHANGE( rmesa, vpp[1] );
115    assert(mesa_vp->Parameters);
116    _mesa_load_state_parameters(ctx, mesa_vp->Parameters);
117    paramList = mesa_vp->Parameters;
118 
119    if(paramList->NumParameters > R200_VSF_MAX_PARAM){
120       fprintf(stderr, "%s:Params exhausted\n", __func__);
121       return GL_FALSE;
122    }
123 
124    for(pi = 0; pi < paramList->NumParameters; pi++) {
125       unsigned pvo = paramList->ParameterValueOffset[pi];
126 
127       switch(paramList->Parameters[pi].Type) {
128       case PROGRAM_STATE_VAR:
129       //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
130       case PROGRAM_CONSTANT:
131 	 *fcmd++ = paramList->ParameterValues[pvo + 0].f;
132 	 *fcmd++ = paramList->ParameterValues[pvo + 1].f;
133 	 *fcmd++ = paramList->ParameterValues[pvo + 2].f;
134 	 *fcmd++ = paramList->ParameterValues[pvo + 3].f;
135 	 break;
136       default:
137 	 _mesa_problem(NULL, "Bad param type in %s", __func__);
138 	 break;
139       }
140       if (pi == 95) {
141 	 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
142       }
143    }
144    /* hack up the cmd_size so not the whole state atom is emitted always. */
145    rmesa->hw.vpp[0].cmd_size =
146       1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
147    tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
148    tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
149    rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
150    if (paramList->NumParameters > 96) {
151       rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
152       tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
153       tmp.veclinear.count = paramList->NumParameters - 96;
154       rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
155    }
156    return GL_TRUE;
157 }
158 
/**
 * Convert a Mesa destination write mask to the hardware write mask.
 *
 * WRITEMASK_* and VSF_FLAG_* use the same bit values, so the conversion
 * only drops any bits outside VSF_FLAG_ALL.
 */
static inline unsigned long t_dst_mask(GLuint mask)
{
   const unsigned long hw_mask = mask & VSF_FLAG_ALL;

   return hw_mask;
}
164 
t_dst(struct prog_dst_register * dst)165 static unsigned long t_dst(struct prog_dst_register *dst)
166 {
167    switch(dst->File) {
168    case PROGRAM_TEMPORARY:
169       return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
170 	 | R200_VSF_OUT_CLASS_TMP);
171    case PROGRAM_OUTPUT:
172       switch (dst->Index) {
173       case VARYING_SLOT_POS:
174 	 return R200_VSF_OUT_CLASS_RESULT_POS;
175       case VARYING_SLOT_COL0:
176 	 return R200_VSF_OUT_CLASS_RESULT_COLOR;
177       case VARYING_SLOT_COL1:
178 	 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
179 	    | R200_VSF_OUT_CLASS_RESULT_COLOR);
180       case VARYING_SLOT_FOGC:
181 	 return R200_VSF_OUT_CLASS_RESULT_FOGC;
182       case VARYING_SLOT_TEX0:
183       case VARYING_SLOT_TEX1:
184       case VARYING_SLOT_TEX2:
185       case VARYING_SLOT_TEX3:
186       case VARYING_SLOT_TEX4:
187       case VARYING_SLOT_TEX5:
188 	 return (((dst->Index - VARYING_SLOT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
189 	    | R200_VSF_OUT_CLASS_RESULT_TEXC);
190       case VARYING_SLOT_PSIZ:
191 	 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
192       default:
193 	 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __func__, dst->Index);
194 	 exit(0);
195 	 return 0;
196       }
197    case PROGRAM_ADDRESS:
198       assert (dst->Index == 0);
199       return R200_VSF_OUT_CLASS_ADDR;
200    default:
201       fprintf(stderr, "problem in %s, unknown register type %d\n", __func__, dst->File);
202       exit(0);
203       return 0;
204    }
205 }
206 
/**
 * Map a Mesa register file to the hardware source-register class.
 *
 * Temporaries map to VSF_IN_CLASS_TMP, inputs to VSF_IN_CLASS_ATTR, and
 * both constants and state variables to VSF_IN_CLASS_PARAM.
 *
 * Any other file (including PROGRAM_OUTPUT and PROGRAM_ADDRESS) is fatal:
 * a message is printed to stderr and the process exits with a failure
 * status, like the other translation helpers in this file do.
 *
 * \param file  Mesa register file of a source operand.
 * \return the matching VSF_IN_CLASS_* value.
 */
static unsigned long t_src_class(gl_register_file file)
{

   switch(file){
   case PROGRAM_TEMPORARY:
      return VSF_IN_CLASS_TMP;

   case PROGRAM_INPUT:
      return VSF_IN_CLASS_ATTR;

   case PROGRAM_CONSTANT:
   case PROGRAM_STATE_VAR:
      return VSF_IN_CLASS_PARAM;

   default:
      fprintf(stderr, "problem in %s", __func__);
      /* fatal error: must not report success to the parent process */
      exit(-1);
      return 0;
   }
}
229 
/**
 * Convert a Mesa swizzle selector to the hardware component selector.
 *
 * Mesa's SWIZZLE_* values are identical to VSF_IN_COMPONENT_*, so the
 * value is passed through unchanged.
 */
static inline unsigned long t_swizzle(GLubyte swizzle)
{
   return (unsigned long)swizzle;
}
235 
#if 0
/*
 * Debug helper, currently compiled out.
 * Prints every entry of vp->inputs[] (VERT_ATTRIB_MAX of them) to stderr
 * on one line, prefixed with the caller's name. A NULL vp is reported and
 * otherwise ignored.
 */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int i;

   if(vp == NULL){
      fprintf(stderr, "vp null in call to %s from %s\n", __func__, caller);
      return ;
   }

   fprintf(stderr, "%s:<", caller);
   for(i=0; i < VERT_ATTRIB_MAX; i++)
   fprintf(stderr, "%d ", vp->inputs[i]);
   fprintf(stderr, ">\n");

}
#endif
253 
t_src_index(struct r200_vertex_program * vp,struct prog_src_register * src)254 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
255 {
256 /*
257    int i;
258    int max_reg = -1;
259 */
260    if(src->File == PROGRAM_INPUT){
261 /*      if(vp->inputs[src->Index] != -1)
262 	 return vp->inputs[src->Index];
263 
264       for(i=0; i < VERT_ATTRIB_MAX; i++)
265 	 if(vp->inputs[i] > max_reg)
266 	    max_reg = vp->inputs[i];
267 
268       vp->inputs[src->Index] = max_reg+1;*/
269 
270       //vp_dump_inputs(vp, __func__);
271       assert(vp->inputs[src->Index] != -1);
272       return vp->inputs[src->Index];
273    } else {
274       if (src->Index < 0) {
275 	 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
276 	 return 0;
277       }
278       return src->Index;
279    }
280 }
281 
t_src(struct r200_vertex_program * vp,struct prog_src_register * src)282 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
283 {
284 
285    return MAKE_VSF_SOURCE(t_src_index(vp, src),
286 			t_swizzle(GET_SWZ(src->Swizzle, 0)),
287 			t_swizzle(GET_SWZ(src->Swizzle, 1)),
288 			t_swizzle(GET_SWZ(src->Swizzle, 2)),
289 			t_swizzle(GET_SWZ(src->Swizzle, 3)),
290 			t_src_class(src->File),
291 			src->Negate) | (src->RelAddr << 4);
292 }
293 
t_src_scalar(struct r200_vertex_program * vp,struct prog_src_register * src)294 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
295 {
296 
297    return MAKE_VSF_SOURCE(t_src_index(vp, src),
298 			t_swizzle(GET_SWZ(src->Swizzle, 0)),
299 			t_swizzle(GET_SWZ(src->Swizzle, 0)),
300 			t_swizzle(GET_SWZ(src->Swizzle, 0)),
301 			t_swizzle(GET_SWZ(src->Swizzle, 0)),
302 			t_src_class(src->File),
303 			src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
304 }
305 
t_opcode(enum prog_opcode opcode)306 static unsigned long t_opcode(enum prog_opcode opcode)
307 {
308 
309    switch(opcode){
310    case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
311    /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
312     * seems to ignore neg offsets which isn't quite correct...
313     */
314    case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
315    case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
316    case OPCODE_DST: return R200_VPI_OUT_OP_DST;
317    case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
318    case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
319    case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
320    case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
321    case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
322    case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
323    case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
324    case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
325    case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
326    case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
327    case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
328    case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
329    case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
330 
331    default:
332       fprintf(stderr, "%s: Should not be called with opcode %d!", __func__, opcode);
333    }
334    exit(-1);
335    return 0;
336 }
337 
op_operands(enum prog_opcode opcode)338 static unsigned long op_operands(enum prog_opcode opcode)
339 {
340    int i;
341 
342    /* Can we trust mesas opcodes to be in order ? */
343    for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
344       if(op_names[i].opcode == opcode)
345 	 return op_names[i].ip;
346 
347    fprintf(stderr, "op %d not found in op_names\n", opcode);
348    exit(-1);
349    return 0;
350 }
351 
/*
 * CMP_SRCS(a, b): non-zero when source registers a and b refer to
 * different registers (RelAddr or Index differ) while both are in the
 * PARAM class or both are in the ATTR class. The translator uses this to
 * decide when one of the two operands has to be copied to a temporary
 * first.
 *
 * Every use of a macro argument is parenthesised so that any expression
 * yielding a struct prog_src_register (e.g. *ptr) can be passed. The
 * arguments are evaluated more than once, so they must be free of side
 * effects.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR)))
358 
/* fglrx on rv250 codes up unused sources as follows:
   unused but necessary sources are same as previous source, zero-ed out.
   unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* use these simpler definitions. Must obviously not be used with not yet set up regs.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* All macros below read the instruction being built through a variable
   named o_inst that must be in scope at the point of use.

   ZERO_SRC_n: the current value of o_inst->srcN with the 12 bits starting at
   R200_VPI_IN_X_SHIFT cleared and R200_VPI_IN_SELECT_ZERO written into the
   X, Y, Z and W select positions; all other bits of srcN are kept. */
#define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

#define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

#define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

/* UNUSED_SRC_n: the current value of o_inst->srcN with its low four bits
   replaced by 9 (presumably the VSF_IN_CLASS_NONE encoding mentioned
   above - confirm against the register header). */
#define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9)

#define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9)

#define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9)
390 
391 
392 /**
393  * Generate an R200 vertex program from Mesa's internal representation.
394  *
395  * \return  GL_TRUE for success, GL_FALSE for failure.
396  */
r200_translate_vertex_program(struct gl_context * ctx,struct r200_vertex_program * vp)397 static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
398 {
399    struct gl_program *mesa_vp = &vp->mesa_program;
400    struct prog_instruction *vpi;
401    int i;
402    VERTEX_SHADER_INSTRUCTION *o_inst;
403    unsigned long operands;
404    int are_srcs_scalar;
405    unsigned long hw_op;
406    int dofogfix = 0;
407    int fog_temp_i = 0;
408    int free_inputs;
409    int array_count = 0;
410    int u_temp_used;
411 
412    vp->native = GL_FALSE;
413    vp->translated = GL_TRUE;
414    vp->fogmode = ctx->Fog.Mode;
415 
416    if (mesa_vp->arb.NumInstructions == 0)
417       return GL_FALSE;
418 
419 #if 0
420    if ((mesa_vp->info.inputs_read &
421       ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
422       VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
423       VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
424       if (R200_DEBUG & RADEON_FALLBACKS) {
425 	 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
426 	    mesa_vp->info.inputs_read);
427       }
428       return GL_FALSE;
429    }
430 #endif
431 
432    if ((mesa_vp->info.outputs_written &
433       ~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) |
434       (1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) |
435       (1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) |
436       (1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) {
437       if (R200_DEBUG & RADEON_FALLBACKS) {
438 	 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
439                  (unsigned long long) mesa_vp->info.outputs_written);
440       }
441       return GL_FALSE;
442    }
443 
444    /* Initial value should be last tmp reg that hw supports.
445       Strangely enough r300 doesnt mind even though these would be out of range.
446       Smart enough to realize that it doesnt need it? */
447    int u_temp_i = R200_VSF_MAX_TEMPS - 1;
448    struct prog_src_register src[3];
449    struct prog_dst_register dst;
450 
451 /* FIXME: is changing the prog safe to do here? */
452    if (mesa_vp->arb.IsPositionInvariant &&
453       /* make sure we only do this once */
454        !(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
455 	 _mesa_insert_mvp_code(ctx, mesa_vp);
456       }
457 
458    /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
459       base e isn't directly available neither. */
460    if ((mesa_vp->info.outputs_written & (1 << VARYING_SLOT_FOGC)) &&
461        !vp->fogpidx) {
462       struct gl_program_parameter_list *paramList;
463       gl_state_index16 tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
464       paramList = mesa_vp->Parameters;
465       vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
466    }
467 
468    vp->pos_end = 0;
469    mesa_vp->arb.NumNativeInstructions = 0;
470    if (mesa_vp->Parameters)
471       mesa_vp->arb.NumNativeParameters = mesa_vp->Parameters->NumParameters;
472    else
473       mesa_vp->arb.NumNativeParameters = 0;
474 
475    for(i = 0; i < VERT_ATTRIB_MAX; i++)
476       vp->inputs[i] = -1;
477    for(i = 0; i < 15; i++)
478       vp->inputmap_rev[i] = 255;
479    free_inputs = 0x2ffd;
480 
481 /* fglrx uses fixed inputs as follows for conventional attribs.
482    generic attribs use non-fixed assignment, fglrx will always use the
483    lowest attrib values available. We'll just do the same.
484    There are 12 generic attribs possible, corresponding to attrib 0, 2-11
485    and 13 in a hw vertex prog.
486    attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
487    (correspond to vertex normal/weight - maybe weight actually could be made vec4).
488    Additionally, not more than 12 arrays in total are possible I think.
489    attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
490    attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
491    attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
492    attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
493 */
494 
495 /* attr 4,5 and 13 are only used with generic attribs.
496    Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
497    not possibe to use with vertex progs as it is lacking in vert prog specification) */
498 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
499    if (mesa_vp->info.inputs_read & VERT_BIT_POS) {
500       vp->inputs[VERT_ATTRIB_POS] = 0;
501       vp->inputmap_rev[0] = VERT_ATTRIB_POS;
502       free_inputs &= ~(1 << 0);
503       array_count++;
504    }
505    if (mesa_vp->info.inputs_read & VERT_BIT_NORMAL) {
506       vp->inputs[VERT_ATTRIB_NORMAL] = 1;
507       vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
508       array_count++;
509    }
510    if (mesa_vp->info.inputs_read & VERT_BIT_COLOR0) {
511       vp->inputs[VERT_ATTRIB_COLOR0] = 2;
512       vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
513       free_inputs &= ~(1 << 2);
514       array_count++;
515    }
516    if (mesa_vp->info.inputs_read & VERT_BIT_COLOR1) {
517       vp->inputs[VERT_ATTRIB_COLOR1] = 3;
518       vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
519       free_inputs &= ~(1 << 3);
520       array_count++;
521    }
522    if (mesa_vp->info.inputs_read & VERT_BIT_FOG) {
523       vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
524       vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
525       array_count++;
526    }
527    /* VERT_ATTRIB_TEX0-5 */
528    for (i = 0; i <= 5; i++) {
529       if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
530 	 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
531 	 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
532 	 free_inputs &= ~(1 << (i + 6));
533 	 array_count++;
534       }
535    }
536    /* using VERT_ATTRIB_TEX6/7 would be illegal */
537    for (; i < VERT_ATTRIB_TEX_MAX; i++) {
538       if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
539           if (R200_DEBUG & RADEON_FALLBACKS) {
540               fprintf(stderr, "texture attribute %d in vert prog\n", i);
541           }
542           return GL_FALSE;
543       }
544    }
545    /* completely ignore aliasing? */
546    for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
547       int j;
548    /* completely ignore aliasing? */
549       if (mesa_vp->info.inputs_read & VERT_BIT_GENERIC(i)) {
550 	 array_count++;
551 	 if (array_count > 12) {
552 	    if (R200_DEBUG & RADEON_FALLBACKS) {
553 	       fprintf(stderr, "more than 12 attribs used in vert prog\n");
554 	    }
555 	    return GL_FALSE;
556 	 }
557 	 for (j = 0; j < 14; j++) {
558 	    /* will always find one due to limited array_count */
559 	    if (free_inputs & (1 << j)) {
560 	       free_inputs &= ~(1 << j);
561 	       vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
562 	       if (j == 0) {
563                   /* mapped to pos */
564                   vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
565 	       } else if (j < 12) {
566                   /* mapped to col/tex */
567                   vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
568 	       } else {
569                   /* mapped to pos1 */
570                   vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
571                }
572 	       break;
573 	    }
574 	 }
575       }
576    }
577 
578    if (!(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
579       if (R200_DEBUG & RADEON_FALLBACKS) {
580 	 fprintf(stderr, "can't handle vert prog without position output\n");
581       }
582       return GL_FALSE;
583    }
584    if (free_inputs & 1) {
585       if (R200_DEBUG & RADEON_FALLBACKS) {
586 	 fprintf(stderr, "can't handle vert prog without position input\n");
587       }
588       return GL_FALSE;
589    }
590 
591    o_inst = vp->instr;
592    for (vpi = mesa_vp->arb.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
593       operands = op_operands(vpi->Opcode);
594       are_srcs_scalar = operands & SCALAR_FLAG;
595       operands &= OP_MASK;
596 
597       for(i = 0; i < operands; i++) {
598 	 src[i] = vpi->SrcReg[i];
599 	 /* hack up default attrib values as per spec as swizzling.
600 	    normal, fog, secondary color. Crazy?
601 	    May need more if we don't submit vec4 elements? */
602 	 if (src[i].File == PROGRAM_INPUT) {
603 	    if (src[i].Index == VERT_ATTRIB_NORMAL) {
604 	       int j;
605 	       for (j = 0; j < 4; j++) {
606 		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
607 		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
608 		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
609 		  }
610 	       }
611 	    }
612 	    else if (src[i].Index == VERT_ATTRIB_COLOR1) {
613 	       int j;
614 	       for (j = 0; j < 4; j++) {
615 		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
616 		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
617 		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
618 		  }
619 	       }
620 	    }
621 	    else if (src[i].Index == VERT_ATTRIB_FOG) {
622 	       int j;
623 	       for (j = 0; j < 4; j++) {
624 		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
625 		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
626 		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
627 		  }
628 		  else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
629 			    GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
630 		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
631 		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
632 		  }
633 	       }
634 	    }
635 	 }
636       }
637 
638       if(operands == 3){
639 	 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
640 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
641 		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
642 		VSF_FLAG_ALL);
643 
644 	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
645 		  SWIZZLE_X, SWIZZLE_Y,
646 		  SWIZZLE_Z, SWIZZLE_W,
647 		  t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
648 
649 	    o_inst->src1 = ZERO_SRC_0;
650 	    o_inst->src2 = UNUSED_SRC_1;
651 	    o_inst++;
652 
653 	    src[2].File = PROGRAM_TEMPORARY;
654 	    src[2].Index = u_temp_i;
655 	    src[2].RelAddr = 0;
656 	    u_temp_i--;
657 	 }
658       }
659 
660       if(operands >= 2){
661 	 if( CMP_SRCS(src[1], src[0]) ){
662 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
663 		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
664 		VSF_FLAG_ALL);
665 
666 	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
667 		  SWIZZLE_X, SWIZZLE_Y,
668 		  SWIZZLE_Z, SWIZZLE_W,
669 		  t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
670 
671 	    o_inst->src1 = ZERO_SRC_0;
672 	    o_inst->src2 = UNUSED_SRC_1;
673 	    o_inst++;
674 
675 	    src[0].File = PROGRAM_TEMPORARY;
676 	    src[0].Index = u_temp_i;
677 	    src[0].RelAddr = 0;
678 	    u_temp_i--;
679 	 }
680       }
681 
682       dst = vpi->DstReg;
683       if (dst.File == PROGRAM_OUTPUT &&
684 	  dst.Index == VARYING_SLOT_FOGC &&
685 	  dst.WriteMask & WRITEMASK_X) {
686 	  fog_temp_i = u_temp_i;
687 	  dst.File = PROGRAM_TEMPORARY;
688 	  dst.Index = fog_temp_i;
689 	  dofogfix = 1;
690 	  u_temp_i--;
691       }
692 
693       /* These ops need special handling. */
694       switch(vpi->Opcode){
695       case OPCODE_POW:
696 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
697    So may need to insert additional instruction */
698 	 if ((src[0].File == src[1].File) &&
699 	     (src[0].Index == src[1].Index)) {
700 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
701 		   t_dst_mask(dst.WriteMask));
702 	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
703 		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
704 		   SWIZZLE_ZERO,
705 		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
706 		   SWIZZLE_ZERO,
707 		   t_src_class(src[0].File),
708 		   src[0].Negate) | (src[0].RelAddr << 4);
709 	    o_inst->src1 = UNUSED_SRC_0;
710 	    o_inst->src2 = UNUSED_SRC_0;
711 	 }
712 	 else {
713 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
714 		   (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
715 		   VSF_FLAG_ALL);
716 	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
717 		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
718 		   SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
719 		   t_src_class(src[0].File),
720 		   src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
721 	    o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
722 		   SWIZZLE_ZERO, SWIZZLE_ZERO,
723 		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
724 		   t_src_class(src[1].File),
725 		   src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
726 	    o_inst->src2 = UNUSED_SRC_1;
727 	    o_inst++;
728 
729 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
730 		   t_dst_mask(dst.WriteMask));
731 	    o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
732 		   VSF_IN_COMPONENT_X,
733 		   VSF_IN_COMPONENT_Y,
734 		   VSF_IN_COMPONENT_Z,
735 		   VSF_IN_COMPONENT_W,
736 		   VSF_IN_CLASS_TMP,
737 		   VSF_FLAG_NONE);
738 	    o_inst->src1 = UNUSED_SRC_0;
739 	    o_inst->src2 = UNUSED_SRC_0;
740 	    u_temp_i--;
741 	 }
742 	 goto next;
743 
744       case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
745       case OPCODE_SWZ:
746 	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
747 		t_dst_mask(dst.WriteMask));
748 	 o_inst->src0 = t_src(vp, &src[0]);
749 	 o_inst->src1 = ZERO_SRC_0;
750 	 o_inst->src2 = UNUSED_SRC_1;
751 	 goto next;
752 
753       case OPCODE_MAD:
754 	 /* only 2 read ports into temp memory thus may need the macro op MAD_2
755 	    instead (requiring 2 clocks) if all inputs are in temp memory
756 	    (and, only if they actually reference 3 distinct temps) */
757 	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
758 	    src[1].File == PROGRAM_TEMPORARY &&
759 	    src[2].File == PROGRAM_TEMPORARY &&
760 	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
761 	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
762 	    (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
763 	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
764 
765 	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
766 	    t_dst_mask(dst.WriteMask));
767 	 o_inst->src0 = t_src(vp, &src[0]);
768 #if 0
769 if ((o_inst - vp->instr) == 31) {
770 /* fix up the broken vertex program of quake4 demo... */
771 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
772 			SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
773 			t_src_class(src[1].File),
774 			src[1].Negate) | (src[1].RelAddr << 4);
775 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
776 			SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
777 			t_src_class(src[1].File),
778 			src[1].Negate) | (src[1].RelAddr << 4);
779 }
780 else {
781 	 o_inst->src1 = t_src(vp, &src[1]);
782 	 o_inst->src2 = t_src(vp, &src[2]);
783 }
784 #else
785 	 o_inst->src1 = t_src(vp, &src[1]);
786 	 o_inst->src2 = t_src(vp, &src[2]);
787 #endif
788 	 goto next;
789 
790       case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
791 	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
792 		t_dst_mask(dst.WriteMask));
793 
794 	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
795 		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
796 		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
797 		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
798 		SWIZZLE_ZERO,
799 		t_src_class(src[0].File),
800 		src[0].Negate) | (src[0].RelAddr << 4);
801 
802 	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
803 		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
804 		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
805 		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
806 		SWIZZLE_ZERO,
807 		t_src_class(src[1].File),
808 		src[1].Negate) | (src[1].RelAddr << 4);
809 
810 	 o_inst->src2 = UNUSED_SRC_1;
811 	 goto next;
812 
813       case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
814 	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
815 		t_dst_mask(dst.WriteMask));
816 
817 	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
818 		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
819 		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
820 		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
821 		VSF_IN_COMPONENT_ONE,
822 		t_src_class(src[0].File),
823 		src[0].Negate) | (src[0].RelAddr << 4);
824 	 o_inst->src1 = t_src(vp, &src[1]);
825 	 o_inst->src2 = UNUSED_SRC_1;
826 	 goto next;
827 
828       case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
829 	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
830 		t_dst_mask(dst.WriteMask));
831 
832 	 o_inst->src0 = t_src(vp, &src[0]);
833 	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
834 		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
835 		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
836 		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
837 		t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
838 		t_src_class(src[1].File),
839 		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
840 	 o_inst->src2 = UNUSED_SRC_1;
841 	 goto next;
842 
843       case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
844 	 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
845 		t_dst_mask(dst.WriteMask));
846 
847 	 o_inst->src0=t_src(vp, &src[0]);
848 	 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
849 		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
850 		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
851 		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
852 		t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
853 		t_src_class(src[0].File),
854 		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
855 	 o_inst->src2 = UNUSED_SRC_1;
856 	 goto next;
857 
858       case OPCODE_FLR:
859       /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
860          ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
861 
862 	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
863 	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
864 	    t_dst_mask(dst.WriteMask));
865 
866 	 o_inst->src0 = t_src(vp, &src[0]);
867 	 o_inst->src1 = UNUSED_SRC_0;
868 	 o_inst->src2 = UNUSED_SRC_1;
869 	 o_inst++;
870 
871 	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
872 		t_dst_mask(dst.WriteMask));
873 
874 	 o_inst->src0 = t_src(vp, &src[0]);
875 	 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
876 		VSF_IN_COMPONENT_X,
877 		VSF_IN_COMPONENT_Y,
878 		VSF_IN_COMPONENT_Z,
879 		VSF_IN_COMPONENT_W,
880 		VSF_IN_CLASS_TMP,
881 		/* Not 100% sure about this */
882 		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
883 
884 	 o_inst->src2 = UNUSED_SRC_0;
885 	 u_temp_i--;
886 	 goto next;
887 
888       case OPCODE_XPD:
889 	 /* mul r0, r1.yzxw, r2.zxyw
890 	    mad r0, -r2.yzxw, r1.zxyw, r0
891 	  */
892 	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
893 	    src[1].File == PROGRAM_TEMPORARY &&
894 	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
895 	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
896 
897 	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
898 	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
899 	    t_dst_mask(dst.WriteMask));
900 
901 	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
902 		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
903 		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
904 		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
905 		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
906 		t_src_class(src[0].File),
907 		src[0].Negate) | (src[0].RelAddr << 4);
908 
909 	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
910 		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
911 		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
912 		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
913 		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
914 		t_src_class(src[1].File),
915 		src[1].Negate) | (src[1].RelAddr << 4);
916 
917 	 o_inst->src2 = UNUSED_SRC_1;
918 	 o_inst++;
919 	 u_temp_i--;
920 
921 	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
922 		t_dst_mask(dst.WriteMask));
923 
924 	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
925 		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
926 		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
927 		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
928 		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
929 		t_src_class(src[1].File),
930 		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
931 
932 	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
933 		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
934 		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
935 		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
936 		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
937 		t_src_class(src[0].File),
938 		src[0].Negate) | (src[0].RelAddr << 4);
939 
940 	 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
941 		VSF_IN_COMPONENT_X,
942 		VSF_IN_COMPONENT_Y,
943 		VSF_IN_COMPONENT_Z,
944 		VSF_IN_COMPONENT_W,
945 		VSF_IN_CLASS_TMP,
946 		VSF_FLAG_NONE);
947 	 goto next;
948 
949       case OPCODE_END:
950 	 assert(0);
951       default:
952 	 break;
953       }
954 
955       o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
956 	    t_dst_mask(dst.WriteMask));
957 
958       if(are_srcs_scalar){
959 	 switch(operands){
960 	    case 1:
961 		o_inst->src0 = t_src_scalar(vp, &src[0]);
962 		o_inst->src1 = UNUSED_SRC_0;
963 		o_inst->src2 = UNUSED_SRC_1;
964 	    break;
965 
966 	    case 2:
967 		o_inst->src0 = t_src_scalar(vp, &src[0]);
968 		o_inst->src1 = t_src_scalar(vp, &src[1]);
969 		o_inst->src2 = UNUSED_SRC_1;
970 	    break;
971 
972 	    case 3:
973 		o_inst->src0 = t_src_scalar(vp, &src[0]);
974 		o_inst->src1 = t_src_scalar(vp, &src[1]);
975 		o_inst->src2 = t_src_scalar(vp, &src[2]);
976 	    break;
977 
978 	    default:
979 		fprintf(stderr, "illegal number of operands %lu\n", operands);
980 		exit(-1);
981 	    break;
982 	 }
983       } else {
984 	 switch(operands){
985 	    case 1:
986 		o_inst->src0 = t_src(vp, &src[0]);
987 		o_inst->src1 = UNUSED_SRC_0;
988 		o_inst->src2 = UNUSED_SRC_1;
989 	    break;
990 
991 	    case 2:
992 		o_inst->src0 = t_src(vp, &src[0]);
993 		o_inst->src1 = t_src(vp, &src[1]);
994 		o_inst->src2 = UNUSED_SRC_1;
995 	    break;
996 
997 	    case 3:
998 		o_inst->src0 = t_src(vp, &src[0]);
999 		o_inst->src1 = t_src(vp, &src[1]);
1000 		o_inst->src2 = t_src(vp, &src[2]);
1001 	    break;
1002 
1003 	    default:
1004 		fprintf(stderr, "illegal number of operands %lu\n", operands);
1005 		exit(-1);
1006 	    break;
1007 	 }
1008       }
1009       next:
1010 
1011       if (dofogfix) {
1012 	 o_inst++;
1013 	 if (vp->fogmode == GL_EXP) {
1014 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1015 		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1016 		VSF_FLAG_X);
1017 	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1018 	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1019 	    o_inst->src2 = UNUSED_SRC_1;
1020 	    o_inst++;
1021 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1022 		R200_VSF_OUT_CLASS_RESULT_FOGC,
1023 		VSF_FLAG_X);
1024 	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1025 	    o_inst->src1 = UNUSED_SRC_0;
1026 	    o_inst->src2 = UNUSED_SRC_1;
1027 	 }
1028 	 else if (vp->fogmode == GL_EXP2) {
1029 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1030 		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1031 		VSF_FLAG_X);
1032 	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1033 	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1034 	    o_inst->src2 = UNUSED_SRC_1;
1035 	    o_inst++;
1036 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1037 		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1038 		VSF_FLAG_X);
1039 	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1040 	    o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1041 	    o_inst->src2 = UNUSED_SRC_1;
1042 	    o_inst++;
1043 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1044 		R200_VSF_OUT_CLASS_RESULT_FOGC,
1045 		VSF_FLAG_X);
1046 	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1047 	    o_inst->src1 = UNUSED_SRC_0;
1048 	    o_inst->src2 = UNUSED_SRC_1;
1049 	 }
1050 	 else { /* fogmode == GL_LINEAR */
1051 		/* could do that with single op (dot) if using params like
1052 		   with fixed function pipeline fog */
1053 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1054 		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1055 		VSF_FLAG_X);
1056 	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1057 	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1058 	    o_inst->src2 = UNUSED_SRC_1;
1059 	    o_inst++;
1060 	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1061 		R200_VSF_OUT_CLASS_RESULT_FOGC,
1062 		VSF_FLAG_X);
1063 	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1064 	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1065 	    o_inst->src2 = UNUSED_SRC_1;
1066 
1067 	 }
1068          dofogfix = 0;
1069       }
1070 
1071       u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
1072       if (mesa_vp->arb.NumNativeTemporaries <
1073           (mesa_vp->arb.NumTemporaries + u_temp_used)) {
1074          mesa_vp->arb.NumNativeTemporaries =
1075             mesa_vp->arb.NumTemporaries + u_temp_used;
1076       }
1077       if ((mesa_vp->arb.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
1078 	 if (R200_DEBUG & RADEON_FALLBACKS) {
1079             fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->arb.NumTemporaries, u_temp_used);
1080 	 }
1081 	 return GL_FALSE;
1082       }
1083       u_temp_i = R200_VSF_MAX_TEMPS - 1;
1084       if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1085          mesa_vp->arb.NumNativeInstructions = 129;
1086 	 if (R200_DEBUG & RADEON_FALLBACKS) {
1087 	    fprintf(stderr, "more than 128 native instructions\n");
1088 	 }
1089 	 return GL_FALSE;
1090       }
1091       if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1092 	 vp->pos_end = (o_inst - vp->instr);
1093       }
1094    }
1095 
1096    vp->native = GL_TRUE;
1097    mesa_vp->arb.NumNativeInstructions = (o_inst - vp->instr);
1098 #if 0
1099    fprintf(stderr, "hw program:\n");
1100    for(i=0; i < vp->program.length; i++)
1101       fprintf(stderr, "%08x\n", vp->instr[i]);
1102 #endif
1103    return GL_TRUE;
1104 }
1105 
r200SetupVertexProg(struct gl_context * ctx)1106 void r200SetupVertexProg( struct gl_context *ctx ) {
1107    r200ContextPtr rmesa = R200_CONTEXT(ctx);
1108    struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1109    GLboolean fallback;
1110    GLint i;
1111 
1112    if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1113       rmesa->curr_vp_hw = NULL;
1114       r200_translate_vertex_program(ctx, vp);
1115    }
1116    /* could optimize setting up vertex progs away for non-tcl hw */
1117    fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp));
1118    TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1119    if (rmesa->radeon.TclFallback) return;
1120 
1121    R200_STATECHANGE( rmesa, vap );
1122    /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1123              maybe only when using more than 64 inst / 96 param? */
1124    rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1125 
1126    R200_STATECHANGE( rmesa, pvs );
1127 
1128    rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1129       ((vp->mesa_program.arb.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1130       (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1131    rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1132       (vp->mesa_program.arb.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1133 
1134    /* maybe user clip planes just work with vertex progs... untested */
1135    if (ctx->Transform.ClipPlanesEnabled) {
1136       R200_STATECHANGE( rmesa, tcl );
1137       if (vp->mesa_program.arb.IsPositionInvariant) {
1138 	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1139       }
1140       else {
1141 	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1142       }
1143    }
1144 
1145    if (vp != rmesa->curr_vp_hw) {
1146       GLuint count = vp->mesa_program.arb.NumNativeInstructions;
1147       drm_radeon_cmd_header_t tmp;
1148 
1149       R200_STATECHANGE( rmesa, vpi[0] );
1150       R200_STATECHANGE( rmesa, vpi[1] );
1151 
1152       /* FIXME: what about using a memcopy... */
1153       for (i = 0; (i < 64) && i < count; i++) {
1154 	 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1155 	 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1156 	 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1157 	 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1158       }
1159       /* hack up the cmd_size so not the whole state atom is emitted always.
1160          This may require some more thought, we may emit half progs on lost state, but
1161          hopefully it won't matter?
1162          WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1163          packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1164       rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1165       tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1166       tmp.veclinear.count = (count > 64) ? 64 : count;
1167       rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1168       if (count > 64) {
1169 	 for (i = 0; i < (count - 64); i++) {
1170 	    rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1171 	    rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1172 	    rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1173 	    rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1174 	 }
1175 	 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1176 	 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1177 	 tmp.veclinear.count = count - 64;
1178 	 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1179       }
1180       rmesa->curr_vp_hw = vp;
1181    }
1182 }
1183 
1184 
1185 static struct gl_program *
r200NewProgram(struct gl_context * ctx,gl_shader_stage stage,GLuint id,bool is_arb_asm)1186 r200NewProgram(struct gl_context *ctx, gl_shader_stage stage, GLuint id,
1187                bool is_arb_asm)
1188 {
1189    switch(stage){
1190    case MESA_SHADER_VERTEX: {
1191       struct r200_vertex_program *vp = rzalloc(NULL,
1192                                                struct r200_vertex_program);
1193       return _mesa_init_gl_program(&vp->mesa_program, stage, id, is_arb_asm);
1194    }
1195    case MESA_SHADER_FRAGMENT: {
1196       struct gl_program *prog = rzalloc(NULL, struct gl_program);
1197       return _mesa_init_gl_program(prog, stage, id, is_arb_asm);
1198    }
1199    default:
1200       _mesa_problem(ctx, "Bad target in r200NewProgram");
1201       return NULL;
1202    }
1203 }
1204 
1205 
/**
 * DeleteProgram driver hook.
 *
 * The driver does no cleanup of its own here; destruction is handed
 * straight to _mesa_delete_program().
 */
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
1211 
1212 static GLboolean
r200ProgramStringNotify(struct gl_context * ctx,GLenum target,struct gl_program * prog)1213 r200ProgramStringNotify(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1214 {
1215    struct r200_vertex_program *vp = (void *)prog;
1216    r200ContextPtr rmesa = R200_CONTEXT(ctx);
1217 
1218    switch(target) {
1219    case GL_VERTEX_PROGRAM_ARB:
1220       vp->translated = GL_FALSE;
1221       vp->fogpidx = 0;
1222 /*      memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_program));*/
1223       r200_translate_vertex_program(ctx, vp);
1224       rmesa->curr_vp_hw = NULL;
1225       break;
1226    case GL_FRAGMENT_SHADER_ATI:
1227       rmesa->afs_loaded = NULL;
1228       break;
1229    }
1230    /* need this for tcl fallbacks */
1231    (void) _tnl_program_string(ctx, target, prog);
1232 
1233    /* XXX check if program is legal, within limits */
1234    return GL_TRUE;
1235 }
1236 
1237 static GLboolean
r200IsProgramNative(struct gl_context * ctx,GLenum target,struct gl_program * prog)1238 r200IsProgramNative(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1239 {
1240    struct r200_vertex_program *vp = (void *)prog;
1241 
1242    switch(target){
1243    case GL_VERTEX_PROGRAM_ARB:
1244       if (!vp->translated) {
1245 	 r200_translate_vertex_program(ctx, vp);
1246       }
1247      /* does not take parameters etc. into account */
1248       return vp->native;
1249    default:
1250       _mesa_problem(ctx, "Bad target in r200NewProgram");
1251    }
1252    return 0;
1253 }
1254 
r200InitShaderFuncs(struct dd_function_table * functions)1255 void r200InitShaderFuncs(struct dd_function_table *functions)
1256 {
1257    functions->NewProgram = r200NewProgram;
1258    functions->DeleteProgram = r200DeleteProgram;
1259    functions->ProgramStringNotify = r200ProgramStringNotify;
1260    functions->IsProgramNative = r200IsProgramNative;
1261 }
1262