1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "program/program.h"
37 #include "program/prog_instruction.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_statevars.h"
40 #include "program/programopt.h"
41 #include "tnl/tnl.h"
42
43 #include "r200_context.h"
44 #include "r200_vertprog.h"
45 #include "r200_ioctl.h"
46 #include "r200_tcl.h"
47
/* Compile-time guard: t_swizzle() and t_dst_mask() pass Mesa's SWIZZLE_* and
 * WRITEMASK_* values through unchanged as VSF_IN_COMPONENT_* / VSF_FLAG_*
 * values, so every pair below has to be numerically identical. */
#if !(SWIZZLE_X == VSF_IN_COMPONENT_X && \
      SWIZZLE_Y == VSF_IN_COMPONENT_Y && \
      SWIZZLE_Z == VSF_IN_COMPONENT_Z && \
      SWIZZLE_W == VSF_IN_COMPONENT_W && \
      SWIZZLE_ZERO == VSF_IN_COMPONENT_ZERO && \
      SWIZZLE_ONE == VSF_IN_COMPONENT_ONE && \
      WRITEMASK_X == VSF_FLAG_X && \
      WRITEMASK_Y == VSF_FLAG_Y && \
      WRITEMASK_Z == VSF_FLAG_Z && \
      WRITEMASK_W == VSF_FLAG_W)
#error Cannot change these!
#endif
60
61 #define SCALAR_FLAG (1<<31)
62 #define FLAG_MASK (1<<31)
63 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
64 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65
66 static struct{
67 char *name;
68 int opcode;
69 unsigned long ip; /* number of input operands and flags */
70 }op_names[]={
71 OPN(ABS, 1),
72 OPN(ADD, 2),
73 OPN(ARL, 1|SCALAR_FLAG),
74 OPN(DP3, 2),
75 OPN(DP4, 2),
76 OPN(DPH, 2),
77 OPN(DST, 2),
78 OPN(EX2, 1|SCALAR_FLAG),
79 OPN(EXP, 1|SCALAR_FLAG),
80 OPN(FLR, 1),
81 OPN(FRC, 1),
82 OPN(LG2, 1|SCALAR_FLAG),
83 OPN(LIT, 1),
84 OPN(LOG, 1|SCALAR_FLAG),
85 OPN(MAD, 3),
86 OPN(MAX, 2),
87 OPN(MIN, 2),
88 OPN(MOV, 1),
89 OPN(MUL, 2),
90 OPN(POW, 2|SCALAR_FLAG),
91 OPN(RCP, 1|SCALAR_FLAG),
92 OPN(RSQ, 1|SCALAR_FLAG),
93 OPN(SGE, 2),
94 OPN(SLT, 2),
95 OPN(SUB, 2),
96 OPN(SWZ, 1),
97 OPN(XPD, 2),
98 OPN(PRINT, 0),
99 OPN(END, 0),
100 };
101 #undef OPN
102
r200VertexProgUpdateParams(struct gl_context * ctx,struct r200_vertex_program * vp)103 static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_vertex_program *vp)
104 {
105 r200ContextPtr rmesa = R200_CONTEXT( ctx );
106 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
107 int pi;
108 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
109 struct gl_program_parameter_list *paramList;
110 drm_radeon_cmd_header_t tmp;
111
112 R200_STATECHANGE( rmesa, vpp[0] );
113 R200_STATECHANGE( rmesa, vpp[1] );
114 assert(mesa_vp->Base.Parameters);
115 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
116 paramList = mesa_vp->Base.Parameters;
117
118 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
119 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
120 return GL_FALSE;
121 }
122
123 for(pi = 0; pi < paramList->NumParameters; pi++) {
124 switch(paramList->Parameters[pi].Type) {
125 case PROGRAM_STATE_VAR:
126 case PROGRAM_NAMED_PARAM:
127 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
128 case PROGRAM_CONSTANT:
129 *fcmd++ = paramList->ParameterValues[pi][0].f;
130 *fcmd++ = paramList->ParameterValues[pi][1].f;
131 *fcmd++ = paramList->ParameterValues[pi][2].f;
132 *fcmd++ = paramList->ParameterValues[pi][3].f;
133 break;
134 default:
135 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
136 break;
137 }
138 if (pi == 95) {
139 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
140 }
141 }
142 /* hack up the cmd_size so not the whole state atom is emitted always. */
143 rmesa->hw.vpp[0].cmd_size =
144 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
145 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
146 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
147 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
148 if (paramList->NumParameters > 96) {
149 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
150 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
151 tmp.veclinear.count = paramList->NumParameters - 96;
152 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
153 }
154 return GL_TRUE;
155 }
156
t_dst_mask(GLuint mask)157 static INLINE unsigned long t_dst_mask(GLuint mask)
158 {
159 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
160 return mask & VSF_FLAG_ALL;
161 }
162
t_dst(struct prog_dst_register * dst)163 static unsigned long t_dst(struct prog_dst_register *dst)
164 {
165 switch(dst->File) {
166 case PROGRAM_TEMPORARY:
167 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
168 | R200_VSF_OUT_CLASS_TMP);
169 case PROGRAM_OUTPUT:
170 switch (dst->Index) {
171 case VERT_RESULT_HPOS:
172 return R200_VSF_OUT_CLASS_RESULT_POS;
173 case VERT_RESULT_COL0:
174 return R200_VSF_OUT_CLASS_RESULT_COLOR;
175 case VERT_RESULT_COL1:
176 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
177 | R200_VSF_OUT_CLASS_RESULT_COLOR);
178 case VERT_RESULT_FOGC:
179 return R200_VSF_OUT_CLASS_RESULT_FOGC;
180 case VERT_RESULT_TEX0:
181 case VERT_RESULT_TEX1:
182 case VERT_RESULT_TEX2:
183 case VERT_RESULT_TEX3:
184 case VERT_RESULT_TEX4:
185 case VERT_RESULT_TEX5:
186 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
187 | R200_VSF_OUT_CLASS_RESULT_TEXC);
188 case VERT_RESULT_PSIZ:
189 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
190 default:
191 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
192 exit(0);
193 return 0;
194 }
195 case PROGRAM_ADDRESS:
196 assert (dst->Index == 0);
197 return R200_VSF_OUT_CLASS_ADDR;
198 default:
199 fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
200 exit(0);
201 return 0;
202 }
203 }
204
/**
 * Map a Mesa register file to the hardware source register class.
 *
 * Temporaries map to VSF_IN_CLASS_TMP, inputs to VSF_IN_CLASS_ATTR, and all
 * parameter-like files (local, env, named, constant, state) to
 * VSF_IN_CLASS_PARAM.
 *
 * Any other file (e.g. PROGRAM_OUTPUT, PROGRAM_WRITE_ONLY, PROGRAM_ADDRESS)
 * cannot be read as a source: a diagnostic is printed and the process exits
 * with a failure status.
 */
static unsigned long t_src_class(gl_register_file file)
{
   switch(file){
   case PROGRAM_TEMPORARY:
      return VSF_IN_CLASS_TMP;

   case PROGRAM_INPUT:
      return VSF_IN_CLASS_ATTR;

   case PROGRAM_LOCAL_PARAM:
   case PROGRAM_ENV_PARAM:
   case PROGRAM_NAMED_PARAM:
   case PROGRAM_CONSTANT:
   case PROGRAM_STATE_VAR:
      return VSF_IN_CLASS_PARAM;

   default:
      fprintf(stderr, "problem in %s\n", __FUNCTION__);
      exit(-1);
      return 0;
   }
}
231
t_swizzle(GLubyte swizzle)232 static INLINE unsigned long t_swizzle(GLubyte swizzle)
233 {
234 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
235 return swizzle;
236 }
237
#if 0
/* Debug helper (compiled out): print the whole vp->inputs[] mapping to
 * stderr, prefixed with the name of the calling function. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr = 0;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   while (attr < VERT_ATTRIB_MAX) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
      attr++;
   }
   fprintf(stderr, ">\n");
}
#endif
255
t_src_index(struct r200_vertex_program * vp,struct prog_src_register * src)256 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
257 {
258 /*
259 int i;
260 int max_reg = -1;
261 */
262 if(src->File == PROGRAM_INPUT){
263 /* if(vp->inputs[src->Index] != -1)
264 return vp->inputs[src->Index];
265
266 for(i=0; i < VERT_ATTRIB_MAX; i++)
267 if(vp->inputs[i] > max_reg)
268 max_reg = vp->inputs[i];
269
270 vp->inputs[src->Index] = max_reg+1;*/
271
272 //vp_dump_inputs(vp, __FUNCTION__);
273 assert(vp->inputs[src->Index] != -1);
274 return vp->inputs[src->Index];
275 } else {
276 if (src->Index < 0) {
277 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
278 return 0;
279 }
280 return src->Index;
281 }
282 }
283
t_src(struct r200_vertex_program * vp,struct prog_src_register * src)284 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
285 {
286
287 return MAKE_VSF_SOURCE(t_src_index(vp, src),
288 t_swizzle(GET_SWZ(src->Swizzle, 0)),
289 t_swizzle(GET_SWZ(src->Swizzle, 1)),
290 t_swizzle(GET_SWZ(src->Swizzle, 2)),
291 t_swizzle(GET_SWZ(src->Swizzle, 3)),
292 t_src_class(src->File),
293 src->Negate) | (src->RelAddr << 4);
294 }
295
t_src_scalar(struct r200_vertex_program * vp,struct prog_src_register * src)296 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
297 {
298
299 return MAKE_VSF_SOURCE(t_src_index(vp, src),
300 t_swizzle(GET_SWZ(src->Swizzle, 0)),
301 t_swizzle(GET_SWZ(src->Swizzle, 0)),
302 t_swizzle(GET_SWZ(src->Swizzle, 0)),
303 t_swizzle(GET_SWZ(src->Swizzle, 0)),
304 t_src_class(src->File),
305 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
306 }
307
t_opcode(enum prog_opcode opcode)308 static unsigned long t_opcode(enum prog_opcode opcode)
309 {
310
311 switch(opcode){
312 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
313 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
314 * seems to ignore neg offsets which isn't quite correct...
315 */
316 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
317 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
318 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
319 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
320 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
321 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
322 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
323 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
324 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
325 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
326 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
327 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
328 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
329 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
330 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
331 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
332
333 default:
334 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
335 }
336 exit(-1);
337 return 0;
338 }
339
op_operands(enum prog_opcode opcode)340 static unsigned long op_operands(enum prog_opcode opcode)
341 {
342 int i;
343
344 /* Can we trust mesas opcodes to be in order ? */
345 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
346 if(op_names[i].opcode == opcode)
347 return op_names[i].ip;
348
349 fprintf(stderr, "op %d not found in op_names\n", opcode);
350 exit(-1);
351 return 0;
352 }
353
/* TODO: Get rid of t_src_class call */
/* Nonzero when sources a and b name different registers (RelAddr or Index
 * differ) while both are of class PARAM or both are of class ATTR.  The
 * translator uses this to decide when one of the two has to be copied to a
 * temporary first.  Arguments are evaluated more than once, and
 * t_src_class() terminates the process on a file it cannot map. */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
   ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
     t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
    (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
     t_src_class((b).File) == VSF_IN_CLASS_ATTR)))
360
/* fglrx on rv250 codes up unused sources as follows:
   unused but necessary sources are same as previous source, zero-ed out.
   unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* use these simpler definitions. Must obviously not be used with not yet set up regs.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* All four component selectors set to "select zero". */
#define ZERO_SRC_SELECT_BITS \
   ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))

/* An already encoded source with its component selectors replaced by zero. */
#define ZERO_SRC_N(encoded) \
   ((((encoded) & ~(0xfff << R200_VPI_IN_X_SHIFT)) | ZERO_SRC_SELECT_BITS))

/* An already encoded source with its low four bits replaced by 9
 * (presumably VSF_IN_CLASS_NONE, going by the note above - confirm). */
#define UNUSED_SRC_N(encoded) (((encoded) & ~15) | 9)

/* These read the instruction currently being emitted through o_inst. */
#define ZERO_SRC_0 ZERO_SRC_N(o_inst->src0)

#define ZERO_SRC_1 ZERO_SRC_N(o_inst->src1)

#define ZERO_SRC_2 ZERO_SRC_N(o_inst->src2)

#define UNUSED_SRC_0 UNUSED_SRC_N(o_inst->src0)

#define UNUSED_SRC_1 UNUSED_SRC_N(o_inst->src1)

#define UNUSED_SRC_2 UNUSED_SRC_N(o_inst->src2)
392
393
394 /**
395 * Generate an R200 vertex program from Mesa's internal representation.
396 *
397 * \return GL_TRUE for success, GL_FALSE for failure.
398 */
r200_translate_vertex_program(struct gl_context * ctx,struct r200_vertex_program * vp)399 static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
400 {
401 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
402 struct prog_instruction *vpi;
403 int i;
404 VERTEX_SHADER_INSTRUCTION *o_inst;
405 unsigned long operands;
406 int are_srcs_scalar;
407 unsigned long hw_op;
408 int dofogfix = 0;
409 int fog_temp_i = 0;
410 int free_inputs;
411 int array_count = 0;
412 int u_temp_used;
413
414 vp->native = GL_FALSE;
415 vp->translated = GL_TRUE;
416 vp->fogmode = ctx->Fog.Mode;
417
418 if (mesa_vp->Base.NumInstructions == 0)
419 return GL_FALSE;
420
421 #if 0
422 if ((mesa_vp->Base.InputsRead &
423 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
424 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
425 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
426 if (R200_DEBUG & RADEON_FALLBACKS) {
427 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
428 mesa_vp->Base.InputsRead);
429 }
430 return GL_FALSE;
431 }
432 #endif
433
434 if ((mesa_vp->Base.OutputsWritten &
435 ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) |
436 (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) |
437 (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) |
438 (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) {
439 if (R200_DEBUG & RADEON_FALLBACKS) {
440 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
441 (unsigned long long) mesa_vp->Base.OutputsWritten);
442 }
443 return GL_FALSE;
444 }
445
446 if (mesa_vp->IsNVProgram) {
447 /* subtle differences in spec like guaranteed initialized regs could cause
448 headaches. Might want to remove the driconf option to enable it completely */
449 return GL_FALSE;
450 }
451 /* Initial value should be last tmp reg that hw supports.
452 Strangely enough r300 doesnt mind even though these would be out of range.
453 Smart enough to realize that it doesnt need it? */
454 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
455 struct prog_src_register src[3];
456 struct prog_dst_register dst;
457
458 /* FIXME: is changing the prog safe to do here? */
459 if (mesa_vp->IsPositionInvariant &&
460 /* make sure we only do this once */
461 !(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
462 _mesa_insert_mvp_code(ctx, mesa_vp);
463 }
464
465 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
466 base e isn't directly available neither. */
467 if ((mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_FOGC)) && !vp->fogpidx) {
468 struct gl_program_parameter_list *paramList;
469 gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
470 paramList = mesa_vp->Base.Parameters;
471 vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
472 }
473
474 vp->pos_end = 0;
475 mesa_vp->Base.NumNativeInstructions = 0;
476 if (mesa_vp->Base.Parameters)
477 mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
478 else
479 mesa_vp->Base.NumNativeParameters = 0;
480
481 for(i = 0; i < VERT_ATTRIB_MAX; i++)
482 vp->inputs[i] = -1;
483 for(i = 0; i < 15; i++)
484 vp->inputmap_rev[i] = 255;
485 free_inputs = 0x2ffd;
486
487 /* fglrx uses fixed inputs as follows for conventional attribs.
488 generic attribs use non-fixed assignment, fglrx will always use the
489 lowest attrib values available. We'll just do the same.
490 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
491 and 13 in a hw vertex prog.
492 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
493 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
494 Additionally, not more than 12 arrays in total are possible I think.
495 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
496 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
497 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
498 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
499 */
500
501 /* attr 4,5 and 13 are only used with generic attribs.
502 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
503 not possibe to use with vertex progs as it is lacking in vert prog specification) */
504 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
505 if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
506 vp->inputs[VERT_ATTRIB_POS] = 0;
507 vp->inputmap_rev[0] = VERT_ATTRIB_POS;
508 free_inputs &= ~(1 << 0);
509 array_count++;
510 }
511 if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
512 vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
513 vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
514 array_count++;
515 }
516 if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
517 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
518 vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
519 array_count++;
520 }
521 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
522 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
523 vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
524 free_inputs &= ~(1 << 2);
525 array_count++;
526 }
527 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
528 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
529 vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
530 free_inputs &= ~(1 << 3);
531 array_count++;
532 }
533 if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
534 vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
535 vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
536 array_count++;
537 }
538 /* VERT_ATTRIB_TEX0-5 */
539 for (i = 0; i <= 5; i++) {
540 if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
541 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
542 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
543 free_inputs &= ~(1 << (i + 6));
544 array_count++;
545 }
546 }
547 /* using VERT_ATTRIB_TEX6/7 would be illegal */
548 for (; i < VERT_ATTRIB_TEX_MAX; i++) {
549 if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
550 if (R200_DEBUG & RADEON_FALLBACKS) {
551 fprintf(stderr, "texture attribute %d in vert prog\n", i);
552 }
553 return GL_FALSE;
554 }
555 }
556 /* completely ignore aliasing? */
557 for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
558 int j;
559 /* completely ignore aliasing? */
560 if (mesa_vp->Base.InputsRead & VERT_BIT_GENERIC(i)) {
561 array_count++;
562 if (array_count > 12) {
563 if (R200_DEBUG & RADEON_FALLBACKS) {
564 fprintf(stderr, "more than 12 attribs used in vert prog\n");
565 }
566 return GL_FALSE;
567 }
568 for (j = 0; j < 14; j++) {
569 /* will always find one due to limited array_count */
570 if (free_inputs & (1 << j)) {
571 free_inputs &= ~(1 << j);
572 vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
573 if (j == 0) {
574 /* mapped to pos */
575 vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
576 } else if (j < 12) {
577 /* mapped to col/tex */
578 vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
579 } else {
580 /* mapped to pos1 */
581 vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
582 }
583 break;
584 }
585 }
586 }
587 }
588
589 if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
590 if (R200_DEBUG & RADEON_FALLBACKS) {
591 fprintf(stderr, "can't handle vert prog without position output\n");
592 }
593 return GL_FALSE;
594 }
595 if (free_inputs & 1) {
596 if (R200_DEBUG & RADEON_FALLBACKS) {
597 fprintf(stderr, "can't handle vert prog without position input\n");
598 }
599 return GL_FALSE;
600 }
601
602 o_inst = vp->instr;
603 for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
604 operands = op_operands(vpi->Opcode);
605 are_srcs_scalar = operands & SCALAR_FLAG;
606 operands &= OP_MASK;
607
608 for(i = 0; i < operands; i++) {
609 src[i] = vpi->SrcReg[i];
610 /* hack up default attrib values as per spec as swizzling.
611 normal, fog, secondary color. Crazy?
612 May need more if we don't submit vec4 elements? */
613 if (src[i].File == PROGRAM_INPUT) {
614 if (src[i].Index == VERT_ATTRIB_NORMAL) {
615 int j;
616 for (j = 0; j < 4; j++) {
617 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
618 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
619 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
620 }
621 }
622 }
623 else if (src[i].Index == VERT_ATTRIB_COLOR1) {
624 int j;
625 for (j = 0; j < 4; j++) {
626 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
627 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
628 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
629 }
630 }
631 }
632 else if (src[i].Index == VERT_ATTRIB_FOG) {
633 int j;
634 for (j = 0; j < 4; j++) {
635 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
636 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
637 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
638 }
639 else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
640 GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
641 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
642 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
643 }
644 }
645 }
646 }
647 }
648
649 if(operands == 3){
650 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
651 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
652 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
653 VSF_FLAG_ALL);
654
655 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
656 SWIZZLE_X, SWIZZLE_Y,
657 SWIZZLE_Z, SWIZZLE_W,
658 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
659
660 o_inst->src1 = ZERO_SRC_0;
661 o_inst->src2 = UNUSED_SRC_1;
662 o_inst++;
663
664 src[2].File = PROGRAM_TEMPORARY;
665 src[2].Index = u_temp_i;
666 src[2].RelAddr = 0;
667 u_temp_i--;
668 }
669 }
670
671 if(operands >= 2){
672 if( CMP_SRCS(src[1], src[0]) ){
673 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
674 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
675 VSF_FLAG_ALL);
676
677 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
678 SWIZZLE_X, SWIZZLE_Y,
679 SWIZZLE_Z, SWIZZLE_W,
680 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
681
682 o_inst->src1 = ZERO_SRC_0;
683 o_inst->src2 = UNUSED_SRC_1;
684 o_inst++;
685
686 src[0].File = PROGRAM_TEMPORARY;
687 src[0].Index = u_temp_i;
688 src[0].RelAddr = 0;
689 u_temp_i--;
690 }
691 }
692
693 dst = vpi->DstReg;
694 if (dst.File == PROGRAM_OUTPUT &&
695 dst.Index == VERT_RESULT_FOGC &&
696 dst.WriteMask & WRITEMASK_X) {
697 fog_temp_i = u_temp_i;
698 dst.File = PROGRAM_TEMPORARY;
699 dst.Index = fog_temp_i;
700 dofogfix = 1;
701 u_temp_i--;
702 }
703
704 /* These ops need special handling. */
705 switch(vpi->Opcode){
706 case OPCODE_POW:
707 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
708 So may need to insert additional instruction */
709 if ((src[0].File == src[1].File) &&
710 (src[0].Index == src[1].Index)) {
711 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
712 t_dst_mask(dst.WriteMask));
713 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
714 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
715 SWIZZLE_ZERO,
716 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
717 SWIZZLE_ZERO,
718 t_src_class(src[0].File),
719 src[0].Negate) | (src[0].RelAddr << 4);
720 o_inst->src1 = UNUSED_SRC_0;
721 o_inst->src2 = UNUSED_SRC_0;
722 }
723 else {
724 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
725 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
726 VSF_FLAG_ALL);
727 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
728 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
729 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
730 t_src_class(src[0].File),
731 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
732 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
733 SWIZZLE_ZERO, SWIZZLE_ZERO,
734 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
735 t_src_class(src[1].File),
736 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
737 o_inst->src2 = UNUSED_SRC_1;
738 o_inst++;
739
740 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
741 t_dst_mask(dst.WriteMask));
742 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
743 VSF_IN_COMPONENT_X,
744 VSF_IN_COMPONENT_Y,
745 VSF_IN_COMPONENT_Z,
746 VSF_IN_COMPONENT_W,
747 VSF_IN_CLASS_TMP,
748 VSF_FLAG_NONE);
749 o_inst->src1 = UNUSED_SRC_0;
750 o_inst->src2 = UNUSED_SRC_0;
751 u_temp_i--;
752 }
753 goto next;
754
755 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
756 case OPCODE_SWZ:
757 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
758 t_dst_mask(dst.WriteMask));
759 o_inst->src0 = t_src(vp, &src[0]);
760 o_inst->src1 = ZERO_SRC_0;
761 o_inst->src2 = UNUSED_SRC_1;
762 goto next;
763
764 case OPCODE_MAD:
765 /* only 2 read ports into temp memory thus may need the macro op MAD_2
766 instead (requiring 2 clocks) if all inputs are in temp memory
767 (and, only if they actually reference 3 distinct temps) */
768 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
769 src[1].File == PROGRAM_TEMPORARY &&
770 src[2].File == PROGRAM_TEMPORARY &&
771 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
772 (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
773 (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
774 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
775
776 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
777 t_dst_mask(dst.WriteMask));
778 o_inst->src0 = t_src(vp, &src[0]);
779 #if 0
780 if ((o_inst - vp->instr) == 31) {
781 /* fix up the broken vertex program of quake4 demo... */
782 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
783 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
784 t_src_class(src[1].File),
785 src[1].Negate) | (src[1].RelAddr << 4);
786 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
787 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
788 t_src_class(src[1].File),
789 src[1].Negate) | (src[1].RelAddr << 4);
790 }
791 else {
792 o_inst->src1 = t_src(vp, &src[1]);
793 o_inst->src2 = t_src(vp, &src[2]);
794 }
795 #else
796 o_inst->src1 = t_src(vp, &src[1]);
797 o_inst->src2 = t_src(vp, &src[2]);
798 #endif
799 goto next;
800
801 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
802 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
803 t_dst_mask(dst.WriteMask));
804
805 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
806 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
807 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
808 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
809 SWIZZLE_ZERO,
810 t_src_class(src[0].File),
811 src[0].Negate) | (src[0].RelAddr << 4);
812
813 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
814 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
815 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
816 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
817 SWIZZLE_ZERO,
818 t_src_class(src[1].File),
819 src[1].Negate) | (src[1].RelAddr << 4);
820
821 o_inst->src2 = UNUSED_SRC_1;
822 goto next;
823
824 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
825 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
826 t_dst_mask(dst.WriteMask));
827
828 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
829 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
830 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
831 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
832 VSF_IN_COMPONENT_ONE,
833 t_src_class(src[0].File),
834 src[0].Negate) | (src[0].RelAddr << 4);
835 o_inst->src1 = t_src(vp, &src[1]);
836 o_inst->src2 = UNUSED_SRC_1;
837 goto next;
838
839 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
840 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
841 t_dst_mask(dst.WriteMask));
842
843 o_inst->src0 = t_src(vp, &src[0]);
844 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
845 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
846 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
847 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
848 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
849 t_src_class(src[1].File),
850 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
851 o_inst->src2 = UNUSED_SRC_1;
852 goto next;
853
854 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
855 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
856 t_dst_mask(dst.WriteMask));
857
858 o_inst->src0=t_src(vp, &src[0]);
859 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
860 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
861 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
862 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
863 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
864 t_src_class(src[0].File),
865 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
866 o_inst->src2 = UNUSED_SRC_1;
867 goto next;
868
869 case OPCODE_FLR:
870 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
871 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
872
873 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
874 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
875 t_dst_mask(dst.WriteMask));
876
877 o_inst->src0 = t_src(vp, &src[0]);
878 o_inst->src1 = UNUSED_SRC_0;
879 o_inst->src2 = UNUSED_SRC_1;
880 o_inst++;
881
882 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
883 t_dst_mask(dst.WriteMask));
884
885 o_inst->src0 = t_src(vp, &src[0]);
886 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
887 VSF_IN_COMPONENT_X,
888 VSF_IN_COMPONENT_Y,
889 VSF_IN_COMPONENT_Z,
890 VSF_IN_COMPONENT_W,
891 VSF_IN_CLASS_TMP,
892 /* Not 100% sure about this */
893 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
894
895 o_inst->src2 = UNUSED_SRC_0;
896 u_temp_i--;
897 goto next;
898
899 case OPCODE_XPD:
900 /* mul r0, r1.yzxw, r2.zxyw
901 mad r0, -r2.yzxw, r1.zxyw, r0
902 */
903 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
904 src[1].File == PROGRAM_TEMPORARY &&
905 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
906 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
907
908 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
909 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
910 t_dst_mask(dst.WriteMask));
911
912 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
913 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
914 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
915 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
916 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
917 t_src_class(src[0].File),
918 src[0].Negate) | (src[0].RelAddr << 4);
919
920 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
921 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
922 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
923 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
924 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
925 t_src_class(src[1].File),
926 src[1].Negate) | (src[1].RelAddr << 4);
927
928 o_inst->src2 = UNUSED_SRC_1;
929 o_inst++;
930 u_temp_i--;
931
932 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
933 t_dst_mask(dst.WriteMask));
934
935 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
936 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
937 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
938 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
939 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
940 t_src_class(src[1].File),
941 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
942
943 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
944 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
945 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
946 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
947 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
948 t_src_class(src[0].File),
949 src[0].Negate) | (src[0].RelAddr << 4);
950
951 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
952 VSF_IN_COMPONENT_X,
953 VSF_IN_COMPONENT_Y,
954 VSF_IN_COMPONENT_Z,
955 VSF_IN_COMPONENT_W,
956 VSF_IN_CLASS_TMP,
957 VSF_FLAG_NONE);
958 goto next;
959
960 case OPCODE_END:
961 assert(0);
962 default:
963 break;
964 }
965
966 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
967 t_dst_mask(dst.WriteMask));
968
969 if(are_srcs_scalar){
970 switch(operands){
971 case 1:
972 o_inst->src0 = t_src_scalar(vp, &src[0]);
973 o_inst->src1 = UNUSED_SRC_0;
974 o_inst->src2 = UNUSED_SRC_1;
975 break;
976
977 case 2:
978 o_inst->src0 = t_src_scalar(vp, &src[0]);
979 o_inst->src1 = t_src_scalar(vp, &src[1]);
980 o_inst->src2 = UNUSED_SRC_1;
981 break;
982
983 case 3:
984 o_inst->src0 = t_src_scalar(vp, &src[0]);
985 o_inst->src1 = t_src_scalar(vp, &src[1]);
986 o_inst->src2 = t_src_scalar(vp, &src[2]);
987 break;
988
989 default:
990 fprintf(stderr, "illegal number of operands %lu\n", operands);
991 exit(-1);
992 break;
993 }
994 } else {
995 switch(operands){
996 case 1:
997 o_inst->src0 = t_src(vp, &src[0]);
998 o_inst->src1 = UNUSED_SRC_0;
999 o_inst->src2 = UNUSED_SRC_1;
1000 break;
1001
1002 case 2:
1003 o_inst->src0 = t_src(vp, &src[0]);
1004 o_inst->src1 = t_src(vp, &src[1]);
1005 o_inst->src2 = UNUSED_SRC_1;
1006 break;
1007
1008 case 3:
1009 o_inst->src0 = t_src(vp, &src[0]);
1010 o_inst->src1 = t_src(vp, &src[1]);
1011 o_inst->src2 = t_src(vp, &src[2]);
1012 break;
1013
1014 default:
1015 fprintf(stderr, "illegal number of operands %lu\n", operands);
1016 exit(-1);
1017 break;
1018 }
1019 }
1020 next:
1021
1022 if (dofogfix) {
1023 o_inst++;
1024 if (vp->fogmode == GL_EXP) {
1025 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1026 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1027 VSF_FLAG_X);
1028 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1029 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1030 o_inst->src2 = UNUSED_SRC_1;
1031 o_inst++;
1032 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1033 R200_VSF_OUT_CLASS_RESULT_FOGC,
1034 VSF_FLAG_X);
1035 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1036 o_inst->src1 = UNUSED_SRC_0;
1037 o_inst->src2 = UNUSED_SRC_1;
1038 }
1039 else if (vp->fogmode == GL_EXP2) {
1040 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1041 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1042 VSF_FLAG_X);
1043 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1044 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1045 o_inst->src2 = UNUSED_SRC_1;
1046 o_inst++;
1047 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1048 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1049 VSF_FLAG_X);
1050 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1051 o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1052 o_inst->src2 = UNUSED_SRC_1;
1053 o_inst++;
1054 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1055 R200_VSF_OUT_CLASS_RESULT_FOGC,
1056 VSF_FLAG_X);
1057 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1058 o_inst->src1 = UNUSED_SRC_0;
1059 o_inst->src2 = UNUSED_SRC_1;
1060 }
1061 else { /* fogmode == GL_LINEAR */
1062 /* could do that with single op (dot) if using params like
1063 with fixed function pipeline fog */
1064 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1065 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1066 VSF_FLAG_X);
1067 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1068 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1069 o_inst->src2 = UNUSED_SRC_1;
1070 o_inst++;
1071 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1072 R200_VSF_OUT_CLASS_RESULT_FOGC,
1073 VSF_FLAG_X);
1074 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1075 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1076 o_inst->src2 = UNUSED_SRC_1;
1077
1078 }
1079 dofogfix = 0;
1080 }
1081
1082 u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
1083 if (mesa_vp->Base.NumNativeTemporaries <
1084 (mesa_vp->Base.NumTemporaries + u_temp_used)) {
1085 mesa_vp->Base.NumNativeTemporaries =
1086 mesa_vp->Base.NumTemporaries + u_temp_used;
1087 }
1088 if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
1089 if (R200_DEBUG & RADEON_FALLBACKS) {
1090 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
1091 }
1092 return GL_FALSE;
1093 }
1094 u_temp_i = R200_VSF_MAX_TEMPS - 1;
1095 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1096 mesa_vp->Base.NumNativeInstructions = 129;
1097 if (R200_DEBUG & RADEON_FALLBACKS) {
1098 fprintf(stderr, "more than 128 native instructions\n");
1099 }
1100 return GL_FALSE;
1101 }
1102 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1103 vp->pos_end = (o_inst - vp->instr);
1104 }
1105 }
1106
1107 vp->native = GL_TRUE;
1108 mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
1109 #if 0
1110 fprintf(stderr, "hw program:\n");
1111 for(i=0; i < vp->program.length; i++)
1112 fprintf(stderr, "%08x\n", vp->instr[i]);
1113 #endif
1114 return GL_TRUE;
1115 }
1116
r200SetupVertexProg(struct gl_context * ctx)1117 void r200SetupVertexProg( struct gl_context *ctx ) {
1118 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1119 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1120 GLboolean fallback;
1121 GLint i;
1122
1123 if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1124 rmesa->curr_vp_hw = NULL;
1125 r200_translate_vertex_program(ctx, vp);
1126 }
1127 /* could optimize setting up vertex progs away for non-tcl hw */
1128 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp));
1129 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1130 if (rmesa->radeon.TclFallback) return;
1131
1132 R200_STATECHANGE( rmesa, vap );
1133 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1134 maybe only when using more than 64 inst / 96 param? */
1135 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1136
1137 R200_STATECHANGE( rmesa, pvs );
1138
1139 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1140 ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1141 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1142 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1143 (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1144
1145 /* maybe user clip planes just work with vertex progs... untested */
1146 if (ctx->Transform.ClipPlanesEnabled) {
1147 R200_STATECHANGE( rmesa, tcl );
1148 if (vp->mesa_program.IsPositionInvariant) {
1149 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1150 }
1151 else {
1152 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1153 }
1154 }
1155
1156 if (vp != rmesa->curr_vp_hw) {
1157 GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1158 drm_radeon_cmd_header_t tmp;
1159
1160 R200_STATECHANGE( rmesa, vpi[0] );
1161 R200_STATECHANGE( rmesa, vpi[1] );
1162
1163 /* FIXME: what about using a memcopy... */
1164 for (i = 0; (i < 64) && i < count; i++) {
1165 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1166 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1167 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1168 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1169 }
1170 /* hack up the cmd_size so not the whole state atom is emitted always.
1171 This may require some more thought, we may emit half progs on lost state, but
1172 hopefully it won't matter?
1173 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1174 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1175 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1176 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1177 tmp.veclinear.count = (count > 64) ? 64 : count;
1178 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1179 if (count > 64) {
1180 for (i = 0; i < (count - 64); i++) {
1181 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1182 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1183 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1184 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1185 }
1186 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1187 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1188 tmp.veclinear.count = count - 64;
1189 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1190 }
1191 rmesa->curr_vp_hw = vp;
1192 }
1193 }
1194
1195
1196 static void
r200BindProgram(struct gl_context * ctx,GLenum target,struct gl_program * prog)1197 r200BindProgram(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1198 {
1199 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1200
1201 switch(target){
1202 case GL_VERTEX_PROGRAM_ARB:
1203 rmesa->curr_vp_hw = NULL;
1204 break;
1205 default:
1206 _mesa_problem(ctx, "Target not supported yet!");
1207 break;
1208 }
1209 }
1210
1211 static struct gl_program *
r200NewProgram(struct gl_context * ctx,GLenum target,GLuint id)1212 r200NewProgram(struct gl_context *ctx, GLenum target, GLuint id)
1213 {
1214 struct r200_vertex_program *vp;
1215
1216 switch(target){
1217 case GL_VERTEX_PROGRAM_ARB:
1218 vp = CALLOC_STRUCT(r200_vertex_program);
1219 return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1220 case GL_FRAGMENT_PROGRAM_ARB:
1221 case GL_FRAGMENT_PROGRAM_NV:
1222 return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1223 default:
1224 _mesa_problem(ctx, "Bad target in r200NewProgram");
1225 }
1226 return NULL;
1227 }
1228
1229
/**
 * Driver hook that destroys a program object.
 *
 * Nothing driver-specific is done here; the call is forwarded unchanged
 * to _mesa_delete_program().
 *
 * \param ctx   GL context.
 * \param prog  program to delete.
 */
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
1235
1236 static GLboolean
r200ProgramStringNotify(struct gl_context * ctx,GLenum target,struct gl_program * prog)1237 r200ProgramStringNotify(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1238 {
1239 struct r200_vertex_program *vp = (void *)prog;
1240 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1241
1242 switch(target) {
1243 case GL_VERTEX_PROGRAM_ARB:
1244 vp->translated = GL_FALSE;
1245 vp->fogpidx = 0;
1246 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1247 r200_translate_vertex_program(ctx, vp);
1248 rmesa->curr_vp_hw = NULL;
1249 break;
1250 case GL_FRAGMENT_SHADER_ATI:
1251 rmesa->afs_loaded = NULL;
1252 break;
1253 }
1254 /* need this for tcl fallbacks */
1255 (void) _tnl_program_string(ctx, target, prog);
1256
1257 /* XXX check if program is legal, within limits */
1258 return GL_TRUE;
1259 }
1260
1261 static GLboolean
r200IsProgramNative(struct gl_context * ctx,GLenum target,struct gl_program * prog)1262 r200IsProgramNative(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1263 {
1264 struct r200_vertex_program *vp = (void *)prog;
1265
1266 switch(target){
1267 case GL_VERTEX_STATE_PROGRAM_NV:
1268 case GL_VERTEX_PROGRAM_ARB:
1269 if (!vp->translated) {
1270 r200_translate_vertex_program(ctx, vp);
1271 }
1272 /* does not take parameters etc. into account */
1273 return vp->native;
1274 default:
1275 _mesa_problem(ctx, "Bad target in r200NewProgram");
1276 }
1277 return 0;
1278 }
1279
r200InitShaderFuncs(struct dd_function_table * functions)1280 void r200InitShaderFuncs(struct dd_function_table *functions)
1281 {
1282 functions->NewProgram = r200NewProgram;
1283 functions->BindProgram = r200BindProgram;
1284 functions->DeleteProgram = r200DeleteProgram;
1285 functions->ProgramStringNotify = r200ProgramStringNotify;
1286 functions->IsProgramNative = r200IsProgramNative;
1287 }
1288