1 /*
2  * Copyright (C) 2004  David Airlie   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20  */
21 
22 #include "main/glheader.h"
23 #include "main/colormac.h"
24 #include "main/macros.h"
25 #include "main/atifragshader.h"
26 #include "main/samplerobj.h"
27 #include "swrast/s_atifragshader.h"
28 #include "swrast/s_context.h"
29 
30 
31 /**
32  * State for executing ATI fragment shader.
33  */
34 struct atifs_machine
35 {
36    GLfloat Registers[6][4];         /** six temporary registers */
37    GLfloat PrevPassRegisters[6][4];
38    GLfloat Inputs[2][4];   /** Primary, secondary input colors */
39 };
40 
41 
42 
43 /**
44  * Fetch a texel.
45  */
46 static void
fetch_texel(struct gl_context * ctx,const GLfloat texcoord[4],GLfloat lambda,GLuint unit,GLfloat color[4])47 fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda,
48 	    GLuint unit, GLfloat color[4])
49 {
50    SWcontext *swrast = SWRAST_CONTEXT(ctx);
51 
52    /* XXX use a float-valued TextureSample routine here!!! */
53    swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit),
54                                ctx->Texture.Unit[unit]._Current,
55 			       1, (const GLfloat(*)[4]) texcoord,
56                                &lambda, (GLfloat (*)[4]) color);
57 }
58 
59 static void
apply_swizzle(GLfloat values[4],GLuint swizzle)60 apply_swizzle(GLfloat values[4], GLuint swizzle)
61 {
62    GLfloat s, t, r, q;
63 
64    s = values[0];
65    t = values[1];
66    r = values[2];
67    q = values[3];
68 
69    switch (swizzle) {
70    case GL_SWIZZLE_STR_ATI:
71       values[0] = s;
72       values[1] = t;
73       values[2] = r;
74       break;
75    case GL_SWIZZLE_STQ_ATI:
76       values[0] = s;
77       values[1] = t;
78       values[2] = q;
79       break;
80    case GL_SWIZZLE_STR_DR_ATI:
81       values[0] = s / r;
82       values[1] = t / r;
83       values[2] = 1 / r;
84       break;
85    case GL_SWIZZLE_STQ_DQ_ATI:
86 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
87       if (q == 0.0F)
88          q = 0.000000001F;
89       values[0] = s / q;
90       values[1] = t / q;
91       values[2] = 1.0F / q;
92       break;
93    }
94    values[3] = 0.0;
95 }
96 
97 static void
apply_src_rep(GLint optype,GLuint rep,GLfloat * val)98 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
99 {
100    GLint i;
101    GLint start, end;
102    if (!rep)
103       return;
104 
105    start = optype ? 3 : 0;
106    end = 4;
107 
108    for (i = start; i < end; i++) {
109       switch (rep) {
110       case GL_RED:
111 	 val[i] = val[0];
112 	 break;
113       case GL_GREEN:
114 	 val[i] = val[1];
115 	 break;
116       case GL_BLUE:
117 	 val[i] = val[2];
118 	 break;
119       case GL_ALPHA:
120 	 val[i] = val[3];
121 	 break;
122       }
123    }
124 }
125 
126 static void
apply_src_mod(GLint optype,GLuint mod,GLfloat * val)127 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
128 {
129    GLint i;
130    GLint start, end;
131 
132    if (!mod)
133       return;
134 
135    start = optype ? 3 : 0;
136    end = 4;
137 
138    for (i = start; i < end; i++) {
139       if (mod & GL_COMP_BIT_ATI)
140 	 val[i] = 1 - val[i];
141 
142       if (mod & GL_BIAS_BIT_ATI)
143 	 val[i] = val[i] - 0.5F;
144 
145       if (mod & GL_2X_BIT_ATI)
146 	 val[i] = 2 * val[i];
147 
148       if (mod & GL_NEGATE_BIT_ATI)
149 	 val[i] = -val[i];
150    }
151 }
152 
153 static void
apply_dst_mod(GLuint optype,GLuint mod,GLfloat * val)154 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
155 {
156    GLint i;
157    GLint has_sat = mod & GL_SATURATE_BIT_ATI;
158    GLint start, end;
159 
160    mod &= ~GL_SATURATE_BIT_ATI;
161 
162    start = optype ? 3 : 0;
163    end = optype ? 4 : 3;
164 
165    for (i = start; i < end; i++) {
166       switch (mod) {
167       case GL_2X_BIT_ATI:
168 	 val[i] = 2 * val[i];
169 	 break;
170       case GL_4X_BIT_ATI:
171 	 val[i] = 4 * val[i];
172 	 break;
173       case GL_8X_BIT_ATI:
174 	 val[i] = 8 * val[i];
175 	 break;
176       case GL_HALF_BIT_ATI:
177 	 val[i] = val[i] * 0.5F;
178 	 break;
179       case GL_QUARTER_BIT_ATI:
180 	 val[i] = val[i] * 0.25F;
181 	 break;
182       case GL_EIGHTH_BIT_ATI:
183 	 val[i] = val[i] * 0.125F;
184 	 break;
185       }
186 
187       if (has_sat) {
188 	 if (val[i] < 0.0F)
189 	    val[i] = 0.0F;
190 	 else if (val[i] > 1.0F)
191 	    val[i] = 1.0F;
192       }
193       else {
194 	 if (val[i] < -8.0F)
195 	    val[i] = -8.0F;
196 	 else if (val[i] > 8.0F)
197 	    val[i] = 8.0F;
198       }
199    }
200 }
201 
202 
203 static void
write_dst_addr(GLuint optype,GLuint mod,GLuint mask,GLfloat * src,GLfloat * dst)204 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
205 	       GLfloat * dst)
206 {
207    GLint i;
208    apply_dst_mod(optype, mod, src);
209 
210    if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
211       if (mask) {
212 	 if (mask & GL_RED_BIT_ATI)
213 	    dst[0] = src[0];
214 
215 	 if (mask & GL_GREEN_BIT_ATI)
216 	    dst[1] = src[1];
217 
218 	 if (mask & GL_BLUE_BIT_ATI)
219 	    dst[2] = src[2];
220       }
221       else {
222 	 for (i = 0; i < 3; i++)
223 	    dst[i] = src[i];
224       }
225    }
226    else
227       dst[3] = src[3];
228 }
229 
230 static void
finish_pass(struct atifs_machine * machine)231 finish_pass(struct atifs_machine *machine)
232 {
233    GLint i;
234 
235    for (i = 0; i < 6; i++) {
236       COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
237    }
238 }
239 
240 struct ati_fs_opcode_st ati_fs_opcodes[] = {
241    {GL_ADD_ATI, 2},
242    {GL_SUB_ATI, 2},
243    {GL_MUL_ATI, 2},
244    {GL_MAD_ATI, 3},
245    {GL_LERP_ATI, 3},
246    {GL_MOV_ATI, 1},
247    {GL_CND_ATI, 3},
248    {GL_CND0_ATI, 3},
249    {GL_DOT2_ADD_ATI, 3},
250    {GL_DOT3_ATI, 2},
251    {GL_DOT4_ATI, 2}
252 };
253 
254 
255 
256 static void
handle_pass_op(struct atifs_machine * machine,struct atifs_setupinst * texinst,const SWspan * span,GLuint column,GLuint idx)257 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
258 	       const SWspan *span, GLuint column, GLuint idx)
259 {
260    GLuint swizzle = texinst->swizzle;
261    GLuint pass_tex = texinst->src;
262 
263    if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
264       pass_tex -= GL_TEXTURE0_ARB;
265       COPY_4V(machine->Registers[idx],
266 	      span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
267    }
268    else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
269       pass_tex -= GL_REG_0_ATI;
270       COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
271    }
272    apply_swizzle(machine->Registers[idx], swizzle);
273 
274 }
275 
276 static void
handle_sample_op(struct gl_context * ctx,struct atifs_machine * machine,struct atifs_setupinst * texinst,const SWspan * span,GLuint column,GLuint idx)277 handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine,
278 		 struct atifs_setupinst *texinst, const SWspan *span,
279 		 GLuint column, GLuint idx)
280 {
281 /* sample from unit idx using texinst->src as coords */
282    GLuint swizzle = texinst->swizzle;
283    GLuint coord_source = texinst->src;
284    GLfloat tex_coords[4] = { 0 };
285 
286    if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
287       coord_source -= GL_TEXTURE0_ARB;
288       COPY_4V(tex_coords,
289               span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
290    }
291    else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
292       coord_source -= GL_REG_0_ATI;
293       COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
294    }
295    apply_swizzle(tex_coords, swizzle);
296    fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
297 }
298 
/**
 * Copy the four-component vector \p x into source-operand slot \p i of
 * operation type \p optype.  Expects an array named "src" to be in scope
 * at the point of expansion.
 */
#define SETUP_SRC_REG(optype, i, x)		\
do {						\
   COPY_4V(src[(optype)][(i)], (x));		\
} while (0)
303 
304 
305 
306 /**
307  * Execute the given fragment shader.
308  * NOTE: we do everything in single-precision floating point
309  * \param ctx - rendering context
310  * \param shader - the shader to execute
311  * \param machine - virtual machine state
312  * \param span - the SWspan we're operating on
313  * \param column - which pixel [i] we're operating on in the span
314  */
315 static void
execute_shader(struct gl_context * ctx,const struct ati_fragment_shader * shader,struct atifs_machine * machine,const SWspan * span,GLuint column)316 execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
317 	       struct atifs_machine *machine, const SWspan *span,
318                GLuint column)
319 {
320    GLuint pc;
321    struct atifs_instruction *inst;
322    struct atifs_setupinst *texinst;
323    GLint optype;
324    GLuint i;
325    GLint j, pass;
326    GLint dstreg;
327    GLfloat src[2][3][4];
328    GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
329    GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
330    GLfloat dst[2][4], *dstp;
331 
332    for (pass = 0; pass < shader->NumPasses; pass++) {
333       if (pass > 0)
334 	 finish_pass(machine);
335       for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
336 	 texinst = &shader->SetupInst[pass][j];
337 	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
338 	    handle_pass_op(machine, texinst, span, column, j);
339 	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
340 	    handle_sample_op(ctx, machine, texinst, span, column, j);
341       }
342 
343       for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
344 	 inst = &shader->Instructions[pass][pc];
345 
346 	 /* setup the source registers for color and alpha ops */
347 	 for (optype = 0; optype < 2; optype++) {
348  	    for (i = 0; i < inst->ArgCount[optype]; i++) {
349 	       GLint index = inst->SrcReg[optype][i].Index;
350 
351 	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
352 		  SETUP_SRC_REG(optype, i,
353 				machine->Registers[index - GL_REG_0_ATI]);
354 	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
355 		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
356 		     SETUP_SRC_REG(optype, i,
357 				shader->Constants[index - GL_CON_0_ATI]);
358 		  } else {
359 		     SETUP_SRC_REG(optype, i,
360 				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
361 		  }
362 	       }
363 	       else if (index == GL_ONE)
364 		  SETUP_SRC_REG(optype, i, ones);
365 	       else if (index == GL_ZERO)
366 		  SETUP_SRC_REG(optype, i, zeros);
367 	       else if (index == GL_PRIMARY_COLOR_EXT)
368 		  SETUP_SRC_REG(optype, i,
369 				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
370 	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
371 		  SETUP_SRC_REG(optype, i,
372 				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
373 
374 	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
375 			     src[optype][i]);
376 	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
377 			     src[optype][i]);
378 	    }
379 	 }
380 
381 	 /* Execute the operations - color then alpha */
382 	 for (optype = 0; optype < 2; optype++) {
383 	    if (inst->Opcode[optype]) {
384 	       switch (inst->Opcode[optype]) {
385 	       case GL_ADD_ATI:
386 		  if (!optype)
387 		     for (i = 0; i < 3; i++) {
388 			dst[optype][i] =
389 			   src[optype][0][i] + src[optype][1][i];
390 		     }
391 		  else
392 		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
393 		  break;
394 	       case GL_SUB_ATI:
395 		  if (!optype)
396 		     for (i = 0; i < 3; i++) {
397 			dst[optype][i] =
398 			   src[optype][0][i] - src[optype][1][i];
399 		     }
400 		  else
401 		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
402 		  break;
403 	       case GL_MUL_ATI:
404 		  if (!optype)
405 		     for (i = 0; i < 3; i++) {
406 			dst[optype][i] =
407 			   src[optype][0][i] * src[optype][1][i];
408 		     }
409 		  else
410 		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
411 		  break;
412 	       case GL_MAD_ATI:
413 		  if (!optype)
414 		     for (i = 0; i < 3; i++) {
415 			dst[optype][i] =
416 			   src[optype][0][i] * src[optype][1][i] +
417 			   src[optype][2][i];
418 		     }
419 		  else
420 		     dst[optype][3] =
421 			src[optype][0][3] * src[optype][1][3] +
422 			src[optype][2][3];
423 		  break;
424 	       case GL_LERP_ATI:
425 		  if (!optype)
426 		     for (i = 0; i < 3; i++) {
427 			dst[optype][i] =
428 			   src[optype][0][i] * src[optype][1][i] + (1 -
429 								    src
430 								    [optype]
431 								    [0][i]) *
432 			   src[optype][2][i];
433 		     }
434 		  else
435 		     dst[optype][3] =
436 			src[optype][0][3] * src[optype][1][3] + (1 -
437 								 src[optype]
438 								 [0][3]) *
439 			src[optype][2][3];
440 		  break;
441 
442 	       case GL_MOV_ATI:
443 		  if (!optype)
444 		     for (i = 0; i < 3; i++) {
445 			dst[optype][i] = src[optype][0][i];
446 		     }
447 		  else
448 		     dst[optype][3] = src[optype][0][3];
449 		  break;
450 	       case GL_CND_ATI:
451 		  if (!optype) {
452 		     for (i = 0; i < 3; i++) {
453 			dst[optype][i] =
454 			   (src[optype][2][i] >
455 			    0.5) ? src[optype][0][i] : src[optype][1][i];
456 		     }
457 		  }
458 		  else {
459 		     dst[optype][3] =
460 			(src[optype][2][3] >
461 			 0.5) ? src[optype][0][3] : src[optype][1][3];
462 		  }
463 		  break;
464 
465 	       case GL_CND0_ATI:
466 		  if (!optype)
467 		     for (i = 0; i < 3; i++) {
468 			dst[optype][i] =
469 			   (src[optype][2][i] >=
470 			    0) ? src[optype][0][i] : src[optype][1][i];
471 		     }
472 		  else {
473 		     dst[optype][3] =
474 			(src[optype][2][3] >=
475 			 0) ? src[optype][0][3] : src[optype][1][3];
476 		  }
477 		  break;
478 	       case GL_DOT2_ADD_ATI:
479 		  {
480 		     GLfloat result;
481 
482 		     /* DOT 2 always uses the source from the color op */
483 		     /* could save recalculation of dot products for alpha inst */
484 		     result = src[0][0][0] * src[0][1][0] +
485 			src[0][0][1] * src[0][1][1] + src[0][2][2];
486 		     if (!optype) {
487 			for (i = 0; i < 3; i++) {
488 			   dst[optype][i] = result;
489 			}
490 		     }
491 		     else
492 			dst[optype][3] = result;
493 		  }
494 		  break;
495 	       case GL_DOT3_ATI:
496 		  {
497 		     GLfloat result;
498 
499 		     /* DOT 3 always uses the source from the color op */
500 		     result = src[0][0][0] * src[0][1][0] +
501 			src[0][0][1] * src[0][1][1] +
502 			src[0][0][2] * src[0][1][2];
503 
504 		     if (!optype) {
505 			for (i = 0; i < 3; i++) {
506 			   dst[optype][i] = result;
507 			}
508 		     }
509 		     else
510 			dst[optype][3] = result;
511 		  }
512 		  break;
513 	       case GL_DOT4_ATI:
514 		  {
515 		     GLfloat result;
516 
517 		     /* DOT 4 always uses the source from the color op */
518 		     result = src[0][0][0] * src[0][1][0] +
519 			src[0][0][1] * src[0][1][1] +
520 			src[0][0][2] * src[0][1][2] +
521 			src[0][0][3] * src[0][1][3];
522 		     if (!optype) {
523 			for (i = 0; i < 3; i++) {
524 			   dst[optype][i] = result;
525 			}
526 		     }
527 		     else
528 			dst[optype][3] = result;
529 		  }
530 		  break;
531 
532 	       }
533 	    }
534 	 }
535 
536 	 /* write out the destination registers */
537 	 for (optype = 0; optype < 2; optype++) {
538 	    if (inst->Opcode[optype]) {
539 	       dstreg = inst->DstReg[optype].Index;
540 	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
541 
542 	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
543 		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
544 	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
545 			      inst->DstReg[optype].dstMask, dst[optype],
546 			      dstp);
547 	       else
548 		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
549 	    }
550 	 }
551       }
552    }
553 }
554 
555 
556 /**
557  * Init fragment shader virtual machine state.
558  */
559 static void
init_machine(struct gl_context * ctx,struct atifs_machine * machine,const struct ati_fragment_shader * shader,const SWspan * span,GLuint col)560 init_machine(struct gl_context * ctx, struct atifs_machine *machine,
561 	     const struct ati_fragment_shader *shader,
562 	     const SWspan *span, GLuint col)
563 {
564    GLfloat (*inputs)[4] = machine->Inputs;
565    GLint i, j;
566 
567    for (i = 0; i < 6; i++) {
568       for (j = 0; j < 4; j++)
569 	 machine->Registers[i][j] = 0.0;
570    }
571 
572    COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
573    COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
574 }
575 
576 
577 
578 /**
579  * Execute the current ATI shader program, operating on the given span.
580  */
581 void
_swrast_exec_fragment_shader(struct gl_context * ctx,SWspan * span)582 _swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span)
583 {
584    const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
585    struct atifs_machine machine;
586    GLuint i;
587 
588    /* incoming colors should be floats */
589    ASSERT(span->array->ChanType == GL_FLOAT);
590 
591    for (i = 0; i < span->end; i++) {
592       if (span->array->mask[i]) {
593 	 init_machine(ctx, &machine, shader, span, i);
594 
595 	 execute_shader(ctx, shader, &machine, span, i);
596 
597          /* store result color */
598 	 {
599 	    const GLfloat *colOut = machine.Registers[0];
600             /*fprintf(stderr,"outputs %f %f %f %f\n",
601               colOut[0], colOut[1], colOut[2], colOut[3]); */
602             COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
603 	 }
604       }
605    }
606 }
607