• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright (C) 2004  David Airlie   All Rights Reserved.
3   *
4   * Permission is hereby granted, free of charge, to any person obtaining a
5   * copy of this software and associated documentation files (the "Software"),
6   * to deal in the Software without restriction, including without limitation
7   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8   * and/or sell copies of the Software, and to permit persons to whom the
9   * Software is furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included
12   * in all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15   * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17   * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18   * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20   */
21  
22  #include "main/glheader.h"
23  #include "main/colormac.h"
24  #include "main/macros.h"
25  #include "main/atifragshader.h"
26  #include "main/samplerobj.h"
27  #include "swrast/s_atifragshader.h"
28  #include "swrast/s_context.h"
29  
30  
31  /**
32   * State for executing ATI fragment shader.
33   */
34  struct atifs_machine
35  {
36     GLfloat Registers[6][4];         /** six temporary registers */
37     GLfloat PrevPassRegisters[6][4];
38     GLfloat Inputs[2][4];   /** Primary, secondary input colors */
39  };
40  
41  
42  
43  /**
44   * Fetch a texel.
45   */
46  static void
fetch_texel(struct gl_context * ctx,const GLfloat texcoord[4],GLfloat lambda,GLuint unit,GLfloat color[4])47  fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda,
48  	    GLuint unit, GLfloat color[4])
49  {
50     SWcontext *swrast = SWRAST_CONTEXT(ctx);
51  
52     /* XXX use a float-valued TextureSample routine here!!! */
53     swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit),
54                                 ctx->Texture.Unit[unit]._Current,
55  			       1, (const GLfloat(*)[4]) texcoord,
56                                 &lambda, (GLfloat (*)[4]) color);
57  }
58  
59  static void
apply_swizzle(GLfloat values[4],GLuint swizzle)60  apply_swizzle(GLfloat values[4], GLuint swizzle)
61  {
62     GLfloat s, t, r, q;
63  
64     s = values[0];
65     t = values[1];
66     r = values[2];
67     q = values[3];
68  
69     switch (swizzle) {
70     case GL_SWIZZLE_STR_ATI:
71        values[0] = s;
72        values[1] = t;
73        values[2] = r;
74        break;
75     case GL_SWIZZLE_STQ_ATI:
76        values[0] = s;
77        values[1] = t;
78        values[2] = q;
79        break;
80     case GL_SWIZZLE_STR_DR_ATI:
81        values[0] = s / r;
82        values[1] = t / r;
83        values[2] = 1 / r;
84        break;
85     case GL_SWIZZLE_STQ_DQ_ATI:
86  /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
87        if (q == 0.0F)
88           q = 0.000000001F;
89        values[0] = s / q;
90        values[1] = t / q;
91        values[2] = 1.0F / q;
92        break;
93     }
94     values[3] = 0.0;
95  }
96  
97  static void
apply_src_rep(GLint optype,GLuint rep,GLfloat * val)98  apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
99  {
100     GLint i;
101     GLint start, end;
102     if (!rep)
103        return;
104  
105     start = optype ? 3 : 0;
106     end = 4;
107  
108     for (i = start; i < end; i++) {
109        switch (rep) {
110        case GL_RED:
111  	 val[i] = val[0];
112  	 break;
113        case GL_GREEN:
114  	 val[i] = val[1];
115  	 break;
116        case GL_BLUE:
117  	 val[i] = val[2];
118  	 break;
119        case GL_ALPHA:
120  	 val[i] = val[3];
121  	 break;
122        }
123     }
124  }
125  
126  static void
apply_src_mod(GLint optype,GLuint mod,GLfloat * val)127  apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
128  {
129     GLint i;
130     GLint start, end;
131  
132     if (!mod)
133        return;
134  
135     start = optype ? 3 : 0;
136     end = 4;
137  
138     for (i = start; i < end; i++) {
139        if (mod & GL_COMP_BIT_ATI)
140  	 val[i] = 1 - val[i];
141  
142        if (mod & GL_BIAS_BIT_ATI)
143  	 val[i] = val[i] - 0.5F;
144  
145        if (mod & GL_2X_BIT_ATI)
146  	 val[i] = 2 * val[i];
147  
148        if (mod & GL_NEGATE_BIT_ATI)
149  	 val[i] = -val[i];
150     }
151  }
152  
153  static void
apply_dst_mod(GLuint optype,GLuint mod,GLfloat * val)154  apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
155  {
156     GLint i;
157     GLint has_sat = mod & GL_SATURATE_BIT_ATI;
158     GLint start, end;
159  
160     mod &= ~GL_SATURATE_BIT_ATI;
161  
162     start = optype ? 3 : 0;
163     end = optype ? 4 : 3;
164  
165     for (i = start; i < end; i++) {
166        switch (mod) {
167        case GL_2X_BIT_ATI:
168  	 val[i] = 2 * val[i];
169  	 break;
170        case GL_4X_BIT_ATI:
171  	 val[i] = 4 * val[i];
172  	 break;
173        case GL_8X_BIT_ATI:
174  	 val[i] = 8 * val[i];
175  	 break;
176        case GL_HALF_BIT_ATI:
177  	 val[i] = val[i] * 0.5F;
178  	 break;
179        case GL_QUARTER_BIT_ATI:
180  	 val[i] = val[i] * 0.25F;
181  	 break;
182        case GL_EIGHTH_BIT_ATI:
183  	 val[i] = val[i] * 0.125F;
184  	 break;
185        }
186  
187        if (has_sat) {
188  	 if (val[i] < 0.0F)
189  	    val[i] = 0.0F;
190  	 else if (val[i] > 1.0F)
191  	    val[i] = 1.0F;
192        }
193        else {
194  	 if (val[i] < -8.0F)
195  	    val[i] = -8.0F;
196  	 else if (val[i] > 8.0F)
197  	    val[i] = 8.0F;
198        }
199     }
200  }
201  
202  
203  static void
write_dst_addr(GLuint optype,GLuint mod,GLuint mask,GLfloat * src,GLfloat * dst)204  write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
205  	       GLfloat * dst)
206  {
207     GLint i;
208     apply_dst_mod(optype, mod, src);
209  
210     if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
211        if (mask) {
212  	 if (mask & GL_RED_BIT_ATI)
213  	    dst[0] = src[0];
214  
215  	 if (mask & GL_GREEN_BIT_ATI)
216  	    dst[1] = src[1];
217  
218  	 if (mask & GL_BLUE_BIT_ATI)
219  	    dst[2] = src[2];
220        }
221        else {
222  	 for (i = 0; i < 3; i++)
223  	    dst[i] = src[i];
224        }
225     }
226     else
227        dst[3] = src[3];
228  }
229  
230  static void
finish_pass(struct atifs_machine * machine)231  finish_pass(struct atifs_machine *machine)
232  {
233     GLint i;
234  
235     for (i = 0; i < 6; i++) {
236        COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
237     }
238  }
239  
240  struct ati_fs_opcode_st ati_fs_opcodes[] = {
241     {GL_ADD_ATI, 2},
242     {GL_SUB_ATI, 2},
243     {GL_MUL_ATI, 2},
244     {GL_MAD_ATI, 3},
245     {GL_LERP_ATI, 3},
246     {GL_MOV_ATI, 1},
247     {GL_CND_ATI, 3},
248     {GL_CND0_ATI, 3},
249     {GL_DOT2_ADD_ATI, 3},
250     {GL_DOT3_ATI, 2},
251     {GL_DOT4_ATI, 2}
252  };
253  
254  
255  
256  static void
handle_pass_op(struct atifs_machine * machine,struct atifs_setupinst * texinst,const SWspan * span,GLuint column,GLuint idx)257  handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
258  	       const SWspan *span, GLuint column, GLuint idx)
259  {
260     GLuint swizzle = texinst->swizzle;
261     GLuint pass_tex = texinst->src;
262  
263     if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
264        pass_tex -= GL_TEXTURE0_ARB;
265        COPY_4V(machine->Registers[idx],
266  	      span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
267     }
268     else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
269        pass_tex -= GL_REG_0_ATI;
270        COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
271     }
272     apply_swizzle(machine->Registers[idx], swizzle);
273  
274  }
275  
276  static void
handle_sample_op(struct gl_context * ctx,struct atifs_machine * machine,struct atifs_setupinst * texinst,const SWspan * span,GLuint column,GLuint idx)277  handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine,
278  		 struct atifs_setupinst *texinst, const SWspan *span,
279  		 GLuint column, GLuint idx)
280  {
281  /* sample from unit idx using texinst->src as coords */
282     GLuint swizzle = texinst->swizzle;
283     GLuint coord_source = texinst->src;
284     GLfloat tex_coords[4] = { 0 };
285  
286     if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
287        coord_source -= GL_TEXTURE0_ARB;
288        COPY_4V(tex_coords,
289                span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
290     }
291     else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
292        coord_source -= GL_REG_0_ATI;
293        COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
294     }
295     apply_swizzle(tex_coords, swizzle);
296     fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
297  }
298  
/*
 * Copy the four-component vector x into source slot i of the given op type.
 * Relies on an array named src being in scope where the macro is expanded.
 */
#define SETUP_SRC_REG(optype, i, x)   \
   do {                               \
      COPY_4V(src[optype][i], x);     \
   } while (0)
303  
304  
305  
306  /**
307   * Execute the given fragment shader.
308   * NOTE: we do everything in single-precision floating point
309   * \param ctx - rendering context
310   * \param shader - the shader to execute
311   * \param machine - virtual machine state
312   * \param span - the SWspan we're operating on
313   * \param column - which pixel [i] we're operating on in the span
314   */
315  static void
execute_shader(struct gl_context * ctx,const struct ati_fragment_shader * shader,struct atifs_machine * machine,const SWspan * span,GLuint column)316  execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
317  	       struct atifs_machine *machine, const SWspan *span,
318                 GLuint column)
319  {
320     GLuint pc;
321     struct atifs_instruction *inst;
322     struct atifs_setupinst *texinst;
323     GLint optype;
324     GLuint i;
325     GLint j, pass;
326     GLint dstreg;
327     GLfloat src[2][3][4];
328     GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
329     GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
330     GLfloat dst[2][4], *dstp;
331  
332     for (pass = 0; pass < shader->NumPasses; pass++) {
333        if (pass > 0)
334  	 finish_pass(machine);
335        for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
336  	 texinst = &shader->SetupInst[pass][j];
337  	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
338  	    handle_pass_op(machine, texinst, span, column, j);
339  	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
340  	    handle_sample_op(ctx, machine, texinst, span, column, j);
341        }
342  
343        for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
344  	 inst = &shader->Instructions[pass][pc];
345  
346  	 /* setup the source registers for color and alpha ops */
347  	 for (optype = 0; optype < 2; optype++) {
348   	    for (i = 0; i < inst->ArgCount[optype]; i++) {
349  	       GLint index = inst->SrcReg[optype][i].Index;
350  
351  	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
352  		  SETUP_SRC_REG(optype, i,
353  				machine->Registers[index - GL_REG_0_ATI]);
354  	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
355  		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
356  		     SETUP_SRC_REG(optype, i,
357  				shader->Constants[index - GL_CON_0_ATI]);
358  		  } else {
359  		     SETUP_SRC_REG(optype, i,
360  				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
361  		  }
362  	       }
363  	       else if (index == GL_ONE)
364  		  SETUP_SRC_REG(optype, i, ones);
365  	       else if (index == GL_ZERO)
366  		  SETUP_SRC_REG(optype, i, zeros);
367  	       else if (index == GL_PRIMARY_COLOR_EXT)
368  		  SETUP_SRC_REG(optype, i,
369  				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
370  	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
371  		  SETUP_SRC_REG(optype, i,
372  				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
373  
374  	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
375  			     src[optype][i]);
376  	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
377  			     src[optype][i]);
378  	    }
379  	 }
380  
381  	 /* Execute the operations - color then alpha */
382  	 for (optype = 0; optype < 2; optype++) {
383  	    if (inst->Opcode[optype]) {
384  	       switch (inst->Opcode[optype]) {
385  	       case GL_ADD_ATI:
386  		  if (!optype)
387  		     for (i = 0; i < 3; i++) {
388  			dst[optype][i] =
389  			   src[optype][0][i] + src[optype][1][i];
390  		     }
391  		  else
392  		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
393  		  break;
394  	       case GL_SUB_ATI:
395  		  if (!optype)
396  		     for (i = 0; i < 3; i++) {
397  			dst[optype][i] =
398  			   src[optype][0][i] - src[optype][1][i];
399  		     }
400  		  else
401  		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
402  		  break;
403  	       case GL_MUL_ATI:
404  		  if (!optype)
405  		     for (i = 0; i < 3; i++) {
406  			dst[optype][i] =
407  			   src[optype][0][i] * src[optype][1][i];
408  		     }
409  		  else
410  		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
411  		  break;
412  	       case GL_MAD_ATI:
413  		  if (!optype)
414  		     for (i = 0; i < 3; i++) {
415  			dst[optype][i] =
416  			   src[optype][0][i] * src[optype][1][i] +
417  			   src[optype][2][i];
418  		     }
419  		  else
420  		     dst[optype][3] =
421  			src[optype][0][3] * src[optype][1][3] +
422  			src[optype][2][3];
423  		  break;
424  	       case GL_LERP_ATI:
425  		  if (!optype)
426  		     for (i = 0; i < 3; i++) {
427  			dst[optype][i] =
428  			   src[optype][0][i] * src[optype][1][i] + (1 -
429  								    src
430  								    [optype]
431  								    [0][i]) *
432  			   src[optype][2][i];
433  		     }
434  		  else
435  		     dst[optype][3] =
436  			src[optype][0][3] * src[optype][1][3] + (1 -
437  								 src[optype]
438  								 [0][3]) *
439  			src[optype][2][3];
440  		  break;
441  
442  	       case GL_MOV_ATI:
443  		  if (!optype)
444  		     for (i = 0; i < 3; i++) {
445  			dst[optype][i] = src[optype][0][i];
446  		     }
447  		  else
448  		     dst[optype][3] = src[optype][0][3];
449  		  break;
450  	       case GL_CND_ATI:
451  		  if (!optype) {
452  		     for (i = 0; i < 3; i++) {
453  			dst[optype][i] =
454  			   (src[optype][2][i] >
455  			    0.5) ? src[optype][0][i] : src[optype][1][i];
456  		     }
457  		  }
458  		  else {
459  		     dst[optype][3] =
460  			(src[optype][2][3] >
461  			 0.5) ? src[optype][0][3] : src[optype][1][3];
462  		  }
463  		  break;
464  
465  	       case GL_CND0_ATI:
466  		  if (!optype)
467  		     for (i = 0; i < 3; i++) {
468  			dst[optype][i] =
469  			   (src[optype][2][i] >=
470  			    0) ? src[optype][0][i] : src[optype][1][i];
471  		     }
472  		  else {
473  		     dst[optype][3] =
474  			(src[optype][2][3] >=
475  			 0) ? src[optype][0][3] : src[optype][1][3];
476  		  }
477  		  break;
478  	       case GL_DOT2_ADD_ATI:
479  		  {
480  		     GLfloat result;
481  
482  		     /* DOT 2 always uses the source from the color op */
483  		     /* could save recalculation of dot products for alpha inst */
484  		     result = src[0][0][0] * src[0][1][0] +
485  			src[0][0][1] * src[0][1][1] + src[0][2][2];
486  		     if (!optype) {
487  			for (i = 0; i < 3; i++) {
488  			   dst[optype][i] = result;
489  			}
490  		     }
491  		     else
492  			dst[optype][3] = result;
493  		  }
494  		  break;
495  	       case GL_DOT3_ATI:
496  		  {
497  		     GLfloat result;
498  
499  		     /* DOT 3 always uses the source from the color op */
500  		     result = src[0][0][0] * src[0][1][0] +
501  			src[0][0][1] * src[0][1][1] +
502  			src[0][0][2] * src[0][1][2];
503  
504  		     if (!optype) {
505  			for (i = 0; i < 3; i++) {
506  			   dst[optype][i] = result;
507  			}
508  		     }
509  		     else
510  			dst[optype][3] = result;
511  		  }
512  		  break;
513  	       case GL_DOT4_ATI:
514  		  {
515  		     GLfloat result;
516  
517  		     /* DOT 4 always uses the source from the color op */
518  		     result = src[0][0][0] * src[0][1][0] +
519  			src[0][0][1] * src[0][1][1] +
520  			src[0][0][2] * src[0][1][2] +
521  			src[0][0][3] * src[0][1][3];
522  		     if (!optype) {
523  			for (i = 0; i < 3; i++) {
524  			   dst[optype][i] = result;
525  			}
526  		     }
527  		     else
528  			dst[optype][3] = result;
529  		  }
530  		  break;
531  
532  	       }
533  	    }
534  	 }
535  
536  	 /* write out the destination registers */
537  	 for (optype = 0; optype < 2; optype++) {
538  	    if (inst->Opcode[optype]) {
539  	       dstreg = inst->DstReg[optype].Index;
540  	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
541  
542  	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
543  		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
544  	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
545  			      inst->DstReg[optype].dstMask, dst[optype],
546  			      dstp);
547  	       else
548  		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
549  	    }
550  	 }
551        }
552     }
553  }
554  
555  
556  /**
557   * Init fragment shader virtual machine state.
558   */
559  static void
init_machine(struct gl_context * ctx,struct atifs_machine * machine,const struct ati_fragment_shader * shader,const SWspan * span,GLuint col)560  init_machine(struct gl_context * ctx, struct atifs_machine *machine,
561  	     const struct ati_fragment_shader *shader,
562  	     const SWspan *span, GLuint col)
563  {
564     GLfloat (*inputs)[4] = machine->Inputs;
565     GLint i, j;
566  
567     for (i = 0; i < 6; i++) {
568        for (j = 0; j < 4; j++)
569  	 machine->Registers[i][j] = 0.0;
570     }
571  
572     COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
573     COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
574  }
575  
576  
577  
578  /**
579   * Execute the current ATI shader program, operating on the given span.
580   */
581  void
_swrast_exec_fragment_shader(struct gl_context * ctx,SWspan * span)582  _swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span)
583  {
584     const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
585     struct atifs_machine machine;
586     GLuint i;
587  
588     /* incoming colors should be floats */
589     ASSERT(span->array->ChanType == GL_FLOAT);
590  
591     for (i = 0; i < span->end; i++) {
592        if (span->array->mask[i]) {
593  	 init_machine(ctx, &machine, shader, span, i);
594  
595  	 execute_shader(ctx, shader, &machine, span, i);
596  
597           /* store result color */
598  	 {
599  	    const GLfloat *colOut = machine.Registers[0];
600              /*fprintf(stderr,"outputs %f %f %f %f\n",
601                colOut[0], colOut[1], colOut[2], colOut[3]); */
602              COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
603  	 }
604        }
605     }
606  }
607