1 /*
2  * Mesa 3-D graphics library
3  * Version:  7.5
4  *
5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
6  * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included
16  * in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 
26 
27 #include "main/glheader.h"
28 #include "main/context.h"
29 #include "main/colormac.h"
30 #include "main/imports.h"
31 #include "main/pixeltransfer.h"
32 #include "main/samplerobj.h"
33 #include "program/prog_instruction.h"
34 
35 #include "s_context.h"
36 #include "s_texcombine.h"
37 
38 
39 /**
40  * Pointer to array of float[4]
41  * This type makes the code below more concise and avoids a lot of casting.
42  */
43 typedef float (*float4_array)[4];
44 
45 
46 /**
47  * Return array of texels for given unit.
48  */
49 static inline float4_array
get_texel_array(SWcontext * swrast,GLuint unit)50 get_texel_array(SWcontext *swrast, GLuint unit)
51 {
52 #ifdef _OPENMP
53    return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4 * omp_get_num_threads() + (SWRAST_MAX_WIDTH * 4 * omp_get_thread_num()));
54 #else
55    return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4);
56 #endif
57 }
58 
59 
60 
61 /**
62  * Do texture application for:
63  *  GL_EXT_texture_env_combine
64  *  GL_ARB_texture_env_combine
65  *  GL_EXT_texture_env_dot3
66  *  GL_ARB_texture_env_dot3
67  *  GL_ATI_texture_env_combine3
68  *  GL_NV_texture_env_combine4
69  *  conventional GL texture env modes
70  *
71  * \param ctx          rendering context
72  * \param unit         the texture combiner unit
73  * \param primary_rgba incoming fragment color array
74  * \param texelBuffer  pointer to texel colors for all texture units
75  *
76  * \param span         two fields are used in this function:
77  *                       span->end: number of fragments to process
78  *                       span->array->rgba: incoming/result fragment colors
79  */
80 static void
texture_combine(struct gl_context * ctx,GLuint unit,const float4_array primary_rgba,const GLfloat * texelBuffer,SWspan * span)81 texture_combine( struct gl_context *ctx, GLuint unit,
82                  const float4_array primary_rgba,
83                  const GLfloat *texelBuffer,
84                  SWspan *span )
85 {
86    SWcontext *swrast = SWRAST_CONTEXT(ctx);
87    const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
88    const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
89    float4_array argRGB[MAX_COMBINER_TERMS];
90    float4_array argA[MAX_COMBINER_TERMS];
91    const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
92    const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
93    const GLuint numArgsRGB = combine->_NumArgsRGB;
94    const GLuint numArgsA = combine->_NumArgsA;
95    float4_array ccolor[4], rgba;
96    GLuint i, term;
97    GLuint n = span->end;
98    GLchan (*rgbaChan)[4] = span->array->rgba;
99 
100    /* alloc temp pixel buffers */
101    rgba = (float4_array) malloc(4 * n * sizeof(GLfloat));
102    if (!rgba) {
103       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
104       return;
105    }
106 
107    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
108       ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat));
109       if (!ccolor[i]) {
110          while (i) {
111             free(ccolor[i]);
112             i--;
113          }
114          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
115          free(rgba);
116          return;
117       }
118    }
119 
120    for (i = 0; i < n; i++) {
121       rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
122       rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
123       rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
124       rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
125    }
126 
127    /*
128    printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
129           combine->ModeRGB,
130           combine->ModeA,
131           combine->SourceRGB[0],
132           combine->SourceA[0],
133           combine->SourceRGB[1],
134           combine->SourceA[1]);
135    */
136 
137    /*
138     * Do operand setup for up to 4 operands.  Loop over the terms.
139     */
140    for (term = 0; term < numArgsRGB; term++) {
141       const GLenum srcRGB = combine->SourceRGB[term];
142       const GLenum operandRGB = combine->OperandRGB[term];
143 
144       switch (srcRGB) {
145          case GL_TEXTURE:
146             argRGB[term] = get_texel_array(swrast, unit);
147             break;
148          case GL_PRIMARY_COLOR:
149             argRGB[term] = primary_rgba;
150             break;
151          case GL_PREVIOUS:
152             argRGB[term] = rgba;
153             break;
154          case GL_CONSTANT:
155             {
156                float4_array c = ccolor[term];
157                GLfloat red   = textureUnit->EnvColor[0];
158                GLfloat green = textureUnit->EnvColor[1];
159                GLfloat blue  = textureUnit->EnvColor[2];
160                GLfloat alpha = textureUnit->EnvColor[3];
161                for (i = 0; i < n; i++) {
162                   ASSIGN_4V(c[i], red, green, blue, alpha);
163                }
164                argRGB[term] = ccolor[term];
165             }
166             break;
167 	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
168 	  */
169 	 case GL_ZERO:
170             {
171                float4_array c = ccolor[term];
172                for (i = 0; i < n; i++) {
173                   ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
174                }
175                argRGB[term] = ccolor[term];
176             }
177             break;
178 	 case GL_ONE:
179             {
180                float4_array c = ccolor[term];
181                for (i = 0; i < n; i++) {
182                   ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
183                }
184                argRGB[term] = ccolor[term];
185             }
186             break;
187          default:
188             /* ARB_texture_env_crossbar source */
189             {
190                const GLuint srcUnit = srcRGB - GL_TEXTURE0;
191                ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
192                if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
193                   goto end;
194                argRGB[term] = get_texel_array(swrast, srcUnit);
195             }
196       }
197 
198       if (operandRGB != GL_SRC_COLOR) {
199          float4_array src = argRGB[term];
200          float4_array dst = ccolor[term];
201 
202          /* point to new arg[term] storage */
203          argRGB[term] = ccolor[term];
204 
205          switch (operandRGB) {
206          case GL_ONE_MINUS_SRC_COLOR:
207             for (i = 0; i < n; i++) {
208                dst[i][RCOMP] = 1.0F - src[i][RCOMP];
209                dst[i][GCOMP] = 1.0F - src[i][GCOMP];
210                dst[i][BCOMP] = 1.0F - src[i][BCOMP];
211             }
212             break;
213          case GL_SRC_ALPHA:
214             for (i = 0; i < n; i++) {
215                dst[i][RCOMP] =
216                dst[i][GCOMP] =
217                dst[i][BCOMP] = src[i][ACOMP];
218             }
219             break;
220          case GL_ONE_MINUS_SRC_ALPHA:
221             for (i = 0; i < n; i++) {
222                dst[i][RCOMP] =
223                dst[i][GCOMP] =
224                dst[i][BCOMP] = 1.0F - src[i][ACOMP];
225             }
226             break;
227          default:
228             _mesa_problem(ctx, "Bad operandRGB");
229          }
230       }
231    }
232 
233    /*
234     * Set up the argA[term] pointers
235     */
236    for (term = 0; term < numArgsA; term++) {
237       const GLenum srcA = combine->SourceA[term];
238       const GLenum operandA = combine->OperandA[term];
239 
240       switch (srcA) {
241          case GL_TEXTURE:
242             argA[term] = get_texel_array(swrast, unit);
243             break;
244          case GL_PRIMARY_COLOR:
245             argA[term] = primary_rgba;
246             break;
247          case GL_PREVIOUS:
248             argA[term] = rgba;
249             break;
250          case GL_CONSTANT:
251             {
252                float4_array c = ccolor[term];
253                GLfloat alpha = textureUnit->EnvColor[3];
254                for (i = 0; i < n; i++)
255                   c[i][ACOMP] = alpha;
256                argA[term] = ccolor[term];
257             }
258             break;
259 	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
260 	  */
261 	 case GL_ZERO:
262             {
263                float4_array c = ccolor[term];
264                for (i = 0; i < n; i++)
265                   c[i][ACOMP] = 0.0F;
266                argA[term] = ccolor[term];
267             }
268             break;
269 	 case GL_ONE:
270             {
271                float4_array c = ccolor[term];
272                for (i = 0; i < n; i++)
273                   c[i][ACOMP] = 1.0F;
274                argA[term] = ccolor[term];
275             }
276             break;
277          default:
278             /* ARB_texture_env_crossbar source */
279             {
280                const GLuint srcUnit = srcA - GL_TEXTURE0;
281                ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
282                if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
283                   goto end;
284                argA[term] = get_texel_array(swrast, srcUnit);
285             }
286       }
287 
288       if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
289          float4_array src = argA[term];
290          float4_array dst = ccolor[term];
291          argA[term] = ccolor[term];
292          for (i = 0; i < n; i++) {
293             dst[i][ACOMP] = 1.0F - src[i][ACOMP];
294          }
295       }
296    }
297 
298    /* RGB channel combine */
299    {
300       float4_array arg0 = argRGB[0];
301       float4_array arg1 = argRGB[1];
302       float4_array arg2 = argRGB[2];
303       float4_array arg3 = argRGB[3];
304 
305       switch (combine->ModeRGB) {
306       case GL_REPLACE:
307          for (i = 0; i < n; i++) {
308             rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
309             rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
310             rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
311          }
312          break;
313       case GL_MODULATE:
314          for (i = 0; i < n; i++) {
315             rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
316             rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
317             rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
318          }
319          break;
320       case GL_ADD:
321          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
322             /* (a * b) + (c * d) */
323             for (i = 0; i < n; i++) {
324                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
325                                  arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
326                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
327                                  arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
328                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
329                                  arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
330             }
331          }
332          else {
333             /* 2-term addition */
334             for (i = 0; i < n; i++) {
335                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
336                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
337                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
338             }
339          }
340          break;
341       case GL_ADD_SIGNED:
342          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
343             /* (a * b) + (c * d) - 0.5 */
344             for (i = 0; i < n; i++) {
345                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
346                                  arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
347                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
348                                  arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
349                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
350                                  arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
351             }
352          }
353          else {
354             for (i = 0; i < n; i++) {
355                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
356                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
357                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
358             }
359          }
360          break;
361       case GL_INTERPOLATE:
362          for (i = 0; i < n; i++) {
363             rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
364                           arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
365             rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
366                           arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
367             rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
368                           arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
369          }
370          break;
371       case GL_SUBTRACT:
372          for (i = 0; i < n; i++) {
373             rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
374             rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
375             rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
376          }
377          break;
378       case GL_DOT3_RGB_EXT:
379       case GL_DOT3_RGBA_EXT:
380          /* Do not scale the result by 1 2 or 4 */
381          for (i = 0; i < n; i++) {
382             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
383                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
384                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
385                * 4.0F;
386             dot = CLAMP(dot, 0.0F, 1.0F);
387             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
388          }
389          break;
390       case GL_DOT3_RGB:
391       case GL_DOT3_RGBA:
392          /* DO scale the result by 1 2 or 4 */
393          for (i = 0; i < n; i++) {
394             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
395                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
396                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
397                * 4.0F * scaleRGB;
398             dot = CLAMP(dot, 0.0F, 1.0F);
399             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
400          }
401          break;
402       case GL_MODULATE_ADD_ATI:
403          for (i = 0; i < n; i++) {
404             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
405                               arg1[i][RCOMP]) * scaleRGB;
406             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
407                               arg1[i][GCOMP]) * scaleRGB;
408             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
409                               arg1[i][BCOMP]) * scaleRGB;
410 	 }
411          break;
412       case GL_MODULATE_SIGNED_ADD_ATI:
413          for (i = 0; i < n; i++) {
414             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
415                               arg1[i][RCOMP] - 0.5F) * scaleRGB;
416             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
417                               arg1[i][GCOMP] - 0.5F) * scaleRGB;
418             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
419                               arg1[i][BCOMP] - 0.5F) * scaleRGB;
420 	 }
421          break;
422       case GL_MODULATE_SUBTRACT_ATI:
423          for (i = 0; i < n; i++) {
424             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
425                               arg1[i][RCOMP]) * scaleRGB;
426             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
427                               arg1[i][GCOMP]) * scaleRGB;
428             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
429                               arg1[i][BCOMP]) * scaleRGB;
430 	 }
431          break;
432       case GL_BUMP_ENVMAP_ATI:
433          /* this produces a fixed rgba color, and the coord calc is done elsewhere */
434          for (i = 0; i < n; i++) {
435             /* rgba result is 0,0,0,1 */
436             rgba[i][RCOMP] = 0.0;
437             rgba[i][GCOMP] = 0.0;
438             rgba[i][BCOMP] = 0.0;
439             rgba[i][ACOMP] = 1.0;
440 	 }
441          goto end; /* no alpha processing */
442       default:
443          _mesa_problem(ctx, "invalid combine mode");
444       }
445    }
446 
447    /* Alpha channel combine */
448    {
449       float4_array arg0 = argA[0];
450       float4_array arg1 = argA[1];
451       float4_array arg2 = argA[2];
452       float4_array arg3 = argA[3];
453 
454       switch (combine->ModeA) {
455       case GL_REPLACE:
456          for (i = 0; i < n; i++) {
457             rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
458          }
459          break;
460       case GL_MODULATE:
461          for (i = 0; i < n; i++) {
462             rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
463          }
464          break;
465       case GL_ADD:
466          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
467             /* (a * b) + (c * d) */
468             for (i = 0; i < n; i++) {
469                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
470                                  arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
471             }
472          }
473          else {
474             /* two-term add */
475             for (i = 0; i < n; i++) {
476                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
477             }
478          }
479          break;
480       case GL_ADD_SIGNED:
481          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
482             /* (a * b) + (c * d) - 0.5 */
483             for (i = 0; i < n; i++) {
484                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
485                                  arg2[i][ACOMP] * arg3[i][ACOMP] -
486                                  0.5F) * scaleA;
487             }
488          }
489          else {
490             /* a + b - 0.5 */
491             for (i = 0; i < n; i++) {
492                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
493             }
494          }
495          break;
496       case GL_INTERPOLATE:
497          for (i = 0; i < n; i++) {
498             rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
499                               arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
500                * scaleA;
501          }
502          break;
503       case GL_SUBTRACT:
504          for (i = 0; i < n; i++) {
505             rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
506          }
507          break;
508       case GL_MODULATE_ADD_ATI:
509          for (i = 0; i < n; i++) {
510             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
511                               + arg1[i][ACOMP]) * scaleA;
512          }
513          break;
514       case GL_MODULATE_SIGNED_ADD_ATI:
515          for (i = 0; i < n; i++) {
516             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
517                               arg1[i][ACOMP] - 0.5F) * scaleA;
518          }
519          break;
520       case GL_MODULATE_SUBTRACT_ATI:
521          for (i = 0; i < n; i++) {
522             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
523                               - arg1[i][ACOMP]) * scaleA;
524          }
525          break;
526       default:
527          _mesa_problem(ctx, "invalid combine mode");
528       }
529    }
530 
531    /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
532     * This is kind of a kludge.  It would have been better if the spec
533     * were written such that the GL_COMBINE_ALPHA value could be set to
534     * GL_DOT3.
535     */
536    if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
537        combine->ModeRGB == GL_DOT3_RGBA) {
538       for (i = 0; i < n; i++) {
539 	 rgba[i][ACOMP] = rgba[i][RCOMP];
540       }
541    }
542 
543    for (i = 0; i < n; i++) {
544       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
545       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
546       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
547       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
548    }
549    /* The span->array->rgba values are of CHAN type so set
550     * span->array->ChanType field accordingly.
551     */
552    span->array->ChanType = CHAN_TYPE;
553 
554 end:
555    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
556       free(ccolor[i]);
557    }
558    free(rgba);
559 }
560 
561 
562 /**
563  * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
564  * See GL_EXT_texture_swizzle.
565  */
566 static void
swizzle_texels(GLuint swizzle,GLuint count,float4_array texels)567 swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
568 {
569    const GLuint swzR = GET_SWZ(swizzle, 0);
570    const GLuint swzG = GET_SWZ(swizzle, 1);
571    const GLuint swzB = GET_SWZ(swizzle, 2);
572    const GLuint swzA = GET_SWZ(swizzle, 3);
573    GLfloat vector[6];
574    GLuint i;
575 
576    vector[SWIZZLE_ZERO] = 0;
577    vector[SWIZZLE_ONE] = 1.0F;
578 
579    for (i = 0; i < count; i++) {
580       vector[SWIZZLE_X] = texels[i][0];
581       vector[SWIZZLE_Y] = texels[i][1];
582       vector[SWIZZLE_Z] = texels[i][2];
583       vector[SWIZZLE_W] = texels[i][3];
584       texels[i][RCOMP] = vector[swzR];
585       texels[i][GCOMP] = vector[swzG];
586       texels[i][BCOMP] = vector[swzB];
587       texels[i][ACOMP] = vector[swzA];
588    }
589 }
590 
591 
592 /**
593  * Apply texture mapping to a span of fragments.
594  */
595 void
_swrast_texture_span(struct gl_context * ctx,SWspan * span)596 _swrast_texture_span( struct gl_context *ctx, SWspan *span )
597 {
598    SWcontext *swrast = SWRAST_CONTEXT(ctx);
599    float4_array primary_rgba;
600    GLuint unit;
601 
602    if (!swrast->TexelBuffer) {
603 #ifdef _OPENMP
604       const GLint maxThreads = omp_get_max_threads();
605 #else
606       const GLint maxThreads = 1;
607 #endif
608 
609       /* TexelBuffer is also global and normally shared by all SWspan
610        * instances; when running with multiple threads, create one per
611        * thread.
612        */
613       swrast->TexelBuffer =
614 	 (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
615 			    SWRAST_MAX_WIDTH * 4 * sizeof(GLfloat));
616       if (!swrast->TexelBuffer) {
617 	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
618 	 return;
619       }
620    }
621 
622    primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat));
623 
624    if (!primary_rgba) {
625       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
626       return;
627    }
628 
629    ASSERT(span->end <= SWRAST_MAX_WIDTH);
630 
631    /*
632     * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
633     */
634    if (swrast->_TextureCombinePrimary) {
635       GLuint i;
636       for (i = 0; i < span->end; i++) {
637          primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
638          primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
639          primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
640          primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
641       }
642    }
643 
644    /* First must sample all bump maps */
645    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
646       const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
647 
648       if (texUnit->_ReallyEnabled &&
649          texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) {
650          const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
651             span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
652          float4_array targetcoords =
653             span->array->attribs[FRAG_ATTRIB_TEX0 +
654                ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0];
655 
656          const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
657          GLfloat *lambda = span->array->lambda[unit];
658          float4_array texels = get_texel_array(swrast, unit);
659          GLuint i;
660          GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0];
661          GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1];
662          GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2];
663          GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3];
664 
665          /* adjust texture lod (lambda) */
666          if (span->arrayMask & SPAN_LAMBDA) {
667             if (texUnit->LodBias + samp->LodBias != 0.0F) {
668                /* apply LOD bias, but don't clamp yet */
669                const GLfloat bias = CLAMP(texUnit->LodBias + samp->LodBias,
670                                           -ctx->Const.MaxTextureLodBias,
671                                           ctx->Const.MaxTextureLodBias);
672                GLuint i;
673                for (i = 0; i < span->end; i++) {
674                   lambda[i] += bias;
675                }
676             }
677 
678             if (samp->MinLod != -1000.0 ||
679                 samp->MaxLod != 1000.0) {
680                /* apply LOD clamping to lambda */
681                const GLfloat min = samp->MinLod;
682                const GLfloat max = samp->MaxLod;
683                GLuint i;
684                for (i = 0; i < span->end; i++) {
685                   GLfloat l = lambda[i];
686                   lambda[i] = CLAMP(l, min, max);
687                }
688             }
689          }
690 
691          /* Sample the texture (span->end = number of fragments) */
692          swrast->TextureSample[unit]( ctx, samp,
693                                       ctx->Texture.Unit[unit]._Current,
694                                       span->end, texcoords, lambda, texels );
695 
696          /* manipulate the span values of the bump target
697             not sure this can work correctly even ignoring
698             the problem that channel is unsigned */
699          for (i = 0; i < span->end; i++) {
700             targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] *
701                                   rotMatrix01) / targetcoords[i][3];
702             targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] *
703                                   rotMatrix11) / targetcoords[i][3];
704          }
705       }
706    }
707 
708    /*
709     * Must do all texture sampling before combining in order to
710     * accomodate GL_ARB_texture_env_crossbar.
711     */
712    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
713       const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
714       if (texUnit->_ReallyEnabled &&
715           texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) {
716          const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
717             span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
718          const struct gl_texture_object *curObj = texUnit->_Current;
719          const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
720          GLfloat *lambda = span->array->lambda[unit];
721          float4_array texels = get_texel_array(swrast, unit);
722 
723          /* adjust texture lod (lambda) */
724          if (span->arrayMask & SPAN_LAMBDA) {
725             if (texUnit->LodBias + samp->LodBias != 0.0F) {
726                /* apply LOD bias, but don't clamp yet */
727                const GLfloat bias = CLAMP(texUnit->LodBias + samp->LodBias,
728                                           -ctx->Const.MaxTextureLodBias,
729                                           ctx->Const.MaxTextureLodBias);
730                GLuint i;
731                for (i = 0; i < span->end; i++) {
732                   lambda[i] += bias;
733                }
734             }
735 
736             if (samp->MinLod != -1000.0 ||
737                 samp->MaxLod != 1000.0) {
738                /* apply LOD clamping to lambda */
739                const GLfloat min = samp->MinLod;
740                const GLfloat max = samp->MaxLod;
741                GLuint i;
742                for (i = 0; i < span->end; i++) {
743                   GLfloat l = lambda[i];
744                   lambda[i] = CLAMP(l, min, max);
745                }
746             }
747          }
748          else if (samp->MaxAnisotropy > 1.0 &&
749                   samp->MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
750             /* sample_lambda_2d_aniso is beeing used as texture_sample_func,
751              * it requires the current SWspan *span as an additional parameter.
752              * In order to keep the same function signature, the unused lambda
753              * parameter will be modified to actually contain the SWspan pointer.
754              * This is a Hack. To make it right, the texture_sample_func
755              * signature and all implementing functions need to be modified.
756              */
757             /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */
758             lambda = (GLfloat *)span;
759          }
760 
761          /* Sample the texture (span->end = number of fragments) */
762          swrast->TextureSample[unit]( ctx, samp,
763                                       ctx->Texture.Unit[unit]._Current,
764                                       span->end, texcoords, lambda, texels );
765 
766          /* GL_EXT_texture_swizzle */
767          if (curObj->_Swizzle != SWIZZLE_NOOP) {
768             swizzle_texels(curObj->_Swizzle, span->end, texels);
769          }
770       }
771    }
772 
773    /*
774     * OK, now apply the texture (aka texture combine/blend).
775     * We modify the span->color.rgba values.
776     */
777    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
778       if (ctx->Texture.Unit[unit]._ReallyEnabled)
779          texture_combine(ctx, unit, primary_rgba, swrast->TexelBuffer, span);
780    }
781 
782    free(primary_rgba);
783 }
784