1 /*
2  * libtxc_dxtn
3  * Version:  1.0
4  *
5  * Copyright (C) 2004  Roland Scheidegger   All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included
15  * in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #ifdef __APPLE__
26 #include <OpenGL/gl.h>
27 #else
28 #include <GL/gl.h>
29 #endif
30 
/* Channel type of a decoded texel component: one unsigned byte. */
typedef GLubyte GLchan;
/* Converts an unsigned byte to a channel value; the identity, since GLchan is GLubyte. */
#define UBYTE_TO_CHAN(b)  (b)
/* Largest channel value. */
#define CHAN_MAX 255
/* Indices of the red, green, blue and alpha components inside a decoded texel. */
#define RCOMP 0
#define GCOMP 1
#define BCOMP 2
#define ACOMP 3
38 
/* Red: bits 15..11 of a packed 16-bit color, widened to 8 bits by
   repeating the three most significant red bits in the low bits. */
#define EXP5TO8R(packedcol) \
   ((((packedcol) >> 13) & 0x07) | (((packedcol) >> 8) & 0xf8))

/* Green: bits 10..5 of a packed 16-bit color, widened to 8 bits by
   repeating the two most significant green bits in the low bits. */
#define EXP6TO8G(packedcol) \
   ((((packedcol) >> 9) & 0x03) | (((packedcol) >> 3) & 0xfc))

/* Blue: bits 4..0 of a packed 16-bit color, widened to 8 bits by
   repeating the three most significant blue bits in the low bits. */
#define EXP5TO8B(packedcol) \
   ((((packedcol) >> 2) & 0x07) | (((packedcol) << 3) & 0xf8))

/* Widen a 4-bit value to 8 bits by copying the nibble into both halves.
   The argument is evaluated twice. */
#define EXP4TO8(col) \
   (((col) << 4) | (col))
50 
51 /* inefficient. To be efficient, it would be necessary to decode 16 pixels at once */
52 
/*
 * Decode the color (and the DXT1 punch-through alpha) of one texel from an
 * 8-byte DXT color block.
 *
 * img_block_src  8-byte color block: two 16-bit base colors stored low byte
 *                first, followed by four bytes holding one 2-bit code per
 *                texel.
 * i, j           column and row of the texel inside the 4x4 block (0..3).
 * dxt_type       0 for RGB DXT1, 1 for RGBA DXT1, greater than 1 for the
 *                color part of DXT3/DXT5 blocks (these are the values the
 *                fetch functions in this file pass).
 * texel          receives four GLchan values indexed by RCOMP, GCOMP, BCOMP
 *                and ACOMP.
 *
 * Alpha is written as CHAN_MAX, except for code 3 of a three-color block
 * when dxt_type is 1, where it is written as 0.
 */
static void dxt135_decode_imageblock ( const GLubyte *img_block_src,
                         GLint i, GLint j, GLuint dxt_type, GLvoid *texel ) {
   GLchan *rgba = (GLchan *) texel;
   const GLushort color0 = img_block_src[0] | (img_block_src[1] << 8);
   const GLushort color1 = img_block_src[2] | (img_block_src[3] << 8);
   /* The bytes are combined explicitly, so the value does not depend on the
    * host byte order.  Each byte is widened to GLuint before shifting: a
    * GLubyte promotes to signed int, and shifting a byte with bit 7 set left
    * by 24 would overflow that int. */
   const GLuint bits = (GLuint) img_block_src[4] |
      ((GLuint) img_block_src[5] << 8) |
      ((GLuint) img_block_src[6] << 16) |
      ((GLuint) img_block_src[7] << 24);
   const GLubyte bit_pos = 2 * (j * 4 + i);
   const GLubyte code = (GLubyte) ((bits >> bit_pos) & 3);
   /* DXT3/DXT5 color blocks always interpolate four colors; DXT1 blocks do
    * so only when color0 > color1. */
   const GLboolean four_colors = (dxt_type > 1) || (color0 > color1);

   rgba[ACOMP] = CHAN_MAX;
   switch (code) {
   case 0:
      rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color0) );
      rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color0) );
      rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color0) );
      break;
   case 1:
      rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color1) );
      rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color1) );
      rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color1) );
      break;
   case 2:
      if (four_colors) {
         /* two thirds color0, one third color1 */
         rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) * 2 + EXP5TO8R(color1)) / 3) );
         rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) * 2 + EXP6TO8G(color1)) / 3) );
         rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) * 2 + EXP5TO8B(color1)) / 3) );
      }
      else {
         /* midpoint of color0 and color1 */
         rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1)) / 2) );
         rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1)) / 2) );
         rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1)) / 2) );
      }
      break;
   case 3:
      if (four_colors) {
         /* one third color0, two thirds color1 */
         rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1) * 2) / 3) );
         rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1) * 2) / 3) );
         rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1) * 2) / 3) );
      }
      else {
         /* black; additionally transparent for RGBA DXT1 */
         rgba[RCOMP] = 0;
         rgba[GCOMP] = 0;
         rgba[BCOMP] = 0;
         if (dxt_type == 1) rgba[ACOMP] = UBYTE_TO_CHAN(0);
      }
      break;
   default:
      /* unreachable: code is masked to two bits */
      break;
   }
}
106 
107 
/*
 * Fetch texel (i, j) of an RGB DXT1 image into texel[RCOMP], texel[GCOMP],
 * texel[BCOMP] and texel[ACOMP].
 *
 * Blocks are 8 bytes each and a row of blocks holds (srcRowStride + 3) / 4
 * of them.
 */
static void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel)
{
   const GLint blocks_per_row = (srcRowStride + 3) / 4;
   const GLint block_index = blocks_per_row * (j / 4) + (i / 4);
   const GLubyte *block = pixdata + block_index * 8;

   /* type 0: RGB DXT1 */
   dxt135_decode_imageblock(block, i & 3, j & 3, 0, texel);
}
118 
119 
/*
 * Fetch texel (i, j) of an RGBA DXT1 image into texel[RCOMP], texel[GCOMP],
 * texel[BCOMP] and texel[ACOMP].
 *
 * Blocks are 8 bytes each and a row of blocks holds (srcRowStride + 3) / 4
 * of them.
 */
static void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel)
{
   const GLint blocks_per_row = (srcRowStride + 3) / 4;
   const GLint block_index = blocks_per_row * (j / 4) + (i / 4);
   const GLubyte *block = pixdata + block_index * 8;

   /* type 1: RGBA DXT1, code 3 of a three-color block decodes as transparent */
   dxt135_decode_imageblock(block, i & 3, j & 3, 1, texel);
}
130 
/*
 * Fetch texel (i, j) of a DXT3 image into texel[RCOMP], texel[GCOMP],
 * texel[BCOMP] and texel[ACOMP].
 *
 * Blocks are 16 bytes each: 8 bytes of 4-bit alpha values (two texels per
 * byte, even column in the low nibble) followed by an 8-byte color block.
 */
static void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel) {
   GLchan *out = (GLchan *) texel;
   const GLint blocks_per_row = (srcRowStride + 3) / 4;
   const GLubyte *block = pixdata + (blocks_per_row * (j / 4) + (i / 4)) * 16;
   const GLint texel_index = (j & 3) * 4 + (i & 3);
   const GLint nibble_shift = (i & 1) ? 4 : 0;
   const GLubyte anibble = (block[texel_index / 2] >> nibble_shift) & 0xf;

   /* colors come from the second half of the block ... */
   dxt135_decode_imageblock(block + 8, i & 3, j & 3, 2, texel);
   /* ... and the alpha written by the color decoder is replaced */
   out[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8(anibble)) );
}
144 
/*
 * Fetch texel (i, j) of a DXT5 image into texel[RCOMP], texel[GCOMP],
 * texel[BCOMP] and texel[ACOMP].
 *
 * Blocks are 16 bytes each: two alpha endpoints, six bytes holding one
 * 3-bit alpha code per texel, then an 8-byte color block.
 */
static void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel) {
   GLchan *out = (GLchan *) texel;
   const GLint blocks_per_row = (srcRowStride + 3) / 4;
   const GLubyte *block = pixdata + (blocks_per_row * (j / 4) + (i / 4)) * 16;
   const GLubyte alpha0 = block[0];
   const GLubyte alpha1 = block[1];
   const GLubyte bit_pos = ((j & 3) * 4 + (i & 3)) * 3;
   const GLubyte shift = bit_pos & 0x7;
   /* A 3-bit code may straddle two bytes, so both are read.  For the last
    * two texels the second read lands on the first color byte, but none of
    * its bits survive the final mask. */
   const GLubyte low_byte = block[2 + bit_pos / 8];
   const GLubyte high_byte = block[3 + bit_pos / 8];
   const GLubyte code = ((low_byte >> shift) | (high_byte << (8 - shift))) & 0x7;
   GLchan alpha;

   dxt135_decode_imageblock(block + 8, i & 3, j & 3, 2, texel);

   if (code == 0) {
      alpha = UBYTE_TO_CHAN( alpha0 );
   }
   else if (code == 1) {
      alpha = UBYTE_TO_CHAN( alpha1 );
   }
   else if (alpha0 > alpha1) {
      /* eight-value mode: codes 2..7 interpolate in sevenths */
      alpha = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) );
   }
   else if (code < 6) {
      /* six-value mode: codes 2..5 interpolate in fifths */
      alpha = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) );
   }
   else {
      /* six-value mode: code 6 is fully transparent, code 7 fully opaque */
      alpha = (code == 6) ? 0 : CHAN_MAX;
   }
   out[ACOMP] = alpha;
}
175 
176 
/* Per-channel weights applied to the squared color differences in the error
   function. They are the squares of 2/4/1 (red/green/blue), which roughly
   follow the rgb->luminance conversion; not sure if this really reflects
   visual perception. */
#define REDWEIGHT 4
#define GREENWEIGHT 16
#define BLUEWEIGHT 1

/* Alpha threshold for RGBA DXT1 encoding: a pixel whose alpha is <= ALPHACUT
   is not considered as a base color and is encoded with code 3. */
#define ALPHACUT 127
184 
/*
 * Refine the two base colors of a block.
 *
 * The two colors are ordered by their packed 5:6:5 value, a four-entry
 * palette (both colors plus the two 1/3 and 2/3 blends) is built, and every
 * source pixel is matched to its nearest palette entry using the
 * REDWEIGHT/GREENWEIGHT/BLUEWEIGHT metric.  The signed per-channel error of
 * each pixel is charged to the two base colors in proportion to their share
 * of the matched palette entry, and each base color is then moved by its
 * average error and clamped to 0..255.  If the resulting colors are so close
 * that they might pack to the same 16-bit value, they are pushed apart.
 *
 * blkaddr      unused.
 * srccolors    source pixels, indexed [row][column][component].
 * bestcolor    in: the two candidate base colors (three bytes each are
 *              read).  out: the refined colors, written through the same
 *              pointers, the one with the smaller packed value in
 *              bestcolor[0].
 * numxpixels,
 * numypixels   number of valid columns and rows in srccolors.
 * type,
 * haveAlpha    unused.
 */
static void fancybasecolorsearch( UNUSED GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
                           GLint numxpixels, GLint numypixels, UNUSED GLint type, UNUSED GLboolean haveAlpha)
{
   /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */

   /* TODO could also try to find a better encoding for the 3-color-encoding type, this really should be done
      if it's rgba_dxt1 and we have alpha in the block, currently even values which will be mapped to black
      due to their alpha value will influence the result */

   /* thirds of palette entry [enc] contributed by testcolor[0];
      the remaining thirds come from testcolor[1] */
   static const GLubyte share0[4] = { 3, 0, 2, 1 };
   GLint i, j, colors, z;
   GLuint pixerror, pixerrorbest;
   GLint colordist, blockerrlin[2][3];
   GLint pixerrorcolor[3];
   GLint pixerrorcolorbest[3] = { 0, 0, 0 };
   GLubyte nrcolor[2];
   GLubyte enc = 0;
   GLubyte cv[4][4];
   GLubyte testcolor[2][3];
   GLint packed0, packed1, low;

   /* testcolor[0] becomes the color with the smaller packed 5:6:5 value */
   packed0 = (bestcolor[0][0] & 0xf8) << 8 | (bestcolor[0][1] & 0xfc) << 3 | bestcolor[0][2] >> 3;
   packed1 = (bestcolor[1][0] & 0xf8) << 8 | (bestcolor[1][1] & 0xfc) << 3 | bestcolor[1][2] >> 3;
   low = (packed0 < packed1) ? 0 : 1;
   for (i = 0; i < 3; i++) {
      testcolor[0][i] = bestcolor[low][i];
      testcolor[1][i] = bestcolor[low ^ 1][i];
   }

   for (i = 0; i < 3; i++) {
      cv[0][i] = testcolor[0][i];
      cv[1][i] = testcolor[1][i];
      cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3;
      cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3;
   }

   for (j = 0; j < 2; j++) {
      for (i = 0; i < 3; i++) {
         blockerrlin[j][i] = 0;
      }
      nrcolor[j] = 0;
   }

   for (j = 0; j < numypixels; j++) {
      for (i = 0; i < numxpixels; i++) {
         /* find the nearest palette entry and remember its signed error */
         pixerrorbest = 0xffffffff;
         for (colors = 0; colors < 4; colors++) {
            colordist = srccolors[j][i][0] - cv[colors][0];
            pixerror = colordist * colordist * REDWEIGHT;
            pixerrorcolor[0] = colordist;
            colordist = srccolors[j][i][1] - cv[colors][1];
            pixerror += colordist * colordist * GREENWEIGHT;
            pixerrorcolor[1] = colordist;
            colordist = srccolors[j][i][2] - cv[colors][2];
            pixerror += colordist * colordist * BLUEWEIGHT;
            pixerrorcolor[2] = colordist;
            if (pixerror < pixerrorbest) {
               enc = colors;
               pixerrorbest = pixerror;
               for (z = 0; z < 3; z++) {
                  pixerrorcolorbest[z] = pixerrorcolor[z];
               }
            }
         }
         /* charge the error to both base colors according to their share */
         for (z = 0; z < 3; z++) {
            blockerrlin[0][z] += share0[enc] * pixerrorcolorbest[z];
            blockerrlin[1][z] += (3 - share0[enc]) * pixerrorcolorbest[z];
         }
         nrcolor[0] += share0[enc];
         nrcolor[1] += 3 - share0[enc];
      }
   }

   /* avoid dividing by zero when a base color received no share at all */
   if (nrcolor[0] == 0) nrcolor[0] = 1;
   if (nrcolor[1] == 0) nrcolor[1] = 1;
   for (j = 0; j < 2; j++) {
      for (i = 0; i < 3; i++) {
         GLint newvalue = testcolor[j][i] + blockerrlin[j][i] / nrcolor[j];
         if (newvalue <= 0)
            testcolor[j][i] = 0;
         else if (newvalue >= 255)
            testcolor[j][i] = 255;
         else testcolor[j][i] = newvalue;
      }
   }

   if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) &&
       (abs(testcolor[0][1] - testcolor[1][1]) < 4) &&
       (abs(testcolor[0][2] - testcolor[1][2]) < 8)) {
      /* both colors are so close they might get encoded as the same 16bit values */
      GLint coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, pushed;
      GLubyte ind0, ind1, grow;

      coldiffred = abs(testcolor[0][0] - testcolor[1][0]);
      coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]);
      coldiffblue = abs(testcolor[0][2] - testcolor[1][2]);
      coldiffmax = coldiffred;
      if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen;
      if (coldiffmax < coldiffblue) coldiffmax = coldiffblue;
      if (coldiffmax > 0) {
         if (coldiffmax > 4) factor = 2;
         else if (coldiffmax > 2) factor = 3;
         else factor = 4;
         /* Won't do much if the color value is near 255... */
         /* ind1 is the color with the larger (or equal) green value */
         if (testcolor[1][1] >= testcolor[0][1]) {
            ind1 = 1; ind0 = 0;
         }
         else {
            ind1 = 0; ind0 = 1;
         }
         pushed = testcolor[ind1][1] + factor * coldiffgreen;
         testcolor[ind1][1] = (pushed <= 255) ? pushed : 255;
         /* Red and blue: raise whichever color already has the larger value
            in that channel, saturating at 255.  Each channel is compared
            with the same channel of the other color. */
         for (z = 0; z < 3; z += 2) {
            grow = (testcolor[ind1][z] > testcolor[ind0][z]) ? ind1 : ind0;
            pushed = testcolor[grow][z] + factor * ((z == 0) ? coldiffred : coldiffblue);
            testcolor[grow][z] = (pushed <= 255) ? pushed : 255;
         }
      }
   }

   /* hand the colors back, the one with the smaller packed 5:6:5 value first;
      only the blue channel is shifted right by 3 when packing */
   packed0 = (testcolor[0][0] & 0xf8) << 8 | (testcolor[0][1] & 0xfc) << 3 | testcolor[0][2] >> 3;
   packed1 = (testcolor[1][0] & 0xf8) << 8 | (testcolor[1][1] & 0xfc) << 3 | testcolor[1][2] >> 3;
   low = (packed0 < packed1) ? 0 : 1;
   for (i = 0; i < 3; i++) {
      bestcolor[0][i] = testcolor[low][i];
      bestcolor[1][i] = testcolor[low ^ 1][i];
   }
}
374 
375 
376 
/*
 * Choose the per-pixel codes for the given base colors and write the
 * finished 8-byte color block.
 *
 * Both base colors are first reduced to 5:6:5 precision and ordered so that
 * color0 >= color1.  The pixels are encoded against the four-color palette;
 * for the two DXT1 formats they are additionally encoded against the
 * three-color palette (with code 3 reserved for RGBA DXT1 pixels whose alpha
 * is <= ALPHACUT).  The three-color result is stored when its error is lower
 * or when haveAlpha is set; its colors are written in swapped order so that
 * the stored first color is not greater than the second.
 *
 * blkaddr      destination, 8 bytes are written.
 * srccolors    source pixels, indexed [row][column][component].
 * bestcolor    the two base colors.  The color bytes are modified in place
 *              (low bits cleared) and the two pointers may be exchanged.
 * numxpixels,
 * numypixels   number of valid columns and rows in srccolors.
 * type         compressed format; compared against
 *              GL_COMPRESSED_RGB_S3TC_DXT1_EXT and
 *              GL_COMPRESSED_RGBA_S3TC_DXT1_EXT.
 * haveAlpha    forces the three-color encoding when true.
 */
static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
                           GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha)
{
   /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */

   GLint i, j, colors;
   GLuint testerror, testerror2, pixerror, pixerrorbest;
   GLint colordist;
   GLushort color0, color1, tempcolor;
   GLushort firstcolor, secondcolor;
   GLuint bits = 0, bits2 = 0, outbits;
   GLubyte *colorptr;
   GLubyte enc = 0;
   GLubyte cv[4][4];

   /* drop the bits that do not survive 5:6:5 packing */
   bestcolor[0][0] = bestcolor[0][0] & 0xf8;
   bestcolor[0][1] = bestcolor[0][1] & 0xfc;
   bestcolor[0][2] = bestcolor[0][2] & 0xf8;
   bestcolor[1][0] = bestcolor[1][0] & 0xf8;
   bestcolor[1][1] = bestcolor[1][1] & 0xfc;
   bestcolor[1][2] = bestcolor[1][2] & 0xf8;

   color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3;
   color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3;
   if (color0 < color1) {
      tempcolor = color0; color0 = color1; color1 = tempcolor;
      colorptr = bestcolor[0]; bestcolor[0] = bestcolor[1]; bestcolor[1] = colorptr;
   }


   for (i = 0; i < 3; i++) {
      cv[0][i] = bestcolor[0][i];
      cv[1][i] = bestcolor[1][i];
      cv[2][i] = (bestcolor[0][i] * 2 + bestcolor[1][i]) / 3;
      cv[3][i] = (bestcolor[0][i] + bestcolor[1][i] * 2) / 3;
   }

   /* four-color encoding */
   testerror = 0;
   for (j = 0; j < numypixels; j++) {
      for (i = 0; i < numxpixels; i++) {
         pixerrorbest = 0xffffffff;
         for (colors = 0; colors < 4; colors++) {
            colordist = srccolors[j][i][0] - cv[colors][0];
            pixerror = colordist * colordist * REDWEIGHT;
            colordist = srccolors[j][i][1] - cv[colors][1];
            pixerror += colordist * colordist * GREENWEIGHT;
            colordist = srccolors[j][i][2] - cv[colors][2];
            pixerror += colordist * colordist * BLUEWEIGHT;
            if (pixerror < pixerrorbest) {
               pixerrorbest = pixerror;
               enc = colors;
            }
         }
         testerror += pixerrorbest;
         /* enc is widened first: as a promoted signed int, a code of 2 or 3
            shifted by 30 would overflow */
         bits |= (GLuint) enc << (2 * (j * 4 + i));
      }
   }
   /* some hw might disagree but actually decoding should always use 4-color encoding
      for non-dxt1 formats */
   if (type == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
      for (i = 0; i < 3; i++) {
         cv[2][i] = (bestcolor[0][i] + bestcolor[1][i]) / 2;
         /* this isn't used. Looks like the black color constant can only be used
            with RGB_DXT1 if I read the spec correctly (note though that the radeon gpu disagrees,
            it will decode 3 to black even with DXT3/5), and due to how the color searching works
            it won't get used even then */
         cv[3][i] = 0;
      }
      /* three-color encoding */
      testerror2 = 0;
      for (j = 0; j < numypixels; j++) {
         for (i = 0; i < numxpixels; i++) {
            pixerrorbest = 0xffffffff;
            if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (srccolors[j][i][3] <= ALPHACUT)) {
               enc = 3;
               pixerrorbest = 0; /* don't calculate error */
            }
            else {
               /* we're calculating the same what we have done already for colors 0-1 above... */
               for (colors = 0; colors < 3; colors++) {
                  colordist = srccolors[j][i][0] - cv[colors][0];
                  pixerror = colordist * colordist * REDWEIGHT;
                  colordist = srccolors[j][i][1] - cv[colors][1];
                  pixerror += colordist * colordist * GREENWEIGHT;
                  colordist = srccolors[j][i][2] - cv[colors][2];
                  pixerror += colordist * colordist * BLUEWEIGHT;
                  if (pixerror < pixerrorbest) {
                     pixerrorbest = pixerror;
                     /* codes 0 and 1 trade places because the two colors are
                        written in swapped order below */
                     if (colors > 1) enc = colors;
                     else enc = colors ^ 1;
                  }
               }
            }
            testerror2 += pixerrorbest;
            bits2 |= (GLuint) enc << (2 * (j * 4 + i));
         }
      }
   } else {
      testerror2 = 0xffffffff;
   }

   /* finally we're finished, write back colors and bits */
   if ((testerror > testerror2) || (haveAlpha)) {
      /* three-color encoding: smaller color first */
      firstcolor = color1;
      secondcolor = color0;
      outbits = bits2;
   }
   else {
      firstcolor = color0;
      secondcolor = color1;
      outbits = bits;
   }
   blkaddr[0] = firstcolor & 0xff;
   blkaddr[1] = firstcolor >> 8;
   blkaddr[2] = secondcolor & 0xff;
   blkaddr[3] = secondcolor >> 8;
   blkaddr[4] = outbits & 0xff;
   blkaddr[5] = (outbits >> 8) & 0xff;
   blkaddr[6] = (outbits >> 16) & 0xff;
   blkaddr[7] = outbits >> 24;
}
499 
/*
 * Encode the color part of one block.
 *
 * Simplistic approach: the "lowest" and the "highest" color present in the
 * block are taken as the initial base colors, where a color's rank is its
 * weighted squared distance from 0/0/0 (weights as in the error metric).
 * That doesn't work too well; this seems to be a rather difficult problem.
 * The base colors are then refined by fancybasecolorsearch() and the block
 * is written by storedxtencodedblock().
 *
 * blkaddr      destination of the encoded color block.
 * srccolors    source pixels, indexed [row][column][component].
 * numxpixels,
 * numypixels   number of valid columns and rows in srccolors.
 * type         compressed format being produced.
 */
static void encodedxtcolorblockfaster( GLubyte *blkaddr, GLubyte srccolors[4][4][4],
                         GLint numxpixels, GLint numypixels, GLuint type )
{
   GLubyte *bestcolor[2];
   GLubyte basecolors[2][3];
   GLubyte row, col, which, chan;
   GLuint lowcv, highcv, testcv;
   GLboolean haveAlpha = GL_FALSE;
   GLubyte *pixel = srccolors[0][0];

   /* start with the first pixel as both the lowest and the highest color */
   testcv = pixel[0] * pixel[0] * REDWEIGHT +
            pixel[1] * pixel[1] * GREENWEIGHT +
            pixel[2] * pixel[2] * BLUEWEIGHT;
   lowcv = testcv;
   highcv = testcv;
   bestcolor[0] = pixel;
   bestcolor[1] = pixel;

   for (row = 0; row < numypixels; row++) {
      for (col = 0; col < numxpixels; col++) {
         pixel = srccolors[row][col];
         /* don't use this as a base color if the pixel will get black/transparent anyway */
         if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (pixel[3] <= ALPHACUT)) {
            haveAlpha = GL_TRUE;
            continue;
         }
         testcv = pixel[0] * pixel[0] * REDWEIGHT +
                  pixel[1] * pixel[1] * GREENWEIGHT +
                  pixel[2] * pixel[2] * BLUEWEIGHT;
         if (testcv > highcv) {
            highcv = testcv;
            bestcolor[1] = pixel;
         }
         else if (testcv < lowcv) {
            lowcv = testcv;
            bestcolor[0] = pixel;
         }
      }
   }

   /* work on copies so the source pixels are not modified by the later steps */
   for (which = 0; which < 2; which++) {
      for (chan = 0; chan < 3; chan++) {
         basecolors[which][chan] = bestcolor[which][chan];
      }
      bestcolor[which] = basecolors[which];
   }

   /* try to find better base colors */
   fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
   /* find the best encoding for these colors, and store the result */
   storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
}
554 
/*
 * Write an 8-byte DXT5 alpha block: the two alpha endpoints followed by the
 * sixteen alpha codes packed three bits each, lowest code in the lowest bits.
 *
 * blkaddr      destination, 8 bytes are written.
 * alphabase1,
 * alphabase2   first and second alpha endpoint byte.
 * alphaenc     one code per texel; callers are expected to pass values
 *              0..7, the codes are not masked here.
 */
static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2,
                         GLubyte alphaenc[16])
{
   GLint half;

   blkaddr[0] = alphabase1;
   blkaddr[1] = alphabase2;
   /* each group of eight codes fills exactly three bytes */
   for (half = 0; half < 2; half++) {
      const GLubyte *code = alphaenc + 8 * half;
      GLubyte *out = blkaddr + 2 + 3 * half;

      out[0] = code[0] | (code[1] << 3) | ((code[2] & 3) << 6);
      out[1] = (code[2] >> 2) | (code[3] << 1) | (code[4] << 4) | ((code[5] & 1) << 7);
      out[2] = (code[5] >> 1) | (code[6] << 2) | (code[7] << 5);
   }
}
567 
encodedxt5alpha(GLubyte * blkaddr,GLubyte srccolors[4][4][4],GLint numxpixels,GLint numypixels)568 static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4],
569                             GLint numxpixels, GLint numypixels)
570 {
571    GLubyte alphabase[2], alphause[2];
572    GLshort alphatest[2];
573    GLuint alphablockerror1, alphablockerror2, alphablockerror3;
574    GLubyte i, j, aindex, acutValues[7];
575    GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
576    GLboolean alphaabsmin = GL_FALSE;
577    GLboolean alphaabsmax = GL_FALSE;
578    GLshort alphadist;
579 
580    /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
581    alphabase[0] = 0xff; alphabase[1] = 0x0;
582    for (j = 0; j < numypixels; j++) {
583       for (i = 0; i < numxpixels; i++) {
584          if (srccolors[j][i][3] == 0)
585             alphaabsmin = GL_TRUE;
586          else if (srccolors[j][i][3] == 255)
587             alphaabsmax = GL_TRUE;
588          else {
589             if (srccolors[j][i][3] > alphabase[1])
590                alphabase[1] = srccolors[j][i][3];
591             if (srccolors[j][i][3] < alphabase[0])
592                alphabase[0] = srccolors[j][i][3];
593          }
594       }
595    }
596 
597 
598    if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
599       /* shortcut here since it is a very common case (and also avoids later problems) */
600       /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
601       /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
602 
603       *blkaddr++ = srccolors[0][0][3];
604       blkaddr++;
605       *blkaddr++ = 0;
606       *blkaddr++ = 0;
607       *blkaddr++ = 0;
608       *blkaddr++ = 0;
609       *blkaddr++ = 0;
610       *blkaddr++ = 0;
611 /*      fprintf(stderr, "enc0 used\n");*/
612       return;
613    }
614 
615    /* find best encoding for alpha0 > alpha1 */
616    /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
617    alphablockerror1 = 0x0;
618    alphablockerror2 = 0xffffffff;
619    alphablockerror3 = 0xffffffff;
620    if (alphaabsmin) alphause[0] = 0;
621    else alphause[0] = alphabase[0];
622    if (alphaabsmax) alphause[1] = 255;
623    else alphause[1] = alphabase[1];
624    /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
625    for (aindex = 0; aindex < 7; aindex++) {
626       /* don't forget here is always rounded down */
627       acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
628    }
629 
630    for (j = 0; j < numypixels; j++) {
631       for (i = 0; i < numxpixels; i++) {
632          /* maybe it's overkill to have the most complicated calculation just for the error
633             calculation which we only need to figure out if encoding1 or encoding2 is better... */
634          if (srccolors[j][i][3] > acutValues[0]) {
635             alphaenc1[4*j + i] = 0;
636             alphadist = srccolors[j][i][3] - alphause[1];
637          }
638          else if (srccolors[j][i][3] > acutValues[1]) {
639             alphaenc1[4*j + i] = 2;
640             alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7;
641          }
642          else if (srccolors[j][i][3] > acutValues[2]) {
643             alphaenc1[4*j + i] = 3;
644             alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7;
645          }
646          else if (srccolors[j][i][3] > acutValues[3]) {
647             alphaenc1[4*j + i] = 4;
648             alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7;
649          }
650          else if (srccolors[j][i][3] > acutValues[4]) {
651             alphaenc1[4*j + i] = 5;
652             alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7;
653          }
654          else if (srccolors[j][i][3] > acutValues[5]) {
655             alphaenc1[4*j + i] = 6;
656             alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7;
657          }
658          else if (srccolors[j][i][3] > acutValues[6]) {
659             alphaenc1[4*j + i] = 7;
660             alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7;
661          }
662          else {
663             alphaenc1[4*j + i] = 1;
664             alphadist = srccolors[j][i][3] - alphause[0];
665          }
666          alphablockerror1 += alphadist * alphadist;
667       }
668    }
669 /*      for (i = 0; i < 16; i++) {
670          fprintf(stderr, "%d ", alphaenc1[i]);
671       }
672       fprintf(stderr, "cutVals ");
673       for (i = 0; i < 8; i++) {
674          fprintf(stderr, "%d ", acutValues[i]);
675       }
676       fprintf(stderr, "srcVals ");
677       for (j = 0; j < numypixels; j++)
678          for (i = 0; i < numxpixels; i++) {
679             fprintf(stderr, "%d ", srccolors[j][i][3]);
680          }
681 
682       fprintf(stderr, "\n");
683    }*/
684    /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
685       are false but try it anyway */
686    if (alphablockerror1 >= 32) {
687 
688       /* don't bother if encoding is already very good, this condition should also imply
689       we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
690       alphablockerror2 = 0;
691       for (aindex = 0; aindex < 5; aindex++) {
692          /* don't forget here is always rounded down */
693          acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
694       }
695       for (j = 0; j < numypixels; j++) {
696          for (i = 0; i < numxpixels; i++) {
697              /* maybe it's overkill to have the most complicated calculation just for the error
698                calculation which we only need to figure out if encoding1 or encoding2 is better... */
699             if (srccolors[j][i][3] == 0) {
700                alphaenc2[4*j + i] = 6;
701                alphadist = 0;
702             }
703             else if (srccolors[j][i][3] == 255) {
704                alphaenc2[4*j + i] = 7;
705                alphadist = 0;
706             }
707             else if (srccolors[j][i][3] <= acutValues[0]) {
708                alphaenc2[4*j + i] = 0;
709                alphadist = srccolors[j][i][3] - alphabase[0];
710             }
711             else if (srccolors[j][i][3] <= acutValues[1]) {
712                alphaenc2[4*j + i] = 2;
713                alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
714             }
715             else if (srccolors[j][i][3] <= acutValues[2]) {
716                alphaenc2[4*j + i] = 3;
717                alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
718             }
719             else if (srccolors[j][i][3] <= acutValues[3]) {
720                alphaenc2[4*j + i] = 4;
721                alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
722             }
723             else if (srccolors[j][i][3] <= acutValues[4]) {
724                alphaenc2[4*j + i] = 5;
725                alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
726             }
727             else {
728                alphaenc2[4*j + i] = 1;
729                alphadist = srccolors[j][i][3] - alphabase[1];
730             }
731             alphablockerror2 += alphadist * alphadist;
732          }
733       }
734 
735 
736       /* skip this if the error is already very small
737          this encoding is MUCH better on average than #2 though, but expensive! */
738       if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
739          GLshort blockerrlin1 = 0;
740          GLshort blockerrlin2 = 0;
741          GLubyte nralphainrangelow = 0;
742          GLubyte nralphainrangehigh = 0;
743          alphatest[0] = 0xff;
744          alphatest[1] = 0x0;
745          /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
746          for (j = 0; j < numypixels; j++) {
747             for (i = 0; i < numxpixels; i++) {
748                if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28)))
749                   alphatest[1] = srccolors[j][i][3];
750                if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28))
751                   alphatest[0] = srccolors[j][i][3];
752             }
753          }
754           /* shouldn't happen too often, don't really care about those degenerated cases */
755           if (alphatest[1] <= alphatest[0]) {
756              alphatest[0] = 1;
757              alphatest[1] = 254;
758 /*             fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/
759          }
760          for (aindex = 0; aindex < 5; aindex++) {
761          /* don't forget here is always rounded down */
762             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
763          }
764 
765          /* find the "average" difference between the alpha values and the next encoded value.
766             This is then used to calculate new base values.
767             Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
768             since they will see more improvement, and also because the values in the middle are somewhat
769             likely to get no improvement at all (because the base values might move in different directions)?
770             OTOH it would mean the values in the middle are even less likely to get an improvement
771          */
772          for (j = 0; j < numypixels; j++) {
773             for (i = 0; i < numxpixels; i++) {
774                if (srccolors[j][i][3] <= alphatest[0] / 2) {
775                }
776                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
777                }
778                else if (srccolors[j][i][3] <= acutValues[0]) {
779                   blockerrlin1 += (srccolors[j][i][3] - alphatest[0]);
780                   nralphainrangelow += 1;
781                }
782                else if (srccolors[j][i][3] <= acutValues[1]) {
783                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
784                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
785                   nralphainrangelow += 1;
786                   nralphainrangehigh += 1;
787                }
788                else if (srccolors[j][i][3] <= acutValues[2]) {
789                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
790                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
791                   nralphainrangelow += 1;
792                   nralphainrangehigh += 1;
793                }
794                else if (srccolors[j][i][3] <= acutValues[3]) {
795                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
796                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
797                   nralphainrangelow += 1;
798                   nralphainrangehigh += 1;
799                }
800                else if (srccolors[j][i][3] <= acutValues[4]) {
801                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
802                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
803                   nralphainrangelow += 1;
804                   nralphainrangehigh += 1;
805                   }
806                else {
807                   blockerrlin2 += (srccolors[j][i][3] - alphatest[1]);
808                   nralphainrangehigh += 1;
809                }
810             }
811          }
812          /* shouldn't happen often, needed to avoid div by zero */
813          if (nralphainrangelow == 0) nralphainrangelow = 1;
814          if (nralphainrangehigh == 0) nralphainrangehigh = 1;
815          alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
816 /*         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
817          fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);*/
818          /* again shouldn't really happen often... */
819          if (alphatest[0] < 0) {
820             alphatest[0] = 0;
821 /*            fprintf(stderr, "adj alpha base val to 0\n");*/
822          }
823          alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
824          if (alphatest[1] > 255) {
825             alphatest[1] = 255;
826 /*            fprintf(stderr, "adj alpha base val to 255\n");*/
827          }
828 
829          alphablockerror3 = 0;
830          for (aindex = 0; aindex < 5; aindex++) {
831          /* don't forget here is always rounded down */
832             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
833          }
834          for (j = 0; j < numypixels; j++) {
835             for (i = 0; i < numxpixels; i++) {
836                 /* maybe it's overkill to have the most complicated calculation just for the error
837                   calculation which we only need to figure out if encoding1 or encoding2 is better... */
838                if (srccolors[j][i][3] <= alphatest[0] / 2) {
839                   alphaenc3[4*j + i] = 6;
840                   alphadist = srccolors[j][i][3];
841                }
842                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
843                   alphaenc3[4*j + i] = 7;
844                   alphadist = 255 - srccolors[j][i][3];
845                }
846                else if (srccolors[j][i][3] <= acutValues[0]) {
847                   alphaenc3[4*j + i] = 0;
848                   alphadist = srccolors[j][i][3] - alphatest[0];
849                }
850                else if (srccolors[j][i][3] <= acutValues[1]) {
851                  alphaenc3[4*j + i] = 2;
852                  alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
853                }
854                else if (srccolors[j][i][3] <= acutValues[2]) {
855                   alphaenc3[4*j + i] = 3;
856                   alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
857                }
858                else if (srccolors[j][i][3] <= acutValues[3]) {
859                   alphaenc3[4*j + i] = 4;
860                   alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
861                }
862                else if (srccolors[j][i][3] <= acutValues[4]) {
863                   alphaenc3[4*j + i] = 5;
864                   alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
865                }
866                else {
867                   alphaenc3[4*j + i] = 1;
868                   alphadist = srccolors[j][i][3] - alphatest[1];
869                }
870                alphablockerror3 += alphadist * alphadist;
871             }
872          }
873       }
874    }
875   /* write the alpha values and encoding back. */
876    if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
877 /*      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/
878       writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 );
879    }
880    else if (alphablockerror2 <= alphablockerror3) {
881 /*      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/
882       writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
883    }
884    else {
885 /*      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/
886       writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
887    }
888 }
889 
/*
 * Copy one tile of at most 4x4 texels out of the source image into
 * srcpixels[row][column][component].
 *
 * srcaddr points at the tile's top-left texel. srcRowStride is the distance
 * between image rows measured in texels (it is multiplied by comps here).
 * Only the first numypixels rows, numxpixels columns and comps components
 * are written; every other entry of srcpixels is left untouched.
 * Each channel is divided by (CHAN_MAX / 255) on the way in.
 */
static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr,
                         GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
{
   GLubyte row, col, chan;

   for (row = 0; row < numypixels; row++) {
      /* first channel of the first texel of this tile row */
      const GLchan *src = srcaddr + row * srcRowStride * comps;

      for (col = 0; col < numxpixels; col++) {
         GLubyte *texel = srcpixels[row][col];

         /* texels of one row are contiguous, so just keep consuming channels */
         for (chan = 0; chan < comps; chan++) {
            texel[chan] = *src++ / (CHAN_MAX / 255);
         }
      }
   }
}
904 
905 
/*
 * Compress an image into S3TC blocks (DXT1, DXT3 or DXT5).
 *
 * The image is walked in 4x4 texel tiles, left to right and top to bottom.
 * Tiles on the right/bottom edge may cover fewer than 4 texels in either
 * direction; the encoders are told how many texels are valid.
 *
 * srccomps      number of GLchan components per source texel
 * width, height image size in texels
 * srcPixData    source texels; consecutive rows are width * srccomps
 *               channels apart
 * destFormat    GL_COMPRESSED_RGB_S3TC_DXT1_EXT,
 *               GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,
 *               GL_COMPRESSED_RGBA_S3TC_DXT3_EXT or
 *               GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; anything else asserts
 *               and returns without writing
 * dest          receives 8 bytes per tile for DXT1, 16 bytes for DXT3/DXT5
 * dstRowStride  bytes between consecutive rows of blocks in dest; a value
 *               smaller than one packed row of blocks is ignored and the
 *               rows are written back to back
 *
 * NOTE(review): the DXT3 and DXT5 paths read component 3 of the extracted
 * texels, which extractsrccolors only fills when srccomps is 4 - presumably
 * callers guarantee that for these formats; confirm.
 */
static void tx_compress_dxtn(GLint srccomps, GLint width, GLint height, const GLubyte *srcPixData,
                     GLenum destFormat, GLubyte *dest, GLint dstRowStride)
{
   GLubyte *blkaddr = dest;
   GLubyte srcpixels[4][4][4];
   const GLchan *srcaddr;
   GLint numxpixels, numypixels;
   GLint i, j;
   GLint blockbytes, rowbytes, dstRowDiff;

   switch (destFormat) {
   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
      blockbytes = 8;    /* colour block only */
      break;
   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
      blockbytes = 16;   /* 8 bytes alpha followed by 8 bytes colour */
      break;
   default:
      assert(false);
      return;
   }

   /* hmm we used to get called without dstRowStride...
      Compare against the size of a full row of blocks (width rounded up to
      a multiple of 4), not against the unpadded width: otherwise a stride
      between the two yields a negative gap and blocks already written get
      overwritten. */
   rowbytes = ((width + 3) / 4) * blockbytes;
   dstRowDiff = (dstRowStride >= rowbytes) ? dstRowStride - rowbytes : 0;

   for (j = 0; j < height; j += 4) {
      numypixels = (height - j >= 4) ? 4 : height - j;
      srcaddr = srcPixData + j * width * srccomps;
      for (i = 0; i < width; i += 4) {
         numxpixels = (width - i >= 4) ? 4 : width - i;
         extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);

         switch (destFormat) {
         case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: {
            /* explicit 4 bit alpha: two texels per byte, left texel in the
               low nibble. Texels outside the image were never extracted,
               so give them alpha 0 instead of reading leftover data. */
            GLubyte *alphadst = blkaddr;
            GLint row, col;
            for (row = 0; row < 4; row++) {
               for (col = 0; col < 4; col += 2) {
                  GLubyte lo = 0;
                  GLubyte hi = 0;
                  if (row < numypixels) {
                     if (col < numxpixels)
                        lo = srcpixels[row][col][3];
                     if (col + 1 < numxpixels)
                        hi = srcpixels[row][col + 1][3];
                  }
                  *alphadst++ = (lo >> 4) | (hi & 0xf0);
               }
            }
            encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat);
            break;
         }
         case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
            encodedxt5alpha(blkaddr, srcpixels, numxpixels, numypixels);
            encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat);
            break;
         default:
            /* both DXT1 variants: colour block only */
            encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
            break;
         }

         srcaddr += srccomps * numxpixels;
         blkaddr += blockbytes;
      }
      /* skip any padding at the end of this row of blocks */
      blkaddr += dstRowDiff;
   }
}
990