1 /*
2  * libtxc_dxtn
3  * Version:  1.0
4  *
5  * Copyright (C) 2004  Roland Scheidegger   All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included
15  * in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #ifndef TEXCOMPRESS_S3TC_TMP_H
26 #define TEXCOMPRESS_S3TC_TMP_H
27 
28 #include "util/glheader.h"
29 
/* Channel type written by the texel fetch functions below. */
typedef GLubyte GLchan;
/* Identity conversion: a GLchan is a GLubyte here. */
#define UBYTE_TO_CHAN(b)  (b)
/* Largest channel value; used as the "fully opaque" alpha by the decoder. */
#define CHAN_MAX 255
/* Indices of the red, green, blue and alpha channels within a decoded texel. */
#define RCOMP 0
#define GCOMP 1
#define BCOMP 2
#define ACOMP 3

/* Expand the red field (bits 15..11) of a packed 5:6:5 color to 8 bits;
   the field's top 3 bits are replicated into the low 3 bits of the result. */
#define EXP5TO8R(packedcol)					\
   ((((packedcol) >> 8) & 0xf8) | (((packedcol) >> 13) & 0x7))

/* Expand the green field (bits 10..5) of a packed 5:6:5 color to 8 bits;
   the field's top 2 bits are replicated into the low 2 bits of the result. */
#define EXP6TO8G(packedcol)					\
   ((((packedcol) >> 3) & 0xfc) | (((packedcol) >>  9) & 0x3))

/* Expand the blue field (bits 4..0) of a packed 5:6:5 color to 8 bits;
   the field's top 3 bits are replicated into the low 3 bits of the result. */
#define EXP5TO8B(packedcol)					\
   ((((packedcol) << 3) & 0xf8) | (((packedcol) >>  2) & 0x7))

/* Expand a 4-bit value to 8 bits by copying the nibble into both halves.
   The argument is expected to be in the range 0..15. */
#define EXP4TO8(col)						\
   ((col) | ((col) << 4))
49 
50 /* inefficient. To be efficient, it would be necessary to decode 16 pixels at once */
51 
/*
 * Decode the color of one texel from an 8-byte DXT color block.
 *
 * img_block_src: the 8-byte color block (two little-endian 5:6:5 colors
 *                followed by 32 bits of 2-bit selectors).
 * i, j:          column and row of the texel inside the block.
 * dxt_type:      callers in this file pass 0 (RGB DXT1), 1 (RGBA DXT1) or
 *                2 (DXT3/DXT5). Any value > 1 forces the 4-color palette;
 *                only value 1 lets selector 3 produce a zero alpha.
 * texel:         receives four GLchan values (RCOMP, GCOMP, BCOMP, ACOMP).
 */
static void dxt135_decode_imageblock ( const GLubyte *img_block_src,
                         GLint i, GLint j, GLuint dxt_type, GLvoid *texel ) {
   GLchan *out = (GLchan *) texel;
   const GLushort c0 = img_block_src[0] | (img_block_src[1] << 8);
   const GLushort c1 = img_block_src[2] | (img_block_src[3] << 8);
   const GLuint selectors = img_block_src[4] | (img_block_src[5] << 8) |
      (img_block_src[6] << 16) | ((GLuint)img_block_src[7] << 24);
   /* two selector bits per texel, stored row by row */
   const GLubyte shift = 2 * (j * 4 + i);
   const GLubyte sel = (GLubyte) ((selectors >> shift) & 3);

   /* both end points expanded to 8 bits per channel */
   const GLint r0 = EXP5TO8R(c0), g0 = EXP6TO8G(c0), b0 = EXP5TO8B(c0);
   const GLint r1 = EXP5TO8R(c1), g1 = EXP6TO8G(c1), b1 = EXP5TO8B(c1);

   /* 4-color palette is used for dxt_type > 1, or when color0 > color1 */
   const int four_color = (dxt_type > 1) || (c0 > c1);

   out[ACOMP] = CHAN_MAX;

   if (sel == 0) {
      out[RCOMP] = UBYTE_TO_CHAN( r0 );
      out[GCOMP] = UBYTE_TO_CHAN( g0 );
      out[BCOMP] = UBYTE_TO_CHAN( b0 );
   }
   else if (sel == 1) {
      out[RCOMP] = UBYTE_TO_CHAN( r1 );
      out[GCOMP] = UBYTE_TO_CHAN( g1 );
      out[BCOMP] = UBYTE_TO_CHAN( b1 );
   }
   else if (sel == 2) {
      if (four_color) {
         /* two thirds of color0, one third of color1 */
         out[RCOMP] = UBYTE_TO_CHAN( ((r0 * 2 + r1) / 3) );
         out[GCOMP] = UBYTE_TO_CHAN( ((g0 * 2 + g1) / 3) );
         out[BCOMP] = UBYTE_TO_CHAN( ((b0 * 2 + b1) / 3) );
      }
      else {
         /* 3-color palette: midpoint of both colors */
         out[RCOMP] = UBYTE_TO_CHAN( ((r0 + r1) / 2) );
         out[GCOMP] = UBYTE_TO_CHAN( ((g0 + g1) / 2) );
         out[BCOMP] = UBYTE_TO_CHAN( ((b0 + b1) / 2) );
      }
   }
   else {
      /* sel == 3; the selector is masked to two bits so nothing else remains */
      if (four_color) {
         /* one third of color0, two thirds of color1 */
         out[RCOMP] = UBYTE_TO_CHAN( ((r0 + r1 * 2) / 3) );
         out[GCOMP] = UBYTE_TO_CHAN( ((g0 + g1 * 2) / 3) );
         out[BCOMP] = UBYTE_TO_CHAN( ((b0 + b1 * 2) / 3) );
      }
      else {
         /* 3-color palette: black, and transparent for dxt_type 1 */
         out[RCOMP] = 0;
         out[GCOMP] = 0;
         out[BCOMP] = 0;
         if (dxt_type == 1)
            out[ACOMP] = UBYTE_TO_CHAN(0);
      }
   }
}
105 
106 
/*
 * Fetch texel (i,j) of an RGB DXT1 image into texel[RCOMP], texel[GCOMP],
 * texel[BCOMP] and texel[ACOMP].
 */
static void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel)
{
   /* srcRowStride is rounded up to whole 4-texel blocks; each block is 8 bytes */
   const GLint blocks_per_row = (srcRowStride + 3) / 4;
   const GLint block_index = blocks_per_row * (j / 4) + (i / 4);
   const GLubyte *block = pixdata + block_index * 8;

   /* dxt_type 0: RGB DXT1 */
   dxt135_decode_imageblock(block, (i & 3), (j & 3), 0, texel);
}
117 
118 
/*
 * Fetch texel (i,j) of an RGBA DXT1 image into texel[RCOMP], texel[GCOMP],
 * texel[BCOMP] and texel[ACOMP].
 */
static void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel)
{
   /* srcRowStride is rounded up to whole 4-texel blocks; each block is 8 bytes */
   const GLint blocks_per_row = (srcRowStride + 3) / 4;
   const GLint block_index = blocks_per_row * (j / 4) + (i / 4);
   const GLubyte *block = pixdata + block_index * 8;

   /* dxt_type 1: RGBA DXT1, selector 3 may decode to a zero alpha */
   dxt135_decode_imageblock(block, (i & 3), (j & 3), 1, texel);
}
129 
/*
 * Fetch texel (i,j) of a DXT3 image into texel[RCOMP], texel[GCOMP],
 * texel[BCOMP] and texel[ACOMP].
 *
 * Each 16-byte block holds 8 bytes of explicit 4-bit alpha values followed
 * by an 8-byte color block.
 */
static void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel) {
   GLchan *out = (GLchan *) texel;
   const GLint blocks_per_row = (srcRowStride + 3) / 4;
   const GLubyte *block = pixdata + (blocks_per_row * (j / 4) + (i / 4)) * 16;

   /* two alpha nibbles per byte; odd columns use the high nibble */
   const GLubyte alpha_byte = block[((j & 3) * 4 + (i & 3)) / 2];
   const GLubyte anibble = (alpha_byte >> (4 * (i & 1))) & 0xf;

   /* the color part always uses the 4-color palette (dxt_type 2) */
   dxt135_decode_imageblock(block + 8, (i & 3), (j & 3), 2, texel);

   /* replace the alpha written by the color decoder with the expanded nibble */
   out[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8(anibble)) );
}
143 
/*
 * Fetch texel (i,j) of a DXT5 image into texel[RCOMP], texel[GCOMP],
 * texel[BCOMP] and texel[ACOMP].
 *
 * Each 16-byte block holds two alpha end points, 48 bits of 3-bit alpha
 * selectors, and then an 8-byte color block.
 */
static void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel) {
   GLchan *out = (GLchan *) texel;
   const GLint blocks_per_row = (srcRowStride + 3) / 4;
   const GLubyte *block = pixdata + (blocks_per_row * (j / 4) + (i / 4)) * 16;
   const GLubyte alpha0 = block[0];
   const GLubyte alpha1 = block[1];

   /* position of this texel's 3-bit selector inside the 48-bit field */
   const GLubyte bit_pos = ((j & 3) * 4 + (i & 3)) * 3;
   const GLubyte bit_off = bit_pos & 0x7;
   /* a selector can straddle two bytes, so fetch both and combine */
   const GLubyte acodelow = block[2 + bit_pos / 8];
   const GLubyte acodehigh = block[3 + bit_pos / 8];
   const GLubyte code = ((acodelow >> bit_off) |
      (acodehigh << (8 - bit_off))) & 0x7;

   /* the color part always uses the 4-color palette (dxt_type 2) */
   dxt135_decode_imageblock(block + 8, (i & 3), (j & 3), 2, texel);

   switch (code) {
   case 0:
      out[ACOMP] = UBYTE_TO_CHAN( alpha0 );
      break;
   case 1:
      out[ACOMP] = UBYTE_TO_CHAN( alpha1 );
      break;
   default:
      if (alpha0 > alpha1) {
         /* 8-value mode: six interpolated values between the end points */
         out[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) );
      }
      else if (code == 6) {
         out[ACOMP] = 0;
      }
      else if (code == 7) {
         out[ACOMP] = CHAN_MAX;
      }
      else {
         /* 6-value mode: four interpolated values (codes 2..5) */
         out[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) );
      }
      break;
   }
}
174 
175 
/* Per-channel weights applied to the squared color differences in the error
   function. They are the squares of 2/4/1, chosen according to the
   rgb->luminance conversion; it is not clear whether this really reflects
   visual perception. */
#define REDWEIGHT 4
#define GREENWEIGHT 16
#define BLUEWEIGHT 1

/* Source alpha values at or below this threshold are encoded as the
   transparent selector when compressing to RGBA DXT1. */
#define ALPHACUT 127
183 
/*
 * Try to improve the two base colors chosen for a block.
 *
 * Every pixel is assigned to the nearest entry of the 4-color palette built
 * from the current base colors (same luminance-weighted distance metric as
 * used for the final encoding). The signed per-channel errors are then
 * distributed onto the two base colors, weighted by how much each base color
 * contributes to the chosen palette entry, and the base colors are moved by
 * the resulting average error. If the two results end up so close that they
 * could collapse to the same 5:6:5 value, they are pushed apart.
 *
 * blkaddr, type, haveAlpha: unused.
 * srccolors:  source pixels indexed [row][column][channel]; only the first
 *             numypixels rows / numxpixels columns and channels 0..2 are read.
 * bestcolor:  in/out; points at two colors of (at least) 3 bytes each. On
 *             return bestcolor[0] holds the refined color with the smaller
 *             (or equal) packed 5:6:5 value, bestcolor[1] the other one.
 *             The pointers themselves are not changed, only the bytes they
 *             point to.
 *
 * TODO could also try to find a better encoding for the 3-color-encoding type,
 * this really should be done if it's rgba_dxt1 and we have alpha in the block,
 * currently even values which will be mapped to black due to their alpha value
 * will influence the result.
 */
static void fancybasecolorsearch( UNUSED GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
                           GLint numxpixels, GLint numypixels, UNUSED GLint type, UNUSED GLboolean haveAlpha)
{
   /* For each palette index, how strongly (in thirds) a pixel mapped to it
      pulls on base color 0 and on base color 1:
      index 0 = color 0, index 1 = color 1, index 2 = 2/3 c0 + 1/3 c1,
      index 3 = 1/3 c0 + 2/3 c1. */
   static const GLubyte encweight[4][2] = { {3, 0}, {0, 3}, {2, 1}, {1, 2} };

   GLint i, j, colors, z;
   GLuint pixerror, pixerrorbest;
   GLint colordist, blockerrlin[2][3];
   GLint pixerrorcolor[3];            /* signed per-channel error of the candidate */
   GLint pixerrorcolorbest[3] = {0};  /* signed per-channel error of the best candidate */
   GLubyte nrcolor[2];
   GLubyte enc = 0;
   GLubyte cv[4][4];
   GLubyte testcolor[2][3];

   /* Order the working copies by their packed 5:6:5 value: testcolor[0] gets
      the smaller one. */
   if (((bestcolor[0][0] & 0xf8) << 8 | (bestcolor[0][1] & 0xfc) << 3 | bestcolor[0][2] >> 3) <
      ((bestcolor[1][0] & 0xf8) << 8 | (bestcolor[1][1] & 0xfc) << 3 | bestcolor[1][2] >> 3)) {
      for (i = 0; i < 3; i++) {
         testcolor[0][i] = bestcolor[0][i];
         testcolor[1][i] = bestcolor[1][i];
      }
   }
   else {
      for (i = 0; i < 3; i++) {
         testcolor[1][i] = bestcolor[0][i];
         testcolor[0][i] = bestcolor[1][i];
      }
   }

   /* 4-color palette derived from the two base colors */
   for (i = 0; i < 3; i ++) {
      cv[0][i] = testcolor[0][i];
      cv[1][i] = testcolor[1][i];
      cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3;
      cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3;
   }

   for (j = 0; j < 2; j++) {
      for (z = 0; z < 3; z++) {
         blockerrlin[j][z] = 0;
      }
      nrcolor[j] = 0;
   }

   for (j = 0; j < numypixels; j++) {
      for (i = 0; i < numxpixels; i++) {
         /* find the palette entry closest to this pixel */
         pixerrorbest = 0xffffffff;
         for (colors = 0; colors < 4; colors++) {
            colordist = srccolors[j][i][0] - (cv[colors][0]);
            pixerror = colordist * colordist * REDWEIGHT;
            pixerrorcolor[0] = colordist;
            colordist = srccolors[j][i][1] - (cv[colors][1]);
            pixerror += colordist * colordist * GREENWEIGHT;
            pixerrorcolor[1] = colordist;
            colordist = srccolors[j][i][2] - (cv[colors][2]);
            pixerror += colordist * colordist * BLUEWEIGHT;
            pixerrorcolor[2] = colordist;
            if (pixerror < pixerrorbest) {
               enc = colors;
               pixerrorbest = pixerror;
               for (z = 0; z < 3; z++) {
                  pixerrorcolorbest[z] = pixerrorcolor[z];
               }
            }
         }
         /* distribute the remaining signed error onto both base colors */
         for (z = 0; z < 3; z++) {
            blockerrlin[0][z] += encweight[enc][0] * pixerrorcolorbest[z];
            blockerrlin[1][z] += encweight[enc][1] * pixerrorcolorbest[z];
         }
         nrcolor[0] += encweight[enc][0];
         nrcolor[1] += encweight[enc][1];
      }
   }

   /* avoid dividing by zero for a base color no pixel was attracted to */
   if (nrcolor[0] == 0) nrcolor[0] = 1;
   if (nrcolor[1] == 0) nrcolor[1] = 1;

   /* move each base color by its average error, clamped to 0..255 */
   for (j = 0; j < 2; j++) {
      for (i = 0; i < 3; i++) {
         GLint newvalue = testcolor[j][i] + blockerrlin[j][i] / nrcolor[j];
         if (newvalue <= 0)
            testcolor[j][i] = 0;
         else if (newvalue >= 255)
            testcolor[j][i] = 255;
         else testcolor[j][i] = newvalue;
      }
   }

   if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) &&
       (abs(testcolor[0][1] - testcolor[1][1]) < 4) &&
       (abs(testcolor[0][2] - testcolor[1][2]) < 8)) {
       /* both colors are so close they might get encoded as the same 16bit values */
      GLubyte coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, ind0, ind1;

      coldiffred = abs(testcolor[0][0] - testcolor[1][0]);
      /* green has one more bit of precision, so count its difference twice */
      coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]);
      coldiffblue = abs(testcolor[0][2] - testcolor[1][2]);
      coldiffmax = coldiffred;
      if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen;
      if (coldiffmax < coldiffblue) coldiffmax = coldiffblue;
      if (coldiffmax > 0) {
         /* the smaller the existing difference, the harder we push */
         if (coldiffmax > 4) factor = 2;
         else if (coldiffmax > 2) factor = 3;
         else factor = 4;
         /* Won't do much if the color value is near 255... */
         /* ind1 is the color with the larger (or equal) green channel */
         if (testcolor[1][1] >= testcolor[0][1]) {
            ind1 = 1; ind0 = 0;
         }
         else {
            ind1 = 0; ind0 = 1;
         }
         if ((testcolor[ind1][1] + factor * coldiffgreen) <= 255)
            testcolor[ind1][1] += factor * coldiffgreen;
         else testcolor[ind1][1] = 255;
         /* red: raise whichever color already has the larger red channel
            (red must be compared against red, not against green) */
         if ((testcolor[ind1][0] - testcolor[ind0][0]) > 0) {
            if ((testcolor[ind1][0] + factor * coldiffred) <= 255)
               testcolor[ind1][0] += factor * coldiffred;
            else testcolor[ind1][0] = 255;
         }
         else {
            if ((testcolor[ind0][0] + factor * coldiffred) <= 255)
               testcolor[ind0][0] += factor * coldiffred;
            else testcolor[ind0][0] = 255;
         }
         /* blue: same treatment as red */
         if ((testcolor[ind1][2] - testcolor[ind0][2]) > 0) {
            if ((testcolor[ind1][2] + factor * coldiffblue) <= 255)
               testcolor[ind1][2] += factor * coldiffblue;
            else testcolor[ind1][2] = 255;
         }
         else {
            if ((testcolor[ind0][2] + factor * coldiffblue) <= 255)
               testcolor[ind0][2] += factor * coldiffblue;
            else testcolor[ind0][2] = 255;
         }
      }
   }

   /* Write the result back ordered by packed 5:6:5 value. Only the blue
      channel is shifted right by 3; the red and green fields are already
      positioned by their left shifts. */
   if (((testcolor[0][0] & 0xf8) << 8 | (testcolor[0][1] & 0xfc) << 3 | testcolor[0][2] >> 3) <
      ((testcolor[1][0] & 0xf8) << 8 | (testcolor[1][1] & 0xfc) << 3 | testcolor[1][2] >> 3)) {
      for (i = 0; i < 3; i++) {
         bestcolor[0][i] = testcolor[0][i];
         bestcolor[1][i] = testcolor[1][i];
      }
   }
   else {
      for (i = 0; i < 3; i++) {
         bestcolor[0][i] = testcolor[1][i];
         bestcolor[1][i] = testcolor[0][i];
      }
   }
}
373 
374 
375 
/*
 * Pick the per-pixel selectors for the given base colors and write the
 * finished 8-byte color block to blkaddr.
 *
 * The base colors are first truncated to 5:6:5 precision (in place) and
 * ordered so that color0 >= color1. The block is then encoded with the
 * 4-color palette and, for the two DXT1 formats, also with the 3-color
 * palette; the variant with the smaller luminance-weighted error is stored.
 * The 3-color variant is also stored whenever haveAlpha is set.
 *
 * blkaddr:    destination, 8 bytes are written.
 * srccolors:  source pixels indexed [row][column][channel].
 * bestcolor:  the two base colors; both the bytes and the two pointers may
 *             be modified.
 * type:       compressed format enum.
 * haveAlpha:  non-zero forces the 3-color (transparent-capable) layout.
 */
static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
                           GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha)
{
   GLint x, y, k;
   GLint diff;
   GLuint err4, err3, dist, nearest;
   GLushort color0, color1, first, second;
   GLuint bits = 0, bits2 = 0, chosenbits;
   GLubyte enc = 0;
   GLubyte cv[4][4];

   /* drop the bits that do not survive 5:6:5 packing */
   for (k = 0; k < 2; k++) {
      bestcolor[k][0] &= 0xf8;
      bestcolor[k][1] &= 0xfc;
      bestcolor[k][2] &= 0xf8;
   }

   color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3;
   color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3;
   if (color0 < color1) {
      /* make color0 the larger packed value, keeping bestcolor in step */
      const GLushort heldcolor = color0;
      GLubyte *heldptr = bestcolor[0];
      color0 = color1;
      color1 = heldcolor;
      bestcolor[0] = bestcolor[1];
      bestcolor[1] = heldptr;
   }

   /* 4-color palette */
   for (k = 0; k < 3; k++) {
      const GLubyte c0 = bestcolor[0][k];
      const GLubyte c1 = bestcolor[1][k];
      cv[0][k] = c0;
      cv[1][k] = c1;
      cv[2][k] = (c0 * 2 + c1) / 3;
      cv[3][k] = (c0 + c1 * 2) / 3;
   }

   err4 = 0;
   for (y = 0; y < numypixels; y++) {
      for (x = 0; x < numxpixels; x++) {
         const GLubyte *px = srccolors[y][x];
         nearest = 0xffffffff;
         for (k = 0; k < 4; k++) {
            diff = px[0] - cv[k][0];
            dist = diff * diff * REDWEIGHT;
            diff = px[1] - cv[k][1];
            dist += diff * diff * GREENWEIGHT;
            diff = px[2] - cv[k][2];
            dist += diff * diff * BLUEWEIGHT;
            if (dist < nearest) {
               nearest = dist;
               enc = k;
            }
         }
         err4 += nearest;
         bits |= (uint32_t)enc << (2 * (y * 4 + x));
      }
   }

   /* some hw might disagree but actually decoding should always use 4-color encoding
      for non-dxt1 formats */
   if (!(type == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT)) {
      err3 = 0xffffffff;
   }
   else {
      /* 3-color palette: entry 2 is the midpoint. Entry 3 (black) is set but
         never searched: the loop below only considers entries 0..2. */
      for (k = 0; k < 3; k++) {
         cv[2][k] = (bestcolor[0][k] + bestcolor[1][k]) / 2;
         cv[3][k] = 0;
      }
      err3 = 0;
      for (y = 0; y < numypixels; y++) {
         for (x = 0; x < numxpixels; x++) {
            const GLubyte *px = srccolors[y][x];
            if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (px[3] <= ALPHACUT)) {
               /* transparent pixel: fixed selector, contributes no error */
               enc = 3;
               nearest = 0;
            }
            else {
               nearest = 0xffffffff;
               for (k = 0; k < 3; k++) {
                  diff = px[0] - cv[k][0];
                  dist = diff * diff * REDWEIGHT;
                  diff = px[1] - cv[k][1];
                  dist += diff * diff * GREENWEIGHT;
                  diff = px[2] - cv[k][2];
                  dist += diff * diff * BLUEWEIGHT;
                  if (dist < nearest) {
                     nearest = dist;
                     /* the two base colors are stored swapped in this
                        layout, so selectors 0 and 1 trade places */
                     enc = (k > 1) ? k : (k ^ 1);
                  }
               }
            }
            err3 += nearest;
            bits2 |= (uint32_t)enc << (2 * (y * 4 + x));
         }
      }
   }

   /* choose the layout: 3-color stores the smaller color first */
   if ((err4 > err3) || (haveAlpha)) {
      first = color1;
      second = color0;
      chosenbits = bits2;
   }
   else {
      first = color0;
      second = color1;
      chosenbits = bits;
   }

   /* finally we're finished, write back colors and bits (little endian) */
   blkaddr[0] = first & 0xff;
   blkaddr[1] = first >> 8;
   blkaddr[2] = second & 0xff;
   blkaddr[3] = second >> 8;
   blkaddr[4] = chosenbits & 0xff;
   blkaddr[5] = ( chosenbits >> 8) & 0xff;
   blkaddr[6] = ( chosenbits >> 16) & 0xff;
   blkaddr[7] = chosenbits >> 24;
}
498 
/*
 * Encode the color part of one block.
 *
 * Simplistic approach: the "lowest" and "highest" colors present in the
 * block are used as starting base colors, where low/high is measured as the
 * squared length of the color vector weighted like the error metric. Those
 * colors are then refined by fancybasecolorsearch() and the block is written
 * by storedxtencodedblock().
 *
 * blkaddr:   destination of the 8-byte color block.
 * srccolors: source pixels indexed [row][column][channel]; not modified.
 * type:      compressed format enum; for RGBA DXT1, pixels whose alpha is
 *            at or below ALPHACUT are ignored when picking base colors.
 */
static void encodedxtcolorblockfaster( GLubyte *blkaddr, GLubyte srccolors[4][4][4],
                         GLint numxpixels, GLint numypixels, GLuint type )
{
   GLubyte *bestcolor[2];
   GLubyte basecolors[2][3];
   GLubyte col, row;
   GLuint lowcv, highcv, testcv;
   GLboolean haveAlpha = GL_FALSE;
   GLubyte *px = srccolors[0][0];

   /* start with the first pixel as both the lowest and the highest color */
   lowcv = highcv = px[0] * px[0] * REDWEIGHT +
                    px[1] * px[1] * GREENWEIGHT +
                    px[2] * px[2] * BLUEWEIGHT;
   bestcolor[0] = bestcolor[1] = px;

   for (row = 0; row < numypixels; row++) {
      for (col = 0; col < numxpixels; col++) {
         px = srccolors[row][col];
         /* don't use this as a base color if the pixel will get black/transparent anyway */
         if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (px[3] <= ALPHACUT)) {
            haveAlpha = GL_TRUE;
            continue;
         }
         testcv = px[0] * px[0] * REDWEIGHT +
                  px[1] * px[1] * GREENWEIGHT +
                  px[2] * px[2] * BLUEWEIGHT;
         if (testcv > highcv) {
            highcv = testcv;
            bestcolor[1] = px;
         }
         else if (testcv < lowcv) {
            lowcv = testcv;
            bestcolor[0] = px;
         }
      }
   }

   /* work on private copies so the original color values won't get touched */
   for (row = 0; row < 2; row++) {
      basecolors[row][0] = bestcolor[row][0];
      basecolors[row][1] = bestcolor[row][1];
      basecolors[row][2] = bestcolor[row][2];
      bestcolor[row] = basecolors[row];
   }

   /* try to find better base colors */
   fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
   /* find the best encoding for these colors, and store the result */
   storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
}
553 
/*
 * Write an 8-byte DXT5 alpha block: the two alpha base values followed by
 * sixteen 3-bit selectors packed least-significant-bit first.
 *
 * blkaddr:    destination, 8 bytes are written.
 * alphabase1: stored as the first byte of the block.
 * alphabase2: stored as the second byte of the block.
 * alphaenc:   one selector per pixel. Entries are not masked here, so each
 *             is expected to already fit in 3 bits.
 */
static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2,
                         GLubyte alphaenc[16])
{
   GLint half;

   *blkaddr++ = alphabase1;
   *blkaddr++ = alphabase2;

   /* every group of 8 selectors packs into exactly 3 bytes */
   for (half = 0; half < 2; half++) {
      const GLubyte *e = alphaenc + 8 * half;

      *blkaddr++ = e[0] | (e[1] << 3) | ((e[2] & 3) << 6);
      *blkaddr++ = (e[2] >> 2) | (e[3] << 1) | (e[4] << 4) | ((e[5] & 1) << 7);
      *blkaddr++ = (e[5] >> 1) | (e[6] << 2) | (e[7] << 5);
   }
}
566 
encodedxt5alpha(GLubyte * blkaddr,GLubyte srccolors[4][4][4],GLint numxpixels,GLint numypixels)567 static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4],
568                             GLint numxpixels, GLint numypixels)
569 {
570    GLubyte alphabase[2], alphause[2];
571    GLshort alphatest[2];
572    GLuint alphablockerror1, alphablockerror2, alphablockerror3;
573    GLubyte i, j, aindex, acutValues[7];
574    GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
575    GLboolean alphaabsmin = GL_FALSE;
576    GLboolean alphaabsmax = GL_FALSE;
577    GLshort alphadist;
578 
579    /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
580    alphabase[0] = 0xff; alphabase[1] = 0x0;
581    for (j = 0; j < numypixels; j++) {
582       for (i = 0; i < numxpixels; i++) {
583          if (srccolors[j][i][3] == 0)
584             alphaabsmin = GL_TRUE;
585          else if (srccolors[j][i][3] == 255)
586             alphaabsmax = GL_TRUE;
587          else {
588             if (srccolors[j][i][3] > alphabase[1])
589                alphabase[1] = srccolors[j][i][3];
590             if (srccolors[j][i][3] < alphabase[0])
591                alphabase[0] = srccolors[j][i][3];
592          }
593       }
594    }
595 
596 
597    if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
598       /* shortcut here since it is a very common case (and also avoids later problems) */
599       /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
600       /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
601 
602       *blkaddr++ = srccolors[0][0][3];
603       blkaddr++;
604       *blkaddr++ = 0;
605       *blkaddr++ = 0;
606       *blkaddr++ = 0;
607       *blkaddr++ = 0;
608       *blkaddr++ = 0;
609       *blkaddr++ = 0;
610 /*      fprintf(stderr, "enc0 used\n");*/
611       return;
612    }
613 
614    /* find best encoding for alpha0 > alpha1 */
615    /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
616    alphablockerror1 = 0x0;
617    alphablockerror2 = 0xffffffff;
618    alphablockerror3 = 0xffffffff;
619    if (alphaabsmin) alphause[0] = 0;
620    else alphause[0] = alphabase[0];
621    if (alphaabsmax) alphause[1] = 255;
622    else alphause[1] = alphabase[1];
623    /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
624    for (aindex = 0; aindex < 7; aindex++) {
625       /* don't forget here is always rounded down */
626       acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
627    }
628 
629    for (j = 0; j < numypixels; j++) {
630       for (i = 0; i < numxpixels; i++) {
631          /* maybe it's overkill to have the most complicated calculation just for the error
632             calculation which we only need to figure out if encoding1 or encoding2 is better... */
633          if (srccolors[j][i][3] > acutValues[0]) {
634             alphaenc1[4*j + i] = 0;
635             alphadist = srccolors[j][i][3] - alphause[1];
636          }
637          else if (srccolors[j][i][3] > acutValues[1]) {
638             alphaenc1[4*j + i] = 2;
639             alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7;
640          }
641          else if (srccolors[j][i][3] > acutValues[2]) {
642             alphaenc1[4*j + i] = 3;
643             alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7;
644          }
645          else if (srccolors[j][i][3] > acutValues[3]) {
646             alphaenc1[4*j + i] = 4;
647             alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7;
648          }
649          else if (srccolors[j][i][3] > acutValues[4]) {
650             alphaenc1[4*j + i] = 5;
651             alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7;
652          }
653          else if (srccolors[j][i][3] > acutValues[5]) {
654             alphaenc1[4*j + i] = 6;
655             alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7;
656          }
657          else if (srccolors[j][i][3] > acutValues[6]) {
658             alphaenc1[4*j + i] = 7;
659             alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7;
660          }
661          else {
662             alphaenc1[4*j + i] = 1;
663             alphadist = srccolors[j][i][3] - alphause[0];
664          }
665          alphablockerror1 += alphadist * alphadist;
666       }
667    }
668 /*      for (i = 0; i < 16; i++) {
669          fprintf(stderr, "%d ", alphaenc1[i]);
670       }
671       fprintf(stderr, "cutVals ");
672       for (i = 0; i < 8; i++) {
673          fprintf(stderr, "%d ", acutValues[i]);
674       }
675       fprintf(stderr, "srcVals ");
676       for (j = 0; j < numypixels; j++)
677          for (i = 0; i < numxpixels; i++) {
678             fprintf(stderr, "%d ", srccolors[j][i][3]);
679          }
680 
681       fprintf(stderr, "\n");
682    }*/
683    /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
684       are false but try it anyway */
685    if (alphablockerror1 >= 32) {
686 
687       /* don't bother if encoding is already very good, this condition should also imply
688       we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
689       alphablockerror2 = 0;
690       for (aindex = 0; aindex < 5; aindex++) {
691          /* don't forget here is always rounded down */
692          acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
693       }
694       for (j = 0; j < numypixels; j++) {
695          for (i = 0; i < numxpixels; i++) {
696              /* maybe it's overkill to have the most complicated calculation just for the error
697                calculation which we only need to figure out if encoding1 or encoding2 is better... */
698             if (srccolors[j][i][3] == 0) {
699                alphaenc2[4*j + i] = 6;
700                alphadist = 0;
701             }
702             else if (srccolors[j][i][3] == 255) {
703                alphaenc2[4*j + i] = 7;
704                alphadist = 0;
705             }
706             else if (srccolors[j][i][3] <= acutValues[0]) {
707                alphaenc2[4*j + i] = 0;
708                alphadist = srccolors[j][i][3] - alphabase[0];
709             }
710             else if (srccolors[j][i][3] <= acutValues[1]) {
711                alphaenc2[4*j + i] = 2;
712                alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
713             }
714             else if (srccolors[j][i][3] <= acutValues[2]) {
715                alphaenc2[4*j + i] = 3;
716                alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
717             }
718             else if (srccolors[j][i][3] <= acutValues[3]) {
719                alphaenc2[4*j + i] = 4;
720                alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
721             }
722             else if (srccolors[j][i][3] <= acutValues[4]) {
723                alphaenc2[4*j + i] = 5;
724                alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
725             }
726             else {
727                alphaenc2[4*j + i] = 1;
728                alphadist = srccolors[j][i][3] - alphabase[1];
729             }
730             alphablockerror2 += alphadist * alphadist;
731          }
732       }
733 
734 
735       /* skip this if the error is already very small
736          this encoding is MUCH better on average than #2 though, but expensive! */
737       if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
738          GLshort blockerrlin1 = 0;
739          GLshort blockerrlin2 = 0;
740          GLubyte nralphainrangelow = 0;
741          GLubyte nralphainrangehigh = 0;
742          alphatest[0] = 0xff;
743          alphatest[1] = 0x0;
744          /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
745          for (j = 0; j < numypixels; j++) {
746             for (i = 0; i < numxpixels; i++) {
747                if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28)))
748                   alphatest[1] = srccolors[j][i][3];
749                if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28))
750                   alphatest[0] = srccolors[j][i][3];
751             }
752          }
753           /* shouldn't happen too often, don't really care about those degenerated cases */
754           if (alphatest[1] <= alphatest[0]) {
755              alphatest[0] = 1;
756              alphatest[1] = 254;
757 /*             fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/
758          }
759          for (aindex = 0; aindex < 5; aindex++) {
760          /* don't forget here is always rounded down */
761             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
762          }
763 
764          /* find the "average" difference between the alpha values and the next encoded value.
765             This is then used to calculate new base values.
766             Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
767             since they will see more improvement, and also because the values in the middle are somewhat
768             likely to get no improvement at all (because the base values might move in different directions)?
769             OTOH it would mean the values in the middle are even less likely to get an improvement
770          */
771          for (j = 0; j < numypixels; j++) {
772             for (i = 0; i < numxpixels; i++) {
773                if (srccolors[j][i][3] <= alphatest[0] / 2) {
774                }
775                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
776                }
777                else if (srccolors[j][i][3] <= acutValues[0]) {
778                   blockerrlin1 += (srccolors[j][i][3] - alphatest[0]);
779                   nralphainrangelow += 1;
780                }
781                else if (srccolors[j][i][3] <= acutValues[1]) {
782                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
783                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
784                   nralphainrangelow += 1;
785                   nralphainrangehigh += 1;
786                }
787                else if (srccolors[j][i][3] <= acutValues[2]) {
788                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
789                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
790                   nralphainrangelow += 1;
791                   nralphainrangehigh += 1;
792                }
793                else if (srccolors[j][i][3] <= acutValues[3]) {
794                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
795                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
796                   nralphainrangelow += 1;
797                   nralphainrangehigh += 1;
798                }
799                else if (srccolors[j][i][3] <= acutValues[4]) {
800                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
801                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
802                   nralphainrangelow += 1;
803                   nralphainrangehigh += 1;
804                   }
805                else {
806                   blockerrlin2 += (srccolors[j][i][3] - alphatest[1]);
807                   nralphainrangehigh += 1;
808                }
809             }
810          }
811          /* shouldn't happen often, needed to avoid div by zero */
812          if (nralphainrangelow == 0) nralphainrangelow = 1;
813          if (nralphainrangehigh == 0) nralphainrangehigh = 1;
814          alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
815 /*         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
816          fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);*/
817          /* again shouldn't really happen often... */
818          if (alphatest[0] < 0) {
819             alphatest[0] = 0;
820 /*            fprintf(stderr, "adj alpha base val to 0\n");*/
821          }
822          alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
823          if (alphatest[1] > 255) {
824             alphatest[1] = 255;
825 /*            fprintf(stderr, "adj alpha base val to 255\n");*/
826          }
827 
828          alphablockerror3 = 0;
829          for (aindex = 0; aindex < 5; aindex++) {
830          /* don't forget here is always rounded down */
831             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
832          }
833          for (j = 0; j < numypixels; j++) {
834             for (i = 0; i < numxpixels; i++) {
835                 /* maybe it's overkill to have the most complicated calculation just for the error
836                   calculation which we only need to figure out if encoding1 or encoding2 is better... */
837                if (srccolors[j][i][3] <= alphatest[0] / 2) {
838                   alphaenc3[4*j + i] = 6;
839                   alphadist = srccolors[j][i][3];
840                }
841                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
842                   alphaenc3[4*j + i] = 7;
843                   alphadist = 255 - srccolors[j][i][3];
844                }
845                else if (srccolors[j][i][3] <= acutValues[0]) {
846                   alphaenc3[4*j + i] = 0;
847                   alphadist = srccolors[j][i][3] - alphatest[0];
848                }
849                else if (srccolors[j][i][3] <= acutValues[1]) {
850                  alphaenc3[4*j + i] = 2;
851                  alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
852                }
853                else if (srccolors[j][i][3] <= acutValues[2]) {
854                   alphaenc3[4*j + i] = 3;
855                   alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
856                }
857                else if (srccolors[j][i][3] <= acutValues[3]) {
858                   alphaenc3[4*j + i] = 4;
859                   alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
860                }
861                else if (srccolors[j][i][3] <= acutValues[4]) {
862                   alphaenc3[4*j + i] = 5;
863                   alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
864                }
865                else {
866                   alphaenc3[4*j + i] = 1;
867                   alphadist = srccolors[j][i][3] - alphatest[1];
868                }
869                alphablockerror3 += alphadist * alphadist;
870             }
871          }
872       }
873    }
874   /* write the alpha values and encoding back. */
875    if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
876 /*      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/
877       writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 );
878    }
879    else if (alphablockerror2 <= alphablockerror3) {
880 /*      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/
881       writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
882    }
883    else {
884 /*      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/
885       writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
886    }
887 }
888 
/*
 * Copy a numxpixels x numypixels tile of source texels into srcpixels.
 *
 * srcaddr points at the first component of the tile's top-left texel.
 * srcRowStride is the distance between image rows measured in texels (it is
 * multiplied by comps to get a component offset).  For every copied texel
 * only the first comps components of srcpixels[row][col][] are written;
 * the remaining components, and all texels outside the
 * numxpixels x numypixels area, are left untouched.
 */
static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr,
                         GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
{
   GLubyte row, col, chan;

   for (row = 0; row < numypixels; row++) {
      /* first component of this tile row in the source image */
      const GLchan *rowstart = srcaddr + row * srcRowStride * comps;
      GLint offset = 0;

      for (col = 0; col < numxpixels; col++) {
         for (chan = 0; chan < comps; chan++) {
            /* rescale from the GLchan range to 0..255 */
            srcpixels[row][col][chan] = rowstart[offset++] / (CHAN_MAX / 255);
         }
      }
   }
}
903 
904 
905 static void
tx_compress_dxt1(int srccomps,int width,int height,const GLubyte * srcPixData,GLubyte * dest,int dstRowStride,unsigned dstComps)906 tx_compress_dxt1(int srccomps, int width, int height,
907                  const GLubyte *srcPixData, GLubyte *dest, int dstRowStride,
908                  unsigned dstComps)
909 {
910    GLenum destFormat = dstComps == 3 ? GL_COMPRESSED_RGB_S3TC_DXT1_EXT
911                                      : GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
912    GLubyte *blkaddr = dest;
913    GLubyte srcpixels[4][4][4];
914    const GLchan *srcaddr = srcPixData;
915    int numxpixels, numypixels;
916 
917    /* hmm we used to get called without dstRowStride... */
918    int dstRowDiff = dstRowStride >= (width * 2) ?
919                     dstRowStride - (((width + 3) & ~3) * 2) : 0;
920    /* fprintf(stderr, "dxt1 tex width %d tex height %d dstRowStride %d\n",
921               width, height, dstRowStride); */
922    for (int j = 0; j < height; j += 4) {
923       if (height > j + 3) numypixels = 4;
924       else numypixels = height - j;
925       srcaddr = srcPixData + j * width * srccomps;
926       for (int i = 0; i < width; i += 4) {
927          if (width > i + 3) numxpixels = 4;
928          else numxpixels = width - i;
929          extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
930          encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
931          srcaddr += srccomps * numxpixels;
932          blkaddr += 8;
933       }
934       blkaddr += dstRowDiff;
935    }
936 }
937 
938 static void
tx_compress_dxt3(int srccomps,int width,int height,const GLubyte * srcPixData,GLubyte * dest,int dstRowStride)939 tx_compress_dxt3(int srccomps, int width, int height,
940                  const GLubyte *srcPixData, GLubyte *dest, int dstRowStride)
941 {
942    GLenum destFormat = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
943    GLubyte *blkaddr = dest;
944    GLubyte srcpixels[4][4][4];
945    const GLchan *srcaddr = srcPixData;
946    int numxpixels, numypixels;
947 
948    int dstRowDiff = dstRowStride >= (width * 4) ?
949                     dstRowStride - (((width + 3) & ~3) * 4) : 0;
950    /* fprintf(stderr, "dxt3 tex width %d tex height %d dstRowStride %d\n",
951               width, height, dstRowStride); */
952    for (int j = 0; j < height; j += 4) {
953       if (height > j + 3) numypixels = 4;
954       else numypixels = height - j;
955       srcaddr = srcPixData + j * width * srccomps;
956       for (int i = 0; i < width; i += 4) {
957          if (width > i + 3) numxpixels = 4;
958          else numxpixels = width - i;
959          extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
960          *blkaddr++ = (srcpixels[0][0][3] >> 4) | (srcpixels[0][1][3] & 0xf0);
961          *blkaddr++ = (srcpixels[0][2][3] >> 4) | (srcpixels[0][3][3] & 0xf0);
962          *blkaddr++ = (srcpixels[1][0][3] >> 4) | (srcpixels[1][1][3] & 0xf0);
963          *blkaddr++ = (srcpixels[1][2][3] >> 4) | (srcpixels[1][3][3] & 0xf0);
964          *blkaddr++ = (srcpixels[2][0][3] >> 4) | (srcpixels[2][1][3] & 0xf0);
965          *blkaddr++ = (srcpixels[2][2][3] >> 4) | (srcpixels[2][3][3] & 0xf0);
966          *blkaddr++ = (srcpixels[3][0][3] >> 4) | (srcpixels[3][1][3] & 0xf0);
967          *blkaddr++ = (srcpixels[3][2][3] >> 4) | (srcpixels[3][3][3] & 0xf0);
968          encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
969          srcaddr += srccomps * numxpixels;
970          blkaddr += 8;
971       }
972       blkaddr += dstRowDiff;
973    }
974 }
975 
976 static void
tx_compress_dxt5(int srccomps,int width,int height,const GLubyte * srcPixData,GLubyte * dest,int dstRowStride)977 tx_compress_dxt5(int srccomps, int width, int height,
978                  const GLubyte *srcPixData, GLubyte *dest, int dstRowStride)
979 {
980    GLenum destFormat = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
981    GLubyte *blkaddr = dest;
982    GLubyte srcpixels[4][4][4];
983    const GLchan *srcaddr = srcPixData;
984    int numxpixels, numypixels;
985 
986    int dstRowDiff = dstRowStride >= (width * 4) ?
987                     dstRowStride - (((width + 3) & ~3) * 4) : 0;
988    /* fprintf(stderr, "dxt5 tex width %d tex height %d dstRowStride %d\n",
989               width, height, dstRowStride); */
990    for (int j = 0; j < height; j += 4) {
991       if (height > j + 3) numypixels = 4;
992       else numypixels = height - j;
993       srcaddr = srcPixData + j * width * srccomps;
994       for (int i = 0; i < width; i += 4) {
995          if (width > i + 3) numxpixels = 4;
996          else numxpixels = width - i;
997          extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
998          encodedxt5alpha(blkaddr, srcpixels, numxpixels, numypixels);
999          encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat);
1000          srcaddr += srccomps * numxpixels;
1001          blkaddr += 16;
1002       }
1003       blkaddr += dstRowDiff;
1004    }
1005 }
1006 
1007 static void
tx_compress_dxtn(GLint srccomps,GLint width,GLint height,const GLubyte * srcPixData,GLenum destFormat,GLubyte * dest,GLint dstRowStride)1008 tx_compress_dxtn(GLint srccomps, GLint width, GLint height,
1009                  const GLubyte *srcPixData, GLenum destFormat,
1010                  GLubyte *dest, GLint dstRowStride)
1011 {
1012    switch (destFormat) {
1013    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
1014       tx_compress_dxt1(srccomps, width, height, srcPixData,
1015                        dest, dstRowStride, 3);
1016       break;
1017    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
1018       tx_compress_dxt1(srccomps, width, height, srcPixData,
1019                        dest, dstRowStride, 4);
1020       break;
1021    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
1022       tx_compress_dxt3(srccomps, width, height, srcPixData,
1023                        dest, dstRowStride);
1024       break;
1025    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
1026       tx_compress_dxt5(srccomps, width, height, srcPixData,
1027                        dest, dstRowStride);
1028       break;
1029    default:
1030       unreachable("unknown DXTn format");
1031    }
1032 }
1033 
1034 #endif
1035