1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 
26 /**
27  * \file texcompress_fxt1.c
28  * GL_3DFX_texture_compression_FXT1 support.
29  */
30 
31 
32 #include "glheader.h"
33 #include "imports.h"
34 #include "image.h"
35 #include "macros.h"
36 #include "mipmap.h"
37 #include "texcompress.h"
38 #include "texcompress_fxt1.h"
39 #include "texstore.h"
40 
41 
42 static void
43 fxt1_encode (GLuint width, GLuint height, GLint comps,
44              const void *source, GLint srcRowStride,
45              void *dest, GLint destRowStride);
46 
47 static void
48 fxt1_decode_1 (const void *texture, GLint stride,
49                GLint i, GLint j, GLubyte *rgba);
50 
51 
52 /**
53  * Store user's image in rgb_fxt1 format.
54  */
55 GLboolean
_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)56 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
57 {
58    const GLubyte *pixels;
59    GLint srcRowStride;
60    GLubyte *dst;
61    const GLubyte *tempImage = NULL;
62 
63    assert(dstFormat == MESA_FORMAT_RGB_FXT1);
64 
65    if (srcFormat != GL_RGB ||
66        srcType != GL_UNSIGNED_BYTE ||
67        ctx->_ImageTransferState ||
68        ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
69        srcPacking->SwapBytes) {
70       /* convert image to RGB/GLubyte */
71       GLubyte *tempImageSlices[1];
72       int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
73       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
74       if (!tempImage)
75          return GL_FALSE; /* out of memory */
76       tempImageSlices[0] = (GLubyte *) tempImage;
77       _mesa_texstore(ctx, dims,
78                      baseInternalFormat,
79                      MESA_FORMAT_RGB_UNORM8,
80                      rgbRowStride, tempImageSlices,
81                      srcWidth, srcHeight, srcDepth,
82                      srcFormat, srcType, srcAddr,
83                      srcPacking);
84       pixels = tempImage;
85       srcRowStride = 3 * srcWidth;
86       srcFormat = GL_RGB;
87    }
88    else {
89       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
90                                      srcFormat, srcType, 0, 0);
91 
92       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
93                                             srcType) / sizeof(GLubyte);
94    }
95 
96    dst = dstSlices[0];
97 
98    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
99                dst, dstRowStride);
100 
101    free((void*) tempImage);
102 
103    return GL_TRUE;
104 }
105 
106 
107 /**
108  * Store user's image in rgba_fxt1 format.
109  */
110 GLboolean
_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)111 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
112 {
113    const GLubyte *pixels;
114    GLint srcRowStride;
115    GLubyte *dst;
116    const GLubyte *tempImage = NULL;
117 
118    assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
119 
120    if (srcFormat != GL_RGBA ||
121        srcType != GL_UNSIGNED_BYTE ||
122        ctx->_ImageTransferState ||
123        srcPacking->SwapBytes) {
124       /* convert image to RGBA/GLubyte */
125       GLubyte *tempImageSlices[1];
126       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
127       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
128       if (!tempImage)
129          return GL_FALSE; /* out of memory */
130       tempImageSlices[0] = (GLubyte *) tempImage;
131       _mesa_texstore(ctx, dims,
132                      baseInternalFormat,
133                      _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
134                                            : MESA_FORMAT_A8B8G8R8_UNORM,
135                      rgbaRowStride, tempImageSlices,
136                      srcWidth, srcHeight, srcDepth,
137                      srcFormat, srcType, srcAddr,
138                      srcPacking);
139       pixels = tempImage;
140       srcRowStride = 4 * srcWidth;
141       srcFormat = GL_RGBA;
142    }
143    else {
144       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
145                                      srcFormat, srcType, 0, 0);
146 
147       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
148                                             srcType) / sizeof(GLubyte);
149    }
150 
151    dst = dstSlices[0];
152 
153    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
154                dst, dstRowStride);
155 
156    free((void*) tempImage);
157 
158    return GL_TRUE;
159 }
160 
161 
162 /***************************************************************************\
163  * FXT1 encoder
164  *
165  * The encoder was built by reversing the decoder,
166  * and is vaguely based on Texus2 by 3dfx. Note that this code
167  * is merely a proof of concept, since it is highly UNoptimized;
168  * moreover, it is sub-optimal due to initial conditions passed
169  * to Lloyd's algorithm (the interpolation modes are even worse).
170 \***************************************************************************/
171 
172 
173 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
174 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
175 #define N_TEXELS 32 /* number of texels in a block (always 32) */
176 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
177 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
178 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
179 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
180 static const GLuint zero = 0;
181 #define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
182 
/*
 * Define a 64-bit unsigned integer type and macros
 *
 * FX64_MOV32(a, b)  set a to the 32-bit value b (upper half becomes zero)
 * FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 * FX64_SHL(a, c)    shift a left by c bits; the struct fallback requires
 *                   0 < c < 64 and evaluates c more than once
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* hi must be cleared as well: FX64_SHL reads it on the next use */
#define FX64_MOV32(a, b) \
   do {                  \
      (a).lo = (b);      \
      (a).hi = 0;        \
   } while (0)
#define FX64_OR32(a, b)  ((a).lo |= (b))

#define FX64_SHL(a, c)                                         \
   do {                                                        \
       if ((c) >= 32) {                                        \
          (a).hi = (a).lo << ((c) - 32);                       \
          (a).lo = 0;                                          \
       } else {                                                \
          (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));   \
          (a).lo <<= (c);                                      \
       }                                                       \
   } while (0)

#endif
219 
220 
#define F(i) ((GLfloat)1) /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC computes the interpolation vector IV and the bias B such that
 * dot(V, IV) + B is 0.5 for V == V0 and NV + 0.5 for V == V1 (with F == 1),
 * so truncating the result yields the nearest index in 0..NV.
 * V0 and V1 must differ, otherwise the squared length d2 is zero.
 *
 * CALCCDOT evaluates that projection for the vector V, stores the truncated
 * result in TEXEL and, when SAFECDOT is set, clamps it to 0..NV.
 *
 * Both macros use a variable named `i' from the enclosing scope as their
 * loop counter; NC is the number of components to process.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)          \
   do {                                          \
      /* compute interpolation vector */         \
      GLfloat d2 = 0.0F;                         \
      GLfloat rd2;                               \
                                                 \
      for (i = 0; i < (NC); i++) {               \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);   \
         d2 += (IV)[i] * (IV)[i];                \
      }                                          \
      rd2 = (GLfloat)(NV) / d2;                  \
      (B) = 0;                                   \
      for (i = 0; i < (NC); i++) {               \
         (IV)[i] *= F(i);                        \
         (B) -= (IV)[i] * (V0)[i];               \
         (IV)[i] *= rd2;                         \
      }                                          \
      (B) = (B) * rd2 + 0.5f;                    \
   } while (0)

#define CALCCDOT(TEXEL, NV, NC, IV, B, V)        \
   do {                                          \
      GLfloat dot = 0.0F;                        \
      for (i = 0; i < (NC); i++) {               \
         dot += (V)[i] * (IV)[i];                \
      }                                          \
      (TEXEL) = (GLint)(dot + (B));              \
      if (SAFECDOT) {                            \
         if ((TEXEL) < 0) {                      \
            (TEXEL) = 0;                         \
         } else if ((TEXEL) > (NV)) {            \
            (TEXEL) = (NV);                      \
         }                                       \
      }                                          \
   } while (0)
259 
260 
261 static GLint
fxt1_bestcol(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[MAX_COMP],GLint nc)262 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
263               GLubyte input[MAX_COMP], GLint nc)
264 {
265    GLint i, j, best = -1;
266    GLfloat err = 1e9; /* big enough */
267 
268    for (j = 0; j < nv; j++) {
269       GLfloat e = 0.0F;
270       for (i = 0; i < nc; i++) {
271          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
272       }
273       if (e < err) {
274          err = e;
275          best = j;
276       }
277    }
278 
279    return best;
280 }
281 
282 
283 static GLint
fxt1_worst(GLfloat vec[MAX_COMP],GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)284 fxt1_worst (GLfloat vec[MAX_COMP],
285             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
286 {
287    GLint i, k, worst = -1;
288    GLfloat err = -1.0F; /* small enough */
289 
290    for (k = 0; k < n; k++) {
291       GLfloat e = 0.0F;
292       for (i = 0; i < nc; i++) {
293          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
294       }
295       if (e > err) {
296          err = e;
297          worst = k;
298       }
299    }
300 
301    return worst;
302 }
303 
304 
305 static GLint
fxt1_variance(GLdouble variance[MAX_COMP],GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)306 fxt1_variance (GLdouble variance[MAX_COMP],
307                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
308 {
309    GLint i, k, best = 0;
310    GLint sx, sx2;
311    GLdouble var, maxvar = -1; /* small enough */
312    GLdouble teenth = 1.0 / n;
313 
314    for (i = 0; i < nc; i++) {
315       sx = sx2 = 0;
316       for (k = 0; k < n; k++) {
317          GLint t = input[k][i];
318          sx += t;
319          sx2 += t * t;
320       }
321       var = sx2 * teenth - sx * sx * teenth * teenth;
322       if (maxvar < var) {
323          maxvar = var;
324          best = i;
325       }
326       if (variance) {
327          variance[i] = var;
328       }
329    }
330 
331    return best;
332 }
333 
334 
335 static GLint
fxt1_choose(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)336 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
337              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
338 {
339 #if 0
340    /* Choose colors from a grid.
341     */
342    GLint i, j;
343 
344    for (j = 0; j < nv; j++) {
345       GLint m = j * (n - 1) / (nv - 1);
346       for (i = 0; i < nc; i++) {
347          vec[j][i] = input[m][i];
348       }
349    }
350 #else
351    /* Our solution here is to find the darkest and brightest colors in
352     * the 8x4 tile and use those as the two representative colors.
353     * There are probably better algorithms to use (histogram-based).
354     */
355    GLint i, j, k;
356    GLint minSum = 2000; /* big enough */
357    GLint maxSum = -1; /* small enough */
358    GLint minCol = 0; /* phoudoin: silent compiler! */
359    GLint maxCol = 0; /* phoudoin: silent compiler! */
360 
361    struct {
362       GLint flag;
363       GLint key;
364       GLint freq;
365       GLint idx;
366    } hist[N_TEXELS];
367    GLint lenh = 0;
368 
369    memset(hist, 0, sizeof(hist));
370 
371    for (k = 0; k < n; k++) {
372       GLint l;
373       GLint key = 0;
374       GLint sum = 0;
375       for (i = 0; i < nc; i++) {
376          key <<= 8;
377          key |= input[k][i];
378          sum += input[k][i];
379       }
380       for (l = 0; l < n; l++) {
381          if (!hist[l].flag) {
382             /* alloc new slot */
383             hist[l].flag = !0;
384             hist[l].key = key;
385             hist[l].freq = 1;
386             hist[l].idx = k;
387             lenh = l + 1;
388             break;
389          } else if (hist[l].key == key) {
390             hist[l].freq++;
391             break;
392          }
393       }
394       if (minSum > sum) {
395          minSum = sum;
396          minCol = k;
397       }
398       if (maxSum < sum) {
399          maxSum = sum;
400          maxCol = k;
401       }
402    }
403 
404    if (lenh <= nv) {
405       for (j = 0; j < lenh; j++) {
406          for (i = 0; i < nc; i++) {
407             vec[j][i] = (GLfloat)input[hist[j].idx][i];
408          }
409       }
410       for (; j < nv; j++) {
411          for (i = 0; i < nc; i++) {
412             vec[j][i] = vec[0][i];
413          }
414       }
415       return 0;
416    }
417 
418    for (j = 0; j < nv; j++) {
419       for (i = 0; i < nc; i++) {
420          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
421       }
422    }
423 #endif
424 
425    return !0;
426 }
427 
428 
429 static GLint
fxt1_lloyd(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)430 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
431             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
432 {
433    /* Use the generalized lloyd's algorithm for VQ:
434     *     find 4 color vectors.
435     *
436     *     for each sample color
437     *         sort to nearest vector.
438     *
439     *     replace each vector with the centroid of its matching colors.
440     *
441     *     repeat until RMS doesn't improve.
442     *
443     *     if a color vector has no samples, or becomes the same as another
444     *     vector, replace it with the color which is farthest from a sample.
445     *
446     * vec[][MAX_COMP]           initial vectors and resulting colors
447     * nv                        number of resulting colors required
448     * input[N_TEXELS][MAX_COMP] input texels
449     * nc                        number of components in input / vec
450     * n                         number of input samples
451     */
452 
453    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
454    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
455    GLfloat error, lasterror = 1e9;
456 
457    GLint i, j, k, rep;
458 
459    /* the quantizer */
460    for (rep = 0; rep < LL_N_REP; rep++) {
461       /* reset sums & counters */
462       for (j = 0; j < nv; j++) {
463          for (i = 0; i < nc; i++) {
464             sum[j][i] = 0;
465          }
466          cnt[j] = 0;
467       }
468       error = 0;
469 
470       /* scan whole block */
471       for (k = 0; k < n; k++) {
472 #if 1
473          GLint best = -1;
474          GLfloat err = 1e9; /* big enough */
475          /* determine best vector */
476          for (j = 0; j < nv; j++) {
477             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
478                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
479                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
480             if (nc == 4) {
481                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
482             }
483             if (e < err) {
484                err = e;
485                best = j;
486             }
487          }
488 #else
489          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
490 #endif
491          assert(best >= 0);
492          /* add in closest color */
493          for (i = 0; i < nc; i++) {
494             sum[best][i] += input[k][i];
495          }
496          /* mark this vector as used */
497          cnt[best]++;
498          /* accumulate error */
499          error += err;
500       }
501 
502       /* check RMS */
503       if ((error < LL_RMS_E) ||
504           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
505          return !0; /* good match */
506       }
507       lasterror = error;
508 
509       /* move each vector to the barycenter of its closest colors */
510       for (j = 0; j < nv; j++) {
511          if (cnt[j]) {
512             GLfloat div = 1.0F / cnt[j];
513             for (i = 0; i < nc; i++) {
514                vec[j][i] = div * sum[j][i];
515             }
516          } else {
517             /* this vec has no samples or is identical with a previous vec */
518             GLint worst = fxt1_worst(vec[j], input, nc, n);
519             for (i = 0; i < nc; i++) {
520                vec[j][i] = input[worst][i];
521             }
522          }
523       }
524    }
525 
526    return 0; /* could not converge fast enough */
527 }
528 
529 
530 static void
fxt1_quantize_CHROMA(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])531 fxt1_quantize_CHROMA (GLuint *cc,
532                       GLubyte input[N_TEXELS][MAX_COMP])
533 {
534    const GLint n_vect = 4; /* 4 base vectors to find */
535    const GLint n_comp = 3; /* 3 components: R, G, B */
536    GLfloat vec[MAX_VECT][MAX_COMP];
537    GLint i, j, k;
538    Fx64 hi; /* high quadword */
539    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
540 
541    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
542       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
543    }
544 
545    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
546    for (j = n_vect - 1; j >= 0; j--) {
547       for (i = 0; i < n_comp; i++) {
548          /* add in colors */
549          FX64_SHL(hi, 5);
550          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
551       }
552    }
553    ((Fx64 *)cc)[1] = hi;
554 
555    lohi = lolo = 0;
556    /* right microtile */
557    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
558       lohi <<= 2;
559       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
560    }
561    /* left microtile */
562    for (; k >= 0; k--) {
563       lolo <<= 2;
564       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
565    }
566    cc[1] = lohi;
567    cc[0] = lolo;
568 }
569 
570 
571 static void
fxt1_quantize_ALPHA0(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP],GLubyte reord[N_TEXELS][MAX_COMP],GLint n)572 fxt1_quantize_ALPHA0 (GLuint *cc,
573                       GLubyte input[N_TEXELS][MAX_COMP],
574                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
575 {
576    const GLint n_vect = 3; /* 3 base vectors to find */
577    const GLint n_comp = 4; /* 4 components: R, G, B, A */
578    GLfloat vec[MAX_VECT][MAX_COMP];
579    GLint i, j, k;
580    Fx64 hi; /* high quadword */
581    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
582 
583    /* the last vector indicates zero */
584    for (i = 0; i < n_comp; i++) {
585       vec[n_vect][i] = 0;
586    }
587 
588    /* the first n texels in reord are guaranteed to be non-zero */
589    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
590       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
591    }
592 
593    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
594    for (j = n_vect - 1; j >= 0; j--) {
595       /* add in alphas */
596       FX64_SHL(hi, 5);
597       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
598    }
599    for (j = n_vect - 1; j >= 0; j--) {
600       for (i = 0; i < n_comp - 1; i++) {
601          /* add in colors */
602          FX64_SHL(hi, 5);
603          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
604       }
605    }
606    ((Fx64 *)cc)[1] = hi;
607 
608    lohi = lolo = 0;
609    /* right microtile */
610    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
611       lohi <<= 2;
612       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
613    }
614    /* left microtile */
615    for (; k >= 0; k--) {
616       lolo <<= 2;
617       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
618    }
619    cc[1] = lohi;
620    cc[0] = lolo;
621 }
622 
623 
624 static void
fxt1_quantize_ALPHA1(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])625 fxt1_quantize_ALPHA1 (GLuint *cc,
626                       GLubyte input[N_TEXELS][MAX_COMP])
627 {
628    const GLint n_vect = 3; /* highest vector number in each microtile */
629    const GLint n_comp = 4; /* 4 components: R, G, B, A */
630    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
631    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
632    GLint i, j, k;
633    Fx64 hi; /* high quadword */
634    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
635 
636    GLint minSum;
637    GLint maxSum;
638    GLint minColL = 0, maxColL = 0;
639    GLint minColR = 0, maxColR = 0;
640    GLint sumL = 0, sumR = 0;
641    GLint nn_comp;
642    /* Our solution here is to find the darkest and brightest colors in
643     * the 4x4 tile and use those as the two representative colors.
644     * There are probably better algorithms to use (histogram-based).
645     */
646    nn_comp = n_comp;
647    while ((minColL == maxColL) && nn_comp) {
648        minSum = 2000; /* big enough */
649        maxSum = -1; /* small enough */
650        for (k = 0; k < N_TEXELS / 2; k++) {
651            GLint sum = 0;
652            for (i = 0; i < nn_comp; i++) {
653                sum += input[k][i];
654            }
655            if (minSum > sum) {
656                minSum = sum;
657                minColL = k;
658            }
659            if (maxSum < sum) {
660                maxSum = sum;
661                maxColL = k;
662            }
663            sumL += sum;
664        }
665 
666        nn_comp--;
667    }
668 
669    nn_comp = n_comp;
670    while ((minColR == maxColR) && nn_comp) {
671        minSum = 2000; /* big enough */
672        maxSum = -1; /* small enough */
673        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
674            GLint sum = 0;
675            for (i = 0; i < nn_comp; i++) {
676                sum += input[k][i];
677            }
678            if (minSum > sum) {
679                minSum = sum;
680                minColR = k;
681            }
682            if (maxSum < sum) {
683                maxSum = sum;
684                maxColR = k;
685            }
686            sumR += sum;
687        }
688 
689        nn_comp--;
690    }
691 
692    /* choose the common vector (yuck!) */
693    {
694       GLint j1, j2;
695       GLint v1 = 0, v2 = 0;
696       GLfloat err = 1e9; /* big enough */
697       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
698       for (i = 0; i < n_comp; i++) {
699          tv[0][i] = input[minColL][i];
700          tv[1][i] = input[maxColL][i];
701          tv[2][i] = input[minColR][i];
702          tv[3][i] = input[maxColR][i];
703       }
704       for (j1 = 0; j1 < 2; j1++) {
705          for (j2 = 2; j2 < 4; j2++) {
706             GLfloat e = 0.0F;
707             for (i = 0; i < n_comp; i++) {
708                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
709             }
710             if (e < err) {
711                err = e;
712                v1 = j1;
713                v2 = j2;
714             }
715          }
716       }
717       for (i = 0; i < n_comp; i++) {
718          vec[0][i] = tv[1 - v1][i];
719          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
720          vec[2][i] = tv[5 - v2][i];
721       }
722    }
723 
724    /* left microtile */
725    cc[0] = 0;
726    if (minColL != maxColL) {
727       /* compute interpolation vector */
728       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
729 
730       /* add in texels */
731       lolo = 0;
732       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
733          GLint texel;
734          /* interpolate color */
735          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
736          /* add in texel */
737          lolo <<= 2;
738          lolo |= texel;
739       }
740 
741       cc[0] = lolo;
742    }
743 
744    /* right microtile */
745    cc[1] = 0;
746    if (minColR != maxColR) {
747       /* compute interpolation vector */
748       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
749 
750       /* add in texels */
751       lohi = 0;
752       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
753          GLint texel;
754          /* interpolate color */
755          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
756          /* add in texel */
757          lohi <<= 2;
758          lohi |= texel;
759       }
760 
761       cc[1] = lohi;
762    }
763 
764    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
765    for (j = n_vect - 1; j >= 0; j--) {
766       /* add in alphas */
767       FX64_SHL(hi, 5);
768       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
769    }
770    for (j = n_vect - 1; j >= 0; j--) {
771       for (i = 0; i < n_comp - 1; i++) {
772          /* add in colors */
773          FX64_SHL(hi, 5);
774          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
775       }
776    }
777    ((Fx64 *)cc)[1] = hi;
778 }
779 
780 
781 static void
fxt1_quantize_HI(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP],GLubyte reord[N_TEXELS][MAX_COMP],GLint n)782 fxt1_quantize_HI (GLuint *cc,
783                   GLubyte input[N_TEXELS][MAX_COMP],
784                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
785 {
786    const GLint n_vect = 6; /* highest vector number */
787    const GLint n_comp = 3; /* 3 components: R, G, B */
788    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
789    GLfloat iv[MAX_COMP];   /* interpolation vector */
790    GLint i, k;
791    GLuint hihi; /* high quadword: hi dword */
792 
793    GLint minSum = 2000; /* big enough */
794    GLint maxSum = -1; /* small enough */
795    GLint minCol = 0; /* phoudoin: silent compiler! */
796    GLint maxCol = 0; /* phoudoin: silent compiler! */
797 
798    /* Our solution here is to find the darkest and brightest colors in
799     * the 8x4 tile and use those as the two representative colors.
800     * There are probably better algorithms to use (histogram-based).
801     */
802    for (k = 0; k < n; k++) {
803       GLint sum = 0;
804       for (i = 0; i < n_comp; i++) {
805          sum += reord[k][i];
806       }
807       if (minSum > sum) {
808          minSum = sum;
809          minCol = k;
810       }
811       if (maxSum < sum) {
812          maxSum = sum;
813          maxCol = k;
814       }
815    }
816 
817    hihi = 0; /* cc-hi = "00" */
818    for (i = 0; i < n_comp; i++) {
819       /* add in colors */
820       hihi <<= 5;
821       hihi |= reord[maxCol][i] >> 3;
822    }
823    for (i = 0; i < n_comp; i++) {
824       /* add in colors */
825       hihi <<= 5;
826       hihi |= reord[minCol][i] >> 3;
827    }
828    cc[3] = hihi;
829    cc[0] = cc[1] = cc[2] = 0;
830 
831    /* compute interpolation vector */
832    if (minCol != maxCol) {
833       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
834    }
835 
836    /* add in texels */
837    for (k = N_TEXELS - 1; k >= 0; k--) {
838       GLint t = k * 3;
839       GLuint *kk = (GLuint *)((char *)cc + t / 8);
840       GLint texel = n_vect + 1; /* transparent black */
841 
842       if (!ISTBLACK(input[k])) {
843          if (minCol != maxCol) {
844             /* interpolate color */
845             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
846             /* add in texel */
847             kk[0] |= texel << (t & 7);
848          }
849       } else {
850          /* add in texel */
851          kk[0] |= texel << (t & 7);
852       }
853    }
854 }
855 
856 
857 static void
fxt1_quantize_MIXED1(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])858 fxt1_quantize_MIXED1 (GLuint *cc,
859                       GLubyte input[N_TEXELS][MAX_COMP])
860 {
861    const GLint n_vect = 2; /* highest vector number in each microtile */
862    const GLint n_comp = 3; /* 3 components: R, G, B */
863    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
864    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
865    GLint i, j, k;
866    Fx64 hi; /* high quadword */
867    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
868 
869    GLint minSum;
870    GLint maxSum;
871    GLint minColL = 0, maxColL = -1;
872    GLint minColR = 0, maxColR = -1;
873 
874    /* Our solution here is to find the darkest and brightest colors in
875     * the 4x4 tile and use those as the two representative colors.
876     * There are probably better algorithms to use (histogram-based).
877     */
878    minSum = 2000; /* big enough */
879    maxSum = -1; /* small enough */
880    for (k = 0; k < N_TEXELS / 2; k++) {
881       if (!ISTBLACK(input[k])) {
882          GLint sum = 0;
883          for (i = 0; i < n_comp; i++) {
884             sum += input[k][i];
885          }
886          if (minSum > sum) {
887             minSum = sum;
888             minColL = k;
889          }
890          if (maxSum < sum) {
891             maxSum = sum;
892             maxColL = k;
893          }
894       }
895    }
896    minSum = 2000; /* big enough */
897    maxSum = -1; /* small enough */
898    for (; k < N_TEXELS; k++) {
899       if (!ISTBLACK(input[k])) {
900          GLint sum = 0;
901          for (i = 0; i < n_comp; i++) {
902             sum += input[k][i];
903          }
904          if (minSum > sum) {
905             minSum = sum;
906             minColR = k;
907          }
908          if (maxSum < sum) {
909             maxSum = sum;
910             maxColR = k;
911          }
912       }
913    }
914 
915    /* left microtile */
916    if (maxColL == -1) {
917       /* all transparent black */
918       cc[0] = ~0u;
919       for (i = 0; i < n_comp; i++) {
920          vec[0][i] = 0;
921          vec[1][i] = 0;
922       }
923    } else {
924       cc[0] = 0;
925       for (i = 0; i < n_comp; i++) {
926          vec[0][i] = input[minColL][i];
927          vec[1][i] = input[maxColL][i];
928       }
929       if (minColL != maxColL) {
930          /* compute interpolation vector */
931          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
932 
933          /* add in texels */
934          lolo = 0;
935          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
936             GLint texel = n_vect + 1; /* transparent black */
937             if (!ISTBLACK(input[k])) {
938                /* interpolate color */
939                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
940             }
941             /* add in texel */
942             lolo <<= 2;
943             lolo |= texel;
944          }
945          cc[0] = lolo;
946       }
947    }
948 
949    /* right microtile */
950    if (maxColR == -1) {
951       /* all transparent black */
952       cc[1] = ~0u;
953       for (i = 0; i < n_comp; i++) {
954          vec[2][i] = 0;
955          vec[3][i] = 0;
956       }
957    } else {
958       cc[1] = 0;
959       for (i = 0; i < n_comp; i++) {
960          vec[2][i] = input[minColR][i];
961          vec[3][i] = input[maxColR][i];
962       }
963       if (minColR != maxColR) {
964          /* compute interpolation vector */
965          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
966 
967          /* add in texels */
968          lohi = 0;
969          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
970             GLint texel = n_vect + 1; /* transparent black */
971             if (!ISTBLACK(input[k])) {
972                /* interpolate color */
973                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
974             }
975             /* add in texel */
976             lohi <<= 2;
977             lohi |= texel;
978          }
979          cc[1] = lohi;
980       }
981    }
982 
983    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
984    for (j = 2 * 2 - 1; j >= 0; j--) {
985       for (i = 0; i < n_comp; i++) {
986          /* add in colors */
987          FX64_SHL(hi, 5);
988          FX64_OR32(hi, vec[j][i] >> 3);
989       }
990    }
991    ((Fx64 *)cc)[1] = hi;
992 }
993 
994 
/**
 * Quantize one tile as a "mixed" block with the alpha bit clear.
 *
 * fxt1_quantize() calls this when no texel of the tile is transparent
 * black and none is translucent.  The tile is handled as two microtiles
 * of N_TEXELS / 2 texels each (left = input[0 .. N_TEXELS/2 - 1], right =
 * the rest).  For every microtile two representative colors are picked and
 * each texel gets a 2-bit index (0 .. n_vect) along the line between them.
 *
 * \param cc     output block: cc[0] receives the left indices, cc[1] the
 *               right indices, and the upper quadword (((Fx64 *)cc)[1])
 *               the packed colors plus the mode bits.
 * \param input  texels in microtile order; only the first n_comp (3)
 *               components of each texel are read.
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   /* indices (into input[]) of the two representative texels per microtile */
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* The results are used below as channel indices into each texel. */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   /* k continues from N_TEXELS / 2: the right microtile */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   cc[0] = 0; /* stays 0 when both extremes are the same texel */
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels; walking backwards leaves texel 0 in the lowest
       * two bits of lolo
       */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green:
       * bit 1 of the index word (read back as "selb" by the decoder) must
       * equal the XOR of bit 2 of the two green endpoints.  When it does
       * not, swap the endpoints and complement every index (~ turns a
       * 2-bit index t into 3 - t), which selects the same colors while
       * flipping that bit.
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile: same procedure with vec[2] / vec[3] and cc[1] */
   cc[1] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green (see the left microtile) */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* Seed the high quadword with the 4 control bits, then push the four
    * colors in below them, 5 bits per component (the top 5 bits of each
    * 8-bit value).  Assuming FX64_SHL(hi, 5) is a 5-bit left shift, the
    * twelve shifts move the seed up by 60 bits, so in block terms:
    *   8                          -> bit 127, mode "1??" (mixed)
    *   vec[3][GCOMP] & 4          -> bit 126, green LSB of the right microtile
    *   (vec[1][GCOMP] >> 1) & 2   -> bit 125, green LSB of the left microtile
    *   bit 0 left clear           -> bit 124, alpha = 0
    * which are the positions fxt1_decode_1MIXED() reads back.
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* mode = "1" (mixed), alpha = 0 */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1162 
1163 
1164 static void
fxt1_quantize(GLuint * cc,const GLubyte * lines[],GLint comps)1165 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1166 {
1167    GLint trualpha;
1168    GLubyte reord[N_TEXELS][MAX_COMP];
1169 
1170    GLubyte input[N_TEXELS][MAX_COMP];
1171    GLint i, k, l;
1172 
1173    if (comps == 3) {
1174       /* make the whole block opaque */
1175       memset(input, -1, sizeof(input));
1176    }
1177 
1178    /* 8 texels each line */
1179    for (l = 0; l < 4; l++) {
1180       for (k = 0; k < 4; k++) {
1181          for (i = 0; i < comps; i++) {
1182             input[k + l * 4][i] = *lines[l]++;
1183          }
1184       }
1185       for (; k < 8; k++) {
1186          for (i = 0; i < comps; i++) {
1187             input[k + l * 4 + 12][i] = *lines[l]++;
1188          }
1189       }
1190    }
1191 
1192    /* block layout:
1193     * 00, 01, 02, 03, 08, 09, 0a, 0b
1194     * 10, 11, 12, 13, 18, 19, 1a, 1b
1195     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1196     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1197     */
1198 
1199    /* [dBorca]
1200     * stupidity flows forth from this
1201     */
1202    l = N_TEXELS;
1203    trualpha = 0;
1204    if (comps == 4) {
1205       /* skip all transparent black texels */
1206       l = 0;
1207       for (k = 0; k < N_TEXELS; k++) {
1208          /* test all components against 0 */
1209          if (!ISTBLACK(input[k])) {
1210             /* texel is not transparent black */
1211             COPY_4UBV(reord[l], input[k]);
1212             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1213                /* non-opaque texel */
1214                trualpha = !0;
1215             }
1216             l++;
1217          }
1218       }
1219    }
1220 
1221 #if 0
1222    if (trualpha) {
1223       fxt1_quantize_ALPHA0(cc, input, reord, l);
1224    } else if (l == 0) {
1225       cc[0] = cc[1] = cc[2] = -1;
1226       cc[3] = 0;
1227    } else if (l < N_TEXELS) {
1228       fxt1_quantize_HI(cc, input, reord, l);
1229    } else {
1230       fxt1_quantize_CHROMA(cc, input);
1231    }
1232    (void)fxt1_quantize_ALPHA1;
1233    (void)fxt1_quantize_MIXED1;
1234    (void)fxt1_quantize_MIXED0;
1235 #else
1236    if (trualpha) {
1237       fxt1_quantize_ALPHA1(cc, input);
1238    } else if (l == 0) {
1239       cc[0] = cc[1] = cc[2] = ~0u;
1240       cc[3] = 0;
1241    } else if (l < N_TEXELS) {
1242       fxt1_quantize_MIXED1(cc, input);
1243    } else {
1244       fxt1_quantize_MIXED0(cc, input);
1245    }
1246    (void)fxt1_quantize_ALPHA0;
1247    (void)fxt1_quantize_HI;
1248    (void)fxt1_quantize_CHROMA;
1249 #endif
1250 }
1251 
1252 
1253 
1254 /**
1255  * Upscale an image by replication, not (typical) stretching.
1256  * We use this when the image width or height is less than a
1257  * certain size (4, 8) and we need to upscale an image.
1258  */
1259 static void
upscale_teximage2d(GLsizei inWidth,GLsizei inHeight,GLsizei outWidth,GLsizei outHeight,GLint comps,const GLubyte * src,GLint srcRowStride,GLubyte * dest)1260 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1261                    GLsizei outWidth, GLsizei outHeight,
1262                    GLint comps, const GLubyte *src, GLint srcRowStride,
1263                    GLubyte *dest )
1264 {
1265    GLint i, j, k;
1266 
1267    assert(outWidth >= inWidth);
1268    assert(outHeight >= inHeight);
1269 #if 0
1270    assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1271    assert((outWidth & 3) == 0);
1272    assert((outHeight & 3) == 0);
1273 #endif
1274 
1275    for (i = 0; i < outHeight; i++) {
1276       const GLint ii = i % inHeight;
1277       for (j = 0; j < outWidth; j++) {
1278          const GLint jj = j % inWidth;
1279          for (k = 0; k < comps; k++) {
1280             dest[(i * outWidth + j) * comps + k]
1281                = src[ii * srcRowStride + jj * comps + k];
1282          }
1283       }
1284    }
1285 }
1286 
1287 
1288 static void
fxt1_encode(GLuint width,GLuint height,GLint comps,const void * source,GLint srcRowStride,void * dest,GLint destRowStride)1289 fxt1_encode (GLuint width, GLuint height, GLint comps,
1290              const void *source, GLint srcRowStride,
1291              void *dest, GLint destRowStride)
1292 {
1293    GLuint x, y;
1294    const GLubyte *data;
1295    GLuint *encoded = (GLuint *)dest;
1296    void *newSource = NULL;
1297 
1298    assert(comps == 3 || comps == 4);
1299 
1300    /* Replicate image if width is not M8 or height is not M4 */
1301    if ((width & 7) | (height & 3)) {
1302       GLint newWidth = (width + 7) & ~7;
1303       GLint newHeight = (height + 3) & ~3;
1304       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1305       if (!newSource) {
1306          GET_CURRENT_CONTEXT(ctx);
1307          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1308          goto cleanUp;
1309       }
1310       upscale_teximage2d(width, height, newWidth, newHeight,
1311                          comps, (const GLubyte *) source,
1312                          srcRowStride, (GLubyte *) newSource);
1313       source = newSource;
1314       width = newWidth;
1315       height = newHeight;
1316       srcRowStride = comps * newWidth;
1317    }
1318 
1319    data = (const GLubyte *) source;
1320    destRowStride = (destRowStride - width * 2) / 4;
1321    for (y = 0; y < height; y += 4) {
1322       GLuint offs = 0 + (y + 0) * srcRowStride;
1323       for (x = 0; x < width; x += 8) {
1324          const GLubyte *lines[4];
1325          lines[0] = &data[offs];
1326          lines[1] = lines[0] + srcRowStride;
1327          lines[2] = lines[1] + srcRowStride;
1328          lines[3] = lines[2] + srcRowStride;
1329          offs += 8 * comps;
1330          fxt1_quantize(encoded, lines, comps);
1331          /* 128 bits per 8x4 block */
1332          encoded += 4;
1333       }
1334       encoded += destRowStride;
1335    }
1336 
1337  cleanUp:
1338    free(newSource);
1339 }
1340 
1341 
1342 /***************************************************************************\
1343  * FXT1 decoder
1344  *
1345  * The decoder is based on GL_3DFX_texture_compression_FXT1
1346  * specification and serves as a concept for the encoder.
1347 \***************************************************************************/
1348 
1349 
/* lookup table for scaling 5 bit colors up to 8 bits:
 * entry c holds c * 255 / 31 rounded to the nearest integer, so 0 maps
 * to 0 and 31 maps to 255.  Indexed through UP5(), which masks the index
 * to 0..31.
 */
static const GLubyte _rgb_scale_5[] = {
   0,   8,   16,  25,  33,  41,  49,  58,
   66,  74,  82,  90,  99,  107, 115, 123,
   132, 140, 148, 156, 165, 173, 181, 189,
   197, 206, 214, 222, 230, 239, 247, 255
};

/* lookup table for scaling 6 bit colors up to 8 bits:
 * entry c holds c * 255 / 63 rounded to the nearest integer, so 0 maps
 * to 0 and 63 maps to 255.  Indexed through UP6(), which builds the
 * 6-bit index from a 5-bit value and one extra low bit.
 */
static const GLubyte _rgb_scale_6[] = {
   0,   4,   8,   12,  16,  20,  24,  28,
   32,  36,  40,  45,  49,  53,  57,  61,
   65,  69,  73,  77,  81,  85,  89,  93,
   97,  101, 105, 109, 113, 117, 121, 125,
   130, 134, 138, 142, 146, 150, 154, 158,
   162, 166, 170, 174, 178, 182, 186, 190,
   194, 198, 202, 206, 210, 215, 219, 223,
   227, 231, 235, 239, 243, 247, 251, 255
};
1369 
1370 
/* Decoder helper macros.
 *
 * CC_SEL(cc, which): view cc as an array of GLuint words, pick the word
 *    containing bit number `which` and shift it right so that bit lands in
 *    bit 0.  Higher bits of the word are NOT masked off; callers mask.
 *    The data is only read, so it is accessed through a const pointer.
 * UP5(c): expand the low 5 bits of c to 8 bits.
 * UP6(c, b): expand the 6-bit value formed by the low 5 bits of c with
 *    bit 0 of b appended as the least significant bit.
 * LERP(n, t, c0, c1): integer interpolation ((n - t) * c0 + t * c1) / n,
 *    rounded to nearest by adding n / 2 before dividing.
 *
 * Every expansion is fully parenthesized so the macros stay correct when
 * used inside a larger expression.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1375 
1376 
1377 static void
fxt1_decode_1HI(const GLubyte * code,GLint t,GLubyte * rgba)1378 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1379 {
1380    const GLuint *cc;
1381 
1382    t *= 3;
1383    cc = (const GLuint *)(code + t / 8);
1384    t = (cc[0] >> (t & 7)) & 7;
1385 
1386    if (t == 7) {
1387       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1388    } else {
1389       GLubyte r, g, b;
1390       cc = (const GLuint *)(code + 12);
1391       if (t == 0) {
1392          b = UP5(CC_SEL(cc, 0));
1393          g = UP5(CC_SEL(cc, 5));
1394          r = UP5(CC_SEL(cc, 10));
1395       } else if (t == 6) {
1396          b = UP5(CC_SEL(cc, 15));
1397          g = UP5(CC_SEL(cc, 20));
1398          r = UP5(CC_SEL(cc, 25));
1399       } else {
1400          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1401          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1402          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1403       }
1404       rgba[RCOMP] = r;
1405       rgba[GCOMP] = g;
1406       rgba[BCOMP] = b;
1407       rgba[ACOMP] = 255;
1408    }
1409 }
1410 
1411 
1412 static void
fxt1_decode_1CHROMA(const GLubyte * code,GLint t,GLubyte * rgba)1413 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1414 {
1415    const GLuint *cc;
1416    GLuint kk;
1417 
1418    cc = (const GLuint *)code;
1419    if (t & 16) {
1420       cc++;
1421       t &= 15;
1422    }
1423    t = (cc[0] >> (t * 2)) & 3;
1424 
1425    t *= 15;
1426    cc = (const GLuint *)(code + 8 + t / 8);
1427    kk = cc[0] >> (t & 7);
1428    rgba[BCOMP] = UP5(kk);
1429    rgba[GCOMP] = UP5(kk >> 5);
1430    rgba[RCOMP] = UP5(kk >> 10);
1431    rgba[ACOMP] = 255;
1432 }
1433 
1434 
1435 static void
fxt1_decode_1MIXED(const GLubyte * code,GLint t,GLubyte * rgba)1436 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1437 {
1438    const GLuint *cc;
1439    GLuint col[2][3];
1440    GLint glsb, selb;
1441 
1442    cc = (const GLuint *)code;
1443    if (t & 16) {
1444       t &= 15;
1445       t = (cc[1] >> (t * 2)) & 3;
1446       /* col 2 */
1447       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1448       col[0][GCOMP] = CC_SEL(cc, 99);
1449       col[0][RCOMP] = CC_SEL(cc, 104);
1450       /* col 3 */
1451       col[1][BCOMP] = CC_SEL(cc, 109);
1452       col[1][GCOMP] = CC_SEL(cc, 114);
1453       col[1][RCOMP] = CC_SEL(cc, 119);
1454       glsb = CC_SEL(cc, 126);
1455       selb = CC_SEL(cc, 33);
1456    } else {
1457       t = (cc[0] >> (t * 2)) & 3;
1458       /* col 0 */
1459       col[0][BCOMP] = CC_SEL(cc, 64);
1460       col[0][GCOMP] = CC_SEL(cc, 69);
1461       col[0][RCOMP] = CC_SEL(cc, 74);
1462       /* col 1 */
1463       col[1][BCOMP] = CC_SEL(cc, 79);
1464       col[1][GCOMP] = CC_SEL(cc, 84);
1465       col[1][RCOMP] = CC_SEL(cc, 89);
1466       glsb = CC_SEL(cc, 125);
1467       selb = CC_SEL(cc, 1);
1468    }
1469 
1470    if (CC_SEL(cc, 124) & 1) {
1471       /* alpha[0] == 1 */
1472 
1473       if (t == 3) {
1474          /* zero */
1475          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1476       } else {
1477          GLubyte r, g, b;
1478          if (t == 0) {
1479             b = UP5(col[0][BCOMP]);
1480             g = UP5(col[0][GCOMP]);
1481             r = UP5(col[0][RCOMP]);
1482          } else if (t == 2) {
1483             b = UP5(col[1][BCOMP]);
1484             g = UP6(col[1][GCOMP], glsb);
1485             r = UP5(col[1][RCOMP]);
1486          } else {
1487             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1488             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1489             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1490          }
1491          rgba[RCOMP] = r;
1492          rgba[GCOMP] = g;
1493          rgba[BCOMP] = b;
1494          rgba[ACOMP] = 255;
1495       }
1496    } else {
1497       /* alpha[0] == 0 */
1498       GLubyte r, g, b;
1499       if (t == 0) {
1500          b = UP5(col[0][BCOMP]);
1501          g = UP6(col[0][GCOMP], glsb ^ selb);
1502          r = UP5(col[0][RCOMP]);
1503       } else if (t == 3) {
1504          b = UP5(col[1][BCOMP]);
1505          g = UP6(col[1][GCOMP], glsb);
1506          r = UP5(col[1][RCOMP]);
1507       } else {
1508          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1509          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1510                         UP6(col[1][GCOMP], glsb));
1511          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1512       }
1513       rgba[RCOMP] = r;
1514       rgba[GCOMP] = g;
1515       rgba[BCOMP] = b;
1516       rgba[ACOMP] = 255;
1517    }
1518 }
1519 
1520 
1521 static void
fxt1_decode_1ALPHA(const GLubyte * code,GLint t,GLubyte * rgba)1522 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1523 {
1524    const GLuint *cc;
1525    GLubyte r, g, b, a;
1526 
1527    cc = (const GLuint *)code;
1528    if (CC_SEL(cc, 124) & 1) {
1529       /* lerp == 1 */
1530       GLuint col0[4];
1531 
1532       if (t & 16) {
1533          t &= 15;
1534          t = (cc[1] >> (t * 2)) & 3;
1535          /* col 2 */
1536          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1537          col0[GCOMP] = CC_SEL(cc, 99);
1538          col0[RCOMP] = CC_SEL(cc, 104);
1539          col0[ACOMP] = CC_SEL(cc, 119);
1540       } else {
1541          t = (cc[0] >> (t * 2)) & 3;
1542          /* col 0 */
1543          col0[BCOMP] = CC_SEL(cc, 64);
1544          col0[GCOMP] = CC_SEL(cc, 69);
1545          col0[RCOMP] = CC_SEL(cc, 74);
1546          col0[ACOMP] = CC_SEL(cc, 109);
1547       }
1548 
1549       if (t == 0) {
1550          b = UP5(col0[BCOMP]);
1551          g = UP5(col0[GCOMP]);
1552          r = UP5(col0[RCOMP]);
1553          a = UP5(col0[ACOMP]);
1554       } else if (t == 3) {
1555          b = UP5(CC_SEL(cc, 79));
1556          g = UP5(CC_SEL(cc, 84));
1557          r = UP5(CC_SEL(cc, 89));
1558          a = UP5(CC_SEL(cc, 114));
1559       } else {
1560          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1561          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1562          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1563          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1564       }
1565    } else {
1566       /* lerp == 0 */
1567 
1568       if (t & 16) {
1569          cc++;
1570          t &= 15;
1571       }
1572       t = (cc[0] >> (t * 2)) & 3;
1573 
1574       if (t == 3) {
1575          /* zero */
1576          r = g = b = a = 0;
1577       } else {
1578          GLuint kk;
1579          cc = (const GLuint *)code;
1580          a = UP5(cc[3] >> (t * 5 + 13));
1581          t *= 15;
1582          cc = (const GLuint *)(code + 8 + t / 8);
1583          kk = cc[0] >> (t & 7);
1584          b = UP5(kk);
1585          g = UP5(kk >> 5);
1586          r = UP5(kk >> 10);
1587       }
1588    }
1589    rgba[RCOMP] = r;
1590    rgba[GCOMP] = g;
1591    rgba[BCOMP] = b;
1592    rgba[ACOMP] = a;
1593 }
1594 
1595 
1596 static void
fxt1_decode_1(const void * texture,GLint stride,GLint i,GLint j,GLubyte * rgba)1597 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1598                GLint i, GLint j, GLubyte *rgba)
1599 {
1600    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1601       fxt1_decode_1HI,     /* cc-high   = "00?" */
1602       fxt1_decode_1HI,     /* cc-high   = "00?" */
1603       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1604       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1605       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1606       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1607       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1608       fxt1_decode_1MIXED   /* mixed     = "1??" */
1609    };
1610 
1611    const GLubyte *code = (const GLubyte *)texture +
1612                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1613    GLint mode = CC_SEL(code, 125);
1614    GLint t = i & 7;
1615 
1616    if (t & 4) {
1617       t += 12;
1618    }
1619    t += (j & 3) * 4;
1620 
1621    decode_1[mode](code, t, rgba);
1622 }
1623 
1624 
1625 
1626 
1627 static void
fetch_rgb_fxt1(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLfloat * texel)1628 fetch_rgb_fxt1(const GLubyte *map,
1629                GLint rowStride, GLint i, GLint j, GLfloat *texel)
1630 {
1631    GLubyte rgba[4];
1632    fxt1_decode_1(map, rowStride, i, j, rgba);
1633    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1634    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1635    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1636    texel[ACOMP] = 1.0F;
1637 }
1638 
1639 
1640 static void
fetch_rgba_fxt1(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLfloat * texel)1641 fetch_rgba_fxt1(const GLubyte *map,
1642                 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1643 {
1644    GLubyte rgba[4];
1645    fxt1_decode_1(map, rowStride, i, j, rgba);
1646    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1647    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1648    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1649    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1650 }
1651 
1652 
1653 compressed_fetch_func
_mesa_get_fxt_fetch_func(mesa_format format)1654 _mesa_get_fxt_fetch_func(mesa_format format)
1655 {
1656    switch (format) {
1657    case MESA_FORMAT_RGB_FXT1:
1658       return fetch_rgb_fxt1;
1659    case MESA_FORMAT_RGBA_FXT1:
1660       return fetch_rgba_fxt1;
1661    default:
1662       return NULL;
1663    }
1664 }
1665