1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 
26 /**
27  * \file texcompress_fxt1.c
28  * GL_3DFX_texture_compression_FXT1 support.
29  */
30 
31 
32 #include "errors.h"
33 #include "glheader.h"
34 
35 #include "image.h"
36 #include "macros.h"
37 #include "mipmap.h"
38 #include "texcompress.h"
39 #include "texcompress_fxt1.h"
40 #include "texstore.h"
41 #include "mtypes.h"
42 
43 
44 static void
45 fxt1_encode (GLuint width, GLuint height, GLint comps,
46              const void *source, GLint srcRowStride,
47              void *dest, GLint destRowStride);
48 
49 static void
50 fxt1_decode_1 (const void *texture, GLint stride,
51                GLint i, GLint j, GLubyte *rgba);
52 
53 
54 /**
55  * Store user's image in rgb_fxt1 format.
56  */
57 GLboolean
_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)58 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
59 {
60    const GLubyte *pixels;
61    GLint srcRowStride;
62    GLubyte *dst;
63    const GLubyte *tempImage = NULL;
64 
65    assert(dstFormat == MESA_FORMAT_RGB_FXT1);
66 
67    if (srcFormat != GL_RGB ||
68        srcType != GL_UNSIGNED_BYTE ||
69        ctx->_ImageTransferState ||
70        ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
71        srcPacking->SwapBytes) {
72       /* convert image to RGB/GLubyte */
73       GLubyte *tempImageSlices[1];
74       int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
75       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
76       if (!tempImage)
77          return GL_FALSE; /* out of memory */
78       tempImageSlices[0] = (GLubyte *) tempImage;
79       _mesa_texstore(ctx, dims,
80                      baseInternalFormat,
81                      MESA_FORMAT_RGB_UNORM8,
82                      rgbRowStride, tempImageSlices,
83                      srcWidth, srcHeight, srcDepth,
84                      srcFormat, srcType, srcAddr,
85                      srcPacking);
86       pixels = tempImage;
87       srcRowStride = 3 * srcWidth;
88       srcFormat = GL_RGB;
89    }
90    else {
91       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
92                                      srcFormat, srcType, 0, 0);
93 
94       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
95                                             srcType) / sizeof(GLubyte);
96    }
97 
98    dst = dstSlices[0];
99 
100    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
101                dst, dstRowStride);
102 
103    free((void*) tempImage);
104 
105    return GL_TRUE;
106 }
107 
108 
109 /**
110  * Store user's image in rgba_fxt1 format.
111  */
112 GLboolean
_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)113 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
114 {
115    const GLubyte *pixels;
116    GLint srcRowStride;
117    GLubyte *dst;
118    const GLubyte *tempImage = NULL;
119 
120    assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
121 
122    if (srcFormat != GL_RGBA ||
123        srcType != GL_UNSIGNED_BYTE ||
124        ctx->_ImageTransferState ||
125        srcPacking->SwapBytes) {
126       /* convert image to RGBA/GLubyte */
127       GLubyte *tempImageSlices[1];
128       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
129       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
130       if (!tempImage)
131          return GL_FALSE; /* out of memory */
132       tempImageSlices[0] = (GLubyte *) tempImage;
133       _mesa_texstore(ctx, dims,
134                      baseInternalFormat,
135 #if UTIL_ARCH_LITTLE_ENDIAN
136                      MESA_FORMAT_R8G8B8A8_UNORM,
137 #else
138                      MESA_FORMAT_A8B8G8R8_UNORM,
139 #endif
140                      rgbaRowStride, tempImageSlices,
141                      srcWidth, srcHeight, srcDepth,
142                      srcFormat, srcType, srcAddr,
143                      srcPacking);
144       pixels = tempImage;
145       srcRowStride = 4 * srcWidth;
146       srcFormat = GL_RGBA;
147    }
148    else {
149       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
150                                      srcFormat, srcType, 0, 0);
151 
152       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
153                                             srcType) / sizeof(GLubyte);
154    }
155 
156    dst = dstSlices[0];
157 
158    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
159                dst, dstRowStride);
160 
161    free((void*) tempImage);
162 
163    return GL_TRUE;
164 }
165 
166 
167 /***************************************************************************\
168  * FXT1 encoder
169  *
170  * The encoder was built by reversing the decoder,
171  * and is vaguely based on Texus2 by 3dfx. Note that this code
172  * is merely a proof of concept, since it is highly UNoptimized;
173  * moreover, it is sub-optimal due to initial conditions passed
174  * to Lloyd's algorithm (the interpolation modes are even worse).
175 \***************************************************************************/
176 
177 
178 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
179 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
180 #define N_TEXELS 32 /* number of texels in a block (always 32) */
181 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
182 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
183 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
184 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
185 static const GLuint zero = 0;
186 #define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
187 
/*
 * Define a 64-bit unsigned integer type and macros
 *
 *   FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * All arguments are parenthesized so that expressions can be passed safely.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* Clear .hi as well: FX64_SHL reads it on the very next use. */
#define FX64_MOV32(a, b)                               \
   do {                                                \
      (a).lo = (b);                                    \
      (a).hi = 0;                                      \
   } while (0)

#define FX64_OR32(a, b)  ((a).lo |= (b))

/* A shift count of 0 is a no-op; without the explicit check the second
 * branch would shift .lo right by 32, which is undefined.
 */
#define FX64_SHL(a, c)                                       \
   do {                                                      \
      if ((c) >= 32) {                                       \
         (a).hi = (a).lo << ((c) - 32);                      \
         (a).lo = 0;                                         \
      } else if ((c) > 0) {                                  \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));  \
         (a).lo <<= (c);                                     \
      }                                                      \
   } while (0)

#endif
224 
225 
#define F(i) ((GLfloat)1) /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC(NV, NC, IV, B, V0, V1)
 *
 * Build the interpolation vector IV and bias B such that, for a color V,
 * dot(V, IV) + B maps V0 to 0.5 and V1 to NV + 0.5; CALCCDOT truncates that
 * value to obtain an interpolation index.
 *
 * NV  highest index, NC  number of components, V0/V1  the two end colors.
 * The macro uses a variable named `i` from the enclosing scope as its loop
 * counter and declares `d2` / `rd2` in its own block.
 *
 * If V0 and V1 coincide the squared length d2 is zero; the scale is then
 * forced to 0 (every color maps to index 0) instead of dividing by zero.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                \
   do {                                                \
      /* compute interpolation vector */               \
      GLfloat d2 = 0.0F;                               \
      GLfloat rd2;                                     \
                                                       \
      for (i = 0; i < (NC); i++) {                     \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);         \
         d2 += (IV)[i] * (IV)[i];                      \
      }                                                \
      rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F;   \
      (B) = 0;                                         \
      for (i = 0; i < (NC); i++) {                     \
         (IV)[i] *= F(i);                              \
         (B) -= (IV)[i] * (V0)[i];                     \
         (IV)[i] *= rd2;                               \
      }                                                \
      (B) = (B) * rd2 + 0.5f;                          \
   } while (0)

/*
 * CALCCDOT(TEXEL, NV, NC, IV, B, V)
 *
 * Store in TEXEL the interpolation index of color V for the vector / bias
 * produced by MAKEIVEC.  With SAFECDOT enabled the result is clamped to
 * [0, NV].  Uses `i` from the enclosing scope as its loop counter and
 * declares `dot` in its own block.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)    \
   do {                                      \
      GLfloat dot = 0.0F;                    \
      for (i = 0; i < (NC); i++) {           \
         dot += (V)[i] * (IV)[i];            \
      }                                      \
      (TEXEL) = (GLint)(dot + (B));          \
      if (SAFECDOT) {                        \
         if ((TEXEL) < 0) {                  \
            (TEXEL) = 0;                     \
         } else if ((TEXEL) > (NV)) {        \
            (TEXEL) = (NV);                  \
         }                                   \
      }                                      \
   } while (0)
264 
265 
266 static GLint
fxt1_bestcol(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[MAX_COMP],GLint nc)267 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
268               GLubyte input[MAX_COMP], GLint nc)
269 {
270    GLint i, j, best = -1;
271    GLfloat err = 1e9; /* big enough */
272 
273    for (j = 0; j < nv; j++) {
274       GLfloat e = 0.0F;
275       for (i = 0; i < nc; i++) {
276          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
277       }
278       if (e < err) {
279          err = e;
280          best = j;
281       }
282    }
283 
284    return best;
285 }
286 
287 
288 static GLint
fxt1_worst(GLfloat vec[MAX_COMP],GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)289 fxt1_worst (GLfloat vec[MAX_COMP],
290             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
291 {
292    GLint i, k, worst = -1;
293    GLfloat err = -1.0F; /* small enough */
294 
295    for (k = 0; k < n; k++) {
296       GLfloat e = 0.0F;
297       for (i = 0; i < nc; i++) {
298          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
299       }
300       if (e > err) {
301          err = e;
302          worst = k;
303       }
304    }
305 
306    return worst;
307 }
308 
309 
310 static GLint
fxt1_variance(GLdouble variance[MAX_COMP],GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)311 fxt1_variance (GLdouble variance[MAX_COMP],
312                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
313 {
314    GLint i, k, best = 0;
315    GLint sx, sx2;
316    GLdouble var, maxvar = -1; /* small enough */
317    GLdouble teenth = 1.0 / n;
318 
319    for (i = 0; i < nc; i++) {
320       sx = sx2 = 0;
321       for (k = 0; k < n; k++) {
322          GLint t = input[k][i];
323          sx += t;
324          sx2 += t * t;
325       }
326       var = sx2 * teenth - sx * sx * teenth * teenth;
327       if (maxvar < var) {
328          maxvar = var;
329          best = i;
330       }
331       if (variance) {
332          variance[i] = var;
333       }
334    }
335 
336    return best;
337 }
338 
339 
340 static GLint
fxt1_choose(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)341 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
342              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
343 {
344 #if 0
345    /* Choose colors from a grid.
346     */
347    GLint i, j;
348 
349    for (j = 0; j < nv; j++) {
350       GLint m = j * (n - 1) / (nv - 1);
351       for (i = 0; i < nc; i++) {
352          vec[j][i] = input[m][i];
353       }
354    }
355 #else
356    /* Our solution here is to find the darkest and brightest colors in
357     * the 8x4 tile and use those as the two representative colors.
358     * There are probably better algorithms to use (histogram-based).
359     */
360    GLint i, j, k;
361    GLint minSum = 2000; /* big enough */
362    GLint maxSum = -1; /* small enough */
363    GLint minCol = 0; /* phoudoin: silent compiler! */
364    GLint maxCol = 0; /* phoudoin: silent compiler! */
365 
366    struct {
367       GLint flag;
368       GLint key;
369       GLint freq;
370       GLint idx;
371    } hist[N_TEXELS];
372    GLint lenh = 0;
373 
374    memset(hist, 0, sizeof(hist));
375 
376    for (k = 0; k < n; k++) {
377       GLint l;
378       GLint key = 0;
379       GLint sum = 0;
380       for (i = 0; i < nc; i++) {
381          key <<= 8;
382          key |= input[k][i];
383          sum += input[k][i];
384       }
385       for (l = 0; l < n; l++) {
386          if (!hist[l].flag) {
387             /* alloc new slot */
388             hist[l].flag = !0;
389             hist[l].key = key;
390             hist[l].freq = 1;
391             hist[l].idx = k;
392             lenh = l + 1;
393             break;
394          } else if (hist[l].key == key) {
395             hist[l].freq++;
396             break;
397          }
398       }
399       if (minSum > sum) {
400          minSum = sum;
401          minCol = k;
402       }
403       if (maxSum < sum) {
404          maxSum = sum;
405          maxCol = k;
406       }
407    }
408 
409    if (lenh <= nv) {
410       for (j = 0; j < lenh; j++) {
411          for (i = 0; i < nc; i++) {
412             vec[j][i] = (GLfloat)input[hist[j].idx][i];
413          }
414       }
415       for (; j < nv; j++) {
416          for (i = 0; i < nc; i++) {
417             vec[j][i] = vec[0][i];
418          }
419       }
420       return 0;
421    }
422 
423    for (j = 0; j < nv; j++) {
424       for (i = 0; i < nc; i++) {
425          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
426       }
427    }
428 #endif
429 
430    return !0;
431 }
432 
433 
434 static GLint
fxt1_lloyd(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)435 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
436             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
437 {
438    /* Use the generalized lloyd's algorithm for VQ:
439     *     find 4 color vectors.
440     *
441     *     for each sample color
442     *         sort to nearest vector.
443     *
444     *     replace each vector with the centroid of its matching colors.
445     *
446     *     repeat until RMS doesn't improve.
447     *
448     *     if a color vector has no samples, or becomes the same as another
449     *     vector, replace it with the color which is farthest from a sample.
450     *
451     * vec[][MAX_COMP]           initial vectors and resulting colors
452     * nv                        number of resulting colors required
453     * input[N_TEXELS][MAX_COMP] input texels
454     * nc                        number of components in input / vec
455     * n                         number of input samples
456     */
457 
458    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
459    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
460    GLfloat error, lasterror = 1e9;
461 
462    GLint i, j, k, rep;
463 
464    /* the quantizer */
465    for (rep = 0; rep < LL_N_REP; rep++) {
466       /* reset sums & counters */
467       for (j = 0; j < nv; j++) {
468          for (i = 0; i < nc; i++) {
469             sum[j][i] = 0;
470          }
471          cnt[j] = 0;
472       }
473       error = 0;
474 
475       /* scan whole block */
476       for (k = 0; k < n; k++) {
477 #if 1
478          GLint best = -1;
479          GLfloat err = 1e9; /* big enough */
480          /* determine best vector */
481          for (j = 0; j < nv; j++) {
482             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
483                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
484                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
485             if (nc == 4) {
486                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
487             }
488             if (e < err) {
489                err = e;
490                best = j;
491             }
492          }
493 #else
494          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
495 #endif
496          assert(best >= 0);
497          /* add in closest color */
498          for (i = 0; i < nc; i++) {
499             sum[best][i] += input[k][i];
500          }
501          /* mark this vector as used */
502          cnt[best]++;
503          /* accumulate error */
504          error += err;
505       }
506 
507       /* check RMS */
508       if ((error < LL_RMS_E) ||
509           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
510          return !0; /* good match */
511       }
512       lasterror = error;
513 
514       /* move each vector to the barycenter of its closest colors */
515       for (j = 0; j < nv; j++) {
516          if (cnt[j]) {
517             GLfloat div = 1.0F / cnt[j];
518             for (i = 0; i < nc; i++) {
519                vec[j][i] = div * sum[j][i];
520             }
521          } else {
522             /* this vec has no samples or is identical with a previous vec */
523             GLint worst = fxt1_worst(vec[j], input, nc, n);
524             for (i = 0; i < nc; i++) {
525                vec[j][i] = input[worst][i];
526             }
527          }
528       }
529    }
530 
531    return 0; /* could not converge fast enough */
532 }
533 
534 
535 static void
fxt1_quantize_CHROMA(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])536 fxt1_quantize_CHROMA (GLuint *cc,
537                       GLubyte input[N_TEXELS][MAX_COMP])
538 {
539    const GLint n_vect = 4; /* 4 base vectors to find */
540    const GLint n_comp = 3; /* 3 components: R, G, B */
541    GLfloat vec[MAX_VECT][MAX_COMP];
542    GLint i, j, k;
543    Fx64 hi; /* high quadword */
544    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
545 
546    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
547       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
548    }
549 
550    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
551    for (j = n_vect - 1; j >= 0; j--) {
552       for (i = 0; i < n_comp; i++) {
553          /* add in colors */
554          FX64_SHL(hi, 5);
555          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
556       }
557    }
558    ((Fx64 *)cc)[1] = hi;
559 
560    lohi = lolo = 0;
561    /* right microtile */
562    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
563       lohi <<= 2;
564       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
565    }
566    /* left microtile */
567    for (; k >= 0; k--) {
568       lolo <<= 2;
569       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
570    }
571    cc[1] = lohi;
572    cc[0] = lolo;
573 }
574 
575 
576 static void
fxt1_quantize_ALPHA0(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP],GLubyte reord[N_TEXELS][MAX_COMP],GLint n)577 fxt1_quantize_ALPHA0 (GLuint *cc,
578                       GLubyte input[N_TEXELS][MAX_COMP],
579                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
580 {
581    const GLint n_vect = 3; /* 3 base vectors to find */
582    const GLint n_comp = 4; /* 4 components: R, G, B, A */
583    GLfloat vec[MAX_VECT][MAX_COMP];
584    GLint i, j, k;
585    Fx64 hi; /* high quadword */
586    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
587 
588    /* the last vector indicates zero */
589    for (i = 0; i < n_comp; i++) {
590       vec[n_vect][i] = 0;
591    }
592 
593    /* the first n texels in reord are guaranteed to be non-zero */
594    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
595       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
596    }
597 
598    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
599    for (j = n_vect - 1; j >= 0; j--) {
600       /* add in alphas */
601       FX64_SHL(hi, 5);
602       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
603    }
604    for (j = n_vect - 1; j >= 0; j--) {
605       for (i = 0; i < n_comp - 1; i++) {
606          /* add in colors */
607          FX64_SHL(hi, 5);
608          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
609       }
610    }
611    ((Fx64 *)cc)[1] = hi;
612 
613    lohi = lolo = 0;
614    /* right microtile */
615    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
616       lohi <<= 2;
617       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
618    }
619    /* left microtile */
620    for (; k >= 0; k--) {
621       lolo <<= 2;
622       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
623    }
624    cc[1] = lohi;
625    cc[0] = lolo;
626 }
627 
628 
629 static void
fxt1_quantize_ALPHA1(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])630 fxt1_quantize_ALPHA1 (GLuint *cc,
631                       GLubyte input[N_TEXELS][MAX_COMP])
632 {
633    const GLint n_vect = 3; /* highest vector number in each microtile */
634    const GLint n_comp = 4; /* 4 components: R, G, B, A */
635    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
636    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
637    GLint i, j, k;
638    Fx64 hi; /* high quadword */
639    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
640 
641    GLint minSum;
642    GLint maxSum;
643    GLint minColL = 0, maxColL = 0;
644    GLint minColR = 0, maxColR = 0;
645    GLint sumL = 0, sumR = 0;
646    GLint nn_comp;
647    /* Our solution here is to find the darkest and brightest colors in
648     * the 4x4 tile and use those as the two representative colors.
649     * There are probably better algorithms to use (histogram-based).
650     */
651    nn_comp = n_comp;
652    while ((minColL == maxColL) && nn_comp) {
653        minSum = 2000; /* big enough */
654        maxSum = -1; /* small enough */
655        for (k = 0; k < N_TEXELS / 2; k++) {
656            GLint sum = 0;
657            for (i = 0; i < nn_comp; i++) {
658                sum += input[k][i];
659            }
660            if (minSum > sum) {
661                minSum = sum;
662                minColL = k;
663            }
664            if (maxSum < sum) {
665                maxSum = sum;
666                maxColL = k;
667            }
668            sumL += sum;
669        }
670 
671        nn_comp--;
672    }
673 
674    nn_comp = n_comp;
675    while ((minColR == maxColR) && nn_comp) {
676        minSum = 2000; /* big enough */
677        maxSum = -1; /* small enough */
678        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
679            GLint sum = 0;
680            for (i = 0; i < nn_comp; i++) {
681                sum += input[k][i];
682            }
683            if (minSum > sum) {
684                minSum = sum;
685                minColR = k;
686            }
687            if (maxSum < sum) {
688                maxSum = sum;
689                maxColR = k;
690            }
691            sumR += sum;
692        }
693 
694        nn_comp--;
695    }
696 
697    /* choose the common vector (yuck!) */
698    {
699       GLint j1, j2;
700       GLint v1 = 0, v2 = 0;
701       GLfloat err = 1e9; /* big enough */
702       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
703       for (i = 0; i < n_comp; i++) {
704          tv[0][i] = input[minColL][i];
705          tv[1][i] = input[maxColL][i];
706          tv[2][i] = input[minColR][i];
707          tv[3][i] = input[maxColR][i];
708       }
709       for (j1 = 0; j1 < 2; j1++) {
710          for (j2 = 2; j2 < 4; j2++) {
711             GLfloat e = 0.0F;
712             for (i = 0; i < n_comp; i++) {
713                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
714             }
715             if (e < err) {
716                err = e;
717                v1 = j1;
718                v2 = j2;
719             }
720          }
721       }
722       for (i = 0; i < n_comp; i++) {
723          vec[0][i] = tv[1 - v1][i];
724          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
725          vec[2][i] = tv[5 - v2][i];
726       }
727    }
728 
729    /* left microtile */
730    cc[0] = 0;
731    if (minColL != maxColL) {
732       /* compute interpolation vector */
733       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
734 
735       /* add in texels */
736       lolo = 0;
737       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
738          GLint texel;
739          /* interpolate color */
740          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
741          /* add in texel */
742          lolo <<= 2;
743          lolo |= texel;
744       }
745 
746       cc[0] = lolo;
747    }
748 
749    /* right microtile */
750    cc[1] = 0;
751    if (minColR != maxColR) {
752       /* compute interpolation vector */
753       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
754 
755       /* add in texels */
756       lohi = 0;
757       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
758          GLint texel;
759          /* interpolate color */
760          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
761          /* add in texel */
762          lohi <<= 2;
763          lohi |= texel;
764       }
765 
766       cc[1] = lohi;
767    }
768 
769    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
770    for (j = n_vect - 1; j >= 0; j--) {
771       /* add in alphas */
772       FX64_SHL(hi, 5);
773       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
774    }
775    for (j = n_vect - 1; j >= 0; j--) {
776       for (i = 0; i < n_comp - 1; i++) {
777          /* add in colors */
778          FX64_SHL(hi, 5);
779          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
780       }
781    }
782    ((Fx64 *)cc)[1] = hi;
783 }
784 
785 
786 static void
fxt1_quantize_HI(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP],GLubyte reord[N_TEXELS][MAX_COMP],GLint n)787 fxt1_quantize_HI (GLuint *cc,
788                   GLubyte input[N_TEXELS][MAX_COMP],
789                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
790 {
791    const GLint n_vect = 6; /* highest vector number */
792    const GLint n_comp = 3; /* 3 components: R, G, B */
793    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
794    GLfloat iv[MAX_COMP];   /* interpolation vector */
795    GLint i, k;
796    GLuint hihi; /* high quadword: hi dword */
797 
798    GLint minSum = 2000; /* big enough */
799    GLint maxSum = -1; /* small enough */
800    GLint minCol = 0; /* phoudoin: silent compiler! */
801    GLint maxCol = 0; /* phoudoin: silent compiler! */
802 
803    /* Our solution here is to find the darkest and brightest colors in
804     * the 8x4 tile and use those as the two representative colors.
805     * There are probably better algorithms to use (histogram-based).
806     */
807    for (k = 0; k < n; k++) {
808       GLint sum = 0;
809       for (i = 0; i < n_comp; i++) {
810          sum += reord[k][i];
811       }
812       if (minSum > sum) {
813          minSum = sum;
814          minCol = k;
815       }
816       if (maxSum < sum) {
817          maxSum = sum;
818          maxCol = k;
819       }
820    }
821 
822    hihi = 0; /* cc-hi = "00" */
823    for (i = 0; i < n_comp; i++) {
824       /* add in colors */
825       hihi <<= 5;
826       hihi |= reord[maxCol][i] >> 3;
827    }
828    for (i = 0; i < n_comp; i++) {
829       /* add in colors */
830       hihi <<= 5;
831       hihi |= reord[minCol][i] >> 3;
832    }
833    cc[3] = hihi;
834    cc[0] = cc[1] = cc[2] = 0;
835 
836    /* compute interpolation vector */
837    if (minCol != maxCol) {
838       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
839    }
840 
841    /* add in texels */
842    for (k = N_TEXELS - 1; k >= 0; k--) {
843       GLint t = k * 3;
844       GLuint *kk = (GLuint *)((char *)cc + t / 8);
845       GLint texel = n_vect + 1; /* transparent black */
846 
847       if (!ISTBLACK(input[k])) {
848          if (minCol != maxCol) {
849             /* interpolate color */
850             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
851             /* add in texel */
852             kk[0] |= texel << (t & 7);
853          }
854       } else {
855          /* add in texel */
856          kk[0] |= texel << (t & 7);
857       }
858    }
859 }
860 
861 
862 static void
fxt1_quantize_MIXED1(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])863 fxt1_quantize_MIXED1 (GLuint *cc,
864                       GLubyte input[N_TEXELS][MAX_COMP])
865 {
866    const GLint n_vect = 2; /* highest vector number in each microtile */
867    const GLint n_comp = 3; /* 3 components: R, G, B */
868    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
869    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
870    GLint i, j, k;
871    Fx64 hi; /* high quadword */
872    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
873 
874    GLint minSum;
875    GLint maxSum;
876    GLint minColL = 0, maxColL = -1;
877    GLint minColR = 0, maxColR = -1;
878 
879    /* Our solution here is to find the darkest and brightest colors in
880     * the 4x4 tile and use those as the two representative colors.
881     * There are probably better algorithms to use (histogram-based).
882     */
883    minSum = 2000; /* big enough */
884    maxSum = -1; /* small enough */
885    for (k = 0; k < N_TEXELS / 2; k++) {
886       if (!ISTBLACK(input[k])) {
887          GLint sum = 0;
888          for (i = 0; i < n_comp; i++) {
889             sum += input[k][i];
890          }
891          if (minSum > sum) {
892             minSum = sum;
893             minColL = k;
894          }
895          if (maxSum < sum) {
896             maxSum = sum;
897             maxColL = k;
898          }
899       }
900    }
901    minSum = 2000; /* big enough */
902    maxSum = -1; /* small enough */
903    for (; k < N_TEXELS; k++) {
904       if (!ISTBLACK(input[k])) {
905          GLint sum = 0;
906          for (i = 0; i < n_comp; i++) {
907             sum += input[k][i];
908          }
909          if (minSum > sum) {
910             minSum = sum;
911             minColR = k;
912          }
913          if (maxSum < sum) {
914             maxSum = sum;
915             maxColR = k;
916          }
917       }
918    }
919 
920    /* left microtile */
921    if (maxColL == -1) {
922       /* all transparent black */
923       cc[0] = ~0u;
924       for (i = 0; i < n_comp; i++) {
925          vec[0][i] = 0;
926          vec[1][i] = 0;
927       }
928    } else {
929       cc[0] = 0;
930       for (i = 0; i < n_comp; i++) {
931          vec[0][i] = input[minColL][i];
932          vec[1][i] = input[maxColL][i];
933       }
934       if (minColL != maxColL) {
935          /* compute interpolation vector */
936          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
937 
938          /* add in texels */
939          lolo = 0;
940          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
941             GLint texel = n_vect + 1; /* transparent black */
942             if (!ISTBLACK(input[k])) {
943                /* interpolate color */
944                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
945             }
946             /* add in texel */
947             lolo <<= 2;
948             lolo |= texel;
949          }
950          cc[0] = lolo;
951       }
952    }
953 
954    /* right microtile */
955    if (maxColR == -1) {
956       /* all transparent black */
957       cc[1] = ~0u;
958       for (i = 0; i < n_comp; i++) {
959          vec[2][i] = 0;
960          vec[3][i] = 0;
961       }
962    } else {
963       cc[1] = 0;
964       for (i = 0; i < n_comp; i++) {
965          vec[2][i] = input[minColR][i];
966          vec[3][i] = input[maxColR][i];
967       }
968       if (minColR != maxColR) {
969          /* compute interpolation vector */
970          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
971 
972          /* add in texels */
973          lohi = 0;
974          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
975             GLint texel = n_vect + 1; /* transparent black */
976             if (!ISTBLACK(input[k])) {
977                /* interpolate color */
978                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
979             }
980             /* add in texel */
981             lohi <<= 2;
982             lohi |= texel;
983          }
984          cc[1] = lohi;
985       }
986    }
987 
988    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
989    for (j = 2 * 2 - 1; j >= 0; j--) {
990       for (i = 0; i < n_comp; i++) {
991          /* add in colors */
992          FX64_SHL(hi, 5);
993          FX64_OR32(hi, vec[j][i] >> 3);
994       }
995    }
996    ((Fx64 *)cc)[1] = hi;
997 }
998 
999 
/**
 * Quantize one block of texels in MIXED mode with the alpha bit cleared,
 * i.e. every texel is treated as opaque RGB.
 *
 * The first N_TEXELS / 2 entries of \p input form the left microtile and
 * the remaining entries form the right microtile.  For each microtile two
 * endpoint texels are chosen and every texel is replaced by a 2-bit
 * selector (0..n_vect) along the line between those endpoints.
 *
 * \param cc     output block: cc[0] and cc[1] receive the selectors of the
 *               left and right microtile, and the upper 64 bits
 *               (((Fx64 *)cc)[1]) receive the packed endpoint colors plus
 *               the mode bits.
 * \param input  texels to encode; only the first n_comp (3) components of
 *               each texel are read here.
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   /* indices (into input[]) of the endpoint texels of each microtile */
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   /* Disabled alternative: pick endpoints by component sum (brightness). */
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* The results are used below as channel indices.  fxt1_variance() is
    * defined elsewhere; presumably it reports the channel with the
    * largest variance -- confirm against its definition.
    */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   /* k continues from N_TEXELS / 2: the right microtile */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   cc[0] = 0; /* all selectors 0; kept when both endpoints are the same texel */
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      /* Walk backwards so that texel 0 ends up in the two lowest bits. */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green */
      /* Bit 1 of the selector word doubles as a hint for the green LSB of
       * the first endpoint (fxt1_decode_1MIXED reads it as selb).  It has
       * to equal bit 2 of (green1 ^ green0); if it does not, swap the two
       * endpoints and complement every selector (t becomes 3 - t), which
       * keeps each texel's color and flips that bit.
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0; /* all selectors 0; kept when both endpoints are the same texel */
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      /* Walk backwards so that texel N_TEXELS / 2 ends up in the lowest bits. */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green */
      /* Same endpoint swap as for the left microtile, see above. */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* Top four bits of the block: 8 selects mixed mode with the alpha bit
    * clear, followed by bit 2 of the green of vec[3] and of vec[1] (the
    * green LSB hints of the right and left microtile).
    * NOTE(review): the FX64_* macros are defined elsewhere; presumably they
    * load, shift and OR into the 64-bit accumulator hi.
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* mode = "1" (mixed), alpha = 0 */
   /* Append the four endpoints, vec[3] first, keeping the top 5 bits of
    * each component.
    */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1167 
1168 
1169 static void
fxt1_quantize(GLuint * cc,const GLubyte * lines[],GLint comps)1170 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1171 {
1172    GLint trualpha;
1173    GLubyte reord[N_TEXELS][MAX_COMP];
1174 
1175    GLubyte input[N_TEXELS][MAX_COMP];
1176    GLint i, k, l;
1177 
1178    if (comps == 3) {
1179       /* make the whole block opaque */
1180       memset(input, -1, sizeof(input));
1181    }
1182 
1183    /* 8 texels each line */
1184    for (l = 0; l < 4; l++) {
1185       for (k = 0; k < 4; k++) {
1186          for (i = 0; i < comps; i++) {
1187             input[k + l * 4][i] = *lines[l]++;
1188          }
1189       }
1190       for (; k < 8; k++) {
1191          for (i = 0; i < comps; i++) {
1192             input[k + l * 4 + 12][i] = *lines[l]++;
1193          }
1194       }
1195    }
1196 
1197    /* block layout:
1198     * 00, 01, 02, 03, 08, 09, 0a, 0b
1199     * 10, 11, 12, 13, 18, 19, 1a, 1b
1200     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1201     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1202     */
1203 
1204    /* [dBorca]
1205     * stupidity flows forth from this
1206     */
1207    l = N_TEXELS;
1208    trualpha = 0;
1209    if (comps == 4) {
1210       /* skip all transparent black texels */
1211       l = 0;
1212       for (k = 0; k < N_TEXELS; k++) {
1213          /* test all components against 0 */
1214          if (!ISTBLACK(input[k])) {
1215             /* texel is not transparent black */
1216             COPY_4UBV(reord[l], input[k]);
1217             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1218                /* non-opaque texel */
1219                trualpha = !0;
1220             }
1221             l++;
1222          }
1223       }
1224    }
1225 
1226 #if 0
1227    if (trualpha) {
1228       fxt1_quantize_ALPHA0(cc, input, reord, l);
1229    } else if (l == 0) {
1230       cc[0] = cc[1] = cc[2] = -1;
1231       cc[3] = 0;
1232    } else if (l < N_TEXELS) {
1233       fxt1_quantize_HI(cc, input, reord, l);
1234    } else {
1235       fxt1_quantize_CHROMA(cc, input);
1236    }
1237    (void)fxt1_quantize_ALPHA1;
1238    (void)fxt1_quantize_MIXED1;
1239    (void)fxt1_quantize_MIXED0;
1240 #else
1241    if (trualpha) {
1242       fxt1_quantize_ALPHA1(cc, input);
1243    } else if (l == 0) {
1244       cc[0] = cc[1] = cc[2] = ~0u;
1245       cc[3] = 0;
1246    } else if (l < N_TEXELS) {
1247       fxt1_quantize_MIXED1(cc, input);
1248    } else {
1249       fxt1_quantize_MIXED0(cc, input);
1250    }
1251    (void)fxt1_quantize_ALPHA0;
1252    (void)fxt1_quantize_HI;
1253    (void)fxt1_quantize_CHROMA;
1254 #endif
1255 }
1256 
1257 
1258 
1259 /**
1260  * Upscale an image by replication, not (typical) stretching.
1261  * We use this when the image width or height is less than a
1262  * certain size (4, 8) and we need to upscale an image.
1263  */
1264 static void
upscale_teximage2d(GLsizei inWidth,GLsizei inHeight,GLsizei outWidth,GLsizei outHeight,GLint comps,const GLubyte * src,GLint srcRowStride,GLubyte * dest)1265 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1266                    GLsizei outWidth, GLsizei outHeight,
1267                    GLint comps, const GLubyte *src, GLint srcRowStride,
1268                    GLubyte *dest )
1269 {
1270    GLint i, j, k;
1271 
1272    assert(outWidth >= inWidth);
1273    assert(outHeight >= inHeight);
1274 #if 0
1275    assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1276    assert((outWidth & 3) == 0);
1277    assert((outHeight & 3) == 0);
1278 #endif
1279 
1280    for (i = 0; i < outHeight; i++) {
1281       const GLint ii = i % inHeight;
1282       for (j = 0; j < outWidth; j++) {
1283          const GLint jj = j % inWidth;
1284          for (k = 0; k < comps; k++) {
1285             dest[(i * outWidth + j) * comps + k]
1286                = src[ii * srcRowStride + jj * comps + k];
1287          }
1288       }
1289    }
1290 }
1291 
1292 
1293 static void
fxt1_encode(GLuint width,GLuint height,GLint comps,const void * source,GLint srcRowStride,void * dest,GLint destRowStride)1294 fxt1_encode (GLuint width, GLuint height, GLint comps,
1295              const void *source, GLint srcRowStride,
1296              void *dest, GLint destRowStride)
1297 {
1298    GLuint x, y;
1299    const GLubyte *data;
1300    GLuint *encoded = (GLuint *)dest;
1301    void *newSource = NULL;
1302 
1303    assert(comps == 3 || comps == 4);
1304 
1305    /* Replicate image if width is not M8 or height is not M4 */
1306    if ((width & 7) | (height & 3)) {
1307       GLint newWidth = (width + 7) & ~7;
1308       GLint newHeight = (height + 3) & ~3;
1309       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1310       if (!newSource) {
1311          GET_CURRENT_CONTEXT(ctx);
1312          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1313          goto cleanUp;
1314       }
1315       upscale_teximage2d(width, height, newWidth, newHeight,
1316                          comps, (const GLubyte *) source,
1317                          srcRowStride, (GLubyte *) newSource);
1318       source = newSource;
1319       width = newWidth;
1320       height = newHeight;
1321       srcRowStride = comps * newWidth;
1322    }
1323 
1324    data = (const GLubyte *) source;
1325    destRowStride = (destRowStride - width * 2) / 4;
1326    for (y = 0; y < height; y += 4) {
1327       GLuint offs = 0 + (y + 0) * srcRowStride;
1328       for (x = 0; x < width; x += 8) {
1329          const GLubyte *lines[4];
1330          lines[0] = &data[offs];
1331          lines[1] = lines[0] + srcRowStride;
1332          lines[2] = lines[1] + srcRowStride;
1333          lines[3] = lines[2] + srcRowStride;
1334          offs += 8 * comps;
1335          fxt1_quantize(encoded, lines, comps);
1336          /* 128 bits per 8x4 block */
1337          encoded += 4;
1338       }
1339       encoded += destRowStride;
1340    }
1341 
1342  cleanUp:
1343    free(newSource);
1344 }
1345 
1346 
1347 /***************************************************************************\
1348  * FXT1 decoder
1349  *
1350  * The decoder is based on GL_3DFX_texture_compression_FXT1
1351  * specification and serves as a concept for the encoder.
1352 \***************************************************************************/
1353 
1354 
/* lookup table for scaling 5 bit colors up to 8 bits:
 * entry i is i * 255 / 31 rounded to the nearest integer,
 * so 0 maps to 0 and 31 maps to 255
 */
static const GLubyte _rgb_scale_5[] = {
   0,   8,   16,  25,  33,  41,  49,  58,
   66,  74,  82,  90,  99,  107, 115, 123,
   132, 140, 148, 156, 165, 173, 181, 189,
   197, 206, 214, 222, 230, 239, 247, 255
};

/* lookup table for scaling 6 bit colors up to 8 bits:
 * entry i is i * 255 / 63 rounded to the nearest integer,
 * so 0 maps to 0 and 63 maps to 255
 */
static const GLubyte _rgb_scale_6[] = {
   0,   4,   8,   12,  16,  20,  24,  28,
   32,  36,  40,  45,  49,  53,  57,  61,
   65,  69,  73,  77,  81,  85,  89,  93,
   97,  101, 105, 109, 113, 117, 121, 125,
   130, 134, 138, 142, 146, 150, 154, 158,
   162, 166, 170, 174, 178, 182, 186, 190,
   194, 198, 202, 206, 210, 215, 219, 223,
   227, 231, 235, 239, 243, 247, 251, 255
};
1374 
1375 
/* CC_SEL: view the block at cc as 32-bit words, take the word holding bit
 * number `which` and shift it so that bit becomes bit 0.  The result is
 * not masked; callers keep the low bits they need.  A field that crosses
 * a word boundary cannot be read this way.  The block is only read, so
 * the cast keeps it const.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of c to an 8-bit value. */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand the 6-bit value made of the low 5 bits of c followed by the
 * low bit of b to an 8-bit value.
 */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: step t of n from c0 (t == 0) to c1 (t == n), rounded to nearest.
 * The whole expansion is parenthesized so it can be used as an operand of
 * a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1380 
1381 
1382 static void
fxt1_decode_1HI(const GLubyte * code,GLint t,GLubyte * rgba)1383 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1384 {
1385    const GLuint *cc;
1386 
1387    t *= 3;
1388    cc = (const GLuint *)(code + t / 8);
1389    t = (cc[0] >> (t & 7)) & 7;
1390 
1391    if (t == 7) {
1392       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1393    } else {
1394       GLubyte r, g, b;
1395       cc = (const GLuint *)(code + 12);
1396       if (t == 0) {
1397          b = UP5(CC_SEL(cc, 0));
1398          g = UP5(CC_SEL(cc, 5));
1399          r = UP5(CC_SEL(cc, 10));
1400       } else if (t == 6) {
1401          b = UP5(CC_SEL(cc, 15));
1402          g = UP5(CC_SEL(cc, 20));
1403          r = UP5(CC_SEL(cc, 25));
1404       } else {
1405          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1406          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1407          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1408       }
1409       rgba[RCOMP] = r;
1410       rgba[GCOMP] = g;
1411       rgba[BCOMP] = b;
1412       rgba[ACOMP] = 255;
1413    }
1414 }
1415 
1416 
1417 static void
fxt1_decode_1CHROMA(const GLubyte * code,GLint t,GLubyte * rgba)1418 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1419 {
1420    const GLuint *cc;
1421    GLuint kk;
1422 
1423    cc = (const GLuint *)code;
1424    if (t & 16) {
1425       cc++;
1426       t &= 15;
1427    }
1428    t = (cc[0] >> (t * 2)) & 3;
1429 
1430    t *= 15;
1431    cc = (const GLuint *)(code + 8 + t / 8);
1432    kk = cc[0] >> (t & 7);
1433    rgba[BCOMP] = UP5(kk);
1434    rgba[GCOMP] = UP5(kk >> 5);
1435    rgba[RCOMP] = UP5(kk >> 10);
1436    rgba[ACOMP] = 255;
1437 }
1438 
1439 
1440 static void
fxt1_decode_1MIXED(const GLubyte * code,GLint t,GLubyte * rgba)1441 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1442 {
1443    const GLuint *cc;
1444    GLuint col[2][3];
1445    GLint glsb, selb;
1446 
1447    cc = (const GLuint *)code;
1448    if (t & 16) {
1449       t &= 15;
1450       t = (cc[1] >> (t * 2)) & 3;
1451       /* col 2 */
1452       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1453       col[0][GCOMP] = CC_SEL(cc, 99);
1454       col[0][RCOMP] = CC_SEL(cc, 104);
1455       /* col 3 */
1456       col[1][BCOMP] = CC_SEL(cc, 109);
1457       col[1][GCOMP] = CC_SEL(cc, 114);
1458       col[1][RCOMP] = CC_SEL(cc, 119);
1459       glsb = CC_SEL(cc, 126);
1460       selb = CC_SEL(cc, 33);
1461    } else {
1462       t = (cc[0] >> (t * 2)) & 3;
1463       /* col 0 */
1464       col[0][BCOMP] = CC_SEL(cc, 64);
1465       col[0][GCOMP] = CC_SEL(cc, 69);
1466       col[0][RCOMP] = CC_SEL(cc, 74);
1467       /* col 1 */
1468       col[1][BCOMP] = CC_SEL(cc, 79);
1469       col[1][GCOMP] = CC_SEL(cc, 84);
1470       col[1][RCOMP] = CC_SEL(cc, 89);
1471       glsb = CC_SEL(cc, 125);
1472       selb = CC_SEL(cc, 1);
1473    }
1474 
1475    if (CC_SEL(cc, 124) & 1) {
1476       /* alpha[0] == 1 */
1477 
1478       if (t == 3) {
1479          /* zero */
1480          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1481       } else {
1482          GLubyte r, g, b;
1483          if (t == 0) {
1484             b = UP5(col[0][BCOMP]);
1485             g = UP5(col[0][GCOMP]);
1486             r = UP5(col[0][RCOMP]);
1487          } else if (t == 2) {
1488             b = UP5(col[1][BCOMP]);
1489             g = UP6(col[1][GCOMP], glsb);
1490             r = UP5(col[1][RCOMP]);
1491          } else {
1492             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1493             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1494             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1495          }
1496          rgba[RCOMP] = r;
1497          rgba[GCOMP] = g;
1498          rgba[BCOMP] = b;
1499          rgba[ACOMP] = 255;
1500       }
1501    } else {
1502       /* alpha[0] == 0 */
1503       GLubyte r, g, b;
1504       if (t == 0) {
1505          b = UP5(col[0][BCOMP]);
1506          g = UP6(col[0][GCOMP], glsb ^ selb);
1507          r = UP5(col[0][RCOMP]);
1508       } else if (t == 3) {
1509          b = UP5(col[1][BCOMP]);
1510          g = UP6(col[1][GCOMP], glsb);
1511          r = UP5(col[1][RCOMP]);
1512       } else {
1513          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1514          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1515                         UP6(col[1][GCOMP], glsb));
1516          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1517       }
1518       rgba[RCOMP] = r;
1519       rgba[GCOMP] = g;
1520       rgba[BCOMP] = b;
1521       rgba[ACOMP] = 255;
1522    }
1523 }
1524 
1525 
1526 static void
fxt1_decode_1ALPHA(const GLubyte * code,GLint t,GLubyte * rgba)1527 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1528 {
1529    const GLuint *cc;
1530    GLubyte r, g, b, a;
1531 
1532    cc = (const GLuint *)code;
1533    if (CC_SEL(cc, 124) & 1) {
1534       /* lerp == 1 */
1535       GLuint col0[4];
1536 
1537       if (t & 16) {
1538          t &= 15;
1539          t = (cc[1] >> (t * 2)) & 3;
1540          /* col 2 */
1541          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1542          col0[GCOMP] = CC_SEL(cc, 99);
1543          col0[RCOMP] = CC_SEL(cc, 104);
1544          col0[ACOMP] = CC_SEL(cc, 119);
1545       } else {
1546          t = (cc[0] >> (t * 2)) & 3;
1547          /* col 0 */
1548          col0[BCOMP] = CC_SEL(cc, 64);
1549          col0[GCOMP] = CC_SEL(cc, 69);
1550          col0[RCOMP] = CC_SEL(cc, 74);
1551          col0[ACOMP] = CC_SEL(cc, 109);
1552       }
1553 
1554       if (t == 0) {
1555          b = UP5(col0[BCOMP]);
1556          g = UP5(col0[GCOMP]);
1557          r = UP5(col0[RCOMP]);
1558          a = UP5(col0[ACOMP]);
1559       } else if (t == 3) {
1560          b = UP5(CC_SEL(cc, 79));
1561          g = UP5(CC_SEL(cc, 84));
1562          r = UP5(CC_SEL(cc, 89));
1563          a = UP5(CC_SEL(cc, 114));
1564       } else {
1565          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1566          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1567          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1568          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1569       }
1570    } else {
1571       /* lerp == 0 */
1572 
1573       if (t & 16) {
1574          cc++;
1575          t &= 15;
1576       }
1577       t = (cc[0] >> (t * 2)) & 3;
1578 
1579       if (t == 3) {
1580          /* zero */
1581          r = g = b = a = 0;
1582       } else {
1583          GLuint kk;
1584          cc = (const GLuint *)code;
1585          a = UP5(cc[3] >> (t * 5 + 13));
1586          t *= 15;
1587          cc = (const GLuint *)(code + 8 + t / 8);
1588          kk = cc[0] >> (t & 7);
1589          b = UP5(kk);
1590          g = UP5(kk >> 5);
1591          r = UP5(kk >> 10);
1592       }
1593    }
1594    rgba[RCOMP] = r;
1595    rgba[GCOMP] = g;
1596    rgba[BCOMP] = b;
1597    rgba[ACOMP] = a;
1598 }
1599 
1600 
1601 static void
fxt1_decode_1(const void * texture,GLint stride,GLint i,GLint j,GLubyte * rgba)1602 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1603                GLint i, GLint j, GLubyte *rgba)
1604 {
1605    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1606       fxt1_decode_1HI,     /* cc-high   = "00?" */
1607       fxt1_decode_1HI,     /* cc-high   = "00?" */
1608       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1609       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1610       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1611       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1612       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1613       fxt1_decode_1MIXED   /* mixed     = "1??" */
1614    };
1615 
1616    const GLubyte *code = (const GLubyte *)texture +
1617                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1618    GLint mode = CC_SEL(code, 125);
1619    GLint t = i & 7;
1620 
1621    if (t & 4) {
1622       t += 12;
1623    }
1624    t += (j & 3) * 4;
1625 
1626    decode_1[mode](code, t, rgba);
1627 }
1628 
1629 
1630 
1631 
1632 static void
fetch_rgb_fxt1(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLfloat * texel)1633 fetch_rgb_fxt1(const GLubyte *map,
1634                GLint rowStride, GLint i, GLint j, GLfloat *texel)
1635 {
1636    GLubyte rgba[4];
1637    fxt1_decode_1(map, rowStride, i, j, rgba);
1638    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1639    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1640    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1641    texel[ACOMP] = 1.0F;
1642 }
1643 
1644 
1645 static void
fetch_rgba_fxt1(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLfloat * texel)1646 fetch_rgba_fxt1(const GLubyte *map,
1647                 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1648 {
1649    GLubyte rgba[4];
1650    fxt1_decode_1(map, rowStride, i, j, rgba);
1651    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1652    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1653    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1654    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1655 }
1656 
1657 
1658 compressed_fetch_func
_mesa_get_fxt_fetch_func(mesa_format format)1659 _mesa_get_fxt_fetch_func(mesa_format format)
1660 {
1661    switch (format) {
1662    case MESA_FORMAT_RGB_FXT1:
1663       return fetch_rgb_fxt1;
1664    case MESA_FORMAT_RGBA_FXT1:
1665       return fetch_rgba_fxt1;
1666    default:
1667       return NULL;
1668    }
1669 }
1670