1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
#include <string.h>

#include "glheader.h"
#include "imports.h"
#include "colormac.h"
#include "image.h"
#include "macros.h"
#include "mfeatures.h"
#include "mipmap.h"
#include "texcompress.h"
#include "texcompress_fxt1.h"
#include "texstore.h"
#include "swrast/s_context.h"
43
44
45 #if FEATURE_texture_fxt1
46
47
/*
 * Forward declaration of the block encoder; its definition is not part of
 * this section.  The texstore functions below call it with the image
 * width and height, the number of components per source texel (3 or 4),
 * the source pixels with their row stride, and the destination buffer
 * with its row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder; its definition is not
 * part of this section.  The fetch functions below pass the texture map,
 * its row stride and the texel coordinates (i, j), then read the four
 * components back from rgba[RCOMP..ACOMP].
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLubyte *rgba);
56
57
58 /**
59 * Store user's image in rgb_fxt1 format.
60 */
61 GLboolean
_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
63 {
64 const GLubyte *pixels;
65 GLint srcRowStride;
66 GLubyte *dst;
67 const GLubyte *tempImage = NULL;
68
69 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
70
71 if (srcFormat != GL_RGB ||
72 srcType != GL_UNSIGNED_BYTE ||
73 ctx->_ImageTransferState ||
74 srcPacking->RowLength != srcWidth ||
75 srcPacking->SwapBytes) {
76 /* convert image to RGB/GLubyte */
77 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
78 baseInternalFormat,
79 _mesa_get_format_base_format(dstFormat),
80 srcWidth, srcHeight, srcDepth,
81 srcFormat, srcType, srcAddr,
82 srcPacking);
83 if (!tempImage)
84 return GL_FALSE; /* out of memory */
85 pixels = tempImage;
86 srcRowStride = 3 * srcWidth;
87 srcFormat = GL_RGB;
88 }
89 else {
90 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
91 srcFormat, srcType, 0, 0);
92
93 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
94 srcType) / sizeof(GLubyte);
95 }
96
97 dst = dstSlices[0];
98
99 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
100 dst, dstRowStride);
101
102 if (tempImage)
103 free((void*) tempImage);
104
105 return GL_TRUE;
106 }
107
108
109 /**
110 * Store user's image in rgba_fxt1 format.
111 */
112 GLboolean
_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)113 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
114 {
115 const GLubyte *pixels;
116 GLint srcRowStride;
117 GLubyte *dst;
118 const GLubyte *tempImage = NULL;
119
120 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
121
122 if (srcFormat != GL_RGBA ||
123 srcType != GL_UNSIGNED_BYTE ||
124 ctx->_ImageTransferState ||
125 srcPacking->SwapBytes) {
126 /* convert image to RGBA/GLubyte */
127 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
128 baseInternalFormat,
129 _mesa_get_format_base_format(dstFormat),
130 srcWidth, srcHeight, srcDepth,
131 srcFormat, srcType, srcAddr,
132 srcPacking);
133 if (!tempImage)
134 return GL_FALSE; /* out of memory */
135 pixels = tempImage;
136 srcRowStride = 4 * srcWidth;
137 srcFormat = GL_RGBA;
138 }
139 else {
140 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
141 srcFormat, srcType, 0, 0);
142
143 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
144 srcType) / sizeof(GLubyte);
145 }
146
147 dst = dstSlices[0];
148
149 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
150 dst, dstRowStride);
151
152 if (tempImage)
153 free((void*) tempImage);
154
155 return GL_TRUE;
156 }
157
158
159 void
_mesa_fetch_texel_2d_f_rgba_fxt1(const struct swrast_texture_image * texImage,GLint i,GLint j,GLint k,GLfloat * texel)160 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
161 GLint i, GLint j, GLint k, GLfloat *texel )
162 {
163 /* just sample as GLubyte and convert to float here */
164 GLubyte rgba[4];
165 (void) k;
166 fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
167 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
168 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
169 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
170 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
171 }
172
173
174 void
_mesa_fetch_texel_2d_f_rgb_fxt1(const struct swrast_texture_image * texImage,GLint i,GLint j,GLint k,GLfloat * texel)175 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
176 GLint i, GLint j, GLint k, GLfloat *texel )
177 {
178 /* just sample as GLubyte and convert to float here */
179 GLubyte rgba[4];
180 (void) k;
181 fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
182 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
183 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
184 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
185 texel[ACOMP] = 1.0F;
186 }
187
188
189
/***************************************************************************\
 * FXT1 encoder
 *
 * The encoder was built by reversing the decoder and is loosely based on
 * Texus2 by 3dfx.  Note that this code is merely a proof of concept: it
 * is highly UNoptimized and, moreover, sub-optimal because of the initial
 * conditions passed to Lloyd's algorithm (the interpolation modes are
 * even worse).
\***************************************************************************/


#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when all four components of texel v are zero ("transparent black").
 *
 * The bytes are tested individually: reading the byte array through a
 * GLuint pointer would violate the aliasing rules and could be misaligned.
 * v is evaluated more than once, so it must be free of side effects.
 */
#define ISTBLACK(v) (((v)[0] | (v)[1] | (v)[2] | (v)[3]) == 0)
210
/*
 * Define a 64-bit unsigned integer type and macros.
 *
 * FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 * FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 * FX64_SHL(a, c)    shift a left by c bits (0 <= c < 64)
 *
 * All macro arguments are parenthesized so that expressions can be passed
 * safely.  The fallback (two 32-bit words) is currently compiled out.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear .hi as well, otherwise the first FX64_SHL reads garbage */
#define FX64_MOV32(a, b) ((a).lo = (b), (a).hi = 0)
#define FX64_OR32(a, b)  ((a).lo |= (b))

/* a zero shift is a no-op; (a).lo >> 32 would be undefined */
#define FX64_SHL(a, c)                                         \
   do {                                                        \
      if ((c) >= 32) {                                         \
         (a).hi = (a).lo << ((c) - 32);                        \
         (a).lo = 0;                                           \
      } else if ((c) > 0) {                                    \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));    \
         (a).lo <<= (c);                                       \
      }                                                        \
   } while (0)

#endif
247
248
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and bias B that map a colour onto the
 * index range 0..NV along the line from V0 to V1 (see CALCCDOT).
 *
 * The enclosing scope must provide an integer loop variable named `i'.
 * When V0 and V1 coincide the squared length is zero; the scale is then
 * forced to zero instead of dividing by zero, so every colour maps to
 * index 0 rather than to NaN.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
   do { \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F; \
      GLfloat rd2; \
      \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i); \
         d2 += (IV)[i] * (IV)[i]; \
      } \
      rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F; \
      (B) = 0; \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] *= F(i); \
         (B) -= (IV)[i] * (V0)[i]; \
         (IV)[i] *= rd2; \
      } \
      (B) = (B) * rd2 + 0.5f; \
   } while (0)

/*
 * Project colour V onto the interpolation vector prepared by MAKEIVEC and
 * store the resulting index in TEXEL.  With SAFECDOT enabled the index is
 * clamped to 0..NV.  Uses the enclosing scope's loop variable `i'.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do { \
      GLfloat dot = 0.0F; \
      for (i = 0; i < (NC); i++) { \
         dot += (V)[i] * (IV)[i]; \
      } \
      (TEXEL) = (GLint)(dot + (B)); \
      if (SAFECDOT) { \
         if ((TEXEL) < 0) { \
            (TEXEL) = 0; \
         } else if ((TEXEL) > (NV)) { \
            (TEXEL) = (NV); \
         } \
      } \
   } while (0)
287
288
289 static GLint
fxt1_bestcol(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[MAX_COMP],GLint nc)290 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
291 GLubyte input[MAX_COMP], GLint nc)
292 {
293 GLint i, j, best = -1;
294 GLfloat err = 1e9; /* big enough */
295
296 for (j = 0; j < nv; j++) {
297 GLfloat e = 0.0F;
298 for (i = 0; i < nc; i++) {
299 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
300 }
301 if (e < err) {
302 err = e;
303 best = j;
304 }
305 }
306
307 return best;
308 }
309
310
311 static GLint
fxt1_worst(GLfloat vec[MAX_COMP],GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)312 fxt1_worst (GLfloat vec[MAX_COMP],
313 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
314 {
315 GLint i, k, worst = -1;
316 GLfloat err = -1.0F; /* small enough */
317
318 for (k = 0; k < n; k++) {
319 GLfloat e = 0.0F;
320 for (i = 0; i < nc; i++) {
321 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
322 }
323 if (e > err) {
324 err = e;
325 worst = k;
326 }
327 }
328
329 return worst;
330 }
331
332
333 static GLint
fxt1_variance(GLdouble variance[MAX_COMP],GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)334 fxt1_variance (GLdouble variance[MAX_COMP],
335 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
336 {
337 GLint i, k, best = 0;
338 GLint sx, sx2;
339 GLdouble var, maxvar = -1; /* small enough */
340 GLdouble teenth = 1.0 / n;
341
342 for (i = 0; i < nc; i++) {
343 sx = sx2 = 0;
344 for (k = 0; k < n; k++) {
345 GLint t = input[k][i];
346 sx += t;
347 sx2 += t * t;
348 }
349 var = sx2 * teenth - sx * sx * teenth * teenth;
350 if (maxvar < var) {
351 maxvar = var;
352 best = i;
353 }
354 if (variance) {
355 variance[i] = var;
356 }
357 }
358
359 return best;
360 }
361
362
363 static GLint
fxt1_choose(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)364 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
365 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
366 {
367 #if 0
368 /* Choose colors from a grid.
369 */
370 GLint i, j;
371
372 for (j = 0; j < nv; j++) {
373 GLint m = j * (n - 1) / (nv - 1);
374 for (i = 0; i < nc; i++) {
375 vec[j][i] = input[m][i];
376 }
377 }
378 #else
379 /* Our solution here is to find the darkest and brightest colors in
380 * the 8x4 tile and use those as the two representative colors.
381 * There are probably better algorithms to use (histogram-based).
382 */
383 GLint i, j, k;
384 GLint minSum = 2000; /* big enough */
385 GLint maxSum = -1; /* small enough */
386 GLint minCol = 0; /* phoudoin: silent compiler! */
387 GLint maxCol = 0; /* phoudoin: silent compiler! */
388
389 struct {
390 GLint flag;
391 GLint key;
392 GLint freq;
393 GLint idx;
394 } hist[N_TEXELS];
395 GLint lenh = 0;
396
397 memset(hist, 0, sizeof(hist));
398
399 for (k = 0; k < n; k++) {
400 GLint l;
401 GLint key = 0;
402 GLint sum = 0;
403 for (i = 0; i < nc; i++) {
404 key <<= 8;
405 key |= input[k][i];
406 sum += input[k][i];
407 }
408 for (l = 0; l < n; l++) {
409 if (!hist[l].flag) {
410 /* alloc new slot */
411 hist[l].flag = !0;
412 hist[l].key = key;
413 hist[l].freq = 1;
414 hist[l].idx = k;
415 lenh = l + 1;
416 break;
417 } else if (hist[l].key == key) {
418 hist[l].freq++;
419 break;
420 }
421 }
422 if (minSum > sum) {
423 minSum = sum;
424 minCol = k;
425 }
426 if (maxSum < sum) {
427 maxSum = sum;
428 maxCol = k;
429 }
430 }
431
432 if (lenh <= nv) {
433 for (j = 0; j < lenh; j++) {
434 for (i = 0; i < nc; i++) {
435 vec[j][i] = (GLfloat)input[hist[j].idx][i];
436 }
437 }
438 for (; j < nv; j++) {
439 for (i = 0; i < nc; i++) {
440 vec[j][i] = vec[0][i];
441 }
442 }
443 return 0;
444 }
445
446 for (j = 0; j < nv; j++) {
447 for (i = 0; i < nc; i++) {
448 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
449 }
450 }
451 #endif
452
453 return !0;
454 }
455
456
457 static GLint
fxt1_lloyd(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)458 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
459 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
460 {
461 /* Use the generalized lloyd's algorithm for VQ:
462 * find 4 color vectors.
463 *
464 * for each sample color
465 * sort to nearest vector.
466 *
467 * replace each vector with the centroid of its matching colors.
468 *
469 * repeat until RMS doesn't improve.
470 *
471 * if a color vector has no samples, or becomes the same as another
472 * vector, replace it with the color which is farthest from a sample.
473 *
474 * vec[][MAX_COMP] initial vectors and resulting colors
475 * nv number of resulting colors required
476 * input[N_TEXELS][MAX_COMP] input texels
477 * nc number of components in input / vec
478 * n number of input samples
479 */
480
481 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
482 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
483 GLfloat error, lasterror = 1e9;
484
485 GLint i, j, k, rep;
486
487 /* the quantizer */
488 for (rep = 0; rep < LL_N_REP; rep++) {
489 /* reset sums & counters */
490 for (j = 0; j < nv; j++) {
491 for (i = 0; i < nc; i++) {
492 sum[j][i] = 0;
493 }
494 cnt[j] = 0;
495 }
496 error = 0;
497
498 /* scan whole block */
499 for (k = 0; k < n; k++) {
500 #if 1
501 GLint best = -1;
502 GLfloat err = 1e9; /* big enough */
503 /* determine best vector */
504 for (j = 0; j < nv; j++) {
505 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
506 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
507 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
508 if (nc == 4) {
509 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
510 }
511 if (e < err) {
512 err = e;
513 best = j;
514 }
515 }
516 #else
517 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
518 #endif
519 assert(best >= 0);
520 /* add in closest color */
521 for (i = 0; i < nc; i++) {
522 sum[best][i] += input[k][i];
523 }
524 /* mark this vector as used */
525 cnt[best]++;
526 /* accumulate error */
527 error += err;
528 }
529
530 /* check RMS */
531 if ((error < LL_RMS_E) ||
532 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
533 return !0; /* good match */
534 }
535 lasterror = error;
536
537 /* move each vector to the barycenter of its closest colors */
538 for (j = 0; j < nv; j++) {
539 if (cnt[j]) {
540 GLfloat div = 1.0F / cnt[j];
541 for (i = 0; i < nc; i++) {
542 vec[j][i] = div * sum[j][i];
543 }
544 } else {
545 /* this vec has no samples or is identical with a previous vec */
546 GLint worst = fxt1_worst(vec[j], input, nc, n);
547 for (i = 0; i < nc; i++) {
548 vec[j][i] = input[worst][i];
549 }
550 }
551 }
552 }
553
554 return 0; /* could not converge fast enough */
555 }
556
557
558 static void
fxt1_quantize_CHROMA(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])559 fxt1_quantize_CHROMA (GLuint *cc,
560 GLubyte input[N_TEXELS][MAX_COMP])
561 {
562 const GLint n_vect = 4; /* 4 base vectors to find */
563 const GLint n_comp = 3; /* 3 components: R, G, B */
564 GLfloat vec[MAX_VECT][MAX_COMP];
565 GLint i, j, k;
566 Fx64 hi; /* high quadword */
567 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
568
569 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
570 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
571 }
572
573 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
574 for (j = n_vect - 1; j >= 0; j--) {
575 for (i = 0; i < n_comp; i++) {
576 /* add in colors */
577 FX64_SHL(hi, 5);
578 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
579 }
580 }
581 ((Fx64 *)cc)[1] = hi;
582
583 lohi = lolo = 0;
584 /* right microtile */
585 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
586 lohi <<= 2;
587 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
588 }
589 /* left microtile */
590 for (; k >= 0; k--) {
591 lolo <<= 2;
592 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
593 }
594 cc[1] = lohi;
595 cc[0] = lolo;
596 }
597
598
599 static void
fxt1_quantize_ALPHA0(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP],GLubyte reord[N_TEXELS][MAX_COMP],GLint n)600 fxt1_quantize_ALPHA0 (GLuint *cc,
601 GLubyte input[N_TEXELS][MAX_COMP],
602 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
603 {
604 const GLint n_vect = 3; /* 3 base vectors to find */
605 const GLint n_comp = 4; /* 4 components: R, G, B, A */
606 GLfloat vec[MAX_VECT][MAX_COMP];
607 GLint i, j, k;
608 Fx64 hi; /* high quadword */
609 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
610
611 /* the last vector indicates zero */
612 for (i = 0; i < n_comp; i++) {
613 vec[n_vect][i] = 0;
614 }
615
616 /* the first n texels in reord are guaranteed to be non-zero */
617 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
618 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
619 }
620
621 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
622 for (j = n_vect - 1; j >= 0; j--) {
623 /* add in alphas */
624 FX64_SHL(hi, 5);
625 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
626 }
627 for (j = n_vect - 1; j >= 0; j--) {
628 for (i = 0; i < n_comp - 1; i++) {
629 /* add in colors */
630 FX64_SHL(hi, 5);
631 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
632 }
633 }
634 ((Fx64 *)cc)[1] = hi;
635
636 lohi = lolo = 0;
637 /* right microtile */
638 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
639 lohi <<= 2;
640 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
641 }
642 /* left microtile */
643 for (; k >= 0; k--) {
644 lolo <<= 2;
645 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
646 }
647 cc[1] = lohi;
648 cc[0] = lolo;
649 }
650
651
652 static void
fxt1_quantize_ALPHA1(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])653 fxt1_quantize_ALPHA1 (GLuint *cc,
654 GLubyte input[N_TEXELS][MAX_COMP])
655 {
656 const GLint n_vect = 3; /* highest vector number in each microtile */
657 const GLint n_comp = 4; /* 4 components: R, G, B, A */
658 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
659 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
660 GLint i, j, k;
661 Fx64 hi; /* high quadword */
662 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
663
664 GLint minSum;
665 GLint maxSum;
666 GLint minColL = 0, maxColL = 0;
667 GLint minColR = 0, maxColR = 0;
668 GLint sumL = 0, sumR = 0;
669 GLint nn_comp;
670 /* Our solution here is to find the darkest and brightest colors in
671 * the 4x4 tile and use those as the two representative colors.
672 * There are probably better algorithms to use (histogram-based).
673 */
674 nn_comp = n_comp;
675 while ((minColL == maxColL) && nn_comp) {
676 minSum = 2000; /* big enough */
677 maxSum = -1; /* small enough */
678 for (k = 0; k < N_TEXELS / 2; k++) {
679 GLint sum = 0;
680 for (i = 0; i < nn_comp; i++) {
681 sum += input[k][i];
682 }
683 if (minSum > sum) {
684 minSum = sum;
685 minColL = k;
686 }
687 if (maxSum < sum) {
688 maxSum = sum;
689 maxColL = k;
690 }
691 sumL += sum;
692 }
693
694 nn_comp--;
695 }
696
697 nn_comp = n_comp;
698 while ((minColR == maxColR) && nn_comp) {
699 minSum = 2000; /* big enough */
700 maxSum = -1; /* small enough */
701 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
702 GLint sum = 0;
703 for (i = 0; i < nn_comp; i++) {
704 sum += input[k][i];
705 }
706 if (minSum > sum) {
707 minSum = sum;
708 minColR = k;
709 }
710 if (maxSum < sum) {
711 maxSum = sum;
712 maxColR = k;
713 }
714 sumR += sum;
715 }
716
717 nn_comp--;
718 }
719
720 /* choose the common vector (yuck!) */
721 {
722 GLint j1, j2;
723 GLint v1 = 0, v2 = 0;
724 GLfloat err = 1e9; /* big enough */
725 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
726 for (i = 0; i < n_comp; i++) {
727 tv[0][i] = input[minColL][i];
728 tv[1][i] = input[maxColL][i];
729 tv[2][i] = input[minColR][i];
730 tv[3][i] = input[maxColR][i];
731 }
732 for (j1 = 0; j1 < 2; j1++) {
733 for (j2 = 2; j2 < 4; j2++) {
734 GLfloat e = 0.0F;
735 for (i = 0; i < n_comp; i++) {
736 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
737 }
738 if (e < err) {
739 err = e;
740 v1 = j1;
741 v2 = j2;
742 }
743 }
744 }
745 for (i = 0; i < n_comp; i++) {
746 vec[0][i] = tv[1 - v1][i];
747 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
748 vec[2][i] = tv[5 - v2][i];
749 }
750 }
751
752 /* left microtile */
753 cc[0] = 0;
754 if (minColL != maxColL) {
755 /* compute interpolation vector */
756 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
757
758 /* add in texels */
759 lolo = 0;
760 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
761 GLint texel;
762 /* interpolate color */
763 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
764 /* add in texel */
765 lolo <<= 2;
766 lolo |= texel;
767 }
768
769 cc[0] = lolo;
770 }
771
772 /* right microtile */
773 cc[1] = 0;
774 if (minColR != maxColR) {
775 /* compute interpolation vector */
776 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
777
778 /* add in texels */
779 lohi = 0;
780 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
781 GLint texel;
782 /* interpolate color */
783 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
784 /* add in texel */
785 lohi <<= 2;
786 lohi |= texel;
787 }
788
789 cc[1] = lohi;
790 }
791
792 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
793 for (j = n_vect - 1; j >= 0; j--) {
794 /* add in alphas */
795 FX64_SHL(hi, 5);
796 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
797 }
798 for (j = n_vect - 1; j >= 0; j--) {
799 for (i = 0; i < n_comp - 1; i++) {
800 /* add in colors */
801 FX64_SHL(hi, 5);
802 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
803 }
804 }
805 ((Fx64 *)cc)[1] = hi;
806 }
807
808
809 static void
fxt1_quantize_HI(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP],GLubyte reord[N_TEXELS][MAX_COMP],GLint n)810 fxt1_quantize_HI (GLuint *cc,
811 GLubyte input[N_TEXELS][MAX_COMP],
812 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
813 {
814 const GLint n_vect = 6; /* highest vector number */
815 const GLint n_comp = 3; /* 3 components: R, G, B */
816 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
817 GLfloat iv[MAX_COMP]; /* interpolation vector */
818 GLint i, k;
819 GLuint hihi; /* high quadword: hi dword */
820
821 GLint minSum = 2000; /* big enough */
822 GLint maxSum = -1; /* small enough */
823 GLint minCol = 0; /* phoudoin: silent compiler! */
824 GLint maxCol = 0; /* phoudoin: silent compiler! */
825
826 /* Our solution here is to find the darkest and brightest colors in
827 * the 8x4 tile and use those as the two representative colors.
828 * There are probably better algorithms to use (histogram-based).
829 */
830 for (k = 0; k < n; k++) {
831 GLint sum = 0;
832 for (i = 0; i < n_comp; i++) {
833 sum += reord[k][i];
834 }
835 if (minSum > sum) {
836 minSum = sum;
837 minCol = k;
838 }
839 if (maxSum < sum) {
840 maxSum = sum;
841 maxCol = k;
842 }
843 }
844
845 hihi = 0; /* cc-hi = "00" */
846 for (i = 0; i < n_comp; i++) {
847 /* add in colors */
848 hihi <<= 5;
849 hihi |= reord[maxCol][i] >> 3;
850 }
851 for (i = 0; i < n_comp; i++) {
852 /* add in colors */
853 hihi <<= 5;
854 hihi |= reord[minCol][i] >> 3;
855 }
856 cc[3] = hihi;
857 cc[0] = cc[1] = cc[2] = 0;
858
859 /* compute interpolation vector */
860 if (minCol != maxCol) {
861 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
862 }
863
864 /* add in texels */
865 for (k = N_TEXELS - 1; k >= 0; k--) {
866 GLint t = k * 3;
867 GLuint *kk = (GLuint *)((char *)cc + t / 8);
868 GLint texel = n_vect + 1; /* transparent black */
869
870 if (!ISTBLACK(input[k])) {
871 if (minCol != maxCol) {
872 /* interpolate color */
873 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
874 /* add in texel */
875 kk[0] |= texel << (t & 7);
876 }
877 } else {
878 /* add in texel */
879 kk[0] |= texel << (t & 7);
880 }
881 }
882 }
883
884
885 static void
fxt1_quantize_MIXED1(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])886 fxt1_quantize_MIXED1 (GLuint *cc,
887 GLubyte input[N_TEXELS][MAX_COMP])
888 {
889 const GLint n_vect = 2; /* highest vector number in each microtile */
890 const GLint n_comp = 3; /* 3 components: R, G, B */
891 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
892 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
893 GLint i, j, k;
894 Fx64 hi; /* high quadword */
895 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
896
897 GLint minSum;
898 GLint maxSum;
899 GLint minColL = 0, maxColL = -1;
900 GLint minColR = 0, maxColR = -1;
901
902 /* Our solution here is to find the darkest and brightest colors in
903 * the 4x4 tile and use those as the two representative colors.
904 * There are probably better algorithms to use (histogram-based).
905 */
906 minSum = 2000; /* big enough */
907 maxSum = -1; /* small enough */
908 for (k = 0; k < N_TEXELS / 2; k++) {
909 if (!ISTBLACK(input[k])) {
910 GLint sum = 0;
911 for (i = 0; i < n_comp; i++) {
912 sum += input[k][i];
913 }
914 if (minSum > sum) {
915 minSum = sum;
916 minColL = k;
917 }
918 if (maxSum < sum) {
919 maxSum = sum;
920 maxColL = k;
921 }
922 }
923 }
924 minSum = 2000; /* big enough */
925 maxSum = -1; /* small enough */
926 for (; k < N_TEXELS; k++) {
927 if (!ISTBLACK(input[k])) {
928 GLint sum = 0;
929 for (i = 0; i < n_comp; i++) {
930 sum += input[k][i];
931 }
932 if (minSum > sum) {
933 minSum = sum;
934 minColR = k;
935 }
936 if (maxSum < sum) {
937 maxSum = sum;
938 maxColR = k;
939 }
940 }
941 }
942
943 /* left microtile */
944 if (maxColL == -1) {
945 /* all transparent black */
946 cc[0] = ~0u;
947 for (i = 0; i < n_comp; i++) {
948 vec[0][i] = 0;
949 vec[1][i] = 0;
950 }
951 } else {
952 cc[0] = 0;
953 for (i = 0; i < n_comp; i++) {
954 vec[0][i] = input[minColL][i];
955 vec[1][i] = input[maxColL][i];
956 }
957 if (minColL != maxColL) {
958 /* compute interpolation vector */
959 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
960
961 /* add in texels */
962 lolo = 0;
963 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
964 GLint texel = n_vect + 1; /* transparent black */
965 if (!ISTBLACK(input[k])) {
966 /* interpolate color */
967 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
968 }
969 /* add in texel */
970 lolo <<= 2;
971 lolo |= texel;
972 }
973 cc[0] = lolo;
974 }
975 }
976
977 /* right microtile */
978 if (maxColR == -1) {
979 /* all transparent black */
980 cc[1] = ~0u;
981 for (i = 0; i < n_comp; i++) {
982 vec[2][i] = 0;
983 vec[3][i] = 0;
984 }
985 } else {
986 cc[1] = 0;
987 for (i = 0; i < n_comp; i++) {
988 vec[2][i] = input[minColR][i];
989 vec[3][i] = input[maxColR][i];
990 }
991 if (minColR != maxColR) {
992 /* compute interpolation vector */
993 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
994
995 /* add in texels */
996 lohi = 0;
997 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
998 GLint texel = n_vect + 1; /* transparent black */
999 if (!ISTBLACK(input[k])) {
1000 /* interpolate color */
1001 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1002 }
1003 /* add in texel */
1004 lohi <<= 2;
1005 lohi |= texel;
1006 }
1007 cc[1] = lohi;
1008 }
1009 }
1010
1011 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1012 for (j = 2 * 2 - 1; j >= 0; j--) {
1013 for (i = 0; i < n_comp; i++) {
1014 /* add in colors */
1015 FX64_SHL(hi, 5);
1016 FX64_OR32(hi, vec[j][i] >> 3);
1017 }
1018 }
1019 ((Fx64 *)cc)[1] = hi;
1020 }
1021
1022
1023 static void
fxt1_quantize_MIXED0(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])1024 fxt1_quantize_MIXED0 (GLuint *cc,
1025 GLubyte input[N_TEXELS][MAX_COMP])
1026 {
1027 const GLint n_vect = 3; /* highest vector number in each microtile */
1028 const GLint n_comp = 3; /* 3 components: R, G, B */
1029 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1030 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1031 GLint i, j, k;
1032 Fx64 hi; /* high quadword */
1033 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1034
1035 GLint minColL = 0, maxColL = 0;
1036 GLint minColR = 0, maxColR = 0;
1037 #if 0
1038 GLint minSum;
1039 GLint maxSum;
1040
1041 /* Our solution here is to find the darkest and brightest colors in
1042 * the 4x4 tile and use those as the two representative colors.
1043 * There are probably better algorithms to use (histogram-based).
1044 */
1045 minSum = 2000; /* big enough */
1046 maxSum = -1; /* small enough */
1047 for (k = 0; k < N_TEXELS / 2; k++) {
1048 GLint sum = 0;
1049 for (i = 0; i < n_comp; i++) {
1050 sum += input[k][i];
1051 }
1052 if (minSum > sum) {
1053 minSum = sum;
1054 minColL = k;
1055 }
1056 if (maxSum < sum) {
1057 maxSum = sum;
1058 maxColL = k;
1059 }
1060 }
1061 minSum = 2000; /* big enough */
1062 maxSum = -1; /* small enough */
1063 for (; k < N_TEXELS; k++) {
1064 GLint sum = 0;
1065 for (i = 0; i < n_comp; i++) {
1066 sum += input[k][i];
1067 }
1068 if (minSum > sum) {
1069 minSum = sum;
1070 minColR = k;
1071 }
1072 if (maxSum < sum) {
1073 maxSum = sum;
1074 maxColR = k;
1075 }
1076 }
1077 #else
1078 GLint minVal;
1079 GLint maxVal;
1080 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1081 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1082
1083 /* Scan the channel with max variance for lo & hi
1084 * and use those as the two representative colors.
1085 */
1086 minVal = 2000; /* big enough */
1087 maxVal = -1; /* small enough */
1088 for (k = 0; k < N_TEXELS / 2; k++) {
1089 GLint t = input[k][maxVarL];
1090 if (minVal > t) {
1091 minVal = t;
1092 minColL = k;
1093 }
1094 if (maxVal < t) {
1095 maxVal = t;
1096 maxColL = k;
1097 }
1098 }
1099 minVal = 2000; /* big enough */
1100 maxVal = -1; /* small enough */
1101 for (; k < N_TEXELS; k++) {
1102 GLint t = input[k][maxVarR];
1103 if (minVal > t) {
1104 minVal = t;
1105 minColR = k;
1106 }
1107 if (maxVal < t) {
1108 maxVal = t;
1109 maxColR = k;
1110 }
1111 }
1112 #endif
1113
1114 /* left microtile */
1115 cc[0] = 0;
1116 for (i = 0; i < n_comp; i++) {
1117 vec[0][i] = input[minColL][i];
1118 vec[1][i] = input[maxColL][i];
1119 }
1120 if (minColL != maxColL) {
1121 /* compute interpolation vector */
1122 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1123
1124 /* add in texels */
1125 lolo = 0;
1126 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1127 GLint texel;
1128 /* interpolate color */
1129 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1130 /* add in texel */
1131 lolo <<= 2;
1132 lolo |= texel;
1133 }
1134
1135 /* funky encoding for LSB of green */
1136 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1137 for (i = 0; i < n_comp; i++) {
1138 vec[1][i] = input[minColL][i];
1139 vec[0][i] = input[maxColL][i];
1140 }
1141 lolo = ~lolo;
1142 }
1143
1144 cc[0] = lolo;
1145 }
1146
1147 /* right microtile */
1148 cc[1] = 0;
1149 for (i = 0; i < n_comp; i++) {
1150 vec[2][i] = input[minColR][i];
1151 vec[3][i] = input[maxColR][i];
1152 }
1153 if (minColR != maxColR) {
1154 /* compute interpolation vector */
1155 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1156
1157 /* add in texels */
1158 lohi = 0;
1159 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1160 GLint texel;
1161 /* interpolate color */
1162 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1163 /* add in texel */
1164 lohi <<= 2;
1165 lohi |= texel;
1166 }
1167
1168 /* funky encoding for LSB of green */
1169 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1170 for (i = 0; i < n_comp; i++) {
1171 vec[3][i] = input[minColR][i];
1172 vec[2][i] = input[maxColR][i];
1173 }
1174 lohi = ~lohi;
1175 }
1176
1177 cc[1] = lohi;
1178 }
1179
1180 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1181 for (j = 2 * 2 - 1; j >= 0; j--) {
1182 for (i = 0; i < n_comp; i++) {
1183 /* add in colors */
1184 FX64_SHL(hi, 5);
1185 FX64_OR32(hi, vec[j][i] >> 3);
1186 }
1187 }
1188 ((Fx64 *)cc)[1] = hi;
1189 }
1190
1191
1192 static void
fxt1_quantize(GLuint * cc,const GLubyte * lines[],GLint comps)1193 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1194 {
1195 GLint trualpha;
1196 GLubyte reord[N_TEXELS][MAX_COMP];
1197
1198 GLubyte input[N_TEXELS][MAX_COMP];
1199 GLint i, k, l;
1200
1201 if (comps == 3) {
1202 /* make the whole block opaque */
1203 memset(input, -1, sizeof(input));
1204 }
1205
1206 /* 8 texels each line */
1207 for (l = 0; l < 4; l++) {
1208 for (k = 0; k < 4; k++) {
1209 for (i = 0; i < comps; i++) {
1210 input[k + l * 4][i] = *lines[l]++;
1211 }
1212 }
1213 for (; k < 8; k++) {
1214 for (i = 0; i < comps; i++) {
1215 input[k + l * 4 + 12][i] = *lines[l]++;
1216 }
1217 }
1218 }
1219
1220 /* block layout:
1221 * 00, 01, 02, 03, 08, 09, 0a, 0b
1222 * 10, 11, 12, 13, 18, 19, 1a, 1b
1223 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1224 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1225 */
1226
1227 /* [dBorca]
1228 * stupidity flows forth from this
1229 */
1230 l = N_TEXELS;
1231 trualpha = 0;
1232 if (comps == 4) {
1233 /* skip all transparent black texels */
1234 l = 0;
1235 for (k = 0; k < N_TEXELS; k++) {
1236 /* test all components against 0 */
1237 if (!ISTBLACK(input[k])) {
1238 /* texel is not transparent black */
1239 COPY_4UBV(reord[l], input[k]);
1240 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1241 /* non-opaque texel */
1242 trualpha = !0;
1243 }
1244 l++;
1245 }
1246 }
1247 }
1248
1249 #if 0
1250 if (trualpha) {
1251 fxt1_quantize_ALPHA0(cc, input, reord, l);
1252 } else if (l == 0) {
1253 cc[0] = cc[1] = cc[2] = -1;
1254 cc[3] = 0;
1255 } else if (l < N_TEXELS) {
1256 fxt1_quantize_HI(cc, input, reord, l);
1257 } else {
1258 fxt1_quantize_CHROMA(cc, input);
1259 }
1260 (void)fxt1_quantize_ALPHA1;
1261 (void)fxt1_quantize_MIXED1;
1262 (void)fxt1_quantize_MIXED0;
1263 #else
1264 if (trualpha) {
1265 fxt1_quantize_ALPHA1(cc, input);
1266 } else if (l == 0) {
1267 cc[0] = cc[1] = cc[2] = ~0u;
1268 cc[3] = 0;
1269 } else if (l < N_TEXELS) {
1270 fxt1_quantize_MIXED1(cc, input);
1271 } else {
1272 fxt1_quantize_MIXED0(cc, input);
1273 }
1274 (void)fxt1_quantize_ALPHA0;
1275 (void)fxt1_quantize_HI;
1276 (void)fxt1_quantize_CHROMA;
1277 #endif
1278 }
1279
1280
1281
1282 /**
1283 * Upscale an image by replication, not (typical) stretching.
1284 * We use this when the image width or height is less than a
1285 * certain size (4, 8) and we need to upscale an image.
1286 */
1287 static void
upscale_teximage2d(GLsizei inWidth,GLsizei inHeight,GLsizei outWidth,GLsizei outHeight,GLint comps,const GLubyte * src,GLint srcRowStride,GLubyte * dest)1288 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1289 GLsizei outWidth, GLsizei outHeight,
1290 GLint comps, const GLubyte *src, GLint srcRowStride,
1291 GLubyte *dest )
1292 {
1293 GLint i, j, k;
1294
1295 ASSERT(outWidth >= inWidth);
1296 ASSERT(outHeight >= inHeight);
1297 #if 0
1298 ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1299 ASSERT((outWidth & 3) == 0);
1300 ASSERT((outHeight & 3) == 0);
1301 #endif
1302
1303 for (i = 0; i < outHeight; i++) {
1304 const GLint ii = i % inHeight;
1305 for (j = 0; j < outWidth; j++) {
1306 const GLint jj = j % inWidth;
1307 for (k = 0; k < comps; k++) {
1308 dest[(i * outWidth + j) * comps + k]
1309 = src[ii * srcRowStride + jj * comps + k];
1310 }
1311 }
1312 }
1313 }
1314
1315
1316 static void
fxt1_encode(GLuint width,GLuint height,GLint comps,const void * source,GLint srcRowStride,void * dest,GLint destRowStride)1317 fxt1_encode (GLuint width, GLuint height, GLint comps,
1318 const void *source, GLint srcRowStride,
1319 void *dest, GLint destRowStride)
1320 {
1321 GLuint x, y;
1322 const GLubyte *data;
1323 GLuint *encoded = (GLuint *)dest;
1324 void *newSource = NULL;
1325
1326 assert(comps == 3 || comps == 4);
1327
1328 /* Replicate image if width is not M8 or height is not M4 */
1329 if ((width & 7) | (height & 3)) {
1330 GLint newWidth = (width + 7) & ~7;
1331 GLint newHeight = (height + 3) & ~3;
1332 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1333 if (!newSource) {
1334 GET_CURRENT_CONTEXT(ctx);
1335 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1336 goto cleanUp;
1337 }
1338 upscale_teximage2d(width, height, newWidth, newHeight,
1339 comps, (const GLubyte *) source,
1340 srcRowStride, (GLubyte *) newSource);
1341 source = newSource;
1342 width = newWidth;
1343 height = newHeight;
1344 srcRowStride = comps * newWidth;
1345 }
1346
1347 data = (const GLubyte *) source;
1348 destRowStride = (destRowStride - width * 2) / 4;
1349 for (y = 0; y < height; y += 4) {
1350 GLuint offs = 0 + (y + 0) * srcRowStride;
1351 for (x = 0; x < width; x += 8) {
1352 const GLubyte *lines[4];
1353 lines[0] = &data[offs];
1354 lines[1] = lines[0] + srcRowStride;
1355 lines[2] = lines[1] + srcRowStride;
1356 lines[3] = lines[2] + srcRowStride;
1357 offs += 8 * comps;
1358 fxt1_quantize(encoded, lines, comps);
1359 /* 128 bits per 8x4 block */
1360 encoded += 4;
1361 }
1362 encoded += destRowStride;
1363 }
1364
1365 cleanUp:
1366 if (newSource != NULL) {
1367 free(newSource);
1368 }
1369 }
1370
1371
1372 /***************************************************************************\
1373 * FXT1 decoder
1374 *
1375 * The decoder is based on GL_3DFX_texture_compression_FXT1
1376 * specification and serves as a concept for the encoder.
1377 \***************************************************************************/
1378
1379
1380 /* lookup table for scaling 5 bit colors up to 8 bits */
1381 static const GLubyte _rgb_scale_5[] = {
1382 0, 8, 16, 25, 33, 41, 49, 58,
1383 66, 74, 82, 90, 99, 107, 115, 123,
1384 132, 140, 148, 156, 165, 173, 181, 189,
1385 197, 206, 214, 222, 230, 239, 247, 255
1386 };
1387
1388 /* lookup table for scaling 6 bit colors up to 8 bits */
1389 static const GLubyte _rgb_scale_6[] = {
1390 0, 4, 8, 12, 16, 20, 24, 28,
1391 32, 36, 40, 45, 49, 53, 57, 61,
1392 65, 69, 73, 77, 81, 85, 89, 93,
1393 97, 101, 105, 109, 113, 117, 121, 125,
1394 130, 134, 138, 142, 146, 150, 154, 158,
1395 162, 166, 170, 174, 178, 182, 186, 190,
1396 194, 198, 202, 206, 210, 215, 219, 223,
1397 227, 231, 235, 239, 243, 247, 251, 255
1398 };
1399
1400
/*
 * Decoder helpers.  Every expansion is fully parenthesized so the macros
 * can be used safely inside larger expressions.
 *
 * CC_SEL(cc, which)  - view cc as an array of 32-bit words and return the
 *                      word holding bit number `which`, shifted right so
 *                      that bit becomes bit 0.  Higher bits are NOT masked
 *                      off; callers mask (directly or through UP5/UP6).
 *                      The data is only read, so the cast keeps it const.
 * UP5(c)             - expand the low 5 bits of c to 8 bits.
 * UP6(c, b)          - expand a 6-bit value built from the low 5 bits of c
 *                      (upper part) and the low bit of b (LSB) to 8 bits.
 * LERP(n, t, c0, c1) - integer interpolation t/n of the way from c0 to c1;
 *                      the n / 2 term rounds to nearest.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1405
1406
1407 static void
fxt1_decode_1HI(const GLubyte * code,GLint t,GLubyte * rgba)1408 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1409 {
1410 const GLuint *cc;
1411
1412 t *= 3;
1413 cc = (const GLuint *)(code + t / 8);
1414 t = (cc[0] >> (t & 7)) & 7;
1415
1416 if (t == 7) {
1417 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1418 } else {
1419 GLubyte r, g, b;
1420 cc = (const GLuint *)(code + 12);
1421 if (t == 0) {
1422 b = UP5(CC_SEL(cc, 0));
1423 g = UP5(CC_SEL(cc, 5));
1424 r = UP5(CC_SEL(cc, 10));
1425 } else if (t == 6) {
1426 b = UP5(CC_SEL(cc, 15));
1427 g = UP5(CC_SEL(cc, 20));
1428 r = UP5(CC_SEL(cc, 25));
1429 } else {
1430 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1431 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1432 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1433 }
1434 rgba[RCOMP] = r;
1435 rgba[GCOMP] = g;
1436 rgba[BCOMP] = b;
1437 rgba[ACOMP] = 255;
1438 }
1439 }
1440
1441
1442 static void
fxt1_decode_1CHROMA(const GLubyte * code,GLint t,GLubyte * rgba)1443 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1444 {
1445 const GLuint *cc;
1446 GLuint kk;
1447
1448 cc = (const GLuint *)code;
1449 if (t & 16) {
1450 cc++;
1451 t &= 15;
1452 }
1453 t = (cc[0] >> (t * 2)) & 3;
1454
1455 t *= 15;
1456 cc = (const GLuint *)(code + 8 + t / 8);
1457 kk = cc[0] >> (t & 7);
1458 rgba[BCOMP] = UP5(kk);
1459 rgba[GCOMP] = UP5(kk >> 5);
1460 rgba[RCOMP] = UP5(kk >> 10);
1461 rgba[ACOMP] = 255;
1462 }
1463
1464
1465 static void
fxt1_decode_1MIXED(const GLubyte * code,GLint t,GLubyte * rgba)1466 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1467 {
1468 const GLuint *cc;
1469 GLuint col[2][3];
1470 GLint glsb, selb;
1471
1472 cc = (const GLuint *)code;
1473 if (t & 16) {
1474 t &= 15;
1475 t = (cc[1] >> (t * 2)) & 3;
1476 /* col 2 */
1477 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1478 col[0][GCOMP] = CC_SEL(cc, 99);
1479 col[0][RCOMP] = CC_SEL(cc, 104);
1480 /* col 3 */
1481 col[1][BCOMP] = CC_SEL(cc, 109);
1482 col[1][GCOMP] = CC_SEL(cc, 114);
1483 col[1][RCOMP] = CC_SEL(cc, 119);
1484 glsb = CC_SEL(cc, 126);
1485 selb = CC_SEL(cc, 33);
1486 } else {
1487 t = (cc[0] >> (t * 2)) & 3;
1488 /* col 0 */
1489 col[0][BCOMP] = CC_SEL(cc, 64);
1490 col[0][GCOMP] = CC_SEL(cc, 69);
1491 col[0][RCOMP] = CC_SEL(cc, 74);
1492 /* col 1 */
1493 col[1][BCOMP] = CC_SEL(cc, 79);
1494 col[1][GCOMP] = CC_SEL(cc, 84);
1495 col[1][RCOMP] = CC_SEL(cc, 89);
1496 glsb = CC_SEL(cc, 125);
1497 selb = CC_SEL(cc, 1);
1498 }
1499
1500 if (CC_SEL(cc, 124) & 1) {
1501 /* alpha[0] == 1 */
1502
1503 if (t == 3) {
1504 /* zero */
1505 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1506 } else {
1507 GLubyte r, g, b;
1508 if (t == 0) {
1509 b = UP5(col[0][BCOMP]);
1510 g = UP5(col[0][GCOMP]);
1511 r = UP5(col[0][RCOMP]);
1512 } else if (t == 2) {
1513 b = UP5(col[1][BCOMP]);
1514 g = UP6(col[1][GCOMP], glsb);
1515 r = UP5(col[1][RCOMP]);
1516 } else {
1517 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1518 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1519 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1520 }
1521 rgba[RCOMP] = r;
1522 rgba[GCOMP] = g;
1523 rgba[BCOMP] = b;
1524 rgba[ACOMP] = 255;
1525 }
1526 } else {
1527 /* alpha[0] == 0 */
1528 GLubyte r, g, b;
1529 if (t == 0) {
1530 b = UP5(col[0][BCOMP]);
1531 g = UP6(col[0][GCOMP], glsb ^ selb);
1532 r = UP5(col[0][RCOMP]);
1533 } else if (t == 3) {
1534 b = UP5(col[1][BCOMP]);
1535 g = UP6(col[1][GCOMP], glsb);
1536 r = UP5(col[1][RCOMP]);
1537 } else {
1538 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1539 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1540 UP6(col[1][GCOMP], glsb));
1541 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1542 }
1543 rgba[RCOMP] = r;
1544 rgba[GCOMP] = g;
1545 rgba[BCOMP] = b;
1546 rgba[ACOMP] = 255;
1547 }
1548 }
1549
1550
1551 static void
fxt1_decode_1ALPHA(const GLubyte * code,GLint t,GLubyte * rgba)1552 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1553 {
1554 const GLuint *cc;
1555 GLubyte r, g, b, a;
1556
1557 cc = (const GLuint *)code;
1558 if (CC_SEL(cc, 124) & 1) {
1559 /* lerp == 1 */
1560 GLuint col0[4];
1561
1562 if (t & 16) {
1563 t &= 15;
1564 t = (cc[1] >> (t * 2)) & 3;
1565 /* col 2 */
1566 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1567 col0[GCOMP] = CC_SEL(cc, 99);
1568 col0[RCOMP] = CC_SEL(cc, 104);
1569 col0[ACOMP] = CC_SEL(cc, 119);
1570 } else {
1571 t = (cc[0] >> (t * 2)) & 3;
1572 /* col 0 */
1573 col0[BCOMP] = CC_SEL(cc, 64);
1574 col0[GCOMP] = CC_SEL(cc, 69);
1575 col0[RCOMP] = CC_SEL(cc, 74);
1576 col0[ACOMP] = CC_SEL(cc, 109);
1577 }
1578
1579 if (t == 0) {
1580 b = UP5(col0[BCOMP]);
1581 g = UP5(col0[GCOMP]);
1582 r = UP5(col0[RCOMP]);
1583 a = UP5(col0[ACOMP]);
1584 } else if (t == 3) {
1585 b = UP5(CC_SEL(cc, 79));
1586 g = UP5(CC_SEL(cc, 84));
1587 r = UP5(CC_SEL(cc, 89));
1588 a = UP5(CC_SEL(cc, 114));
1589 } else {
1590 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1591 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1592 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1593 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1594 }
1595 } else {
1596 /* lerp == 0 */
1597
1598 if (t & 16) {
1599 cc++;
1600 t &= 15;
1601 }
1602 t = (cc[0] >> (t * 2)) & 3;
1603
1604 if (t == 3) {
1605 /* zero */
1606 r = g = b = a = 0;
1607 } else {
1608 GLuint kk;
1609 cc = (const GLuint *)code;
1610 a = UP5(cc[3] >> (t * 5 + 13));
1611 t *= 15;
1612 cc = (const GLuint *)(code + 8 + t / 8);
1613 kk = cc[0] >> (t & 7);
1614 b = UP5(kk);
1615 g = UP5(kk >> 5);
1616 r = UP5(kk >> 10);
1617 }
1618 }
1619 rgba[RCOMP] = r;
1620 rgba[GCOMP] = g;
1621 rgba[BCOMP] = b;
1622 rgba[ACOMP] = a;
1623 }
1624
1625
1626 void
fxt1_decode_1(const void * texture,GLint stride,GLint i,GLint j,GLubyte * rgba)1627 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1628 GLint i, GLint j, GLubyte *rgba)
1629 {
1630 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1631 fxt1_decode_1HI, /* cc-high = "00?" */
1632 fxt1_decode_1HI, /* cc-high = "00?" */
1633 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1634 fxt1_decode_1ALPHA, /* alpha = "011" */
1635 fxt1_decode_1MIXED, /* mixed = "1??" */
1636 fxt1_decode_1MIXED, /* mixed = "1??" */
1637 fxt1_decode_1MIXED, /* mixed = "1??" */
1638 fxt1_decode_1MIXED /* mixed = "1??" */
1639 };
1640
1641 const GLubyte *code = (const GLubyte *)texture +
1642 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1643 GLint mode = CC_SEL(code, 125);
1644 GLint t = i & 7;
1645
1646 if (t & 4) {
1647 t += 12;
1648 }
1649 t += (j & 3) * 4;
1650
1651 decode_1[mode](code, t, rgba);
1652 }
1653
1654
1655 #endif /* FEATURE_texture_fxt1 */
1656