1 /*
2 * Copyright (C) 2011 Red Hat Inc.
3 *
4 * block compression parts are:
5 * Copyright (C) 2004 Roland Scheidegger All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 *
26 * Author:
27 * Dave Airlie
28 */
29
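/* Included by texcompress_rgtc to define the byte/ubyte compressors.
 * The including file is expected to define TAG, TYPE, T_MIN and T_MAX
 * before including this template. */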
31
TAG(fetch_texel_rgtc)32 static void TAG(fetch_texel_rgtc)(unsigned srcRowStride, const TYPE *pixdata,
33 unsigned i, unsigned j, TYPE *value, unsigned comps)
34 {
35 TYPE decode;
36 const TYPE *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8 * comps);
37 const TYPE alpha0 = blksrc[0];
38 const TYPE alpha1 = blksrc[1];
39 const char bit_pos = ((j&3) * 4 + (i&3)) * 3;
40 const TYPE acodelow = blksrc[2 + bit_pos / 8];
41 const TYPE acodehigh = (3 + bit_pos / 8) < 8 ? blksrc[3 + bit_pos / 8] : 0;
42 const TYPE code = (acodelow >> (bit_pos & 0x7) |
43 (acodehigh << (8 - (bit_pos & 0x7)))) & 0x7;
44
45 if (code == 0)
46 decode = alpha0;
47 else if (code == 1)
48 decode = alpha1;
49 else if (alpha0 > alpha1)
50 decode = ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7);
51 else if (code < 6)
52 decode = ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5);
53 else if (code == 6)
54 decode = T_MIN;
55 else
56 decode = T_MAX;
57
58 *value = decode;
59 }
60
TAG(write_rgtc_encoded_channel)61 static void TAG(write_rgtc_encoded_channel)(TYPE *blkaddr,
62 TYPE alphabase1,
63 TYPE alphabase2,
64 TYPE alphaenc[16])
65 {
66 *blkaddr++ = alphabase1;
67 *blkaddr++ = alphabase2;
68 *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
69 *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
70 *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
71 *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
72 *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
73 *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
74 }
75
TAG(encode_rgtc_ubyte)76 static void TAG(encode_rgtc_ubyte)(TYPE *blkaddr, TYPE srccolors[4][4],
77 int numxpixels, int numypixels)
78 {
79 TYPE alphabase[2], alphause[2];
80 short alphatest[2] = { 0 };
81 unsigned int alphablockerror1, alphablockerror2, alphablockerror3;
82 TYPE i, j, aindex, acutValues[7];
83 TYPE alphaenc1[16], alphaenc2[16], alphaenc3[16];
84 int alphaabsmin = 0, alphaabsmax = 0;
85 short alphadist;
86
87 /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
88 alphabase[0] = T_MAX; alphabase[1] = T_MIN;
89 for (j = 0; j < numypixels; j++) {
90 for (i = 0; i < numxpixels; i++) {
91 if (srccolors[j][i] == T_MIN)
92 alphaabsmin = 1;
93 else if (srccolors[j][i] == T_MAX)
94 alphaabsmax = 1;
95 else {
96 if (srccolors[j][i] > alphabase[1])
97 alphabase[1] = srccolors[j][i];
98 if (srccolors[j][i] < alphabase[0])
99 alphabase[0] = srccolors[j][i];
100 }
101 }
102 }
103
104
105 if (((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax))
106 || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax)) { /* one color, either max or min */
107 /* shortcut here since it is a very common case (and also avoids later problems) */
108 /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
109
110 *blkaddr++ = srccolors[0][0];
111 blkaddr++;
112 *blkaddr++ = 0;
113 *blkaddr++ = 0;
114 *blkaddr++ = 0;
115 *blkaddr++ = 0;
116 *blkaddr++ = 0;
117 *blkaddr++ = 0;
118 #if RGTC_DEBUG
119 fprintf(stderr, "enc0 used\n");
120 #endif
121 return;
122 }
123
124 /* find best encoding for alpha0 > alpha1 */
125 /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
126 alphablockerror1 = 0x0;
127 alphablockerror2 = 0xffffffff;
128 alphablockerror3 = 0xffffffff;
129 if (alphaabsmin) alphause[0] = T_MIN;
130 else alphause[0] = alphabase[0];
131 if (alphaabsmax) alphause[1] = T_MAX;
132 else alphause[1] = alphabase[1];
133 /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
134 for (aindex = 0; aindex < 7; aindex++) {
135 /* don't forget here is always rounded down */
136 acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
137 }
138
139 for (j = 0; j < numypixels; j++) {
140 for (i = 0; i < numxpixels; i++) {
141 /* maybe it's overkill to have the most complicated calculation just for the error
142 calculation which we only need to figure out if encoding1 or encoding2 is better... */
143 if (srccolors[j][i] > acutValues[0]) {
144 alphaenc1[4*j + i] = 0;
145 alphadist = srccolors[j][i] - alphause[1];
146 }
147 else if (srccolors[j][i] > acutValues[1]) {
148 alphaenc1[4*j + i] = 2;
149 alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7;
150 }
151 else if (srccolors[j][i] > acutValues[2]) {
152 alphaenc1[4*j + i] = 3;
153 alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7;
154 }
155 else if (srccolors[j][i] > acutValues[3]) {
156 alphaenc1[4*j + i] = 4;
157 alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7;
158 }
159 else if (srccolors[j][i] > acutValues[4]) {
160 alphaenc1[4*j + i] = 5;
161 alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7;
162 }
163 else if (srccolors[j][i] > acutValues[5]) {
164 alphaenc1[4*j + i] = 6;
165 alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7;
166 }
167 else if (srccolors[j][i] > acutValues[6]) {
168 alphaenc1[4*j + i] = 7;
169 alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7;
170 }
171 else {
172 alphaenc1[4*j + i] = 1;
173 alphadist = srccolors[j][i] - alphause[0];
174 }
175 alphablockerror1 += alphadist * alphadist;
176 }
177 }
178
179 #if RGTC_DEBUG
180 for (i = 0; i < 16; i++) {
181 fprintf(stderr, "%d ", alphaenc1[i]);
182 }
183 fprintf(stderr, "cutVals ");
184 for (i = 0; i < 7; i++) {
185 fprintf(stderr, "%d ", acutValues[i]);
186 }
187 fprintf(stderr, "srcVals ");
188 for (j = 0; j < numypixels; j++) {
189 for (i = 0; i < numxpixels; i++) {
190 fprintf(stderr, "%d ", srccolors[j][i]);
191 }
192 }
193 fprintf(stderr, "\n");
194 #endif
195
196 /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
197 are false but try it anyway */
198 if (alphablockerror1 >= 32) {
199
200 /* don't bother if encoding is already very good, this condition should also imply
201 we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
202 alphablockerror2 = 0;
203 for (aindex = 0; aindex < 5; aindex++) {
204 /* don't forget here is always rounded down */
205 acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
206 }
207 for (j = 0; j < numypixels; j++) {
208 for (i = 0; i < numxpixels; i++) {
209 /* maybe it's overkill to have the most complicated calculation just for the error
210 calculation which we only need to figure out if encoding1 or encoding2 is better... */
211 if (srccolors[j][i] == T_MIN) {
212 alphaenc2[4*j + i] = 6;
213 alphadist = 0;
214 }
215 else if (srccolors[j][i] == T_MAX) {
216 alphaenc2[4*j + i] = 7;
217 alphadist = 0;
218 }
219 else if (srccolors[j][i] <= acutValues[0]) {
220 alphaenc2[4*j + i] = 0;
221 alphadist = srccolors[j][i] - alphabase[0];
222 }
223 else if (srccolors[j][i] <= acutValues[1]) {
224 alphaenc2[4*j + i] = 2;
225 alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
226 }
227 else if (srccolors[j][i] <= acutValues[2]) {
228 alphaenc2[4*j + i] = 3;
229 alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
230 }
231 else if (srccolors[j][i] <= acutValues[3]) {
232 alphaenc2[4*j + i] = 4;
233 alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
234 }
235 else if (srccolors[j][i] <= acutValues[4]) {
236 alphaenc2[4*j + i] = 5;
237 alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
238 }
239 else {
240 alphaenc2[4*j + i] = 1;
241 alphadist = srccolors[j][i] - alphabase[1];
242 }
243 alphablockerror2 += alphadist * alphadist;
244 }
245 }
246
247
248 /* skip this if the error is already very small
249 this encoding is MUCH better on average than #2 though, but expensive! */
250 if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
251 short blockerrlin1 = 0;
252 short blockerrlin2 = 0;
253 TYPE nralphainrangelow = 0;
254 TYPE nralphainrangehigh = 0;
255 alphatest[0] = T_MAX;
256 alphatest[1] = T_MIN;
257 /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
258 for (j = 0; j < numypixels; j++) {
259 for (i = 0; i < numxpixels; i++) {
260 if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (T_MAX -(alphabase[1] - alphabase[0]) / 28)))
261 alphatest[1] = srccolors[j][i];
262 if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (alphabase[1] - alphabase[0]) / 28))
263 alphatest[0] = srccolors[j][i];
264 }
265 }
266 /* shouldn't happen too often, don't really care about those degenerated cases */
267 if (alphatest[1] <= alphatest[0]) {
268 alphatest[0] = T_MIN+1;
269 alphatest[1] = T_MAX-1;
270 }
271 for (aindex = 0; aindex < 5; aindex++) {
272 /* don't forget here is always rounded down */
273 acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
274 }
275
276 /* find the "average" difference between the alpha values and the next encoded value.
277 This is then used to calculate new base values.
278 Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
279 since they will see more improvement, and also because the values in the middle are somewhat
280 likely to get no improvement at all (because the base values might move in different directions)?
281 OTOH it would mean the values in the middle are even less likely to get an improvement
282 */
283 for (j = 0; j < numypixels; j++) {
284 for (i = 0; i < numxpixels; i++) {
285 if (srccolors[j][i] <= alphatest[0] / 2) {
286 }
287 else if (srccolors[j][i] > ((T_MAX + alphatest[1]) / 2)) {
288 }
289 else if (srccolors[j][i] <= acutValues[0]) {
290 blockerrlin1 += (srccolors[j][i] - alphatest[0]);
291 nralphainrangelow += 1;
292 }
293 else if (srccolors[j][i] <= acutValues[1]) {
294 blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
295 blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
296 nralphainrangelow += 1;
297 nralphainrangehigh += 1;
298 }
299 else if (srccolors[j][i] <= acutValues[2]) {
300 blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
301 blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
302 nralphainrangelow += 1;
303 nralphainrangehigh += 1;
304 }
305 else if (srccolors[j][i] <= acutValues[3]) {
306 blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
307 blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
308 nralphainrangelow += 1;
309 nralphainrangehigh += 1;
310 }
311 else if (srccolors[j][i] <= acutValues[4]) {
312 blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
313 blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
314 nralphainrangelow += 1;
315 nralphainrangehigh += 1;
316 }
317 else {
318 blockerrlin2 += (srccolors[j][i] - alphatest[1]);
319 nralphainrangehigh += 1;
320 }
321 }
322 }
323 /* shouldn't happen often, needed to avoid div by zero */
324 if (nralphainrangelow == 0) nralphainrangelow = 1;
325 if (nralphainrangehigh == 0) nralphainrangehigh = 1;
326 alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
327 #if RGTC_DEBUG
328 fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
329 fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);
330 #endif
331 /* again shouldn't really happen often... */
332 if (alphatest[0] < T_MIN) {
333 alphatest[0] = T_MIN;
334 }
335 alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
336 if (alphatest[1] > T_MAX) {
337 alphatest[1] = T_MAX;
338 }
339
340 alphablockerror3 = 0;
341 for (aindex = 0; aindex < 5; aindex++) {
342 /* don't forget here is always rounded down */
343 acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
344 }
345 for (j = 0; j < numypixels; j++) {
346 for (i = 0; i < numxpixels; i++) {
347 /* maybe it's overkill to have the most complicated calculation just for the error
348 calculation which we only need to figure out if encoding1 or encoding2 is better... */
349 if (srccolors[j][i] <= alphatest[0] / 2) {
350 alphaenc3[4*j + i] = 6;
351 alphadist = srccolors[j][i];
352 }
353 else if (srccolors[j][i] > ((T_MAX + alphatest[1]) / 2)) {
354 alphaenc3[4*j + i] = 7;
355 alphadist = T_MAX - srccolors[j][i];
356 }
357 else if (srccolors[j][i] <= acutValues[0]) {
358 alphaenc3[4*j + i] = 0;
359 alphadist = srccolors[j][i] - alphatest[0];
360 }
361 else if (srccolors[j][i] <= acutValues[1]) {
362 alphaenc3[4*j + i] = 2;
363 alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
364 }
365 else if (srccolors[j][i] <= acutValues[2]) {
366 alphaenc3[4*j + i] = 3;
367 alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
368 }
369 else if (srccolors[j][i] <= acutValues[3]) {
370 alphaenc3[4*j + i] = 4;
371 alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
372 }
373 else if (srccolors[j][i] <= acutValues[4]) {
374 alphaenc3[4*j + i] = 5;
375 alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
376 }
377 else {
378 alphaenc3[4*j + i] = 1;
379 alphadist = srccolors[j][i] - alphatest[1];
380 }
381 alphablockerror3 += alphadist * alphadist;
382 }
383 }
384 }
385 }
386
387 /* write the alpha values and encoding back. */
388 if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
389 #if RGTC_DEBUG
390 if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);
391 fprintf(stderr,"w1: min %d max %d au0 %d au1 %d\n",
392 T_MIN, T_MAX,
393 alphause[1], alphause[0]);
394 #endif
395
396 TAG(write_rgtc_encoded_channel)( blkaddr, alphause[1], alphause[0], alphaenc1 );
397 }
398 else if (alphablockerror2 <= alphablockerror3) {
399 #if RGTC_DEBUG
400 if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);
401 fprintf(stderr,"w2: min %d max %d au0 %d au1 %d\n",
402 T_MIN, T_MAX,
403 alphabase[0], alphabase[1]);
404 #endif
405
406 TAG(write_rgtc_encoded_channel)( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
407 }
408 else {
409 #if RGTC_DEBUG
410 fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);
411 fprintf(stderr,"w3: min %d max %d au0 %d au1 %d\n",
412 T_MIN, T_MAX,
413 alphatest[0], alphatest[1]);
414 #endif
415
416 TAG(write_rgtc_encoded_channel)( blkaddr, (TYPE)alphatest[0], (TYPE)alphatest[1], alphaenc3 );
417 }
418 }
419