1 /*
2 * Copyright 2014 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkTextureCompressor_ASTC.h"
9 #include "SkTextureCompressor_Blitter.h"
10
11 #include "SkBlitter.h"
12 #include "SkEndian.h"
13 #include "SkMath.h"
14
15 // This table contains the weight values for each texel. This is used in determining
16 // how to convert a 12x12 grid of alpha values into a 6x5 grid of index values. Since
17 // we have a 6x5 grid, that gives 30 values that we have to compute. For each index,
18 // we store up to 20 different triplets of values. In order the triplets are:
19 // weight, texel-x, texel-y
20 // The weight value corresponds to the amount that this index contributes to the final
21 // index value of the given texel. Hence, we need to reconstruct the 6x5 index grid
22 // from their relative contribution to the 12x12 texel grid.
23 //
24 // The algorithm is something like this:
25 // foreach index i:
26 // total-weight = 0;
27 // total-alpha = 0;
28 // for w = 0 to 19:
29 // weight = table[i][w*3];
30 // texel-x = table[i][w*3 + 1];
31 // texel-y = table[i][w*3 + 2];
32 // if weight > 0:
33 // total-weight += weight;
34 // total-alpha += weight * alphas[texel-x][texel-y];
35 //
36 // total-alpha /= total-weight;
37 // index = top three bits of total-alpha
38 //
39 // If the associated index does not contribute to 20 different texels (e.g. it's in
40 // a corner), then the extra texels are stored with -1's in the table.
41
// Layout: one row per entry of the 6x5 index grid (30 rows). Each row holds
// 20 slots of three int8_t values: { weight, texel-x, texel-y }. Valid slots
// come first and have a positive weight; the remaining slots are padded with
// { -1, 0, 0 }. The comment after each row gives the number of valid slots.
42 static const int8_t k6x5To12x12Table[30][60] = {
43 { 16, 0, 0, 9, 1, 0, 1, 2, 0, 10, 0, 1, 6, 1, 1, 1, 2, 1, 4, 0, 2, 2,
44 1, 2, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
45 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 8 of 20 slots used
46 { 7, 1, 0, 15, 2, 0, 10, 3, 0, 3, 4, 0, 4, 1, 1, 9, 2, 1, 6, 3, 1, 2,
47 4, 1, 2, 1, 2, 4, 2, 2, 3, 3, 2, 1, 4, 2, -1, 0, 0, -1, 0, 0, -1, 0,
48 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 of 20 slots used
49 { 6, 3, 0, 13, 4, 0, 12, 5, 0, 4, 6, 0, 4, 3, 1, 8, 4, 1, 8, 5, 1, 3,
50 6, 1, 1, 3, 2, 3, 4, 2, 3, 5, 2, 1, 6, 2, -1, 0, 0, -1, 0, 0, -1, 0,
51 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 of 20 slots used
52 { 4, 5, 0, 12, 6, 0, 13, 7, 0, 6, 8, 0, 2, 5, 1, 7, 6, 1, 8, 7, 1, 4,
53 8, 1, 1, 5, 2, 3, 6, 2, 3, 7, 2, 2, 8, 2, -1, 0, 0, -1, 0, 0, -1, 0,
54 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 of 20 slots used
55 { 3, 7, 0, 10, 8, 0, 15, 9, 0, 7, 10, 0, 2, 7, 1, 6, 8, 1, 9, 9, 1, 4,
56 10, 1, 1, 7, 2, 2, 8, 2, 4, 9, 2, 2, 10, 2, -1, 0, 0, -1, 0, 0, -1, 0,
57 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 of 20 slots used
58 { 1, 9, 0, 9, 10, 0, 16, 11, 0, 1, 9, 1, 6, 10, 1, 10, 11, 1, 2, 10, 2, 4,
59 11, 2, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
60 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 8 of 20 slots used
61 { 6, 0, 1, 3, 1, 1, 12, 0, 2, 7, 1, 2, 1, 2, 2, 15, 0, 3, 8, 1, 3, 1,
62 2, 3, 9, 0, 4, 5, 1, 4, 1, 2, 4, 3, 0, 5, 2, 1, 5, -1, 0, 0, -1, 0,
63 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 13 of 20 slots used
64 { 3, 1, 1, 6, 2, 1, 4, 3, 1, 1, 4, 1, 5, 1, 2, 11, 2, 2, 7, 3, 2, 2,
65 4, 2, 7, 1, 3, 14, 2, 3, 9, 3, 3, 3, 4, 3, 4, 1, 4, 8, 2, 4, 6, 3,
66 4, 2, 4, 4, 1, 1, 5, 3, 2, 5, 2, 3, 5, 1, 4, 5}, // 20 of 20 slots used
67 { 2, 3, 1, 5, 4, 1, 4, 5, 1, 1, 6, 1, 5, 3, 2, 10, 4, 2, 9, 5, 2, 3,
68 6, 2, 6, 3, 3, 12, 4, 3, 11, 5, 3, 4, 6, 3, 3, 3, 4, 7, 4, 4, 7, 5,
69 4, 2, 6, 4, 1, 3, 5, 2, 4, 5, 2, 5, 5, 1, 6, 5}, // 20 of 20 slots used
70 { 2, 5, 1, 5, 6, 1, 5, 7, 1, 2, 8, 1, 3, 5, 2, 9, 6, 2, 10, 7, 2, 4,
71 8, 2, 4, 5, 3, 11, 6, 3, 12, 7, 3, 6, 8, 3, 2, 5, 4, 7, 6, 4, 7, 7,
72 4, 3, 8, 4, 1, 5, 5, 2, 6, 5, 2, 7, 5, 1, 8, 5}, // 20 of 20 slots used
73 { 1, 7, 1, 4, 8, 1, 6, 9, 1, 3, 10, 1, 2, 7, 2, 8, 8, 2, 11, 9, 2, 5,
74 10, 2, 3, 7, 3, 9, 8, 3, 14, 9, 3, 7, 10, 3, 2, 7, 4, 6, 8, 4, 8, 9,
75 4, 4, 10, 4, 1, 7, 5, 2, 8, 5, 3, 9, 5, 1, 10, 5}, // 20 of 20 slots used
76 { 3, 10, 1, 6, 11, 1, 1, 9, 2, 7, 10, 2, 12, 11, 2, 1, 9, 3, 8, 10, 3, 15,
77 11, 3, 1, 9, 4, 5, 10, 4, 9, 11, 4, 2, 10, 5, 3, 11, 5, -1, 0, 0, -1, 0,
78 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 13 of 20 slots used
79 { 1, 0, 3, 1, 1, 3, 7, 0, 4, 4, 1, 4, 13, 0, 5, 7, 1, 5, 1, 2, 5, 13,
80 0, 6, 7, 1, 6, 1, 2, 6, 7, 0, 7, 4, 1, 7, 1, 0, 8, 1, 1, 8, -1, 0,
81 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 14 of 20 slots used
82 { 1, 2, 3, 1, 3, 3, 3, 1, 4, 7, 2, 4, 4, 3, 4, 1, 4, 4, 6, 1, 5, 12,
83 2, 5, 8, 3, 5, 2, 4, 5, 6, 1, 6, 12, 2, 6, 8, 3, 6, 2, 4, 6, 3, 1,
84 7, 7, 2, 7, 4, 3, 7, 1, 4, 7, 1, 2, 8, 1, 3, 8}, // 20 of 20 slots used
85 { 1, 4, 3, 1, 5, 3, 3, 3, 4, 6, 4, 4, 5, 5, 4, 2, 6, 4, 5, 3, 5, 11,
86 4, 5, 10, 5, 5, 3, 6, 5, 5, 3, 6, 11, 4, 6, 10, 5, 6, 3, 6, 6, 3, 3,
87 7, 6, 4, 7, 5, 5, 7, 2, 6, 7, 1, 4, 8, 1, 5, 8}, // 20 of 20 slots used
88 { 1, 6, 3, 1, 7, 3, 2, 5, 4, 5, 6, 4, 6, 7, 4, 3, 8, 4, 3, 5, 5, 10,
89 6, 5, 11, 7, 5, 5, 8, 5, 3, 5, 6, 10, 6, 6, 11, 7, 6, 5, 8, 6, 2, 5,
90 7, 5, 6, 7, 6, 7, 7, 3, 8, 7, 1, 6, 8, 1, 7, 8}, // 20 of 20 slots used
91 { 1, 8, 3, 1, 9, 3, 1, 7, 4, 4, 8, 4, 7, 9, 4, 3, 10, 4, 2, 7, 5, 8,
92 8, 5, 12, 9, 5, 6, 10, 5, 2, 7, 6, 8, 8, 6, 12, 9, 6, 6, 10, 6, 1, 7,
93 7, 4, 8, 7, 7, 9, 7, 3, 10, 7, 1, 8, 8, 1, 9, 8}, // 20 of 20 slots used
94 { 1, 10, 3, 1, 11, 3, 4, 10, 4, 7, 11, 4, 1, 9, 5, 7, 10, 5, 13, 11, 5, 1,
95 9, 6, 7, 10, 6, 13, 11, 6, 4, 10, 7, 7, 11, 7, 1, 10, 8, 1, 11, 8, -1, 0,
96 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 14 of 20 slots used
97 { 3, 0, 6, 2, 1, 6, 9, 0, 7, 5, 1, 7, 1, 2, 7, 15, 0, 8, 8, 1, 8, 1,
98 2, 8, 12, 0, 9, 7, 1, 9, 1, 2, 9, 6, 0, 10, 3, 1, 10, -1, 0, 0, -1, 0,
99 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 13 of 20 slots used
100 { 1, 1, 6, 3, 2, 6, 2, 3, 6, 1, 4, 6, 4, 1, 7, 8, 2, 7, 6, 3, 7, 2,
101 4, 7, 7, 1, 8, 14, 2, 8, 9, 3, 8, 3, 4, 8, 5, 1, 9, 11, 2, 9, 8, 3,
102 9, 2, 4, 9, 3, 1, 10, 6, 2, 10, 4, 3, 10, 1, 4, 10}, // 20 of 20 slots used
103 { 1, 3, 6, 2, 4, 6, 2, 5, 6, 1, 6, 6, 3, 3, 7, 7, 4, 7, 7, 5, 7, 2,
104 6, 7, 6, 3, 8, 12, 4, 8, 11, 5, 8, 4, 6, 8, 4, 3, 9, 10, 4, 9, 9, 5,
105 9, 3, 6, 9, 2, 3, 10, 5, 4, 10, 5, 5, 10, 2, 6, 10}, // 20 of 20 slots used
106 { 1, 5, 6, 2, 6, 6, 2, 7, 6, 1, 8, 6, 2, 5, 7, 7, 6, 7, 7, 7, 7, 3,
107 8, 7, 4, 5, 8, 11, 6, 8, 12, 7, 8, 6, 8, 8, 3, 5, 9, 9, 6, 9, 10, 7,
108 9, 5, 8, 9, 1, 5, 10, 4, 6, 10, 5, 7, 10, 2, 8, 10}, // 20 of 20 slots used
109 { 1, 7, 6, 2, 8, 6, 3, 9, 6, 1, 10, 6, 2, 7, 7, 6, 8, 7, 8, 9, 7, 4,
110 10, 7, 3, 7, 8, 9, 8, 8, 14, 9, 8, 7, 10, 8, 2, 7, 9, 7, 8, 9, 11, 9,
111 9, 5, 10, 9, 1, 7, 10, 4, 8, 10, 6, 9, 10, 3, 10, 10}, // 20 of 20 slots used
112 { 2, 10, 6, 3, 11, 6, 1, 9, 7, 5, 10, 7, 9, 11, 7, 1, 9, 8, 8, 10, 8, 15,
113 11, 8, 1, 9, 9, 7, 10, 9, 12, 11, 9, 3, 10, 10, 6, 11, 10, -1, 0, 0, -1, 0,
114 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 13 of 20 slots used
115 { 4, 0, 9, 2, 1, 9, 10, 0, 10, 6, 1, 10, 1, 2, 10, 16, 0, 11, 9, 1, 11, 1,
116 2, 11, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
117 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 8 of 20 slots used
118 { 2, 1, 9, 4, 2, 9, 2, 3, 9, 1, 4, 9, 4, 1, 10, 9, 2, 10, 6, 3, 10, 2,
119 4, 10, 7, 1, 11, 15, 2, 11, 10, 3, 11, 3, 4, 11, -1, 0, 0, -1, 0, 0, -1, 0,
120 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 of 20 slots used
121 { 2, 3, 9, 3, 4, 9, 3, 5, 9, 1, 6, 9, 4, 3, 10, 8, 4, 10, 7, 5, 10, 2,
122 6, 10, 6, 3, 11, 13, 4, 11, 12, 5, 11, 4, 6, 11, -1, 0, 0, -1, 0, 0, -1, 0,
123 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 of 20 slots used
124 { 1, 5, 9, 3, 6, 9, 3, 7, 9, 1, 8, 9, 3, 5, 10, 8, 6, 10, 8, 7, 10, 4,
125 8, 10, 4, 5, 11, 12, 6, 11, 13, 7, 11, 6, 8, 11, -1, 0, 0, -1, 0, 0, -1, 0,
126 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 of 20 slots used
127 { 1, 7, 9, 3, 8, 9, 4, 9, 9, 2, 10, 9, 2, 7, 10, 6, 8, 10, 9, 9, 10, 4,
128 10, 10, 3, 7, 11, 10, 8, 11, 15, 9, 11, 7, 10, 11, -1, 0, 0, -1, 0, 0, -1, 0,
129 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 of 20 slots used
130 { 2, 10, 9, 4, 11, 9, 1, 9, 10, 6, 10, 10, 10, 11, 10, 1, 9, 11, 9, 10, 11, 16,
131 11, 11, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
132 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0} // 8 of 20 slots used
133 };
134
135 // Returns the alpha value of a texel at position (x, y) from src.
136 // (x, y) are assumed to be in the range [0, 12).
GetAlpha(const uint8_t * src,size_t rowBytes,int x,int y)137 inline uint8_t GetAlpha(const uint8_t *src, size_t rowBytes, int x, int y) {
138 SkASSERT(x >= 0 && x < 12);
139 SkASSERT(y >= 0 && y < 12);
140 SkASSERT(rowBytes >= 12);
141 return *(src + y*rowBytes + x);
142 }
143
GetAlphaTranspose(const uint8_t * src,size_t rowBytes,int x,int y)144 inline uint8_t GetAlphaTranspose(const uint8_t *src, size_t rowBytes, int x, int y) {
145 return GetAlpha(src, rowBytes, y, x);
146 }
147
// Writes one 16-byte block to *dst -- the eight bytes of top followed by the
// eight bytes of bottom, each exactly as the integer is represented in
// memory -- and then advances *dst by 16. Any endian swapping has to be done
// by the caller beforehand.
//
// The bytes are copied with memcpy instead of being stored through a
// uint64_t*, because nothing guarantees that *dst is 8-byte aligned.
static inline void send_packing(uint8_t** dst, const uint64_t top, const uint64_t bottom) {
    uint8_t* out = *dst;
    memcpy(out, &top, sizeof(top));
    memcpy(out + sizeof(top), &bottom, sizeof(bottom));
    *dst = out + 16;
}
157
158 // Compresses an ASTC block, by looking up the proper contributions from
159 // k6x5To12x12Table and computing an index from the associated values.
160 typedef uint8_t (*GetAlphaProc)(const uint8_t* src, size_t rowBytes, int x, int y);
161
162 template<GetAlphaProc getAlphaProc>
compress_a8_astc_block(uint8_t ** dst,const uint8_t * src,size_t rowBytes)163 static void compress_a8_astc_block(uint8_t** dst, const uint8_t* src, size_t rowBytes) {
164 // Check for single color
165 bool constant = true;
166 const uint32_t firstInt = *(reinterpret_cast<const uint32_t*>(src));
167 for (int i = 0; i < 12; ++i) {
168 const uint32_t *rowInt = reinterpret_cast<const uint32_t *>(src + i*rowBytes);
169 constant = constant && (rowInt[0] == firstInt);
170 constant = constant && (rowInt[1] == firstInt);
171 constant = constant && (rowInt[2] == firstInt);
172 }
173
174 if (constant) {
175 if (0 == firstInt) {
176 // All of the indices are set to zero, and the colors are
177 // v0 = 0, v1 = 255, so everything will be transparent.
178 send_packing(dst, SkTEndian_SwapLE64(0x0000000001FE000173ULL), 0);
179 return;
180 } else if (0xFFFFFFFF == firstInt) {
181 // All of the indices are set to zero, and the colors are
182 // v0 = 255, v1 = 0, so everything will be opaque.
183 send_packing(dst, SkTEndian_SwapLE64(0x000000000001FE0173ULL), 0);
184 return;
185 }
186 }
187
188 uint8_t indices[30]; // 6x5 index grid
189 for (int idx = 0; idx < 30; ++idx) {
190 int weightTot = 0;
191 int alphaTot = 0;
192 for (int w = 0; w < 20; ++w) {
193 const int8_t weight = k6x5To12x12Table[idx][w*3];
194 if (weight > 0) {
195 const int x = k6x5To12x12Table[idx][w*3 + 1];
196 const int y = k6x5To12x12Table[idx][w*3 + 2];
197 weightTot += weight;
198 alphaTot += weight * getAlphaProc(src, rowBytes, x, y);
199 } else {
200 // In our table, not every entry has 20 weights, and all
201 // of them are nonzero. Once we hit a negative weight, we
202 // know that all of the other weights are not valid either.
203 break;
204 }
205 }
206
207 indices[idx] = (alphaTot / weightTot) >> 5;
208 }
209
210 // Pack indices... The ASTC block layout is fairly complicated. An extensive
211 // description can be found here:
212 // https://www.opengl.org/registry/specs/KHR/texture_compression_astc_hdr.txt
213 //
214 // Here is a summary of the options that we've chosen:
215 // 1. Block mode: 0b00101110011
216 // - 6x5 texel grid
217 // - Single plane
218 // - Low-precision index values
219 // - Index range 0-7 (three bits per index)
220 // 2. Partitions: 0b00
221 // - One partition
222 // 3. Color Endpoint Mode: 0b0000
223 // - Direct luminance -- e0=(v0,v0,v0,0xFF); e1=(v1,v1,v1,0xFF);
224 // 4. 8-bit endpoints:
225 // v0 = 0, v1 = 255
226 //
227 // The rest of the block contains the 30 index values from before, which
228 // are currently stored in the indices variable.
229
230 uint64_t top = 0x0000000001FE000173ULL;
231 uint64_t bottom = 0;
232
233 for (int idx = 0; idx <= 20; ++idx) {
234 const uint8_t index = indices[idx];
235 bottom |= static_cast<uint64_t>(index) << (61-(idx*3));
236 }
237
238 // index 21 straddles top and bottom
239 {
240 const uint8_t index = indices[21];
241 bottom |= index & 1;
242 top |= static_cast<uint64_t>((index >> 2) | (index & 2)) << 62;
243 }
244
245 for (int idx = 22; idx < 30; ++idx) {
246 const uint8_t index = indices[idx];
247 top |= static_cast<uint64_t>(index) << (59-(idx-22)*3);
248 }
249
250 // Reverse each 3-bit index since indices are read in reverse order...
251 uint64_t t = (bottom ^ (bottom >> 2)) & 0x2492492492492492ULL;
252 bottom = bottom ^ t ^ (t << 2);
253
254 t = (top ^ (top >> 2)) & 0x0924924000000000ULL;
255 top = top ^ t ^ (t << 2);
256
257 send_packing(dst, SkEndian_SwapLE64(top), SkEndian_SwapLE64(bottom));
258 }
259
CompressA8ASTCBlockVertical(uint8_t * dst,const uint8_t * src)260 inline void CompressA8ASTCBlockVertical(uint8_t* dst, const uint8_t* src) {
261 compress_a8_astc_block<GetAlphaTranspose>(&dst, src, 12);
262 }
263
264 ////////////////////////////////////////////////////////////////////////////////
265 //
266 // ASTC Decoder
267 //
268 // Full details available in the spec:
269 // http://www.khronos.org/registry/gles/extensions/OES/OES_texture_compression_astc.txt
270 //
271 ////////////////////////////////////////////////////////////////////////////////
272
273 // Enable this to assert whenever a decoded block has invalid ASTC values. Otherwise,
274 // each invalid block will result in a disgusting magenta color.
275 #define ASSERT_ASTC_DECODE_ERROR 0
276
// For every bit position i selected by the mask m, exchanges bit i of p with
// bit i+k of p. All other bits are returned unchanged.
template <typename T, T m, int k>
static inline T swap_bits(T p) {
    const T differing = (p ^ (p >> k)) & m;
    return (p ^ differing) ^ (differing << k);
}

// Returns n with the order of its 64 bits reversed (bit 0 becomes bit 63 and
// so on). The algorithm is taken from TAOCP 4a, although it's better
// documented at this site:
// http://matthewarcus.wordpress.com/2012/11/18/reversing-a-64-bit-word/
static inline uint64_t reverse64(uint64_t n) {
    static const uint64_t kPairMask   = 0x5555555555555555ULL;
    static const uint64_t kSwap4Mask  = 0x0300c0303030c303ULL;
    static const uint64_t kSwap8Mask  = 0x00c0300c03f0003fULL;
    static const uint64_t kSwap20Mask = 0x00000ffc00003fffULL;

    // Exchange every pair of adjacent bits.
    const uint64_t oddBits = (n >> 1) & kPairMask;
    const uint64_t evenBits = n & kPairMask;
    n = oddBits | (evenBits << 1);

    // Three masked swaps at distances 4, 8 and 20...
    n = swap_bits<uint64_t, kSwap4Mask, 4>(n);
    n = swap_bits<uint64_t, kSwap8Mask, 8>(n);
    n = swap_bits<uint64_t, kSwap20Mask, 20>(n);

    // ...followed by a rotate right by 34.
    return (n >> 34) | (n << 30);
}
299
300 // An ASTC block is 128 bits. We represent it as two 64-bit integers in order
301 // to efficiently operate on the block using bitwise operations.
302 struct ASTCBlock {
303 uint64_t fLow;
304 uint64_t fHigh;
305
306 // Reverses the bits of an ASTC block, making the LSB of the
307 // 128 bit block the MSB.
reverseASTCBlock308 inline void reverse() {
309 const uint64_t newLow = reverse64(this->fHigh);
310 this->fHigh = reverse64(this->fLow);
311 this->fLow = newLow;
312 }
313 };
314
315 // Writes the given color to every pixel in the block. This is used by void-extent
316 // blocks (a special constant-color encoding of a block) and by the error function.
write_constant_color(uint8_t * dst,int blockDimX,int blockDimY,int dstRowBytes,SkColor color)317 static inline void write_constant_color(uint8_t* dst, int blockDimX, int blockDimY,
318 int dstRowBytes, SkColor color) {
319 for (int y = 0; y < blockDimY; ++y) {
320 SkColor *dstColors = reinterpret_cast<SkColor*>(dst);
321 for (int x = 0; x < blockDimX; ++x) {
322 dstColors[x] = color;
323 }
324 dst += dstRowBytes;
325 }
326 }
327
328 // Sets the entire block to the ASTC "error" color, a disgusting magenta
329 // that's not supposed to appear in natural images.
write_error_color(uint8_t * dst,int blockDimX,int blockDimY,int dstRowBytes)330 static inline void write_error_color(uint8_t* dst, int blockDimX, int blockDimY,
331 int dstRowBytes) {
332 static const SkColor kASTCErrorColor = SkColorSetRGB(0xFF, 0, 0xFF);
333
334 #if ASSERT_ASTC_DECODE_ERROR
335 SkDEBUGFAIL("ASTC decoding error!\n");
336 #endif
337
338 write_constant_color(dst, blockDimX, blockDimY, dstRowBytes, kASTCErrorColor);
339 }
340
341 // Reads up to 64 bits of the ASTC block starting from bit
342 // 'from' and going up to but not including bit 'to'. 'from' starts
343 // counting from the LSB, counting up to the MSB. Returns -1 on
344 // error.
read_astc_bits(const ASTCBlock & block,int from,int to)345 static uint64_t read_astc_bits(const ASTCBlock &block, int from, int to) {
346 SkASSERT(0 <= from && from <= 128);
347 SkASSERT(0 <= to && to <= 128);
348
349 const int nBits = to - from;
350 if (0 == nBits) {
351 return 0;
352 }
353
354 if (nBits < 0 || 64 <= nBits) {
355 SkDEBUGFAIL("ASTC -- shouldn't read more than 64 bits");
356 return -1;
357 }
358
359 // Remember, the 'to' bit isn't read.
360 uint64_t result = 0;
361 if (to <= 64) {
362 // All desired bits are in the low 64-bits.
363 result = (block.fLow >> from) & ((1ULL << nBits) - 1);
364 } else if (from >= 64) {
365 // All desired bits are in the high 64-bits.
366 result = (block.fHigh >> (from - 64)) & ((1ULL << nBits) - 1);
367 } else {
368 // from < 64 && to > 64
369 SkASSERT(nBits > (64 - from));
370 const int nLow = 64 - from;
371 const int nHigh = nBits - nLow;
372 result =
373 ((block.fLow >> from) & ((1ULL << nLow) - 1)) |
374 ((block.fHigh & ((1ULL << nHigh) - 1)) << nLow);
375 }
376
377 return result;
378 }
379
380 // Returns the number of bits needed to represent a number
381 // in the given power-of-two range (excluding the power of two itself).
bits_for_range(int x)382 static inline int bits_for_range(int x) {
383 SkASSERT(SkIsPow2(x));
384 SkASSERT(0 != x);
385 // Since we know it's a power of two, there should only be one bit set,
386 // meaning the number of trailing zeros is 31 minus the number of leading
387 // zeros.
388 return 31 - SkCLZ(x);
389 }
390
391 // Clamps an integer to the range [0, 255]
clamp_byte(int x)392 static inline int clamp_byte(int x) {
393 return SkClampMax(x, 255);
394 }
395
// Helper function defined in the ASTC spec, section C.2.14.
// Moves bit 7 of *a into bit 7 of *b (after shifting *b right by one), then
// replaces *a with bits 6..1 of its old value interpreted as a signed six-bit
// number in the range [-32, 31].
static inline void bit_transfer_signed(int *a, int *b) {
    *b = *b >> 1;
    *b = *b | (*a & 0x80);

    // Keep six bits and sign-extend from bit 5.
    const int sixBits = (*a >> 1) & 0x3F;
    *a = (sixBits ^ 0x20) - 0x20;
}
407
408 // Helper function defined in the ASTC spec, section C.2.14
409 // It uses the value in the blue channel to tint the red and green
blue_contract(int a,int r,int g,int b)410 static inline SkColor blue_contract(int a, int r, int g, int b) {
411 return SkColorSetARGB(a, (r + b) >> 1, (g + b) >> 1, b);
412 }
413
414 // Helper function that decodes two colors from eight values. If isRGB is true,
415 // then the pointer 'v' contains six values and the last two are considered to be
416 // 0xFF. If isRGB is false, then all eight values come from the pointer 'v'. This
417 // corresponds to the decode procedure for the following endpoint modes:
418 // kLDR_RGB_Direct_ColorEndpointMode
419 // kLDR_RGBA_Direct_ColorEndpointMode
decode_rgba_direct(const int * v,SkColor * endpoints,bool isRGB)420 static inline void decode_rgba_direct(const int *v, SkColor *endpoints, bool isRGB) {
421
422 int v6 = 0xFF;
423 int v7 = 0xFF;
424 if (!isRGB) {
425 v6 = v[6];
426 v7 = v[7];
427 }
428
429 const int s0 = v[0] + v[2] + v[4];
430 const int s1 = v[1] + v[3] + v[5];
431
432 if (s1 >= s0) {
433 endpoints[0] = SkColorSetARGB(v6, v[0], v[2], v[4]);
434 endpoints[1] = SkColorSetARGB(v7, v[1], v[3], v[5]);
435 } else {
436 endpoints[0] = blue_contract(v7, v[1], v[3], v[5]);
437 endpoints[1] = blue_contract(v6, v[0], v[2], v[4]);
438 }
439 }
440
441 // Helper function that decodes two colors from six values. If isRGB is true,
442 // then the pointer 'v' contains four values and the last two are considered to be
443 // 0xFF. If isRGB is false, then all six values come from the pointer 'v'. This
444 // corresponds to the decode procedure for the following endpoint modes:
445 // kLDR_RGB_BaseScale_ColorEndpointMode
446 // kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode
decode_rgba_basescale(const int * v,SkColor * endpoints,bool isRGB)447 static inline void decode_rgba_basescale(const int *v, SkColor *endpoints, bool isRGB) {
448
449 int v4 = 0xFF;
450 int v5 = 0xFF;
451 if (!isRGB) {
452 v4 = v[4];
453 v5 = v[5];
454 }
455
456 endpoints[0] = SkColorSetARGB(v4,
457 (v[0]*v[3]) >> 8,
458 (v[1]*v[3]) >> 8,
459 (v[2]*v[3]) >> 8);
460 endpoints[1] = SkColorSetARGB(v5, v[0], v[1], v[2]);
461 }
462
463 // Helper function that decodes two colors from eight values. If isRGB is true,
464 // then the pointer 'v' contains six values and the last two are considered to be
465 // 0xFF. If isRGB is false, then all eight values come from the pointer 'v'. This
466 // corresponds to the decode procedure for the following endpoint modes:
467 // kLDR_RGB_BaseOffset_ColorEndpointMode
468 // kLDR_RGBA_BaseOffset_ColorEndpointMode
469 //
470 // If isRGB is true, then treat this as if v6 and v7 are meant to encode full alpha values.
decode_rgba_baseoffset(const int * v,SkColor * endpoints,bool isRGB)471 static inline void decode_rgba_baseoffset(const int *v, SkColor *endpoints, bool isRGB) {
472 int v0 = v[0];
473 int v1 = v[1];
474 int v2 = v[2];
475 int v3 = v[3];
476 int v4 = v[4];
477 int v5 = v[5];
478 int v6 = isRGB ? 0xFF : v[6];
479 // The 0 is here because this is an offset, not a direct value
480 int v7 = isRGB ? 0 : v[7];
481
482 bit_transfer_signed(&v1, &v0);
483 bit_transfer_signed(&v3, &v2);
484 bit_transfer_signed(&v5, &v4);
485 if (!isRGB) {
486 bit_transfer_signed(&v7, &v6);
487 }
488
489 int c[2][4];
490 if ((v1 + v3 + v5) >= 0) {
491 c[0][0] = v6;
492 c[0][1] = v0;
493 c[0][2] = v2;
494 c[0][3] = v4;
495
496 c[1][0] = v6 + v7;
497 c[1][1] = v0 + v1;
498 c[1][2] = v2 + v3;
499 c[1][3] = v4 + v5;
500 } else {
501 c[0][0] = v6 + v7;
502 c[0][1] = (v0 + v1 + v4 + v5) >> 1;
503 c[0][2] = (v2 + v3 + v4 + v5) >> 1;
504 c[0][3] = v4 + v5;
505
506 c[1][0] = v6;
507 c[1][1] = (v0 + v4) >> 1;
508 c[1][2] = (v2 + v4) >> 1;
509 c[1][3] = v4;
510 }
511
512 endpoints[0] = SkColorSetARGB(clamp_byte(c[0][0]),
513 clamp_byte(c[0][1]),
514 clamp_byte(c[0][2]),
515 clamp_byte(c[0][3]));
516
517 endpoints[1] = SkColorSetARGB(clamp_byte(c[1][0]),
518 clamp_byte(c[1][1]),
519 clamp_byte(c[1][2]),
520 clamp_byte(c[1][3]));
521 }
522
523
524 // A helper class used to decode bit values from standard integer values.
525 // We can't use this class with ASTCBlock because then it would need to
526 // handle multi-value ranges, and it's non-trivial to lookup a range of bits
527 // that splits across two different ints.
528 template <typename T>
529 class SkTBits {
530 public:
SkTBits(const T val)531 SkTBits(const T val) : fVal(val) { }
532
533 // Returns the bit at the given position
operator [](const int idx) const534 T operator [](const int idx) const {
535 return (fVal >> idx) & 1;
536 }
537
538 // Returns the bits in the given range, inclusive
operator ()(const int end,const int start) const539 T operator ()(const int end, const int start) const {
540 SkASSERT(end >= start);
541 return (fVal >> start) & ((1ULL << ((end - start) + 1)) - 1);
542 }
543
544 private:
545 const T fVal;
546 };
547
548 // This algorithm matches the trit block decoding in the spec (Table C.2.14)
decode_trit_block(int * dst,int nBits,const uint64_t & block)549 static void decode_trit_block(int* dst, int nBits, const uint64_t &block) {
550
551 SkTBits<uint64_t> blockBits(block);
552
553 // According to the spec, a trit block, which contains five values,
554 // has the following layout:
555 //
556 // 27 26 25 24 23 22 21 20 19 18 17 16
557 // -----------------------------------------------
558 // |T7 | m4 |T6 T5 | m3 |T4 |
559 // -----------------------------------------------
560 //
561 // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
562 // --------------------------------------------------------------
563 // | m2 |T3 T2 | m1 |T1 T0 | m0 |
564 // --------------------------------------------------------------
565 //
566 // Where the m's are variable width depending on the number of bits used
567 // to encode the values (anywhere from 0 to 6). Since 3^5 = 243, the extra
568 // byte labeled T (whose bits are interleaved where 0 is the LSB and 7 is
569 // the MSB), contains five trit values. To decode the trit values, the spec
570 // says that we need to follow the following algorithm:
571 //
572 // if T[4:2] = 111
573 // C = { T[7:5], T[1:0] }; t4 = t3 = 2
574 // else
575 // C = T[4:0]
576 //
577 // if T[6:5] = 11
578 // t4 = 2; t3 = T[7]
579 // else
580 // t4 = T[7]; t3 = T[6:5]
581 //
582 // if C[1:0] = 11
583 // t2 = 2; t1 = C[4]; t0 = { C[3], C[2]&~C[3] }
584 // else if C[3:2] = 11
585 // t2 = 2; t1 = 2; t0 = C[1:0]
586 // else
587 // t2 = C[4]; t1 = C[3:2]; t0 = { C[1], C[0]&~C[1] }
588 //
589 // The following C++ code is meant to mirror this layout and algorithm as
590 // closely as possible.
591
592 int m[5];
593 if (0 == nBits) {
594 memset(m, 0, sizeof(m));
595 } else {
596 SkASSERT(nBits < 8);
597 m[0] = static_cast<int>(blockBits(nBits - 1, 0));
598 m[1] = static_cast<int>(blockBits(2*nBits - 1 + 2, nBits + 2));
599 m[2] = static_cast<int>(blockBits(3*nBits - 1 + 4, 2*nBits + 4));
600 m[3] = static_cast<int>(blockBits(4*nBits - 1 + 5, 3*nBits + 5));
601 m[4] = static_cast<int>(blockBits(5*nBits - 1 + 7, 4*nBits + 7));
602 }
603
604 int T =
605 static_cast<int>(blockBits(nBits + 1, nBits)) |
606 (static_cast<int>(blockBits(2*nBits + 2 + 1, 2*nBits + 2)) << 2) |
607 (static_cast<int>(blockBits[3*nBits + 4] << 4)) |
608 (static_cast<int>(blockBits(4*nBits + 5 + 1, 4*nBits + 5)) << 5) |
609 (static_cast<int>(blockBits[5*nBits + 7] << 7));
610
611 int t[5];
612
613 int C;
614 SkTBits<int> Tbits(T);
615 if (0x7 == Tbits(4, 2)) {
616 C = (Tbits(7, 5) << 2) | Tbits(1, 0);
617 t[3] = t[4] = 2;
618 } else {
619 C = Tbits(4, 0);
620 if (Tbits(6, 5) == 0x3) {
621 t[4] = 2; t[3] = Tbits[7];
622 } else {
623 t[4] = Tbits[7]; t[3] = Tbits(6, 5);
624 }
625 }
626
627 SkTBits<int> Cbits(C);
628 if (Cbits(1, 0) == 0x3) {
629 t[2] = 2;
630 t[1] = Cbits[4];
631 t[0] = (Cbits[3] << 1) | (Cbits[2] & (0x1 & ~(Cbits[3])));
632 } else if (Cbits(3, 2) == 0x3) {
633 t[2] = 2;
634 t[1] = 2;
635 t[0] = Cbits(1, 0);
636 } else {
637 t[2] = Cbits[4];
638 t[1] = Cbits(3, 2);
639 t[0] = (Cbits[1] << 1) | (Cbits[0] & (0x1 & ~(Cbits[1])));
640 }
641
642 #ifdef SK_DEBUG
643 // Make sure all of the decoded values have a trit less than three
644 // and a bit value within the range of the allocated bits.
645 for (int i = 0; i < 5; ++i) {
646 SkASSERT(t[i] < 3);
647 SkASSERT(m[i] < (1 << nBits));
648 }
649 #endif
650
651 for (int i = 0; i < 5; ++i) {
652 *dst = (t[i] << nBits) + m[i];
653 ++dst;
654 }
655 }
656
657 // This algorithm matches the quint block decoding in the spec (Table C.2.15)
decode_quint_block(int * dst,int nBits,const uint64_t & block)658 static void decode_quint_block(int* dst, int nBits, const uint64_t &block) {
659 SkTBits<uint64_t> blockBits(block);
660
661 // According to the spec, a quint block, which contains three values,
662 // has the following layout:
663 //
664 //
665 // 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
666 // --------------------------------------------------------------------------
667 // |Q6 Q5 | m2 |Q4 Q3 | m1 |Q2 Q1 Q0 | m0 |
668 // --------------------------------------------------------------------------
669 //
670 // Where the m's are variable width depending on the number of bits used
671 // to encode the values (anywhere from 0 to 4). Since 5^3 = 125, the extra
672 // 7-bit value labeled Q (whose bits are interleaved where 0 is the LSB and 6 is
673 // the MSB), contains three quint values. To decode the quint values, the spec
674 // says that we need to follow the following algorithm:
675 //
676 // if Q[2:1] = 11 and Q[6:5] = 00
677 // q2 = { Q[0], Q[4]&~Q[0], Q[3]&~Q[0] }; q1 = q0 = 4
678 // else
679 // if Q[2:1] = 11
680 // q2 = 4; C = { Q[4:3], ~Q[6:5], Q[0] }
681 // else
682 // q2 = T[6:5]; C = Q[4:0]
683 //
684 // if C[2:0] = 101
685 // q1 = 4; q0 = C[4:3]
686 // else
687 // q1 = C[4:3]; q0 = C[2:0]
688 //
689 // The following C++ code is meant to mirror this layout and algorithm as
690 // closely as possible.
691
692 int m[3];
693 if (0 == nBits) {
694 memset(m, 0, sizeof(m));
695 } else {
696 SkASSERT(nBits < 8);
697 m[0] = static_cast<int>(blockBits(nBits - 1, 0));
698 m[1] = static_cast<int>(blockBits(2*nBits - 1 + 3, nBits + 3));
699 m[2] = static_cast<int>(blockBits(3*nBits - 1 + 5, 2*nBits + 5));
700 }
701
702 int Q =
703 static_cast<int>(blockBits(nBits + 2, nBits)) |
704 (static_cast<int>(blockBits(2*nBits + 3 + 1, 2*nBits + 3)) << 3) |
705 (static_cast<int>(blockBits(3*nBits + 5 + 1, 3*nBits + 5)) << 5);
706
707 int q[3];
708 SkTBits<int> Qbits(Q); // quantum?
709
710 if (Qbits(2, 1) == 0x3 && Qbits(6, 5) == 0) {
711 const int notBitZero = (0x1 & ~(Qbits[0]));
712 q[2] = (Qbits[0] << 2) | ((Qbits[4] & notBitZero) << 1) | (Qbits[3] & notBitZero);
713 q[1] = 4;
714 q[0] = 4;
715 } else {
716 int C;
717 if (Qbits(2, 1) == 0x3) {
718 q[2] = 4;
719 C = (Qbits(4, 3) << 3) | ((0x3 & ~(Qbits(6, 5))) << 1) | Qbits[0];
720 } else {
721 q[2] = Qbits(6, 5);
722 C = Qbits(4, 0);
723 }
724
725 SkTBits<int> Cbits(C);
726 if (Cbits(2, 0) == 0x5) {
727 q[1] = 4;
728 q[0] = Cbits(4, 3);
729 } else {
730 q[1] = Cbits(4, 3);
731 q[0] = Cbits(2, 0);
732 }
733 }
734
735 #ifdef SK_DEBUG
736 for (int i = 0; i < 3; ++i) {
737 SkASSERT(q[i] < 5);
738 SkASSERT(m[i] < (1 << nBits));
739 }
740 #endif
741
742 for (int i = 0; i < 3; ++i) {
743 *dst = (q[i] << nBits) + m[i];
744 ++dst;
745 }
746 }
747
748 // Function that decodes a sequence of integers stored as an ISE (Integer
749 // Sequence Encoding) bit stream. The full details of this function are outlined
750 // in section C.2.12 of the ASTC spec. A brief overview is as follows:
751 //
752 // - Each integer in the sequence is bounded by a specific range r.
753 // - The range of each value determines the way the bit stream is interpreted,
754 // - If the range is a power of two, then the sequence is a sequence of bits
755 // - If the range is of the form 3*2^n, then the sequence is stored as a
756 // sequence of blocks, each block contains 5 trits and 5 bit sequences, which
757 // decodes into 5 values.
758 // - Similarly, if the range is of the form 5*2^n, then the sequence is stored as a
759 // sequence of blocks, each block contains 3 quints and 3 bit sequences, which
760 // decodes into 3 values.
decode_integer_sequence(int * dst,int dstSize,int nVals,const ASTCBlock & block,int startBit,int endBit,bool bReadForward,int nBits,int nTrits,int nQuints)761 static bool decode_integer_sequence(
762 int* dst, // The array holding the destination bits
763 int dstSize, // The maximum size of the array
764 int nVals, // The number of values that we'd like to decode
765 const ASTCBlock &block, // The block that we're decoding from
766 int startBit, // The bit from which we're going to do the reading
767 int endBit, // The bit at which we stop reading (not inclusive)
768 bool bReadForward, // If true, then read LSB -> MSB, else read MSB -> LSB
769 int nBits, // The number of bits representing this encoding
770 int nTrits, // The number of trits representing this encoding
771 int nQuints // The number of quints representing this encoding
772 ) {
773 // If we want more values than we have, then fail.
774 if (nVals > dstSize) {
775 return false;
776 }
777
778 ASTCBlock src = block;
779
780 if (!bReadForward) {
781 src.reverse();
782 startBit = 128 - startBit;
783 endBit = 128 - endBit;
784 }
785
786 while (nVals > 0) {
787
788 if (nTrits > 0) {
789 SkASSERT(0 == nQuints);
790
791 int endBlockBit = startBit + 8 + 5*nBits;
792 if (endBlockBit > endBit) {
793 endBlockBit = endBit;
794 }
795
796 // Trit blocks are three values large.
797 int trits[5];
798 decode_trit_block(trits, nBits, read_astc_bits(src, startBit, endBlockBit));
799 memcpy(dst, trits, SkMin32(nVals, 5)*sizeof(int));
800
801 dst += 5;
802 nVals -= 5;
803 startBit = endBlockBit;
804
805 } else if (nQuints > 0) {
806 SkASSERT(0 == nTrits);
807
808 int endBlockBit = startBit + 7 + 3*nBits;
809 if (endBlockBit > endBit) {
810 endBlockBit = endBit;
811 }
812
813 // Quint blocks are three values large
814 int quints[3];
815 decode_quint_block(quints, nBits, read_astc_bits(src, startBit, endBlockBit));
816 memcpy(dst, quints, SkMin32(nVals, 3)*sizeof(int));
817
818 dst += 3;
819 nVals -= 3;
820 startBit = endBlockBit;
821
822 } else {
823 // Just read the bits, but don't read more than we have...
824 int endValBit = startBit + nBits;
825 if (endValBit > endBit) {
826 endValBit = endBit;
827 }
828
829 SkASSERT(endValBit - startBit < 31);
830 *dst = static_cast<int>(read_astc_bits(src, startBit, endValBit));
831 ++dst;
832 --nVals;
833 startBit = endValBit;
834 }
835 }
836
837 return true;
838 }
839
840 // Helper function that unquantizes some (seemingly random) generated
841 // numbers... meant to match the ASTC hardware. This function is used
842 // to unquantize both colors (Table C.2.16) and weights (Table C.2.26)
unquantize_value(unsigned mask,int A,int B,int C,int D)843 static inline int unquantize_value(unsigned mask, int A, int B, int C, int D) {
844 int T = D * C + B;
845 T = T ^ A;
846 T = (A & mask) | (T >> 2);
847 SkASSERT(T < 256);
848 return T;
849 }
850
851 // Helper function to replicate the bits in x that represents an oldPrec
852 // precision integer into a prec precision integer. For example:
853 // 255 == replicate_bits(7, 3, 8);
replicate_bits(int x,int oldPrec,int prec)854 static inline int replicate_bits(int x, int oldPrec, int prec) {
855 while (oldPrec < prec) {
856 const int toShift = SkMin32(prec-oldPrec, oldPrec);
857 x = (x << toShift) | (x >> (oldPrec - toShift));
858 oldPrec += toShift;
859 }
860
861 // Make sure that no bits are set outside the desired precision.
862 SkASSERT((-(1 << prec) & x) == 0);
863 return x;
864 }
865
866 // Returns the unquantized value of a color that's represented only as
867 // a set of bits.
unquantize_bits_color(int val,int nBits)868 static inline int unquantize_bits_color(int val, int nBits) {
869 return replicate_bits(val, nBits, 8);
870 }
871
872 // Returns the unquantized value of a color that's represented as a
873 // trit followed by nBits bits. This algorithm follows the sequence
874 // defined in section C.2.13 of the ASTC spec.
unquantize_trit_color(int val,int nBits)875 static inline int unquantize_trit_color(int val, int nBits) {
876 SkASSERT(nBits > 0);
877 SkASSERT(nBits < 7);
878
879 const int D = (val >> nBits) & 0x3;
880 SkASSERT(D < 3);
881
882 const int A = -(val & 0x1) & 0x1FF;
883
884 static const int Cvals[6] = { 204, 93, 44, 22, 11, 5 };
885 const int C = Cvals[nBits - 1];
886
887 int B = 0;
888 const SkTBits<int> valBits(val);
889 switch (nBits) {
890 case 1:
891 B = 0;
892 break;
893
894 case 2: {
895 const int b = valBits[1];
896 B = (b << 1) | (b << 2) | (b << 4) | (b << 8);
897 }
898 break;
899
900 case 3: {
901 const int cb = valBits(2, 1);
902 B = cb | (cb << 2) | (cb << 7);
903 }
904 break;
905
906 case 4: {
907 const int dcb = valBits(3, 1);
908 B = dcb | (dcb << 6);
909 }
910 break;
911
912 case 5: {
913 const int edcb = valBits(4, 1);
914 B = (edcb << 5) | (edcb >> 2);
915 }
916 break;
917
918 case 6: {
919 const int fedcb = valBits(5, 1);
920 B = (fedcb << 4) | (fedcb >> 4);
921 }
922 break;
923 }
924
925 return unquantize_value(0x80, A, B, C, D);
926 }
927
928 // Returns the unquantized value of a color that's represented as a
929 // quint followed by nBits bits. This algorithm follows the sequence
930 // defined in section C.2.13 of the ASTC spec.
unquantize_quint_color(int val,int nBits)931 static inline int unquantize_quint_color(int val, int nBits) {
932 const int D = (val >> nBits) & 0x7;
933 SkASSERT(D < 5);
934
935 const int A = -(val & 0x1) & 0x1FF;
936
937 static const int Cvals[5] = { 113, 54, 26, 13, 6 };
938 SkASSERT(nBits > 0);
939 SkASSERT(nBits < 6);
940
941 const int C = Cvals[nBits - 1];
942
943 int B = 0;
944 const SkTBits<int> valBits(val);
945 switch (nBits) {
946 case 1:
947 B = 0;
948 break;
949
950 case 2: {
951 const int b = valBits[1];
952 B = (b << 2) | (b << 3) | (b << 8);
953 }
954 break;
955
956 case 3: {
957 const int cb = valBits(2, 1);
958 B = (cb >> 1) | (cb << 1) | (cb << 7);
959 }
960 break;
961
962 case 4: {
963 const int dcb = valBits(3, 1);
964 B = (dcb >> 1) | (dcb << 6);
965 }
966 break;
967
968 case 5: {
969 const int edcb = valBits(4, 1);
970 B = (edcb << 5) | (edcb >> 3);
971 }
972 break;
973 }
974
975 return unquantize_value(0x80, A, B, C, D);
976 }
977
978 // This algorithm takes a list of integers, stored in vals, and unquantizes them
979 // in place. This follows the algorithm laid out in section C.2.13 of the ASTC spec.
unquantize_colors(int * vals,int nVals,int nBits,int nTrits,int nQuints)980 static void unquantize_colors(int *vals, int nVals, int nBits, int nTrits, int nQuints) {
981 for (int i = 0; i < nVals; ++i) {
982 if (nTrits > 0) {
983 SkASSERT(nQuints == 0);
984 vals[i] = unquantize_trit_color(vals[i], nBits);
985 } else if (nQuints > 0) {
986 SkASSERT(nTrits == 0);
987 vals[i] = unquantize_quint_color(vals[i], nBits);
988 } else {
989 SkASSERT(nQuints == 0 && nTrits == 0);
990 vals[i] = unquantize_bits_color(vals[i], nBits);
991 }
992 }
993 }
994
995 // Returns an interpolated value between c0 and c1 based on the weight. This
996 // follows the algorithm laid out in section C.2.19 of the ASTC spec.
interpolate_channel(int c0,int c1,int weight)997 static int interpolate_channel(int c0, int c1, int weight) {
998 SkASSERT(0 <= c0 && c0 < 256);
999 SkASSERT(0 <= c1 && c1 < 256);
1000
1001 c0 = (c0 << 8) | c0;
1002 c1 = (c1 << 8) | c1;
1003
1004 const int result = ((c0*(64 - weight) + c1*weight + 32) / 64) >> 8;
1005
1006 if (result > 255) {
1007 return 255;
1008 }
1009
1010 SkASSERT(result >= 0);
1011 return result;
1012 }
1013
1014 // Returns an interpolated color between the two endpoints based on the weight.
interpolate_endpoints(const SkColor endpoints[2],int weight)1015 static SkColor interpolate_endpoints(const SkColor endpoints[2], int weight) {
1016 return SkColorSetARGB(
1017 interpolate_channel(SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight),
1018 interpolate_channel(SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight),
1019 interpolate_channel(SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight),
1020 interpolate_channel(SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight));
1021 }
1022
1023 // Returns an interpolated color between the two endpoints based on the weight.
1024 // It uses separate weights for the channel depending on the value of the 'plane'
1025 // variable. By default, all channels will use weight 0, and the value of plane
1026 // means that weight1 will be used for:
1027 // 0: red
1028 // 1: green
1029 // 2: blue
1030 // 3: alpha
interpolate_dual_endpoints(const SkColor endpoints[2],int weight0,int weight1,int plane)1031 static SkColor interpolate_dual_endpoints(
1032 const SkColor endpoints[2], int weight0, int weight1, int plane) {
1033 int a = interpolate_channel(SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight0);
1034 int r = interpolate_channel(SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight0);
1035 int g = interpolate_channel(SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight0);
1036 int b = interpolate_channel(SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight0);
1037
1038 switch (plane) {
1039
1040 case 0:
1041 r = interpolate_channel(
1042 SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight1);
1043 break;
1044
1045 case 1:
1046 g = interpolate_channel(
1047 SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight1);
1048 break;
1049
1050 case 2:
1051 b = interpolate_channel(
1052 SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight1);
1053 break;
1054
1055 case 3:
1056 a = interpolate_channel(
1057 SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight1);
1058 break;
1059
1060 default:
1061 SkDEBUGFAIL("Plane should be 0-3");
1062 break;
1063 }
1064
1065 return SkColorSetARGB(a, r, g, b);
1066 }
1067
1068 // A struct of decoded values that we use to carry around information
1069 // about the block. dimX and dimY are the dimension in texels of the block,
1070 // for which there is only a limited subset of valid values:
1071 //
1072 // 4x4, 5x4, 5x5, 6x5, 6x6, 8x5, 8x6, 8x8, 10x5, 10x6, 10x8, 10x10, 12x10, 12x12
1073
1074 struct ASTCDecompressionData {
ASTCDecompressionDataASTCDecompressionData1075 ASTCDecompressionData(int dimX, int dimY) : fDimX(dimX), fDimY(dimY) { }
1076 const int fDimX; // the X dimension of the decompressed block
1077 const int fDimY; // the Y dimension of the decompressed block
1078 ASTCBlock fBlock; // the block data
1079 int fBlockMode; // the block header that contains the block mode.
1080
1081 bool fDualPlaneEnabled; // is this block compressing dual weight planes?
1082 int fDualPlane; // the independent plane in dual plane mode.
1083
1084 bool fVoidExtent; // is this block a single color?
1085 bool fError; // does this block have an error encoding?
1086
1087 int fWeightDimX; // the x dimension of the weight grid
1088 int fWeightDimY; // the y dimension of the weight grid
1089
1090 int fWeightBits; // the number of bits used for each weight value
1091 int fWeightTrits; // the number of trits used for each weight value
1092 int fWeightQuints; // the number of quints used for each weight value
1093
1094 int fPartCount; // the number of partitions in this block
1095 int fPartIndex; // the partition index: only relevant if fPartCount > 0
1096
// CEM values can be anything in the range 0-15, and each corresponds to a different
// mode that represents the color data. We only support LDR modes.
//
// numColorValues() derives the number of color values a mode consumes from
// its numeric value as ((cem >> 2) + 1) * 2, i.e. 2 for modes 0-3, 4 for
// modes 4-7, 6 for modes 8-11 and 8 for modes 12-15. The kHDR_* modes have no
// case in colorEndpoints() and fall through to its default branch.
enum ColorEndpointMode {
    // Modes 0-3: two color values each.
    kLDR_Luminance_Direct_ColorEndpointMode          = 0,
    kLDR_Luminance_BaseOffset_ColorEndpointMode      = 1,
    kHDR_Luminance_LargeRange_ColorEndpointMode      = 2,
    kHDR_Luminance_SmallRange_ColorEndpointMode      = 3,
    // Modes 4-7: four color values each.
    kLDR_LuminanceAlpha_Direct_ColorEndpointMode     = 4,
    kLDR_LuminanceAlpha_BaseOffset_ColorEndpointMode = 5,
    kLDR_RGB_BaseScale_ColorEndpointMode             = 6,
    kHDR_RGB_BaseScale_ColorEndpointMode             = 7,
    // Modes 8-11: six color values each.
    kLDR_RGB_Direct_ColorEndpointMode                = 8,
    kLDR_RGB_BaseOffset_ColorEndpointMode            = 9,
    kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode    = 10,
    kHDR_RGB_ColorEndpointMode                       = 11,
    // Modes 12-15: eight color values each.
    kLDR_RGBA_Direct_ColorEndpointMode               = 12,
    kLDR_RGBA_BaseOffset_ColorEndpointMode           = 13,
    kHDR_RGB_LDRAlpha_ColorEndpointMode              = 14,
    kHDR_RGB_HDRAlpha_ColorEndpointMode              = 15
};
1117 static const int kMaxColorEndpointModes = 16;
1118
1119 // the color endpoint modes for this block.
1120 static const int kMaxPartitions = 4;
1121 ColorEndpointMode fCEM[kMaxPartitions];
1122
1123 int fColorStartBit; // The bit position of the first bit of the color data
1124 int fColorEndBit; // The bit position of the last *possible* bit of the color data
1125
1126 // Returns the number of partitions for this block.
numPartitionsASTCDecompressionData1127 int numPartitions() const {
1128 return fPartCount;
1129 }
1130
1131 // Returns the total number of weight values that are stored in this block
numWeightsASTCDecompressionData1132 int numWeights() const {
1133 return fWeightDimX * fWeightDimY * (fDualPlaneEnabled ? 2 : 1);
1134 }
1135
#ifdef SK_DEBUG
// Largest value a quantized weight can take under the current encoding:
// 2^fWeightBits, times 3 with trits or 5 with quints, minus one. Only
// compiled in debug builds, where it backs an assertion.
int maxWeightValue() const {
    int multiplier = 1;
    if (fWeightTrits > 0) {
        SkASSERT(0 == fWeightQuints);
        multiplier = 3;
    } else if (fWeightQuints > 0) {
        SkASSERT(0 == fWeightTrits);
        multiplier = 5;
    }
    return (1 << fWeightBits) * multiplier - 1;
}
#endif
1151
1152 // The number of bits needed to represent the texel weight data. This
1153 // comes from the 'data size determination' section of the ASTC spec (C.2.22)
numWeightBitsASTCDecompressionData1154 int numWeightBits() const {
1155 const int nWeights = this->numWeights();
1156 return
1157 ((nWeights*8*fWeightTrits + 4) / 5) +
1158 ((nWeights*7*fWeightQuints + 2) / 3) +
1159 (nWeights*fWeightBits);
1160 }
1161
1162 // Returns the number of color values stored in this block. The number of
1163 // values stored is directly a function of the color endpoint modes.
numColorValuesASTCDecompressionData1164 int numColorValues() const {
1165 int numValues = 0;
1166 for (int i = 0; i < this->numPartitions(); ++i) {
1167 int cemInt = static_cast<int>(fCEM[i]);
1168 numValues += ((cemInt >> 2) + 1) * 2;
1169 }
1170
1171 return numValues;
1172 }
1173
1174 // Figures out the number of bits available for color values, and fills
1175 // in the maximum encoding that will fit the number of color values that
1176 // we need. Returns false on error. (See section C.2.22 of the spec)
getColorValueEncodingASTCDecompressionData1177 bool getColorValueEncoding(int *nBits, int *nTrits, int *nQuints) const {
1178 if (nullptr == nBits || nullptr == nTrits || nullptr == nQuints) {
1179 return false;
1180 }
1181
1182 const int nColorVals = this->numColorValues();
1183 if (nColorVals <= 0) {
1184 return false;
1185 }
1186
1187 const int colorBits = fColorEndBit - fColorStartBit;
1188 SkASSERT(colorBits > 0);
1189
1190 // This is the minimum amount of accuracy required by the spec.
1191 if (colorBits < ((13 * nColorVals + 4) / 5)) {
1192 return false;
1193 }
1194
1195 // Values can be represented as at most 8-bit values.
1196 // !SPEED! place this in a lookup table based on colorBits and nColorVals
1197 for (int i = 255; i > 0; --i) {
1198 int range = i + 1;
1199 int bits = 0, trits = 0, quints = 0;
1200 bool valid = false;
1201 if (SkIsPow2(range)) {
1202 bits = bits_for_range(range);
1203 valid = true;
1204 } else if ((range % 3) == 0 && SkIsPow2(range/3)) {
1205 trits = 1;
1206 bits = bits_for_range(range/3);
1207 valid = true;
1208 } else if ((range % 5) == 0 && SkIsPow2(range/5)) {
1209 quints = 1;
1210 bits = bits_for_range(range/5);
1211 valid = true;
1212 }
1213
1214 if (valid) {
1215 const int actualColorBits =
1216 ((nColorVals*8*trits + 4) / 5) +
1217 ((nColorVals*7*quints + 2) / 3) +
1218 (nColorVals*bits);
1219 if (actualColorBits <= colorBits) {
1220 *nTrits = trits;
1221 *nQuints = quints;
1222 *nBits = bits;
1223 return true;
1224 }
1225 }
1226 }
1227
1228 return false;
1229 }
1230
// Converts the sequence of color values into endpoints. The algorithm here
// corresponds to the values determined by section C.2.14 of the ASTC spec
//
// colorValues is consumed front to back: for each partition i, the color
// endpoint mode fCEM[i] decides how many values are read (2, 4, 6 or 8), the
// two endpoints are written to endpoints[i][0] and endpoints[i][1], and the
// pointer is advanced past the consumed values. Rows of endpoints at or beyond
// numPartitions() are not written.
//
// HDR modes have no case here: they reach the default branch, which calls
// SkDEBUGFAIL and leaves both that partition's endpoints and the value
// pointer unchanged.
void colorEndpoints(SkColor endpoints[4][2], const int* colorValues) const {
    for (int i = 0; i < this->numPartitions(); ++i) {
        switch (fCEM[i]) {
            case kLDR_Luminance_Direct_ColorEndpointMode: {
                // Two opaque greys: v[0] and v[1] are the luminances.
                const int* v = colorValues;
                endpoints[i][0] = SkColorSetARGB(0xFF, v[0], v[0], v[0]);
                endpoints[i][1] = SkColorSetARGB(0xFF, v[1], v[1], v[1]);

                colorValues += 2;
            }
            break;

            case kLDR_Luminance_BaseOffset_ColorEndpointMode: {
                // The base luminance takes its low six bits from v[0] and its
                // top two bits from v[1]; the low six bits of v[1] are the
                // offset added to reach the second endpoint.
                const int* v = colorValues;
                const int L0 = (v[0] >> 2) | (v[1] & 0xC0);
                const int L1 = clamp_byte(L0 + (v[1] & 0x3F));

                endpoints[i][0] = SkColorSetARGB(0xFF, L0, L0, L0);
                endpoints[i][1] = SkColorSetARGB(0xFF, L1, L1, L1);

                colorValues += 2;
            }
            break;

            case kLDR_LuminanceAlpha_Direct_ColorEndpointMode: {
                // v[0], v[1] are the luminances; v[2], v[3] the alphas.
                const int* v = colorValues;

                endpoints[i][0] = SkColorSetARGB(v[2], v[0], v[0], v[0]);
                endpoints[i][1] = SkColorSetARGB(v[3], v[1], v[1], v[1]);

                colorValues += 4;
            }
            break;

            case kLDR_LuminanceAlpha_BaseOffset_ColorEndpointMode: {
                int v0 = colorValues[0];
                int v1 = colorValues[1];
                int v2 = colorValues[2];
                int v3 = colorValues[3];

                // NOTE(review): bit_transfer_signed is defined elsewhere;
                // presumably it turns (v1, v0) and (v3, v2) into
                // (signed offset, base) pairs -- confirm against its definition.
                bit_transfer_signed(&v1, &v0);
                bit_transfer_signed(&v3, &v2);

                // First endpoint is the base; second is base + offset, clamped.
                endpoints[i][0] = SkColorSetARGB(v2, v0, v0, v0);
                endpoints[i][1] = SkColorSetARGB(
                    clamp_byte(v3+v2),
                    clamp_byte(v1+v0),
                    clamp_byte(v1+v0),
                    clamp_byte(v1+v0));

                colorValues += 4;
            }
            break;

            // The remaining modes delegate to the decode_rgba_* helpers. The
            // last argument is true for the RGB-only modes and false for the
            // modes whose names include alpha.

            case kLDR_RGB_BaseScale_ColorEndpointMode: {
                decode_rgba_basescale(colorValues, endpoints[i], true);
                colorValues += 4;
            }
            break;

            case kLDR_RGB_Direct_ColorEndpointMode: {
                decode_rgba_direct(colorValues, endpoints[i], true);
                colorValues += 6;
            }
            break;

            case kLDR_RGB_BaseOffset_ColorEndpointMode: {
                decode_rgba_baseoffset(colorValues, endpoints[i], true);
                colorValues += 6;
            }
            break;

            case kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode: {
                decode_rgba_basescale(colorValues, endpoints[i], false);
                colorValues += 6;
            }
            break;

            case kLDR_RGBA_Direct_ColorEndpointMode: {
                decode_rgba_direct(colorValues, endpoints[i], false);
                colorValues += 8;
            }
            break;

            case kLDR_RGBA_BaseOffset_ColorEndpointMode: {
                decode_rgba_baseoffset(colorValues, endpoints[i], false);
                colorValues += 8;
            }
            break;

            default:
                SkDEBUGFAIL("HDR mode unsupported! This should be caught sooner.");
                break;
        }
    }
}
1329
// Follows the procedure from section C.2.17 of the ASTC specification
//
// Converts the quantized weight x (encoded with fWeightBits plain bits plus
// at most one trit or quint) into a weight in the range [0, 64]. The encoding
// first yields a value T in [0, 63]; values above 32 are then bumped by one so
// that the top of the range lands exactly on 64.
int unquantizeWeight(int x) const {
    SkASSERT(x <= this->maxWeightValue());

    // D is the trit/quint that sits above the low fWeightBits bits; A is the
    // lowest bit of x replicated across seven bits.
    const int D = (x >> fWeightBits) & 0x7;
    const int A = -(x & 0x1) & 0x7F;

    SkTBits<int> xbits(x);

    int T = 0;
    if (fWeightTrits > 0) {
        SkASSERT(0 == fWeightQuints);
        // Trit encodings: B and C depend on the number of plain bits.
        switch (fWeightBits) {
            case 0: {
                // x is a single trit
                SkASSERT(x < 3);

                static const int kUnquantizationTable[3] = { 0, 32, 63 };
                T = kUnquantizationTable[x];
            }
            break;

            case 1: {
                const int B = 0;
                const int C = 50;
                T = unquantize_value(0x20, A, B, C, D);
            }
            break;

            case 2: {
                // B places bit 1 of x at bit positions 0, 2 and 6.
                const int b = xbits[1];
                const int B = b | (b << 2) | (b << 6);
                const int C = 23;
                T = unquantize_value(0x20, A, B, C, D);
            }
            break;

            case 3: {
                // B places bits 2..1 of x at bit positions 1..0 and 6..5.
                const int cb = xbits(2, 1);
                const int B = cb | (cb << 5);
                const int C = 11;
                T = unquantize_value(0x20, A, B, C, D);
            }
            break;

            default:
                // T stays 0 when the bit count is not one handled above.
                SkDEBUGFAIL("Too many bits for trit encoding");
                break;
        }

    } else if (fWeightQuints > 0) {
        SkASSERT(0 == fWeightTrits);
        // Quint encodings: B and C depend on the number of plain bits.
        switch (fWeightBits) {
            case 0: {
                // x is a single quint
                SkASSERT(x < 5);

                static const int kUnquantizationTable[5] = { 0, 16, 32, 47, 63 };
                T = kUnquantizationTable[x];
            }
            break;

            case 1: {
                const int B = 0;
                const int C = 28;
                T = unquantize_value(0x20, A, B, C, D);
            }
            break;

            case 2: {
                // B places bit 1 of x at bit positions 1 and 6.
                const int b = xbits[1];
                const int B = (b << 1) | (b << 6);
                const int C = 13;
                T = unquantize_value(0x20, A, B, C, D);
            }
            break;

            default:
                // T stays 0 when the bit count is not one handled above.
                SkDEBUGFAIL("Too many bits for quint encoding");
                break;
        }
    } else {
        SkASSERT(0 == fWeightTrits);
        SkASSERT(0 == fWeightQuints);

        // Plain bits only: replicate them out to six bits.
        T = replicate_bits(x, fWeightBits, 6);
    }

    // This should bring the value within [0, 63]..
    SkASSERT(T <= 63);

    // Stretch [0, 63] to [0, 64] by skipping over 33.
    if (T > 32) {
        T += 1;
    }

    SkASSERT(T <= 64);

    return T;
}
1429
1430 // Returns the weight at the associated index. If the index is out of bounds, it
1431 // returns zero. It also chooses the weight appropriately based on the given dual
1432 // plane.
getWeightASTCDecompressionData1433 int getWeight(const int* unquantizedWeights, int idx, bool dualPlane) const {
1434 const int maxIdx = (fDualPlaneEnabled ? 2 : 1) * fWeightDimX * fWeightDimY - 1;
1435 if (fDualPlaneEnabled) {
1436 const int effectiveIdx = 2*idx + (dualPlane ? 1 : 0);
1437 if (effectiveIdx > maxIdx) {
1438 return 0;
1439 }
1440 return unquantizedWeights[effectiveIdx];
1441 }
1442
1443 SkASSERT(!dualPlane);
1444
1445 if (idx > maxIdx) {
1446 return 0;
1447 } else {
1448 return unquantizedWeights[idx];
1449 }
1450 }
1451
1452 // This computes the effective weight at location (s, t) of the block. This
1453 // weight is computed by sampling the texel weight grid (it's usually not 1-1), and
1454 // then applying a bilerp. The algorithm outlined here follows the algorithm
1455 // defined in section C.2.18 of the ASTC spec.
infillWeightASTCDecompressionData1456 int infillWeight(const int* unquantizedValues, int s, int t, bool dualPlane) const {
1457 const int Ds = (1024 + fDimX/2) / (fDimX - 1);
1458 const int Dt = (1024 + fDimY/2) / (fDimY - 1);
1459
1460 const int cs = Ds * s;
1461 const int ct = Dt * t;
1462
1463 const int gs = (cs*(fWeightDimX - 1) + 32) >> 6;
1464 const int gt = (ct*(fWeightDimY - 1) + 32) >> 6;
1465
1466 const int js = gs >> 4;
1467 const int jt = gt >> 4;
1468
1469 const int fs = gs & 0xF;
1470 const int ft = gt & 0xF;
1471
1472 const int idx = js + jt*fWeightDimX;
1473 const int p00 = this->getWeight(unquantizedValues, idx, dualPlane);
1474 const int p01 = this->getWeight(unquantizedValues, idx + 1, dualPlane);
1475 const int p10 = this->getWeight(unquantizedValues, idx + fWeightDimX, dualPlane);
1476 const int p11 = this->getWeight(unquantizedValues, idx + fWeightDimX + 1, dualPlane);
1477
1478 const int w11 = (fs*ft + 8) >> 4;
1479 const int w10 = ft - w11;
1480 const int w01 = fs - w11;
1481 const int w00 = 16 - fs - ft + w11;
1482
1483 const int weight = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1484 SkASSERT(weight <= 64);
1485 return weight;
1486 }
1487
1488 // Unquantizes the decoded texel weights as described in section C.2.17 of
1489 // the ASTC specification. Additionally, it populates texelWeights with
1490 // the expanded weight grid, which is computed according to section C.2.18
texelWeightsASTCDecompressionData1491 void texelWeights(int texelWeights[2][12][12], const int* texelValues) const {
1492 // Unquantized texel weights...
1493 int unquantizedValues[144*2]; // 12x12 blocks with dual plane decoding...
1494 SkASSERT(this->numWeights() <= 144*2);
1495
1496 // Unquantize the weights and cache them
1497 for (int j = 0; j < this->numWeights(); ++j) {
1498 unquantizedValues[j] = this->unquantizeWeight(texelValues[j]);
1499 }
1500
1501 // Do weight infill...
1502 for (int y = 0; y < fDimY; ++y) {
1503 for (int x = 0; x < fDimX; ++x) {
1504 texelWeights[0][x][y] = this->infillWeight(unquantizedValues, x, y, false);
1505 if (fDualPlaneEnabled) {
1506 texelWeights[1][x][y] = this->infillWeight(unquantizedValues, x, y, true);
1507 }
1508 }
1509 }
1510 }
1511
// Returns the partition for the texel located at position (x, y).
// Adapted from C.2.21 of the ASTC specification
//
// The result is an index in [0, 3]. The partition index (fPartIndex) and the
// partition count seed a hash; twelve 4-bit fields of the hash are squared and
// scaled to form the coefficients of four linear functions of (x, y), and the
// texel belongs to the partition whose function value is largest (ties go to
// the lower partition number).
int getPartition(int x, int y) const {
    const int partitionCount = this->numPartitions();
    int seed = fPartIndex;
    // Blocks with fewer than 31 texels use doubled coordinates.
    if ((fDimX * fDimY) < 31) {
        x <<= 1;
        y <<= 1;
    }

    // Give each partition count its own range of seeds.
    seed += (partitionCount - 1) * 1024;

    // Hash the seed. The arithmetic is on uint32_t, so it wraps modulo 2^32.
    uint32_t p = seed;
    p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4;
    p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3;
    p ^= p << 6; p ^= p >> 17;

    // Slice the hash into twelve 4-bit values.
    uint32_t rnum = p;
    uint8_t seed1 = rnum & 0xF;
    uint8_t seed2 = (rnum >> 4) & 0xF;
    uint8_t seed3 = (rnum >> 8) & 0xF;
    uint8_t seed4 = (rnum >> 12) & 0xF;
    uint8_t seed5 = (rnum >> 16) & 0xF;
    uint8_t seed6 = (rnum >> 20) & 0xF;
    uint8_t seed7 = (rnum >> 24) & 0xF;
    uint8_t seed8 = (rnum >> 28) & 0xF;
    uint8_t seed9 = (rnum >> 18) & 0xF;
    uint8_t seed10 = (rnum >> 22) & 0xF;
    uint8_t seed11 = (rnum >> 26) & 0xF;
    uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;

    // Square each value; the inputs are at most 15, so the squares (at most
    // 225) still fit in a uint8_t.
    seed1 *= seed1; seed2 *= seed2;
    seed3 *= seed3; seed4 *= seed4;
    seed5 *= seed5; seed6 *= seed6;
    seed7 *= seed7; seed8 *= seed8;
    seed9 *= seed9; seed10 *= seed10;
    seed11 *= seed11; seed12 *= seed12;

    // Pick the right-shift amounts from the low bits of the seed and the
    // partition count.
    int sh1, sh2, sh3;
    if (0 != (seed & 1)) {
        sh1 = (0 != (seed & 2))? 4 : 5;
        sh2 = (partitionCount == 3)? 6 : 5;
    } else {
        sh1 = (partitionCount==3)? 6 : 5;
        sh2 = (0 != (seed & 2))? 4 : 5;
    }
    sh3 = (0 != (seed & 0x10))? sh1 : sh2;

    seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2;
    seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2;
    seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;

    // One linear function per partition. z is fixed at 0 here, so the
    // seed9..seed12 terms contribute nothing.
    const int z = 0;
    int a = seed1*x + seed2*y + seed11*z + (rnum >> 14);
    int b = seed3*x + seed4*y + seed12*z + (rnum >> 10);
    int c = seed5*x + seed6*y + seed9 *z + (rnum >> 6);
    int d = seed7*x + seed8*y + seed10*z + (rnum >> 2);

    // Only the low six bits take part in the comparison.
    a &= 0x3F;
    b &= 0x3F;
    c &= 0x3F;
    d &= 0x3F;

    // Zero the functions of partitions beyond the partition count.
    if (partitionCount < 4) {
        d = 0;
    }

    if (partitionCount < 3) {
        c = 0;
    }

    // The largest value wins; on ties the lower partition number is chosen.
    if (a >= b && a >= c && a >= d) {
        return 0;
    } else if (b >= c && b >= d) {
        return 1;
    } else if (c >= d) {
        return 2;
    } else {
        return 3;
    }
}
1593
1594 // Performs the proper interpolation of the texel based on the
1595 // endpoints and weights.
getTexelASTCDecompressionData1596 SkColor getTexel(const SkColor endpoints[4][2],
1597 const int weights[2][12][12],
1598 int x, int y) const {
1599 int part = 0;
1600 if (this->numPartitions() > 1) {
1601 part = this->getPartition(x, y);
1602 }
1603
1604 SkColor result;
1605 if (fDualPlaneEnabled) {
1606 result = interpolate_dual_endpoints(
1607 endpoints[part], weights[0][x][y], weights[1][x][y], fDualPlane);
1608 } else {
1609 result = interpolate_endpoints(endpoints[part], weights[0][x][y]);
1610 }
1611
1612 #if 1
1613 // !FIXME! if we're writing directly to a bitmap, then we don't need
1614 // to swap the red and blue channels, but since we're usually being used
1615 // by the SkImageDecoder_astc module, the results are expected to be in RGBA.
1616 result = SkColorSetARGB(
1617 SkColorGetA(result), SkColorGetB(result), SkColorGetG(result), SkColorGetR(result));
1618 #endif
1619
1620 return result;
1621 }
1622
decodeASTCDecompressionData1623 void decode() {
1624 // First decode the block mode.
1625 this->decodeBlockMode();
1626
1627 // Now we can decode the partition information.
1628 fPartIndex = static_cast<int>(read_astc_bits(fBlock, 11, 23));
1629 fPartCount = (fPartIndex & 0x3) + 1;
1630 fPartIndex >>= 2;
1631
1632 // This is illegal
1633 if (fDualPlaneEnabled && this->numPartitions() == 4) {
1634 fError = true;
1635 return;
1636 }
1637
1638 // Based on the partition info, we can decode the color information.
1639 this->decodeColorData();
1640 }
1641
1642 // Decodes the dual plane based on the given bit location. The final
1643 // location, if the dual plane is enabled, is also the end of our color data.
1644 // This function is only meant to be used from this->decodeColorData()
decodeDualPlaneASTCDecompressionData1645 void decodeDualPlane(int bitLoc) {
1646 if (fDualPlaneEnabled) {
1647 fDualPlane = static_cast<int>(read_astc_bits(fBlock, bitLoc - 2, bitLoc));
1648 fColorEndBit = bitLoc - 2;
1649 } else {
1650 fColorEndBit = bitLoc;
1651 }
1652 }
1653
// Decodes the color information based on the ASTC spec.
//
// Fills in fCEM (the color endpoint mode of each partition), fColorStartBit
// and, through decodeDualPlane(), fColorEndBit and fDualPlane. Requires the
// block mode and partition count to have been decoded already, since both the
// weight data size and the partition count determine where the fields live.
void decodeColorData() {

    // By default, the last color bit is at the end of the texel weights
    // (the weight data occupies the top numWeightBits() bits of the block).
    const int lastWeight = 128 - this->numWeightBits();

    // If we have a dual plane then it will be at this location, too.
    int dualPlaneBitLoc = lastWeight;

    // If there's only one partition, then our job is (relatively) easy:
    // the 4-bit CEM sits in bits 13..16 and the color data starts right after.
    if (this->numPartitions() == 1) {
        fCEM[0] = static_cast<ColorEndpointMode>(read_astc_bits(fBlock, 13, 17));
        fColorStartBit = 17;

        // Handle dual plane mode...
        this->decodeDualPlane(dualPlaneBitLoc);

        return;
    }

    // If we have more than one partition, then we need to make
    // room for the partition index.
    fColorStartBit = 29;

    // Read the base CEM. If it's zero, then we have no additional
    // CEM data and the endpoints for each partition share the same CEM.
    const int baseCEM = static_cast<int>(read_astc_bits(fBlock, 23, 25));
    if (0 == baseCEM) {

        const ColorEndpointMode sameCEM =
            static_cast<ColorEndpointMode>(read_astc_bits(fBlock, 25, 29));

        // All kMaxPartitions slots are filled, not just the ones in use.
        for (int i = 0; i < kMaxPartitions; ++i) {
            fCEM[i] = sameCEM;
        }

        // Handle dual plane mode...
        this->decodeDualPlane(dualPlaneBitLoc);

        return;
    }

    // Move the dual plane selector bits down based on how many
    // partitions the block contains. The skipped bits (2, 5 or 8) hold the
    // part of the CEM configuration that did not fit in bits 23..28.
    switch (this->numPartitions()) {
        case 2:
            dualPlaneBitLoc -= 2;
            break;

        case 3:
            dualPlaneBitLoc -= 5;
            break;

        case 4:
            dualPlaneBitLoc -= 8;
            break;

        default:
            SkDEBUGFAIL("Internal ASTC decoding error.");
            break;
    }

    // The rest of the CEM config will be between the dual plane bit selector
    // and the texel weight grid.
    const int lowCEM = static_cast<int>(read_astc_bits(fBlock, 23, 29));
    SkASSERT(lastWeight >= dualPlaneBitLoc);
    SkASSERT(lastWeight - dualPlaneBitLoc < 31);
    int fullCEM = static_cast<int>(read_astc_bits(fBlock, dualPlaneBitLoc, lastWeight));

    // Attach the config at the end of the weight grid to the CEM values
    // in the beginning of the block.
    fullCEM = (fullCEM << 6) | lowCEM;

    // Ignore the two least significant bits, since those are our baseCEM above.
    fullCEM = fullCEM >> 2;

    // What remains is one C bit per partition followed by one 2-bit M value
    // per partition, both in partition order.
    int C[kMaxPartitions]; // Next, decode C and M from the spec (Table C.2.12)
    for (int i = 0; i < this->numPartitions(); ++i) {
        C[i] = fullCEM & 1;
        fullCEM = fullCEM >> 1;
    }

    int M[kMaxPartitions];
    for (int i = 0; i < this->numPartitions(); ++i) {
        M[i] = fullCEM & 0x3;
        fullCEM = fullCEM >> 2;
    }

    // Construct our CEMs: the class is (baseCEM - 1) + C[i], and M[i] selects
    // the mode within that class of four.
    SkASSERT(baseCEM > 0);
    for (int i = 0; i < this->numPartitions(); ++i) {
        int cem = (baseCEM - 1) * 4;
        cem += (0 == C[i])? 0 : 4;
        cem += M[i];

        SkASSERT(cem < 16);
        fCEM[i] = static_cast<ColorEndpointMode>(cem);
    }

    // Finally, if we have dual plane mode, then read the plane selector.
    this->decodeDualPlane(dualPlaneBitLoc);
}
1756
1757 // Decodes the block mode. This function determines whether or not we use
1758 // dual plane encoding, the size of the texel weight grid, and the number of
1759 // bits, trits and quints that are used to encode it. For more information,
1760 // see section C.2.10 of the ASTC spec.
1761 //
1762 // For 2D blocks, the Block Mode field is laid out as follows:
1763 //
1764 // -------------------------------------------------------------------------
1765 // 10 9 8 7 6 5 4 3 2 1 0 Width Height Notes
1766 // -------------------------------------------------------------------------
1767 // D H B A R0 0 0 R2 R1 B+4 A+2
1768 // D H B A R0 0 1 R2 R1 B+8 A+2
1769 // D H B A R0 1 0 R2 R1 A+2 B+8
1770 // D H 0 B A R0 1 1 R2 R1 A+2 B+6
1771 // D H 1 B A R0 1 1 R2 R1 B+2 A+2
1772 // D H 0 0 A R0 R2 R1 0 0 12 A+2
1773 // D H 0 1 A R0 R2 R1 0 0 A+2 12
1774 // D H 1 1 0 0 R0 R2 R1 0 0 6 10
1775 // D H 1 1 0 1 R0 R2 R1 0 0 10 6
1776 // B 1 0 A R0 R2 R1 0 0 A+6 B+6 D=0, H=0
1777 // x x 1 1 1 1 1 1 1 0 0 - - Void-extent
1778 // x x 1 1 1 x x x x 0 0 - - Reserved*
1779 // x x x x x x x 0 0 0 0 - - Reserved
1780 // -------------------------------------------------------------------------
1781 //
1782 // D - dual plane enabled
1783 // H, R - used to determine the number of bits/trits/quints in texel weight encoding
//        R is a three bit value whose LSB is R0 and MSB is R2
1785 // Width, Height - dimensions of the texel weight grid (determined by A and B)
1786
decodeBlockModeASTCDecompressionData1787 void decodeBlockMode() {
1788 const int blockMode = static_cast<int>(read_astc_bits(fBlock, 0, 11));
1789
1790 // Check for special void extent encoding
1791 fVoidExtent = (blockMode & 0x1FF) == 0x1FC;
1792
1793 // Check for reserved block modes
1794 fError = ((blockMode & 0x1C3) == 0x1C0) || ((blockMode & 0xF) == 0);
1795
1796 // Neither reserved nor void-extent, decode as usual
1797 // This code corresponds to table C.2.8 of the ASTC spec
1798 bool highPrecision = false;
1799 int R = 0;
1800 if ((blockMode & 0x3) == 0) {
1801 R = ((0xC & blockMode) >> 1) | ((0x10 & blockMode) >> 4);
1802 const int bitsSevenAndEight = (blockMode & 0x180) >> 7;
1803 SkASSERT(0 <= bitsSevenAndEight && bitsSevenAndEight < 4);
1804
1805 const int A = (blockMode >> 5) & 0x3;
1806 const int B = (blockMode >> 9) & 0x3;
1807
1808 fDualPlaneEnabled = (blockMode >> 10) & 0x1;
1809 highPrecision = (blockMode >> 9) & 0x1;
1810
1811 switch (bitsSevenAndEight) {
1812 default:
1813 case 0:
1814 fWeightDimX = 12;
1815 fWeightDimY = A + 2;
1816 break;
1817
1818 case 1:
1819 fWeightDimX = A + 2;
1820 fWeightDimY = 12;
1821 break;
1822
1823 case 2:
1824 fWeightDimX = A + 6;
1825 fWeightDimY = B + 6;
1826 fDualPlaneEnabled = false;
1827 highPrecision = false;
1828 break;
1829
1830 case 3:
1831 if (0 == A) {
1832 fWeightDimX = 6;
1833 fWeightDimY = 10;
1834 } else {
1835 fWeightDimX = 10;
1836 fWeightDimY = 6;
1837 }
1838 break;
1839 }
1840 } else { // (blockMode & 0x3) != 0
1841 R = ((blockMode & 0x3) << 1) | ((blockMode & 0x10) >> 4);
1842
1843 const int bitsTwoAndThree = (blockMode >> 2) & 0x3;
1844 SkASSERT(0 <= bitsTwoAndThree && bitsTwoAndThree < 4);
1845
1846 const int A = (blockMode >> 5) & 0x3;
1847 const int B = (blockMode >> 7) & 0x3;
1848
1849 fDualPlaneEnabled = (blockMode >> 10) & 0x1;
1850 highPrecision = (blockMode >> 9) & 0x1;
1851
1852 switch (bitsTwoAndThree) {
1853 case 0:
1854 fWeightDimX = B + 4;
1855 fWeightDimY = A + 2;
1856 break;
1857 case 1:
1858 fWeightDimX = B + 8;
1859 fWeightDimY = A + 2;
1860 break;
1861 case 2:
1862 fWeightDimX = A + 2;
1863 fWeightDimY = B + 8;
1864 break;
1865 case 3:
1866 if ((B & 0x2) == 0) {
1867 fWeightDimX = A + 2;
1868 fWeightDimY = (B & 1) + 6;
1869 } else {
1870 fWeightDimX = (B & 1) + 2;
1871 fWeightDimY = A + 2;
1872 }
1873 break;
1874 }
1875 }
1876
1877 // We should have set the values of R and highPrecision
1878 // from decoding the block mode, these are used to determine
1879 // the proper dimensions of our weight grid.
1880 if ((R & 0x6) == 0) {
1881 fError = true;
1882 } else {
1883 static const int kBitAllocationTable[2][6][3] = {
1884 {
1885 { 1, 0, 0 },
1886 { 0, 1, 0 },
1887 { 2, 0, 0 },
1888 { 0, 0, 1 },
1889 { 1, 1, 0 },
1890 { 3, 0, 0 }
1891 },
1892 {
1893 { 1, 0, 1 },
1894 { 2, 1, 0 },
1895 { 4, 0, 0 },
1896 { 2, 0, 1 },
1897 { 3, 1, 0 },
1898 { 5, 0, 0 }
1899 }
1900 };
1901
1902 fWeightBits = kBitAllocationTable[highPrecision][R - 2][0];
1903 fWeightTrits = kBitAllocationTable[highPrecision][R - 2][1];
1904 fWeightQuints = kBitAllocationTable[highPrecision][R - 2][2];
1905 }
1906 }
1907 };
1908
1909 // Reads an ASTC block from the given pointer.
read_astc_block(ASTCDecompressionData * dst,const uint8_t * src)1910 static inline void read_astc_block(ASTCDecompressionData *dst, const uint8_t* src) {
1911 const uint64_t* qword = reinterpret_cast<const uint64_t*>(src);
1912 dst->fBlock.fLow = SkEndian_SwapLE64(qword[0]);
1913 dst->fBlock.fHigh = SkEndian_SwapLE64(qword[1]);
1914 dst->decode();
1915 }
1916
1917 // Take a known void-extent block, and write out the values as a constant color.
decompress_void_extent(uint8_t * dst,int dstRowBytes,const ASTCDecompressionData & data)1918 static void decompress_void_extent(uint8_t* dst, int dstRowBytes,
1919 const ASTCDecompressionData &data) {
1920 // The top 64 bits contain 4 16-bit RGBA values.
1921 int a = (static_cast<int>(read_astc_bits(data.fBlock, 112, 128)) + 255) >> 8;
1922 int b = (static_cast<int>(read_astc_bits(data.fBlock, 96, 112)) + 255) >> 8;
1923 int g = (static_cast<int>(read_astc_bits(data.fBlock, 80, 96)) + 255) >> 8;
1924 int r = (static_cast<int>(read_astc_bits(data.fBlock, 64, 80)) + 255) >> 8;
1925
1926 write_constant_color(dst, data.fDimX, data.fDimY, dstRowBytes, SkColorSetARGB(a, r, g, b));
1927 }
1928
1929 // Decompresses a single ASTC block. It's assumed that data.fDimX and data.fDimY are
1930 // set and that the block has already been decoded (i.e. data.decode() has been called)
decompress_astc_block(uint8_t * dst,int dstRowBytes,const ASTCDecompressionData & data)1931 static void decompress_astc_block(uint8_t* dst, int dstRowBytes,
1932 const ASTCDecompressionData &data) {
1933 if (data.fError) {
1934 write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1935 return;
1936 }
1937
1938 if (data.fVoidExtent) {
1939 decompress_void_extent(dst, dstRowBytes, data);
1940 return;
1941 }
1942
1943 // According to the spec, any more than 64 values is illegal. (C.2.24)
1944 static const int kMaxTexelValues = 64;
1945
1946 // Decode the texel weights.
1947 int texelValues[kMaxTexelValues];
1948 bool success = decode_integer_sequence(
1949 texelValues, kMaxTexelValues, data.numWeights(),
1950 // texel data goes to the end of the 128 bit block.
1951 data.fBlock, 128, 128 - data.numWeightBits(), false,
1952 data.fWeightBits, data.fWeightTrits, data.fWeightQuints);
1953
1954 if (!success) {
1955 write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1956 return;
1957 }
1958
1959 // Decode the color endpoints
1960 int colorBits, colorTrits, colorQuints;
1961 if (!data.getColorValueEncoding(&colorBits, &colorTrits, &colorQuints)) {
1962 write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1963 return;
1964 }
1965
1966 // According to the spec, any more than 18 color values is illegal. (C.2.24)
1967 static const int kMaxColorValues = 18;
1968
1969 int colorValues[kMaxColorValues];
1970 success = decode_integer_sequence(
1971 colorValues, kMaxColorValues, data.numColorValues(),
1972 data.fBlock, data.fColorStartBit, data.fColorEndBit, true,
1973 colorBits, colorTrits, colorQuints);
1974
1975 if (!success) {
1976 write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1977 return;
1978 }
1979
1980 // Unquantize the color values after they've been decoded.
1981 unquantize_colors(colorValues, data.numColorValues(), colorBits, colorTrits, colorQuints);
1982
1983 // Decode the colors into the appropriate endpoints.
1984 SkColor endpoints[4][2];
1985 data.colorEndpoints(endpoints, colorValues);
1986
1987 // Do texel infill and decode the texel values.
1988 int texelWeights[2][12][12];
1989 data.texelWeights(texelWeights, texelValues);
1990
1991 // Write the texels by interpolating them based on the information
1992 // stored in the block.
1993 dst += data.fDimY * dstRowBytes;
1994 for (int y = 0; y < data.fDimY; ++y) {
1995 dst -= dstRowBytes;
1996 SkColor* colorPtr = reinterpret_cast<SkColor*>(dst);
1997 for (int x = 0; x < data.fDimX; ++x) {
1998 colorPtr[x] = data.getTexel(endpoints, texelWeights, x, y);
1999 }
2000 }
2001 }
2002
2003 ////////////////////////////////////////////////////////////////////////////////
2004 //
// ASTC Compression Struct
2006 //
2007 ////////////////////////////////////////////////////////////////////////////////
2008
// This is the type passed as the CompressorType argument of the compressed
// blitter for the ASTC format. The static functions required to be in this
// struct are documented in SkTextureCompressor_Blitter.h
//
// Every function here forwards to compress_a8_astc_block; the struct holds
// no state of its own.
struct CompressorASTC {
    // Compresses one block from src into dst, reading the source through the
    // GetAlphaTranspose accessor with a fixed row-bytes argument of 12.
    // NOTE(review): presumably src is a tightly packed, column-major 12x12
    // alpha block -- confirm against SkTextureCompressor_Blitter.h.
    static inline void CompressA8Vertical(uint8_t* dst, const uint8_t* src) {
        compress_a8_astc_block<GetAlphaTranspose>(&dst, src, 12);
    }

    // Compresses one block from src into dst, reading the source through the
    // GetAlpha accessor with the caller-supplied srcRowBytes.
    static inline void CompressA8Horizontal(uint8_t* dst, const uint8_t* src,
                                            int srcRowBytes) {
        compress_a8_astc_block<GetAlpha>(&dst, src, srcRowBytes);
    }

#if PEDANTIC_BLIT_RECT
    // Not implemented: unconditionally fails via SkFAIL. Only compiled when
    // PEDANTIC_BLIT_RECT is enabled.
    static inline void UpdateBlock(uint8_t* dst, const uint8_t* src, int srcRowBytes,
                                   const uint8_t* mask) {
        // TODO: krajcevski
        // This is kind of difficult for ASTC because the weight values are calculated
        // as an average of the actual weights. The best we can do is decompress the
        // weights and recalculate them based on the new texel values. This should
        // be "not too bad" since we know that anytime we hit this function, we're
        // compressing 12x12 block dimension alpha-only, and we know the layout
        // of the block
        SkFAIL("Implement me!");
    }
#endif
};
2036
2037 ////////////////////////////////////////////////////////////////////////////////
2038
2039 namespace SkTextureCompressor {
2040
CompressA8To12x12ASTC(uint8_t * dst,const uint8_t * src,int width,int height,size_t rowBytes)2041 bool CompressA8To12x12ASTC(uint8_t* dst, const uint8_t* src,
2042 int width, int height, size_t rowBytes) {
2043 if (width < 0 || ((width % 12) != 0) || height < 0 || ((height % 12) != 0)) {
2044 return false;
2045 }
2046
2047 uint8_t** dstPtr = &dst;
2048 for (int y = 0; y < height; y += 12) {
2049 for (int x = 0; x < width; x += 12) {
2050 compress_a8_astc_block<GetAlpha>(dstPtr, src + y*rowBytes + x, rowBytes);
2051 }
2052 }
2053
2054 return true;
2055 }
2056
CreateASTCBlitter(int width,int height,void * outputBuffer,SkTBlitterAllocator * allocator)2057 SkBlitter* CreateASTCBlitter(int width, int height, void* outputBuffer,
2058 SkTBlitterAllocator* allocator) {
2059 if ((width % 12) != 0 || (height % 12) != 0) {
2060 return nullptr;
2061 }
2062
2063 // Memset the output buffer to an encoding that decodes to zero. We must do this
2064 // in order to avoid having uninitialized values in the buffer if the blitter
2065 // decides not to write certain scanlines (and skip entire rows of blocks).
2066 // In the case of ASTC, if everything index is zero, then the interpolated value
2067 // will decode to zero provided we have the right header. We use the encoding
2068 // from recognizing all zero blocks from above.
2069 const int nBlocks = (width * height / 144);
2070 uint8_t *dst = reinterpret_cast<uint8_t *>(outputBuffer);
2071 for (int i = 0; i < nBlocks; ++i) {
2072 send_packing(&dst, SkTEndian_SwapLE64(0x0000000001FE000173ULL), 0);
2073 }
2074
2075 return allocator->createT<
2076 SkTCompressedAlphaBlitter<12, 16, CompressorASTC>, int, int, void* >
2077 (width, height, outputBuffer);
2078 }
2079
DecompressASTC(uint8_t * dst,int dstRowBytes,const uint8_t * src,int width,int height,int blockDimX,int blockDimY)2080 void DecompressASTC(uint8_t* dst, int dstRowBytes, const uint8_t* src,
2081 int width, int height, int blockDimX, int blockDimY) {
2082 // ASTC is encoded in what they call "raster order", so that the first
2083 // block is the bottom-left block in the image, and the first pixel
2084 // is the bottom-left pixel of the image
2085 dst += height * dstRowBytes;
2086
2087 ASTCDecompressionData data(blockDimX, blockDimY);
2088 for (int y = 0; y < height; y += blockDimY) {
2089 dst -= blockDimY * dstRowBytes;
2090 SkColor *colorPtr = reinterpret_cast<SkColor*>(dst);
2091 for (int x = 0; x < width; x += blockDimX) {
2092 read_astc_block(&data, src);
2093 decompress_astc_block(reinterpret_cast<uint8_t*>(colorPtr + x), dstRowBytes, data);
2094
2095 // ASTC encoded blocks are 16 bytes (128 bits) large.
2096 src += 16;
2097 }
2098 }
2099 }
2100
2101 } // SkTextureCompressor
2102