1 /*
2  * Copyright (C) 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /*
25  * Included by texcompress_bptc and gallium to define BPTC decoding routines.
26  */
27 
28 #ifndef TEXCOMPRESS_BPTC_TMP_H
29 #define TEXCOMPRESS_BPTC_TMP_H
30 
31 #include "util/format_srgb.h"
32 #include "util/half_float.h"
33 #include "macros.h"
34 
/* Width and height, in texels, of one compressed block; the decoders below
 * step through the image BLOCK_SIZE texels at a time in both directions.
 */
35 #define BLOCK_SIZE 4
/* Number of entries in each of the partition and anchor lookup tables. */
36 #define N_PARTITIONS 64
/* Number of source bytes consumed per compressed block. */
37 #define BLOCK_BYTES 16
38 
/* Static description of the bit layout of one UNORM block mode.
 *
 * n_subsets               - number of endpoint pairs in the block (1, 2 or 3);
 *                           also selects which partition table is used.
 * n_partition_bits        - width of the partition number field.
 * has_rotation_bits       - a 2-bit rotation field follows the partition.
 * has_index_selection_bit - a 1-bit index selection field follows that.
 * n_color_bits            - stored bits per R/G/B endpoint component.
 * n_alpha_bits            - stored bits per alpha endpoint component; when 0
 *                           the decoder forces alpha to 255.
 * has_endpoint_pbits      - one extra low bit is stored per endpoint.
 * has_shared_pbits        - one extra low bit is stored per subset and
 *                           shared by both of its endpoints.
 * n_index_bits            - bits per texel in the primary index set.
 * n_secondary_index_bits  - bits per texel in the secondary index set, or 0
 *                           when the mode has no secondary indices.
 */
39 struct bptc_unorm_mode {
40    int n_subsets;
41    int n_partition_bits;
42    bool has_rotation_bits;
43    bool has_index_selection_bit;
44    int n_color_bits;
45    int n_alpha_bits;
46    bool has_endpoint_pbits;
47    bool has_shared_pbits;
48    int n_index_bits;
49    int n_secondary_index_bits;
50 };
51 
/* One entry of a float mode's bitfields[] list.
 *
 * NOTE(review): the code that consumes these entries is not fully visible
 * here, so the field meanings below are inferred from the names and from the
 * initialisers in bptc_float_modes — confirm against the float endpoint
 * extraction code.
 *
 * endpoint  - presumably the endpoint (0..3) the bits belong to; the tables
 *             use -1 in this field to mark the end of a list.
 * component - presumably the colour component (0..2) within that endpoint.
 * offset    - presumably the position within the component value at which
 *             the bits are placed.
 * n_bits    - presumably the number of bits in this run.
 * reverse   - presumably whether the run is stored in reversed bit order.
 */
52 struct bptc_float_bitfield {
53    int8_t endpoint;
54    uint8_t component;
55    uint8_t offset;
56    uint8_t n_bits;
57    bool reverse;
58 };
59 
/* Static description of one float block mode.
 *
 * reserved is set for the table slots marked "reserved"; those entries leave
 * every other field zero-initialised.
 *
 * NOTE(review): the remaining fields are consumed by code that is not fully
 * visible here; the descriptions are inferred from their names — confirm.
 *
 * transformed_endpoints - presumably whether endpoints other than the first
 *                         are stored as deltas.
 * n_partition_bits      - width of the partition field; the float endpoint
 *                         extraction treats a non-zero value as "four
 *                         endpoints" and zero as "two endpoints".
 * n_endpoint_bits       - presumably the precision of the base endpoint.
 * n_index_bits          - presumably the bits per texel index.
 * n_delta_bits          - presumably the per-component delta precision.
 * bitfields             - list of bit runs, terminated by an entry whose
 *                         endpoint field is -1.
 */
60 struct bptc_float_mode {
61    bool reserved;
62    bool transformed_endpoints;
63    int n_partition_bits;
64    int n_endpoint_bits;
65    int n_index_bits;
66    int n_delta_bits[3];
67    struct bptc_float_bitfield bitfields[24];
68 };
69 
/* State for writing a bit stream.
 *
 * NOTE(review): no user of this struct is visible in this part of the file;
 * presumably buf accumulates the bits of the byte being built, pos counts
 * how many bits of buf are filled and dst points at the next output byte —
 * confirm against the code that uses it.
 */
70 struct bit_writer {
71    uint8_t buf;
72    int pos;
73    uint8_t *dst;
74 };
75 
/* Layout of each of the eight UNORM block modes.
 *
 * The decoders select an entry with ffs(block[0]) - 1, i.e. by the position
 * of the lowest set bit in the first byte of the block.
 *
 * Columns, in struct bptc_unorm_mode order: n_subsets, n_partition_bits,
 * has_rotation_bits, has_index_selection_bit, n_color_bits, n_alpha_bits,
 * has_endpoint_pbits, has_shared_pbits, n_index_bits,
 * n_secondary_index_bits.
 */
76 static const struct bptc_unorm_mode
77 bptc_unorm_modes[] = {
78    /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
79    /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
80    /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
81    /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
82    /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
83    /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
84    /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
85    /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
86 };
87 
/* Layout of the float block modes.
 *
 * Each entry is { reserved, transformed_endpoints, n_partition_bits,
 * n_endpoint_bits, n_index_bits, { n_delta_bits per component },
 * { bitfields... } }, and each bitfield is
 * { endpoint, component, offset, n_bits, reverse }. A bitfield list ends with
 * { -1 }, which sets only the endpoint field. Entries written as
 * { true } have only the reserved flag set.
 *
 * NOTE(review): the comment before each entry looks like the mode's
 * identifying bit pattern, and the table appears to be ordered by that
 * pattern — the lookup code is not visible here, confirm before reordering.
 */
88 static const struct bptc_float_mode
89 bptc_float_modes[] = {
90    /* 00 */
91    { false, true, 5, 10, 3, { 5, 5, 5 },
92      { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
93        { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
94        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
95        { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
96        { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
97        { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
98        { 3, 2, 3, 1, false },
99        { -1 } }
100    },
101    /* 01 */
102    { false, true, 5, 7, 3, { 6, 6, 6 },
103      { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
104        { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
105        { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
106        { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
107        { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
108        { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
109        { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
110        { 2, 0, 0, 6, false },
111        { 3, 0, 0, 6, false },
112        { -1 } }
113    },
114    /* 00010 */
115    { false, true, 5, 11, 3, { 5, 4, 4 },
116      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
117        { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
118        { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
119        { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
120        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
121        { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
122        { -1 } }
123    },
124    /* 00011 */
125    { false, false, 0, 10, 4, { 10, 10, 10 },
126      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
127        { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
128        { -1 } }
129    },
130    /* 00110 */
131    { false, true, 5, 11, 3, { 4, 5, 4 },
132      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
133        { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
134        { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
135        { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
136        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
137        { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
138        { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
139        { -1 } }
140    },
141    /* 00111 */
142    { false, true, 0, 11, 4, { 9, 9, 9 },
143      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
144        { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
145        { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
146        { -1 } }
147    },
148    /* 01010 */
149    { false, true, 5, 11, 3, { 4, 4, 5 },
150      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
151        { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
152        { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
153        { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
154        { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
155        { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
156        { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
157        { -1 } }
158    },
159    /* 01011 */
160    { false, true, 0, 12, 4, { 8, 8, 8 },
161      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
162        { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
163        { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
164        { -1 } }
165    },
166    /* 01110 */
167    { false, true, 5, 9, 3, { 5, 5, 5 },
168      { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
169        { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
170        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
171        { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
172        { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
173        { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
174        { 3, 2, 3, 1, false },
175        { -1 } }
176    },
177    /* 01111 */
178    { false, true, 0, 16, 4, { 4, 4, 4 },
179      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
180        { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
181        { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
182        { -1 } }
183    },
184    /* 10010 */
185    { false, true, 5, 8, 3, { 6, 5, 5 },
186      { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
187        { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
188        { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
189        { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
190        { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
191        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
192        { 3, 0, 0, 6, false },
193        { -1 } }
194    },
195    /* 10011 */
196    { true /* reserved */ },
197    /* 10110 */
198    { false, true, 5, 8, 3, { 5, 6, 5 },
199      { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
200        { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
201        { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
202        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
203        { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
204        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
205        { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
206        { -1 } }
207    },
208    /* 10111 */
209    { true /* reserved */ },
210    /* 11010 */
211    { false, true, 5, 8, 3, { 5, 5, 6 },
212      { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
213        { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
214        { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
215        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
216        { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
217        { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
218        { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
219        { -1 } }
220    },
221    /* 11011 */
222    { true /* reserved */ },
223    /* 11110 */
224    { false, false, 5, 6, 3, { 6, 6, 6 },
225      { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
226        { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
227        { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
228        { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
229        { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
230        { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
231        { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
232        { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
233        { -1 } }
234    },
235    /* 11111 */
236    { true /* reserved */ },
237 };
238 
239 /* This partition table is used when the mode has two subsets. Each
240  * partition is represented by a 32-bit value which gives 2 bits per texel
241  * within the block. The value of the two bits represents which subset to use
242  * (0 or 1).
 *
 * The table is indexed by the partition number read from the block. The
 * decoders obtain the subset of texel t as (value >> (t * 2)) & 3, so texel 0
 * occupies the two least-significant bits.
243  */
244 static const uint32_t
245 partition_table1[N_PARTITIONS] = {
246    0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
247    0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
248    0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
249    0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
250    0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
251    0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
252    0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
253    0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
254    0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
255    0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
256    0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
257    0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
258    0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
259    0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
260    0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
261    0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
262 };
263 
264 /* This partition table is used when the mode has three subsets. In this case
265  * the values can be 0, 1 or 2.
 *
 * The encoding is the same as for the two-subset table: one 32-bit value per
 * partition number, two bits per texel, texel 0 in the two least-significant
 * bits.
266  */
267 static const uint32_t
268 partition_table2[N_PARTITIONS] = {
269    0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
270    0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
271    0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
272    0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
273    0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
274    0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
275    0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
276    0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
277    0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
278    0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
279    0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
280    0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
281    0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
282    0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
283    0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
284    0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
285 };
286 
/* Texel numbers (0..15) of the anchor texels, indexed first by table row and
 * then by partition number.
 *
 * Row 0 is consulted for two-subset modes, rows 1 and 2 for three-subset
 * modes. Texel 0 is always treated as an anchor by the lookup helpers and is
 * therefore not stored here. An anchor texel's index is stored with one bit
 * fewer than the other texels' indices.
 */
287 static const uint8_t
288 anchor_indices[][N_PARTITIONS] = {
289    /* Anchor index values for the second subset of two-subset partitioning */
290    {
291       0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
292       0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
293       0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
294       0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
295    },
296 
297    /* Anchor index values for the second subset of three-subset partitioning */
298    {
299       0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
300       0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
301       0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
302       0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
303    },
304 
305    /* Anchor index values for the third subset of three-subset
306     * partitioning
307     */
308    {
309       0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
310       0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
311       0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
312       0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
313    }
314 };
315 
/* Reads an n_bits wide little-endian bit field that starts offset bits into
 * block. Bit 0 of the field is bit (offset % 8) of byte (offset / 8); the
 * field may straddle any number of byte boundaries.
 *
 * The first byte touched is always read, even when n_bits is 0, in which
 * case the result is 0.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int src_byte = offset / 8;
   int skip = offset % 8;   /* low bits of the current byte to discard */
   int shift = 0;           /* where the next chunk lands in the result */
   int value = 0;

   do {
      const int available = 8 - skip;
      const int take = n_bits < available ? n_bits : available;
      const int mask = (1 << take) - 1;

      value |= ((block[src_byte] >> skip) & mask) << shift;

      n_bits -= take;
      shift += take;

      /* Every byte after the first is consumed from its bit 0. */
      src_byte++;
      skip = 0;
   } while (n_bits > 0);

   return value;
}
342 
/* Widens an n_bits wide value to a full byte: the value is moved to the top
 * of the byte and its own most-significant bits are replicated into the
 * vacated low bits.
 *
 * The right-shift count is 2 * n_bits - 8, so n_bits must be at least 4.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int pad = 8 - n_bits;
   const int high = byte << pad;
   const int low = byte >> (2 * n_bits - 8);

   return high | low;
}
352 
353 static int
extract_unorm_endpoints(const struct bptc_unorm_mode * mode,const uint8_t * block,int bit_offset,uint8_t endpoints[][4])354 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
355                         const uint8_t *block,
356                         int bit_offset,
357                         uint8_t endpoints[][4])
358 {
359    int component;
360    int subset;
361    int endpoint;
362    int pbit;
363    int n_components;
364 
365    /* Extract each color component */
366    for (component = 0; component < 3; component++) {
367       for (subset = 0; subset < mode->n_subsets; subset++) {
368          for (endpoint = 0; endpoint < 2; endpoint++) {
369             endpoints[subset * 2 + endpoint][component] =
370                extract_bits(block, bit_offset, mode->n_color_bits);
371             bit_offset += mode->n_color_bits;
372          }
373       }
374    }
375 
376    /* Extract the alpha values */
377    if (mode->n_alpha_bits > 0) {
378       for (subset = 0; subset < mode->n_subsets; subset++) {
379          for (endpoint = 0; endpoint < 2; endpoint++) {
380             endpoints[subset * 2 + endpoint][3] =
381                extract_bits(block, bit_offset, mode->n_alpha_bits);
382             bit_offset += mode->n_alpha_bits;
383          }
384       }
385 
386       n_components = 4;
387    } else {
388       for (subset = 0; subset < mode->n_subsets; subset++)
389          for (endpoint = 0; endpoint < 2; endpoint++)
390             endpoints[subset * 2 + endpoint][3] = 255;
391 
392       n_components = 3;
393    }
394 
395    /* Add in the p-bits */
396    if (mode->has_endpoint_pbits) {
397       for (subset = 0; subset < mode->n_subsets; subset++) {
398          for (endpoint = 0; endpoint < 2; endpoint++) {
399             pbit = extract_bits(block, bit_offset, 1);
400             bit_offset += 1;
401 
402             for (component = 0; component < n_components; component++) {
403                endpoints[subset * 2 + endpoint][component] <<= 1;
404                endpoints[subset * 2 + endpoint][component] |= pbit;
405             }
406          }
407       }
408    } else if (mode->has_shared_pbits) {
409       for (subset = 0; subset < mode->n_subsets; subset++) {
410          pbit = extract_bits(block, bit_offset, 1);
411          bit_offset += 1;
412 
413          for (endpoint = 0; endpoint < 2; endpoint++) {
414             for (component = 0; component < n_components; component++) {
415                endpoints[subset * 2 + endpoint][component] <<= 1;
416                endpoints[subset * 2 + endpoint][component] |= pbit;
417             }
418          }
419       }
420    }
421 
422    /* Expand the n-bit values to a byte */
423    for (subset = 0; subset < mode->n_subsets; subset++) {
424       for (endpoint = 0; endpoint < 2; endpoint++) {
425          for (component = 0; component < 3; component++) {
426             endpoints[subset * 2 + endpoint][component] =
427                expand_component(endpoints[subset * 2 + endpoint][component],
428                                 mode->n_color_bits +
429                                 mode->has_endpoint_pbits +
430                                 mode->has_shared_pbits);
431          }
432 
433          if (mode->n_alpha_bits > 0) {
434             endpoints[subset * 2 + endpoint][3] =
435                expand_component(endpoints[subset * 2 + endpoint][3],
436                                 mode->n_alpha_bits +
437                                 mode->has_endpoint_pbits +
438                                 mode->has_shared_pbits);
439          }
440       }
441    }
442 
443    return bit_offset;
444 }
445 
446 static bool
is_anchor(int n_subsets,int partition_num,int texel)447 is_anchor(int n_subsets,
448           int partition_num,
449           int texel)
450 {
451    if (texel == 0)
452       return true;
453 
454    switch (n_subsets) {
455    case 1:
456       return false;
457    case 2:
458       return anchor_indices[0][partition_num] == texel;
459    case 3:
460       return (anchor_indices[1][partition_num] == texel ||
461               anchor_indices[2][partition_num] == texel);
462    default:
463       assert(false);
464       return false;
465    }
466 }
467 
468 static int
count_anchors_before_texel(int n_subsets,int partition_num,int texel)469 count_anchors_before_texel(int n_subsets,
470                            int partition_num,
471                            int texel)
472 {
473    int count = 1;
474 
475    if (texel == 0)
476       return 0;
477 
478    switch (n_subsets) {
479    case 1:
480       break;
481    case 2:
482       if (texel > anchor_indices[0][partition_num])
483          count++;
484       break;
485    case 3:
486       if (texel > anchor_indices[1][partition_num])
487          count++;
488       if (texel > anchor_indices[2][partition_num])
489          count++;
490       break;
491    default:
492       assert(false);
493       return 0;
494    }
495 
496    return count;
497 }
498 
/* Blends endpoints a and b with the fixed-point weight selected by index.
 *
 * index_bits chooses the weight table and must be 2, 3 or 4; index must be
 * below 1 << index_bits. Weights run from 0 (all a) to 64 (all b) and the
 * result is rounded to nearest.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed by the number of index bits; 0 and 1 are not valid widths. */
   static const uint8_t *by_width[] = {
      NULL, NULL, two_bit, three_bit, four_bit
   };
   const int w_b = by_width[index_bits][index];
   const int w_a = 64 - w_b;

   return (w_a * a + w_b * b + 32) >> 6;
}
517 
/* Undoes the channel rotation of a decoded RGBA texel in place.
 *
 * rotation 0 leaves the texel alone; rotation r > 0 swaps channel r - 1 with
 * the alpha channel (result[3]).
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   if (rotation != 0) {
      uint8_t *swapped = &result[rotation - 1];
      const uint8_t alpha = result[3];

      result[3] = *swapped;
      *swapped = alpha;
   }
}
533 
534 static void
fetch_rgba_unorm_from_block(const uint8_t * block,uint8_t * result,int texel)535 fetch_rgba_unorm_from_block(const uint8_t *block,
536                             uint8_t *result,
537                             int texel)
538 {
539    int mode_num = ffs(block[0]);
540    const struct bptc_unorm_mode *mode;
541    int bit_offset, secondary_bit_offset;
542    int partition_num;
543    int subset_num;
544    int rotation;
545    int index_selection;
546    int index_bits;
547    int indices[2];
548    int index;
549    int anchors_before_texel;
550    bool anchor;
551    uint8_t endpoints[3 * 2][4];
552    uint32_t subsets;
553    int component;
554 
555    if (mode_num == 0) {
556       /* According to the spec this mode is reserved and shouldn't be used. */
557       memset(result, 0, 4);
558       return;
559    }
560 
561    mode = bptc_unorm_modes + mode_num - 1;
562    bit_offset = mode_num;
563 
564    partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
565    bit_offset += mode->n_partition_bits;
566 
567    switch (mode->n_subsets) {
568    case 1:
569       subsets = 0;
570       break;
571    case 2:
572       subsets = partition_table1[partition_num];
573       break;
574    case 3:
575       subsets = partition_table2[partition_num];
576       break;
577    default:
578       assert(false);
579       return;
580    }
581 
582    if (mode->has_rotation_bits) {
583       rotation = extract_bits(block, bit_offset, 2);
584       bit_offset += 2;
585    } else {
586       rotation = 0;
587    }
588 
589    if (mode->has_index_selection_bit) {
590       index_selection = extract_bits(block, bit_offset, 1);
591       bit_offset++;
592    } else {
593       index_selection = 0;
594    }
595 
596    bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
597 
598    anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
599                                                      partition_num, texel);
600 
601    /* Calculate the offset to the secondary index */
602    secondary_bit_offset = (bit_offset +
603                            BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
604                            mode->n_subsets +
605                            mode->n_secondary_index_bits * texel -
606                            anchors_before_texel);
607 
608    /* Calculate the offset to the primary index for this texel */
609    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
610 
611    subset_num = (subsets >> (texel * 2)) & 3;
612 
613    anchor = is_anchor(mode->n_subsets, partition_num, texel);
614 
615    index_bits = mode->n_index_bits;
616    if (anchor)
617       index_bits--;
618    indices[0] = extract_bits(block, bit_offset, index_bits);
619 
620    if (mode->n_secondary_index_bits) {
621       index_bits = mode->n_secondary_index_bits;
622       if (anchor)
623          index_bits--;
624       indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
625    }
626 
627    index = indices[index_selection];
628    index_bits = (index_selection ?
629                  mode->n_secondary_index_bits :
630                  mode->n_index_bits);
631 
632    for (component = 0; component < 3; component++)
633       result[component] = interpolate(endpoints[subset_num * 2][component],
634                                       endpoints[subset_num * 2 + 1][component],
635                                       index,
636                                       index_bits);
637 
638    /* Alpha uses the opposite index from the color components */
639    if (mode->n_secondary_index_bits && !index_selection) {
640       index = indices[1];
641       index_bits = mode->n_secondary_index_bits;
642    } else {
643       index = indices[0];
644       index_bits = mode->n_index_bits;
645    }
646 
647    result[3] = interpolate(endpoints[subset_num * 2][3],
648                            endpoints[subset_num * 2 + 1][3],
649                            index,
650                            index_bits);
651 
652    apply_rotation(rotation, result);
653 }
654 
655 #ifdef BPTC_BLOCK_DECODE
656 static void
decompress_rgba_unorm_block(int src_width,int src_height,const uint8_t * block,uint8_t * dst_row,int dst_rowstride)657 decompress_rgba_unorm_block(int src_width, int src_height,
658                             const uint8_t *block,
659                             uint8_t *dst_row, int dst_rowstride)
660 {
661    int mode_num = ffs(block[0]);
662    const struct bptc_unorm_mode *mode;
663    int bit_offset_head, bit_offset, secondary_bit_offset;
664    int partition_num;
665    int subset_num;
666    int rotation;
667    int index_selection;
668    int index_bits;
669    int indices[2];
670    int index;
671    int anchors_before_texel;
672    bool anchor;
673    uint8_t endpoints[3 * 2][4];
674    uint32_t subsets;
675    int component;
676    unsigned x, y;
677 
678    if (mode_num == 0) {
679       /* According to the spec this mode is reserved and shouldn't be used. */
680       for(y = 0; y < src_height; y += 1) {
681          uint8_t *result = dst_row;
682          memset(result, 0, 4 * src_width);
683          dst_row += dst_rowstride;
684       }
685       return;
686    }
687 
688    mode = bptc_unorm_modes + mode_num - 1;
689    bit_offset_head = mode_num;
690 
691    partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
692    bit_offset_head += mode->n_partition_bits;
693 
694    switch (mode->n_subsets) {
695    case 1:
696       subsets = 0;
697       break;
698    case 2:
699       subsets = partition_table1[partition_num];
700       break;
701    case 3:
702       subsets = partition_table2[partition_num];
703       break;
704    default:
705       assert(false);
706       return;
707    }
708 
709    if (mode->has_rotation_bits) {
710       rotation = extract_bits(block, bit_offset_head, 2);
711       bit_offset_head += 2;
712    } else {
713       rotation = 0;
714    }
715 
716    if (mode->has_index_selection_bit) {
717       index_selection = extract_bits(block, bit_offset_head, 1);
718       bit_offset_head++;
719    } else {
720       index_selection = 0;
721    }
722 
723    bit_offset_head = extract_unorm_endpoints(mode, block, bit_offset_head, endpoints);
724 
725    for(y = 0; y < src_height; y += 1) {
726       uint8_t *result = dst_row;
727       for(x = 0; x < src_width; x += 1) {
728          int texel;
729          texel = x + y * 4;
730          bit_offset = bit_offset_head;
731 
732          anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
733                                                            partition_num,
734                                                            texel);
735 
736          /* Calculate the offset to the secondary index */
737          secondary_bit_offset = (bit_offset +
738                                  BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
739                                  mode->n_subsets +
740                                  mode->n_secondary_index_bits * texel -
741                                  anchors_before_texel);
742 
743          /* Calculate the offset to the primary index for this texel */
744          bit_offset += mode->n_index_bits * texel - anchors_before_texel;
745 
746          subset_num = (subsets >> (texel * 2)) & 3;
747 
748          anchor = is_anchor(mode->n_subsets, partition_num, texel);
749 
750          index_bits = mode->n_index_bits;
751          if (anchor)
752             index_bits--;
753          indices[0] = extract_bits(block, bit_offset, index_bits);
754 
755          if (mode->n_secondary_index_bits) {
756             index_bits = mode->n_secondary_index_bits;
757             if (anchor)
758                index_bits--;
759             indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
760          }
761 
762          index = indices[index_selection];
763          index_bits = (index_selection ?
764                        mode->n_secondary_index_bits :
765                        mode->n_index_bits);
766 
767          for (component = 0; component < 3; component++)
768             result[component] = interpolate(endpoints[subset_num * 2][component],
769                                             endpoints[subset_num * 2 + 1][component],
770                                             index,
771                                             index_bits);
772 
773          /* Alpha uses the opposite index from the color components */
774          if (mode->n_secondary_index_bits && !index_selection) {
775             index = indices[1];
776             index_bits = mode->n_secondary_index_bits;
777          } else {
778             index = indices[0];
779             index_bits = mode->n_index_bits;
780          }
781 
782          result[3] = interpolate(endpoints[subset_num * 2][3],
783                                  endpoints[subset_num * 2 + 1][3],
784                                  index,
785                                  index_bits);
786 
787          apply_rotation(rotation, result);
788          result += 4;
789       }
790       dst_row += dst_rowstride;
791    }
792 }
793 
794 static void
decompress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)795 decompress_rgba_unorm(int width, int height,
796                       const uint8_t *src, int src_rowstride,
797                       uint8_t *dst, int dst_rowstride)
798 {
799    int src_row_diff;
800    int y, x;
801 
802    if (src_rowstride >= width * 4)
803       src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
804    else
805       src_row_diff = 0;
806 
807    for (y = 0; y < height; y += BLOCK_SIZE) {
808       for (x = 0; x < width; x += BLOCK_SIZE) {
809          decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
810                                      MIN2(height - y, BLOCK_SIZE),
811                                      src,
812                                      dst + x * 4 + y * dst_rowstride,
813                                      dst_rowstride);
814          src += BLOCK_BYTES;
815       }
816       src += src_row_diff;
817    }
818 }
819 #endif // BPTC_BLOCK_DECODE
820 
/*
 * Treat the low n_bits bits of value as a two's complement number and widen
 * it to 32 bits. Any bits of value above n_bits are discarded.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   assert(n_bits > 0 && n_bits < 32);

   const uint32_t sign_bit = (uint32_t)1 << (n_bits - 1);
   const uint32_t field = (uint32_t)value & ((sign_bit << 1) - 1);

   /* Flipping the sign bit and subtracting it again propagates it upwards */
   return (int32_t)((field ^ sign_bit) - sign_bit);
}
830 
/*
 * Expand a signed endpoint value stored with n_endpoint_bits of precision.
 *
 * Values with 16 or more bits, and zero, are returned unchanged. Otherwise
 * the magnitude is either saturated to 0x7fff (when it is at least the
 * largest magnitude minus nothing, i.e. (1 << (n_endpoint_bits - 1)) - 1) or
 * rescaled with rounding, and the original sign is reapplied.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   int magnitude;
   int expanded;

   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   magnitude = value < 0 ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      expanded = 0x7fff;
   else
      expanded = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return value < 0 ? -expanded : expanded;
}
859 
/*
 * Expand an unsigned endpoint value stored with n_endpoint_bits of
 * precision.
 *
 * Values with 15 or more bits, and zero, are returned unchanged. The
 * all-ones value maps to 0xffff; everything else is rescaled with rounding.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   return (value == (1 << n_endpoint_bits) - 1) ?
      0xffff :
      ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
}
874 
875 static int
extract_float_endpoints(const struct bptc_float_mode * mode,const uint8_t * block,int bit_offset,int32_t endpoints[][3],bool is_signed)876 extract_float_endpoints(const struct bptc_float_mode *mode,
877                         const uint8_t *block,
878                         int bit_offset,
879                         int32_t endpoints[][3],
880                         bool is_signed)
881 {
882    const struct bptc_float_bitfield *bitfield;
883    int endpoint, component;
884    int n_endpoints;
885    int value;
886    int i;
887 
888    if (mode->n_partition_bits)
889       n_endpoints = 4;
890    else
891       n_endpoints = 2;
892 
893    memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
894 
895    for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
896       value = extract_bits(block, bit_offset, bitfield->n_bits);
897       bit_offset += bitfield->n_bits;
898 
899       if (bitfield->reverse) {
900          for (i = 0; i < bitfield->n_bits; i++) {
901             if (value & (1 << i))
902                endpoints[bitfield->endpoint][bitfield->component] |=
903                   1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
904          }
905       } else {
906          endpoints[bitfield->endpoint][bitfield->component] |=
907             value << bitfield->offset;
908       }
909    }
910 
911    if (mode->transformed_endpoints) {
912       /* The endpoints are specified as signed offsets from e0 */
913       for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
914          for (component = 0; component < 3; component++) {
915             value = sign_extend(endpoints[endpoint][component],
916                                 mode->n_delta_bits[component]);
917             endpoints[endpoint][component] =
918                ((endpoints[0][component] + value) &
919                 ((1 << mode->n_endpoint_bits) - 1));
920          }
921       }
922    }
923 
924    if (is_signed) {
925       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
926          for (component = 0; component < 3; component++) {
927             value = sign_extend(endpoints[endpoint][component],
928                                 mode->n_endpoint_bits);
929             endpoints[endpoint][component] =
930                signed_unquantize(value, mode->n_endpoint_bits);
931          }
932       }
933    } else {
934       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
935          for (component = 0; component < 3; component++) {
936             endpoints[endpoint][component] =
937                unsigned_unquantize(endpoints[endpoint][component],
938                                    mode->n_endpoint_bits);
939          }
940       }
941    }
942 
943    return bit_offset;
944 }
945 
/*
 * Final scaling step for an interpolated unsigned value: multiply by 31/64
 * using integer arithmetic.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
951 
/*
 * Final scaling step for an interpolated signed value: the magnitude is
 * multiplied by 31/32 using integer arithmetic and, for negative inputs,
 * bit 15 (0x8000) is set in the result.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   const int32_t magnitude = value < 0 ? -value : value;
   const int32_t scaled = magnitude * 31 / 32;

   return value < 0 ? (scaled | 0x8000) : scaled;
}
960 
961 static void
fetch_rgb_float_from_block(const uint8_t * block,float * result,int texel,bool is_signed)962 fetch_rgb_float_from_block(const uint8_t *block,
963                            float *result,
964                            int texel,
965                            bool is_signed)
966 {
967    int mode_num;
968    const struct bptc_float_mode *mode;
969    int bit_offset;
970    int partition_num;
971    int subset_num;
972    int index_bits;
973    int index;
974    int anchors_before_texel;
975    int32_t endpoints[2 * 2][3];
976    uint32_t subsets;
977    int n_subsets;
978    int component;
979    int32_t value;
980 
981    if (block[0] & 0x2) {
982       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
983       bit_offset = 5;
984    } else {
985       mode_num = block[0] & 3;
986       bit_offset = 2;
987    }
988 
989    mode = bptc_float_modes + mode_num;
990 
991    if (mode->reserved) {
992       memset(result, 0, sizeof result[0] * 3);
993       result[3] = 1.0f;
994       return;
995    }
996 
997    bit_offset = extract_float_endpoints(mode, block, bit_offset,
998                                         endpoints, is_signed);
999 
1000    if (mode->n_partition_bits) {
1001       partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1002       bit_offset += mode->n_partition_bits;
1003 
1004       subsets = partition_table1[partition_num];
1005       n_subsets = 2;
1006    } else {
1007       partition_num = 0;
1008       subsets = 0;
1009       n_subsets = 1;
1010    }
1011 
1012    anchors_before_texel =
1013       count_anchors_before_texel(n_subsets, partition_num, texel);
1014 
1015    /* Calculate the offset to the primary index for this texel */
1016    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1017 
1018    subset_num = (subsets >> (texel * 2)) & 3;
1019 
1020    index_bits = mode->n_index_bits;
1021    if (is_anchor(n_subsets, partition_num, texel))
1022       index_bits--;
1023    index = extract_bits(block, bit_offset, index_bits);
1024 
1025    for (component = 0; component < 3; component++) {
1026       value = interpolate(endpoints[subset_num * 2][component],
1027                           endpoints[subset_num * 2 + 1][component],
1028                           index,
1029                           mode->n_index_bits);
1030 
1031       if (is_signed)
1032          value = finish_signed_unquantize(value);
1033       else
1034          value = finish_unsigned_unquantize(value);
1035 
1036       result[component] = _mesa_half_to_float(value);
1037    }
1038 
1039    result[3] = 1.0f;
1040 }
1041 
1042 #ifdef BPTC_BLOCK_DECODE
1043 static void
decompress_rgb_float_block(unsigned src_width,unsigned src_height,const uint8_t * block,float * dst_row,unsigned dst_rowstride,bool is_signed)1044 decompress_rgb_float_block(unsigned src_width, unsigned src_height,
1045                            const uint8_t *block,
1046                            float *dst_row, unsigned dst_rowstride,
1047                            bool is_signed)
1048 {
1049    int mode_num;
1050    const struct bptc_float_mode *mode;
1051    int bit_offset_head, bit_offset;
1052    int partition_num;
1053    int subset_num;
1054    int index_bits;
1055    int index;
1056    int anchors_before_texel;
1057    int32_t endpoints[2 * 2][3];
1058    uint32_t subsets;
1059    int n_subsets;
1060    int component;
1061    int32_t value;
1062    unsigned x, y;
1063 
1064    if (block[0] & 0x2) {
1065       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1066       bit_offset_head = 5;
1067    } else {
1068       mode_num = block[0] & 3;
1069       bit_offset_head = 2;
1070    }
1071 
1072    mode = bptc_float_modes + mode_num;
1073 
1074    if (mode->reserved) {
1075       for(y = 0; y < src_height; y += 1) {
1076          float *result = dst_row;
1077          memset(result, 0, sizeof result[0] * 4 * src_width);
1078          for(x = 0; x < src_width; x += 1) {
1079             result[3] = 1.0f;
1080             result += 4;
1081          }
1082          dst_row += dst_rowstride / sizeof dst_row[0];
1083       }
1084       return;
1085    }
1086 
1087    bit_offset_head = extract_float_endpoints(mode, block, bit_offset_head,
1088                                         endpoints, is_signed);
1089 
1090    if (mode->n_partition_bits) {
1091       partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
1092       bit_offset_head += mode->n_partition_bits;
1093 
1094       subsets = partition_table1[partition_num];
1095       n_subsets = 2;
1096    } else {
1097       partition_num = 0;
1098       subsets = 0;
1099       n_subsets = 1;
1100    }
1101 
1102    for(y = 0; y < src_height; y += 1) {
1103       float *result = dst_row;
1104       for(x = 0; x < src_width; x += 1) {
1105          int texel;
1106 
1107          bit_offset = bit_offset_head;
1108 
1109          texel = x + y * 4;
1110 
1111          anchors_before_texel =
1112             count_anchors_before_texel(n_subsets, partition_num, texel);
1113 
1114          /* Calculate the offset to the primary index for this texel */
1115          bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1116 
1117          subset_num = (subsets >> (texel * 2)) & 3;
1118 
1119          index_bits = mode->n_index_bits;
1120          if (is_anchor(n_subsets, partition_num, texel))
1121             index_bits--;
1122          index = extract_bits(block, bit_offset, index_bits);
1123 
1124          for (component = 0; component < 3; component++) {
1125             value = interpolate(endpoints[subset_num * 2][component],
1126                                 endpoints[subset_num * 2 + 1][component],
1127                                 index,
1128                                 mode->n_index_bits);
1129 
1130             if (is_signed)
1131                value = finish_signed_unquantize(value);
1132             else
1133                value = finish_unsigned_unquantize(value);
1134 
1135             result[component] = _mesa_half_to_float(value);
1136          }
1137 
1138          result[3] = 1.0f;
1139          result += 4;
1140       }
1141       dst_row += dst_rowstride / sizeof dst_row[0];
1142    }
1143 }
1144 
1145 static void
decompress_rgb_float(int width,int height,const uint8_t * src,int src_rowstride,float * dst,int dst_rowstride,bool is_signed)1146 decompress_rgb_float(int width, int height,
1147                       const uint8_t *src, int src_rowstride,
1148                       float *dst, int dst_rowstride, bool is_signed)
1149 {
1150    int src_row_diff;
1151    int y, x;
1152 
1153    if (src_rowstride >= width * 4)
1154       src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1155    else
1156       src_row_diff = 0;
1157 
1158    for (y = 0; y < height; y += BLOCK_SIZE) {
1159       for (x = 0; x < width; x += BLOCK_SIZE) {
1160          decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1161                                     MIN2(height - y, BLOCK_SIZE),
1162                                     src,
1163                                     (dst + x * 4 +
1164                                      (y * dst_rowstride / sizeof dst[0])),
1165                                     dst_rowstride, is_signed);
1166          src += BLOCK_BYTES;
1167       }
1168       src += src_row_diff;
1169    }
1170 }
1171 #endif // BPTC_BLOCK_DECODE
1172 
1173 static void
write_bits(struct bit_writer * writer,int n_bits,int value)1174 write_bits(struct bit_writer *writer, int n_bits, int value)
1175 {
1176    do {
1177       if (n_bits + writer->pos >= 8) {
1178          *(writer->dst++) = writer->buf | (value << writer->pos);
1179          writer->buf = 0;
1180          value >>= (8 - writer->pos);
1181          n_bits -= (8 - writer->pos);
1182          writer->pos = 0;
1183       } else {
1184          writer->buf |= value << writer->pos;
1185          writer->pos += n_bits;
1186          break;
1187       }
1188    } while (n_bits > 0);
1189 }
1190 
/*
 * Compute the per-texel averages used to split a block into two groups.
 *
 * src holds 4 bytes per texel with rows src_rowstride bytes apart. The
 * "luminance" of a texel is the plain sum of its first three bytes.
 * *average_luminance receives the mean luminance and *average_alpha the mean
 * of the fourth byte, both using integer division by width * height.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int luminance_total = 0;
   int alpha_total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         luminance_total += texel[0] + texel[1] + texel[2];
         alpha_total += texel[3];
      }
   }

   *average_luminance = luminance_total / n_texels;
   *average_alpha = alpha_total / n_texels;
}
1211 
/*
 * Choose the two RGBA endpoints for a block of width x height texels.
 *
 * src holds 4 bytes per texel with rows src_rowstride bytes apart. Colour
 * and alpha are partitioned independently: a texel belongs to the "left"
 * group when its luminance (sum of the three colour bytes) is below
 * average_luminance, respectively when its alpha byte is below
 * average_alpha. Each endpoint is the mean of its group; when one of the
 * groups is empty both endpoints are set to the mean of the whole block.
 *
 * endpoints[0] and endpoints[1] receive the result. The colour parts and
 * the alpha parts may each be swapped at the end so that the first texel of
 * the block lies on the same side of the midpoint as endpoint 0.
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* The alpha group must be chosen from the alpha channel itself
          * (byte 3), not from the blue channel */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      /* All texels fell into one group: use the overall mean for both */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1308 
1309 static void
write_rgb_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1310 write_rgb_indices_unorm(struct bit_writer *writer,
1311                         int src_width, int src_height,
1312                         const uint8_t *src, int src_rowstride,
1313                         uint8_t endpoints[][4])
1314 {
1315    int luminance;
1316    int endpoint_luminances[2];
1317    int endpoint;
1318    int index;
1319    int y, x;
1320 
1321    for (endpoint = 0; endpoint < 2; endpoint++) {
1322       endpoint_luminances[endpoint] =
1323          endpoints[endpoint][0] +
1324          endpoints[endpoint][1] +
1325          endpoints[endpoint][2];
1326    }
1327 
1328    /* If the endpoints have the same luminance then we'll just use index 0 for
1329     * all of the texels */
1330    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1331       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1332       return;
1333    }
1334 
1335    for (y = 0; y < src_height; y++) {
1336       for (x = 0; x < src_width; x++) {
1337          luminance = src[0] + src[1] + src[2];
1338 
1339          index = ((luminance - endpoint_luminances[0]) * 3 /
1340                   (endpoint_luminances[1] - endpoint_luminances[0]));
1341          if (index < 0)
1342             index = 0;
1343          else if (index > 3)
1344             index = 3;
1345 
1346          assert(x != 0 || y != 0 || index < 2);
1347 
1348          write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1349 
1350          src += 4;
1351       }
1352 
1353       /* Pad the indices out to the block size */
1354       if (src_width < BLOCK_SIZE)
1355          write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1356 
1357       src += src_rowstride - src_width * 4;
1358    }
1359 
1360    /* Pad the indices out to the block size */
1361    if (src_height < BLOCK_SIZE)
1362       write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1363 }
1364 
1365 static void
write_alpha_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1366 write_alpha_indices_unorm(struct bit_writer *writer,
1367                           int src_width, int src_height,
1368                           const uint8_t *src, int src_rowstride,
1369                           uint8_t endpoints[][4])
1370 {
1371    int index;
1372    int y, x;
1373 
1374    /* If the endpoints have the same alpha then we'll just use index 0 for
1375     * all of the texels */
1376    if (endpoints[0][3] == endpoints[1][3]) {
1377       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1378       return;
1379    }
1380 
1381    for (y = 0; y < src_height; y++) {
1382       for (x = 0; x < src_width; x++) {
1383          index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1384                   ((int) endpoints[1][3] - endpoints[0][3]));
1385          if (index < 0)
1386             index = 0;
1387          else if (index > 7)
1388             index = 7;
1389 
1390          assert(x != 0 || y != 0 || index < 4);
1391 
1392          /* The first index has one less bit */
1393          write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1394 
1395          src += 4;
1396       }
1397 
1398       /* Pad the indices out to the block size */
1399       if (src_width < BLOCK_SIZE)
1400          write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1401 
1402       src += src_rowstride - src_width * 4;
1403    }
1404 
1405    /* Pad the indices out to the block size */
1406    if (src_height < BLOCK_SIZE)
1407       write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1408 }
1409 
1410 static void
compress_rgba_unorm_block(int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t * dst)1411 compress_rgba_unorm_block(int src_width, int src_height,
1412                           const uint8_t *src, int src_rowstride,
1413                           uint8_t *dst)
1414 {
1415    int average_luminance, average_alpha;
1416    uint8_t endpoints[2][4];
1417    struct bit_writer writer;
1418    int component, endpoint;
1419 
1420    get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1421                                      &average_luminance, &average_alpha);
1422    get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1423                             average_luminance, average_alpha,
1424                             endpoints);
1425 
1426    writer.dst = dst;
1427    writer.pos = 0;
1428    writer.buf = 0;
1429 
1430    write_bits(&writer, 5, 0x10); /* mode 4 */
1431    write_bits(&writer, 2, 0); /* rotation 0 */
1432    write_bits(&writer, 1, 0); /* index selection bit */
1433 
1434    /* Write the color endpoints */
1435    for (component = 0; component < 3; component++)
1436       for (endpoint = 0; endpoint < 2; endpoint++)
1437          write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1438 
1439    /* Write the alpha endpoints */
1440    for (endpoint = 0; endpoint < 2; endpoint++)
1441       write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1442 
1443    write_rgb_indices_unorm(&writer,
1444                            src_width, src_height,
1445                            src, src_rowstride,
1446                            endpoints);
1447    write_alpha_indices_unorm(&writer,
1448                              src_width, src_height,
1449                              src, src_rowstride,
1450                              endpoints);
1451 }
1452 
1453 static void
compress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)1454 compress_rgba_unorm(int width, int height,
1455                     const uint8_t *src, int src_rowstride,
1456                     uint8_t *dst, int dst_rowstride)
1457 {
1458    int dst_row_diff;
1459    int y, x;
1460 
1461    if (dst_rowstride >= width * 4)
1462       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1463    else
1464       dst_row_diff = 0;
1465 
1466    for (y = 0; y < height; y += BLOCK_SIZE) {
1467       for (x = 0; x < width; x += BLOCK_SIZE) {
1468          compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1469                                    MIN2(height - y, BLOCK_SIZE),
1470                                    src + x * 4 + y * src_rowstride,
1471                                    src_rowstride,
1472                                    dst);
1473          dst += BLOCK_BYTES;
1474       }
1475       dst += dst_row_diff;
1476    }
1477 }
1478 
/*
 * Return the mean luminance of a width x height area of RGB float texels.
 *
 * src holds three floats per texel and src_rowstride is the distance
 * between rows in bytes. The luminance of a texel is the plain sum of its
 * three components.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Step over whatever lies between the end of this row and the next */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1496 
/*
 * Limit value to [-65504, 65504] when is_signed is set and to [0, 65504]
 * otherwise. Values already inside the range are returned unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1515 
/*
 * Choose the two RGB endpoints for a block of width x height float texels.
 *
 * src holds three floats per texel with rows src_rowstride bytes apart.
 * Texels whose luminance (sum of the three components) is below
 * average_luminance form the first group, all others the second; each
 * endpoint is the mean of its group. When one group is empty both endpoints
 * are set to the mean of the whole block. The endpoints are then passed
 * through clamp_value() and, if necessary, swapped so that the first texel
 * of the block lies on the same side of the midpoint as endpoint 0.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float sums[2][3];
   float luminances[2];
   float midpoint;
   int n_left = 0;
   int row, col, e, c;

   memset(sums, 0, sizeof sums);

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         const float luminance = texel[0] + texel[1] + texel[2];
         const int group = luminance < average_luminance ? 0 : 1;

         if (group == 0)
            n_left++;

         for (c = 0; c < 3; c++)
            sums[group][c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   for (c = 0; c < 3; c++) {
      if (n_left == 0 || n_left == n_texels) {
         /* Only one group is populated: share the overall mean */
         endpoints[0][c] = endpoints[1][c] =
            (sums[0][c] + sums[1][c]) / n_texels;
      } else {
         endpoints[0][c] = sums[0][c] / n_left;
         endpoints[1][c] = sums[1][c] / (n_texels - n_left);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities, then measure the luminance of the clamped endpoints */
   for (e = 0; e < 2; e++) {
      for (c = 0; c < 3; c++)
         endpoints[e][c] = clamp_value(endpoints[e][c], is_signed);

      luminances[e] = endpoints[e][0] + endpoints[e][1] + endpoints[e][2];
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */
   midpoint = (luminances[0] + luminances[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (luminances[0] <= midpoint)) {
      for (c = 0; c < 3; c++) {
         const float swapped = endpoints[0][c];

         endpoints[0][c] = endpoints[1][c];
         endpoints[1][c] = swapped;
      }
   }
}
1591 
1592 static void
write_rgb_indices_float(struct bit_writer * writer,int src_width,int src_height,const float * src,int src_rowstride,float endpoints[][3])1593 write_rgb_indices_float(struct bit_writer *writer,
1594                         int src_width, int src_height,
1595                         const float *src, int src_rowstride,
1596                         float endpoints[][3])
1597 {
1598    float luminance;
1599    float endpoint_luminances[2];
1600    int endpoint;
1601    int index;
1602    int y, x;
1603 
1604    for (endpoint = 0; endpoint < 2; endpoint++) {
1605       endpoint_luminances[endpoint] =
1606          endpoints[endpoint][0] +
1607          endpoints[endpoint][1] +
1608          endpoints[endpoint][2];
1609    }
1610 
1611    /* If the endpoints have the same luminance then we'll just use index 0 for
1612     * all of the texels */
1613    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1614       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1615       return;
1616    }
1617 
1618    for (y = 0; y < src_height; y++) {
1619       for (x = 0; x < src_width; x++) {
1620          luminance = src[0] + src[1] + src[2];
1621 
1622          index = ((luminance - endpoint_luminances[0]) * 15 /
1623                   (endpoint_luminances[1] - endpoint_luminances[0]));
1624          if (index < 0)
1625             index = 0;
1626          else if (index > 15)
1627             index = 15;
1628 
1629          assert(x != 0 || y != 0 || index < 8);
1630 
1631          write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1632 
1633          src += 3;
1634       }
1635 
1636       /* Pad the indices out to the block size */
1637       if (src_width < BLOCK_SIZE)
1638          write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1639 
1640       src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1641    }
1642 
1643    /* Pad the indices out to the block size */
1644    if (src_height < BLOCK_SIZE)
1645       write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1646 }
1647 
/*
 * Convert a float endpoint component into the integer that is stored in the
 * block.
 *
 * The value is converted to a half float with _mesa_float_to_half() and
 * its bit pattern rescaled. For unsigned data non-positive inputs become 0.
 * For signed data the sign bit (0x8000) is split off, the magnitude is
 * rescaled, and negative results are negated and masked to 10 bits.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int half_bits;
   int magnitude;
   int quantized;
   bool negative;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half_bits = _mesa_float_to_half(value);

      return (64 * half_bits / 31) >> 6;
   }

   half_bits = _mesa_float_to_half(value);
   negative = (half_bits & 0x8000) != 0;
   magnitude = negative ? (half_bits & 0x7fff) : half_bits;

   quantized = (32 * magnitude / 31) >> 6;

   return negative ? (-quantized & ((1 << 10) - 1)) : quantized;
}
1677 
1678 static void
compress_rgb_float_block(int src_width,int src_height,const float * src,int src_rowstride,uint8_t * dst,bool is_signed)1679 compress_rgb_float_block(int src_width, int src_height,
1680                          const float *src, int src_rowstride,
1681                          uint8_t *dst,
1682                          bool is_signed)
1683 {
1684    float average_luminance;
1685    float endpoints[2][3];
1686    struct bit_writer writer;
1687    int component, endpoint;
1688    int endpoint_value;
1689 
1690    average_luminance =
1691       get_average_luminance_float(src_width, src_height, src, src_rowstride);
1692    get_endpoints_float(src_width, src_height, src, src_rowstride,
1693                        average_luminance, endpoints, is_signed);
1694 
1695    writer.dst = dst;
1696    writer.pos = 0;
1697    writer.buf = 0;
1698 
1699    write_bits(&writer, 5, 3); /* mode 3 */
1700 
1701    /* Write the endpoints */
1702    for (endpoint = 0; endpoint < 2; endpoint++) {
1703       for (component = 0; component < 3; component++) {
1704          endpoint_value =
1705             get_endpoint_value(endpoints[endpoint][component], is_signed);
1706          write_bits(&writer, 10, endpoint_value);
1707       }
1708    }
1709 
1710    write_rgb_indices_float(&writer,
1711                            src_width, src_height,
1712                            src, src_rowstride,
1713                            endpoints);
1714 }
1715 
1716 static void
compress_rgb_float(int width,int height,const float * src,int src_rowstride,uint8_t * dst,int dst_rowstride,bool is_signed)1717 compress_rgb_float(int width, int height,
1718                    const float *src, int src_rowstride,
1719                    uint8_t *dst, int dst_rowstride,
1720                    bool is_signed)
1721 {
1722    int dst_row_diff;
1723    int y, x;
1724 
1725    if (dst_rowstride >= width * 4)
1726       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1727    else
1728       dst_row_diff = 0;
1729 
1730    for (y = 0; y < height; y += BLOCK_SIZE) {
1731       for (x = 0; x < width; x += BLOCK_SIZE) {
1732          compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1733                                   MIN2(height - y, BLOCK_SIZE),
1734                                   src + x * 3 +
1735                                   y * src_rowstride / sizeof (float),
1736                                   src_rowstride,
1737                                   dst,
1738                                   is_signed);
1739          dst += BLOCK_BYTES;
1740       }
1741       dst += dst_row_diff;
1742    }
1743 }
1744 
1745 #endif
1746