1 /*
2  * Copyright (C) 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file texcompress_bptc.c
26  * GL_ARB_texture_compression_bptc support.
27  */
28 
29 #include <stdbool.h>
30 #include "texcompress.h"
31 #include "texcompress_bptc.h"
32 #include "util/format_srgb.h"
33 #include "util/half_float.h"
34 #include "texstore.h"
35 #include "macros.h"
36 #include "image.h"
37 
/* Width and height, in texels, of one compressed block. Texels inside a
 * block are numbered row by row, so a block holds BLOCK_SIZE * BLOCK_SIZE
 * per-texel indices.
 */
#define BLOCK_SIZE 4
/* Number of entries in each partition table and each anchor index row. */
#define N_PARTITIONS 64
/* NOTE(review): presumably the size in bytes of one compressed block (the
 * fetch functions step through the texture in units of 16 bytes) — it is
 * not referenced in this part of the file, confirm against its users.
 */
#define BLOCK_BYTES 16
41 
/* Describes the bit layout of one of the unsigned-normalized block modes.
 * One instance exists per mode in bptc_unorm_modes.
 */
struct bptc_unorm_mode {
   /* Number of subsets (1, 2 or 3); each subset has a pair of endpoints */
   int n_subsets;
   /* Width of the partition number field; 0 if the mode has none */
   int n_partition_bits;
   /* Whether a 2-bit rotation field follows the partition number */
   bool has_rotation_bits;
   /* Whether a 1-bit index selection field follows the rotation field */
   bool has_index_selection_bit;
   /* Stored bits per color component of each endpoint (without p-bit) */
   int n_color_bits;
   /* Stored bits of alpha per endpoint; 0 means alpha is fixed at 255 */
   int n_alpha_bits;
   /* Whether every endpoint is followed by its own p-bit */
   bool has_endpoint_pbits;
   /* Whether each subset has one p-bit shared by both of its endpoints */
   bool has_shared_pbits;
   /* Bits per texel in the primary index data */
   int n_index_bits;
   /* Bits per texel in the secondary index data; 0 if there is none */
   int n_secondary_index_bits;
};
54 
/* Describes one run of consecutive bits in a float block and where in the
 * endpoint values those bits end up.
 */
struct bptc_float_bitfield {
   /* Index of the endpoint receiving the bits; -1 terminates a list */
   int8_t endpoint;
   /* Component (0, 1 or 2) of that endpoint receiving the bits */
   uint8_t component;
   /* Position of the field's lowest bit within the endpoint value */
   uint8_t offset;
   /* Number of bits in the field */
   uint8_t n_bits;
   /* If true the bits are stored in reverse order and get flipped while
    * they are merged into the endpoint value
    */
   bool reverse;
};
62 
/* Describes the bit layout of one of the floating-point block modes. One
 * instance exists per mode in bptc_float_modes.
 */
struct bptc_float_mode {
   /* True for mode numbers that are reserved; the other fields are then
    * left zero-initialized
    */
   bool reserved;
   /* True if all endpoints but the first are stored as signed deltas
    * relative to the first endpoint
    */
   bool transformed_endpoints;
   /* Width of the partition number field. A non-zero value also means the
    * block has four endpoints instead of two.
    */
   int n_partition_bits;
   /* Number of bits of a full-precision (non-delta) endpoint component */
   int n_endpoint_bits;
   /* Bits per texel in the index data */
   int n_index_bits;
   /* Per-component width of the deltas, used to sign-extend them when
    * transformed_endpoints is set
    */
   int n_delta_bits[3];
   /* The endpoint bitfields in the order they appear in the block,
    * terminated by an entry whose endpoint is -1
    */
   struct bptc_float_bitfield bitfields[24];
};
72 
/* NOTE(review): this struct is not referenced in the part of the file
 * reviewed here; judging by the names it is the state of a writer that
 * accumulates bits in 'buf' and flushes bytes to 'dst' — confirm against
 * the functions that use it.
 */
struct bit_writer {
   uint8_t buf;
   int pos;
   uint8_t *dst;
};
78 
/* Layout of the eight unorm modes, indexed by mode number. The mode number
 * of a block is the position of the lowest set bit of its first byte.
 *
 * Columns: n_subsets, n_partition_bits, has_rotation_bits,
 * has_index_selection_bit, n_color_bits, n_alpha_bits, has_endpoint_pbits,
 * has_shared_pbits, n_index_bits, n_secondary_index_bits.
 */
static const struct bptc_unorm_mode
bptc_unorm_modes[] = {
   /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
   /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
   /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
   /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
   /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
   /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
   /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
};
90 
/* Layout of the float modes. The comment above each entry gives the mode
 * bits as they appear in the low bits of the first byte of a block (most
 * significant bit first). The two 2-bit modes come first; the 5-bit modes
 * follow in the order produced by the decoder, which drops the always-set
 * second bit of the mode field and adds 2.
 *
 * Each entry lists: reserved, transformed_endpoints, n_partition_bits,
 * n_endpoint_bits, n_index_bits, n_delta_bits[3] and then the bitfields in
 * block order. Each bitfield is { endpoint, component, offset, n_bits,
 * reverse } and the list ends with { -1 }.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
241 
/* This partition table is used when the mode has two subsets. It is indexed
 * by the partition number read from the block. Each partition is
 * represented by a 32-bit value which gives 2 bits per texel within the
 * block, with texel 0 in the two least-significant bits. The value of the
 * two bits represents which subset to use (0 or 1).
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
266 
/* This partition table is used when the mode has three subsets. It uses the
 * same encoding as the two-subset table (2 bits per texel, texel 0 in the
 * least-significant bits) but in this case the values can be 0, 1 or 2.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
289 
/* Texel numbers of the anchor texels of every subset but the first, indexed
 * by partition number. The anchor of the first subset is not listed because
 * it is always texel 0. Row 0 is used for two-subset modes, rows 1 and 2
 * for three-subset modes. An anchor texel stores one bit less of index
 * data than the other texels.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
318 
/**
 * Reads a bitfield from a block of bytes.
 *
 * Bit 0 of the block is the least-significant bit of block[0]; a field that
 * crosses a byte boundary continues in the low bits of the next byte. The
 * first bit read becomes the least-significant bit of the result.
 *
 * \param block   The bytes to read from
 * \param offset  Position of the first bit of the field, counted in bits
 *                from the start of the block
 * \param n_bits  Width of the field in bits
 * \return The value of the field, or 0 if n_bits is zero or negative. In
 *         that case the block is not accessed at all, so a zero-width field
 *         located exactly at the end of the block is safe to request.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int byte_index = offset / 8;
   int bit_index = offset % 8;
   int result = 0;
   int bit = 0;

   while (n_bits > 0) {
      /* Take whatever is left of the current byte, but no more than the
       * number of bits still wanted.
       */
      int n_bits_in_byte = 8 - bit_index;

      if (n_bits_in_byte > n_bits)
         n_bits_in_byte = n_bits;

      result |= ((block[byte_index] >> bit_index) &
                 ((1 << n_bits_in_byte) - 1)) << bit;

      n_bits -= n_bits_in_byte;
      bit += n_bits_in_byte;

      /* Every byte after the first is consumed from its lowest bit */
      byte_index++;
      bit_index = 0;
   }

   return result;
}
345 
/**
 * Widens an n-bit endpoint component to a full byte.
 *
 * The value is moved to the top of the byte and its most-significant bits
 * are repeated in the low bits that this leaves empty, so that the largest
 * n-bit value maps to 255.
 *
 * \param byte    The component, stored in the low n_bits bits
 * \param n_bits  Number of significant bits in byte
 * \return The component scaled to 8 bits
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int n_missing_bits = 8 - n_bits;
   const int replicate_shift = 2 * n_bits - 8;
   const int high_part = byte << n_missing_bits;
   const int low_part = byte >> replicate_shift;

   return high_part | low_part;
}
355 
356 static int
extract_unorm_endpoints(const struct bptc_unorm_mode * mode,const uint8_t * block,int bit_offset,uint8_t endpoints[][4])357 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
358                         const uint8_t *block,
359                         int bit_offset,
360                         uint8_t endpoints[][4])
361 {
362    int component;
363    int subset;
364    int endpoint;
365    int pbit;
366    int n_components;
367 
368    /* Extract each color component */
369    for (component = 0; component < 3; component++) {
370       for (subset = 0; subset < mode->n_subsets; subset++) {
371          for (endpoint = 0; endpoint < 2; endpoint++) {
372             endpoints[subset * 2 + endpoint][component] =
373                extract_bits(block, bit_offset, mode->n_color_bits);
374             bit_offset += mode->n_color_bits;
375          }
376       }
377    }
378 
379    /* Extract the alpha values */
380    if (mode->n_alpha_bits > 0) {
381       for (subset = 0; subset < mode->n_subsets; subset++) {
382          for (endpoint = 0; endpoint < 2; endpoint++) {
383             endpoints[subset * 2 + endpoint][3] =
384                extract_bits(block, bit_offset, mode->n_alpha_bits);
385             bit_offset += mode->n_alpha_bits;
386          }
387       }
388 
389       n_components = 4;
390    } else {
391       for (subset = 0; subset < mode->n_subsets; subset++)
392          for (endpoint = 0; endpoint < 2; endpoint++)
393             endpoints[subset * 2 + endpoint][3] = 255;
394 
395       n_components = 3;
396    }
397 
398    /* Add in the p-bits */
399    if (mode->has_endpoint_pbits) {
400       for (subset = 0; subset < mode->n_subsets; subset++) {
401          for (endpoint = 0; endpoint < 2; endpoint++) {
402             pbit = extract_bits(block, bit_offset, 1);
403             bit_offset += 1;
404 
405             for (component = 0; component < n_components; component++) {
406                endpoints[subset * 2 + endpoint][component] <<= 1;
407                endpoints[subset * 2 + endpoint][component] |= pbit;
408             }
409          }
410       }
411    } else if (mode->has_shared_pbits) {
412       for (subset = 0; subset < mode->n_subsets; subset++) {
413          pbit = extract_bits(block, bit_offset, 1);
414          bit_offset += 1;
415 
416          for (endpoint = 0; endpoint < 2; endpoint++) {
417             for (component = 0; component < n_components; component++) {
418                endpoints[subset * 2 + endpoint][component] <<= 1;
419                endpoints[subset * 2 + endpoint][component] |= pbit;
420             }
421          }
422       }
423    }
424 
425    /* Expand the n-bit values to a byte */
426    for (subset = 0; subset < mode->n_subsets; subset++) {
427       for (endpoint = 0; endpoint < 2; endpoint++) {
428          for (component = 0; component < 3; component++) {
429             endpoints[subset * 2 + endpoint][component] =
430                expand_component(endpoints[subset * 2 + endpoint][component],
431                                 mode->n_color_bits +
432                                 mode->has_endpoint_pbits +
433                                 mode->has_shared_pbits);
434          }
435 
436          if (mode->n_alpha_bits > 0) {
437             endpoints[subset * 2 + endpoint][3] =
438                expand_component(endpoints[subset * 2 + endpoint][3],
439                                 mode->n_alpha_bits +
440                                 mode->has_endpoint_pbits +
441                                 mode->has_shared_pbits);
442          }
443       }
444    }
445 
446    return bit_offset;
447 }
448 
449 static bool
is_anchor(int n_subsets,int partition_num,int texel)450 is_anchor(int n_subsets,
451           int partition_num,
452           int texel)
453 {
454    if (texel == 0)
455       return true;
456 
457    switch (n_subsets) {
458    case 1:
459       return false;
460    case 2:
461       return anchor_indices[0][partition_num] == texel;
462    case 3:
463       return (anchor_indices[1][partition_num] == texel ||
464               anchor_indices[2][partition_num] == texel);
465    default:
466       assert(false);
467       return false;
468    }
469 }
470 
471 static int
count_anchors_before_texel(int n_subsets,int partition_num,int texel)472 count_anchors_before_texel(int n_subsets,
473                            int partition_num,
474                            int texel)
475 {
476    int count = 1;
477 
478    if (texel == 0)
479       return 0;
480 
481    switch (n_subsets) {
482    case 1:
483       break;
484    case 2:
485       if (texel > anchor_indices[0][partition_num])
486          count++;
487       break;
488    case 3:
489       if (texel > anchor_indices[1][partition_num])
490          count++;
491       if (texel > anchor_indices[2][partition_num])
492          count++;
493       break;
494    default:
495       assert(false);
496       return 0;
497    }
498 
499    return count;
500 }
501 
/**
 * Blends two endpoint values according to a texel index.
 *
 * The index selects a weight between 0 and 64 from a table that depends on
 * the width of the index; the result is the weighted average of the two
 * endpoints in 6-bit fixed point, with 32 added before the final shift.
 *
 * \param a           Endpoint value used in full for index 0
 * \param b           Endpoint value used in full for the highest index
 * \param index       Index of the texel
 * \param index_bits  Width of the index; must be 2, 3 or 4
 * \return The interpolated value
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit_weights[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit_weights[] =
      { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit_weights[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed by the index width; there are no 0- or 1-bit tables */
   static const uint8_t *weight_tables[] = {
      NULL, NULL, two_bit_weights, three_bit_weights, four_bit_weights
   };
   const int weight_b = weight_tables[index_bits][index];
   const int weight_a = 64 - weight_b;

   return (weight_a * a + weight_b * b + 32) >> 6;
}
520 
/**
 * Applies the component rotation of a block to a decoded texel.
 *
 * A rotation of 0 leaves the texel alone. A rotation of 1, 2 or 3 exchanges
 * the alpha component with the first, second or third component
 * respectively.
 *
 * \param rotation  Value of the rotation field of the block
 * \param result    The four components of the texel, modified in place
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   uint8_t *partner;
   uint8_t alpha;

   if (rotation != 0) {
      partner = result + (rotation - 1);

      alpha = result[3];
      result[3] = *partner;
      *partner = alpha;
   }
}
536 
537 static void
fetch_rgba_unorm_from_block(const uint8_t * block,uint8_t * result,int texel)538 fetch_rgba_unorm_from_block(const uint8_t *block,
539                             uint8_t *result,
540                             int texel)
541 {
542    int mode_num = ffs(block[0]);
543    const struct bptc_unorm_mode *mode;
544    int bit_offset, secondary_bit_offset;
545    int partition_num;
546    int subset_num;
547    int rotation;
548    int index_selection;
549    int index_bits;
550    int indices[2];
551    int index;
552    int anchors_before_texel;
553    bool anchor;
554    uint8_t endpoints[3 * 2][4];
555    uint32_t subsets;
556    int component;
557 
558    if (mode_num == 0) {
559       /* According to the spec this mode is reserved and shouldn't be used. */
560       memset(result, 0, 3);
561       result[3] = 0xff;
562       return;
563    }
564 
565    mode = bptc_unorm_modes + mode_num - 1;
566    bit_offset = mode_num;
567 
568    partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
569    bit_offset += mode->n_partition_bits;
570 
571    switch (mode->n_subsets) {
572    case 1:
573       subsets = 0;
574       break;
575    case 2:
576       subsets = partition_table1[partition_num];
577       break;
578    case 3:
579       subsets = partition_table2[partition_num];
580       break;
581    default:
582       assert(false);
583       return;
584    }
585 
586    if (mode->has_rotation_bits) {
587       rotation = extract_bits(block, bit_offset, 2);
588       bit_offset += 2;
589    } else {
590       rotation = 0;
591    }
592 
593    if (mode->has_index_selection_bit) {
594       index_selection = extract_bits(block, bit_offset, 1);
595       bit_offset++;
596    } else {
597       index_selection = 0;
598    }
599 
600    bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
601 
602    anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
603                                                      partition_num, texel);
604 
605    /* Calculate the offset to the secondary index */
606    secondary_bit_offset = (bit_offset +
607                            BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
608                            mode->n_subsets +
609                            mode->n_secondary_index_bits * texel -
610                            anchors_before_texel);
611 
612    /* Calculate the offset to the primary index for this texel */
613    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
614 
615    subset_num = (subsets >> (texel * 2)) & 3;
616 
617    anchor = is_anchor(mode->n_subsets, partition_num, texel);
618 
619    index_bits = mode->n_index_bits;
620    if (anchor)
621       index_bits--;
622    indices[0] = extract_bits(block, bit_offset, index_bits);
623 
624    if (mode->n_secondary_index_bits) {
625       index_bits = mode->n_secondary_index_bits;
626       if (anchor)
627          index_bits--;
628       indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
629    }
630 
631    index = indices[index_selection];
632    index_bits = (index_selection ?
633                  mode->n_secondary_index_bits :
634                  mode->n_index_bits);
635 
636    for (component = 0; component < 3; component++)
637       result[component] = interpolate(endpoints[subset_num * 2][component],
638                                       endpoints[subset_num * 2 + 1][component],
639                                       index,
640                                       index_bits);
641 
642    /* Alpha uses the opposite index from the color components */
643    if (mode->n_secondary_index_bits && !index_selection) {
644       index = indices[1];
645       index_bits = mode->n_secondary_index_bits;
646    } else {
647       index = indices[0];
648       index_bits = mode->n_index_bits;
649    }
650 
651    result[3] = interpolate(endpoints[subset_num * 2][3],
652                            endpoints[subset_num * 2 + 1][3],
653                            index,
654                            index_bits);
655 
656    apply_rotation(rotation, result);
657 }
658 
659 static void
fetch_bptc_rgba_unorm_bytes(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLubyte * texel)660 fetch_bptc_rgba_unorm_bytes(const GLubyte *map,
661                             GLint rowStride, GLint i, GLint j,
662                             GLubyte *texel)
663 {
664    const GLubyte *block;
665 
666    block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
667 
668    fetch_rgba_unorm_from_block(block, texel, (i % 4) + (j % 4) * 4);
669 }
670 
671 static void
fetch_bptc_rgba_unorm(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLfloat * texel)672 fetch_bptc_rgba_unorm(const GLubyte *map,
673                       GLint rowStride, GLint i, GLint j,
674                       GLfloat *texel)
675 {
676    GLubyte texel_bytes[4];
677 
678    fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
679 
680    texel[RCOMP] = UBYTE_TO_FLOAT(texel_bytes[0]);
681    texel[GCOMP] = UBYTE_TO_FLOAT(texel_bytes[1]);
682    texel[BCOMP] = UBYTE_TO_FLOAT(texel_bytes[2]);
683    texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
684 }
685 
686 static void
fetch_bptc_srgb_alpha_unorm(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLfloat * texel)687 fetch_bptc_srgb_alpha_unorm(const GLubyte *map,
688                             GLint rowStride, GLint i, GLint j,
689                             GLfloat *texel)
690 {
691    GLubyte texel_bytes[4];
692 
693    fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
694 
695    texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[0]);
696    texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[1]);
697    texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[2]);
698    texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
699 }
700 
/**
 * Interprets the low n_bits bits of a value as a two's complement number.
 *
 * If bit n_bits - 1 is set, all of the bits above it are set as well so
 * that the result is negative; otherwise the value is returned unchanged.
 *
 * The bit manipulation is done on an unsigned copy because shifting a
 * negative signed value to the left is undefined behaviour in C. The final
 * conversion back to int32_t assumes the usual two's complement
 * representation.
 *
 * \param value   The value to extend; bits above n_bits are expected to be
 *                zero
 * \param n_bits  Width of the stored number in bits
 * \return The sign-extended value. The value is returned as is if n_bits is
 *         not in the range 1 to 31, as there is nothing to extend then.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   uint32_t bits = (uint32_t) value;

   /* Shifting by a negative count or by the full width is undefined */
   if (n_bits <= 0 || n_bits >= 32)
      return value;

   if (bits & (UINT32_C(1) << (n_bits - 1)))
      bits |= ~UINT32_C(0) << n_bits;

   return (int32_t) bits;
}
711 
/**
 * Scales a signed endpoint component up from its stored precision.
 *
 * The magnitude is scaled to the range 0 to 0x7fff and the sign is put back
 * afterwards. Zero stays zero and the largest magnitudes map to 0x7fff.
 *
 * \param value            The sign-extended component
 * \param n_endpoint_bits  Precision of the component in bits
 * \return The unquantized component; value itself if the precision is 16
 *         bits or more
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   const bool negative = value < 0;
   int magnitude;

   /* Full-precision values and zero need no scaling */
   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   magnitude = negative ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      magnitude = 0x7fff;
   else
      magnitude = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return negative ? -magnitude : magnitude;
}
740 
/**
 * Scales an unsigned endpoint component up from its stored precision.
 *
 * Zero stays zero, the largest representable value maps to 0xffff and
 * everything in between is scaled to the 16-bit range.
 *
 * \param value            The stored component
 * \param n_endpoint_bits  Precision of the component in bits
 * \return The unquantized component; value itself if the precision is 15
 *         bits or more
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   int max_value;

   /* Full-precision values and zero need no scaling */
   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   max_value = (1 << n_endpoint_bits) - 1;

   if (value != max_value)
      return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return 0xffff;
}
755 
756 static int
extract_float_endpoints(const struct bptc_float_mode * mode,const uint8_t * block,int bit_offset,int32_t endpoints[][3],bool is_signed)757 extract_float_endpoints(const struct bptc_float_mode *mode,
758                         const uint8_t *block,
759                         int bit_offset,
760                         int32_t endpoints[][3],
761                         bool is_signed)
762 {
763    const struct bptc_float_bitfield *bitfield;
764    int endpoint, component;
765    int n_endpoints;
766    int value;
767    int i;
768 
769    if (mode->n_partition_bits)
770       n_endpoints = 4;
771    else
772       n_endpoints = 2;
773 
774    memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
775 
776    for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
777       value = extract_bits(block, bit_offset, bitfield->n_bits);
778       bit_offset += bitfield->n_bits;
779 
780       if (bitfield->reverse) {
781          for (i = 0; i < bitfield->n_bits; i++) {
782             if (value & (1 << i))
783                endpoints[bitfield->endpoint][bitfield->component] |=
784                   1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
785          }
786       } else {
787          endpoints[bitfield->endpoint][bitfield->component] |=
788             value << bitfield->offset;
789       }
790    }
791 
792    if (mode->transformed_endpoints) {
793       /* The endpoints are specified as signed offsets from e0 */
794       for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
795          for (component = 0; component < 3; component++) {
796             value = sign_extend(endpoints[endpoint][component],
797                                 mode->n_delta_bits[component]);
798             endpoints[endpoint][component] =
799                ((endpoints[0][component] + value) &
800                 ((1 << mode->n_endpoint_bits) - 1));
801          }
802       }
803    }
804 
805    if (is_signed) {
806       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
807          for (component = 0; component < 3; component++) {
808             value = sign_extend(endpoints[endpoint][component],
809                                 mode->n_endpoint_bits);
810             endpoints[endpoint][component] =
811                signed_unquantize(value, mode->n_endpoint_bits);
812          }
813       }
814    } else {
815       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
816          for (component = 0; component < 3; component++) {
817             endpoints[endpoint][component] =
818                unsigned_unquantize(endpoints[endpoint][component],
819                                    mode->n_endpoint_bits);
820          }
821       }
822    }
823 
824    return bit_offset;
825 }
826 
/**
 * Performs the final scaling step for an unsigned value.
 *
 * \param value  The value to scale
 * \return value scaled by 31 / 64, rounded towards zero
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
832 
/**
 * Performs the final scaling step for a signed value.
 *
 * The magnitude is scaled by 31 / 32, rounded towards zero. For a negative
 * input bit 15 of the result is set to mark the sign, so the result is a
 * sign-and-magnitude bit pattern rather than a negative number.
 *
 * \param value  The value to scale
 * \return The scaled magnitude, with 0x8000 set if value was negative
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   const int32_t sign_bit = value < 0 ? 0x8000 : 0;
   const int32_t magnitude = value < 0 ? -value : value;

   return (magnitude * 31 / 32) | sign_bit;
}
841 
842 static void
fetch_rgb_float_from_block(const uint8_t * block,float * result,int texel,bool is_signed)843 fetch_rgb_float_from_block(const uint8_t *block,
844                            float *result,
845                            int texel,
846                            bool is_signed)
847 {
848    int mode_num;
849    const struct bptc_float_mode *mode;
850    int bit_offset;
851    int partition_num;
852    int subset_num;
853    int index_bits;
854    int index;
855    int anchors_before_texel;
856    int32_t endpoints[2 * 2][3];
857    uint32_t subsets;
858    int n_subsets;
859    int component;
860    int32_t value;
861 
862    if (block[0] & 0x2) {
863       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
864       bit_offset = 5;
865    } else {
866       mode_num = block[0] & 3;
867       bit_offset = 2;
868    }
869 
870    mode = bptc_float_modes + mode_num;
871 
872    if (mode->reserved) {
873       memset(result, 0, sizeof result[0] * 3);
874       result[3] = 1.0f;
875       return;
876    }
877 
878    bit_offset = extract_float_endpoints(mode, block, bit_offset,
879                                         endpoints, is_signed);
880 
881    if (mode->n_partition_bits) {
882       partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
883       bit_offset += mode->n_partition_bits;
884 
885       subsets = partition_table1[partition_num];
886       n_subsets = 2;
887    } else {
888       partition_num = 0;
889       subsets = 0;
890       n_subsets = 1;
891    }
892 
893    anchors_before_texel =
894       count_anchors_before_texel(n_subsets, partition_num, texel);
895 
896    /* Calculate the offset to the primary index for this texel */
897    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
898 
899    subset_num = (subsets >> (texel * 2)) & 3;
900 
901    index_bits = mode->n_index_bits;
902    if (is_anchor(n_subsets, partition_num, texel))
903       index_bits--;
904    index = extract_bits(block, bit_offset, index_bits);
905 
906    for (component = 0; component < 3; component++) {
907       value = interpolate(endpoints[subset_num * 2][component],
908                           endpoints[subset_num * 2 + 1][component],
909                           index,
910                           mode->n_index_bits);
911 
912       if (is_signed)
913          value = finish_signed_unquantize(value);
914       else
915          value = finish_unsigned_unquantize(value);
916 
917       result[component] = _mesa_half_to_float(value);
918    }
919 
920    result[3] = 1.0f;
921 }
922 
923 static void
fetch_bptc_rgb_float(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLfloat * texel,bool is_signed)924 fetch_bptc_rgb_float(const GLubyte *map,
925                      GLint rowStride, GLint i, GLint j,
926                      GLfloat *texel,
927                      bool is_signed)
928 {
929    const GLubyte *block;
930 
931    block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
932 
933    fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed);
934 }
935 
936 static void
fetch_bptc_rgb_signed_float(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLfloat * texel)937 fetch_bptc_rgb_signed_float(const GLubyte *map,
938                             GLint rowStride, GLint i, GLint j,
939                             GLfloat *texel)
940 {
941    fetch_bptc_rgb_float(map, rowStride, i, j, texel, true);
942 }
943 
944 static void
fetch_bptc_rgb_unsigned_float(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLfloat * texel)945 fetch_bptc_rgb_unsigned_float(const GLubyte *map,
946                               GLint rowStride, GLint i, GLint j,
947                               GLfloat *texel)
948 {
949    fetch_bptc_rgb_float(map, rowStride, i, j, texel, false);
950 }
951 
952 compressed_fetch_func
_mesa_get_bptc_fetch_func(mesa_format format)953 _mesa_get_bptc_fetch_func(mesa_format format)
954 {
955    switch (format) {
956    case MESA_FORMAT_BPTC_RGBA_UNORM:
957       return fetch_bptc_rgba_unorm;
958    case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM:
959       return fetch_bptc_srgb_alpha_unorm;
960    case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT:
961       return fetch_bptc_rgb_signed_float;
962    case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT:
963       return fetch_bptc_rgb_unsigned_float;
964    default:
965       return NULL;
966    }
967 }
968 
969 static void
write_bits(struct bit_writer * writer,int n_bits,int value)970 write_bits(struct bit_writer *writer, int n_bits, int value)
971 {
972    do {
973       if (n_bits + writer->pos >= 8) {
974          *(writer->dst++) = writer->buf | (value << writer->pos);
975          writer->buf = 0;
976          value >>= (8 - writer->pos);
977          n_bits -= (8 - writer->pos);
978          writer->pos = 0;
979       } else {
980          writer->buf |= value << writer->pos;
981          writer->pos += n_bits;
982          break;
983       }
984    } while (n_bits > 0);
985 }
986 
/*
 * Compute the mean luminance and mean alpha over a width x height region of
 * 4-byte texels.
 *
 * "Luminance" is the plain sum of the first three bytes of a texel; alpha is
 * the fourth byte.  src_rowstride is the distance in bytes between the
 * starts of consecutive rows.  Both averages use truncating integer
 * division by the texel count.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int luminance_total = 0;
   int alpha_total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         luminance_total += texel[0] + texel[1] + texel[2];
         alpha_total += texel[3];
      }
   }

   *average_luminance = luminance_total / n_texels;
   *average_alpha = alpha_total / n_texels;
}
1007 
/*
 * Choose two RGBA endpoints for a width x height region of 4-byte RGBA
 * texels.
 *
 * The texels are split into two buckets by comparing their luminance (sum of
 * R, G and B) against average_luminance; each color endpoint is the mean
 * color of one bucket.  Alpha is bucketed independently by comparing each
 * texel's alpha against average_alpha.  If one bucket ends up empty, both
 * endpoints are set to the mean over the whole region.
 *
 * Afterwards the endpoints are swapped where necessary so that the first
 * texel lies on the endpoint-0 side of the midpoint; the index writers rely
 * on this so the first index can be stored with one bit fewer.
 *
 * src_rowstride is the distance in bytes between rows.  endpoints must have
 * room for two entries.
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Bucket alpha by the alpha channel itself (p[3]); testing the
          * blue channel here would pick alpha endpoints based on unrelated
          * data */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      /* One bucket is empty: use the overall mean for both endpoints */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1104 
1105 static void
write_rgb_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1106 write_rgb_indices_unorm(struct bit_writer *writer,
1107                         int src_width, int src_height,
1108                         const uint8_t *src, int src_rowstride,
1109                         uint8_t endpoints[][4])
1110 {
1111    int luminance;
1112    int endpoint_luminances[2];
1113    int endpoint;
1114    int index;
1115    int y, x;
1116 
1117    for (endpoint = 0; endpoint < 2; endpoint++) {
1118       endpoint_luminances[endpoint] =
1119          endpoints[endpoint][0] +
1120          endpoints[endpoint][1] +
1121          endpoints[endpoint][2];
1122    }
1123 
1124    /* If the endpoints have the same luminance then we'll just use index 0 for
1125     * all of the texels */
1126    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1127       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1128       return;
1129    }
1130 
1131    for (y = 0; y < src_height; y++) {
1132       for (x = 0; x < src_width; x++) {
1133          luminance = src[0] + src[1] + src[2];
1134 
1135          index = ((luminance - endpoint_luminances[0]) * 3 /
1136                   (endpoint_luminances[1] - endpoint_luminances[0]));
1137          if (index < 0)
1138             index = 0;
1139          else if (index > 3)
1140             index = 3;
1141 
1142          assert(x != 0 || y != 0 || index < 2);
1143 
1144          write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1145 
1146          src += 4;
1147       }
1148 
1149       /* Pad the indices out to the block size */
1150       if (src_width < BLOCK_SIZE)
1151          write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1152 
1153       src += src_rowstride - src_width * 4;
1154    }
1155 
1156    /* Pad the indices out to the block size */
1157    if (src_height < BLOCK_SIZE)
1158       write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1159 }
1160 
1161 static void
write_alpha_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1162 write_alpha_indices_unorm(struct bit_writer *writer,
1163                           int src_width, int src_height,
1164                           const uint8_t *src, int src_rowstride,
1165                           uint8_t endpoints[][4])
1166 {
1167    int index;
1168    int y, x;
1169 
1170    /* If the endpoints have the same alpha then we'll just use index 0 for
1171     * all of the texels */
1172    if (endpoints[0][3] == endpoints[1][3]) {
1173       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1174       return;
1175    }
1176 
1177    for (y = 0; y < src_height; y++) {
1178       for (x = 0; x < src_width; x++) {
1179          index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1180                   ((int) endpoints[1][3] - endpoints[0][3]));
1181          if (index < 0)
1182             index = 0;
1183          else if (index > 7)
1184             index = 7;
1185 
1186          assert(x != 0 || y != 0 || index < 4);
1187 
1188          /* The first index has one less bit */
1189          write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1190 
1191          src += 4;
1192       }
1193 
1194       /* Pad the indices out to the block size */
1195       if (src_width < BLOCK_SIZE)
1196          write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1197 
1198       src += src_rowstride - src_width * 4;
1199    }
1200 
1201    /* Pad the indices out to the block size */
1202    if (src_height < BLOCK_SIZE)
1203       write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1204 }
1205 
1206 static void
compress_rgba_unorm_block(int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t * dst)1207 compress_rgba_unorm_block(int src_width, int src_height,
1208                           const uint8_t *src, int src_rowstride,
1209                           uint8_t *dst)
1210 {
1211    int average_luminance, average_alpha;
1212    uint8_t endpoints[2][4];
1213    struct bit_writer writer;
1214    int component, endpoint;
1215 
1216    get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1217                                      &average_luminance, &average_alpha);
1218    get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1219                             average_luminance, average_alpha,
1220                             endpoints);
1221 
1222    writer.dst = dst;
1223    writer.pos = 0;
1224    writer.buf = 0;
1225 
1226    write_bits(&writer, 5, 0x10); /* mode 4 */
1227    write_bits(&writer, 2, 0); /* rotation 0 */
1228    write_bits(&writer, 1, 0); /* index selection bit */
1229 
1230    /* Write the color endpoints */
1231    for (component = 0; component < 3; component++)
1232       for (endpoint = 0; endpoint < 2; endpoint++)
1233          write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1234 
1235    /* Write the alpha endpoints */
1236    for (endpoint = 0; endpoint < 2; endpoint++)
1237       write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1238 
1239    write_rgb_indices_unorm(&writer,
1240                            src_width, src_height,
1241                            src, src_rowstride,
1242                            endpoints);
1243    write_alpha_indices_unorm(&writer,
1244                              src_width, src_height,
1245                              src, src_rowstride,
1246                              endpoints);
1247 }
1248 
1249 static void
compress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)1250 compress_rgba_unorm(int width, int height,
1251                     const uint8_t *src, int src_rowstride,
1252                     uint8_t *dst, int dst_rowstride)
1253 {
1254    int dst_row_diff;
1255    int y, x;
1256 
1257    if (dst_rowstride >= width * 4)
1258       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1259    else
1260       dst_row_diff = 0;
1261 
1262    for (y = 0; y < height; y += BLOCK_SIZE) {
1263       for (x = 0; x < width; x += BLOCK_SIZE) {
1264          compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1265                                    MIN2(height - y, BLOCK_SIZE),
1266                                    src + x * 4 + y * src_rowstride,
1267                                    src_rowstride,
1268                                    dst);
1269          dst += BLOCK_BYTES;
1270       }
1271       dst += dst_row_diff;
1272    }
1273 }
1274 
1275 GLboolean
_mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)1276 _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
1277 {
1278    const GLubyte *pixels;
1279    const GLubyte *tempImage = NULL;
1280    int rowstride;
1281 
1282    if (srcFormat != GL_RGBA ||
1283        srcType != GL_UNSIGNED_BYTE ||
1284        ctx->_ImageTransferState ||
1285        srcPacking->SwapBytes) {
1286       /* convert image to RGBA/ubyte */
1287       GLubyte *tempImageSlices[1];
1288       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
1289       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
1290       if (!tempImage)
1291          return GL_FALSE; /* out of memory */
1292       tempImageSlices[0] = (GLubyte *) tempImage;
1293       _mesa_texstore(ctx, dims,
1294                      baseInternalFormat,
1295                      _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
1296                                            : MESA_FORMAT_A8B8G8R8_UNORM,
1297                      rgbaRowStride, tempImageSlices,
1298                      srcWidth, srcHeight, srcDepth,
1299                      srcFormat, srcType, srcAddr,
1300                      srcPacking);
1301 
1302       pixels = tempImage;
1303       rowstride = srcWidth * 4;
1304    } else {
1305       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1306                                      srcFormat, srcType, 0, 0);
1307       rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1308                                          srcFormat, srcType);
1309    }
1310 
1311    compress_rgba_unorm(srcWidth, srcHeight,
1312                        pixels, rowstride,
1313                        dstSlices[0], dstRowStride);
1314 
1315    free((void *) tempImage);
1316 
1317    return GL_TRUE;
1318 }
1319 
/*
 * Return the mean luminance over a width x height region of 3-float texels,
 * where luminance is the plain sum of a texel's three components.
 *
 * src_rowstride is the distance between rows in bytes.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Skip whatever lies between the end of this row and the next */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1337 
/*
 * Clamp value to the range that is stored for the chosen format:
 * [-65504, 65504] when is_signed is set, [0, 65504] otherwise.  Values for
 * which none of the comparisons hold are returned unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper_bound = 65504.0f;
   float lower_bound;

   if (value > upper_bound)
      return upper_bound;

   lower_bound = is_signed ? -65504.0f : 0.0f;

   if (value < lower_bound)
      return lower_bound;

   return value;
}
1356 
/*
 * Choose two RGB endpoints for a width x height region of 3-float texels.
 *
 * Texels are split into two buckets by comparing their luminance (sum of the
 * three components) with average_luminance; each endpoint is the mean color
 * of one bucket.  If one bucket is empty both endpoints become the mean of
 * the whole region.  The endpoints are then clamped with clamp_value() and
 * finally swapped if needed so that the first texel lies on the endpoint-0
 * side of the midpoint luminance.
 *
 * src_rowstride is the distance between rows in bytes.  endpoints must have
 * room for two entries.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float totals[2][3];
   float lum[2];
   float midpoint;
   int n_dark = 0;
   int row, col, c, e;

   memset(totals, 0, sizeof totals);

   /* Accumulate per-bucket color sums */
   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         const float luminance = texel[0] + texel[1] + texel[2];
         int bucket = 1;

         if (luminance < average_luminance) {
            bucket = 0;
            n_dark++;
         }

         for (c = 0; c < 3; c++)
            totals[bucket][c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   /* Turn the sums into means */
   if (n_dark == 0 || n_dark == n_texels) {
      for (c = 0; c < 3; c++) {
         const float mean = (totals[0][c] + totals[1][c]) / n_texels;

         endpoints[1][c] = mean;
         endpoints[0][c] = mean;
      }
   } else {
      for (c = 0; c < 3; c++) {
         endpoints[0][c] = totals[0][c] / n_dark;
         endpoints[1][c] = totals[1][c] / (n_texels - n_dark);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities */
   for (e = 0; e < 2; e++)
      for (c = 0; c < 3; c++)
         endpoints[e][c] = clamp_value(endpoints[e][c], is_signed);

   /* The first index is stored with one bit fewer, so the first texel must
    * end up nearer to endpoint 0; swap the endpoints if it does not */
   for (e = 0; e < 2; e++)
      lum[e] = endpoints[e][0] + endpoints[e][1] + endpoints[e][2];

   midpoint = (lum[0] + lum[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) != (lum[0] <= midpoint)) {
      for (c = 0; c < 3; c++) {
         const float saved = endpoints[0][c];

         endpoints[0][c] = endpoints[1][c];
         endpoints[1][c] = saved;
      }
   }
}
1432 
1433 static void
write_rgb_indices_float(struct bit_writer * writer,int src_width,int src_height,const float * src,int src_rowstride,float endpoints[][3])1434 write_rgb_indices_float(struct bit_writer *writer,
1435                         int src_width, int src_height,
1436                         const float *src, int src_rowstride,
1437                         float endpoints[][3])
1438 {
1439    float luminance;
1440    float endpoint_luminances[2];
1441    int endpoint;
1442    int index;
1443    int y, x;
1444 
1445    for (endpoint = 0; endpoint < 2; endpoint++) {
1446       endpoint_luminances[endpoint] =
1447          endpoints[endpoint][0] +
1448          endpoints[endpoint][1] +
1449          endpoints[endpoint][2];
1450    }
1451 
1452    /* If the endpoints have the same luminance then we'll just use index 0 for
1453     * all of the texels */
1454    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1455       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1456       return;
1457    }
1458 
1459    for (y = 0; y < src_height; y++) {
1460       for (x = 0; x < src_width; x++) {
1461          luminance = src[0] + src[1] + src[2];
1462 
1463          index = ((luminance - endpoint_luminances[0]) * 15 /
1464                   (endpoint_luminances[1] - endpoint_luminances[0]));
1465          if (index < 0)
1466             index = 0;
1467          else if (index > 15)
1468             index = 15;
1469 
1470          assert(x != 0 || y != 0 || index < 8);
1471 
1472          write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1473 
1474          src += 3;
1475       }
1476 
1477       /* Pad the indices out to the block size */
1478       if (src_width < BLOCK_SIZE)
1479          write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1480 
1481       src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1482    }
1483 
1484    /* Pad the indices out to the block size */
1485    if (src_height < BLOCK_SIZE)
1486       write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1487 }
1488 
/*
 * Convert a float endpoint component into the 10-bit value stored in the
 * block.
 *
 * The value is converted with _mesa_float_to_half() and the half-float bits
 * are rescaled (by 64/31 for unsigned, 32/31 for signed) and shifted right
 * by 6.  For signed data the sign bit is stripped first and negative values
 * are returned as a 10-bit two's-complement number.  For unsigned data,
 * values that are zero or negative yield 0.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int half;
   int magnitude;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   if (half & 0x8000) {
      /* Negative: quantize the magnitude, then negate within 10 bits */
      magnitude = (32 * (half & 0x7fff) / 31) >> 6;

      return -magnitude & ((1 << 10) - 1);
   }

   return (32 * half / 31) >> 6;
}
1518 
1519 static void
compress_rgb_float_block(int src_width,int src_height,const float * src,int src_rowstride,uint8_t * dst,bool is_signed)1520 compress_rgb_float_block(int src_width, int src_height,
1521                          const float *src, int src_rowstride,
1522                          uint8_t *dst,
1523                          bool is_signed)
1524 {
1525    float average_luminance;
1526    float endpoints[2][3];
1527    struct bit_writer writer;
1528    int component, endpoint;
1529    int endpoint_value;
1530 
1531    average_luminance =
1532       get_average_luminance_float(src_width, src_height, src, src_rowstride);
1533    get_endpoints_float(src_width, src_height, src, src_rowstride,
1534                        average_luminance, endpoints, is_signed);
1535 
1536    writer.dst = dst;
1537    writer.pos = 0;
1538    writer.buf = 0;
1539 
1540    write_bits(&writer, 5, 3); /* mode 3 */
1541 
1542    /* Write the endpoints */
1543    for (endpoint = 0; endpoint < 2; endpoint++) {
1544       for (component = 0; component < 3; component++) {
1545          endpoint_value =
1546             get_endpoint_value(endpoints[endpoint][component], is_signed);
1547          write_bits(&writer, 10, endpoint_value);
1548       }
1549    }
1550 
1551    write_rgb_indices_float(&writer,
1552                            src_width, src_height,
1553                            src, src_rowstride,
1554                            endpoints);
1555 }
1556 
1557 static void
compress_rgb_float(int width,int height,const float * src,int src_rowstride,uint8_t * dst,int dst_rowstride,bool is_signed)1558 compress_rgb_float(int width, int height,
1559                    const float *src, int src_rowstride,
1560                    uint8_t *dst, int dst_rowstride,
1561                    bool is_signed)
1562 {
1563    int dst_row_diff;
1564    int y, x;
1565 
1566    if (dst_rowstride >= width * 4)
1567       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1568    else
1569       dst_row_diff = 0;
1570 
1571    for (y = 0; y < height; y += BLOCK_SIZE) {
1572       for (x = 0; x < width; x += BLOCK_SIZE) {
1573          compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1574                                   MIN2(height - y, BLOCK_SIZE),
1575                                   src + x * 3 +
1576                                   y * src_rowstride / sizeof (float),
1577                                   src_rowstride,
1578                                   dst,
1579                                   is_signed);
1580          dst += BLOCK_BYTES;
1581       }
1582       dst += dst_row_diff;
1583    }
1584 }
1585 
1586 static GLboolean
texstore_bptc_rgb_float(TEXSTORE_PARAMS,bool is_signed)1587 texstore_bptc_rgb_float(TEXSTORE_PARAMS,
1588                         bool is_signed)
1589 {
1590    const float *pixels;
1591    const float *tempImage = NULL;
1592    int rowstride;
1593 
1594    if (srcFormat != GL_RGB ||
1595        srcType != GL_FLOAT ||
1596        ctx->_ImageTransferState ||
1597        srcPacking->SwapBytes) {
1598       /* convert image to RGB/float */
1599       GLfloat *tempImageSlices[1];
1600       int rgbRowStride = 3 * srcWidth * sizeof(GLfloat);
1601       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLfloat));
1602       if (!tempImage)
1603          return GL_FALSE; /* out of memory */
1604       tempImageSlices[0] = (GLfloat *) tempImage;
1605       _mesa_texstore(ctx, dims,
1606                      baseInternalFormat,
1607                      MESA_FORMAT_RGB_FLOAT32,
1608                      rgbRowStride, (GLubyte **)tempImageSlices,
1609                      srcWidth, srcHeight, srcDepth,
1610                      srcFormat, srcType, srcAddr,
1611                      srcPacking);
1612 
1613       pixels = tempImage;
1614       rowstride = srcWidth * sizeof(float) * 3;
1615    } else {
1616       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1617                                      srcFormat, srcType, 0, 0);
1618       rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1619                                          srcFormat, srcType);
1620    }
1621 
1622    compress_rgb_float(srcWidth, srcHeight,
1623                       pixels, rowstride,
1624                       dstSlices[0], dstRowStride,
1625                       is_signed);
1626 
1627    free((void *) tempImage);
1628 
1629    return GL_TRUE;
1630 }
1631 
1632 GLboolean
_mesa_texstore_bptc_rgb_signed_float(TEXSTORE_PARAMS)1633 _mesa_texstore_bptc_rgb_signed_float(TEXSTORE_PARAMS)
1634 {
1635    assert(dstFormat == MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT);
1636 
1637    return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1638                                   dstFormat, dstRowStride, dstSlices,
1639                                   srcWidth, srcHeight, srcDepth,
1640                                   srcFormat, srcType,
1641                                   srcAddr, srcPacking,
1642                                   true /* signed */);
1643 }
1644 
1645 GLboolean
_mesa_texstore_bptc_rgb_unsigned_float(TEXSTORE_PARAMS)1646 _mesa_texstore_bptc_rgb_unsigned_float(TEXSTORE_PARAMS)
1647 {
1648    assert(dstFormat == MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT);
1649 
1650    return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1651                                   dstFormat, dstRowStride, dstSlices,
1652                                   srcWidth, srcHeight, srcDepth,
1653                                   srcFormat, srcType,
1654                                   srcAddr, srcPacking,
1655                                   false /* unsigned */);
1656 }
1657