/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_builder.h"

#include "util/format_rgb9e5.h"

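/* Shifts value left by left_shift bits when positive, or logically right by
 * -left_shift bits when negative.  A shift of zero returns the value
 * unmodified.
 */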
static inline nir_ssa_def *
nir_shift(nir_builder *b, nir_ssa_def *value, int left_shift)
{
   if (left_shift > 0)
      return nir_ishl(b, value, nir_imm_int(b, left_shift));
   else if (left_shift < 0)
      return nir_ushr(b, value, nir_imm_int(b, -left_shift));
   else
      return value;
}

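/* Masks src with the given mask, then shifts the result as nir_shift() does.
 * Useful for extracting or placing a bitfield within a word.
 */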
static inline nir_ssa_def *
nir_mask_shift(struct nir_builder *b, nir_ssa_def *src,
               uint32_t mask, int left_shift)
{
   return nir_shift(b, nir_iand(b, src, nir_imm_int(b, mask)), left_shift);
}

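/* ORs the masked-and-shifted src into dst.  This is the basic building block
 * for assembling packed formats one field at a time.
 */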
static inline nir_ssa_def *
nir_mask_shift_or(struct nir_builder *b, nir_ssa_def *dst, nir_ssa_def *src,
                  uint32_t src_mask, int src_left_shift)
{
   return nir_ior(b, nir_mask_shift(b, src, src_mask, src_left_shift), dst);
}

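/* Masks each channel of src down to its low bits[i] bits, discarding any
 * high bits the caller may have left set.
 */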
static inline nir_ssa_def *
nir_format_mask_uvec(nir_builder *b, nir_ssa_def *src, const unsigned *bits)
{
   nir_const_value mask[NIR_MAX_VEC_COMPONENTS];
   memset(mask, 0, sizeof(mask));
   for (unsigned i = 0; i < src->num_components; i++) {
      assert(bits[i] < 32);
      mask[i].u32 = (1u << bits[i]) - 1;
   }
   return nir_iand(b, src, nir_build_imm(b, src->num_components, 32, mask));
}

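/* Sign-extends each bits[i]-bit channel of src to the full bit size by
 * shifting it up against the sign bit and arithmetically shifting it back
 * down.
 */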
static inline nir_ssa_def *
nir_format_sign_extend_ivec(nir_builder *b, nir_ssa_def *src,
                            const unsigned *bits)
{
   assert(src->num_components <= 4);
   nir_ssa_def *comps[4];
   for (unsigned i = 0; i < src->num_components; i++) {
      nir_ssa_def *shift = nir_imm_int(b, src->bit_size - bits[i]);
      comps[i] = nir_ishr(b, nir_ishl(b, nir_channel(b, src, i), shift), shift);
   }
   return nir_vec(b, comps, src->num_components);
}

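/* Unpacks tightly packed channels from packed, starting at the least
 * significant bits.  For example, with bits = {5, 6, 5}, channel 0 comes
 * from bits 0-4, channel 1 from bits 5-10, and channel 2 from bits 11-15.
 * Channels are zero-extended or sign-extended according to sign_extend, and
 * no field may straddle a word boundary of the packed source.
 */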
static inline nir_ssa_def *
nir_format_unpack_int(nir_builder *b, nir_ssa_def *packed,
                      const unsigned *bits, unsigned num_components,
                      bool sign_extend)
{
   assert(num_components >= 1 && num_components <= 4);
   const unsigned bit_size = packed->bit_size;
   nir_ssa_def *comps[4];

   if (bits[0] >= bit_size) {
      assert(bits[0] == bit_size);
      assert(num_components == 1);
      return packed;
   }

   unsigned next_chan = 0;
   unsigned offset = 0;
   for (unsigned i = 0; i < num_components; i++) {
      assert(bits[i] < bit_size);
      assert(offset + bits[i] <= bit_size);
      nir_ssa_def *chan = nir_channel(b, packed, next_chan);
      nir_ssa_def *lshift = nir_imm_int(b, bit_size - (offset + bits[i]));
      nir_ssa_def *rshift = nir_imm_int(b, bit_size - bits[i]);
      if (sign_extend)
         comps[i] = nir_ishr(b, nir_ishl(b, chan, lshift), rshift);
      else
         comps[i] = nir_ushr(b, nir_ishl(b, chan, lshift), rshift);
      offset += bits[i];
      if (offset >= bit_size) {
         next_chan++;
         offset -= bit_size;
      }
   }

   return nir_vec(b, comps, num_components);
}

static inline nir_ssa_def *
nir_format_unpack_uint(nir_builder *b, nir_ssa_def *packed,
                       const unsigned *bits, unsigned num_components)
{
   return nir_format_unpack_int(b, packed, bits, num_components, false);
}

static inline nir_ssa_def *
nir_format_unpack_sint(nir_builder *b, nir_ssa_def *packed,
                       const unsigned *bits, unsigned num_components)
{
   return nir_format_unpack_int(b, packed, bits, num_components, true);
}

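/* Packs the channels of color into a single 32-bit word, with channel i
 * occupying bits[i] bits starting at the accumulated offset.  The channels
 * are assumed to already fit in their fields; any stray high bits will
 * corrupt neighboring fields (use nir_format_pack_uint() to mask first).
 */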
static inline nir_ssa_def *
nir_format_pack_uint_unmasked(nir_builder *b, nir_ssa_def *color,
                              const unsigned *bits, unsigned num_components)
{
   assert(num_components >= 1 && num_components <= 4);
   nir_ssa_def *packed = nir_imm_int(b, 0);
   unsigned offset = 0;
   for (unsigned i = 0; i < num_components; i++) {
      packed = nir_ior(b, packed, nir_shift(b, nir_channel(b, color, i),
                                               offset));
      offset += bits[i];
   }
   assert(offset <= packed->bit_size);

   return packed;
}

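/* Like nir_format_pack_uint_unmasked(), but masks each channel to bits[i]
 * bits first so out-of-range values cannot leak into other fields.
 */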
static inline nir_ssa_def *
nir_format_pack_uint(nir_builder *b, nir_ssa_def *color,
                     const unsigned *bits, unsigned num_components)
{
   return nir_format_pack_uint_unmasked(b, nir_format_mask_uvec(b, color, bits),
                                        bits, num_components);
}

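/* Reinterprets a vector of src_bits-wide values (stored one per channel) as
 * a vector of dst_bits-wide values.  For example, a uvec4 of 8-bit values
 * becomes a single 32-bit value with channel 0 in the low byte.  When
 * widening, the source channels are assumed to be already masked to
 * src_bits.
 */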
static inline nir_ssa_def *
nir_format_bitcast_uvec_unmasked(nir_builder *b, nir_ssa_def *src,
                                 unsigned src_bits, unsigned dst_bits)
{
   assert(src->bit_size >= src_bits && src->bit_size >= dst_bits);
   assert(src_bits == 8 || src_bits == 16 || src_bits == 32);
   assert(dst_bits == 8 || dst_bits == 16 || dst_bits == 32);

   if (src_bits == dst_bits)
      return src;

   const unsigned dst_components =
      DIV_ROUND_UP(src->num_components * src_bits, dst_bits);
   assert(dst_components <= 4);

   nir_ssa_def *dst_chan[4] = {0};
   if (dst_bits > src_bits) {
      unsigned shift = 0;
      unsigned dst_idx = 0;
      for (unsigned i = 0; i < src->num_components; i++) {
         nir_ssa_def *shifted = nir_ishl(b, nir_channel(b, src, i),
                                            nir_imm_int(b, shift));
         if (shift == 0) {
            dst_chan[dst_idx] = shifted;
         } else {
            dst_chan[dst_idx] = nir_ior(b, dst_chan[dst_idx], shifted);
         }

         shift += src_bits;
         if (shift >= dst_bits) {
            dst_idx++;
            shift = 0;
         }
      }
   } else {
      nir_ssa_def *mask = nir_imm_int(b, ~0u >> (32 - dst_bits));

      unsigned src_idx = 0;
      unsigned shift = 0;
      for (unsigned i = 0; i < dst_components; i++) {
         dst_chan[i] = nir_iand(b, nir_ushr_imm(b, nir_channel(b, src, src_idx),
                                                shift),
                                   mask);
         shift += dst_bits;
         if (shift >= src_bits) {
            src_idx++;
            shift = 0;
         }
      }
   }

   return nir_vec(b, dst_chan, dst_components);
}

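/* Builds a vector of per-channel normalization factors as floats:
 * 2^bits[i] - 1 for unsigned formats, or 2^(bits[i] - 1) - 1 for signed
 * ones.
 */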
static inline nir_ssa_def *
_nir_format_norm_factor(nir_builder *b, const unsigned *bits,
                        unsigned num_components,
                        bool is_signed)
{
   nir_const_value factor[NIR_MAX_VEC_COMPONENTS];
   memset(factor, 0, sizeof(factor));
   for (unsigned i = 0; i < num_components; i++) {
      assert(bits[i] <= 32);
      factor[i].f32 = (1ull << (bits[i] - is_signed)) - 1;
   }
   return nir_build_imm(b, num_components, 32, factor);
}

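/* Converts unsigned normalized values to floats: f = u / (2^bits - 1), so
 * the maximum representable integer maps exactly to 1.0.
 */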
static inline nir_ssa_def *
nir_format_unorm_to_float(nir_builder *b, nir_ssa_def *u, const unsigned *bits)
{
   nir_ssa_def *factor =
      _nir_format_norm_factor(b, bits, u->num_components, false);

   return nir_fdiv(b, nir_u2f32(b, u), factor);
}

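/* Converts signed normalized values to floats: f = s / (2^(bits-1) - 1),
 * clamped to -1.0 so that the most negative two's-complement value also
 * maps to -1.0, as the GL and Vulkan specs require.
 */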
static inline nir_ssa_def *
nir_format_snorm_to_float(nir_builder *b, nir_ssa_def *s, const unsigned *bits)
{
   nir_ssa_def *factor =
      _nir_format_norm_factor(b, bits, s->num_components, true);

   return nir_fmax(b, nir_fdiv(b, nir_i2f32(b, s), factor),
                      nir_imm_float(b, -1.0f));
}

static inline nir_ssa_def *
nir_format_float_to_unorm(nir_builder *b, nir_ssa_def *f, const unsigned *bits)
{
   nir_ssa_def *factor =
      _nir_format_norm_factor(b, bits, f->num_components, false);

   /* Clamp to the range [0, 1] */
   f = nir_fsat(b, f);

   return nir_f2u32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
}

static inline nir_ssa_def *
nir_format_float_to_snorm(nir_builder *b, nir_ssa_def *f, const unsigned *bits)
{
   nir_ssa_def *factor =
      _nir_format_norm_factor(b, bits, f->num_components, true);

   /* Clamp to the range [-1, 1] */
   f = nir_fmin(b, nir_fmax(b, f, nir_imm_float(b, -1)), nir_imm_float(b, 1));

   return nir_f2i32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
}

/* Converts a vector of floats to a vector of half-floats packed in the low 16
 * bits.
 */
static inline nir_ssa_def *
nir_format_float_to_half(nir_builder *b, nir_ssa_def *f)
{
   nir_ssa_def *zero = nir_imm_float(b, 0);
   nir_ssa_def *f16comps[4];
   for (unsigned i = 0; i < f->num_components; i++)
      f16comps[i] = nir_pack_half_2x16_split(b, nir_channel(b, f, i), zero);
   return nir_vec(b, f16comps, f->num_components);
}

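/* Applies the sRGB encoding (inverse EOTF): a linear segment of 12.92 * c
 * below c = 0.0031308 and 1.055 * c^(1/2.4) - 0.055 above it, saturated to
 * [0, 1].
 */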
static inline nir_ssa_def *
nir_format_linear_to_srgb(nir_builder *b, nir_ssa_def *c)
{
   nir_ssa_def *linear = nir_fmul(b, c, nir_imm_float(b, 12.92f));
   nir_ssa_def *curved =
      nir_fsub(b, nir_fmul(b, nir_imm_float(b, 1.055f),
                              nir_fpow(b, c, nir_imm_float(b, 1.0 / 2.4))),
                  nir_imm_float(b, 0.055f));

   return nir_fsat(b, nir_bcsel(b, nir_flt(b, c, nir_imm_float(b, 0.0031308f)),
                                   linear, curved));
}

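/* Inverts nir_format_linear_to_srgb(): c / 12.92 for encoded values at or
 * below 0.04045 and ((c + 0.055) / 1.055)^2.4 above it, saturated to [0, 1].
 */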
static inline nir_ssa_def *
nir_format_srgb_to_linear(nir_builder *b, nir_ssa_def *c)
{
   nir_ssa_def *linear = nir_fdiv(b, c, nir_imm_float(b, 12.92f));
   nir_ssa_def *curved =
      nir_fpow(b, nir_fdiv(b, nir_fadd(b, c, nir_imm_float(b, 0.055f)),
                              nir_imm_float(b, 1.055f)),
                  nir_imm_float(b, 2.4f));

   return nir_fsat(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 0.04045f), c),
                                   linear, curved));
}

/* Clamps a vector of uints so they don't extend beyond the given number of
 * bits per channel.
 */
static inline nir_ssa_def *
nir_format_clamp_uint(nir_builder *b, nir_ssa_def *f, const unsigned *bits)
{
   if (bits[0] == 32)
      return f;

   nir_const_value max[NIR_MAX_VEC_COMPONENTS];
   memset(max, 0, sizeof(max));
   for (unsigned i = 0; i < f->num_components; i++) {
      assert(bits[i] < 32);
      max[i].u32 = (1u << bits[i]) - 1;
   }
   return nir_umin(b, f, nir_build_imm(b, f->num_components, 32, max));
}

/* Clamps a vector of sints so they don't extend beyond the given number of
 * bits per channel.
 */
static inline nir_ssa_def *
nir_format_clamp_sint(nir_builder *b, nir_ssa_def *f, const unsigned *bits)
{
   if (bits[0] == 32)
      return f;

   nir_const_value min[NIR_MAX_VEC_COMPONENTS], max[NIR_MAX_VEC_COMPONENTS];
   memset(min, 0, sizeof(min));
   memset(max, 0, sizeof(max));
   for (unsigned i = 0; i < f->num_components; i++) {
      assert(bits[i] < 32);
      max[i].i32 = (1 << (bits[i] - 1)) - 1;
      min[i].i32 = -(1 << (bits[i] - 1));
   }
   f = nir_imin(b, f, nir_build_imm(b, f->num_components, 32, max));
   f = nir_imax(b, f, nir_build_imm(b, f->num_components, 32, min));

   return f;
}

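/* Unpacks a packed R11G11B10F value into three 32-bit floats.  Each field
 * has the same 5-bit exponent layout as a 16-bit float but a shorter
 * mantissa and no sign bit, so shifting it into place in a half-float bit
 * pattern and unpacking that yields the right value.
 */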
static inline nir_ssa_def *
nir_format_unpack_11f11f10f(nir_builder *b, nir_ssa_def *packed)
{
   nir_ssa_def *chans[3];
   chans[0] = nir_mask_shift(b, packed, 0x000007ff, 4);
   chans[1] = nir_mask_shift(b, packed, 0x003ff800, -7);
   chans[2] = nir_mask_shift(b, packed, 0xffc00000, -17);

   for (unsigned i = 0; i < 3; i++)
      chans[i] = nir_unpack_half_2x16_split_x(b, chans[i]);

   return nir_vec(b, chans, 3);
}

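/* Packs three floats into the R11G11B10F layout: red in bits 0-10, green in
 * bits 11-21, and blue in bits 22-31.  Values are clamped to be non-negative
 * first since these formats have no sign bit.
 */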
static inline nir_ssa_def *
nir_format_pack_11f11f10f(nir_builder *b, nir_ssa_def *color)
{
   /* 10 and 11-bit floats are unsigned.  Clamp to non-negative */
   nir_ssa_def *clamped = nir_fmax(b, color, nir_imm_float(b, 0));

   nir_ssa_def *undef = nir_ssa_undef(b, 1, color->bit_size);
   nir_ssa_def *p1 = nir_pack_half_2x16_split(b, nir_channel(b, clamped, 0),
                                                 nir_channel(b, clamped, 1));
   nir_ssa_def *p2 = nir_pack_half_2x16_split(b, nir_channel(b, clamped, 2),
                                                 undef);

   /* A 10 or 11-bit float has the same exponent as a 16-bit float but with
    * fewer mantissa bits and no sign bit.  All we have to do is throw away
    * the sign bit and the bottom mantissa bits and shift it into place.
    */
   nir_ssa_def *packed = nir_imm_int(b, 0);
   packed = nir_mask_shift_or(b, packed, p1, 0x00007ff0, -4);
   packed = nir_mask_shift_or(b, packed, p1, 0x7ff00000, -9);
   packed = nir_mask_shift_or(b, packed, p2, 0x00007fe0, 17);

   return packed;
}

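/* Packs three non-negative floats into the shared-exponent RGB9E5 format:
 * 9-bit mantissas for red, green, and blue in the low 27 bits and a 5-bit
 * shared exponent on top.  This mirrors the CPU-side float3_to_rgb9e5()
 * from util/format_rgb9e5.h, one step at a time.
 */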
static inline nir_ssa_def *
nir_format_pack_r9g9b9e5(nir_builder *b, nir_ssa_def *color)
{
   /* See also float3_to_rgb9e5 */

   /* First, we need to clamp it to range. */
   nir_ssa_def *clamped = nir_fmin(b, color, nir_imm_float(b, MAX_RGB9E5));

   /* Get rid of negatives and NaN */
   clamped = nir_bcsel(b, nir_ult(b, nir_imm_int(b, 0x7f800000), color),
                          nir_imm_float(b, 0), clamped);

   /* maxrgb.u = MAX3(rc.u, gc.u, bc.u); */
   nir_ssa_def *maxu = nir_umax(b, nir_channel(b, clamped, 0),
                       nir_umax(b, nir_channel(b, clamped, 1),
                                   nir_channel(b, clamped, 2)));

   /* maxrgb.u += maxrgb.u & (1 << (23-9)); */
   maxu = nir_iadd(b, maxu, nir_iand(b, maxu, nir_imm_int(b, 1 << 14)));

   /* exp_shared = MAX2((maxrgb.u >> 23), -RGB9E5_EXP_BIAS - 1 + 127) +
    *              1 + RGB9E5_EXP_BIAS - 127;
    */
   nir_ssa_def *exp_shared =
      nir_iadd(b, nir_umax(b, nir_ushr_imm(b, maxu, 23),
                              nir_imm_int(b, -RGB9E5_EXP_BIAS - 1 + 127)),
                  nir_imm_int(b, 1 + RGB9E5_EXP_BIAS - 127));

   /* revdenom_biasedexp = 127 - (exp_shared - RGB9E5_EXP_BIAS -
    *                             RGB9E5_MANTISSA_BITS) + 1;
    */
   nir_ssa_def *revdenom_biasedexp =
      nir_isub(b, nir_imm_int(b, 127 + RGB9E5_EXP_BIAS +
                                 RGB9E5_MANTISSA_BITS + 1),
                  exp_shared);

   /* revdenom.u = revdenom_biasedexp << 23; */
   nir_ssa_def *revdenom =
      nir_ishl(b, revdenom_biasedexp, nir_imm_int(b, 23));

   /* rm = (int) (rc.f * revdenom.f);
    * gm = (int) (gc.f * revdenom.f);
    * bm = (int) (bc.f * revdenom.f);
    */
   nir_ssa_def *mantissa =
      nir_f2i32(b, nir_fmul(b, clamped, revdenom));

   /* rm = (rm & 1) + (rm >> 1);
    * gm = (gm & 1) + (gm >> 1);
    * bm = (bm & 1) + (bm >> 1);
    */
   mantissa = nir_iadd(b, nir_iand_imm(b, mantissa, 1),
                          nir_ushr_imm(b, mantissa, 1));

   nir_ssa_def *packed = nir_channel(b, mantissa, 0);
   packed = nir_mask_shift_or(b, packed, nir_channel(b, mantissa, 1), ~0, 9);
   packed = nir_mask_shift_or(b, packed, nir_channel(b, mantissa, 2), ~0, 18);
   packed = nir_mask_shift_or(b, packed, exp_shared, ~0, 27);

   return packed;
}
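
/* A sketch of typical usage (the variable names here are hypothetical):
 * converting a packed RGBA8 unorm value in "packed" to a vec4 of floats
 * might look like
 *
 *    static const unsigned bits[4] = { 8, 8, 8, 8 };
 *    nir_ssa_def *ints = nir_format_unpack_uint(b, packed, bits, 4);
 *    nir_ssa_def *rgba = nir_format_unorm_to_float(b, ints, bits);
 *
 * and the reverse direction composes nir_format_float_to_unorm() with
 * nir_format_pack_uint().
 */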