1 /*
2  * Copyright © 2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /*
25  * This lowering pass supports (as configured via nir_lower_tex_options)
26  * various texture related conversions:
27  *   + texture projector lowering: converts the coordinate division for
28  *     texture projection to be done in ALU instructions instead of
29  *     asking the texture operation to do so.
30  *   + lowering RECT: converts the un-normalized RECT texture coordinates
31  *     to normalized coordinates with txs plus ALU instructions
32  *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33  *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
34  *     Note that this automatically triggers texture projector lowering if
35  *     needed, since clamping must happen after projector lowering.
36  */
37 
38 #include "nir.h"
39 #include "nir_builder.h"
40 #include "nir_builtin_builder.h"
41 #include "nir_format_convert.h"
42 
/*
 * YUV -> RGB colour-space conversion tables, consumed by
 * convert_yuv_to_rgb().
 *
 * Every *_csc_coeffs table is three groups of three weights: entries
 * [0..2] are multiplied by the Y sample, [3..5] by the U sample and
 * [6..8] by the V sample.  Within a group, the three values contribute to
 * the first, second and third result channel respectively.  The matching
 * *_csc_offsets table is the constant term added to those three channels.
 */

/* BT.601 */
static float bt601_csc_coeffs[9] = {
   /* Y */ 1.16438356f,  1.16438356f, 1.16438356f,
   /* U */ 0.0f,        -0.39176229f, 2.01723214f,
   /* V */ 1.59602678f, -0.81296764f, 0.0f,
};
static float bt601_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};

/* BT.709 */
static float bt709_csc_coeffs[9] = {
   /* Y */ 1.16438356f,  1.16438356f, 1.16438356f,
   /* U */ 0.0f,        -0.21324861f, 2.11240179f,
   /* V */ 1.79274107f, -0.53290933f, 0.0f,
};
static float bt709_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};

/* BT.2020 */
static float bt2020_csc_coeffs[9] = {
   /* Y */ 1.16438356f,  1.16438356f, 1.16438356f,
   /* U */ 0.0f,        -0.18732610f, 2.14177232f,
   /* V */ 1.67867411f, -0.65042432f, 0.0f,
};
static float bt2020_csc_offsets[3] = {
   -0.915687932f, 0.347458499f, -1.148145075f
};
68 
69 static bool
project_src(nir_builder * b,nir_tex_instr * tex)70 project_src(nir_builder *b, nir_tex_instr *tex)
71 {
72    /* Find the projector in the srcs list, if present. */
73    int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
74    if (proj_index < 0)
75       return false;
76 
77    b->cursor = nir_before_instr(&tex->instr);
78 
79    nir_ssa_def *inv_proj =
80       nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
81 
82    /* Walk through the sources projecting the arguments. */
83    for (unsigned i = 0; i < tex->num_srcs; i++) {
84       switch (tex->src[i].src_type) {
85       case nir_tex_src_coord:
86       case nir_tex_src_comparator:
87          break;
88       default:
89          continue;
90       }
91       nir_ssa_def *unprojected =
92          nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
93       nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
94 
95       /* Array indices don't get projected, so make an new vector with the
96        * coordinate's array index untouched.
97        */
98       if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
99          switch (tex->coord_components) {
100          case 4:
101             projected = nir_vec4(b,
102                                  nir_channel(b, projected, 0),
103                                  nir_channel(b, projected, 1),
104                                  nir_channel(b, projected, 2),
105                                  nir_channel(b, unprojected, 3));
106             break;
107          case 3:
108             projected = nir_vec3(b,
109                                  nir_channel(b, projected, 0),
110                                  nir_channel(b, projected, 1),
111                                  nir_channel(b, unprojected, 2));
112             break;
113          case 2:
114             projected = nir_vec2(b,
115                                  nir_channel(b, projected, 0),
116                                  nir_channel(b, unprojected, 1));
117             break;
118          default:
119             unreachable("bad texture coord count for array");
120             break;
121          }
122       }
123 
124       nir_instr_rewrite_src(&tex->instr,
125                             &tex->src[i].src,
126                             nir_src_for_ssa(projected));
127    }
128 
129    nir_tex_instr_remove_src(tex, proj_index);
130    return true;
131 }
132 
133 static bool
lower_offset(nir_builder * b,nir_tex_instr * tex)134 lower_offset(nir_builder *b, nir_tex_instr *tex)
135 {
136    int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
137    if (offset_index < 0)
138       return false;
139 
140    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
141    assert(coord_index >= 0);
142 
143    assert(tex->src[offset_index].src.is_ssa);
144    assert(tex->src[coord_index].src.is_ssa);
145    nir_ssa_def *offset = tex->src[offset_index].src.ssa;
146    nir_ssa_def *coord = tex->src[coord_index].src.ssa;
147 
148    b->cursor = nir_before_instr(&tex->instr);
149 
150    nir_ssa_def *offset_coord;
151    if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
152       if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
153          offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
154       } else {
155          nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
156          nir_ssa_def *scale = nir_frcp(b, txs);
157 
158          offset_coord = nir_fadd(b, coord,
159                                  nir_fmul(b,
160                                           nir_i2f32(b, offset),
161                                           scale));
162       }
163    } else {
164       offset_coord = nir_iadd(b, coord, offset);
165    }
166 
167    if (tex->is_array) {
168       /* The offset is not applied to the array index */
169       if (tex->coord_components == 2) {
170          offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
171                                     nir_channel(b, coord, 1));
172       } else if (tex->coord_components == 3) {
173          offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
174                                     nir_channel(b, offset_coord, 1),
175                                     nir_channel(b, coord, 2));
176       } else {
177          unreachable("Invalid number of components");
178       }
179    }
180 
181    nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
182                          nir_src_for_ssa(offset_coord));
183 
184    nir_tex_instr_remove_src(tex, offset_index);
185 
186    return true;
187 }
188 
189 static void
lower_rect(nir_builder * b,nir_tex_instr * tex)190 lower_rect(nir_builder *b, nir_tex_instr *tex)
191 {
192    /* Set the sampler_dim to 2D here so that get_texture_size picks up the
193     * right dimensionality.
194     */
195    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
196 
197    nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
198    nir_ssa_def *scale = nir_frcp(b, txs);
199 
200    /* Walk through the sources normalizing the requested arguments. */
201    for (unsigned i = 0; i < tex->num_srcs; i++) {
202       if (tex->src[i].src_type != nir_tex_src_coord)
203          continue;
204 
205       nir_ssa_def *coords =
206          nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
207       nir_instr_rewrite_src(&tex->instr,
208                             &tex->src[i].src,
209                             nir_src_for_ssa(nir_fmul(b, coords, scale)));
210    }
211 }
212 
213 static void
lower_implicit_lod(nir_builder * b,nir_tex_instr * tex)214 lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
215 {
216    assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
217    assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
218    assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
219    assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);
220 
221    b->cursor = nir_before_instr(&tex->instr);
222 
223    nir_ssa_def *lod = nir_get_texture_lod(b, tex);
224 
225    int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
226    if (bias_idx >= 0) {
227       /* If we have a bias, add it in */
228       lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
229       nir_tex_instr_remove_src(tex, bias_idx);
230    }
231 
232    int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
233    if (min_lod_idx >= 0) {
234       /* If we have a minimum LOD, clamp LOD accordingly */
235       lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
236       nir_tex_instr_remove_src(tex, min_lod_idx);
237    }
238 
239    nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
240    tex->op = nir_texop_txl;
241 }
242 
243 static nir_ssa_def *
sample_plane(nir_builder * b,nir_tex_instr * tex,int plane,const nir_lower_tex_options * options)244 sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
245              const nir_lower_tex_options *options)
246 {
247    assert(tex->dest.is_ssa);
248    assert(nir_tex_instr_dest_size(tex) == 4);
249    assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
250    assert(tex->op == nir_texop_tex);
251    assert(tex->coord_components == 2);
252 
253    nir_tex_instr *plane_tex =
254       nir_tex_instr_create(b->shader, tex->num_srcs + 1);
255    for (unsigned i = 0; i < tex->num_srcs; i++) {
256       nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src, plane_tex);
257       plane_tex->src[i].src_type = tex->src[i].src_type;
258    }
259    plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
260    plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
261    plane_tex->op = nir_texop_tex;
262    plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
263    plane_tex->dest_type = nir_type_float;
264    plane_tex->coord_components = 2;
265 
266    plane_tex->texture_index = tex->texture_index;
267    plane_tex->sampler_index = tex->sampler_index;
268 
269    nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4,
270          nir_dest_bit_size(tex->dest), NULL);
271 
272    nir_builder_instr_insert(b, &plane_tex->instr);
273 
274    /* If scaling_factor is set, return a scaled value. */
275    if (options->scale_factors[tex->texture_index])
276       return nir_fmul_imm(b, &plane_tex->dest.ssa,
277                           options->scale_factors[tex->texture_index]);
278 
279    return &plane_tex->dest.ssa;
280 }
281 
282 static void
convert_yuv_to_rgb(nir_builder * b,nir_tex_instr * tex,nir_ssa_def * y,nir_ssa_def * u,nir_ssa_def * v,nir_ssa_def * a,const nir_lower_tex_options * options)283 convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
284                    nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
285                    nir_ssa_def *a,
286                    const nir_lower_tex_options *options)
287 {
288 
289    float *offset_vals;
290    float *m_vals;
291    assert((options->bt709_external & options->bt2020_external) == 0);
292    if (options->bt709_external & (1 << tex->texture_index)) {
293       m_vals = bt709_csc_coeffs;
294       offset_vals = bt709_csc_offsets;
295    } else if (options->bt2020_external & (1 << tex->texture_index)) {
296       m_vals = bt2020_csc_coeffs;
297       offset_vals = bt2020_csc_offsets;
298    } else {
299       m_vals = bt601_csc_coeffs;
300       offset_vals = bt601_csc_offsets;
301    }
302 
303    nir_const_value m[3][4] = {
304       { { .f32 = m_vals[0] }, { .f32 =  m_vals[1] }, { .f32 = m_vals[2] }, { .f32 = 0.0f } },
305       { { .f32 = m_vals[3] }, { .f32 =  m_vals[4] }, { .f32 = m_vals[5] }, { .f32 = 0.0f } },
306       { { .f32 = m_vals[6] }, { .f32 =  m_vals[7] }, { .f32 = m_vals[8] }, { .f32 = 0.0f } },
307    };
308    unsigned bit_size = nir_dest_bit_size(tex->dest);
309 
310    nir_ssa_def *offset =
311       nir_vec4(b,
312                nir_imm_float(b, offset_vals[0]),
313                nir_imm_float(b, offset_vals[1]),
314                nir_imm_float(b, offset_vals[2]),
315                a);
316 
317    offset = nir_f2fN(b, offset, bit_size);
318 
319    nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[0]), bit_size);
320    nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[1]), bit_size);
321    nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[2]), bit_size);
322 
323    nir_ssa_def *result =
324       nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));
325 
326    nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
327 }
328 
329 static void
lower_y_uv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)330 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
331                     const nir_lower_tex_options *options)
332 {
333    b->cursor = nir_after_instr(&tex->instr);
334 
335    nir_ssa_def *y = sample_plane(b, tex, 0, options);
336    nir_ssa_def *uv = sample_plane(b, tex, 1, options);
337 
338    convert_yuv_to_rgb(b, tex,
339                       nir_channel(b, y, 0),
340                       nir_channel(b, uv, 0),
341                       nir_channel(b, uv, 1),
342                       nir_imm_float(b, 1.0f),
343                       options);
344 }
345 
346 static void
lower_y_u_v_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)347 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
348                      const nir_lower_tex_options *options)
349 {
350    b->cursor = nir_after_instr(&tex->instr);
351 
352    nir_ssa_def *y = sample_plane(b, tex, 0, options);
353    nir_ssa_def *u = sample_plane(b, tex, 1, options);
354    nir_ssa_def *v = sample_plane(b, tex, 2, options);
355 
356    convert_yuv_to_rgb(b, tex,
357                       nir_channel(b, y, 0),
358                       nir_channel(b, u, 0),
359                       nir_channel(b, v, 0),
360                       nir_imm_float(b, 1.0f),
361                       options);
362 }
363 
364 static void
lower_yx_xuxv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)365 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
366                        const nir_lower_tex_options *options)
367 {
368    b->cursor = nir_after_instr(&tex->instr);
369 
370    nir_ssa_def *y = sample_plane(b, tex, 0, options);
371    nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);
372 
373    convert_yuv_to_rgb(b, tex,
374                       nir_channel(b, y, 0),
375                       nir_channel(b, xuxv, 1),
376                       nir_channel(b, xuxv, 3),
377                       nir_imm_float(b, 1.0f),
378                       options);
379 }
380 
381 static void
lower_xy_uxvx_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)382 lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
383                        const nir_lower_tex_options *options)
384 {
385   b->cursor = nir_after_instr(&tex->instr);
386 
387   nir_ssa_def *y = sample_plane(b, tex, 0, options);
388   nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);
389 
390   convert_yuv_to_rgb(b, tex,
391                      nir_channel(b, y, 1),
392                      nir_channel(b, uxvx, 0),
393                      nir_channel(b, uxvx, 2),
394                      nir_imm_float(b, 1.0f),
395                      options);
396 }
397 
398 static void
lower_ayuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)399 lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
400                     const nir_lower_tex_options *options)
401 {
402   b->cursor = nir_after_instr(&tex->instr);
403 
404   nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);
405 
406   convert_yuv_to_rgb(b, tex,
407                      nir_channel(b, ayuv, 2),
408                      nir_channel(b, ayuv, 1),
409                      nir_channel(b, ayuv, 0),
410                      nir_channel(b, ayuv, 3),
411                      options);
412 }
413 
414 static void
lower_xyuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)415 lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
416                     const nir_lower_tex_options *options)
417 {
418   b->cursor = nir_after_instr(&tex->instr);
419 
420   nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);
421 
422   convert_yuv_to_rgb(b, tex,
423                      nir_channel(b, xyuv, 2),
424                      nir_channel(b, xyuv, 1),
425                      nir_channel(b, xyuv, 0),
426                      nir_imm_float(b, 1.0f),
427                      options);
428 }
429 
430 static void
lower_yuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)431 lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
432                    const nir_lower_tex_options *options)
433 {
434   b->cursor = nir_after_instr(&tex->instr);
435 
436   nir_ssa_def *yuv = sample_plane(b, tex, 0, options);
437 
438   convert_yuv_to_rgb(b, tex,
439                      nir_channel(b, yuv, 0),
440                      nir_channel(b, yuv, 1),
441                      nir_channel(b, yuv, 2),
442                      nir_imm_float(b, 1.0f),
443                      options);
444 }
445 
446 /*
447  * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
448  * computed from the gradients.
449  */
450 static void
replace_gradient_with_lod(nir_builder * b,nir_ssa_def * lod,nir_tex_instr * tex)451 replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
452 {
453    assert(tex->op == nir_texop_txd);
454 
455    nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
456    nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));
457 
458    int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
459    if (min_lod_idx >= 0) {
460       /* If we have a minimum LOD, clamp LOD accordingly */
461       lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
462       nir_tex_instr_remove_src(tex, min_lod_idx);
463    }
464 
465    nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
466    tex->op = nir_texop_txl;
467 }
468 
469 static void
lower_gradient_cube_map(nir_builder * b,nir_tex_instr * tex)470 lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
471 {
472    assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
473    assert(tex->op == nir_texop_txd);
474    assert(tex->dest.is_ssa);
475 
476    /* Use textureSize() to get the width and height of LOD 0 */
477    nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
478 
479    /* Cubemap texture lookups first generate a texture coordinate normalized
480     * to [-1, 1] on the appropiate face. The appropiate face is determined
481     * by which component has largest magnitude and its sign. The texture
482     * coordinate is the quotient of the remaining texture coordinates against
483     * that absolute value of the component of largest magnitude. This
484     * division requires that the computing of the derivative of the texel
485     * coordinate must use the quotient rule. The high level GLSL code is as
486     * follows:
487     *
488     * Step 1: selection
489     *
490     * vec3 abs_p, Q, dQdx, dQdy;
491     * abs_p = abs(ir->coordinate);
492     * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
493     *    Q = ir->coordinate.yzx;
494     *    dQdx = ir->lod_info.grad.dPdx.yzx;
495     *    dQdy = ir->lod_info.grad.dPdy.yzx;
496     * }
497     * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
498     *    Q = ir->coordinate.xzy;
499     *    dQdx = ir->lod_info.grad.dPdx.xzy;
500     *    dQdy = ir->lod_info.grad.dPdy.xzy;
501     * }
502     * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
503     *    Q = ir->coordinate;
504     *    dQdx = ir->lod_info.grad.dPdx;
505     *    dQdy = ir->lod_info.grad.dPdy;
506     * }
507     *
508     * Step 2: use quotient rule to compute derivative. The normalized to
509     * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
510     * only concerned with the magnitudes of the derivatives whose values are
511     * not affected by the sign. We drop the sign from the computation.
512     *
513     * vec2 dx, dy;
514     * float recip;
515     *
516     * recip = 1.0 / Q.z;
517     * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
518     * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
519     *
520     * Step 3: compute LOD. At this point we have the derivatives of the
521     * texture coordinates normalized to [-1,1]. We take the LOD to be
522     *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
523     *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
524     *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
525     *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
526     *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
527     * where L is the dimension of the cubemap. The code is:
528     *
529     * float M, result;
530     * M = max(dot(dx, dx), dot(dy, dy));
531     * L = textureSize(sampler, 0).x;
532     * result = -1.0 + 0.5 * log2(L * L * M);
533     */
534 
535    /* coordinate */
536    nir_ssa_def *p =
537       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;
538 
539    /* unmodified dPdx, dPdy values */
540    nir_ssa_def *dPdx =
541       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
542    nir_ssa_def *dPdy =
543       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
544 
545    nir_ssa_def *abs_p = nir_fabs(b, p);
546    nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
547    nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
548    nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);
549 
550    /* 1. compute selector */
551    nir_ssa_def *Q, *dQdx, *dQdy;
552 
553    nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
554    nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));
555 
556    unsigned yzx[3] = { 1, 2, 0 };
557    unsigned xzy[3] = { 0, 2, 1 };
558 
559    Q = nir_bcsel(b, cond_z,
560                  p,
561                  nir_bcsel(b, cond_y,
562                            nir_swizzle(b, p, xzy, 3),
563                            nir_swizzle(b, p, yzx, 3)));
564 
565    dQdx = nir_bcsel(b, cond_z,
566                     dPdx,
567                     nir_bcsel(b, cond_y,
568                               nir_swizzle(b, dPdx, xzy, 3),
569                               nir_swizzle(b, dPdx, yzx, 3)));
570 
571    dQdy = nir_bcsel(b, cond_z,
572                     dPdy,
573                     nir_bcsel(b, cond_y,
574                               nir_swizzle(b, dPdy, xzy, 3),
575                               nir_swizzle(b, dPdy, yzx, 3)));
576 
577    /* 2. quotient rule */
578 
579    /* tmp = Q.xy * recip;
580     * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
581     * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
582     */
583    nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));
584 
585    nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
586    nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);
587 
588    nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
589    nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
590    nir_ssa_def *dx =
591       nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));
592 
593    nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
594    nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
595    nir_ssa_def *dy =
596       nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));
597 
598    /* M = max(dot(dx, dx), dot(dy, dy)); */
599    nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));
600 
601    /* size has textureSize() of LOD 0 */
602    nir_ssa_def *L = nir_channel(b, size, 0);
603 
604    /* lod = -1.0 + 0.5 * log2(L * L * M); */
605    nir_ssa_def *lod =
606       nir_fadd(b,
607                nir_imm_float(b, -1.0f),
608                nir_fmul(b,
609                         nir_imm_float(b, 0.5f),
610                         nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));
611 
612    /* 3. Replace the gradient instruction with an equivalent lod instruction */
613    replace_gradient_with_lod(b, lod, tex);
614 }
615 
616 static void
lower_gradient(nir_builder * b,nir_tex_instr * tex)617 lower_gradient(nir_builder *b, nir_tex_instr *tex)
618 {
619    /* Cubes are more complicated and have their own function */
620    if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
621       lower_gradient_cube_map(b, tex);
622       return;
623    }
624 
625    assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
626    assert(tex->op == nir_texop_txd);
627    assert(tex->dest.is_ssa);
628 
629    /* Use textureSize() to get the width and height of LOD 0 */
630    unsigned component_mask;
631    switch (tex->sampler_dim) {
632    case GLSL_SAMPLER_DIM_3D:
633       component_mask = 7;
634       break;
635    case GLSL_SAMPLER_DIM_1D:
636       component_mask = 1;
637       break;
638    default:
639       component_mask = 3;
640       break;
641    }
642 
643    nir_ssa_def *size =
644       nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
645                       component_mask);
646 
647    /* Scale the gradients by width and height.  Effectively, the incoming
648     * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
649     * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
650     */
651    nir_ssa_def *ddx =
652       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
653    nir_ssa_def *ddy =
654       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
655 
656    nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
657    nir_ssa_def *dPdy = nir_fmul(b, ddy, size);
658 
659    nir_ssa_def *rho;
660    if (dPdx->num_components == 1) {
661       rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
662    } else {
663       rho = nir_fmax(b,
664                      nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
665                      nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
666    }
667 
668    /* lod = log2(rho).  We're ignoring GL state biases for now. */
669    nir_ssa_def *lod = nir_flog2(b, rho);
670 
671    /* Replace the gradient instruction with an equivalent lod instruction */
672    replace_gradient_with_lod(b, lod, tex);
673 }
674 
675 static void
saturate_src(nir_builder * b,nir_tex_instr * tex,unsigned sat_mask)676 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
677 {
678    b->cursor = nir_before_instr(&tex->instr);
679 
680    /* Walk through the sources saturating the requested arguments. */
681    for (unsigned i = 0; i < tex->num_srcs; i++) {
682       if (tex->src[i].src_type != nir_tex_src_coord)
683          continue;
684 
685       nir_ssa_def *src =
686          nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
687 
688       /* split src into components: */
689       nir_ssa_def *comp[4];
690 
691       assume(tex->coord_components >= 1);
692 
693       for (unsigned j = 0; j < tex->coord_components; j++)
694          comp[j] = nir_channel(b, src, j);
695 
696       /* clamp requested components, array index does not get clamped: */
697       unsigned ncomp = tex->coord_components;
698       if (tex->is_array)
699          ncomp--;
700 
701       for (unsigned j = 0; j < ncomp; j++) {
702          if ((1 << j) & sat_mask) {
703             if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
704                /* non-normalized texture coords, so clamp to texture
705                 * size rather than [0.0, 1.0]
706                 */
707                nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
708                comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
709                comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
710             } else {
711                comp[j] = nir_fsat(b, comp[j]);
712             }
713          }
714       }
715 
716       /* and move the result back into a single vecN: */
717       src = nir_vec(b, comp, tex->coord_components);
718 
719       nir_instr_rewrite_src(&tex->instr,
720                             &tex->src[i].src,
721                             nir_src_for_ssa(src));
722    }
723 }
724 
725 static nir_ssa_def *
get_zero_or_one(nir_builder * b,nir_alu_type type,uint8_t swizzle_val)726 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
727 {
728    nir_const_value v[4];
729 
730    memset(&v, 0, sizeof(v));
731 
732    if (swizzle_val == 4) {
733       v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
734    } else {
735       assert(swizzle_val == 5);
736       if (type == nir_type_float)
737          v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
738       else
739          v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
740    }
741 
742    return nir_build_imm(b, 4, 32, v);
743 }
744 
745 static void
swizzle_tg4_broadcom(nir_builder * b,nir_tex_instr * tex)746 swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
747 {
748    assert(tex->dest.is_ssa);
749 
750    b->cursor = nir_after_instr(&tex->instr);
751 
752    assert(nir_tex_instr_dest_size(tex) == 4);
753    unsigned swiz[4] = { 2, 3, 1, 0 };
754    nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
755 
756    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
757                                   swizzled->parent_instr);
758 }
759 
760 static void
swizzle_result(nir_builder * b,nir_tex_instr * tex,const uint8_t swizzle[4])761 swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
762 {
763    assert(tex->dest.is_ssa);
764 
765    b->cursor = nir_after_instr(&tex->instr);
766 
767    nir_ssa_def *swizzled;
768    if (tex->op == nir_texop_tg4) {
769       if (swizzle[tex->component] < 4) {
770          /* This one's easy */
771          tex->component = swizzle[tex->component];
772          return;
773       } else {
774          swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
775       }
776    } else {
777       assert(nir_tex_instr_dest_size(tex) == 4);
778       if (swizzle[0] < 4 && swizzle[1] < 4 &&
779           swizzle[2] < 4 && swizzle[3] < 4) {
780          unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
781          /* We have no 0s or 1s, just emit a swizzling MOV */
782          swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
783       } else {
784          nir_ssa_def *srcs[4];
785          for (unsigned i = 0; i < 4; i++) {
786             if (swizzle[i] < 4) {
787                srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
788             } else {
789                srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
790             }
791          }
792          swizzled = nir_vec(b, srcs, 4);
793       }
794    }
795 
796    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
797                                   swizzled->parent_instr);
798 }
799 
800 static void
linearize_srgb_result(nir_builder * b,nir_tex_instr * tex)801 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
802 {
803    assert(tex->dest.is_ssa);
804    assert(nir_tex_instr_dest_size(tex) == 4);
805    assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
806 
807    b->cursor = nir_after_instr(&tex->instr);
808 
809    nir_ssa_def *rgb =
810       nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));
811 
812    /* alpha is untouched: */
813    nir_ssa_def *result = nir_vec4(b,
814                                   nir_channel(b, rgb, 0),
815                                   nir_channel(b, rgb, 1),
816                                   nir_channel(b, rgb, 2),
817                                   nir_channel(b, &tex->dest.ssa, 3));
818 
819    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
820                                   result->parent_instr);
821 }
822 
823 /**
824  * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
825  * i16, or u16, or a single unorm4x8 value.
826  *
827  * Note that we don't change the destination num_components, because
828  * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
829  * to not store the other channels, given that nothing at the NIR level will
830  * read them.
831  */
832 static void
lower_tex_packing(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)833 lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
834                   const nir_lower_tex_options *options)
835 {
836    nir_ssa_def *color = &tex->dest.ssa;
837 
838    b->cursor = nir_after_instr(&tex->instr);
839 
840    switch (options->lower_tex_packing[tex->sampler_index]) {
841    case nir_lower_tex_packing_none:
842       return;
843 
844    case nir_lower_tex_packing_16: {
845       static const unsigned bits[4] = {16, 16, 16, 16};
846 
847       switch (nir_alu_type_get_base_type(tex->dest_type)) {
848       case nir_type_float:
849          switch (nir_tex_instr_dest_size(tex)) {
850          case 1:
851             assert(tex->is_shadow && tex->is_new_style_shadow);
852             color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
853             break;
854          case 2: {
855             nir_ssa_def *rg = nir_channel(b, color, 0);
856             color = nir_vec2(b,
857                              nir_unpack_half_2x16_split_x(b, rg),
858                              nir_unpack_half_2x16_split_y(b, rg));
859             break;
860          }
861          case 4: {
862             nir_ssa_def *rg = nir_channel(b, color, 0);
863             nir_ssa_def *ba = nir_channel(b, color, 1);
864             color = nir_vec4(b,
865                              nir_unpack_half_2x16_split_x(b, rg),
866                              nir_unpack_half_2x16_split_y(b, rg),
867                              nir_unpack_half_2x16_split_x(b, ba),
868                              nir_unpack_half_2x16_split_y(b, ba));
869             break;
870          }
871          default:
872             unreachable("wrong dest_size");
873          }
874          break;
875 
876       case nir_type_int:
877          color = nir_format_unpack_sint(b, color, bits, 4);
878          break;
879 
880       case nir_type_uint:
881          color = nir_format_unpack_uint(b, color, bits, 4);
882          break;
883 
884       default:
885          unreachable("unknown base type");
886       }
887       break;
888    }
889 
890    case nir_lower_tex_packing_8:
891       assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
892       color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
893       break;
894    }
895 
896    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(color),
897                                   color->parent_instr);
898 }
899 
900 static bool
sampler_index_lt(nir_tex_instr * tex,unsigned max)901 sampler_index_lt(nir_tex_instr *tex, unsigned max)
902 {
903    assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
904 
905    unsigned sampler_index = tex->sampler_index;
906 
907    int sampler_offset_idx =
908       nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
909    if (sampler_offset_idx >= 0) {
910       if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
911          return false;
912 
913       sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
914    }
915 
916    return sampler_index < max;
917 }
918 
919 static bool
lower_tg4_offsets(nir_builder * b,nir_tex_instr * tex)920 lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
921 {
922    assert(tex->op == nir_texop_tg4);
923    assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
924    assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);
925 
926    b->cursor = nir_after_instr(&tex->instr);
927 
928    nir_ssa_def *dest[4];
929    for (unsigned i = 0; i < 4; ++i) {
930       nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
931       tex_copy->op = tex->op;
932       tex_copy->coord_components = tex->coord_components;
933       tex_copy->sampler_dim = tex->sampler_dim;
934       tex_copy->is_array = tex->is_array;
935       tex_copy->is_shadow = tex->is_shadow;
936       tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
937       tex_copy->component = tex->component;
938       tex_copy->dest_type = tex->dest_type;
939 
940       for (unsigned j = 0; j < tex->num_srcs; ++j) {
941          nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src, tex_copy);
942          tex_copy->src[j].src_type = tex->src[j].src_type;
943       }
944 
945       nir_tex_src src;
946       src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
947                                                  tex->tg4_offsets[i][1]));
948       src.src_type = nir_tex_src_offset;
949       tex_copy->src[tex_copy->num_srcs - 1] = src;
950 
951       nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
952                         nir_tex_instr_dest_size(tex), 32, NULL);
953 
954       nir_builder_instr_insert(b, &tex_copy->instr);
955 
956       dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
957    }
958 
959    nir_ssa_def *res = nir_vec4(b, dest[0], dest[1], dest[2], dest[3]);
960    nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(res));
961    nir_instr_remove(&tex->instr);
962 
963    return true;
964 }
965 
966 static bool
nir_lower_txs_lod(nir_builder * b,nir_tex_instr * tex)967 nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
968 {
969    int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
970    if (lod_idx < 0 ||
971        (nir_src_is_const(tex->src[lod_idx].src) &&
972         nir_src_as_int(tex->src[lod_idx].src) == 0))
973       return false;
974 
975    unsigned dest_size = nir_tex_instr_dest_size(tex);
976 
977    b->cursor = nir_before_instr(&tex->instr);
978    nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);
979 
980    /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
981    nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
982                          nir_src_for_ssa(nir_imm_int(b, 0)));
983 
984    /* TXS(LOD) = max(TXS(0) >> LOD, 1) */
985    b->cursor = nir_after_instr(&tex->instr);
986    nir_ssa_def *minified = nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod),
987                                     nir_imm_int(b, 1));
988 
989    /* Make sure the component encoding the array size (if any) is not
990     * minified.
991     */
992    if (tex->is_array) {
993       nir_ssa_def *comp[3];
994 
995       assert(dest_size <= ARRAY_SIZE(comp));
996       for (unsigned i = 0; i < dest_size - 1; i++)
997          comp[i] = nir_channel(b, minified, i);
998 
999       comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1);
1000       minified = nir_vec(b, comp, dest_size);
1001    }
1002 
1003    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(minified),
1004                                   minified->parent_instr);
1005    return true;
1006 }
1007 
1008 static bool
nir_lower_tex_block(nir_block * block,nir_builder * b,const nir_lower_tex_options * options)1009 nir_lower_tex_block(nir_block *block, nir_builder *b,
1010                     const nir_lower_tex_options *options)
1011 {
1012    bool progress = false;
1013 
1014    nir_foreach_instr_safe(instr, block) {
1015       if (instr->type != nir_instr_type_tex)
1016          continue;
1017 
1018       nir_tex_instr *tex = nir_instr_as_tex(instr);
1019       bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
1020 
1021       /* mask of src coords to saturate (clamp): */
1022       unsigned sat_mask = 0;
1023 
1024       if ((1 << tex->sampler_index) & options->saturate_r)
1025          sat_mask |= (1 << 2);    /* .z */
1026       if ((1 << tex->sampler_index) & options->saturate_t)
1027          sat_mask |= (1 << 1);    /* .y */
1028       if ((1 << tex->sampler_index) & options->saturate_s)
1029          sat_mask |= (1 << 0);    /* .x */
1030 
1031       /* If we are clamping any coords, we must lower projector first
1032        * as clamping happens *after* projection:
1033        */
1034       if (lower_txp || sat_mask) {
1035          progress |= project_src(b, tex);
1036       }
1037 
1038       if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
1039           (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
1040           (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
1041            options->lower_rect_offset)) {
1042          progress = lower_offset(b, tex) || progress;
1043       }
1044 
1045       if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
1046           tex->op != nir_texop_txf && !nir_tex_instr_is_query(tex)) {
1047          lower_rect(b, tex);
1048          progress = true;
1049       }
1050 
1051       if ((1 << tex->texture_index) & options->lower_y_uv_external) {
1052          lower_y_uv_external(b, tex, options);
1053          progress = true;
1054       }
1055 
1056       if ((1 << tex->texture_index) & options->lower_y_u_v_external) {
1057          lower_y_u_v_external(b, tex, options);
1058          progress = true;
1059       }
1060 
1061       if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) {
1062          lower_yx_xuxv_external(b, tex, options);
1063          progress = true;
1064       }
1065 
1066       if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) {
1067          lower_xy_uxvx_external(b, tex, options);
1068          progress = true;
1069       }
1070 
1071       if ((1 << tex->texture_index) & options->lower_ayuv_external) {
1072          lower_ayuv_external(b, tex, options);
1073          progress = true;
1074       }
1075 
1076       if ((1 << tex->texture_index) & options->lower_xyuv_external) {
1077          lower_xyuv_external(b, tex, options);
1078          progress = true;
1079       }
1080 
1081       if ((1 << tex->texture_index) & options->lower_yuv_external) {
1082          lower_yuv_external(b, tex, options);
1083          progress = true;
1084       }
1085 
1086       if (sat_mask) {
1087          saturate_src(b, tex, sat_mask);
1088          progress = true;
1089       }
1090 
1091       if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
1092          swizzle_tg4_broadcom(b, tex);
1093          progress = true;
1094       }
1095 
1096       if (((1 << tex->texture_index) & options->swizzle_result) &&
1097           !nir_tex_instr_is_query(tex) &&
1098           !(tex->is_shadow && tex->is_new_style_shadow)) {
1099          swizzle_result(b, tex, options->swizzles[tex->texture_index]);
1100          progress = true;
1101       }
1102 
1103       /* should be after swizzle so we know which channels are rgb: */
1104       if (((1 << tex->texture_index) & options->lower_srgb) &&
1105           !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
1106          linearize_srgb_result(b, tex);
1107          progress = true;
1108       }
1109 
1110       const bool has_min_lod =
1111          nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
1112       const bool has_offset =
1113          nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
1114 
1115       if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
1116           options->lower_txb_shadow_clamp) {
1117          lower_implicit_lod(b, tex);
1118          progress = true;
1119       }
1120 
1121       if (options->lower_tex_packing[tex->sampler_index] !=
1122           nir_lower_tex_packing_none &&
1123           tex->op != nir_texop_txs &&
1124           tex->op != nir_texop_query_levels &&
1125           tex->op != nir_texop_texture_samples) {
1126          lower_tex_packing(b, tex, options);
1127          progress = true;
1128       }
1129 
1130       if (tex->op == nir_texop_txd &&
1131           (options->lower_txd ||
1132            (options->lower_txd_shadow && tex->is_shadow) ||
1133            (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
1134            (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
1135            (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
1136             nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
1137            (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
1138             has_min_lod && !sampler_index_lt(tex, 16)) ||
1139            (options->lower_txd_cube_map &&
1140             tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
1141            (options->lower_txd_3d &&
1142             tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
1143          lower_gradient(b, tex);
1144          progress = true;
1145          continue;
1146       }
1147 
1148       bool shader_supports_implicit_lod =
1149          b->shader->info.stage == MESA_SHADER_FRAGMENT ||
1150          (b->shader->info.stage == MESA_SHADER_COMPUTE &&
1151           b->shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE);
1152 
1153       /* TXF, TXS and TXL require a LOD but not everything we implement using those
1154        * three opcodes provides one.  Provide a default LOD of 0.
1155        */
1156       if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
1157           (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
1158            tex->op == nir_texop_txl || tex->op == nir_texop_query_levels ||
1159            (tex->op == nir_texop_tex && !shader_supports_implicit_lod))) {
1160          b->cursor = nir_before_instr(&tex->instr);
1161          nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
1162          if (tex->op == nir_texop_tex && options->lower_tex_without_implicit_lod)
1163             tex->op = nir_texop_txl;
1164          progress = true;
1165          continue;
1166       }
1167 
1168       if (options->lower_txs_lod && tex->op == nir_texop_txs) {
1169          progress |= nir_lower_txs_lod(b, tex);
1170          continue;
1171       }
1172 
1173       /* has to happen after all the other lowerings as the original tg4 gets
1174        * replaced by 4 tg4 instructions.
1175        */
1176       if (tex->op == nir_texop_tg4 &&
1177           nir_tex_instr_has_explicit_tg4_offsets(tex) &&
1178           options->lower_tg4_offsets) {
1179          progress |= lower_tg4_offsets(b, tex);
1180          continue;
1181       }
1182    }
1183 
1184    return progress;
1185 }
1186 
1187 static bool
nir_lower_tex_impl(nir_function_impl * impl,const nir_lower_tex_options * options)1188 nir_lower_tex_impl(nir_function_impl *impl,
1189                    const nir_lower_tex_options *options)
1190 {
1191    bool progress = false;
1192    nir_builder builder;
1193    nir_builder_init(&builder, impl);
1194 
1195    nir_foreach_block(block, impl) {
1196       progress |= nir_lower_tex_block(block, &builder, options);
1197    }
1198 
1199    nir_metadata_preserve(impl, nir_metadata_block_index |
1200                                nir_metadata_dominance);
1201    return progress;
1202 }
1203 
1204 bool
nir_lower_tex(nir_shader * shader,const nir_lower_tex_options * options)1205 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1206 {
1207    bool progress = false;
1208 
1209    nir_foreach_function(function, shader) {
1210       if (function->impl)
1211          progress |= nir_lower_tex_impl(function->impl, options);
1212    }
1213 
1214    return progress;
1215 }
1216