1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2019 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_instruction_tex.h"
28 #include "nir_builder.h"
29 #include "nir_builtin_builder.h"
30 
31 namespace r600 {
32 
TexInstruction(Opcode op,const GPRVector & dest,const GPRVector & src,unsigned sid,unsigned rid,PValue sampler_offset)33 TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src,
34                                unsigned sid, unsigned rid, PValue sampler_offset):
35    Instruction(tex),
36    m_opcode(op),
37    m_dst(dest),
38    m_src(src),
39    m_sampler_id(sid),
40    m_resource_id(rid),
41    m_flags(0),
42    m_inst_mode(0),
43    m_dest_swizzle{0,1,2,3},
44    m_sampler_offset(sampler_offset)
45 
46 {
47    memset(m_offset, 0, sizeof (m_offset));
48 
49    add_remappable_src_value(&m_src);
50    add_remappable_src_value(&m_sampler_offset);
51    add_remappable_dst_value(&m_dst);
52 }
53 
set_gather_comp(int cmp)54 void TexInstruction::set_gather_comp(int cmp)
55 {
56    m_inst_mode = cmp;
57 }
58 
replace_values(const ValueSet & candiates,PValue new_value)59 void TexInstruction::replace_values(const ValueSet& candiates, PValue new_value)
60 {
61    // I wonder whether we can actually end up here ...
62    for (auto c: candiates) {
63       if (*c == *m_src.reg_i(c->chan()))
64          m_src.set_reg_i(c->chan(), new_value);
65       if (*c == *m_dst.reg_i(c->chan()))
66          m_dst.set_reg_i(c->chan(), new_value);
67    }
68 }
69 
set_offset(unsigned index,int32_t val)70 void TexInstruction::set_offset(unsigned index, int32_t val)
71 {
72    assert(index < 3);
73    m_offset[index] = val;
74 }
75 
get_offset(unsigned index) const76 int TexInstruction::get_offset(unsigned index) const
77 {
78    assert(index < 3);
79    return (m_offset[index] << 1 & 0x1f);
80 }
81 
is_equal_to(const Instruction & rhs) const82 bool TexInstruction::is_equal_to(const Instruction& rhs) const
83 {
84    assert(rhs.type() == tex);
85    const auto& r = static_cast<const TexInstruction&>(rhs);
86    return (m_opcode == r.m_opcode &&
87            m_dst == r.m_dst &&
88            m_src == r.m_src &&
89            m_sampler_id == r.m_sampler_id &&
90            m_resource_id == r.m_resource_id);
91 }
92 
do_print(std::ostream & os) const93 void TexInstruction::do_print(std::ostream& os) const
94 {
95    const char *map_swz = "xyzw01?_";
96    os << opname(m_opcode) << " R" << m_dst.sel() << ".";
97    for (int i = 0; i < 4; ++i)
98       os << map_swz[m_dest_swizzle[i]];
99 
100    os << " " << m_src
101       << " RESID:"  << m_resource_id << " SAMPLER:"
102       << m_sampler_id;
103 }
104 
opname(Opcode op)105 const char *TexInstruction::opname(Opcode op)
106 {
107    switch (op) {
108    case ld: return "LD";
109    case get_resinfo: return "GET_TEXTURE_RESINFO";
110    case get_nsampled: return "GET_NUMBER_OF_SAMPLES";
111    case get_tex_lod: return "GET_LOD";
112    case get_gradient_h: return "GET_GRADIENTS_H";
113    case get_gradient_v: return "GET_GRADIENTS_V";
114    case set_offsets: return "SET_TEXTURE_OFFSETS";
115    case keep_gradients: return "KEEP_GRADIENTS";
116    case set_gradient_h: return "SET_GRADIENTS_H";
117    case set_gradient_v: return "SET_GRADIENTS_V";
118    case sample: return "SAMPLE";
119    case sample_l: return "SAMPLE_L";
120    case sample_lb: return "SAMPLE_LB";
121    case sample_lz: return "SAMPLE_LZ";
122    case sample_g: return "SAMPLE_G";
123    case sample_g_lb: return "SAMPLE_G_L";
124    case gather4: return "GATHER4";
125    case gather4_o: return "GATHER4_O";
126    case sample_c: return "SAMPLE_C";
127    case sample_c_l: return "SAMPLE_C_L";
128    case sample_c_lb: return "SAMPLE_C_LB";
129    case sample_c_lz: return "SAMPLE_C_LZ";
130    case sample_c_g: return "SAMPLE_C_G";
131    case sample_c_g_lb: return "SAMPLE_C_G_L";
132    case gather4_c: return "GATHER4_C";
133    case gather4_c_o: return "OP_GATHER4_C_O";
134    }
135    return "ERROR";
136 }
137 
138 
139 
lower_coord_shift_normalized(nir_builder & b,nir_tex_instr * tex)140 static bool lower_coord_shift_normalized(nir_builder& b, nir_tex_instr *tex)
141 {
142    b.cursor = nir_before_instr(&tex->instr);
143 
144    nir_ssa_def * size = nir_i2f32(&b, nir_get_texture_size(&b, tex));
145    nir_ssa_def *scale = nir_frcp(&b, size);
146 
147    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
148    nir_ssa_def *corr = nir_fadd(&b,
149                                 nir_fmul(&b, nir_imm_float(&b, -0.5f), scale),
150                                 tex->src[coord_index].src.ssa);
151    nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
152                          nir_src_for_ssa(corr));
153    return true;
154 }
155 
lower_coord_shift_unnormalized(nir_builder & b,nir_tex_instr * tex)156 static bool lower_coord_shift_unnormalized(nir_builder& b, nir_tex_instr *tex)
157 {
158    b.cursor = nir_before_instr(&tex->instr);
159    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
160    nir_ssa_def *corr = nir_fadd(&b, tex->src[coord_index].src.ssa,
161                                 nir_imm_float(&b, -0.5f));
162    nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
163                          nir_src_for_ssa(corr));
164    return true;
165 }
166 
167 static bool
r600_nir_lower_int_tg4_impl(nir_function_impl * impl)168 r600_nir_lower_int_tg4_impl(nir_function_impl *impl)
169 {
170    nir_builder b;
171    nir_builder_init(&b, impl);
172 
173    bool progress = false;
174    nir_foreach_block(block, impl) {
175       nir_foreach_instr_safe(instr, block) {
176          if (instr->type == nir_instr_type_tex) {
177             nir_tex_instr *tex = nir_instr_as_tex(instr);
178             if (tex->op == nir_texop_tg4 &&
179                 tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) {
180                if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float) {
181                   if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT)
182                      lower_coord_shift_normalized(b, tex);
183                   else
184                      lower_coord_shift_unnormalized(b, tex);
185                   progress = true;
186                }
187             }
188          }
189       }
190    }
191    return progress;
192 }
193 
/*
 * This lowering pass works around a bug in r600 when doing TG4 from
 * integer-valued samplers.
 *
 * Gather4 should follow the same rules as bilinear filtering, but the hardware
 * incorrectly forces nearest filtering if the texture format is integer.
 * The only effect it has on Gather4, which always returns 4 texels for
 * bilinear filtering, is that the final coordinates are off by 0.5 of
 * the texel size.
 */
204 
r600_nir_lower_int_tg4(nir_shader * shader)205 bool r600_nir_lower_int_tg4(nir_shader *shader)
206 {
207    bool progress = false;
208    bool need_lowering = false;
209 
210    nir_foreach_uniform_variable(var, shader) {
211       if (var->type->is_sampler()) {
212          if (glsl_base_type_is_integer(var->type->sampled_type)) {
213             need_lowering = true;
214          }
215       }
216    }
217 
218    if (need_lowering) {
219       nir_foreach_function(function, shader) {
220          if (function->impl && r600_nir_lower_int_tg4_impl(function->impl))
221             progress = true;
222       }
223    }
224 
225    return progress;
226 }
227 
228 static
lower_txl_txf_array_or_cube(nir_builder * b,nir_tex_instr * tex)229 bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
230 {
231    assert(tex->op == nir_texop_txb || tex->op == nir_texop_txl);
232    assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
233    assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);
234 
235    b->cursor = nir_before_instr(&tex->instr);
236 
237    int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
238    int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
239    int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
240    assert (lod_idx >= 0 || bias_idx >= 0);
241 
242    nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
243    nir_ssa_def *lod = (lod_idx >= 0) ?
244                          nir_ssa_for_src(b, tex->src[lod_idx].src, 1) :
245                          nir_get_texture_lod(b, tex);
246 
247    if (bias_idx >= 0)
248       lod = nir_fadd(b, lod,nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
249 
250    if (min_lod_idx >= 0)
251       lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
252 
253    /* max lod? */
254 
255    nir_ssa_def *lambda_exp =  nir_fexp2(b, lod);
256    nir_ssa_def *scale = NULL;
257 
258    if  (tex->is_array) {
259       int cmp_mask = (1 << (size->num_components - 1)) - 1;
260       scale = nir_frcp(b, nir_channels(b, size,
261                                        (nir_component_mask_t)cmp_mask));
262    } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
263       unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
264       scale = nir_frcp(b, nir_channels(b, size, 1));
265       scale = nir_swizzle(b, scale, swizzle, 3);
266    }
267 
268    nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale);
269 
270    if (lod_idx >= 0)
271       nir_tex_instr_remove_src(tex, lod_idx);
272    if (bias_idx >= 0)
273       nir_tex_instr_remove_src(tex, bias_idx);
274    if (min_lod_idx >= 0)
275       nir_tex_instr_remove_src(tex, min_lod_idx);
276    nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(grad));
277    nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(grad));
278 
279    tex->op = nir_texop_txd;
280    return true;
281 }
282 
283 
284 static bool
r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl * impl)285 r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl *impl)
286 {
287    nir_builder b;
288    nir_builder_init(&b, impl);
289 
290    bool progress = false;
291    nir_foreach_block(block, impl) {
292       nir_foreach_instr_safe(instr, block) {
293          if (instr->type == nir_instr_type_tex) {
294             nir_tex_instr *tex = nir_instr_as_tex(instr);
295 
296             if (tex->is_shadow &&
297                 (tex->op == nir_texop_txl || tex->op == nir_texop_txb) &&
298                 (tex->is_array || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE))
299                progress |= lower_txl_txf_array_or_cube(&b, tex);
300          }
301       }
302    }
303    return progress;
304 }
305 
306 bool
r600_nir_lower_txl_txf_array_or_cube(nir_shader * shader)307 r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader)
308 {
309    bool progress = false;
310    nir_foreach_function(function, shader) {
311       if (function->impl && r600_nir_lower_txl_txf_array_or_cube_impl(function->impl))
312          progress = true;
313    }
314    return progress;
315 }
316 
317 
318 }
319