/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2019 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26 
27 #include "sfn_nir_lower_fs_out_to_vector.h"
28 
29 #include "nir_builder.h"
30 #include "nir_deref.h"
31 #include "util/u_math.h"
32 
33 #include <set>
34 #include <vector>
35 #include <array>
36 #include <algorithm>
37 
38 namespace r600 {
39 
40 using std::multiset;
41 using std::vector;
42 using std::array;
43 
44 struct nir_intrinsic_instr_less  {
operator ()r600::nir_intrinsic_instr_less45    bool operator () (const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) const
46    {
47       nir_variable *vlhs = nir_deref_instr_get_variable(nir_src_as_deref(lhs->src[0]));
48       nir_variable *vrhs = nir_deref_instr_get_variable(nir_src_as_deref(rhs->src[0]));
49 
50       auto ltype = glsl_get_base_type(vlhs->type);
51       auto rtype = glsl_get_base_type(vrhs->type);
52 
53       if (ltype != rtype)
54          return ltype < rtype;
55       return vlhs->data.location < vrhs->data.location;
56    }
57 };
58 
59 class NirLowerIOToVector {
60 public:
61    NirLowerIOToVector(int base_slot);
62    bool run(nir_function_impl *shader);
63 
64 protected:
65    bool var_can_merge(const nir_variable *lhs, const nir_variable *rhs);
66    bool var_can_rewrite(nir_variable *var) const;
67    void create_new_io_vars(nir_shader *shader);
68    void create_new_io_var(nir_shader *shader, unsigned location, unsigned comps);
69 
70    nir_deref_instr *clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
71                                       const nir_deref_instr *src_head);
72 
73    bool vectorize_block(nir_builder *b, nir_block *block);
74    bool instr_can_rewrite(nir_instr *instr);
75    bool vec_instr_set_remove(nir_builder *b,nir_instr *instr);
76 
77    using InstrSet  = multiset<nir_intrinsic_instr *, nir_intrinsic_instr_less>;
78    using InstrSubSet = std::pair<InstrSet::iterator, InstrSet::iterator>;
79 
80    bool vec_instr_stack_pop(nir_builder *b, InstrSubSet& ir_set,
81                             nir_intrinsic_instr *instr);
82 
83    array<array<nir_variable *, 4>, 16> m_vars;
84    InstrSet m_block_io;
85    int m_next_index;
86 private:
87    virtual nir_variable_mode get_io_mode(nir_shader *shader) const  = 0;
88    virtual bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const  = 0;
89    virtual bool var_can_rewrite_slot(nir_variable *var) const = 0;
90    virtual void create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
91                               nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps) = 0;
92 
93    int m_base_slot;
94 };
95 
96 class NirLowerFSOutToVector : public NirLowerIOToVector {
97 public:
98    NirLowerFSOutToVector();
99 
100 private:
101    nir_variable_mode get_io_mode(nir_shader *shader) const  override;
102    bool var_can_rewrite_slot(nir_variable *var) const override;
103    void create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
104                          nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps) override;
105    bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const  override;
106 
107    nir_ssa_def *create_combined_vector(nir_builder *b, nir_ssa_def **srcs,
108                                        int first_comp, int num_comp);
109 };
110 
r600_lower_fs_out_to_vector(nir_shader * shader)111 bool r600_lower_fs_out_to_vector(nir_shader *shader)
112 {
113    NirLowerFSOutToVector processor;
114 
115    assert(shader->info.stage == MESA_SHADER_FRAGMENT);
116    bool progress = false;
117 
118    nir_foreach_function(function, shader) {
119       if (function->impl)
120          progress |= processor.run(function->impl);
121    }
122    return progress;
123 }
124 
NirLowerIOToVector(int base_slot)125 NirLowerIOToVector::NirLowerIOToVector(int base_slot):
126    m_next_index(0),
127    m_base_slot(base_slot)
128 {
129    for(auto& a : m_vars)
130       for(auto& aa : a)
131          aa = nullptr;
132 }
133 
run(nir_function_impl * impl)134 bool NirLowerIOToVector::run(nir_function_impl *impl)
135 {
136    nir_builder b;
137    nir_builder_init(&b, impl);
138 
139    nir_metadata_require(impl, nir_metadata_dominance);
140    create_new_io_vars(impl->function->shader);
141 
142    bool progress = vectorize_block(&b, nir_start_block(impl));
143    if (progress) {
144       nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
145    }
146    return progress;
147 }
148 
create_new_io_vars(nir_shader * shader)149 void NirLowerIOToVector::create_new_io_vars(nir_shader *shader)
150 {
151    nir_variable_mode mode = get_io_mode(shader);
152 
153    bool can_rewrite_vars = false;
154    nir_foreach_variable_with_modes(var, shader, mode) {
155       if (var_can_rewrite(var)) {
156          can_rewrite_vars = true;
157          unsigned loc = var->data.location - m_base_slot;
158          m_vars[loc][var->data.location_frac] = var;
159       }
160    }
161 
162    if (!can_rewrite_vars)
163       return;
164 
165    /* We don't handle combining vars of different type e.g. different array
166     * lengths.
167     */
168    for (unsigned i = 0; i < 16; i++) {
169       unsigned comps = 0;
170 
171       for (unsigned j = 0; j < 3; j++) {
172          if (!m_vars[i][j])
173             continue;
174 
175          for (unsigned k = j + 1; k < 4; k++) {
176             if (!m_vars[i][k])
177                continue;
178 
179             if (!var_can_merge(m_vars[i][j], m_vars[i][k]))
180                continue;
181 
182             /* Set comps */
183             for (unsigned n = 0; n < glsl_get_components(m_vars[i][j]->type); ++n)
184                comps |= 1 << (m_vars[i][j]->data.location_frac + n);
185 
186             for (unsigned n = 0; n < glsl_get_components(m_vars[i][k]->type); ++n)
187                comps |= 1 << (m_vars[i][k]->data.location_frac + n);
188 
189          }
190       }
191       if (comps)
192          create_new_io_var(shader, i, comps);
193    }
194 }
195 
196 bool
var_can_merge(const nir_variable * lhs,const nir_variable * rhs)197 NirLowerIOToVector::var_can_merge(const nir_variable *lhs,
198                                      const nir_variable *rhs)
199 {
200    return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type));
201 }
202 
203 void
create_new_io_var(nir_shader * shader,unsigned location,unsigned comps)204 NirLowerIOToVector::create_new_io_var(nir_shader *shader,
205                                     unsigned location, unsigned comps)
206 {
207    unsigned num_comps = util_bitcount(comps);
208    assert(num_comps > 1);
209 
210    /* Note: u_bit_scan() strips a component of the comps bitfield here */
211    unsigned first_comp = u_bit_scan(&comps);
212 
213    nir_variable *var = nir_variable_clone(m_vars[location][first_comp], shader);
214    var->data.location_frac = first_comp;
215    var->type = glsl_replace_vector_type(var->type, num_comps);
216 
217    nir_shader_add_variable(shader, var);
218 
219    m_vars[location][first_comp] = var;
220 
221    while (comps) {
222       const int comp = u_bit_scan(&comps);
223       if (m_vars[location][comp]) {
224          m_vars[location][comp] = var;
225       }
226    }
227 }
228 
var_can_rewrite(nir_variable * var) const229 bool NirLowerIOToVector::var_can_rewrite(nir_variable *var) const
230 {
231    /* Skip complex types we don't split in the first place */
232    if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
233       return false;
234 
235    if (glsl_get_bit_size(glsl_without_array(var->type)) != 32)
236       return false;
237 
238    return var_can_rewrite_slot(var);
239 }
240 
241 bool
vectorize_block(nir_builder * b,nir_block * block)242 NirLowerIOToVector::vectorize_block(nir_builder *b, nir_block *block)
243 {
244    bool progress = false;
245 
246    nir_foreach_instr_safe(instr, block) {
247       if (instr_can_rewrite(instr)) {
248          instr->index = m_next_index++;
249          nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr);
250          m_block_io.insert(ir);
251       }
252    }
253 
254    for (unsigned i = 0; i < block->num_dom_children; i++) {
255       nir_block *child = block->dom_children[i];
256       progress |= vectorize_block(b, child);
257    }
258 
259    nir_foreach_instr_reverse_safe(instr, block) {
260       progress |= vec_instr_set_remove(b, instr);
261    }
262    m_block_io.clear();
263 
264    return progress;
265 }
266 
instr_can_rewrite(nir_instr * instr)267 bool NirLowerIOToVector::instr_can_rewrite(nir_instr *instr)
268 {
269    if (instr->type != nir_instr_type_intrinsic)
270       return false;
271 
272    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
273 
274    if (intr->num_components > 3)
275       return false;
276 
277    return instr_can_rewrite_type(intr);
278 }
279 
vec_instr_set_remove(nir_builder * b,nir_instr * instr)280 bool NirLowerIOToVector::vec_instr_set_remove(nir_builder *b,nir_instr *instr)
281 {
282    if (!instr_can_rewrite(instr))
283       return false;
284 
285    nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr);
286    auto entry = m_block_io.equal_range(ir);
287    if (entry.first != m_block_io.end()) {
288       vec_instr_stack_pop(b, entry, ir);
289    }
290    return true;
291 }
292 
293 nir_deref_instr *
clone_deref_array(nir_builder * b,nir_deref_instr * dst_tail,const nir_deref_instr * src_head)294 NirLowerIOToVector::clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
295                                     const nir_deref_instr *src_head)
296 {
297    const nir_deref_instr *parent = nir_deref_instr_parent(src_head);
298 
299    if (!parent)
300       return dst_tail;
301 
302    assert(src_head->deref_type == nir_deref_type_array);
303 
304    dst_tail = clone_deref_array(b, dst_tail, parent);
305 
306    return nir_build_deref_array(b, dst_tail,
307                                 nir_ssa_for_src(b, src_head->arr.index, 1));
308 }
309 
NirLowerFSOutToVector()310 NirLowerFSOutToVector::NirLowerFSOutToVector():
311   NirLowerIOToVector(FRAG_RESULT_COLOR)
312 {
313 
314 }
315 
var_can_rewrite_slot(nir_variable * var) const316 bool NirLowerFSOutToVector::var_can_rewrite_slot(nir_variable *var) const
317 {
318    return ((var->data.mode == nir_var_shader_out) &&
319            ((var->data.location == FRAG_RESULT_COLOR) ||
320               ((var->data.location >= FRAG_RESULT_DATA0) &&
321                (var->data.location <= FRAG_RESULT_DATA7))));
322 }
323 
vec_instr_stack_pop(nir_builder * b,InstrSubSet & ir_set,nir_intrinsic_instr * instr)324 bool NirLowerIOToVector::vec_instr_stack_pop(nir_builder *b, InstrSubSet &ir_set,
325                                            nir_intrinsic_instr *instr)
326 {
327    vector< nir_intrinsic_instr *> ir_sorted_set(ir_set.first, ir_set.second);
328    std::sort(ir_sorted_set.begin(), ir_sorted_set.end(),
329              [](const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) {
330                   return lhs->instr.index > rhs->instr.index;
331              }
332    );
333 
334    nir_intrinsic_instr *intr = *ir_sorted_set.begin();
335    nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
336 
337    unsigned loc = var->data.location - m_base_slot;
338 
339    nir_variable *new_var = m_vars[loc][var->data.location_frac];
340    unsigned num_comps = glsl_get_vector_elements(glsl_without_array(new_var->type));
341    unsigned old_num_comps = glsl_get_vector_elements(glsl_without_array(var->type));
342 
343    /* Don't bother walking the stack if this component can't be vectorised. */
344    if (old_num_comps > 3) {
345       return false;
346    }
347 
348    if (new_var == var) {
349       return false;
350    }
351 
352    b->cursor = nir_after_instr(&intr->instr);
353    nir_ssa_undef_instr *instr_undef =
354       nir_ssa_undef_instr_create(b->shader, 1, 32);
355    nir_builder_instr_insert(b, &instr_undef->instr);
356 
357    nir_ssa_def *srcs[4];
358    for (int i = 0; i < 4; i++) {
359       srcs[i] = &instr_undef->def;
360    }
361    srcs[var->data.location_frac] = intr->src[1].ssa;
362 
363    for (auto k = ir_sorted_set.begin() + 1; k != ir_sorted_set.end(); ++k) {
364       nir_intrinsic_instr *intr2 = *k;
365       nir_variable *var2 =
366          nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0]));
367       unsigned loc2 = var->data.location - m_base_slot;
368 
369       if (m_vars[loc][var->data.location_frac] !=
370           m_vars[loc2][var2->data.location_frac]) {
371          continue;
372       }
373 
374      assert(glsl_get_vector_elements(glsl_without_array(var2->type)) < 4);
375 
376       if (srcs[var2->data.location_frac] == &instr_undef->def) {
377          assert(intr2->src[1].is_ssa);
378          assert(intr2->src[1].ssa);
379          srcs[var2->data.location_frac] = intr2->src[1].ssa;
380       }
381       nir_instr_remove(&intr2->instr);
382    }
383 
384    create_new_io(b, intr, new_var, srcs, new_var->data.location_frac,
385                  num_comps);
386    return true;
387 }
388 
get_io_mode(nir_shader * shader) const389 nir_variable_mode NirLowerFSOutToVector::get_io_mode(nir_shader *shader) const
390 {
391    return nir_var_shader_out;
392 }
393 
394 void
create_new_io(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var,nir_ssa_def ** srcs,unsigned first_comp,unsigned num_comps)395 NirLowerFSOutToVector::create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
396                                         nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps)
397 {
398    b->cursor = nir_before_instr(&intr->instr);
399 
400    nir_intrinsic_instr *new_intr =
401       nir_intrinsic_instr_create(b->shader, intr->intrinsic);
402    new_intr->num_components = num_comps;
403 
404    nir_intrinsic_set_write_mask(new_intr, (1 << num_comps) - 1);
405 
406    nir_deref_instr *deref = nir_build_deref_var(b, var);
407    deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));
408 
409    new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
410    new_intr->src[1] = nir_src_for_ssa(create_combined_vector(b, srcs, first_comp, num_comps));
411 
412    nir_builder_instr_insert(b, &new_intr->instr);
413 
414    /* Remove the old store intrinsic */
415    nir_instr_remove(&intr->instr);
416 }
417 
instr_can_rewrite_type(nir_intrinsic_instr * intr) const418 bool NirLowerFSOutToVector::instr_can_rewrite_type(nir_intrinsic_instr *intr) const
419 {
420    if (intr->intrinsic != nir_intrinsic_store_deref)
421       return false;
422 
423    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
424    if (!nir_deref_mode_is(deref, nir_var_shader_out))
425       return false;
426 
427    return var_can_rewrite(nir_deref_instr_get_variable(deref));
428 }
429 
create_combined_vector(nir_builder * b,nir_ssa_def ** srcs,int first_comp,int num_comp)430 nir_ssa_def *NirLowerFSOutToVector::create_combined_vector(nir_builder *b, nir_ssa_def **srcs,
431                                                            int first_comp, int num_comp)
432 {
433    nir_op op;
434    switch (num_comp) {
435    case 2: op = nir_op_vec2; break;
436    case 3: op = nir_op_vec3; break;
437    case 4: op = nir_op_vec4; break;
438    default:
439       assert(0 && "combined vector must have 2 to 4 components");
440 
441    }
442    nir_alu_instr * instr = nir_alu_instr_create(b->shader, op);
443    instr->exact = b->exact;
444 
445    int i = 0;
446    unsigned k = 0;
447    while (i < num_comp) {
448       nir_ssa_def *s = srcs[first_comp + k];
449       for(uint8_t kk = 0; kk < s->num_components && i < num_comp; ++kk) {
450          instr->src[i].src  = nir_src_for_ssa(s);
451          instr->src[i].swizzle[0] = kk;
452          ++i;
453       }
454       k += s->num_components;
455    }
456 
457    nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_comp, 32, NULL);
458    instr->dest.write_mask = (1 << num_comp) - 1;
459    nir_builder_instr_insert(b, &instr->instr);
460    return &instr->dest.dest.ssa;
461 }
462 
463 }
464