1 /*
2  * Copyright © 2015 Red Hat
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #include "nir.h"
28 #include "nir_builder.h"
29 
30 /* Lowers idiv/udiv/umod
31  * Based on NV50LegalizeSSA::handleDIV()
32  *
33  * Note that this is probably not enough precision for compute shaders.
34  * Perhaps we want a second higher precision (looping) version of this?
35  * Or perhaps we assume if you can do compute shaders you can also
36  * branch out to a pre-optimized shader library routine..
37  */
38 
39 static bool
convert_instr(nir_builder * bld,nir_alu_instr * alu)40 convert_instr(nir_builder *bld, nir_alu_instr *alu)
41 {
42    nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
43    nir_op op = alu->op;
44    bool is_signed;
45 
46    if ((op != nir_op_idiv) &&
47        (op != nir_op_udiv) &&
48        (op != nir_op_umod))
49       return false;
50 
51    is_signed = (op == nir_op_idiv);
52 
53    bld->cursor = nir_before_instr(&alu->instr);
54 
55    numer = nir_ssa_for_alu_src(bld, alu, 0);
56    denom = nir_ssa_for_alu_src(bld, alu, 1);
57 
58    if (is_signed) {
59       af = nir_i2f32(bld, numer);
60       bf = nir_i2f32(bld, denom);
61       af = nir_fabs(bld, af);
62       bf = nir_fabs(bld, bf);
63       a  = nir_iabs(bld, numer);
64       b  = nir_iabs(bld, denom);
65    } else {
66       af = nir_u2f32(bld, numer);
67       bf = nir_u2f32(bld, denom);
68       a  = numer;
69       b  = denom;
70    }
71 
72    /* get first result: */
73    bf = nir_frcp(bld, bf);
74    bf = nir_isub(bld, bf, nir_imm_int(bld, 2));  /* yes, really */
75    q  = nir_fmul(bld, af, bf);
76 
77    if (is_signed) {
78       q = nir_f2i32(bld, q);
79    } else {
80       q = nir_f2u32(bld, q);
81    }
82 
83    /* get error of first result: */
84    r = nir_imul(bld, q, b);
85    r = nir_isub(bld, a, r);
86    r = nir_u2f32(bld, r);
87    r = nir_fmul(bld, r, bf);
88    r = nir_f2u32(bld, r);
89 
90    /* add quotients: */
91    q = nir_iadd(bld, q, r);
92 
93    /* correction: if modulus >= divisor, add 1 */
94    r = nir_imul(bld, q, b);
95    r = nir_isub(bld, a, r);
96 
97    r = nir_uge(bld, r, b);
98    r = nir_b2i(bld, r);
99 
100    q = nir_iadd(bld, q, r);
101    if (is_signed)  {
102       /* fix the sign: */
103       r = nir_ixor(bld, numer, denom);
104       r = nir_ishr(bld, r, nir_imm_int(bld, 31));
105       b = nir_ineg(bld, q);
106       q = nir_bcsel(bld, r, b, q);
107    }
108 
109    if (op == nir_op_umod) {
110       /* division result in q */
111       r = nir_imul(bld, q, b);
112       q = nir_isub(bld, a, r);
113    }
114 
115    assert(alu->dest.dest.is_ssa);
116    nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q));
117 
118    return true;
119 }
120 
121 static bool
convert_impl(nir_function_impl * impl)122 convert_impl(nir_function_impl *impl)
123 {
124    nir_builder b;
125    nir_builder_init(&b, impl);
126    bool progress = false;
127 
128    nir_foreach_block(block, impl) {
129       nir_foreach_instr_safe(instr, block) {
130          if (instr->type == nir_instr_type_alu)
131             progress |= convert_instr(&b, nir_instr_as_alu(instr));
132       }
133    }
134 
135    nir_metadata_preserve(impl, nir_metadata_block_index |
136                                nir_metadata_dominance);
137 
138    return progress;
139 }
140 
141 bool
nir_lower_idiv(nir_shader * shader)142 nir_lower_idiv(nir_shader *shader)
143 {
144    bool progress = false;
145 
146    nir_foreach_function(function, shader) {
147       if (function->impl)
148          progress |= convert_impl(function->impl);
149    }
150 
151    return progress;
152 }
153