1 /*
2  * Copyright (C) 2019 Alyssa Rosenzweig
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file
26  *
27  * Implements the fragment pipeline (blending and writeout) in software, to be
28  * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
29  * shader variant on typical GPUs. This pass is useful if hardware lacks
30  * fixed-function blending in part or in full.
31  */
32 
33 #include "compiler/nir/nir.h"
34 #include "compiler/nir/nir_builder.h"
35 #include "compiler/nir/nir_format_convert.h"
36 #include "nir_lower_blend.h"
37 
38 /* Given processed factors, combine them per a blend function */
39 
40 static nir_ssa_def *
nir_blend_func(nir_builder * b,enum blend_func func,nir_ssa_def * src,nir_ssa_def * dst)41 nir_blend_func(
42    nir_builder *b,
43    enum blend_func func,
44    nir_ssa_def *src, nir_ssa_def *dst)
45 {
46    switch (func) {
47    case BLEND_FUNC_ADD:
48       return nir_fadd(b, src, dst);
49    case BLEND_FUNC_SUBTRACT:
50       return nir_fsub(b, src, dst);
51    case BLEND_FUNC_REVERSE_SUBTRACT:
52       return nir_fsub(b, dst, src);
53    case BLEND_FUNC_MIN:
54       return nir_fmin(b, src, dst);
55    case BLEND_FUNC_MAX:
56       return nir_fmax(b, src, dst);
57    }
58 
59    unreachable("Invalid blend function");
60 }
61 
62 /* Does this blend function multiply by a blend factor? */
63 
64 static bool
nir_blend_factored(enum blend_func func)65 nir_blend_factored(enum blend_func func)
66 {
67    switch (func) {
68    case BLEND_FUNC_ADD:
69    case BLEND_FUNC_SUBTRACT:
70    case BLEND_FUNC_REVERSE_SUBTRACT:
71       return true;
72    default:
73       return false;
74    }
75 }
76 
77 /* Compute a src_alpha_saturate factor */
78 static nir_ssa_def *
nir_alpha_saturate(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,unsigned chan,bool half)79 nir_alpha_saturate(
80    nir_builder *b,
81    nir_ssa_def *src, nir_ssa_def *dst,
82    unsigned chan,
83    bool half)
84 {
85    nir_ssa_def *Asrc = nir_channel(b, src, 3);
86    nir_ssa_def *Adst = nir_channel(b, dst, 3);
87    nir_ssa_def *one = half ? nir_imm_float16(b, 1.0) : nir_imm_float(b, 1.0);
88    nir_ssa_def *Adsti = nir_fsub(b, one, Adst);
89 
90    return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
91 }
92 
93 /* Returns a scalar single factor, unmultiplied */
94 
95 static nir_ssa_def *
nir_blend_factor_value(nir_builder * b,nir_ssa_def * src,nir_ssa_def * src1,nir_ssa_def * dst,nir_ssa_def * bconst,unsigned chan,enum blend_factor factor,bool half)96 nir_blend_factor_value(
97    nir_builder *b,
98    nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
99    unsigned chan,
100    enum blend_factor factor,
101    bool half)
102 {
103    switch (factor) {
104    case BLEND_FACTOR_ZERO:
105       return half ? nir_imm_float16(b, 0.0) : nir_imm_float(b, 0.0);
106    case BLEND_FACTOR_SRC_COLOR:
107       return nir_channel(b, src, chan);
108    case BLEND_FACTOR_SRC1_COLOR:
109       return nir_channel(b, src1, chan);
110    case BLEND_FACTOR_DST_COLOR:
111       return nir_channel(b, dst, chan);
112    case BLEND_FACTOR_SRC_ALPHA:
113       return nir_channel(b, src, 3);
114    case BLEND_FACTOR_SRC1_ALPHA:
115       return nir_channel(b, src1, 3);
116    case BLEND_FACTOR_DST_ALPHA:
117       return nir_channel(b, dst, 3);
118    case BLEND_FACTOR_CONSTANT_COLOR:
119       return nir_channel(b, bconst, chan);
120    case BLEND_FACTOR_CONSTANT_ALPHA:
121       return nir_channel(b, bconst, 3);
122    case BLEND_FACTOR_SRC_ALPHA_SATURATE:
123       return nir_alpha_saturate(b, src, dst, chan, half);
124    }
125 
126    unreachable("Invalid blend factor");
127 }
128 
129 static nir_ssa_def *
nir_blend_factor(nir_builder * b,nir_ssa_def * raw_scalar,nir_ssa_def * src,nir_ssa_def * src1,nir_ssa_def * dst,nir_ssa_def * bconst,unsigned chan,enum blend_factor factor,bool inverted,bool half)130 nir_blend_factor(
131    nir_builder *b,
132    nir_ssa_def *raw_scalar,
133    nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
134    unsigned chan,
135    enum blend_factor factor,
136    bool inverted,
137    bool half)
138 {
139    nir_ssa_def *f =
140       nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor, half);
141 
142    nir_ssa_def *unity = half ? nir_imm_float16(b, 1.0) : nir_imm_float(b, 1.0);
143 
144    if (inverted)
145       f = nir_fsub(b, unity, f);
146 
147    return nir_fmul(b, raw_scalar, f);
148 }
149 
150 /* Given a colormask, "blend" with the destination */
151 
152 static nir_ssa_def *
nir_color_mask(nir_builder * b,unsigned mask,nir_ssa_def * src,nir_ssa_def * dst)153 nir_color_mask(
154    nir_builder *b,
155    unsigned mask,
156    nir_ssa_def *src,
157    nir_ssa_def *dst)
158 {
159    nir_ssa_def *masked[4];
160 
161    for (unsigned c = 0; c < 4; ++c) {
162       bool enab = (mask & (1 << c));
163       masked[c] = enab ? nir_channel(b, src, c) : nir_channel(b, dst, c);
164    }
165 
166    return nir_vec(b, masked, 4);
167 }
168 
169 static nir_ssa_def *
nir_logicop_func(nir_builder * b,unsigned func,nir_ssa_def * src,nir_ssa_def * dst)170 nir_logicop_func(
171    nir_builder *b,
172    unsigned func,
173    nir_ssa_def *src, nir_ssa_def *dst)
174 {
175    switch (func) {
176    case PIPE_LOGICOP_CLEAR:
177       return nir_imm_ivec4(b, 0, 0, 0, 0);
178    case PIPE_LOGICOP_NOR:
179       return nir_inot(b, nir_ior(b, src, dst));
180    case PIPE_LOGICOP_AND_INVERTED:
181       return nir_iand(b, nir_inot(b, src), dst);
182    case PIPE_LOGICOP_COPY_INVERTED:
183       return nir_inot(b, src);
184    case PIPE_LOGICOP_AND_REVERSE:
185       return nir_iand(b, src, nir_inot(b, dst));
186    case PIPE_LOGICOP_INVERT:
187       return nir_inot(b, dst);
188    case PIPE_LOGICOP_XOR:
189       return nir_ixor(b, src, dst);
190    case PIPE_LOGICOP_NAND:
191       return nir_inot(b, nir_iand(b, src, dst));
192    case PIPE_LOGICOP_AND:
193       return nir_iand(b, src, dst);
194    case PIPE_LOGICOP_EQUIV:
195       return nir_inot(b, nir_ixor(b, src, dst));
196    case PIPE_LOGICOP_NOOP:
197       return dst;
198    case PIPE_LOGICOP_OR_INVERTED:
199       return nir_ior(b, nir_inot(b, src), dst);
200    case PIPE_LOGICOP_COPY:
201       return src;
202    case PIPE_LOGICOP_OR_REVERSE:
203       return nir_ior(b, src, nir_inot(b, dst));
204    case PIPE_LOGICOP_OR:
205       return nir_ior(b, src, dst);
206    case PIPE_LOGICOP_SET:
207       return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
208    }
209 
210    unreachable("Invalid logciop function");
211 }
212 
213 static nir_ssa_def *
nir_blend_logicop(nir_builder * b,nir_lower_blend_options options,nir_ssa_def * src,nir_ssa_def * dst)214 nir_blend_logicop(
215    nir_builder *b,
216    nir_lower_blend_options options,
217    nir_ssa_def *src, nir_ssa_def *dst)
218 {
219    const struct util_format_description *format_desc =
220       util_format_description(options.format);
221 
222    if (options.half) {
223       src = nir_f2f32(b, src);
224       dst = nir_f2f32(b, dst);
225    }
226 
227    assert(src->num_components <= 4);
228    assert(dst->num_components <= 4);
229 
230    unsigned bits[4];
231    for (int i = 0; i < 4; ++i)
232        bits[i] = format_desc->channel[i].size;
233 
234    src = nir_format_float_to_unorm(b, src, bits);
235    dst = nir_format_float_to_unorm(b, dst, bits);
236 
237    nir_ssa_def *out = nir_logicop_func(b, options.logicop_func, src, dst);
238 
239    if (bits[0] < 32) {
240        nir_const_value mask[4];
241        for (int i = 0; i < 4; ++i)
242            mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);
243 
244        out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));
245    }
246 
247    out = nir_format_unorm_to_float(b, out, bits);
248 
249    if (options.half)
250       out = nir_f2f16(b, out);
251 
252    return out;
253 }
254 
255 /* Given a blend state, the source color, and the destination color,
256  * return the blended color
257  */
258 
259 static nir_ssa_def *
nir_blend(nir_builder * b,nir_lower_blend_options options,nir_ssa_def * src,nir_ssa_def * src1,nir_ssa_def * dst)260 nir_blend(
261    nir_builder *b,
262    nir_lower_blend_options options,
263    nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst)
264 {
265    if (options.logicop_enable)
266       return nir_blend_logicop(b, options, src, dst);
267 
268    /* Grab the blend constant ahead of time */
269    nir_ssa_def *bconst;
270    if (options.is_bifrost) {
271       /* Bifrost is a scalar architecture, so let's split loads now to avoid a
272        * lowering pass.
273        */
274       bconst = nir_vec4(b,
275                         nir_load_blend_const_color_r_float(b),
276                         nir_load_blend_const_color_g_float(b),
277                         nir_load_blend_const_color_b_float(b),
278                         nir_load_blend_const_color_a_float(b));
279    } else {
280       bconst = nir_load_blend_const_color_rgba(b);
281    }
282 
283    if (options.half)
284       bconst = nir_f2f16(b, bconst);
285 
286    /* We blend per channel and recombine later */
287    nir_ssa_def *channels[4];
288 
289    for (unsigned c = 0; c < 4; ++c) {
290       /* Decide properties based on channel */
291       nir_lower_blend_channel chan =
292          (c < 3) ? options.rgb : options.alpha;
293 
294       nir_ssa_def *psrc = nir_channel(b, src, c);
295       nir_ssa_def *pdst = nir_channel(b, dst, c);
296 
297       if (nir_blend_factored(chan.func)) {
298          psrc = nir_blend_factor(
299                    b, psrc,
300                    src, src1, dst, bconst, c,
301                    chan.src_factor, chan.invert_src_factor, options.half);
302 
303          pdst = nir_blend_factor(
304                    b, pdst,
305                    src, src1, dst, bconst, c,
306                    chan.dst_factor, chan.invert_dst_factor, options.half);
307       }
308 
309       channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
310    }
311 
312    /* Then just recombine with an applied colormask */
313    nir_ssa_def *blended = nir_vec(b, channels, 4);
314    return nir_color_mask(b, options.colormask, blended, dst);
315 }
316 
317 static bool
nir_is_blend_channel_replace(nir_lower_blend_channel chan)318 nir_is_blend_channel_replace(nir_lower_blend_channel chan)
319 {
320    return
321       (chan.src_factor == BLEND_FACTOR_ZERO) &&
322       (chan.dst_factor == BLEND_FACTOR_ZERO) &&
323       (chan.invert_src_factor && !chan.invert_dst_factor) &&
324       (chan.func == BLEND_FUNC_ADD || chan.func == BLEND_FUNC_SUBTRACT || chan.func == BLEND_FUNC_MAX);
325 }
326 
327 static bool
nir_is_blend_replace(nir_lower_blend_options options)328 nir_is_blend_replace(nir_lower_blend_options options)
329 {
330    return
331       nir_is_blend_channel_replace(options.rgb) &&
332       nir_is_blend_channel_replace(options.alpha);
333 }
334 
335 void
nir_lower_blend(nir_shader * shader,nir_lower_blend_options options)336 nir_lower_blend(nir_shader *shader, nir_lower_blend_options options)
337 {
338    /* Blend shaders are represented as special fragment shaders */
339    assert(shader->info.stage == MESA_SHADER_FRAGMENT);
340 
341    /* Special case replace, since there's nothing to do and we don't want to
342     * degrade intermediate precision (e.g. for non-blendable R32F targets) */
343    if (nir_is_blend_replace(options))
344       return;
345 
346    nir_foreach_function(func, shader) {
347       nir_foreach_block(block, func->impl) {
348          nir_foreach_instr_safe(instr, block) {
349             if (instr->type != nir_instr_type_intrinsic)
350                continue;
351 
352             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
353             if (intr->intrinsic != nir_intrinsic_store_deref)
354                continue;
355 
356             /* TODO: Extending to MRT */
357             nir_variable *var = nir_intrinsic_get_var(intr, 0);
358             if (var->data.location != FRAG_RESULT_COLOR)
359                continue;
360 
361             nir_builder b;
362             nir_builder_init(&b, func->impl);
363             b.cursor = nir_before_instr(instr);
364 
365             /* Grab the input color */
366             nir_ssa_def *src = nir_ssa_for_src(&b, intr->src[1], 4);
367 
368             /* Grab the dual-source input color */
369             nir_ssa_def *src1 = options.src1;
370 
371             /* Grab the tilebuffer color - io lowered to load_output */
372             nir_ssa_def *dst = nir_load_var(&b, var);
373 
374             /* Blend the two colors per the passed options */
375             nir_ssa_def *blended = nir_blend(&b, options, src, src1, dst);
376 
377             /* Write out the final color instead of the input */
378             nir_instr_rewrite_src(instr, &intr->src[1],
379                                   nir_src_for_ssa(blended));
380 
381          }
382       }
383 
384       nir_metadata_preserve(func->impl, nir_metadata_block_index |
385                             nir_metadata_dominance);
386    }
387 }
388