1 /*
2  * © Copyright 2018 Alyssa Rosenzweig
3  * Copyright (C) 2019-2020 Collabora, Ltd.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  */
25 
26 #include <stdio.h>
27 #include "pan_blend_shaders.h"
28 #include "pan_util.h"
29 #include "panfrost-quirks.h"
30 #include "midgard/midgard_compile.h"
31 #include "bifrost/bifrost_compile.h"
32 #include "compiler/nir/nir_builder.h"
33 #include "nir/nir_lower_blend.h"
34 #include "panfrost/util/pan_lower_framebuffer.h"
35 #include "gallium/auxiliary/util/u_blend.h"
36 #include "util/u_memory.h"
37 
38 /*
39  * Implements the command stream portion of programmatic blend shaders.
40  *
41  * On Midgard, common blending operations are accelerated by the fixed-function
42  * blending pipeline. Panfrost supports this fast path via the code in
43  * pan_blending.c. Nevertheless, uncommon blend modes (including some seemingly
44  * simple modes present in ES2) require "blend shaders", a special internal
45  * shader type used for programmable blending.
46  *
47  * Blend shaders operate during the normal blending time, but they bypass the
48  * fixed-function blending pipeline and instead go straight to the Midgard
49  * shader cores. The shaders themselves are essentially just fragment shaders,
50  * making heavy use of uint8 arithmetic to manipulate RGB values for the
51  * framebuffer.
52  *
53  * As is typical with Midgard, shader binaries must be accompanied by
54  * information about the first tag (ORed with the bottom nibble of address,
55  * like usual) and work registers. Work register count is assumed to be less
 * than or equal to the corresponding fragment shader's work count. This
57  * suggests that blend shader invocation is tied to fragment shader
58  * execution.
59  *
60  * The shaders themselves use the standard ISA. The source pixel colour,
61  * including alpha, is preloaded into r0 as a vec4 of float32. The destination
62  * pixel colour must be loaded explicitly via load/store ops, possibly
63  * performing conversions in software. The blended colour must be stored with a
64  * fragment writeout in the correct framebuffer format, either in software or
65  * via conversion opcodes on the load/store pipe.
66  *
67  * Blend shaders hardcode constants. Naively, this requires recompilation each
68  * time the blend color changes, which is a performance risk. Accordingly, we
69  * 'cheat' a bit: instead of loading the constant, we compile a shader with a
70  * dummy constant, exporting the offset to the immediate in the shader binary,
71  * storing this generic binary and metadata in the CSO itself at CSO create
72  * time.
73  *
74  * We then hot patch in the color into this shader at attachment / color change
75  * time, allowing for CSO create to be the only expensive operation
76  * (compilation).
77  */
78 
79 static nir_lower_blend_options
nir_make_options(const struct pipe_blend_state * blend,unsigned i)80 nir_make_options(const struct pipe_blend_state *blend, unsigned i)
81 {
82         nir_lower_blend_options options = { 0 };
83 
84         if (blend->logicop_enable) {
85             options.logicop_enable = true;
86             options.logicop_func = blend->logicop_func;
87             return options;
88         }
89 
90         options.logicop_enable = false;
91 
92         if (!blend->independent_blend_enable)
93                 i = 0;
94 
95         /* If blend is disabled, we just use replace mode */
96 
97         nir_lower_blend_channel rgb = {
98                 .func = BLEND_FUNC_ADD,
99                 .src_factor = BLEND_FACTOR_ZERO,
100                 .invert_src_factor = true,
101                 .dst_factor = BLEND_FACTOR_ZERO,
102                 .invert_dst_factor = false
103         };
104 
105         nir_lower_blend_channel alpha = rgb;
106 
107         if (blend->rt[i].blend_enable) {
108                 rgb.func = util_blend_func_to_shader(blend->rt[i].rgb_func);
109                 rgb.src_factor = util_blend_factor_to_shader(blend->rt[i].rgb_src_factor);
110                 rgb.dst_factor = util_blend_factor_to_shader(blend->rt[i].rgb_dst_factor);
111                 rgb.invert_src_factor = util_blend_factor_is_inverted(blend->rt[i].rgb_src_factor);
112                 rgb.invert_dst_factor = util_blend_factor_is_inverted(blend->rt[i].rgb_dst_factor);
113 
114                 alpha.func = util_blend_func_to_shader(blend->rt[i].alpha_func);
115                 alpha.src_factor = util_blend_factor_to_shader(blend->rt[i].alpha_src_factor);
116                 alpha.dst_factor = util_blend_factor_to_shader(blend->rt[i].alpha_dst_factor);
117                 alpha.invert_src_factor = util_blend_factor_is_inverted(blend->rt[i].alpha_src_factor);
118                 alpha.invert_dst_factor = util_blend_factor_is_inverted(blend->rt[i].alpha_dst_factor);
119         }
120 
121         options.rgb = rgb;
122         options.alpha = alpha;
123 
124         options.colormask = blend->rt[i].colormask;
125 
126         return options;
127 }
128 
129 static nir_ssa_def *
nir_iclamp(nir_builder * b,nir_ssa_def * v,int32_t lo,int32_t hi)130 nir_iclamp(nir_builder *b, nir_ssa_def *v, int32_t lo, int32_t hi)
131 {
132         return nir_imin(b, nir_imax(b, v, nir_imm_int(b, lo)), nir_imm_int(b, hi));
133 }
134 
135 struct panfrost_blend_shader *
panfrost_create_blend_shader(struct panfrost_context * ctx,struct panfrost_blend_state * state,const struct panfrost_blend_shader_key * key)136 panfrost_create_blend_shader(struct panfrost_context *ctx,
137                              struct panfrost_blend_state *state,
138                              const struct panfrost_blend_shader_key *key)
139 {
140         struct panfrost_device *dev = pan_device(ctx->base.screen);
141         struct panfrost_blend_shader *res = rzalloc(ctx, struct panfrost_blend_shader);
142 
143         res->ctx = ctx;
144         res->key = *key;
145 
146         /* Build the shader */
147 
148         nir_shader *shader = nir_shader_create(ctx, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL);
149         nir_function *fn = nir_function_create(shader, "main");
150         fn->is_entrypoint = true;
151         nir_function_impl *impl = nir_function_impl_create(fn);
152 
153         const struct util_format_description *format_desc =
154                 util_format_description(key->format);
155 
156         nir_alu_type T = pan_unpacked_type_for_format(format_desc);
157         enum glsl_base_type g =
158                 (T == nir_type_float16) ? GLSL_TYPE_FLOAT16 :
159                 (T == nir_type_float32) ? GLSL_TYPE_FLOAT :
160                 (T == nir_type_int8) ? GLSL_TYPE_INT8 :
161                 (T == nir_type_int16) ? GLSL_TYPE_INT16 :
162                 (T == nir_type_int32) ? GLSL_TYPE_INT :
163                 (T == nir_type_uint8) ? GLSL_TYPE_UINT8 :
164                 (T == nir_type_uint16) ? GLSL_TYPE_UINT16 :
165                 (T == nir_type_uint32) ? GLSL_TYPE_UINT :
166                 GLSL_TYPE_FLOAT;
167 
168         /* Create the blend variables */
169 
170         nir_variable *c_src = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_Color");
171         nir_variable *c_src1 = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_Color1");
172         nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(g, 4), "gl_FragColor");
173 
174         c_src->data.location = VARYING_SLOT_COL0;
175         c_src1->data.location = VARYING_SLOT_VAR0;
176         c_out->data.location = FRAG_RESULT_COLOR;
177 
178         c_src1->data.driver_location = 1;
179 
180         /* Setup nir_builder */
181 
182         nir_builder _b;
183         nir_builder *b = &_b;
184         nir_builder_init(b, impl);
185         b->cursor = nir_before_block(nir_start_block(impl));
186 
187         /* Setup inputs */
188 
189         nir_ssa_def *s_src[] = {nir_load_var(b, c_src), nir_load_var(b, c_src1)};
190 
191         for (int i = 0; i < ARRAY_SIZE(s_src); ++i) {
192                 if (T == nir_type_float16)
193                         s_src[i] = nir_f2f16(b, s_src[i]);
194                 else if (T == nir_type_int16)
195                         s_src[i] = nir_i2i16(b, nir_iclamp(b, s_src[i], -32768, 32767));
196                 else if (T == nir_type_uint16)
197                         s_src[i] = nir_u2u16(b, nir_umin(b, s_src[i], nir_imm_int(b, 65535)));
198                 else if (T == nir_type_int8)
199                         s_src[i] = nir_i2i8(b, nir_iclamp(b, s_src[i], -128, 127));
200                 else if (T == nir_type_uint8)
201                         s_src[i] = nir_u2u8(b, nir_umin(b, s_src[i], nir_imm_int(b, 255)));
202         }
203 
204         /* Build a trivial blend shader */
205         nir_store_var(b, c_out, s_src[0], 0xFF);
206 
207         nir_lower_blend_options options = nir_make_options(&state->base, key->rt);
208         options.format = key->format;
209         options.is_bifrost = !!(dev->quirks & IS_BIFROST);
210         options.src1 = s_src[1];
211 
212         if (T == nir_type_float16)
213                 options.half = true;
214 
215         NIR_PASS_V(shader, nir_lower_blend, options);
216 
217         res->nir = shader;
218         return res;
219 }
220 
221 static uint64_t
bifrost_get_blend_desc(enum pipe_format fmt,unsigned rt)222 bifrost_get_blend_desc(enum pipe_format fmt, unsigned rt)
223 {
224         const struct util_format_description *desc = util_format_description(fmt);
225         uint64_t res;
226 
227         pan_pack(&res, BIFROST_INTERNAL_BLEND, cfg) {
228                 cfg.mode = MALI_BIFROST_BLEND_MODE_OPAQUE;
229                 cfg.fixed_function.num_comps = desc->nr_channels;
230                 cfg.fixed_function.rt = rt;
231 
232                 nir_alu_type T = pan_unpacked_type_for_format(desc);
233                 switch (T) {
234                 case nir_type_float16:
235                         cfg.fixed_function.conversion.register_format =
236                                 MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
237                         break;
238                 case nir_type_float32:
239                         cfg.fixed_function.conversion.register_format =
240                                 MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
241                         break;
242                 case nir_type_int16:
243                         cfg.fixed_function.conversion.register_format =
244                                 MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
245                         break;
246                 case nir_type_int32:
247                         cfg.fixed_function.conversion.register_format =
248                                 MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
249                         break;
250                 case nir_type_uint16:
251                         cfg.fixed_function.conversion.register_format =
252                                 MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
253                         break;
254                 case nir_type_uint32:
255                         cfg.fixed_function.conversion.register_format =
256                                 MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
257                         break;
258                 default:
259                         unreachable("Invalid format");
260                 }
261 
262                 cfg.fixed_function.conversion.memory_format.srgb =
263                         desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
264 
265                 cfg.fixed_function.conversion.memory_format.format =
266                          panfrost_format_to_bifrost_blend(desc, true);
267         }
268 
269         return res;
270 }
271 
272 void
panfrost_compile_blend_shader(struct panfrost_blend_shader * shader,const float * constants)273 panfrost_compile_blend_shader(struct panfrost_blend_shader *shader,
274                               const float *constants)
275 {
276         struct panfrost_device *dev = pan_device(shader->ctx->base.screen);
277 
278         /* If the shader has already been compiled and the constants match
279          * or the shader doesn't use the blend constants, we can keep the
280          * compiled version.
281          */
282         if (shader->buffer &&
283             (!constants ||
284              !memcmp(shader->constants, constants, sizeof(shader->constants))))
285                 return;
286 
287         /* Compile or recompile the NIR shader */
288         struct panfrost_compile_inputs inputs = {
289                 .gpu_id = dev->gpu_id,
290                 .is_blend = true,
291                 .blend.rt = shader->key.rt,
292                 .rt_formats = {shader->key.format},
293         };
294 
295         if (constants)
296                 memcpy(inputs.blend.constants, constants, sizeof(inputs.blend.constants));
297 
298         panfrost_program *program;
299 
300         if (dev->quirks & IS_BIFROST) {
301                 inputs.blend.bifrost_blend_desc =
302                         bifrost_get_blend_desc(shader->key.format, shader->key.rt);
303                 program = bifrost_compile_shader_nir(NULL, shader->nir, &inputs);
304 	} else {
305                 program = midgard_compile_shader_nir(NULL, shader->nir, &inputs);
306         }
307 
308         /* Allow us to patch later */
309         shader->first_tag = program->first_tag;
310         shader->size = program->compiled.size;
311         shader->buffer = reralloc_size(shader, shader->buffer, shader->size);
312         memcpy(shader->buffer, program->compiled.data, shader->size);
313         shader->work_count = program->work_register_count;
314 
315         ralloc_free(program);
316 }
317