1 /*
2 * © Copyright 2018 Alyssa Rosenzweig
3 * Copyright (C) 2019-2020 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 */
25
26 #include <stdio.h>
27 #include "pan_blend_shaders.h"
28 #include "pan_util.h"
29 #include "panfrost-quirks.h"
30 #include "midgard/midgard_compile.h"
31 #include "bifrost/bifrost_compile.h"
32 #include "compiler/nir/nir_builder.h"
33 #include "nir/nir_lower_blend.h"
34 #include "panfrost/util/pan_lower_framebuffer.h"
35 #include "gallium/auxiliary/util/u_blend.h"
36 #include "util/u_memory.h"
37
38 /*
39 * Implements the command stream portion of programmatic blend shaders.
40 *
41 * On Midgard, common blending operations are accelerated by the fixed-function
42 * blending pipeline. Panfrost supports this fast path via the code in
43 * pan_blending.c. Nevertheless, uncommon blend modes (including some seemingly
44 * simple modes present in ES2) require "blend shaders", a special internal
45 * shader type used for programmable blending.
46 *
47 * Blend shaders operate during the normal blending time, but they bypass the
48 * fixed-function blending pipeline and instead go straight to the Midgard
49 * shader cores. The shaders themselves are essentially just fragment shaders,
50 * making heavy use of uint8 arithmetic to manipulate RGB values for the
51 * framebuffer.
52 *
53 * As is typical with Midgard, shader binaries must be accompanied by
54 * information about the first tag (ORed with the bottom nibble of address,
55 * like usual) and work registers. Work register count is assumed to be less
 * than or equal to the corresponding fragment shader's work count. This
57 * suggests that blend shader invocation is tied to fragment shader
58 * execution.
59 *
60 * The shaders themselves use the standard ISA. The source pixel colour,
61 * including alpha, is preloaded into r0 as a vec4 of float32. The destination
62 * pixel colour must be loaded explicitly via load/store ops, possibly
63 * performing conversions in software. The blended colour must be stored with a
64 * fragment writeout in the correct framebuffer format, either in software or
65 * via conversion opcodes on the load/store pipe.
66 *
67 * Blend shaders hardcode constants. Naively, this requires recompilation each
68 * time the blend color changes, which is a performance risk. Accordingly, we
69 * 'cheat' a bit: instead of loading the constant, we compile a shader with a
70 * dummy constant, exporting the offset to the immediate in the shader binary,
71 * storing this generic binary and metadata in the CSO itself at CSO create
72 * time.
73 *
74 * We then hot patch in the color into this shader at attachment / color change
75 * time, allowing for CSO create to be the only expensive operation
76 * (compilation).
77 */
78
79 static nir_lower_blend_options
nir_make_options(const struct pipe_blend_state * blend,unsigned i)80 nir_make_options(const struct pipe_blend_state *blend, unsigned i)
81 {
82 nir_lower_blend_options options = { 0 };
83
84 if (blend->logicop_enable) {
85 options.logicop_enable = true;
86 options.logicop_func = blend->logicop_func;
87 return options;
88 }
89
90 options.logicop_enable = false;
91
92 if (!blend->independent_blend_enable)
93 i = 0;
94
95 /* If blend is disabled, we just use replace mode */
96
97 nir_lower_blend_channel rgb = {
98 .func = BLEND_FUNC_ADD,
99 .src_factor = BLEND_FACTOR_ZERO,
100 .invert_src_factor = true,
101 .dst_factor = BLEND_FACTOR_ZERO,
102 .invert_dst_factor = false
103 };
104
105 nir_lower_blend_channel alpha = rgb;
106
107 if (blend->rt[i].blend_enable) {
108 rgb.func = util_blend_func_to_shader(blend->rt[i].rgb_func);
109 rgb.src_factor = util_blend_factor_to_shader(blend->rt[i].rgb_src_factor);
110 rgb.dst_factor = util_blend_factor_to_shader(blend->rt[i].rgb_dst_factor);
111 rgb.invert_src_factor = util_blend_factor_is_inverted(blend->rt[i].rgb_src_factor);
112 rgb.invert_dst_factor = util_blend_factor_is_inverted(blend->rt[i].rgb_dst_factor);
113
114 alpha.func = util_blend_func_to_shader(blend->rt[i].alpha_func);
115 alpha.src_factor = util_blend_factor_to_shader(blend->rt[i].alpha_src_factor);
116 alpha.dst_factor = util_blend_factor_to_shader(blend->rt[i].alpha_dst_factor);
117 alpha.invert_src_factor = util_blend_factor_is_inverted(blend->rt[i].alpha_src_factor);
118 alpha.invert_dst_factor = util_blend_factor_is_inverted(blend->rt[i].alpha_dst_factor);
119 }
120
121 options.rgb = rgb;
122 options.alpha = alpha;
123
124 options.colormask = blend->rt[i].colormask;
125
126 return options;
127 }
128
129 static nir_ssa_def *
nir_iclamp(nir_builder * b,nir_ssa_def * v,int32_t lo,int32_t hi)130 nir_iclamp(nir_builder *b, nir_ssa_def *v, int32_t lo, int32_t hi)
131 {
132 return nir_imin(b, nir_imax(b, v, nir_imm_int(b, lo)), nir_imm_int(b, hi));
133 }
134
135 struct panfrost_blend_shader *
panfrost_create_blend_shader(struct panfrost_context * ctx,struct panfrost_blend_state * state,const struct panfrost_blend_shader_key * key)136 panfrost_create_blend_shader(struct panfrost_context *ctx,
137 struct panfrost_blend_state *state,
138 const struct panfrost_blend_shader_key *key)
139 {
140 struct panfrost_device *dev = pan_device(ctx->base.screen);
141 struct panfrost_blend_shader *res = rzalloc(ctx, struct panfrost_blend_shader);
142
143 res->ctx = ctx;
144 res->key = *key;
145
146 /* Build the shader */
147
148 nir_shader *shader = nir_shader_create(ctx, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL);
149 nir_function *fn = nir_function_create(shader, "main");
150 fn->is_entrypoint = true;
151 nir_function_impl *impl = nir_function_impl_create(fn);
152
153 const struct util_format_description *format_desc =
154 util_format_description(key->format);
155
156 nir_alu_type T = pan_unpacked_type_for_format(format_desc);
157 enum glsl_base_type g =
158 (T == nir_type_float16) ? GLSL_TYPE_FLOAT16 :
159 (T == nir_type_float32) ? GLSL_TYPE_FLOAT :
160 (T == nir_type_int8) ? GLSL_TYPE_INT8 :
161 (T == nir_type_int16) ? GLSL_TYPE_INT16 :
162 (T == nir_type_int32) ? GLSL_TYPE_INT :
163 (T == nir_type_uint8) ? GLSL_TYPE_UINT8 :
164 (T == nir_type_uint16) ? GLSL_TYPE_UINT16 :
165 (T == nir_type_uint32) ? GLSL_TYPE_UINT :
166 GLSL_TYPE_FLOAT;
167
168 /* Create the blend variables */
169
170 nir_variable *c_src = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_Color");
171 nir_variable *c_src1 = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_Color1");
172 nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(g, 4), "gl_FragColor");
173
174 c_src->data.location = VARYING_SLOT_COL0;
175 c_src1->data.location = VARYING_SLOT_VAR0;
176 c_out->data.location = FRAG_RESULT_COLOR;
177
178 c_src1->data.driver_location = 1;
179
180 /* Setup nir_builder */
181
182 nir_builder _b;
183 nir_builder *b = &_b;
184 nir_builder_init(b, impl);
185 b->cursor = nir_before_block(nir_start_block(impl));
186
187 /* Setup inputs */
188
189 nir_ssa_def *s_src[] = {nir_load_var(b, c_src), nir_load_var(b, c_src1)};
190
191 for (int i = 0; i < ARRAY_SIZE(s_src); ++i) {
192 if (T == nir_type_float16)
193 s_src[i] = nir_f2f16(b, s_src[i]);
194 else if (T == nir_type_int16)
195 s_src[i] = nir_i2i16(b, nir_iclamp(b, s_src[i], -32768, 32767));
196 else if (T == nir_type_uint16)
197 s_src[i] = nir_u2u16(b, nir_umin(b, s_src[i], nir_imm_int(b, 65535)));
198 else if (T == nir_type_int8)
199 s_src[i] = nir_i2i8(b, nir_iclamp(b, s_src[i], -128, 127));
200 else if (T == nir_type_uint8)
201 s_src[i] = nir_u2u8(b, nir_umin(b, s_src[i], nir_imm_int(b, 255)));
202 }
203
204 /* Build a trivial blend shader */
205 nir_store_var(b, c_out, s_src[0], 0xFF);
206
207 nir_lower_blend_options options = nir_make_options(&state->base, key->rt);
208 options.format = key->format;
209 options.is_bifrost = !!(dev->quirks & IS_BIFROST);
210 options.src1 = s_src[1];
211
212 if (T == nir_type_float16)
213 options.half = true;
214
215 NIR_PASS_V(shader, nir_lower_blend, options);
216
217 res->nir = shader;
218 return res;
219 }
220
221 static uint64_t
bifrost_get_blend_desc(enum pipe_format fmt,unsigned rt)222 bifrost_get_blend_desc(enum pipe_format fmt, unsigned rt)
223 {
224 const struct util_format_description *desc = util_format_description(fmt);
225 uint64_t res;
226
227 pan_pack(&res, BIFROST_INTERNAL_BLEND, cfg) {
228 cfg.mode = MALI_BIFROST_BLEND_MODE_OPAQUE;
229 cfg.fixed_function.num_comps = desc->nr_channels;
230 cfg.fixed_function.rt = rt;
231
232 nir_alu_type T = pan_unpacked_type_for_format(desc);
233 switch (T) {
234 case nir_type_float16:
235 cfg.fixed_function.conversion.register_format =
236 MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
237 break;
238 case nir_type_float32:
239 cfg.fixed_function.conversion.register_format =
240 MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
241 break;
242 case nir_type_int16:
243 cfg.fixed_function.conversion.register_format =
244 MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
245 break;
246 case nir_type_int32:
247 cfg.fixed_function.conversion.register_format =
248 MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
249 break;
250 case nir_type_uint16:
251 cfg.fixed_function.conversion.register_format =
252 MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
253 break;
254 case nir_type_uint32:
255 cfg.fixed_function.conversion.register_format =
256 MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
257 break;
258 default:
259 unreachable("Invalid format");
260 }
261
262 cfg.fixed_function.conversion.memory_format.srgb =
263 desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
264
265 cfg.fixed_function.conversion.memory_format.format =
266 panfrost_format_to_bifrost_blend(desc, true);
267 }
268
269 return res;
270 }
271
272 void
panfrost_compile_blend_shader(struct panfrost_blend_shader * shader,const float * constants)273 panfrost_compile_blend_shader(struct panfrost_blend_shader *shader,
274 const float *constants)
275 {
276 struct panfrost_device *dev = pan_device(shader->ctx->base.screen);
277
278 /* If the shader has already been compiled and the constants match
279 * or the shader doesn't use the blend constants, we can keep the
280 * compiled version.
281 */
282 if (shader->buffer &&
283 (!constants ||
284 !memcmp(shader->constants, constants, sizeof(shader->constants))))
285 return;
286
287 /* Compile or recompile the NIR shader */
288 struct panfrost_compile_inputs inputs = {
289 .gpu_id = dev->gpu_id,
290 .is_blend = true,
291 .blend.rt = shader->key.rt,
292 .rt_formats = {shader->key.format},
293 };
294
295 if (constants)
296 memcpy(inputs.blend.constants, constants, sizeof(inputs.blend.constants));
297
298 panfrost_program *program;
299
300 if (dev->quirks & IS_BIFROST) {
301 inputs.blend.bifrost_blend_desc =
302 bifrost_get_blend_desc(shader->key.format, shader->key.rt);
303 program = bifrost_compile_shader_nir(NULL, shader->nir, &inputs);
304 } else {
305 program = midgard_compile_shader_nir(NULL, shader->nir, &inputs);
306 }
307
308 /* Allow us to patch later */
309 shader->first_tag = program->first_tag;
310 shader->size = program->compiled.size;
311 shader->buffer = reralloc_size(shader, shader->buffer, shader->size);
312 memcpy(shader->buffer, program->compiled.data, shader->size);
313 shader->work_count = program->work_register_count;
314
315 ralloc_free(program);
316 }
317