1 /*
2  * Copyright (C) 2020 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include <math.h>
28 #include <stdio.h>
29 #include "pan_encoder.h"
30 #include "pan_pool.h"
31 #include "pan_scoreboard.h"
32 #include "pan_texture.h"
33 #include "panfrost-quirks.h"
34 #include "../midgard/midgard_compile.h"
35 #include "../bifrost/bifrost_compile.h"
36 #include "compiler/nir/nir_builder.h"
37 #include "util/u_math.h"
38 
39 /* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
40  * missing in many cases. We instead use software paths as fallbacks to
41  * implement blits, which are done as TILER jobs. No vertex shader is
42  * necessary since we can supply screen-space coordinates directly.
43  *
44  * This is primarily designed as a fallback for preloads but could be extended
45  * for other clears/blits if needed in the future. */
46 
47 static panfrost_program *
panfrost_build_blit_shader(struct panfrost_device * dev,gl_frag_result loc,nir_alu_type T,bool ms)48 panfrost_build_blit_shader(struct panfrost_device *dev,
49                            gl_frag_result loc,
50                            nir_alu_type T,
51                            bool ms)
52 {
53         bool is_colour = loc >= FRAG_RESULT_DATA0;
54 
55         nir_builder _b;
56         nir_builder_init_simple_shader(&_b, NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options);
57         nir_builder *b = &_b;
58         nir_shader *shader = b->shader;
59 
60         shader->info.internal = true;
61 
62         nir_variable *c_src = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "coord");
63         nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(
64                                 GLSL_TYPE_FLOAT, is_colour ? 4 : 1), "out");
65 
66         c_src->data.location = VARYING_SLOT_TEX0;
67         c_out->data.location = loc;
68 
69         nir_ssa_def *coord = nir_load_var(b, c_src);
70 
71         nir_tex_instr *tex = nir_tex_instr_create(shader, ms ? 3 : 1);
72 
73         tex->dest_type = T;
74 
75         if (ms) {
76                 tex->src[0].src_type = nir_tex_src_coord;
77                 tex->src[0].src = nir_src_for_ssa(nir_f2i32(b, coord));
78                 tex->coord_components = 2;
79 
80                 tex->src[1].src_type = nir_tex_src_ms_index;
81                 tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(b));
82 
83                 tex->src[2].src_type = nir_tex_src_lod;
84                 tex->src[2].src = nir_src_for_ssa(nir_imm_int(b, 0));
85                 tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
86                 tex->op = nir_texop_txf_ms;
87         } else {
88                 tex->op = nir_texop_tex;
89 
90                 tex->src[0].src_type = nir_tex_src_coord;
91                 tex->src[0].src = nir_src_for_ssa(coord);
92                 tex->coord_components = 2;
93 
94                 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
95         }
96 
97         nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
98         nir_builder_instr_insert(b, &tex->instr);
99 
100         if (is_colour)
101                 nir_store_var(b, c_out, &tex->dest.ssa, 0xFF);
102         else
103                 nir_store_var(b, c_out, nir_channel(b, &tex->dest.ssa, 0), 0xFF);
104 
105         struct panfrost_compile_inputs inputs = {
106                 .gpu_id = dev->gpu_id,
107         };
108 
109         panfrost_program *program;
110 
111         if (dev->quirks & IS_BIFROST)
112                 program = bifrost_compile_shader_nir(NULL, shader, &inputs);
113         else
114                 program = midgard_compile_shader_nir(NULL, shader, &inputs);
115 
116         ralloc_free(shader);
117         return program;
118 }
119 
120 /* Compile and upload all possible blit shaders ahead-of-time to reduce draw
121  * time overhead. There's only ~30 of them at the moment, so this is fine */
122 
123 void
panfrost_init_blit_shaders(struct panfrost_device * dev)124 panfrost_init_blit_shaders(struct panfrost_device *dev)
125 {
126         bool is_bifrost = !!(dev->quirks & IS_BIFROST);
127         static const struct {
128                 gl_frag_result loc;
129                 unsigned types;
130         } shader_descs[] = {
131                 { FRAG_RESULT_DEPTH,   1 << PAN_BLIT_FLOAT },
132                 { FRAG_RESULT_STENCIL, 1 << PAN_BLIT_UINT },
133                 { FRAG_RESULT_DATA0,  ~0 },
134                 { FRAG_RESULT_DATA1,  ~0 },
135                 { FRAG_RESULT_DATA2,  ~0 },
136                 { FRAG_RESULT_DATA3,  ~0 },
137                 { FRAG_RESULT_DATA4,  ~0 },
138                 { FRAG_RESULT_DATA5,  ~0 },
139                 { FRAG_RESULT_DATA6,  ~0 },
140                 { FRAG_RESULT_DATA7,  ~0 }
141         };
142 
143         nir_alu_type nir_types[PAN_BLIT_NUM_TYPES] = {
144                 nir_type_float,
145                 nir_type_uint,
146                 nir_type_int
147         };
148 
149         /* Total size = # of shaders * bytes per shader. There are
150          * shaders for each RT (so up to DATA7 -- overestimate is
151          * okay) and up to NUM_TYPES variants of each, * 2 for multisampling
152          * variants. These shaders are simple enough that they should be less
153          * than 8 quadwords each (again, overestimate is fine). */
154 
155         unsigned offset = 0;
156         unsigned total_size = (FRAG_RESULT_DATA7 * PAN_BLIT_NUM_TYPES) * (8 * 16) * 2;
157 
158         if (is_bifrost)
159                 total_size *= 2;
160 
161         dev->blit_shaders.bo = panfrost_bo_create(dev, total_size, PAN_BO_EXECUTE);
162 
163         /* Don't bother generating multisampling variants if we don't actually
164          * support multisampling */
165         bool has_ms = !(dev->quirks & MIDGARD_SFBD);
166 
167         for (unsigned ms = 0; ms <= has_ms; ++ms) {
168                 for (unsigned i = 0; i < ARRAY_SIZE(shader_descs); ++i) {
169                         unsigned loc = shader_descs[i].loc;
170 
171                         for (enum pan_blit_type T = 0; T < PAN_BLIT_NUM_TYPES; ++T) {
172                                 if (!(shader_descs[i].types & (1 << T)))
173                                         continue;
174 
175                                 struct pan_blit_shader *shader = &dev->blit_shaders.loads[loc][T][ms];
176                                 panfrost_program *program =
177                                         panfrost_build_blit_shader(dev, loc,
178                                                                    nir_types[T], ms);
179 
180                                 assert(offset + program->compiled.size < total_size);
181                                 memcpy(dev->blit_shaders.bo->ptr.cpu + offset,
182                                        program->compiled.data, program->compiled.size);
183 
184                                 shader->shader = (dev->blit_shaders.bo->ptr.gpu + offset) |
185                                                  program->first_tag;
186 
187                                 int rt = loc - FRAG_RESULT_DATA0;
188                                 if (rt >= 0 && rt < 8 && program->blend_ret_offsets[rt])
189                                         shader->blend_ret_addr = program->blend_ret_offsets[rt] + shader->shader;
190 
191                                 offset += ALIGN_POT(program->compiled.size, is_bifrost ? 128 : 64);
192                                 ralloc_free(program);
193                         }
194                 }
195         }
196 }
197 
198 static void
panfrost_load_emit_viewport(struct pan_pool * pool,struct MALI_DRAW * draw,struct pan_image * image)199 panfrost_load_emit_viewport(struct pan_pool *pool, struct MALI_DRAW *draw,
200                             struct pan_image *image)
201 {
202         struct panfrost_ptr t = panfrost_pool_alloc(pool, MALI_VIEWPORT_LENGTH);
203         unsigned width = u_minify(image->width0, image->first_level);
204         unsigned height = u_minify(image->height0, image->first_level);
205 
206         pan_pack(t.cpu, VIEWPORT, cfg) {
207                 cfg.scissor_maximum_x = width - 1; /* Inclusive */
208                 cfg.scissor_maximum_y = height - 1;
209         }
210 
211         draw->viewport = t.gpu;
212 }
213 
214 static void
panfrost_load_prepare_rsd(struct pan_pool * pool,struct MALI_RENDERER_STATE * state,struct pan_image * image,unsigned loc)215 panfrost_load_prepare_rsd(struct pan_pool *pool, struct MALI_RENDERER_STATE *state,
216                           struct pan_image *image, unsigned loc)
217 {
218         /* Determine the sampler type needed. Stencil is always sampled as
219          * UINT. Pure (U)INT is always (U)INT. Everything else is FLOAT. */
220         enum pan_blit_type T =
221                 (loc == FRAG_RESULT_STENCIL) ? PAN_BLIT_UINT :
222                 (util_format_is_pure_uint(image->format)) ? PAN_BLIT_UINT :
223                 (util_format_is_pure_sint(image->format)) ? PAN_BLIT_INT :
224                 PAN_BLIT_FLOAT;
225         bool ms = image->nr_samples > 1;
226         const struct pan_blit_shader *shader =
227                 &pool->dev->blit_shaders.loads[loc][T][ms];
228 
229         state->shader.shader = shader->shader;
230         assert(state->shader.shader);
231         state->shader.varying_count = 1;
232         state->shader.texture_count = 1;
233         state->shader.sampler_count = 1;
234 
235         state->properties.stencil_from_shader = (loc == FRAG_RESULT_STENCIL);
236         state->properties.depth_source = (loc == FRAG_RESULT_DEPTH) ?
237                                          MALI_DEPTH_SOURCE_SHADER :
238                                          MALI_DEPTH_SOURCE_FIXED_FUNCTION;
239 
240         state->multisample_misc.sample_mask = 0xFFFF;
241         state->multisample_misc.multisample_enable = ms;
242         state->multisample_misc.evaluate_per_sample = ms;
243         state->multisample_misc.depth_write_mask = (loc == FRAG_RESULT_DEPTH);
244         state->multisample_misc.depth_function = MALI_FUNC_ALWAYS;
245 
246         state->stencil_mask_misc.stencil_enable = (loc == FRAG_RESULT_STENCIL);
247         state->stencil_mask_misc.stencil_mask_front = 0xFF;
248         state->stencil_mask_misc.stencil_mask_back = 0xFF;
249 
250         state->stencil_front.compare_function = MALI_FUNC_ALWAYS;
251         state->stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
252         state->stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
253         state->stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
254         state->stencil_front.mask = 0xFF;
255         state->stencil_back = state->stencil_front;
256 }
257 
258 static void
panfrost_load_emit_varying(struct pan_pool * pool,struct MALI_DRAW * draw,mali_ptr coordinates,unsigned vertex_count,bool is_bifrost)259 panfrost_load_emit_varying(struct pan_pool *pool, struct MALI_DRAW *draw,
260                           mali_ptr coordinates, unsigned vertex_count,
261                           bool is_bifrost)
262 {
263         /* Bifrost needs an empty desc to mark end of prefetching */
264         bool padding_buffer = is_bifrost;
265 
266         struct panfrost_ptr varying =
267                 panfrost_pool_alloc(pool, MALI_ATTRIBUTE_LENGTH);
268         struct panfrost_ptr varying_buffer =
269                 panfrost_pool_alloc(pool, MALI_ATTRIBUTE_BUFFER_LENGTH *
270                                 (padding_buffer ? 2 : 1));
271 
272         pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
273                 cfg.pointer = coordinates;
274                 cfg.stride = 4 * sizeof(float);
275                 cfg.size = cfg.stride * vertex_count;
276         }
277 
278         if (padding_buffer) {
279                 pan_pack(varying_buffer.cpu + MALI_ATTRIBUTE_BUFFER_LENGTH,
280                          ATTRIBUTE_BUFFER, cfg);
281         }
282 
283         pan_pack(varying.cpu, ATTRIBUTE, cfg) {
284                 cfg.buffer_index = 0;
285                 cfg.offset_enable = !is_bifrost;
286                 cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32_FLOAT].hw;
287         }
288 
289         draw->varyings = varying.gpu;
290         draw->varying_buffers = varying_buffer.gpu;
291         draw->position = coordinates;
292 }
293 
294 static void
midgard_load_emit_texture(struct pan_pool * pool,struct MALI_DRAW * draw,struct pan_image * image)295 midgard_load_emit_texture(struct pan_pool *pool, struct MALI_DRAW *draw,
296                           struct pan_image *image)
297 {
298         struct panfrost_ptr texture =
299                  panfrost_pool_alloc_aligned(pool,
300                                              MALI_MIDGARD_TEXTURE_LENGTH +
301                                              sizeof(mali_ptr) * 2 *
302                                              MAX2(image->nr_samples, 1),
303                                              128);
304 
305         struct panfrost_ptr sampler =
306                  panfrost_pool_alloc(pool, MALI_MIDGARD_SAMPLER_LENGTH);
307 
308         /* Create the texture descriptor. We partially compute the base address
309          * ourselves to account for layer, such that the texture descriptor
310          * itself is for a 2D texture with array size 1 even for 3D/array
311          * textures, removing the need to separately key the blit shaders for
312          * 2D and 3D variants */
313          panfrost_new_texture(texture.cpu,
314                               image->width0, image->height0,
315                               MAX2(image->nr_samples, 1), 1,
316                               image->format, MALI_TEXTURE_DIMENSION_2D,
317                               image->modifier,
318                               image->first_level, image->last_level,
319                               0, 0,
320                               image->nr_samples,
321                               0,
322                               (MALI_CHANNEL_R << 0) | (MALI_CHANNEL_G << 3) |
323                               (MALI_CHANNEL_B << 6) | (MALI_CHANNEL_A << 9),
324                               image->bo->ptr.gpu + image->first_layer *
325                               panfrost_get_layer_stride(image->slices,
326                                                         image->dim == MALI_TEXTURE_DIMENSION_3D,
327                                                         image->cubemap_stride, image->first_level),
328                               image->slices);
329 
330         pan_pack(sampler.cpu, MIDGARD_SAMPLER, cfg)
331                 cfg.normalized_coordinates = false;
332 
333         draw->textures = panfrost_pool_upload(pool, &texture.gpu, sizeof(texture.gpu));
334         draw->samplers = sampler.gpu;
335 }
336 
337 static void
midgard_load_emit_blend_rt(struct pan_pool * pool,void * out,mali_ptr blend_shader,struct pan_image * image,unsigned rt,unsigned loc)338 midgard_load_emit_blend_rt(struct pan_pool *pool, void *out,
339                            mali_ptr blend_shader, struct pan_image *image,
340                            unsigned rt, unsigned loc)
341 {
342         bool disabled = loc != (FRAG_RESULT_DATA0 + rt);
343         bool srgb = util_format_is_srgb(image->format);
344 
345         pan_pack(out, BLEND, cfg) {
346                 if (disabled) {
347                         cfg.midgard.equation.color_mask = 0xf;
348                         cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
349                         cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
350                         cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
351                         cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
352                         cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
353                         cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
354                         continue;
355                 }
356 
357                 cfg.round_to_fb_precision = true;
358                 cfg.srgb = srgb;
359 
360                 if (!blend_shader) {
361                         cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
362                         cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
363                         cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
364                         cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
365                         cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
366                         cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
367                         cfg.midgard.equation.color_mask = 0xf;
368                 } else {
369                         cfg.midgard.blend_shader = true;
370                         cfg.midgard.shader_pc = blend_shader;
371                 }
372         }
373 }
374 
375 static void
midgard_load_emit_rsd(struct pan_pool * pool,struct MALI_DRAW * draw,mali_ptr blend_shader,struct pan_image * image,unsigned loc)376 midgard_load_emit_rsd(struct pan_pool *pool, struct MALI_DRAW *draw,
377                       mali_ptr blend_shader, struct pan_image *image,
378                       unsigned loc)
379 {
380         struct panfrost_ptr t =
381                 panfrost_pool_alloc_aligned(pool,
382                                             MALI_RENDERER_STATE_LENGTH +
383                                             8 * MALI_BLEND_LENGTH,
384                                             128);
385         bool srgb = util_format_is_srgb(image->format);
386 
387         pan_pack(t.cpu, RENDERER_STATE, cfg) {
388                 panfrost_load_prepare_rsd(pool, &cfg, image, loc);
389                 cfg.properties.midgard.work_register_count = 4;
390                 cfg.properties.midgard.force_early_z = (loc >= FRAG_RESULT_DATA0);
391                 cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
392                 if (!(pool->dev->quirks & MIDGARD_SFBD)) {
393                         cfg.sfbd_blend_shader = blend_shader;
394                         continue;
395                 }
396 
397                 cfg.stencil_mask_misc.sfbd_write_enable = true;
398                 cfg.stencil_mask_misc.sfbd_dither_disable = true;
399                 cfg.stencil_mask_misc.sfbd_srgb = srgb;
400                 cfg.multisample_misc.sfbd_blend_shader = !!blend_shader;
401                 if (cfg.multisample_misc.sfbd_blend_shader) {
402                         cfg.sfbd_blend_shader = blend_shader;
403                         continue;
404                 }
405 
406                 cfg.sfbd_blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
407                 cfg.sfbd_blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
408                 cfg.sfbd_blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
409                 cfg.sfbd_blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
410                 cfg.sfbd_blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
411                 cfg.sfbd_blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
412                 cfg.sfbd_blend_constant = 0;
413 
414                 if (loc >= FRAG_RESULT_DATA0)
415                         cfg.sfbd_blend_equation.color_mask = 0xf;
416         }
417 
418         for (unsigned i = 0; i < 8; ++i) {
419                 void *dest = t.cpu + MALI_RENDERER_STATE_LENGTH + MALI_BLEND_LENGTH * i;
420 
421                 midgard_load_emit_blend_rt(pool, dest, blend_shader, image, i, loc);
422         }
423 
424         draw->state = t.gpu;
425 }
426 
427 /* Add a shader-based load on Midgard (draw-time for GL). Shaders are
428  * precached */
429 
430 void
panfrost_load_midg(struct pan_pool * pool,struct pan_scoreboard * scoreboard,mali_ptr blend_shader,mali_ptr fbd,mali_ptr coordinates,unsigned vertex_count,struct pan_image * image,unsigned loc)431 panfrost_load_midg(struct pan_pool *pool,
432                    struct pan_scoreboard *scoreboard,
433                    mali_ptr blend_shader,
434                    mali_ptr fbd,
435                    mali_ptr coordinates, unsigned vertex_count,
436                    struct pan_image *image,
437                    unsigned loc)
438 {
439         struct panfrost_ptr t =
440                 panfrost_pool_alloc_aligned(pool,
441                                             MALI_MIDGARD_TILER_JOB_LENGTH,
442                                             64);
443         pan_section_pack(t.cpu, MIDGARD_TILER_JOB, DRAW, cfg) {
444                 cfg.texture_descriptor_is_64b = true;
445                 cfg.draw_descriptor_is_64b = true;
446                 cfg.four_components_per_vertex = true;
447 
448                 panfrost_load_emit_varying(pool, &cfg, coordinates, vertex_count, false);
449                 midgard_load_emit_texture(pool, &cfg, image);
450                 panfrost_load_emit_viewport(pool, &cfg, image);
451                 cfg.fbd = fbd;
452                 midgard_load_emit_rsd(pool, &cfg, blend_shader, image, loc);
453         }
454 
455         pan_section_pack(t.cpu, MIDGARD_TILER_JOB, PRIMITIVE, cfg) {
456                 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLES;
457                 cfg.index_count = vertex_count;
458                 cfg.job_task_split = 6;
459         }
460 
461         pan_section_pack(t.cpu, MIDGARD_TILER_JOB, PRIMITIVE_SIZE, cfg) {
462                 cfg.constant = 1.0f;
463         }
464 
465         panfrost_pack_work_groups_compute(pan_section_ptr(t.cpu,
466                                                           MIDGARD_TILER_JOB,
467                                                           INVOCATION),
468                                           1, vertex_count, 1, 1, 1, 1, true);
469 
470         panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &t, true);
471 }
472 
473 static void
bifrost_load_emit_texture(struct pan_pool * pool,struct MALI_DRAW * draw,struct pan_image * image)474 bifrost_load_emit_texture(struct pan_pool *pool, struct MALI_DRAW *draw,
475                           struct pan_image *image)
476 {
477         struct panfrost_ptr texture =
478                  panfrost_pool_alloc_aligned(pool,
479                                              MALI_BIFROST_TEXTURE_LENGTH +
480                                              sizeof(mali_ptr) * 2 *
481                                              MAX2(image->nr_samples, 1),
482                                              128);
483         struct panfrost_ptr sampler =
484                  panfrost_pool_alloc(pool, MALI_BIFROST_SAMPLER_LENGTH);
485         struct panfrost_ptr payload = {
486                  .cpu = texture.cpu + MALI_BIFROST_TEXTURE_LENGTH,
487                  .gpu = texture.gpu + MALI_BIFROST_TEXTURE_LENGTH,
488         };
489 
490         panfrost_new_texture_bifrost(pool->dev, (void *)texture.cpu,
491                                      image->width0, image->height0,
492                                      MAX2(image->nr_samples, 1), 1,
493                                      image->format, MALI_TEXTURE_DIMENSION_2D,
494                                      image->modifier,
495                                      image->first_level, image->last_level,
496                                      0, 0,
497                                      image->nr_samples,
498                                      0,
499                                      (MALI_CHANNEL_R << 0) | (MALI_CHANNEL_G << 3) |
500                                      (MALI_CHANNEL_B << 6) | (MALI_CHANNEL_A << 9),
501                                      image->bo->ptr.gpu + image->first_layer *
502                                      panfrost_get_layer_stride(image->slices,
503                                                                image->dim == MALI_TEXTURE_DIMENSION_3D,
504                                                                image->cubemap_stride, image->first_level),
505                                      image->slices,
506                                      &payload);
507 
508         pan_pack(sampler.cpu, BIFROST_SAMPLER, cfg) {
509                 cfg.seamless_cube_map = false;
510                 cfg.normalized_coordinates = false;
511                 cfg.point_sample_minify = true;
512                 cfg.point_sample_magnify = true;
513         }
514 
515         draw->textures = texture.gpu;
516         draw->samplers = sampler.gpu;
517 }
518 
519 static enum mali_bifrost_register_file_format
blit_type_to_reg_fmt(enum pan_blit_type btype)520 blit_type_to_reg_fmt(enum pan_blit_type btype)
521 {
522         switch (btype) {
523         case PAN_BLIT_FLOAT:
524                 return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
525         case PAN_BLIT_INT:
526                 return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
527         case PAN_BLIT_UINT:
528                 return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
529         default:
530                 unreachable("Invalid blit type");
531         }
532 }
533 
534 static void
bifrost_load_emit_blend_rt(struct pan_pool * pool,void * out,mali_ptr blend_shader,struct pan_image * image,unsigned rt,unsigned loc)535 bifrost_load_emit_blend_rt(struct pan_pool *pool, void *out,
536                            mali_ptr blend_shader, struct pan_image *image,
537                            unsigned rt, unsigned loc)
538 {
539         enum pan_blit_type T =
540                 (loc == FRAG_RESULT_STENCIL) ? PAN_BLIT_UINT :
541                 (util_format_is_pure_uint(image->format)) ? PAN_BLIT_UINT :
542                 (util_format_is_pure_sint(image->format)) ? PAN_BLIT_INT :
543                 PAN_BLIT_FLOAT;
544         bool disabled = loc != (FRAG_RESULT_DATA0 + rt);
545         bool srgb = util_format_is_srgb(image->format);
546 
547         pan_pack(out, BLEND, cfg) {
548                 if (disabled) {
549                         cfg.enable = false;
550                         cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OFF;
551                         continue;
552                 }
553 
554                 cfg.round_to_fb_precision = true;
555                 cfg.srgb = srgb;
556                 cfg.bifrost.internal.mode = blend_shader ?
557                                             MALI_BIFROST_BLEND_MODE_SHADER :
558                                             MALI_BIFROST_BLEND_MODE_OPAQUE;
559                 if (blend_shader) {
560                         cfg.bifrost.internal.shader.pc = blend_shader;
561                 } else {
562                         const struct util_format_description *format_desc =
563                                 util_format_description(image->format);
564 
565                         cfg.bifrost.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
566                         cfg.bifrost.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
567                         cfg.bifrost.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
568                         cfg.bifrost.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
569                         cfg.bifrost.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
570                         cfg.bifrost.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
571                         cfg.bifrost.equation.color_mask = 0xf;
572                         cfg.bifrost.internal.fixed_function.num_comps = 4;
573                         cfg.bifrost.internal.fixed_function.conversion.memory_format.format =
574                                 panfrost_format_to_bifrost_blend(format_desc, true);
575                         cfg.bifrost.internal.fixed_function.conversion.register_format =
576                                 blit_type_to_reg_fmt(T);
577 
578                         cfg.bifrost.internal.fixed_function.rt = rt;
579                         if (pool->dev->quirks & HAS_SWIZZLES) {
580                                 cfg.bifrost.internal.fixed_function.conversion.memory_format.swizzle =
581                                         panfrost_get_default_swizzle(4);
582                         }
583                 }
584         }
585 }
586 
587 static void
bifrost_load_emit_rsd(struct pan_pool * pool,struct MALI_DRAW * draw,mali_ptr blend_shader,struct pan_image * image,unsigned loc)588 bifrost_load_emit_rsd(struct pan_pool *pool, struct MALI_DRAW *draw,
589                       mali_ptr blend_shader, struct pan_image *image,
590                       unsigned loc)
591 {
592         struct panfrost_ptr t =
593                 panfrost_pool_alloc_aligned(pool,
594                                             MALI_RENDERER_STATE_LENGTH +
595                                             8 * MALI_BLEND_LENGTH,
596                                             128);
597 
598         pan_pack(t.cpu, RENDERER_STATE, cfg) {
599                 panfrost_load_prepare_rsd(pool, &cfg, image, loc);
600                 if (loc >= FRAG_RESULT_DATA0) {
601                         cfg.properties.bifrost.zs_update_operation =
602                                 MALI_PIXEL_KILL_STRONG_EARLY;
603                         cfg.properties.bifrost.pixel_kill_operation =
604                                 MALI_PIXEL_KILL_FORCE_EARLY;
605                 } else {
606                         cfg.properties.bifrost.zs_update_operation =
607                                 MALI_PIXEL_KILL_FORCE_LATE;
608                         cfg.properties.bifrost.pixel_kill_operation =
609                                 MALI_PIXEL_KILL_FORCE_LATE;
610                 }
611                 cfg.properties.bifrost.allow_forward_pixel_to_kill = true;
612                 cfg.preload.fragment.coverage = true;
613         }
614 
615         for (unsigned i = 0; i < 8; ++i) {
616                 void *dest = t.cpu + MALI_RENDERER_STATE_LENGTH + MALI_BLEND_LENGTH * i;
617 
618                 bifrost_load_emit_blend_rt(pool, dest, blend_shader, image, i, loc);
619         }
620 
621         draw->state = t.gpu;
622 }
623 
624 void
panfrost_load_bifrost(struct pan_pool * pool,struct pan_scoreboard * scoreboard,mali_ptr blend_shader,mali_ptr thread_storage,mali_ptr tiler,mali_ptr coordinates,unsigned vertex_count,struct pan_image * image,unsigned loc)625 panfrost_load_bifrost(struct pan_pool *pool,
626                       struct pan_scoreboard *scoreboard,
627                       mali_ptr blend_shader,
628                       mali_ptr thread_storage,
629                       mali_ptr tiler,
630                       mali_ptr coordinates, unsigned vertex_count,
631                       struct pan_image *image,
632                       unsigned loc)
633 {
634         struct panfrost_ptr t =
635                 panfrost_pool_alloc_aligned(pool,
636                                             MALI_BIFROST_TILER_JOB_LENGTH,
637                                             64);
638         pan_section_pack(t.cpu, BIFROST_TILER_JOB, DRAW, cfg) {
639                 cfg.four_components_per_vertex = true;
640                 cfg.draw_descriptor_is_64b = true;
641 
642                 panfrost_load_emit_varying(pool, &cfg, coordinates, vertex_count, true);
643                 bifrost_load_emit_texture(pool, &cfg, image);
644                 panfrost_load_emit_viewport(pool, &cfg, image);
645                 cfg.thread_storage = thread_storage;
646                 bifrost_load_emit_rsd(pool, &cfg, blend_shader, image, loc);
647         }
648 
649         pan_section_pack(t.cpu, BIFROST_TILER_JOB, PRIMITIVE, cfg) {
650                 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLES;
651                 cfg.index_count = vertex_count;
652                 cfg.job_task_split = 6;
653         }
654 
655         pan_section_pack(t.cpu, BIFROST_TILER_JOB, PRIMITIVE_SIZE, cfg) {
656                 cfg.constant = 1.0f;
657         }
658 
659         panfrost_pack_work_groups_compute(pan_section_ptr(t.cpu,
660                                                           MIDGARD_TILER_JOB,
661                                                           INVOCATION),
662                                           1, vertex_count, 1, 1, 1, 1, true);
663 
664         pan_section_pack(t.cpu, BIFROST_TILER_JOB, PADDING, cfg) { }
665         pan_section_pack(t.cpu, BIFROST_TILER_JOB, TILER, cfg) {
666                 cfg.address = tiler;
667         }
668 
669         panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &t, true);
670 }
671