1 /*
2  * Copyright (c) 2017-2019 Lima Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include "util/u_memory.h"
26 #include "util/ralloc.h"
27 #include "util/u_debug.h"
28 
29 #include "tgsi/tgsi_dump.h"
30 #include "compiler/nir/nir.h"
31 #include "nir/tgsi_to_nir.h"
32 
33 #include "pipe/p_state.h"
34 
35 #include "lima_screen.h"
36 #include "lima_context.h"
37 #include "lima_job.h"
38 #include "lima_program.h"
39 #include "lima_bo.h"
40 #include "lima_format.h"
41 
42 #include "ir/lima_ir.h"
43 
44 static const nir_shader_compiler_options vs_nir_options = {
45    .lower_ffma16 = true,
46    .lower_ffma32 = true,
47    .lower_ffma64 = true,
48    .lower_fpow = true,
49    .lower_ffract = true,
50    .lower_fdiv = true,
51    .lower_fmod = true,
52    .lower_fsqrt = true,
53    .lower_sub = true,
54    .lower_flrp32 = true,
55    .lower_flrp64 = true,
56    /* could be implemented by clamp */
57    .lower_fsat = true,
58    .lower_bitops = true,
59    .lower_rotate = true,
60    .lower_sincos = true,
61    .lower_fceil = true,
62 };
63 
64 static const nir_shader_compiler_options fs_nir_options = {
65    .lower_ffma16 = true,
66    .lower_ffma32 = true,
67    .lower_ffma64 = true,
68    .lower_fpow = true,
69    .lower_fdiv = true,
70    .lower_fmod = true,
71    .lower_sub = true,
72    .lower_flrp32 = true,
73    .lower_flrp64 = true,
74    .lower_fsign = true,
75    .lower_rotate = true,
76    .lower_fdot = true,
77    .lower_fdph = true,
78    .lower_bitops = true,
79    .lower_vector_cmp = true,
80 };
81 
82 const void *
lima_program_get_compiler_options(enum pipe_shader_type shader)83 lima_program_get_compiler_options(enum pipe_shader_type shader)
84 {
85    switch (shader) {
86    case PIPE_SHADER_VERTEX:
87       return &vs_nir_options;
88    case PIPE_SHADER_FRAGMENT:
89       return &fs_nir_options;
90    default:
91       return NULL;
92    }
93 }
94 
/* Size callback passed to nir_lower_io: the number of attribute slots the
 * GLSL type occupies. The bindless argument is not consulted.
 */
static int
type_size(const struct glsl_type *type, bool bindless)
{
   const int slots = glsl_count_attribute_slots(type, false);

   return slots;
}
100 
101 void
lima_program_optimize_vs_nir(struct nir_shader * s)102 lima_program_optimize_vs_nir(struct nir_shader *s)
103 {
104    bool progress;
105 
106    NIR_PASS_V(s, nir_lower_viewport_transform);
107    NIR_PASS_V(s, nir_lower_point_size, 1.0f, 100.0f);
108    NIR_PASS_V(s, nir_lower_io,
109 	      nir_var_shader_in | nir_var_shader_out, type_size, 0);
110    NIR_PASS_V(s, nir_lower_load_const_to_scalar);
111    NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar);
112    NIR_PASS_V(s, nir_lower_io_to_scalar,
113               nir_var_shader_in|nir_var_shader_out);
114 
115    do {
116       progress = false;
117 
118       NIR_PASS_V(s, nir_lower_vars_to_ssa);
119       NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
120       NIR_PASS(progress, s, nir_lower_phis_to_scalar);
121       NIR_PASS(progress, s, nir_copy_prop);
122       NIR_PASS(progress, s, nir_opt_remove_phis);
123       NIR_PASS(progress, s, nir_opt_dce);
124       NIR_PASS(progress, s, nir_opt_dead_cf);
125       NIR_PASS(progress, s, nir_opt_cse);
126       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
127       NIR_PASS(progress, s, nir_opt_algebraic);
128       NIR_PASS(progress, s, lima_nir_lower_ftrunc);
129       NIR_PASS(progress, s, nir_opt_constant_folding);
130       NIR_PASS(progress, s, nir_opt_undef);
131       NIR_PASS(progress, s, nir_opt_loop_unroll,
132                nir_var_shader_in |
133                nir_var_shader_out |
134                nir_var_function_temp);
135    } while (progress);
136 
137    NIR_PASS_V(s, nir_lower_int_to_float);
138    /* int_to_float pass generates ftrunc, so lower it */
139    NIR_PASS(progress, s, lima_nir_lower_ftrunc);
140    NIR_PASS_V(s, nir_lower_bool_to_float);
141 
142    NIR_PASS_V(s, nir_copy_prop);
143    NIR_PASS_V(s, nir_opt_dce);
144    NIR_PASS_V(s, nir_lower_locals_to_regs);
145    NIR_PASS_V(s, nir_convert_from_ssa, true);
146    NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
147    nir_sweep(s);
148 }
149 
150 static bool
lima_alu_to_scalar_filter_cb(const nir_instr * instr,const void * data)151 lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
152 {
153    if (instr->type != nir_instr_type_alu)
154       return false;
155 
156    nir_alu_instr *alu = nir_instr_as_alu(instr);
157    switch (alu->op) {
158    case nir_op_frcp:
159    case nir_op_frsq:
160    case nir_op_flog2:
161    case nir_op_fexp2:
162    case nir_op_fsqrt:
163    case nir_op_fsin:
164    case nir_op_fcos:
165       return true;
166    default:
167       break;
168    }
169 
170    /* nir vec4 fcsel assumes that each component of the condition will be
171     * used to select the same component from the two options, but Utgard PP
172     * has only 1 component condition. If all condition components are not the
173     * same we need to lower it to scalar.
174     */
175    switch (alu->op) {
176    case nir_op_bcsel:
177    case nir_op_fcsel:
178       break;
179    default:
180       return false;
181    }
182 
183    int num_components = nir_dest_num_components(alu->dest.dest);
184 
185    uint8_t swizzle = alu->src[0].swizzle[0];
186 
187    for (int i = 1; i < num_components; i++)
188       if (alu->src[0].swizzle[i] != swizzle)
189          return true;
190 
191    return false;
192 }
193 
194 void
lima_program_optimize_fs_nir(struct nir_shader * s,struct nir_lower_tex_options * tex_options)195 lima_program_optimize_fs_nir(struct nir_shader *s,
196                              struct nir_lower_tex_options *tex_options)
197 {
198    bool progress;
199 
200    NIR_PASS_V(s, nir_lower_fragcoord_wtrans);
201    NIR_PASS_V(s, nir_lower_io,
202 	      nir_var_shader_in | nir_var_shader_out, type_size, 0);
203    NIR_PASS_V(s, nir_lower_regs_to_ssa);
204    NIR_PASS_V(s, nir_lower_tex, tex_options);
205 
206    do {
207       progress = false;
208       NIR_PASS(progress, s, nir_opt_vectorize, NULL, NULL);
209    } while (progress);
210 
211    do {
212       progress = false;
213 
214       NIR_PASS_V(s, nir_lower_vars_to_ssa);
215       NIR_PASS(progress, s, nir_lower_alu_to_scalar, lima_alu_to_scalar_filter_cb, NULL);
216       NIR_PASS(progress, s, nir_copy_prop);
217       NIR_PASS(progress, s, nir_opt_remove_phis);
218       NIR_PASS(progress, s, nir_opt_dce);
219       NIR_PASS(progress, s, nir_opt_dead_cf);
220       NIR_PASS(progress, s, nir_opt_cse);
221       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
222       NIR_PASS(progress, s, nir_opt_algebraic);
223       NIR_PASS(progress, s, nir_opt_constant_folding);
224       NIR_PASS(progress, s, nir_opt_undef);
225       NIR_PASS(progress, s, nir_opt_loop_unroll,
226                nir_var_shader_in |
227                nir_var_shader_out |
228                nir_var_function_temp);
229       NIR_PASS(progress, s, lima_nir_split_load_input);
230    } while (progress);
231 
232    NIR_PASS_V(s, nir_lower_int_to_float);
233    NIR_PASS_V(s, nir_lower_bool_to_float);
234 
235    /* Some ops must be lowered after being converted from int ops,
236     * so re-run nir_opt_algebraic after int lowering. */
237    do {
238       progress = false;
239       NIR_PASS(progress, s, nir_opt_algebraic);
240    } while (progress);
241 
242    /* Must be run after optimization loop */
243    NIR_PASS_V(s, lima_nir_scale_trig);
244 
245    /* Lower modifiers */
246    NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_all_source_mods);
247    NIR_PASS_V(s, nir_copy_prop);
248    NIR_PASS_V(s, nir_opt_dce);
249 
250    NIR_PASS_V(s, nir_lower_locals_to_regs);
251    NIR_PASS_V(s, nir_convert_from_ssa, true);
252    NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
253 
254    NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
255    NIR_PASS_V(s, nir_lower_vec_to_movs);
256 
257    NIR_PASS_V(s, lima_nir_duplicate_load_uniforms);
258    NIR_PASS_V(s, lima_nir_duplicate_load_inputs);
259    NIR_PASS_V(s, lima_nir_duplicate_load_consts);
260 
261    nir_sweep(s);
262 }
263 
264 static bool
lima_fs_compile_shader(struct lima_context * ctx,struct lima_fs_shader_state * fs,struct nir_lower_tex_options * tex_options)265 lima_fs_compile_shader(struct lima_context *ctx,
266                        struct lima_fs_shader_state *fs,
267                        struct nir_lower_tex_options *tex_options)
268 {
269    struct lima_screen *screen = lima_screen(ctx->base.screen);
270    nir_shader *nir = nir_shader_clone(fs, fs->base.ir.nir);
271 
272    lima_program_optimize_fs_nir(nir, tex_options);
273 
274    if (lima_debug & LIMA_DEBUG_PP)
275       nir_print_shader(nir, stdout);
276 
277    if (!ppir_compile_nir(fs, nir, screen->pp_ra, &ctx->debug)) {
278       ralloc_free(nir);
279       return false;
280    }
281 
282    fs->uses_discard = nir->info.fs.uses_discard;
283    ralloc_free(nir);
284 
285    return true;
286 }
287 
288 static void *
lima_create_fs_state(struct pipe_context * pctx,const struct pipe_shader_state * cso)289 lima_create_fs_state(struct pipe_context *pctx,
290                      const struct pipe_shader_state *cso)
291 {
292    struct lima_context *ctx = lima_context(pctx);
293    struct lima_fs_shader_state *so = rzalloc(NULL, struct lima_fs_shader_state);
294 
295    if (!so)
296       return NULL;
297 
298    nir_shader *nir;
299    if (cso->type == PIPE_SHADER_IR_NIR)
300       /* The backend takes ownership of the NIR shader on state
301        * creation.
302        */
303       nir = cso->ir.nir;
304    else {
305       assert(cso->type == PIPE_SHADER_IR_TGSI);
306 
307       nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
308    }
309 
310    so->base.type = PIPE_SHADER_IR_NIR;
311    so->base.ir.nir = nir;
312 
313    uint8_t identity[4] = { PIPE_SWIZZLE_X,
314                            PIPE_SWIZZLE_Y,
315                            PIPE_SWIZZLE_Z,
316                            PIPE_SWIZZLE_W };
317 
318    struct nir_lower_tex_options tex_options = {
319       .lower_txp = ~0u,
320       .swizzle_result = 0,
321    };
322 
323    /* Initialize with identity swizzles. That should suffice for most shaders  */
324    for (int i = 0; i < PIPE_MAX_SAMPLERS; i++)
325       memcpy(so->swizzles[i], identity, 4);
326 
327    if (!lima_fs_compile_shader(ctx, so, &tex_options)) {
328       ralloc_free(so);
329       return NULL;
330    }
331 
332    return so;
333 }
334 
335 static void
lima_bind_fs_state(struct pipe_context * pctx,void * hwcso)336 lima_bind_fs_state(struct pipe_context *pctx, void *hwcso)
337 {
338    struct lima_context *ctx = lima_context(pctx);
339 
340    ctx->fs = hwcso;
341    ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_FRAG;
342 }
343 
344 static void
lima_delete_fs_state(struct pipe_context * pctx,void * hwcso)345 lima_delete_fs_state(struct pipe_context *pctx, void *hwcso)
346 {
347    struct lima_fs_shader_state *so = hwcso;
348 
349    if (so->bo)
350       lima_bo_unreference(so->bo);
351 
352    ralloc_free(so->base.ir.nir);
353    ralloc_free(so);
354 }
355 
356 bool
lima_update_vs_state(struct lima_context * ctx)357 lima_update_vs_state(struct lima_context *ctx)
358 {
359    struct lima_vs_shader_state *vs = ctx->vs;
360    if (!vs->bo) {
361       struct lima_screen *screen = lima_screen(ctx->base.screen);
362       vs->bo = lima_bo_create(screen, vs->shader_size, 0);
363       if (!vs->bo) {
364          fprintf(stderr, "lima: create vs shader bo fail\n");
365          return false;
366       }
367 
368       memcpy(lima_bo_map(vs->bo), vs->shader, vs->shader_size);
369       ralloc_free(vs->shader);
370       vs->shader = NULL;
371    }
372 
373    return true;
374 }
375 
376 bool
lima_update_fs_state(struct lima_context * ctx)377 lima_update_fs_state(struct lima_context *ctx)
378 {
379    struct lima_fs_shader_state *fs = ctx->fs;
380    struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj;
381    struct nir_lower_tex_options tex_options = {
382       .lower_txp = ~0u,
383       .swizzle_result = 0,
384    };
385    bool needs_recompile = false;
386 
387    /* Check if texture formats has changed since last compilation.
388     * If it has we need to recompile shader.
389     */
390    if (((ctx->dirty & LIMA_CONTEXT_DIRTY_TEXTURES) &&
391        lima_tex->num_samplers &&
392        lima_tex->num_textures)) {
393       uint8_t identity[4] = { PIPE_SWIZZLE_X,
394                               PIPE_SWIZZLE_Y,
395                               PIPE_SWIZZLE_Z,
396                               PIPE_SWIZZLE_W };
397       for (int i = 0; i < lima_tex->num_samplers; i++) {
398          struct lima_sampler_view *texture = lima_sampler_view(lima_tex->textures[i]);
399          struct pipe_resource *prsc = texture->base.texture;
400          const uint8_t *swizzle = lima_format_get_texel_swizzle(prsc->format);
401          if (memcmp(fs->swizzles[i], swizzle, 4)) {
402             needs_recompile = true;
403             memcpy(fs->swizzles[i], swizzle, 4);
404          }
405 
406          for (int j = 0; j < 4; j++)
407             tex_options.swizzles[i][j] = swizzle[j];
408 
409          if (memcmp(swizzle, identity, 4))
410             tex_options.swizzle_result |= (1 << i);
411       }
412 
413       /* Fill rest with identity swizzle */
414       for (int i = lima_tex->num_samplers; i < PIPE_MAX_SAMPLERS; i++)
415          memcpy(fs->swizzles[i], identity, 4);
416    }
417 
418    if (needs_recompile) {
419       if (fs->bo) {
420          lima_bo_unreference(fs->bo);
421          fs->bo = NULL;
422       }
423 
424       if (!lima_fs_compile_shader(ctx, fs, &tex_options))
425          return false;
426    }
427 
428    if (!fs->bo) {
429       struct lima_screen *screen = lima_screen(ctx->base.screen);
430       fs->bo = lima_bo_create(screen, fs->shader_size, 0);
431       if (!fs->bo) {
432          fprintf(stderr, "lima: create fs shader bo fail\n");
433          return false;
434       }
435 
436       memcpy(lima_bo_map(fs->bo), fs->shader, fs->shader_size);
437       ralloc_free(fs->shader);
438       fs->shader = NULL;
439    }
440 
441    struct lima_job *job = lima_job_get(ctx);
442    job->pp_max_stack_size = MAX2(job->pp_max_stack_size, ctx->fs->stack_size);
443 
444    return true;
445 }
446 
447 static void *
lima_create_vs_state(struct pipe_context * pctx,const struct pipe_shader_state * cso)448 lima_create_vs_state(struct pipe_context *pctx,
449                      const struct pipe_shader_state *cso)
450 {
451    struct lima_context *ctx = lima_context(pctx);
452    struct lima_vs_shader_state *so = rzalloc(NULL, struct lima_vs_shader_state);
453 
454    if (!so)
455       return NULL;
456 
457    nir_shader *nir;
458    if (cso->type == PIPE_SHADER_IR_NIR)
459       nir = cso->ir.nir;
460    else {
461       assert(cso->type == PIPE_SHADER_IR_TGSI);
462 
463       nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
464    }
465 
466    lima_program_optimize_vs_nir(nir);
467 
468    if (lima_debug & LIMA_DEBUG_GP)
469       nir_print_shader(nir, stdout);
470 
471    if (!gpir_compile_nir(so, nir, &ctx->debug)) {
472       ralloc_free(so);
473       return NULL;
474    }
475 
476    ralloc_free(nir);
477 
478    return so;
479 }
480 
481 static void
lima_bind_vs_state(struct pipe_context * pctx,void * hwcso)482 lima_bind_vs_state(struct pipe_context *pctx, void *hwcso)
483 {
484    struct lima_context *ctx = lima_context(pctx);
485 
486    ctx->vs = hwcso;
487    ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_VERT;
488 }
489 
490 static void
lima_delete_vs_state(struct pipe_context * pctx,void * hwcso)491 lima_delete_vs_state(struct pipe_context *pctx, void *hwcso)
492 {
493    struct lima_vs_shader_state *so = hwcso;
494 
495    if (so->bo)
496       lima_bo_unreference(so->bo);
497 
498    ralloc_free(so);
499 }
500 
501 void
lima_program_init(struct lima_context * ctx)502 lima_program_init(struct lima_context *ctx)
503 {
504    ctx->base.create_fs_state = lima_create_fs_state;
505    ctx->base.bind_fs_state = lima_bind_fs_state;
506    ctx->base.delete_fs_state = lima_delete_fs_state;
507 
508    ctx->base.create_vs_state = lima_create_vs_state;
509    ctx->base.bind_vs_state = lima_bind_vs_state;
510    ctx->base.delete_vs_state = lima_delete_vs_state;
511 }
512