1 /*
2  * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3  * Copyright 2015 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 
26 #include "si_compute.h"
27 #include "si_pipe.h"
28 #include "util/format/u_format.h"
29 #include "util/u_log.h"
30 #include "util/u_surface.h"
31 
32 enum
33 {
34    SI_COPY =
35       SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND,
36 
37    SI_BLIT = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | SI_SAVE_FRAGMENT_STATE,
38 
39    SI_DECOMPRESS = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND,
40 
41    SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE
42 };
43 
si_blitter_begin(struct si_context * sctx,enum si_blitter_op op)44 void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op)
45 {
46    util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
47    util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso);
48    util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso);
49    util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
50    util_blitter_save_so_targets(sctx->blitter, sctx->streamout.num_targets,
51                                 (struct pipe_stream_output_target **)sctx->streamout.targets);
52    util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
53 
54    if (op & SI_SAVE_FRAGMENT_STATE) {
55       util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
56       util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
57       util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
58       util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
59       util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask);
60       util_blitter_save_scissor(sctx->blitter, &sctx->scissors[0]);
61       util_blitter_save_window_rectangles(sctx->blitter, sctx->window_rectangles_include,
62                                           sctx->num_window_rectangles, sctx->window_rectangles);
63    }
64 
65    if (op & SI_SAVE_FRAMEBUFFER)
66       util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state);
67 
68    if (op & SI_SAVE_TEXTURES) {
69       util_blitter_save_fragment_sampler_states(
70          sctx->blitter, 2, (void **)sctx->samplers[PIPE_SHADER_FRAGMENT].sampler_states);
71 
72       util_blitter_save_fragment_sampler_views(sctx->blitter, 2,
73                                                sctx->samplers[PIPE_SHADER_FRAGMENT].views);
74    }
75 
76    if (op & SI_DISABLE_RENDER_COND)
77       sctx->render_cond_force_off = true;
78 
79    if (sctx->screen->dpbb_allowed) {
80       sctx->dpbb_force_off = true;
81       si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
82    }
83 }
84 
si_blitter_end(struct si_context * sctx)85 void si_blitter_end(struct si_context *sctx)
86 {
87    if (sctx->screen->dpbb_allowed) {
88       sctx->dpbb_force_off = false;
89       si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
90    }
91 
92    sctx->render_cond_force_off = false;
93 
94    /* Restore shader pointers because the VS blit shader changed all
95     * non-global VS user SGPRs. */
96    sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX);
97    sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL;
98    sctx->vertex_buffer_user_sgprs_dirty = sctx->num_vertex_elements > 0;
99    si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
100 }
101 
u_max_sample(struct pipe_resource * r)102 static unsigned u_max_sample(struct pipe_resource *r)
103 {
104    return r->nr_samples ? r->nr_samples - 1 : 0;
105 }
106 
si_blit_dbcb_copy(struct si_context * sctx,struct si_texture * src,struct si_texture * dst,unsigned planes,unsigned level_mask,unsigned first_layer,unsigned last_layer,unsigned first_sample,unsigned last_sample)107 static unsigned si_blit_dbcb_copy(struct si_context *sctx, struct si_texture *src,
108                                   struct si_texture *dst, unsigned planes, unsigned level_mask,
109                                   unsigned first_layer, unsigned last_layer, unsigned first_sample,
110                                   unsigned last_sample)
111 {
112    struct pipe_surface surf_tmpl = {{0}};
113    unsigned layer, sample, checked_last_layer, max_layer;
114    unsigned fully_copied_levels = 0;
115 
116    if (planes & PIPE_MASK_Z)
117       sctx->dbcb_depth_copy_enabled = true;
118    if (planes & PIPE_MASK_S)
119       sctx->dbcb_stencil_copy_enabled = true;
120    si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
121 
122    assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);
123 
124    sctx->decompression_enabled = true;
125 
126    while (level_mask) {
127       unsigned level = u_bit_scan(&level_mask);
128 
129       /* The smaller the mipmap level, the less layers there are
130        * as far as 3D textures are concerned. */
131       max_layer = util_max_layer(&src->buffer.b.b, level);
132       checked_last_layer = MIN2(last_layer, max_layer);
133 
134       surf_tmpl.u.tex.level = level;
135 
136       for (layer = first_layer; layer <= checked_last_layer; layer++) {
137          struct pipe_surface *zsurf, *cbsurf;
138 
139          surf_tmpl.format = src->buffer.b.b.format;
140          surf_tmpl.u.tex.first_layer = layer;
141          surf_tmpl.u.tex.last_layer = layer;
142 
143          zsurf = sctx->b.create_surface(&sctx->b, &src->buffer.b.b, &surf_tmpl);
144 
145          surf_tmpl.format = dst->buffer.b.b.format;
146          cbsurf = sctx->b.create_surface(&sctx->b, &dst->buffer.b.b, &surf_tmpl);
147 
148          for (sample = first_sample; sample <= last_sample; sample++) {
149             if (sample != sctx->dbcb_copy_sample) {
150                sctx->dbcb_copy_sample = sample;
151                si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
152             }
153 
154             si_blitter_begin(sctx, SI_DECOMPRESS);
155             util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample,
156                                               sctx->custom_dsa_flush, 1.0f);
157             si_blitter_end(sctx);
158          }
159 
160          pipe_surface_reference(&zsurf, NULL);
161          pipe_surface_reference(&cbsurf, NULL);
162       }
163 
164       if (first_layer == 0 && last_layer >= max_layer && first_sample == 0 &&
165           last_sample >= u_max_sample(&src->buffer.b.b))
166          fully_copied_levels |= 1u << level;
167    }
168 
169    sctx->decompression_enabled = false;
170    sctx->dbcb_depth_copy_enabled = false;
171    sctx->dbcb_stencil_copy_enabled = false;
172    si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
173 
174    return fully_copied_levels;
175 }
176 
177 /* Helper function for si_blit_decompress_zs_in_place.
178  */
si_blit_decompress_zs_planes_in_place(struct si_context * sctx,struct si_texture * texture,unsigned planes,unsigned level_mask,unsigned first_layer,unsigned last_layer)179 static void si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
180                                                   struct si_texture *texture, unsigned planes,
181                                                   unsigned level_mask, unsigned first_layer,
182                                                   unsigned last_layer)
183 {
184    struct pipe_surface *zsurf, surf_tmpl = {{0}};
185    unsigned layer, max_layer, checked_last_layer;
186    unsigned fully_decompressed_mask = 0;
187 
188    if (!level_mask)
189       return;
190 
191    if (planes & PIPE_MASK_S)
192       sctx->db_flush_stencil_inplace = true;
193    if (planes & PIPE_MASK_Z)
194       sctx->db_flush_depth_inplace = true;
195    si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
196 
197    surf_tmpl.format = texture->buffer.b.b.format;
198 
199    sctx->decompression_enabled = true;
200 
201    while (level_mask) {
202       unsigned level = u_bit_scan(&level_mask);
203 
204       surf_tmpl.u.tex.level = level;
205 
206       /* The smaller the mipmap level, the less layers there are
207        * as far as 3D textures are concerned. */
208       max_layer = util_max_layer(&texture->buffer.b.b, level);
209       checked_last_layer = MIN2(last_layer, max_layer);
210 
211       for (layer = first_layer; layer <= checked_last_layer; layer++) {
212          surf_tmpl.u.tex.first_layer = layer;
213          surf_tmpl.u.tex.last_layer = layer;
214 
215          zsurf = sctx->b.create_surface(&sctx->b, &texture->buffer.b.b, &surf_tmpl);
216 
217          si_blitter_begin(sctx, SI_DECOMPRESS);
218          util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0, sctx->custom_dsa_flush,
219                                            1.0f);
220          si_blitter_end(sctx);
221 
222          pipe_surface_reference(&zsurf, NULL);
223       }
224 
225       /* The texture will always be dirty if some layers aren't flushed.
226        * I don't think this case occurs often though. */
227       if (first_layer == 0 && last_layer >= max_layer) {
228          fully_decompressed_mask |= 1u << level;
229       }
230    }
231 
232    if (planes & PIPE_MASK_Z)
233       texture->dirty_level_mask &= ~fully_decompressed_mask;
234    if (planes & PIPE_MASK_S)
235       texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;
236 
237    sctx->decompression_enabled = false;
238    sctx->db_flush_depth_inplace = false;
239    sctx->db_flush_stencil_inplace = false;
240    si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
241 }
242 
243 /* Helper function of si_flush_depth_texture: decompress the given levels
244  * of Z and/or S planes in place.
245  */
si_blit_decompress_zs_in_place(struct si_context * sctx,struct si_texture * texture,unsigned levels_z,unsigned levels_s,unsigned first_layer,unsigned last_layer)246 static void si_blit_decompress_zs_in_place(struct si_context *sctx, struct si_texture *texture,
247                                            unsigned levels_z, unsigned levels_s,
248                                            unsigned first_layer, unsigned last_layer)
249 {
250    unsigned both = levels_z & levels_s;
251 
252    /* First, do combined Z & S decompresses for levels that need it. */
253    if (both) {
254       si_blit_decompress_zs_planes_in_place(sctx, texture, PIPE_MASK_Z | PIPE_MASK_S, both,
255                                             first_layer, last_layer);
256       levels_z &= ~both;
257       levels_s &= ~both;
258    }
259 
260    /* Now do separate Z and S decompresses. */
261    if (levels_z) {
262       si_blit_decompress_zs_planes_in_place(sctx, texture, PIPE_MASK_Z, levels_z, first_layer,
263                                             last_layer);
264    }
265 
266    if (levels_s) {
267       si_blit_decompress_zs_planes_in_place(sctx, texture, PIPE_MASK_S, levels_s, first_layer,
268                                             last_layer);
269    }
270 }
271 
si_decompress_depth(struct si_context * sctx,struct si_texture * tex,unsigned required_planes,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer)272 static void si_decompress_depth(struct si_context *sctx, struct si_texture *tex,
273                                 unsigned required_planes, unsigned first_level, unsigned last_level,
274                                 unsigned first_layer, unsigned last_layer)
275 {
276    unsigned inplace_planes = 0;
277    unsigned copy_planes = 0;
278    unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1);
279    unsigned levels_z = 0;
280    unsigned levels_s = 0;
281 
282    if (required_planes & PIPE_MASK_Z) {
283       levels_z = level_mask & tex->dirty_level_mask;
284 
285       if (levels_z) {
286          if (si_can_sample_zs(tex, false))
287             inplace_planes |= PIPE_MASK_Z;
288          else
289             copy_planes |= PIPE_MASK_Z;
290       }
291    }
292    if (required_planes & PIPE_MASK_S) {
293       levels_s = level_mask & tex->stencil_dirty_level_mask;
294 
295       if (levels_s) {
296          if (si_can_sample_zs(tex, true))
297             inplace_planes |= PIPE_MASK_S;
298          else
299             copy_planes |= PIPE_MASK_S;
300       }
301    }
302 
303    if (unlikely(sctx->log))
304       u_log_printf(sctx->log,
305                    "\n------------------------------------------------\n"
306                    "Decompress Depth (levels %u - %u, levels Z: 0x%x S: 0x%x)\n\n",
307                    first_level, last_level, levels_z, levels_s);
308 
309    /* We may have to allocate the flushed texture here when called from
310     * si_decompress_subresource.
311     */
312    if (copy_planes &&
313        (tex->flushed_depth_texture || si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b))) {
314       struct si_texture *dst = tex->flushed_depth_texture;
315       unsigned fully_copied_levels;
316       unsigned levels = 0;
317 
318       assert(tex->flushed_depth_texture);
319 
320       if (util_format_is_depth_and_stencil(dst->buffer.b.b.format))
321          copy_planes = PIPE_MASK_Z | PIPE_MASK_S;
322 
323       if (copy_planes & PIPE_MASK_Z) {
324          levels |= levels_z;
325          levels_z = 0;
326       }
327       if (copy_planes & PIPE_MASK_S) {
328          levels |= levels_s;
329          levels_s = 0;
330       }
331 
332       fully_copied_levels = si_blit_dbcb_copy(sctx, tex, dst, copy_planes, levels, first_layer,
333                                               last_layer, 0, u_max_sample(&tex->buffer.b.b));
334 
335       if (copy_planes & PIPE_MASK_Z)
336          tex->dirty_level_mask &= ~fully_copied_levels;
337       if (copy_planes & PIPE_MASK_S)
338          tex->stencil_dirty_level_mask &= ~fully_copied_levels;
339    }
340 
341    if (inplace_planes) {
342       bool has_htile = si_htile_enabled(tex, first_level, inplace_planes);
343       bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, first_level, inplace_planes);
344 
345       /* Don't decompress if there is no HTILE or when HTILE is
346        * TC-compatible. */
347       if (has_htile && !tc_compat_htile) {
348          si_blit_decompress_zs_in_place(sctx, tex, levels_z, levels_s, first_layer, last_layer);
349       } else {
350          /* This is only a cache flush.
351           *
352           * Only clear the mask that we are flushing, because
353           * si_make_DB_shader_coherent() treats different levels
354           * and depth and stencil differently.
355           */
356          if (inplace_planes & PIPE_MASK_Z)
357             tex->dirty_level_mask &= ~levels_z;
358          if (inplace_planes & PIPE_MASK_S)
359             tex->stencil_dirty_level_mask &= ~levels_s;
360       }
361 
362       /* We just had to completely decompress Z/S for texturing. Enable
363        * TC-compatible HTILE on the next clear, so that the decompression
364        * doesn't have to be done for this texture ever again.
365        *
366        * TC-compatible HTILE might slightly reduce Z/S performance, but
367        * the decompression is much worse.
368        */
369       if (has_htile && !tc_compat_htile &&
370           tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE &&
371           (inplace_planes & PIPE_MASK_Z || !tex->htile_stencil_disabled))
372          tex->enable_tc_compatible_htile_next_clear = true;
373 
374       /* Only in-place decompression needs to flush DB caches, or
375        * when we don't decompress but TC-compatible planes are dirty.
376        */
377       si_make_DB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, inplace_planes & PIPE_MASK_S,
378                                  tc_compat_htile);
379    }
380    /* set_framebuffer_state takes care of coherency for single-sample.
381     * The DB->CB copy uses CB for the final writes.
382     */
383    if (copy_planes && tex->buffer.b.b.nr_samples > 1)
384       si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, false, true /* no DCC */);
385 }
386 
si_decompress_sampler_depth_textures(struct si_context * sctx,struct si_samplers * textures)387 static void si_decompress_sampler_depth_textures(struct si_context *sctx,
388                                                  struct si_samplers *textures)
389 {
390    unsigned i;
391    unsigned mask = textures->needs_depth_decompress_mask;
392 
393    while (mask) {
394       struct pipe_sampler_view *view;
395       struct si_sampler_view *sview;
396       struct si_texture *tex;
397 
398       i = u_bit_scan(&mask);
399 
400       view = textures->views[i];
401       assert(view);
402       sview = (struct si_sampler_view *)view;
403 
404       tex = (struct si_texture *)view->texture;
405       assert(tex->db_compatible);
406 
407       si_decompress_depth(sctx, tex, sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z,
408                           view->u.tex.first_level, view->u.tex.last_level, 0,
409                           util_max_layer(&tex->buffer.b.b, view->u.tex.first_level));
410    }
411 }
412 
si_blit_decompress_color(struct si_context * sctx,struct si_texture * tex,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,bool need_dcc_decompress,bool need_fmask_expand)413 static void si_blit_decompress_color(struct si_context *sctx, struct si_texture *tex,
414                                      unsigned first_level, unsigned last_level,
415                                      unsigned first_layer, unsigned last_layer,
416                                      bool need_dcc_decompress, bool need_fmask_expand)
417 {
418    void *custom_blend;
419    unsigned layer, checked_last_layer, max_layer;
420    unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1);
421 
422    if (!need_dcc_decompress)
423       level_mask &= tex->dirty_level_mask;
424    if (!level_mask)
425       goto expand_fmask;
426 
427    if (unlikely(sctx->log))
428       u_log_printf(sctx->log,
429                    "\n------------------------------------------------\n"
430                    "Decompress Color (levels %u - %u, mask 0x%x)\n\n",
431                    first_level, last_level, level_mask);
432 
433    if (need_dcc_decompress) {
434       assert(sctx->chip_class == GFX8);
435       custom_blend = sctx->custom_blend_dcc_decompress;
436 
437       assert(vi_dcc_enabled(tex, first_level));
438 
439       /* disable levels without DCC */
440       for (int i = first_level; i <= last_level; i++) {
441          if (!vi_dcc_enabled(tex, i))
442             level_mask &= ~(1 << i);
443       }
444    } else if (tex->surface.fmask_size) {
445       custom_blend = sctx->custom_blend_fmask_decompress;
446    } else {
447       custom_blend = sctx->custom_blend_eliminate_fastclear;
448    }
449 
450    sctx->decompression_enabled = true;
451 
452    while (level_mask) {
453       unsigned level = u_bit_scan(&level_mask);
454 
455       /* The smaller the mipmap level, the less layers there are
456        * as far as 3D textures are concerned. */
457       max_layer = util_max_layer(&tex->buffer.b.b, level);
458       checked_last_layer = MIN2(last_layer, max_layer);
459 
460       for (layer = first_layer; layer <= checked_last_layer; layer++) {
461          struct pipe_surface *cbsurf, surf_tmpl;
462 
463          surf_tmpl.format = tex->buffer.b.b.format;
464          surf_tmpl.u.tex.level = level;
465          surf_tmpl.u.tex.first_layer = layer;
466          surf_tmpl.u.tex.last_layer = layer;
467          cbsurf = sctx->b.create_surface(&sctx->b, &tex->buffer.b.b, &surf_tmpl);
468 
469          /* Required before and after FMASK and DCC_DECOMPRESS. */
470          if (custom_blend == sctx->custom_blend_fmask_decompress ||
471              custom_blend == sctx->custom_blend_dcc_decompress)
472             sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
473 
474          si_blitter_begin(sctx, SI_DECOMPRESS);
475          util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
476          si_blitter_end(sctx);
477 
478          if (custom_blend == sctx->custom_blend_fmask_decompress ||
479              custom_blend == sctx->custom_blend_dcc_decompress)
480             sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
481 
482          pipe_surface_reference(&cbsurf, NULL);
483       }
484 
485       /* The texture will always be dirty if some layers aren't flushed.
486        * I don't think this case occurs often though. */
487       if (first_layer == 0 && last_layer >= max_layer) {
488          tex->dirty_level_mask &= ~(1 << level);
489       }
490    }
491 
492    sctx->decompression_enabled = false;
493    si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, vi_dcc_enabled(tex, first_level),
494                               tex->surface.u.gfx9.dcc.pipe_aligned);
495 
496 expand_fmask:
497    if (need_fmask_expand && tex->surface.fmask_offset && !tex->fmask_is_identity) {
498       si_compute_expand_fmask(&sctx->b, &tex->buffer.b.b);
499       tex->fmask_is_identity = true;
500    }
501 }
502 
si_decompress_color_texture(struct si_context * sctx,struct si_texture * tex,unsigned first_level,unsigned last_level,bool need_fmask_expand)503 static void si_decompress_color_texture(struct si_context *sctx, struct si_texture *tex,
504                                         unsigned first_level, unsigned last_level,
505                                         bool need_fmask_expand)
506 {
507    /* CMASK or DCC can be discarded and we can still end up here. */
508    if (!tex->cmask_buffer && !tex->surface.fmask_size &&
509        !vi_dcc_enabled(tex, first_level))
510       return;
511 
512    si_blit_decompress_color(sctx, tex, first_level, last_level, 0,
513                             util_max_layer(&tex->buffer.b.b, first_level), false,
514                             need_fmask_expand);
515 }
516 
si_decompress_sampler_color_textures(struct si_context * sctx,struct si_samplers * textures)517 static void si_decompress_sampler_color_textures(struct si_context *sctx,
518                                                  struct si_samplers *textures)
519 {
520    unsigned i;
521    unsigned mask = textures->needs_color_decompress_mask;
522 
523    while (mask) {
524       struct pipe_sampler_view *view;
525       struct si_texture *tex;
526 
527       i = u_bit_scan(&mask);
528 
529       view = textures->views[i];
530       assert(view);
531 
532       tex = (struct si_texture *)view->texture;
533 
534       si_decompress_color_texture(sctx, tex, view->u.tex.first_level, view->u.tex.last_level,
535                                   false);
536    }
537 }
538 
si_decompress_image_color_textures(struct si_context * sctx,struct si_images * images)539 static void si_decompress_image_color_textures(struct si_context *sctx, struct si_images *images)
540 {
541    unsigned i;
542    unsigned mask = images->needs_color_decompress_mask;
543 
544    while (mask) {
545       const struct pipe_image_view *view;
546       struct si_texture *tex;
547 
548       i = u_bit_scan(&mask);
549 
550       view = &images->views[i];
551       assert(view->resource->target != PIPE_BUFFER);
552 
553       tex = (struct si_texture *)view->resource;
554 
555       si_decompress_color_texture(sctx, tex, view->u.tex.level, view->u.tex.level,
556                                   view->access & PIPE_IMAGE_ACCESS_WRITE);
557    }
558 }
559 
si_check_render_feedback_texture(struct si_context * sctx,struct si_texture * tex,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer)560 static void si_check_render_feedback_texture(struct si_context *sctx, struct si_texture *tex,
561                                              unsigned first_level, unsigned last_level,
562                                              unsigned first_layer, unsigned last_layer)
563 {
564    bool render_feedback = false;
565 
566    if (!vi_dcc_enabled(tex, first_level))
567       return;
568 
569    for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) {
570       struct si_surface *surf;
571 
572       if (!sctx->framebuffer.state.cbufs[j])
573          continue;
574 
575       surf = (struct si_surface *)sctx->framebuffer.state.cbufs[j];
576 
577       if (tex == (struct si_texture *)surf->base.texture && surf->base.u.tex.level >= first_level &&
578           surf->base.u.tex.level <= last_level && surf->base.u.tex.first_layer <= last_layer &&
579           surf->base.u.tex.last_layer >= first_layer) {
580          render_feedback = true;
581          break;
582       }
583    }
584 
585    if (render_feedback)
586       si_texture_disable_dcc(sctx, tex);
587 }
588 
si_check_render_feedback_textures(struct si_context * sctx,struct si_samplers * textures)589 static void si_check_render_feedback_textures(struct si_context *sctx, struct si_samplers *textures)
590 {
591    uint32_t mask = textures->enabled_mask;
592 
593    while (mask) {
594       const struct pipe_sampler_view *view;
595       struct si_texture *tex;
596 
597       unsigned i = u_bit_scan(&mask);
598 
599       view = textures->views[i];
600       if (view->texture->target == PIPE_BUFFER)
601          continue;
602 
603       tex = (struct si_texture *)view->texture;
604 
605       si_check_render_feedback_texture(sctx, tex, view->u.tex.first_level, view->u.tex.last_level,
606                                        view->u.tex.first_layer, view->u.tex.last_layer);
607    }
608 }
609 
si_check_render_feedback_images(struct si_context * sctx,struct si_images * images)610 static void si_check_render_feedback_images(struct si_context *sctx, struct si_images *images)
611 {
612    uint32_t mask = images->enabled_mask;
613 
614    while (mask) {
615       const struct pipe_image_view *view;
616       struct si_texture *tex;
617 
618       unsigned i = u_bit_scan(&mask);
619 
620       view = &images->views[i];
621       if (view->resource->target == PIPE_BUFFER)
622          continue;
623 
624       tex = (struct si_texture *)view->resource;
625 
626       si_check_render_feedback_texture(sctx, tex, view->u.tex.level, view->u.tex.level,
627                                        view->u.tex.first_layer, view->u.tex.last_layer);
628    }
629 }
630 
si_check_render_feedback_resident_textures(struct si_context * sctx)631 static void si_check_render_feedback_resident_textures(struct si_context *sctx)
632 {
633    util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
634       struct pipe_sampler_view *view;
635       struct si_texture *tex;
636 
637       view = (*tex_handle)->view;
638       if (view->texture->target == PIPE_BUFFER)
639          continue;
640 
641       tex = (struct si_texture *)view->texture;
642 
643       si_check_render_feedback_texture(sctx, tex, view->u.tex.first_level, view->u.tex.last_level,
644                                        view->u.tex.first_layer, view->u.tex.last_layer);
645    }
646 }
647 
si_check_render_feedback_resident_images(struct si_context * sctx)648 static void si_check_render_feedback_resident_images(struct si_context *sctx)
649 {
650    util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
651       struct pipe_image_view *view;
652       struct si_texture *tex;
653 
654       view = &(*img_handle)->view;
655       if (view->resource->target == PIPE_BUFFER)
656          continue;
657 
658       tex = (struct si_texture *)view->resource;
659 
660       si_check_render_feedback_texture(sctx, tex, view->u.tex.level, view->u.tex.level,
661                                        view->u.tex.first_layer, view->u.tex.last_layer);
662    }
663 }
664 
si_check_render_feedback(struct si_context * sctx)665 static void si_check_render_feedback(struct si_context *sctx)
666 {
667    if (!sctx->need_check_render_feedback)
668       return;
669 
670    /* There is no render feedback if color writes are disabled.
671     * (e.g. a pixel shader with image stores)
672     */
673    if (!si_get_total_colormask(sctx))
674       return;
675 
676    for (int i = 0; i < SI_NUM_SHADERS; ++i) {
677       si_check_render_feedback_images(sctx, &sctx->images[i]);
678       si_check_render_feedback_textures(sctx, &sctx->samplers[i]);
679    }
680 
681    si_check_render_feedback_resident_images(sctx);
682    si_check_render_feedback_resident_textures(sctx);
683 
684    sctx->need_check_render_feedback = false;
685 }
686 
si_decompress_resident_textures(struct si_context * sctx)687 static void si_decompress_resident_textures(struct si_context *sctx)
688 {
689    util_dynarray_foreach (&sctx->resident_tex_needs_color_decompress, struct si_texture_handle *,
690                           tex_handle) {
691       struct pipe_sampler_view *view = (*tex_handle)->view;
692       struct si_texture *tex = (struct si_texture *)view->texture;
693 
694       si_decompress_color_texture(sctx, tex, view->u.tex.first_level, view->u.tex.last_level,
695                                   false);
696    }
697 
698    util_dynarray_foreach (&sctx->resident_tex_needs_depth_decompress, struct si_texture_handle *,
699                           tex_handle) {
700       struct pipe_sampler_view *view = (*tex_handle)->view;
701       struct si_sampler_view *sview = (struct si_sampler_view *)view;
702       struct si_texture *tex = (struct si_texture *)view->texture;
703 
704       si_decompress_depth(sctx, tex, sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z,
705                           view->u.tex.first_level, view->u.tex.last_level, 0,
706                           util_max_layer(&tex->buffer.b.b, view->u.tex.first_level));
707    }
708 }
709 
si_decompress_resident_images(struct si_context * sctx)710 static void si_decompress_resident_images(struct si_context *sctx)
711 {
712    util_dynarray_foreach (&sctx->resident_img_needs_color_decompress, struct si_image_handle *,
713                           img_handle) {
714       struct pipe_image_view *view = &(*img_handle)->view;
715       struct si_texture *tex = (struct si_texture *)view->resource;
716 
717       si_decompress_color_texture(sctx, tex, view->u.tex.level, view->u.tex.level,
718                                   view->access & PIPE_IMAGE_ACCESS_WRITE);
719    }
720 }
721 
si_decompress_textures(struct si_context * sctx,unsigned shader_mask)722 void si_decompress_textures(struct si_context *sctx, unsigned shader_mask)
723 {
724    unsigned compressed_colortex_counter, mask;
725 
726    if (sctx->blitter->running)
727       return;
728 
729    /* Update the compressed_colortex_mask if necessary. */
730    compressed_colortex_counter = p_atomic_read(&sctx->screen->compressed_colortex_counter);
731    if (compressed_colortex_counter != sctx->last_compressed_colortex_counter) {
732       sctx->last_compressed_colortex_counter = compressed_colortex_counter;
733       si_update_needs_color_decompress_masks(sctx);
734    }
735 
736    /* Decompress color & depth textures if needed. */
737    mask = sctx->shader_needs_decompress_mask & shader_mask;
738    while (mask) {
739       unsigned i = u_bit_scan(&mask);
740 
741       if (sctx->samplers[i].needs_depth_decompress_mask) {
742          si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]);
743       }
744       if (sctx->samplers[i].needs_color_decompress_mask) {
745          si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]);
746       }
747       if (sctx->images[i].needs_color_decompress_mask) {
748          si_decompress_image_color_textures(sctx, &sctx->images[i]);
749       }
750    }
751 
752    if (shader_mask & u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)) {
753       if (sctx->uses_bindless_samplers)
754          si_decompress_resident_textures(sctx);
755       if (sctx->uses_bindless_images)
756          si_decompress_resident_images(sctx);
757 
758       if (sctx->ps_uses_fbfetch) {
759          struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0];
760          si_decompress_color_texture(sctx, (struct si_texture *)cb0->texture,
761                                      cb0->u.tex.first_layer, cb0->u.tex.last_layer, false);
762       }
763 
764       si_check_render_feedback(sctx);
765    } else if (shader_mask & (1 << PIPE_SHADER_COMPUTE)) {
766       if (sctx->cs_shader_state.program->sel.info.uses_bindless_samplers)
767          si_decompress_resident_textures(sctx);
768       if (sctx->cs_shader_state.program->sel.info.uses_bindless_images)
769          si_decompress_resident_images(sctx);
770    }
771 }
772 
773 /* Helper for decompressing a portion of a color or depth resource before
774  * blitting if any decompression is needed.
775  * The driver doesn't decompress resources automatically while u_blitter is
776  * rendering. */
si_decompress_subresource(struct pipe_context * ctx,struct pipe_resource * tex,unsigned planes,unsigned level,unsigned first_layer,unsigned last_layer)777 void si_decompress_subresource(struct pipe_context *ctx, struct pipe_resource *tex, unsigned planes,
778                                unsigned level, unsigned first_layer, unsigned last_layer)
779 {
780    struct si_context *sctx = (struct si_context *)ctx;
781    struct si_texture *stex = (struct si_texture *)tex;
782 
783    if (stex->db_compatible) {
784       planes &= PIPE_MASK_Z | PIPE_MASK_S;
785 
786       if (!stex->surface.has_stencil)
787          planes &= ~PIPE_MASK_S;
788 
789       /* If we've rendered into the framebuffer and it's a blitting
790        * source, make sure the decompression pass is invoked
791        * by dirtying the framebuffer.
792        */
793       if (sctx->framebuffer.state.zsbuf && sctx->framebuffer.state.zsbuf->u.tex.level == level &&
794           sctx->framebuffer.state.zsbuf->texture == tex)
795          si_update_fb_dirtiness_after_rendering(sctx);
796 
797       si_decompress_depth(sctx, stex, planes, level, level, first_layer, last_layer);
798    } else if (stex->surface.fmask_size || stex->cmask_buffer ||
799               vi_dcc_enabled(stex, level)) {
800       /* If we've rendered into the framebuffer and it's a blitting
801        * source, make sure the decompression pass is invoked
802        * by dirtying the framebuffer.
803        */
804       for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
805          if (sctx->framebuffer.state.cbufs[i] &&
806              sctx->framebuffer.state.cbufs[i]->u.tex.level == level &&
807              sctx->framebuffer.state.cbufs[i]->texture == tex) {
808             si_update_fb_dirtiness_after_rendering(sctx);
809             break;
810          }
811       }
812 
813       si_blit_decompress_color(sctx, stex, level, level, first_layer, last_layer, false, false);
814    }
815 }
816 
/* Bundle of a format plus several width/height style fields.
 * NOTE(review): no code in this part of the file uses this struct. The field
 * names suggest it stored a texture's original format and dimensions while
 * they were temporarily overridden - confirm whether it is still needed.
 */
struct texture_orig_info {
   unsigned format;
   unsigned width0;
   unsigned height0;
   unsigned npix_x;
   unsigned npix_y;
   unsigned npix0_x;
   unsigned npix0_y;
};
826 
si_use_compute_copy_for_float_formats(struct si_context * sctx,struct pipe_resource * texture,unsigned level)827 static void si_use_compute_copy_for_float_formats(struct si_context *sctx,
828                                                   struct pipe_resource *texture,
829                                                   unsigned level) {
830    struct si_texture *tex = (struct si_texture *)texture;
831 
832    /* If we are uploading into FP16 or R11G11B10_FLOAT via a blit, CB clobbers NaNs,
833     * so in order to preserve them exactly, we have to use the compute blit.
834     * The compute blit is used only when the destination doesn't have DCC, so
835     * disable it here, which is kinda a hack.
836     * If we are uploading into 32-bit floats with DCC via a blit, NaNs will also get
837     * lost so we need to disable DCC as well.
838     *
839     * This makes KHR-GL45.texture_view.view_classes pass on gfx9.
840     * gfx10 has the same issue, but the test doesn't use a large enough texture
841     * to enable DCC and fail, so it always passes.
842     */
843    if (vi_dcc_enabled(tex, level) &&
844        util_format_is_float(texture->format)) {
845       si_texture_disable_dcc(sctx, tex);
846    }
847 }
848 
/* Copy the region src_box of mip level src_level of "src" to position
 * (dstx, dsty, dstz) of mip level dst_level of "dst".
 *
 * Three paths are taken, in this order:
 *  - buffer -> buffer copies go through si_copy_buffer;
 *  - copies that satisfy the format/sample/DCC/target checks below go
 *    through si_compute_copy_image;
 *  - everything else is rendered with u_blitter, using a surface and a
 *    sampler view whose formats (and, for block-based formats, dimensions
 *    and coordinates) are reinterpreted so that the data is copied as-is.
 */
void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst,
                             unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz,
                             struct pipe_resource *src, unsigned src_level,
                             const struct pipe_box *src_box)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture *ssrc = (struct si_texture *)src;
   struct si_texture *sdst = (struct si_texture *)dst;
   struct pipe_surface *dst_view, dst_templ;
   struct pipe_sampler_view src_templ, *src_view;
   unsigned dst_width, dst_height, src_width0, src_height0;
   unsigned dst_width0, dst_height0, src_force_level = 0;
   struct pipe_box sbox, dstbox;

   /* Handle buffers first. */
   if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
      si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
      return;
   }

   /* May disable DCC on dst, which affects the vi_dcc_enabled check below. */
   si_use_compute_copy_for_float_formats(sctx, dst, dst_level);

   /* Compute copy: no block-compressed formats, no depth/stencil source,
    * single-sample source, no DCC on the destination level, and no mix of
    * a 1D array target with a different target.
    */
   if (!util_format_is_compressed(src->format) && !util_format_is_compressed(dst->format) &&
       !util_format_is_depth_or_stencil(src->format) && src->nr_samples <= 1 &&
       !vi_dcc_enabled(sdst, dst_level) &&
       !(dst->target != src->target &&
         (src->target == PIPE_TEXTURE_1D_ARRAY || dst->target == PIPE_TEXTURE_1D_ARRAY))) {
      si_compute_copy_image(sctx, dst, dst_level, src, src_level, dstx, dsty, dstz,
                            src_box, false);
      return;
   }

   assert(u_max_sample(dst) == u_max_sample(src));

   /* The driver doesn't decompress resources automatically while
    * u_blitter is rendering. */
   si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level, src_box->z,
                             src_box->z + src_box->depth - 1);

   dst_width = u_minify(dst->width0, dst_level);
   dst_height = u_minify(dst->height0, dst_level);
   dst_width0 = dst->width0;
   dst_height0 = dst->height0;
   src_width0 = src->width0;
   src_height0 = src->height0;

   util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
   util_blitter_default_src_texture(sctx->blitter, &src_templ, src, src_level);

   if (util_format_is_compressed(src->format) || util_format_is_compressed(dst->format)) {
      /* Block-compressed formats: copy whole blocks as texels of a UINT
       * format with the same size, and convert every dimension and
       * coordinate from pixels to blocks.
       */
      unsigned blocksize = ssrc->surface.bpe;

      if (blocksize == 8)
         src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
      else
         src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
      dst_templ.format = src_templ.format;

      dst_width = util_format_get_nblocksx(dst->format, dst_width);
      dst_height = util_format_get_nblocksy(dst->format, dst_height);
      dst_width0 = util_format_get_nblocksx(dst->format, dst_width0);
      dst_height0 = util_format_get_nblocksy(dst->format, dst_height0);
      src_width0 = util_format_get_nblocksx(src->format, src_width0);
      src_height0 = util_format_get_nblocksy(src->format, src_height0);

      dstx = util_format_get_nblocksx(dst->format, dstx);
      dsty = util_format_get_nblocksy(dst->format, dsty);

      /* Use a local box in block units; the caller's box is left untouched. */
      sbox.x = util_format_get_nblocksx(src->format, src_box->x);
      sbox.y = util_format_get_nblocksy(src->format, src_box->y);
      sbox.z = src_box->z;
      sbox.width = util_format_get_nblocksx(src->format, src_box->width);
      sbox.height = util_format_get_nblocksy(src->format, src_box->height);
      sbox.depth = src_box->depth;
      src_box = &sbox;

      src_force_level = src_level;
   } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) {
      if (util_format_is_subsampled_422(src->format)) {
         /* Subsampled 4:2:2: copy as R8G8B8A8_UINT; only the X axis is
          * converted to block units here. */
         src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
         dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;

         dst_width = util_format_get_nblocksx(dst->format, dst_width);
         dst_width0 = util_format_get_nblocksx(dst->format, dst_width0);
         src_width0 = util_format_get_nblocksx(src->format, src_width0);

         dstx = util_format_get_nblocksx(dst->format, dstx);

         sbox = *src_box;
         sbox.x = util_format_get_nblocksx(src->format, src_box->x);
         sbox.width = util_format_get_nblocksx(src->format, src_box->width);
         src_box = &sbox;
      } else {
         /* Pick a format for both views based only on the source's
          * bytes per element. */
         unsigned blocksize = ssrc->surface.bpe;

         switch (blocksize) {
         case 1:
            dst_templ.format = PIPE_FORMAT_R8_UNORM;
            src_templ.format = PIPE_FORMAT_R8_UNORM;
            break;
         case 2:
            dst_templ.format = PIPE_FORMAT_R8G8_UNORM;
            src_templ.format = PIPE_FORMAT_R8G8_UNORM;
            break;
         case 4:
            dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
            src_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
            break;
         case 8:
            dst_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
            src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
            break;
         case 16:
            dst_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
            src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
            break;
         default:
            /* NOTE(review): when asserts are compiled out, execution
             * continues with the template formats left unchanged. */
            fprintf(stderr, "Unhandled format %s with blocksize %u\n",
                    util_format_short_name(src->format), blocksize);
            assert(0);
         }
      }
   }

   /* SNORM8 blitting has precision issues on some chips. Use the SINT
    * equivalent instead, which doesn't force DCC decompression.
    * Note that some chips avoid this issue by using SDMA.
    */
   if (util_format_is_snorm8(dst_templ.format)) {
      dst_templ.format = src_templ.format = util_format_snorm8_to_sint8(dst_templ.format);
   }

   /* The view formats may differ from the resource formats at this point. */
   vi_disable_dcc_if_incompatible_format(sctx, dst, dst_level, dst_templ.format);
   vi_disable_dcc_if_incompatible_format(sctx, src, src_level, src_templ.format);

   /* Initialize the surface. */
   dst_view = si_create_surface_custom(ctx, dst, &dst_templ, dst_width0, dst_height0, dst_width,
                                       dst_height);

   /* Initialize the sampler view. */
   src_view =
      si_create_sampler_view_custom(ctx, src, &src_templ, src_width0, src_height0, src_force_level);

   /* The destination box has the size of the source box, with the sign of
    * each dimension dropped. */
   u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height), abs(src_box->depth),
            &dstbox);

   /* Copy. */
   si_blitter_begin(sctx, SI_COPY);
   util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox, src_view, src_box, src_width0,
                             src_height0, PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, false);
   si_blitter_end(sctx);

   /* Drop the references held on the temporary views. */
   pipe_surface_reference(&dst_view, NULL);
   pipe_sampler_view_reference(&src_view, NULL);
}
1004 
si_do_CB_resolve(struct si_context * sctx,const struct pipe_blit_info * info,struct pipe_resource * dst,unsigned dst_level,unsigned dst_z,enum pipe_format format)1005 static void si_do_CB_resolve(struct si_context *sctx, const struct pipe_blit_info *info,
1006                              struct pipe_resource *dst, unsigned dst_level, unsigned dst_z,
1007                              enum pipe_format format)
1008 {
1009    /* Required before and after CB_RESOLVE. */
1010    sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
1011 
1012    si_blitter_begin(
1013       sctx, SI_COLOR_RESOLVE | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
1014    util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z, info->src.resource,
1015                                      info->src.box.z, ~0, sctx->custom_blend_resolve, format);
1016    si_blitter_end(sctx);
1017 
1018    /* Flush caches for possible texturing. */
1019    si_make_CB_shader_coherent(sctx, 1, false, true /* no DCC */);
1020 }
1021 
/* Try to perform the blit described by "info" as a CB (hardware) MSAA
 * resolve.
 *
 * If all requirements for a direct resolve are met, the source is resolved
 * straight into the destination. If only the basic requirements are met, the
 * source is resolved into a temporary single-sample texture, which is then
 * blitted to the destination with u_blitter.
 *
 * Returns true if the blit has been fully handled here, false if the basic
 * requirements aren't met or the temporary texture couldn't be created (in
 * which case nothing has been resolved and the caller must do the blit).
 */
static bool do_hardware_msaa_resolve(struct pipe_context *ctx, const struct pipe_blit_info *info)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture *src = (struct si_texture *)info->src.resource;
   struct si_texture *dst = (struct si_texture *)info->dst.resource;
   ASSERTED struct si_texture *stmp;
   unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
   unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level);
   enum pipe_format format = info->src.format;
   struct pipe_resource *tmp, templ;
   struct pipe_blit_info blit;

   /* Check basic requirements for hw resolve. */
   if (!(info->src.resource->nr_samples > 1 && info->dst.resource->nr_samples <= 1 &&
         !util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format) &&
         util_max_layer(info->src.resource, 0) == 0))
      return false;

   /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
    * the format is R16G16. Use R16A16, which does work.
    */
   if (format == PIPE_FORMAT_R16G16_UNORM)
      format = PIPE_FORMAT_R16A16_UNORM;
   if (format == PIPE_FORMAT_R16G16_SNORM)
      format = PIPE_FORMAT_R16A16_SNORM;

   /* Check the remaining requirements for hw resolve. */
   if (util_max_layer(info->dst.resource, info->dst.level) == 0 && !info->scissor_enable &&
       (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA &&
       util_is_format_compatible(util_format_description(info->src.format),
                                 util_format_description(info->dst.format)) &&
       dst_width == info->src.resource->width0 && dst_height == info->src.resource->height0 &&
       info->dst.box.x == 0 && info->dst.box.y == 0 && info->dst.box.width == dst_width &&
       info->dst.box.height == dst_height && info->dst.box.depth == 1 && info->src.box.x == 0 &&
       info->src.box.y == 0 && info->src.box.width == dst_width &&
       info->src.box.height == dst_height && info->src.box.depth == 1 && !dst->surface.is_linear &&
       (!dst->cmask_buffer || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
      /* Check the last constraint. */
      if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) {
         /* The next fast clear will switch to this mode to
          * get direct hw resolve next time if the mode is
          * different now.
          *
          * TODO-GFX10: This does not work in GFX10 because MSAA
          * is restricted to 64KB_R_X and 64KB_Z_X swizzle modes.
          * In some cases we could change the swizzle of the
          * destination texture instead, but the more general
          * solution is to implement compute shader resolve.
          */
         src->last_msaa_resolve_target_micro_mode = dst->surface.micro_tile_mode;
         goto resolve_to_temp;
      }

      /* Resolving into a surface with DCC is unsupported. Since
       * it's being overwritten anyway, clear it to uncompressed.
       * This is still the fastest codepath even with this clear.
       */
      if (vi_dcc_enabled(dst, info->dst.level)) {
         if (!vi_dcc_clear_level(sctx, dst, info->dst.level, DCC_UNCOMPRESSED))
            goto resolve_to_temp;

         /* The level has just been cleared to uncompressed DCC. */
         dst->dirty_level_mask &= ~(1 << info->dst.level);
      }

      /* Resolve directly from src to dst. */
      si_do_CB_resolve(sctx, info, info->dst.resource, info->dst.level, info->dst.box.z, format);
      return true;
   }

resolve_to_temp:
   /* Shader-based resolve is VERY SLOW. Instead, resolve into
    * a temporary texture and blit.
    */
   /* The temporary is a single-level 2D texture with the source's format
    * and level-0 size; nr_samples stays 0 from the memset. */
   memset(&templ, 0, sizeof(templ));
   templ.target = PIPE_TEXTURE_2D;
   templ.format = info->src.resource->format;
   templ.width0 = info->src.resource->width0;
   templ.height0 = info->src.resource->height0;
   templ.depth0 = 1;
   templ.array_size = 1;
   templ.usage = PIPE_USAGE_DEFAULT;
   templ.flags = SI_RESOURCE_FLAG_FORCE_MSAA_TILING | SI_RESOURCE_FLAG_FORCE_MICRO_TILE_MODE |
                 SI_RESOURCE_FLAG_MICRO_TILE_MODE_SET(src->surface.micro_tile_mode) |
                 SI_RESOURCE_FLAG_DISABLE_DCC | SI_RESOURCE_FLAG_DRIVER_INTERNAL;

   /* The src and dst microtile modes must be the same. */
   if (sctx->chip_class <= GFX8 && src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
      templ.bind = PIPE_BIND_SCANOUT;
   else
      templ.bind = 0;

   tmp = ctx->screen->resource_create(ctx->screen, &templ);
   if (!tmp)
      return false;
   /* stmp is only read by the assertions below (hence ASSERTED). */
   stmp = (struct si_texture *)tmp;

   assert(!stmp->surface.is_linear);
   assert(src->surface.micro_tile_mode == stmp->surface.micro_tile_mode);

   /* resolve */
   si_do_CB_resolve(sctx, info, tmp, 0, 0, format);

   /* blit */
   /* Same blit as requested, except that it reads layer 0 of the resolved
    * temporary instead of the MSAA source. */
   blit = *info;
   blit.src.resource = tmp;
   blit.src.box.z = 0;

   si_blitter_begin(sctx, SI_BLIT | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
   util_blitter_blit(sctx->blitter, &blit);
   si_blitter_end(sctx);

   /* Release the reference on the temporary texture. */
   pipe_resource_reference(&tmp, NULL);
   return true;
}
1136 
si_blit(struct pipe_context * ctx,const struct pipe_blit_info * info)1137 static void si_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
1138 {
1139    struct si_context *sctx = (struct si_context *)ctx;
1140    struct si_texture *dst = (struct si_texture *)info->dst.resource;
1141 
1142    if (do_hardware_msaa_resolve(ctx, info)) {
1143       return;
1144    }
1145 
1146    /* Using SDMA for copying to a linear texture in GTT is much faster.
1147     * This improves DRI PRIME performance.
1148     *
1149     * resource_copy_region can't do this yet, because dma_copy calls it
1150     * on failure (recursion).
1151     */
1152    if (dst->surface.is_linear && util_can_blit_via_copy_region(info, false)) {
1153       sctx->dma_copy(ctx, info->dst.resource, info->dst.level, info->dst.box.x, info->dst.box.y,
1154                      info->dst.box.z, info->src.resource, info->src.level, &info->src.box);
1155       return;
1156    }
1157 
1158    assert(util_blitter_is_blit_supported(sctx->blitter, info));
1159 
1160    /* The driver doesn't decompress resources automatically while
1161     * u_blitter is rendering. */
1162    vi_disable_dcc_if_incompatible_format(sctx, info->src.resource, info->src.level,
1163                                          info->src.format);
1164    vi_disable_dcc_if_incompatible_format(sctx, info->dst.resource, info->dst.level,
1165                                          info->dst.format);
1166    si_decompress_subresource(ctx, info->src.resource, PIPE_MASK_RGBAZS, info->src.level,
1167                              info->src.box.z, info->src.box.z + info->src.box.depth - 1);
1168 
1169    if (sctx->screen->debug_flags & DBG(FORCE_SDMA) && util_try_blit_via_copy_region(ctx, info))
1170       return;
1171 
1172    si_blitter_begin(sctx, SI_BLIT | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
1173    util_blitter_blit(sctx->blitter, info);
1174    si_blitter_end(sctx);
1175 }
1176 
si_generate_mipmap(struct pipe_context * ctx,struct pipe_resource * tex,enum pipe_format format,unsigned base_level,unsigned last_level,unsigned first_layer,unsigned last_layer)1177 static bool si_generate_mipmap(struct pipe_context *ctx, struct pipe_resource *tex,
1178                                enum pipe_format format, unsigned base_level, unsigned last_level,
1179                                unsigned first_layer, unsigned last_layer)
1180 {
1181    struct si_context *sctx = (struct si_context *)ctx;
1182    struct si_texture *stex = (struct si_texture *)tex;
1183 
1184    if (!util_blitter_is_copy_supported(sctx->blitter, tex, tex))
1185       return false;
1186 
1187    /* The driver doesn't decompress resources automatically while
1188     * u_blitter is rendering. */
1189    vi_disable_dcc_if_incompatible_format(sctx, tex, base_level, format);
1190    si_decompress_subresource(ctx, tex, PIPE_MASK_RGBAZS, base_level, first_layer, last_layer);
1191 
1192    /* Clear dirty_level_mask for the levels that will be overwritten. */
1193    assert(base_level < last_level);
1194    stex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1, last_level - base_level);
1195 
1196    sctx->generate_mipmap_for_depth = stex->is_depth;
1197 
1198    si_blitter_begin(sctx, SI_BLIT | SI_DISABLE_RENDER_COND);
1199    util_blitter_generate_mipmap(sctx->blitter, tex, format, base_level, last_level, first_layer,
1200                                 last_layer);
1201    si_blitter_end(sctx);
1202 
1203    sctx->generate_mipmap_for_depth = false;
1204    return true;
1205 }
1206 
si_flush_resource(struct pipe_context * ctx,struct pipe_resource * res)1207 static void si_flush_resource(struct pipe_context *ctx, struct pipe_resource *res)
1208 {
1209    struct si_context *sctx = (struct si_context *)ctx;
1210    struct si_texture *tex = (struct si_texture *)res;
1211 
1212    assert(res->target != PIPE_BUFFER);
1213    assert(!tex->dcc_separate_buffer || tex->dcc_gather_statistics);
1214 
1215    /* st/dri calls flush twice per frame (not a bug), this prevents double
1216     * decompression. */
1217    if (tex->dcc_separate_buffer && !tex->separate_dcc_dirty)
1218       return;
1219 
1220    if (!tex->is_depth && (tex->cmask_buffer || vi_dcc_enabled(tex, 0))) {
1221       si_blit_decompress_color(sctx, tex, 0, res->last_level, 0, util_max_layer(res, 0),
1222                                tex->dcc_separate_buffer != NULL, false);
1223 
1224       if (tex->surface.display_dcc_offset && tex->displayable_dcc_dirty) {
1225          si_retile_dcc(sctx, tex);
1226          tex->displayable_dcc_dirty = false;
1227       }
1228    }
1229 
1230    /* Always do the analysis even if DCC is disabled at the moment. */
1231    if (tex->dcc_gather_statistics) {
1232       bool separate_dcc_dirty = tex->separate_dcc_dirty;
1233 
1234       /* If the color buffer hasn't been unbound and fast clear hasn't
1235        * been used, separate_dcc_dirty is false, but there may have been
1236        * new rendering. Check if the color buffer is bound and assume
1237        * it's dirty.
1238        *
1239        * Note that DRI2 never unbinds window colorbuffers, which means
1240        * the DCC pipeline statistics query would never be re-set and would
1241        * keep adding new results until all free memory is exhausted if we
1242        * didn't do this.
1243        */
1244       if (!separate_dcc_dirty) {
1245          for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
1246             if (sctx->framebuffer.state.cbufs[i] &&
1247                 sctx->framebuffer.state.cbufs[i]->texture == res) {
1248                separate_dcc_dirty = true;
1249                break;
1250             }
1251          }
1252       }
1253 
1254       if (separate_dcc_dirty) {
1255          tex->separate_dcc_dirty = false;
1256          vi_separate_dcc_process_and_reset_stats(ctx, tex);
1257       }
1258    }
1259 }
1260 
si_decompress_dcc(struct si_context * sctx,struct si_texture * tex)1261 void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex)
1262 {
1263    /* If graphics is disabled, we can't decompress DCC, but it shouldn't
1264     * be compressed either. The caller should simply discard it.
1265     */
1266    if (!tex->surface.dcc_offset || !sctx->has_graphics)
1267       return;
1268 
1269    if (sctx->chip_class == GFX8) {
1270       si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level, 0,
1271                                util_max_layer(&tex->buffer.b.b, 0), true, false);
1272    } else {
1273       struct pipe_resource *ptex = &tex->buffer.b.b;
1274 
1275       /* DCC decompression using a compute shader. */
1276       for (unsigned level = 0; level < tex->surface.num_dcc_levels; level++) {
1277          struct pipe_box box;
1278 
1279          u_box_3d(0, 0, 0, u_minify(ptex->width0, level),
1280                   u_minify(ptex->height0, level),
1281                   util_num_layers(ptex, level), &box);
1282          si_compute_copy_image(sctx, ptex, level, ptex, level, 0, 0, 0, &box,
1283                                true);
1284       }
1285 
1286       /* Now clear DCC metadata to uncompressed. */
1287       uint32_t clear_value = DCC_UNCOMPRESSED;
1288       si_clear_buffer(sctx, ptex, tex->surface.dcc_offset,
1289                       tex->surface.dcc_size, &clear_value, 4,
1290                       SI_COHERENCY_CB_META, false);
1291    }
1292 }
1293 
si_init_blit_functions(struct si_context * sctx)1294 void si_init_blit_functions(struct si_context *sctx)
1295 {
1296    sctx->b.resource_copy_region = si_resource_copy_region;
1297 
1298    if (sctx->has_graphics) {
1299       sctx->b.blit = si_blit;
1300       sctx->b.flush_resource = si_flush_resource;
1301       sctx->b.generate_mipmap = si_generate_mipmap;
1302    }
1303 }
1304