1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "si_pipe.h"
25 #include "sid.h"
26 
27 #include "util/u_format.h"
28 #include "util/u_pack_color.h"
29 #include "util/u_surface.h"
30 
/* Flag combinations passed to si_blitter_begin() by the clear entry points
 * in this file.
 */
enum {
	/* Used by si_clear() for clears of the currently bound framebuffer. */
	SI_CLEAR         = SI_SAVE_FRAGMENT_STATE,
	/* Used by si_clear_render_target() and si_clear_depth_stencil(), which
	 * clear an explicitly passed surface. */
	SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
};
35 
si_alloc_separate_cmask(struct si_screen * sscreen,struct r600_texture * rtex)36 static void si_alloc_separate_cmask(struct si_screen *sscreen,
37 				    struct r600_texture *rtex)
38 {
39 	if (rtex->cmask_buffer)
40                 return;
41 
42 	assert(rtex->cmask.size == 0);
43 
44 	si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask);
45 	if (!rtex->cmask.size)
46 		return;
47 
48 	rtex->cmask_buffer = (struct r600_resource *)
49 		si_aligned_buffer_create(&sscreen->b,
50 					 R600_RESOURCE_FLAG_UNMAPPABLE,
51 					 PIPE_USAGE_DEFAULT,
52 					 rtex->cmask.size,
53 					 rtex->cmask.alignment);
54 	if (rtex->cmask_buffer == NULL) {
55 		rtex->cmask.size = 0;
56 		return;
57 	}
58 
59 	/* update colorbuffer state bits */
60 	rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
61 
62 	rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
63 
64 	p_atomic_inc(&sscreen->compressed_colortex_counter);
65 }
66 
si_set_clear_color(struct r600_texture * rtex,enum pipe_format surface_format,const union pipe_color_union * color)67 static void si_set_clear_color(struct r600_texture *rtex,
68 			       enum pipe_format surface_format,
69 			       const union pipe_color_union *color)
70 {
71 	union util_color uc;
72 
73 	memset(&uc, 0, sizeof(uc));
74 
75 	if (rtex->surface.bpe == 16) {
76 		/* DCC fast clear only:
77 		 *   CLEAR_WORD0 = R = G = B
78 		 *   CLEAR_WORD1 = A
79 		 */
80 		assert(color->ui[0] == color->ui[1] &&
81 		       color->ui[0] == color->ui[2]);
82 		uc.ui[0] = color->ui[0];
83 		uc.ui[1] = color->ui[3];
84 	} else if (util_format_is_pure_uint(surface_format)) {
85 		util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
86 	} else if (util_format_is_pure_sint(surface_format)) {
87 		util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
88 	} else {
89 		util_pack_color(color->f, surface_format, &uc);
90 	}
91 
92 	memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
93 }
94 
vi_get_fast_clear_parameters(enum pipe_format surface_format,const union pipe_color_union * color,uint32_t * reset_value,bool * clear_words_needed)95 static bool vi_get_fast_clear_parameters(enum pipe_format surface_format,
96 					 const union pipe_color_union *color,
97 					 uint32_t* reset_value,
98 					 bool* clear_words_needed)
99 {
100 	bool values[4] = {};
101 	int i;
102 	bool main_value = false;
103 	bool extra_value = false;
104 	int extra_channel;
105 
106 	/* This is needed to get the correct DCC clear value for luminance formats.
107 	 * 1) Get the linear format (because the next step can't handle L8_SRGB).
108 	 * 2) Convert luminance to red. (the real hw format for luminance)
109 	 */
110 	surface_format = util_format_linear(surface_format);
111 	surface_format = util_format_luminance_to_red(surface_format);
112 
113 	const struct util_format_description *desc = util_format_description(surface_format);
114 
115 	if (desc->block.bits == 128 &&
116 	    (color->ui[0] != color->ui[1] ||
117 	     color->ui[0] != color->ui[2]))
118 		return false;
119 
120 	*clear_words_needed = true;
121 	*reset_value = 0x20202020U;
122 
123 	/* If we want to clear without needing a fast clear eliminate step, we
124 	 * can set each channel to 0 or 1 (or 0/max for integer formats). We
125 	 * have two sets of flags, one for the last or first channel(extra) and
126 	 * one for the other channels(main).
127 	 */
128 
129 	if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
130 	    surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
131 	    surface_format == PIPE_FORMAT_B5G6R5_SRGB ||
132 	    util_format_is_alpha(surface_format)) {
133 		extra_channel = -1;
134 	} else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
135 		if (si_translate_colorswap(surface_format, false) <= 1)
136 			extra_channel = desc->nr_channels - 1;
137 		else
138 			extra_channel = 0;
139 	} else
140 		return true;
141 
142 	for (i = 0; i < 4; ++i) {
143 		int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
144 
145 		if (desc->swizzle[i] < PIPE_SWIZZLE_X ||
146 		    desc->swizzle[i] > PIPE_SWIZZLE_W)
147 			continue;
148 
149 		if (desc->channel[i].pure_integer &&
150 		    desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
151 			/* Use the maximum value for clamping the clear color. */
152 			int max = u_bit_consecutive(0, desc->channel[i].size - 1);
153 
154 			values[i] = color->i[i] != 0;
155 			if (color->i[i] != 0 && MIN2(color->i[i], max) != max)
156 				return true;
157 		} else if (desc->channel[i].pure_integer &&
158 			   desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
159 			/* Use the maximum value for clamping the clear color. */
160 			unsigned max = u_bit_consecutive(0, desc->channel[i].size);
161 
162 			values[i] = color->ui[i] != 0U;
163 			if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max)
164 				return true;
165 		} else {
166 			values[i] = color->f[i] != 0.0F;
167 			if (color->f[i] != 0.0F && color->f[i] != 1.0F)
168 				return true;
169 		}
170 
171 		if (index == extra_channel)
172 			extra_value = values[i];
173 		else
174 			main_value = values[i];
175 	}
176 
177 	for (int i = 0; i < 4; ++i)
178 		if (values[i] != main_value &&
179 		    desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
180 		    desc->swizzle[i] >= PIPE_SWIZZLE_X &&
181 		    desc->swizzle[i] <= PIPE_SWIZZLE_W)
182 			return true;
183 
184 	*clear_words_needed = false;
185 	if (main_value)
186 		*reset_value |= 0x80808080U;
187 
188 	if (extra_value)
189 		*reset_value |= 0x40404040U;
190 	return true;
191 }
192 
vi_dcc_clear_level(struct si_context * sctx,struct r600_texture * rtex,unsigned level,unsigned clear_value)193 void vi_dcc_clear_level(struct si_context *sctx,
194 			struct r600_texture *rtex,
195 			unsigned level, unsigned clear_value)
196 {
197 	struct pipe_resource *dcc_buffer;
198 	uint64_t dcc_offset, clear_size;
199 
200 	assert(vi_dcc_enabled(rtex, level));
201 
202 	if (rtex->dcc_separate_buffer) {
203 		dcc_buffer = &rtex->dcc_separate_buffer->b.b;
204 		dcc_offset = 0;
205 	} else {
206 		dcc_buffer = &rtex->resource.b.b;
207 		dcc_offset = rtex->dcc_offset;
208 	}
209 
210 	if (sctx->b.chip_class >= GFX9) {
211 		/* Mipmap level clears aren't implemented. */
212 		assert(rtex->resource.b.b.last_level == 0);
213 		/* MSAA needs a different clear size. */
214 		assert(rtex->resource.b.b.nr_samples <= 1);
215 		clear_size = rtex->surface.dcc_size;
216 	} else {
217 		unsigned num_layers = util_num_layers(&rtex->resource.b.b, level);
218 
219 		/* If this is 0, fast clear isn't possible. (can occur with MSAA) */
220 		assert(rtex->surface.u.legacy.level[level].dcc_fast_clear_size);
221 		/* Layered MSAA DCC fast clears need to clear dcc_fast_clear_size
222 		 * bytes for each layer. This is not currently implemented, and
223 		 * therefore MSAA DCC isn't even enabled with multiple layers.
224 		 */
225 		assert(rtex->resource.b.b.nr_samples <= 1 || num_layers == 1);
226 
227 		dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset;
228 		clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size *
229 			     num_layers;
230 	}
231 
232 	si_clear_buffer(&sctx->b.b, dcc_buffer, dcc_offset, clear_size,
233 			clear_value, R600_COHERENCY_CB_META);
234 }
235 
236 /* Set the same micro tile mode as the destination of the last MSAA resolve.
237  * This allows hitting the MSAA resolve fast path, which requires that both
238  * src and dst micro tile modes match.
239  */
si_set_optimal_micro_tile_mode(struct si_screen * sscreen,struct r600_texture * rtex)240 static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen,
241 					   struct r600_texture *rtex)
242 {
243 	if (rtex->resource.b.is_shared ||
244 	    rtex->resource.b.b.nr_samples <= 1 ||
245 	    rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
246 		return;
247 
248 	assert(sscreen->info.chip_class >= GFX9 ||
249 	       rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
250 	assert(rtex->resource.b.b.last_level == 0);
251 
252 	if (sscreen->info.chip_class >= GFX9) {
253 		/* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
254 		assert(rtex->surface.u.gfx9.surf.swizzle_mode >= 4);
255 
256 		/* If you do swizzle_mode % 4, you'll get:
257 		 *   0 = Depth
258 		 *   1 = Standard,
259 		 *   2 = Displayable
260 		 *   3 = Rotated
261 		 *
262 		 * Depth-sample order isn't allowed:
263 		 */
264 		assert(rtex->surface.u.gfx9.surf.swizzle_mode % 4 != 0);
265 
266 		switch (rtex->last_msaa_resolve_target_micro_mode) {
267 		case RADEON_MICRO_MODE_DISPLAY:
268 			rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
269 			rtex->surface.u.gfx9.surf.swizzle_mode += 2; /* D */
270 			break;
271 		case RADEON_MICRO_MODE_THIN:
272 			rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
273 			rtex->surface.u.gfx9.surf.swizzle_mode += 1; /* S */
274 			break;
275 		case RADEON_MICRO_MODE_ROTATED:
276 			rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
277 			rtex->surface.u.gfx9.surf.swizzle_mode += 3; /* R */
278 			break;
279 		default: /* depth */
280 			assert(!"unexpected micro mode");
281 			return;
282 		}
283 	} else if (sscreen->info.chip_class >= CIK) {
284 		/* These magic numbers were copied from addrlib. It doesn't use
285 		 * any definitions for them either. They are all 2D_TILED_THIN1
286 		 * modes with different bpp and micro tile mode.
287 		 */
288 		switch (rtex->last_msaa_resolve_target_micro_mode) {
289 		case RADEON_MICRO_MODE_DISPLAY:
290 			rtex->surface.u.legacy.tiling_index[0] = 10;
291 			break;
292 		case RADEON_MICRO_MODE_THIN:
293 			rtex->surface.u.legacy.tiling_index[0] = 14;
294 			break;
295 		case RADEON_MICRO_MODE_ROTATED:
296 			rtex->surface.u.legacy.tiling_index[0] = 28;
297 			break;
298 		default: /* depth, thick */
299 			assert(!"unexpected micro mode");
300 			return;
301 		}
302 	} else { /* SI */
303 		switch (rtex->last_msaa_resolve_target_micro_mode) {
304 		case RADEON_MICRO_MODE_DISPLAY:
305 			switch (rtex->surface.bpe) {
306 			case 1:
307                             rtex->surface.u.legacy.tiling_index[0] = 10;
308                             break;
309 			case 2:
310                             rtex->surface.u.legacy.tiling_index[0] = 11;
311                             break;
312 			default: /* 4, 8 */
313                             rtex->surface.u.legacy.tiling_index[0] = 12;
314                             break;
315 			}
316 			break;
317 		case RADEON_MICRO_MODE_THIN:
318 			switch (rtex->surface.bpe) {
319 			case 1:
320                                 rtex->surface.u.legacy.tiling_index[0] = 14;
321                                 break;
322 			case 2:
323                                 rtex->surface.u.legacy.tiling_index[0] = 15;
324                                 break;
325 			case 4:
326                                 rtex->surface.u.legacy.tiling_index[0] = 16;
327                                 break;
328 			default: /* 8, 16 */
329                                 rtex->surface.u.legacy.tiling_index[0] = 17;
330                                 break;
331 			}
332 			break;
333 		default: /* depth, thick */
334 			assert(!"unexpected micro mode");
335 			return;
336 		}
337 	}
338 
339 	rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
340 
341 	p_atomic_inc(&sscreen->dirty_tex_counter);
342 }
343 
si_do_fast_color_clear(struct si_context * sctx,unsigned * buffers,const union pipe_color_union * color)344 static void si_do_fast_color_clear(struct si_context *sctx,
345 				   unsigned *buffers,
346 				   const union pipe_color_union *color)
347 {
348 	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
349 	int i;
350 
351 	/* This function is broken in BE, so just disable this path for now */
352 #ifdef PIPE_ARCH_BIG_ENDIAN
353 	return;
354 #endif
355 
356 	if (sctx->b.render_cond)
357 		return;
358 
359 	for (i = 0; i < fb->nr_cbufs; i++) {
360 		struct r600_texture *tex;
361 		unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
362 
363 		if (!fb->cbufs[i])
364 			continue;
365 
366 		/* if this colorbuffer is not being cleared */
367 		if (!(*buffers & clear_bit))
368 			continue;
369 
370 		unsigned level = fb->cbufs[i]->u.tex.level;
371 		tex = (struct r600_texture *)fb->cbufs[i]->texture;
372 
373 		/* the clear is allowed if all layers are bound */
374 		if (fb->cbufs[i]->u.tex.first_layer != 0 ||
375 		    fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
376 			continue;
377 		}
378 
379 		/* cannot clear mipmapped textures */
380 		if (fb->cbufs[i]->texture->last_level != 0) {
381 			continue;
382 		}
383 
384 		/* only supported on tiled surfaces */
385 		if (tex->surface.is_linear) {
386 			continue;
387 		}
388 
389 		/* shared textures can't use fast clear without an explicit flush,
390 		 * because there is no way to communicate the clear color among
391 		 * all clients
392 		 */
393 		if (tex->resource.b.is_shared &&
394 		    !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
395 			continue;
396 
397 		/* fast color clear with 1D tiling doesn't work on old kernels and CIK */
398 		if (sctx->b.chip_class == CIK &&
399 		    tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
400 		    sctx->screen->info.drm_major == 2 &&
401 		    sctx->screen->info.drm_minor < 38) {
402 			continue;
403 		}
404 
405 		/* Fast clear is the most appropriate place to enable DCC for
406 		 * displayable surfaces.
407 		 */
408 		if (sctx->b.chip_class >= VI &&
409 		    !(sctx->screen->debug_flags & DBG(NO_DCC_FB))) {
410 			vi_separate_dcc_try_enable(&sctx->b, tex);
411 
412 			/* RB+ isn't supported with a CMASK clear only on Stoney,
413 			 * so all clears are considered to be hypothetically slow
414 			 * clears, which is weighed when determining whether to
415 			 * enable separate DCC.
416 			 */
417 			if (tex->dcc_gather_statistics &&
418 			    sctx->b.family == CHIP_STONEY)
419 				tex->num_slow_clears++;
420 		}
421 
422 		bool need_decompress_pass = false;
423 
424 		/* Use a slow clear for small surfaces where the cost of
425 		 * the eliminate pass can be higher than the benefit of fast
426 		 * clear. The closed driver does this, but the numbers may differ.
427 		 *
428 		 * This helps on both dGPUs and APUs, even small APUs like Mullins.
429 		 */
430 		bool too_small = tex->resource.b.b.nr_samples <= 1 &&
431 				 tex->resource.b.b.width0 *
432 				 tex->resource.b.b.height0 <= 512 * 512;
433 
434 		/* Try to clear DCC first, otherwise try CMASK. */
435 		if (vi_dcc_enabled(tex, 0)) {
436 			uint32_t reset_value;
437 			bool clear_words_needed;
438 
439 			if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR))
440 				continue;
441 
442 			/* This can only occur with MSAA. */
443 			if (sctx->b.chip_class == VI &&
444 			    !tex->surface.u.legacy.level[level].dcc_fast_clear_size)
445 				continue;
446 
447 			if (!vi_get_fast_clear_parameters(fb->cbufs[i]->format,
448 							  color, &reset_value,
449 							  &clear_words_needed))
450 				continue;
451 
452 			if (clear_words_needed && too_small)
453 				continue;
454 
455 			/* DCC fast clear with MSAA should clear CMASK to 0xC. */
456 			if (tex->resource.b.b.nr_samples >= 2 && tex->cmask.size) {
457 				/* TODO: This doesn't work with MSAA. */
458 				if (clear_words_needed)
459 					continue;
460 
461 				si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
462 						tex->cmask.offset, tex->cmask.size,
463 						0xCCCCCCCC, R600_COHERENCY_CB_META);
464 				need_decompress_pass = true;
465 			}
466 
467 			vi_dcc_clear_level(sctx, tex, 0, reset_value);
468 
469 			if (clear_words_needed)
470 				need_decompress_pass = true;
471 
472 			tex->separate_dcc_dirty = true;
473 		} else {
474 			if (too_small)
475 				continue;
476 
477 			/* 128-bit formats are unusupported */
478 			if (tex->surface.bpe > 8) {
479 				continue;
480 			}
481 
482 			/* RB+ doesn't work with CMASK fast clear on Stoney. */
483 			if (sctx->b.family == CHIP_STONEY)
484 				continue;
485 
486 			/* ensure CMASK is enabled */
487 			si_alloc_separate_cmask(sctx->screen, tex);
488 			if (tex->cmask.size == 0) {
489 				continue;
490 			}
491 
492 			/* Do the fast clear. */
493 			si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
494 					tex->cmask.offset, tex->cmask.size, 0,
495 					R600_COHERENCY_CB_META);
496 			need_decompress_pass = true;
497 		}
498 
499 		if (need_decompress_pass &&
500 		    !(tex->dirty_level_mask & (1 << level))) {
501 			tex->dirty_level_mask |= 1 << level;
502 			p_atomic_inc(&sctx->screen->compressed_colortex_counter);
503 		}
504 
505 		/* We can change the micro tile mode before a full clear. */
506 		si_set_optimal_micro_tile_mode(sctx->screen, tex);
507 
508 		si_set_clear_color(tex, fb->cbufs[i]->format, color);
509 
510 		sctx->framebuffer.dirty_cbufs |= 1 << i;
511 		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
512 		*buffers &= ~clear_bit;
513 	}
514 }
515 
si_clear(struct pipe_context * ctx,unsigned buffers,const union pipe_color_union * color,double depth,unsigned stencil)516 static void si_clear(struct pipe_context *ctx, unsigned buffers,
517 		     const union pipe_color_union *color,
518 		     double depth, unsigned stencil)
519 {
520 	struct si_context *sctx = (struct si_context *)ctx;
521 	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
522 	struct pipe_surface *zsbuf = fb->zsbuf;
523 	struct r600_texture *zstex =
524 		zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
525 
526 	if (buffers & PIPE_CLEAR_COLOR) {
527 		si_do_fast_color_clear(sctx, &buffers, color);
528 		if (!buffers)
529 			return; /* all buffers have been fast cleared */
530 	}
531 
532 	if (buffers & PIPE_CLEAR_COLOR) {
533 		int i;
534 
535 		/* These buffers cannot use fast clear, make sure to disable expansion. */
536 		for (i = 0; i < fb->nr_cbufs; i++) {
537 			struct r600_texture *tex;
538 
539 			/* If not clearing this buffer, skip. */
540 			if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
541 				continue;
542 
543 			if (!fb->cbufs[i])
544 				continue;
545 
546 			tex = (struct r600_texture *)fb->cbufs[i]->texture;
547 			if (tex->fmask.size == 0)
548 				tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
549 		}
550 	}
551 
552 	if (zstex &&
553 	    si_htile_enabled(zstex, zsbuf->u.tex.level) &&
554 	    zsbuf->u.tex.first_layer == 0 &&
555 	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
556 		/* TC-compatible HTILE only supports depth clears to 0 or 1. */
557 		if (buffers & PIPE_CLEAR_DEPTH &&
558 		    (!zstex->tc_compatible_htile ||
559 		     depth == 0 || depth == 1)) {
560 			/* Need to disable EXPCLEAR temporarily if clearing
561 			 * to a new value. */
562 			if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
563 				sctx->db_depth_disable_expclear = true;
564 			}
565 
566 			zstex->depth_clear_value = depth;
567 			sctx->framebuffer.dirty_zsbuf = true;
568 			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
569 			sctx->db_depth_clear = true;
570 			si_mark_atom_dirty(sctx, &sctx->db_render_state);
571 		}
572 
573 		/* TC-compatible HTILE only supports stencil clears to 0. */
574 		if (buffers & PIPE_CLEAR_STENCIL &&
575 		    (!zstex->tc_compatible_htile || stencil == 0)) {
576 			stencil &= 0xff;
577 
578 			/* Need to disable EXPCLEAR temporarily if clearing
579 			 * to a new value. */
580 			if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
581 				sctx->db_stencil_disable_expclear = true;
582 			}
583 
584 			zstex->stencil_clear_value = stencil;
585 			sctx->framebuffer.dirty_zsbuf = true;
586 			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
587 			sctx->db_stencil_clear = true;
588 			si_mark_atom_dirty(sctx, &sctx->db_render_state);
589 		}
590 
591 		/* TODO: Find out what's wrong here. Fast depth clear leads to
592 		 * corruption in ARK: Survival Evolved, but that may just be
593 		 * a coincidence and the root cause is elsewhere.
594 		 *
595 		 * The corruption can be fixed by putting the DB flush before
596 		 * or after the depth clear. (surprisingly)
597 		 *
598 		 * https://bugs.freedesktop.org/show_bug.cgi?id=102955 (apitrace)
599 		 *
600 		 * This hack decreases back-to-back ClearDepth performance.
601 		 */
602 		if (sctx->screen->clear_db_cache_before_clear) {
603 			sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
604 		}
605 	}
606 
607 	si_blitter_begin(ctx, SI_CLEAR);
608 	util_blitter_clear(sctx->blitter, fb->width, fb->height,
609 			   util_framebuffer_get_num_layers(fb),
610 			   buffers, color, depth, stencil);
611 	si_blitter_end(ctx);
612 
613 	if (sctx->db_depth_clear) {
614 		sctx->db_depth_clear = false;
615 		sctx->db_depth_disable_expclear = false;
616 		zstex->depth_cleared = true;
617 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
618 	}
619 
620 	if (sctx->db_stencil_clear) {
621 		sctx->db_stencil_clear = false;
622 		sctx->db_stencil_disable_expclear = false;
623 		zstex->stencil_cleared = true;
624 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
625 	}
626 }
627 
si_clear_render_target(struct pipe_context * ctx,struct pipe_surface * dst,const union pipe_color_union * color,unsigned dstx,unsigned dsty,unsigned width,unsigned height,bool render_condition_enabled)628 static void si_clear_render_target(struct pipe_context *ctx,
629 				   struct pipe_surface *dst,
630 				   const union pipe_color_union *color,
631 				   unsigned dstx, unsigned dsty,
632 				   unsigned width, unsigned height,
633 				   bool render_condition_enabled)
634 {
635 	struct si_context *sctx = (struct si_context *)ctx;
636 
637 	si_blitter_begin(ctx, SI_CLEAR_SURFACE |
638 			 (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
639 	util_blitter_clear_render_target(sctx->blitter, dst, color,
640 					 dstx, dsty, width, height);
641 	si_blitter_end(ctx);
642 }
643 
si_clear_depth_stencil(struct pipe_context * ctx,struct pipe_surface * dst,unsigned clear_flags,double depth,unsigned stencil,unsigned dstx,unsigned dsty,unsigned width,unsigned height,bool render_condition_enabled)644 static void si_clear_depth_stencil(struct pipe_context *ctx,
645 				   struct pipe_surface *dst,
646 				   unsigned clear_flags,
647 				   double depth,
648 				   unsigned stencil,
649 				   unsigned dstx, unsigned dsty,
650 				   unsigned width, unsigned height,
651 				   bool render_condition_enabled)
652 {
653 	struct si_context *sctx = (struct si_context *)ctx;
654 
655 	si_blitter_begin(ctx, SI_CLEAR_SURFACE |
656 			 (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
657 	util_blitter_clear_depth_stencil(sctx->blitter, dst, clear_flags, depth, stencil,
658 					 dstx, dsty, width, height);
659 	si_blitter_end(ctx);
660 }
661 
si_clear_texture(struct pipe_context * pipe,struct pipe_resource * tex,unsigned level,const struct pipe_box * box,const void * data)662 static void si_clear_texture(struct pipe_context *pipe,
663 			     struct pipe_resource *tex,
664 			     unsigned level,
665 			     const struct pipe_box *box,
666 			     const void *data)
667 {
668 	struct pipe_screen *screen = pipe->screen;
669 	struct r600_texture *rtex = (struct r600_texture*)tex;
670 	struct pipe_surface tmpl = {{0}};
671 	struct pipe_surface *sf;
672 	const struct util_format_description *desc =
673 		util_format_description(tex->format);
674 
675 	tmpl.format = tex->format;
676 	tmpl.u.tex.first_layer = box->z;
677 	tmpl.u.tex.last_layer = box->z + box->depth - 1;
678 	tmpl.u.tex.level = level;
679 	sf = pipe->create_surface(pipe, tex, &tmpl);
680 	if (!sf)
681 		return;
682 
683 	if (rtex->is_depth) {
684 		unsigned clear;
685 		float depth;
686 		uint8_t stencil = 0;
687 
688 		/* Depth is always present. */
689 		clear = PIPE_CLEAR_DEPTH;
690 		desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
691 
692 		if (rtex->surface.has_stencil) {
693 			clear |= PIPE_CLEAR_STENCIL;
694 			desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
695 		}
696 
697 		si_clear_depth_stencil(pipe, sf, clear, depth, stencil,
698 				       box->x, box->y,
699 				       box->width, box->height, false);
700 	} else {
701 		union pipe_color_union color;
702 
703 		/* pipe_color_union requires the full vec4 representation. */
704 		if (util_format_is_pure_uint(tex->format))
705 			desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
706 		else if (util_format_is_pure_sint(tex->format))
707 			desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
708 		else
709 			desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
710 
711 		if (screen->is_format_supported(screen, tex->format,
712 						tex->target, 0,
713 						PIPE_BIND_RENDER_TARGET)) {
714 			si_clear_render_target(pipe, sf, &color,
715 					       box->x, box->y,
716 					       box->width, box->height, false);
717 		} else {
718 			/* Software fallback - just for R9G9B9E5_FLOAT */
719 			util_clear_render_target(pipe, sf, &color,
720 						 box->x, box->y,
721 						 box->width, box->height);
722 		}
723 	}
724 	pipe_surface_reference(&sf, NULL);
725 }
726 
si_init_clear_functions(struct si_context * sctx)727 void si_init_clear_functions(struct si_context *sctx)
728 {
729 	sctx->b.b.clear = si_clear;
730 	sctx->b.b.clear_render_target = si_clear_render_target;
731 	sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
732 	sctx->b.b.clear_texture = si_clear_texture;
733 }
734