1 /*
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 /* Resource binding slots and sampler states (each described with 8 or
25  * 4 dwords) are stored in lists in memory which is accessed by shaders
26  * using scalar load instructions.
27  *
28  * This file is responsible for managing such lists. It keeps a copy of all
29  * descriptors in CPU memory and re-uploads a whole list if some slots have
30  * been changed.
31  *
 * This code is also responsible for updating shader pointers to those lists.
33  *
34  * Note that CP DMA can't be used for updating the lists, because a GPU hang
35  * could leave the list in a mid-IB state and the next IB would get wrong
36  * descriptors and the whole context would be unusable at that point.
 * (Note: Register shadowing can't be used for the same reason.)
38  *
39  * Also, uploading descriptors to newly allocated memory doesn't require
40  * a KCACHE flush.
41  *
42  *
43  * Possible scenarios for one 16 dword image+sampler slot:
44  *
45  *       | Image        | w/ FMASK   | Buffer       | NULL
46  * [ 0: 3] Image[0:3]   | Image[0:3] | Null[0:3]    | Null[0:3]
47  * [ 4: 7] Image[4:7]   | Image[4:7] | Buffer[0:3]  | 0
48  * [ 8:11] Null[0:3]    | Fmask[0:3] | Null[0:3]    | Null[0:3]
49  * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
50  *
51  * FMASK implies MSAA, therefore no sampler state.
52  * Sampler states are never unbound except when FMASK is bound.
53  */
54 
55 #include "radeon/r600_cs.h"
56 #include "si_pipe.h"
57 #include "sid.h"
58 #include "gfx9d.h"
59 
60 #include "util/hash_table.h"
61 #include "util/u_idalloc.h"
62 #include "util/u_format.h"
63 #include "util/u_memory.h"
64 #include "util/u_upload_mgr.h"
65 
66 
67 /* NULL image and buffer descriptor for textures (alpha = 1) and images
68  * (alpha = 0).
69  *
70  * For images, all fields must be zero except for the swizzle, which
71  * supports arbitrary combinations of 0s and 1s. The texture type must be
72  * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs.
73  *
74  * For buffers, all fields must be zero. If they are not, the hw hangs.
75  *
76  * This is the only reason why the buffer descriptor must be in words [4:7].
77  */
78 static uint32_t null_texture_descriptor[8] = {
79 	0,
80 	0,
81 	0,
82 	S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) |
83 	S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
84 	/* the rest must contain zeros, which is also used by the buffer
85 	 * descriptor */
86 };
87 
88 static uint32_t null_image_descriptor[8] = {
89 	0,
90 	0,
91 	0,
92 	S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
93 	/* the rest must contain zeros, which is also used by the buffer
94 	 * descriptor */
95 };
96 
/* Reassemble the 64-bit address held in a buffer descriptor: dword 0 gives
 * the low 32 bits, the BASE_ADDRESS_HI field of dword 1 gives the rest. */
static uint64_t si_desc_extract_buffer_address(uint32_t *desc)
{
	uint64_t addr_lo = desc[0];
	uint64_t addr_hi = G_008F04_BASE_ADDRESS_HI(desc[1]);

	return addr_lo | (addr_hi << 32);
}
101 
/**
 * Fill a CPU-side descriptor list with copies of a NULL descriptor.
 *
 * \param desc_list        list to initialize
 * \param element_dw_size  size of one list element in dwords; must be a
 *                         multiple of 8 when null_descriptor is given
 * \param num_elements     number of elements in the list
 * \param null_descriptor  8-dword descriptor to replicate, or NULL to leave
 *                         the list untouched
 */
static void si_init_descriptor_list(uint32_t *desc_list,
				    unsigned element_dw_size,
				    unsigned num_elements,
				    const uint32_t *null_descriptor)
{
	if (!null_descriptor)
		return;

	assert(element_dw_size % 8 == 0);

	/* An element may span several 8-dword descriptors; fill every one.
	 * The counter is unsigned to match the type of the bound. */
	unsigned num_descriptors = num_elements * element_dw_size / 8;

	for (unsigned i = 0; i < num_descriptors; i++)
		memcpy(desc_list + i * 8, null_descriptor, 8 * sizeof(uint32_t));
}
116 
si_init_descriptors(struct si_descriptors * desc,unsigned shader_userdata_index,unsigned element_dw_size,unsigned num_elements)117 static void si_init_descriptors(struct si_descriptors *desc,
118 				unsigned shader_userdata_index,
119 				unsigned element_dw_size,
120 				unsigned num_elements)
121 {
122 	desc->list = CALLOC(num_elements, element_dw_size * 4);
123 	desc->element_dw_size = element_dw_size;
124 	desc->num_elements = num_elements;
125 	desc->shader_userdata_offset = shader_userdata_index * 4;
126 	desc->slot_index_to_bind_directly = -1;
127 }
128 
si_release_descriptors(struct si_descriptors * desc)129 static void si_release_descriptors(struct si_descriptors *desc)
130 {
131 	r600_resource_reference(&desc->buffer, NULL);
132 	FREE(desc->list);
133 }
134 
si_upload_descriptors(struct si_context * sctx,struct si_descriptors * desc)135 static bool si_upload_descriptors(struct si_context *sctx,
136 				  struct si_descriptors *desc)
137 {
138 	unsigned slot_size = desc->element_dw_size * 4;
139 	unsigned first_slot_offset = desc->first_active_slot * slot_size;
140 	unsigned upload_size = desc->num_active_slots * slot_size;
141 
142 	/* Skip the upload if no shader is using the descriptors. dirty_mask
143 	 * will stay dirty and the descriptors will be uploaded when there is
144 	 * a shader using them.
145 	 */
146 	if (!upload_size)
147 		return true;
148 
149 	/* If there is just one active descriptor, bind it directly. */
150 	if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly &&
151 	    desc->num_active_slots == 1) {
152 		uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly *
153 						   desc->element_dw_size];
154 
155 		/* The buffer is already in the buffer list. */
156 		r600_resource_reference(&desc->buffer, NULL);
157 		desc->gpu_list = NULL;
158 		desc->gpu_address = si_desc_extract_buffer_address(descriptor);
159 		si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
160 		return true;
161 	}
162 
163 	uint32_t *ptr;
164 	unsigned buffer_offset;
165 	u_upload_alloc(sctx->b.b.const_uploader, first_slot_offset, upload_size,
166 		       si_optimal_tcc_alignment(sctx, upload_size),
167 		       &buffer_offset, (struct pipe_resource**)&desc->buffer,
168 		       (void**)&ptr);
169 	if (!desc->buffer) {
170 		desc->gpu_address = 0;
171 		return false; /* skip the draw call */
172 	}
173 
174 	util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset,
175 				upload_size);
176 	desc->gpu_list = ptr - first_slot_offset / 4;
177 
178 	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
179                             RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
180 
181 	/* The shader pointer should point to slot 0. */
182 	buffer_offset -= first_slot_offset;
183 	desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
184 
185 	si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
186 	return true;
187 }
188 
189 static void
si_descriptors_begin_new_cs(struct si_context * sctx,struct si_descriptors * desc)190 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
191 {
192 	if (!desc->buffer)
193 		return;
194 
195 	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
196 				  RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
197 }
198 
199 /* SAMPLER VIEWS */
200 
201 static inline enum radeon_bo_priority
si_get_sampler_view_priority(struct r600_resource * res)202 si_get_sampler_view_priority(struct r600_resource *res)
203 {
204 	if (res->b.b.target == PIPE_BUFFER)
205 		return RADEON_PRIO_SAMPLER_BUFFER;
206 
207 	if (res->b.b.nr_samples > 1)
208 		return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
209 
210 	return RADEON_PRIO_SAMPLER_TEXTURE;
211 }
212 
213 static unsigned
si_sampler_and_image_descriptors_idx(unsigned shader)214 si_sampler_and_image_descriptors_idx(unsigned shader)
215 {
216 	return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
217 	       SI_SHADER_DESCS_SAMPLERS_AND_IMAGES;
218 }
219 
220 static struct si_descriptors *
si_sampler_and_image_descriptors(struct si_context * sctx,unsigned shader)221 si_sampler_and_image_descriptors(struct si_context *sctx, unsigned shader)
222 {
223 	return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)];
224 }
225 
si_release_sampler_views(struct si_samplers * samplers)226 static void si_release_sampler_views(struct si_samplers *samplers)
227 {
228 	int i;
229 
230 	for (i = 0; i < ARRAY_SIZE(samplers->views); i++) {
231 		pipe_sampler_view_reference(&samplers->views[i], NULL);
232 	}
233 }
234 
si_sampler_view_add_buffer(struct si_context * sctx,struct pipe_resource * resource,enum radeon_bo_usage usage,bool is_stencil_sampler,bool check_mem)235 static void si_sampler_view_add_buffer(struct si_context *sctx,
236 				       struct pipe_resource *resource,
237 				       enum radeon_bo_usage usage,
238 				       bool is_stencil_sampler,
239 				       bool check_mem)
240 {
241 	struct r600_resource *rres;
242 	struct r600_texture *rtex;
243 	enum radeon_bo_priority priority;
244 
245 	if (!resource)
246 		return;
247 
248 	if (resource->target != PIPE_BUFFER) {
249 		struct r600_texture *tex = (struct r600_texture*)resource;
250 
251 		if (tex->is_depth && !si_can_sample_zs(tex, is_stencil_sampler))
252 			resource = &tex->flushed_depth_texture->resource.b.b;
253 	}
254 
255 	rres = (struct r600_resource*)resource;
256 	priority = si_get_sampler_view_priority(rres);
257 
258 	radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
259 					    rres, usage, priority,
260 					    check_mem);
261 
262 	if (resource->target == PIPE_BUFFER)
263 		return;
264 
265 	/* Now add separate DCC or HTILE. */
266 	rtex = (struct r600_texture*)resource;
267 	if (rtex->dcc_separate_buffer) {
268 		radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
269 						    rtex->dcc_separate_buffer, usage,
270 						    RADEON_PRIO_DCC, check_mem);
271 	}
272 }
273 
si_sampler_views_begin_new_cs(struct si_context * sctx,struct si_samplers * samplers)274 static void si_sampler_views_begin_new_cs(struct si_context *sctx,
275 					  struct si_samplers *samplers)
276 {
277 	unsigned mask = samplers->enabled_mask;
278 
279 	/* Add buffers to the CS. */
280 	while (mask) {
281 		int i = u_bit_scan(&mask);
282 		struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];
283 
284 		si_sampler_view_add_buffer(sctx, sview->base.texture,
285 					   RADEON_USAGE_READ,
286 					   sview->is_stencil_sampler, false);
287 	}
288 }
289 
290 /* Set buffer descriptor fields that can be changed by reallocations. */
si_set_buf_desc_address(struct r600_resource * buf,uint64_t offset,uint32_t * state)291 static void si_set_buf_desc_address(struct r600_resource *buf,
292 				    uint64_t offset, uint32_t *state)
293 {
294 	uint64_t va = buf->gpu_address + offset;
295 
296 	state[0] = va;
297 	state[1] &= C_008F04_BASE_ADDRESS_HI;
298 	state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
299 }
300 
301 /* Set texture descriptor fields that can be changed by reallocations.
302  *
303  * \param tex			texture
304  * \param base_level_info	information of the level of BASE_ADDRESS
305  * \param base_level		the level of BASE_ADDRESS
306  * \param first_level		pipe_sampler_view.u.tex.first_level
307  * \param block_width		util_format_get_blockwidth()
308  * \param is_stencil		select between separate Z & Stencil
309  * \param state			descriptor to update
310  */
si_set_mutable_tex_desc_fields(struct si_screen * sscreen,struct r600_texture * tex,const struct legacy_surf_level * base_level_info,unsigned base_level,unsigned first_level,unsigned block_width,bool is_stencil,uint32_t * state)311 void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
312 				    struct r600_texture *tex,
313 				    const struct legacy_surf_level *base_level_info,
314 				    unsigned base_level, unsigned first_level,
315 				    unsigned block_width, bool is_stencil,
316 				    uint32_t *state)
317 {
318 	uint64_t va, meta_va = 0;
319 
320 	if (tex->is_depth && !si_can_sample_zs(tex, is_stencil)) {
321 		tex = tex->flushed_depth_texture;
322 		is_stencil = false;
323 	}
324 
325 	va = tex->resource.gpu_address;
326 
327 	if (sscreen->info.chip_class >= GFX9) {
328 		/* Only stencil_offset needs to be added here. */
329 		if (is_stencil)
330 			va += tex->surface.u.gfx9.stencil_offset;
331 		else
332 			va += tex->surface.u.gfx9.surf_offset;
333 	} else {
334 		va += base_level_info->offset;
335 	}
336 
337 	state[0] = va >> 8;
338 	state[1] &= C_008F14_BASE_ADDRESS_HI;
339 	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
340 
341 	/* Only macrotiled modes can set tile swizzle.
342 	 * GFX9 doesn't use (legacy) base_level_info.
343 	 */
344 	if (sscreen->info.chip_class >= GFX9 ||
345 	    base_level_info->mode == RADEON_SURF_MODE_2D)
346 		state[0] |= tex->surface.tile_swizzle;
347 
348 	if (sscreen->info.chip_class >= VI) {
349 		state[6] &= C_008F28_COMPRESSION_EN;
350 		state[7] = 0;
351 
352 		if (vi_dcc_enabled(tex, first_level)) {
353 			meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
354 				  tex->dcc_offset;
355 
356 			if (sscreen->info.chip_class == VI) {
357 				meta_va += base_level_info->dcc_offset;
358 				assert(base_level_info->mode == RADEON_SURF_MODE_2D);
359 			}
360 
361 			meta_va |= (uint32_t)tex->surface.tile_swizzle << 8;
362 		} else if (vi_tc_compat_htile_enabled(tex, first_level)) {
363 			meta_va = tex->resource.gpu_address + tex->htile_offset;
364 		}
365 
366 		if (meta_va) {
367 			state[6] |= S_008F28_COMPRESSION_EN(1);
368 			state[7] = meta_va >> 8;
369 		}
370 	}
371 
372 	if (sscreen->info.chip_class >= GFX9) {
373 		state[3] &= C_008F1C_SW_MODE;
374 		state[4] &= C_008F20_PITCH_GFX9;
375 
376 		if (is_stencil) {
377 			state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode);
378 			state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.stencil.epitch);
379 		} else {
380 			state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode);
381 			state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.surf.epitch);
382 		}
383 
384 		state[5] &= C_008F24_META_DATA_ADDRESS &
385 			    C_008F24_META_PIPE_ALIGNED &
386 			    C_008F24_META_RB_ALIGNED;
387 		if (meta_va) {
388 			struct gfx9_surf_meta_flags meta;
389 
390 			if (tex->dcc_offset)
391 				meta = tex->surface.u.gfx9.dcc;
392 			else
393 				meta = tex->surface.u.gfx9.htile;
394 
395 			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
396 				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
397 				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
398 		}
399 	} else {
400 		/* SI-CI-VI */
401 		unsigned pitch = base_level_info->nblk_x * block_width;
402 		unsigned index = si_tile_mode_index(tex, base_level, is_stencil);
403 
404 		state[3] &= C_008F1C_TILING_INDEX;
405 		state[3] |= S_008F1C_TILING_INDEX(index);
406 		state[4] &= C_008F20_PITCH_GFX6;
407 		state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
408 	}
409 }
410 
si_set_sampler_state_desc(struct si_sampler_state * sstate,struct si_sampler_view * sview,struct r600_texture * tex,uint32_t * desc)411 static void si_set_sampler_state_desc(struct si_sampler_state *sstate,
412 				      struct si_sampler_view *sview,
413 				      struct r600_texture *tex,
414 				      uint32_t *desc)
415 {
416 	if (sview && sview->is_integer)
417 		memcpy(desc, sstate->integer_val, 4*4);
418 	else if (tex && tex->upgraded_depth &&
419 		 (!sview || !sview->is_stencil_sampler))
420 		memcpy(desc, sstate->upgraded_depth_val, 4*4);
421 	else
422 		memcpy(desc, sstate->val, 4*4);
423 }
424 
si_set_sampler_view_desc(struct si_context * sctx,struct si_sampler_view * sview,struct si_sampler_state * sstate,uint32_t * desc)425 static void si_set_sampler_view_desc(struct si_context *sctx,
426 				     struct si_sampler_view *sview,
427 				     struct si_sampler_state *sstate,
428 				     uint32_t *desc)
429 {
430 	struct pipe_sampler_view *view = &sview->base;
431 	struct r600_texture *rtex = (struct r600_texture *)view->texture;
432 	bool is_buffer = rtex->resource.b.b.target == PIPE_BUFFER;
433 
434 	if (unlikely(!is_buffer && sview->dcc_incompatible)) {
435 		if (vi_dcc_enabled(rtex, view->u.tex.first_level))
436 			if (!si_texture_disable_dcc(&sctx->b, rtex))
437 				sctx->b.decompress_dcc(&sctx->b.b, rtex);
438 
439 		sview->dcc_incompatible = false;
440 	}
441 
442 	assert(rtex); /* views with texture == NULL aren't supported */
443 	memcpy(desc, sview->state, 8*4);
444 
445 	if (is_buffer) {
446 		si_set_buf_desc_address(&rtex->resource,
447 					sview->base.u.buf.offset,
448 					desc + 4);
449 	} else {
450 		bool is_separate_stencil = rtex->db_compatible &&
451 					   sview->is_stencil_sampler;
452 
453 		si_set_mutable_tex_desc_fields(sctx->screen, rtex,
454 					       sview->base_level_info,
455 					       sview->base_level,
456 					       sview->base.u.tex.first_level,
457 					       sview->block_width,
458 					       is_separate_stencil,
459 					       desc);
460 	}
461 
462 	if (!is_buffer && rtex->fmask.size) {
463 		memcpy(desc + 8, sview->fmask_state, 8*4);
464 	} else {
465 		/* Disable FMASK and bind sampler state in [12:15]. */
466 		memcpy(desc + 8, null_texture_descriptor, 4*4);
467 
468 		if (sstate)
469 			si_set_sampler_state_desc(sstate, sview,
470 						  is_buffer ? NULL : rtex,
471 						  desc + 12);
472 	}
473 }
474 
color_needs_decompression(struct r600_texture * rtex)475 static bool color_needs_decompression(struct r600_texture *rtex)
476 {
477 	return rtex->fmask.size ||
478 	       (rtex->dirty_level_mask &&
479 		(rtex->cmask.size || rtex->dcc_offset));
480 }
481 
depth_needs_decompression(struct r600_texture * rtex)482 static bool depth_needs_decompression(struct r600_texture *rtex)
483 {
484 	/* If the depth/stencil texture is TC-compatible, no decompression
485 	 * will be done. The decompression function will only flush DB caches
486 	 * to make it coherent with shaders. That's necessary because the driver
487 	 * doesn't flush DB caches in any other case.
488 	 */
489 	return rtex->db_compatible;
490 }
491 
si_set_sampler_view(struct si_context * sctx,unsigned shader,unsigned slot,struct pipe_sampler_view * view,bool disallow_early_out)492 static void si_set_sampler_view(struct si_context *sctx,
493 				unsigned shader,
494 				unsigned slot, struct pipe_sampler_view *view,
495 				bool disallow_early_out)
496 {
497 	struct si_samplers *samplers = &sctx->samplers[shader];
498 	struct si_sampler_view *rview = (struct si_sampler_view*)view;
499 	struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
500 	unsigned desc_slot = si_get_sampler_slot(slot);
501 	uint32_t *desc = descs->list + desc_slot * 16;
502 
503 	if (samplers->views[slot] == view && !disallow_early_out)
504 		return;
505 
506 	if (view) {
507 		struct r600_texture *rtex = (struct r600_texture *)view->texture;
508 
509 		si_set_sampler_view_desc(sctx, rview,
510 					 samplers->sampler_states[slot], desc);
511 
512 		if (rtex->resource.b.b.target == PIPE_BUFFER) {
513 			rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW;
514 			samplers->needs_depth_decompress_mask &= ~(1u << slot);
515 			samplers->needs_color_decompress_mask &= ~(1u << slot);
516 		} else {
517 			if (depth_needs_decompression(rtex)) {
518 				samplers->needs_depth_decompress_mask |= 1u << slot;
519 			} else {
520 				samplers->needs_depth_decompress_mask &= ~(1u << slot);
521 			}
522 			if (color_needs_decompression(rtex)) {
523 				samplers->needs_color_decompress_mask |= 1u << slot;
524 			} else {
525 				samplers->needs_color_decompress_mask &= ~(1u << slot);
526 			}
527 
528 			if (rtex->dcc_offset &&
529 			    p_atomic_read(&rtex->framebuffers_bound))
530 				sctx->need_check_render_feedback = true;
531 		}
532 
533 		pipe_sampler_view_reference(&samplers->views[slot], view);
534 		samplers->enabled_mask |= 1u << slot;
535 
536 		/* Since this can flush, it must be done after enabled_mask is
537 		 * updated. */
538 		si_sampler_view_add_buffer(sctx, view->texture,
539 					   RADEON_USAGE_READ,
540 					   rview->is_stencil_sampler, true);
541 	} else {
542 		pipe_sampler_view_reference(&samplers->views[slot], NULL);
543 		memcpy(desc, null_texture_descriptor, 8*4);
544 		/* Only clear the lower dwords of FMASK. */
545 		memcpy(desc + 8, null_texture_descriptor, 4*4);
546 		/* Re-set the sampler state if we are transitioning from FMASK. */
547 		if (samplers->sampler_states[slot])
548 			si_set_sampler_state_desc(samplers->sampler_states[slot], NULL, NULL,
549 						  desc + 12);
550 
551 		samplers->enabled_mask &= ~(1u << slot);
552 		samplers->needs_depth_decompress_mask &= ~(1u << slot);
553 		samplers->needs_color_decompress_mask &= ~(1u << slot);
554 	}
555 
556 	sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
557 }
558 
si_update_shader_needs_decompress_mask(struct si_context * sctx,unsigned shader)559 static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
560 						   unsigned shader)
561 {
562 	struct si_samplers *samplers = &sctx->samplers[shader];
563 	unsigned shader_bit = 1 << shader;
564 
565 	if (samplers->needs_depth_decompress_mask ||
566 	    samplers->needs_color_decompress_mask ||
567 	    sctx->images[shader].needs_color_decompress_mask)
568 		sctx->shader_needs_decompress_mask |= shader_bit;
569 	else
570 		sctx->shader_needs_decompress_mask &= ~shader_bit;
571 }
572 
si_set_sampler_views(struct pipe_context * ctx,enum pipe_shader_type shader,unsigned start,unsigned count,struct pipe_sampler_view ** views)573 static void si_set_sampler_views(struct pipe_context *ctx,
574 				 enum pipe_shader_type shader, unsigned start,
575                                  unsigned count,
576 				 struct pipe_sampler_view **views)
577 {
578 	struct si_context *sctx = (struct si_context *)ctx;
579 	int i;
580 
581 	if (!count || shader >= SI_NUM_SHADERS)
582 		return;
583 
584 	if (views) {
585 		for (i = 0; i < count; i++)
586 			si_set_sampler_view(sctx, shader, start + i, views[i], false);
587 	} else {
588 		for (i = 0; i < count; i++)
589 			si_set_sampler_view(sctx, shader, start + i, NULL, false);
590 	}
591 
592 	si_update_shader_needs_decompress_mask(sctx, shader);
593 }
594 
595 static void
si_samplers_update_needs_color_decompress_mask(struct si_samplers * samplers)596 si_samplers_update_needs_color_decompress_mask(struct si_samplers *samplers)
597 {
598 	unsigned mask = samplers->enabled_mask;
599 
600 	while (mask) {
601 		int i = u_bit_scan(&mask);
602 		struct pipe_resource *res = samplers->views[i]->texture;
603 
604 		if (res && res->target != PIPE_BUFFER) {
605 			struct r600_texture *rtex = (struct r600_texture *)res;
606 
607 			if (color_needs_decompression(rtex)) {
608 				samplers->needs_color_decompress_mask |= 1u << i;
609 			} else {
610 				samplers->needs_color_decompress_mask &= ~(1u << i);
611 			}
612 		}
613 	}
614 }
615 
616 /* IMAGE VIEWS */
617 
618 static void
si_release_image_views(struct si_images * images)619 si_release_image_views(struct si_images *images)
620 {
621 	unsigned i;
622 
623 	for (i = 0; i < SI_NUM_IMAGES; ++i) {
624 		struct pipe_image_view *view = &images->views[i];
625 
626 		pipe_resource_reference(&view->resource, NULL);
627 	}
628 }
629 
630 static void
si_image_views_begin_new_cs(struct si_context * sctx,struct si_images * images)631 si_image_views_begin_new_cs(struct si_context *sctx, struct si_images *images)
632 {
633 	uint mask = images->enabled_mask;
634 
635 	/* Add buffers to the CS. */
636 	while (mask) {
637 		int i = u_bit_scan(&mask);
638 		struct pipe_image_view *view = &images->views[i];
639 
640 		assert(view->resource);
641 
642 		si_sampler_view_add_buffer(sctx, view->resource,
643 					   RADEON_USAGE_READWRITE, false, false);
644 	}
645 }
646 
647 static void
si_disable_shader_image(struct si_context * ctx,unsigned shader,unsigned slot)648 si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
649 {
650 	struct si_images *images = &ctx->images[shader];
651 
652 	if (images->enabled_mask & (1u << slot)) {
653 		struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
654 		unsigned desc_slot = si_get_image_slot(slot);
655 
656 		pipe_resource_reference(&images->views[slot].resource, NULL);
657 		images->needs_color_decompress_mask &= ~(1 << slot);
658 
659 		memcpy(descs->list + desc_slot*8, null_image_descriptor, 8*4);
660 		images->enabled_mask &= ~(1u << slot);
661 		ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
662 	}
663 }
664 
665 static void
si_mark_image_range_valid(const struct pipe_image_view * view)666 si_mark_image_range_valid(const struct pipe_image_view *view)
667 {
668 	struct r600_resource *res = (struct r600_resource *)view->resource;
669 
670 	assert(res && res->b.b.target == PIPE_BUFFER);
671 
672 	util_range_add(&res->valid_buffer_range,
673 		       view->u.buf.offset,
674 		       view->u.buf.offset + view->u.buf.size);
675 }
676 
si_set_shader_image_desc(struct si_context * ctx,const struct pipe_image_view * view,bool skip_decompress,uint32_t * desc)677 static void si_set_shader_image_desc(struct si_context *ctx,
678 				     const struct pipe_image_view *view,
679 				     bool skip_decompress,
680 				     uint32_t *desc)
681 {
682 	struct si_screen *screen = ctx->screen;
683 	struct r600_resource *res;
684 
685 	res = (struct r600_resource *)view->resource;
686 
687 	if (res->b.b.target == PIPE_BUFFER) {
688 		if (view->access & PIPE_IMAGE_ACCESS_WRITE)
689 			si_mark_image_range_valid(view);
690 
691 		si_make_buffer_descriptor(screen, res,
692 					  view->format,
693 					  view->u.buf.offset,
694 					  view->u.buf.size, desc);
695 		si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
696 	} else {
697 		static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
698 		struct r600_texture *tex = (struct r600_texture *)res;
699 		unsigned level = view->u.tex.level;
700 		unsigned width, height, depth, hw_level;
701 		bool uses_dcc = vi_dcc_enabled(tex, level);
702 		unsigned access = view->access;
703 
704 		/* Clear the write flag when writes can't occur.
705 		 * Note that DCC_DECOMPRESS for MSAA doesn't work in some cases,
706 		 * so we don't wanna trigger it.
707 		 */
708 		if (tex->is_depth || tex->resource.b.b.nr_samples >= 2) {
709 			assert(!"Z/S and MSAA image stores are not supported");
710 			access &= ~PIPE_IMAGE_ACCESS_WRITE;
711 		}
712 
713 		assert(!tex->is_depth);
714 		assert(tex->fmask.size == 0);
715 
716 		if (uses_dcc && !skip_decompress &&
717 		    (view->access & PIPE_IMAGE_ACCESS_WRITE ||
718 		     !vi_dcc_formats_compatible(res->b.b.format, view->format))) {
719 			/* If DCC can't be disabled, at least decompress it.
720 			 * The decompression is relatively cheap if the surface
721 			 * has been decompressed already.
722 			 */
723 			if (!si_texture_disable_dcc(&ctx->b, tex))
724 				ctx->b.decompress_dcc(&ctx->b.b, tex);
725 		}
726 
727 		if (ctx->b.chip_class >= GFX9) {
728 			/* Always set the base address. The swizzle modes don't
729 			 * allow setting mipmap level offsets as the base.
730 			 */
731 			width = res->b.b.width0;
732 			height = res->b.b.height0;
733 			depth = res->b.b.depth0;
734 			hw_level = level;
735 		} else {
736 			/* Always force the base level to the selected level.
737 			 *
738 			 * This is required for 3D textures, where otherwise
739 			 * selecting a single slice for non-layered bindings
740 			 * fails. It doesn't hurt the other targets.
741 			 */
742 			width = u_minify(res->b.b.width0, level);
743 			height = u_minify(res->b.b.height0, level);
744 			depth = u_minify(res->b.b.depth0, level);
745 			hw_level = 0;
746 		}
747 
748 		si_make_texture_descriptor(screen, tex,
749 					   false, res->b.b.target,
750 					   view->format, swizzle,
751 					   hw_level, hw_level,
752 					   view->u.tex.first_layer,
753 					   view->u.tex.last_layer,
754 					   width, height, depth,
755 					   desc, NULL);
756 		si_set_mutable_tex_desc_fields(screen, tex,
757 					       &tex->surface.u.legacy.level[level],
758 					       level, level,
759 					       util_format_get_blockwidth(view->format),
760 					       false, desc);
761 	}
762 }
763 
si_set_shader_image(struct si_context * ctx,unsigned shader,unsigned slot,const struct pipe_image_view * view,bool skip_decompress)764 static void si_set_shader_image(struct si_context *ctx,
765 				unsigned shader,
766 				unsigned slot, const struct pipe_image_view *view,
767 				bool skip_decompress)
768 {
769 	struct si_images *images = &ctx->images[shader];
770 	struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
771 	struct r600_resource *res;
772 	unsigned desc_slot = si_get_image_slot(slot);
773 	uint32_t *desc = descs->list + desc_slot * 8;
774 
775 	if (!view || !view->resource) {
776 		si_disable_shader_image(ctx, shader, slot);
777 		return;
778 	}
779 
780 	res = (struct r600_resource *)view->resource;
781 
782 	if (&images->views[slot] != view)
783 		util_copy_image_view(&images->views[slot], view);
784 
785 	si_set_shader_image_desc(ctx, view, skip_decompress, desc);
786 
787 	if (res->b.b.target == PIPE_BUFFER) {
788 		images->needs_color_decompress_mask &= ~(1 << slot);
789 		res->bind_history |= PIPE_BIND_SHADER_IMAGE;
790 	} else {
791 		struct r600_texture *tex = (struct r600_texture *)res;
792 		unsigned level = view->u.tex.level;
793 
794 		if (color_needs_decompression(tex)) {
795 			images->needs_color_decompress_mask |= 1 << slot;
796 		} else {
797 			images->needs_color_decompress_mask &= ~(1 << slot);
798 		}
799 
800 		if (vi_dcc_enabled(tex, level) &&
801 		    p_atomic_read(&tex->framebuffers_bound))
802 			ctx->need_check_render_feedback = true;
803 	}
804 
805 	images->enabled_mask |= 1u << slot;
806 	ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
807 
808 	/* Since this can flush, it must be done after enabled_mask is updated. */
809 	si_sampler_view_add_buffer(ctx, &res->b.b,
810 				   (view->access & PIPE_IMAGE_ACCESS_WRITE) ?
811 				   RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
812 				   false, true);
813 }
814 
815 static void
si_set_shader_images(struct pipe_context * pipe,enum pipe_shader_type shader,unsigned start_slot,unsigned count,const struct pipe_image_view * views)816 si_set_shader_images(struct pipe_context *pipe,
817 		     enum pipe_shader_type shader,
818 		     unsigned start_slot, unsigned count,
819 		     const struct pipe_image_view *views)
820 {
821 	struct si_context *ctx = (struct si_context *)pipe;
822 	unsigned i, slot;
823 
824 	assert(shader < SI_NUM_SHADERS);
825 
826 	if (!count)
827 		return;
828 
829 	assert(start_slot + count <= SI_NUM_IMAGES);
830 
831 	if (views) {
832 		for (i = 0, slot = start_slot; i < count; ++i, ++slot)
833 			si_set_shader_image(ctx, shader, slot, &views[i], false);
834 	} else {
835 		for (i = 0, slot = start_slot; i < count; ++i, ++slot)
836 			si_set_shader_image(ctx, shader, slot, NULL, false);
837 	}
838 
839 	si_update_shader_needs_decompress_mask(ctx, shader);
840 }
841 
842 static void
si_images_update_needs_color_decompress_mask(struct si_images * images)843 si_images_update_needs_color_decompress_mask(struct si_images *images)
844 {
845 	unsigned mask = images->enabled_mask;
846 
847 	while (mask) {
848 		int i = u_bit_scan(&mask);
849 		struct pipe_resource *res = images->views[i].resource;
850 
851 		if (res && res->target != PIPE_BUFFER) {
852 			struct r600_texture *rtex = (struct r600_texture *)res;
853 
854 			if (color_needs_decompression(rtex)) {
855 				images->needs_color_decompress_mask |= 1 << i;
856 			} else {
857 				images->needs_color_decompress_mask &= ~(1 << i);
858 			}
859 		}
860 	}
861 }
862 
863 /* SAMPLER STATES */
864 
si_bind_sampler_states(struct pipe_context * ctx,enum pipe_shader_type shader,unsigned start,unsigned count,void ** states)865 static void si_bind_sampler_states(struct pipe_context *ctx,
866                                    enum pipe_shader_type shader,
867                                    unsigned start, unsigned count, void **states)
868 {
869 	struct si_context *sctx = (struct si_context *)ctx;
870 	struct si_samplers *samplers = &sctx->samplers[shader];
871 	struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
872 	struct si_sampler_state **sstates = (struct si_sampler_state**)states;
873 	int i;
874 
875 	if (!count || shader >= SI_NUM_SHADERS)
876 		return;
877 
878 	for (i = 0; i < count; i++) {
879 		unsigned slot = start + i;
880 		unsigned desc_slot = si_get_sampler_slot(slot);
881 
882 		if (!sstates[i] ||
883 		    sstates[i] == samplers->sampler_states[slot])
884 			continue;
885 
886 #ifdef DEBUG
887 		assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
888 #endif
889 		samplers->sampler_states[slot] = sstates[i];
890 
891 		/* If FMASK is bound, don't overwrite it.
892 		 * The sampler state will be set after FMASK is unbound.
893 		 */
894 		struct si_sampler_view *sview =
895 			(struct si_sampler_view *)samplers->views[slot];
896 
897 		struct r600_texture *tex = NULL;
898 
899 		if (sview && sview->base.texture &&
900 		    sview->base.texture->target != PIPE_BUFFER)
901 			tex = (struct r600_texture *)sview->base.texture;
902 
903 		if (tex && tex->fmask.size)
904 			continue;
905 
906 		si_set_sampler_state_desc(sstates[i], sview, tex,
907 					  desc->list + desc_slot * 16 + 12);
908 
909 		sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
910 	}
911 }
912 
913 /* BUFFER RESOURCES */
914 
si_init_buffer_resources(struct si_buffer_resources * buffers,struct si_descriptors * descs,unsigned num_buffers,unsigned shader_userdata_index,enum radeon_bo_usage shader_usage,enum radeon_bo_usage shader_usage_constbuf,enum radeon_bo_priority priority,enum radeon_bo_priority priority_constbuf)915 static void si_init_buffer_resources(struct si_buffer_resources *buffers,
916 				     struct si_descriptors *descs,
917 				     unsigned num_buffers,
918 				     unsigned shader_userdata_index,
919 				     enum radeon_bo_usage shader_usage,
920 				     enum radeon_bo_usage shader_usage_constbuf,
921 				     enum radeon_bo_priority priority,
922 				     enum radeon_bo_priority priority_constbuf)
923 {
924 	buffers->shader_usage = shader_usage;
925 	buffers->shader_usage_constbuf = shader_usage_constbuf;
926 	buffers->priority = priority;
927 	buffers->priority_constbuf = priority_constbuf;
928 	buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
929 
930 	si_init_descriptors(descs, shader_userdata_index, 4, num_buffers);
931 }
932 
si_release_buffer_resources(struct si_buffer_resources * buffers,struct si_descriptors * descs)933 static void si_release_buffer_resources(struct si_buffer_resources *buffers,
934 					struct si_descriptors *descs)
935 {
936 	int i;
937 
938 	for (i = 0; i < descs->num_elements; i++) {
939 		pipe_resource_reference(&buffers->buffers[i], NULL);
940 	}
941 
942 	FREE(buffers->buffers);
943 }
944 
si_buffer_resources_begin_new_cs(struct si_context * sctx,struct si_buffer_resources * buffers)945 static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
946 					     struct si_buffer_resources *buffers)
947 {
948 	unsigned mask = buffers->enabled_mask;
949 
950 	/* Add buffers to the CS. */
951 	while (mask) {
952 		int i = u_bit_scan(&mask);
953 
954 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
955 			r600_resource(buffers->buffers[i]),
956 			i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage :
957 						    buffers->shader_usage_constbuf,
958 			i < SI_NUM_SHADER_BUFFERS ? buffers->priority :
959 						    buffers->priority_constbuf);
960 	}
961 }
962 
si_get_buffer_from_descriptors(struct si_buffer_resources * buffers,struct si_descriptors * descs,unsigned idx,struct pipe_resource ** buf,unsigned * offset,unsigned * size)963 static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
964 					   struct si_descriptors *descs,
965 					   unsigned idx, struct pipe_resource **buf,
966 					   unsigned *offset, unsigned *size)
967 {
968 	pipe_resource_reference(buf, buffers->buffers[idx]);
969 	if (*buf) {
970 		struct r600_resource *res = r600_resource(*buf);
971 		const uint32_t *desc = descs->list + idx * 4;
972 		uint64_t va;
973 
974 		*size = desc[2];
975 
976 		assert(G_008F04_STRIDE(desc[1]) == 0);
977 		va = ((uint64_t)desc[1] << 32) | desc[0];
978 
979 		assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
980 		*offset = va - res->gpu_address;
981 	}
982 }
983 
984 /* VERTEX BUFFERS */
985 
si_vertex_buffers_begin_new_cs(struct si_context * sctx)986 static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
987 {
988 	struct si_descriptors *desc = &sctx->vertex_buffers;
989 	int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
990 	int i;
991 
992 	for (i = 0; i < count; i++) {
993 		int vb = sctx->vertex_elements->vertex_buffer_index[i];
994 
995 		if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
996 			continue;
997 		if (!sctx->vertex_buffer[vb].buffer.resource)
998 			continue;
999 
1000 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1001 				      (struct r600_resource*)sctx->vertex_buffer[vb].buffer.resource,
1002 				      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
1003 	}
1004 
1005 	if (!desc->buffer)
1006 		return;
1007 	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1008 			      desc->buffer, RADEON_USAGE_READ,
1009 			      RADEON_PRIO_DESCRIPTORS);
1010 }
1011 
/**
 * Build and upload the vertex buffer descriptor list for the currently
 * bound vertex elements.
 *
 * Returns true without doing anything when the vertex buffers are not
 * dirty, no vertex elements are bound, or the element count is zero.
 * Otherwise velems->desc_list_byte_size bytes are allocated from the const
 * uploader and one 4-dword descriptor is written per vertex element.
 *
 * \return false if the upload allocation failed, true otherwise.
 */
bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
	struct si_vertex_elements *velems = sctx->vertex_elements;
	struct si_descriptors *desc = &sctx->vertex_buffers;
	unsigned i, count;
	unsigned desc_list_byte_size;
	unsigned first_vb_use_mask;
	uint32_t *ptr;

	if (!sctx->vertex_buffers_dirty || !velems)
		return true;

	count = velems->count;

	if (!count)
		return true;

	desc_list_byte_size = velems->desc_list_byte_size;
	first_vb_use_mask = velems->first_vb_use_mask;

	/* Vertex buffer descriptors are the only ones which are uploaded
	 * directly through a staging buffer and don't go through
	 * the fine-grained upload path.
	 */
	unsigned buffer_offset = 0;
	u_upload_alloc(sctx->b.b.const_uploader, 0,
		       desc_list_byte_size,
		       si_optimal_tcc_alignment(sctx, desc_list_byte_size),
		       &buffer_offset,
		       (struct pipe_resource**)&desc->buffer, (void**)&ptr);
	/* A NULL buffer after the call is treated as allocation failure. */
	if (!desc->buffer) {
		desc->gpu_address = 0;
		return false;
	}

	/* Record where the new list lives, both on the GPU and on the CPU. */
	desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
	desc->list = ptr;
	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
			      desc->buffer, RADEON_USAGE_READ,
			      RADEON_PRIO_DESCRIPTORS);

	assert(count <= SI_MAX_ATTRIBS);

	for (i = 0; i < count; i++) {
		struct pipe_vertex_buffer *vb;
		struct r600_resource *rbuffer;
		unsigned vbo_index = velems->vertex_buffer_index[i];
		/* NOTE(review): this local shadows the outer "desc"
		 * (struct si_descriptors *) for the rest of the loop body.
		 */
		uint32_t *desc = &ptr[i*4];

		vb = &sctx->vertex_buffer[vbo_index];
		rbuffer = (struct r600_resource*)vb->buffer.resource;
		/* An element whose vertex buffer slot is empty gets an
		 * all-zero descriptor.
		 */
		if (!rbuffer) {
			memset(desc, 0, 16);
			continue;
		}

		/* buffer_offset is reinterpreted as a signed int before being
		 * widened, so the combined offset is computed in 64 bits.
		 */
		int64_t offset = (int64_t)((int)vb->buffer_offset) +
				 velems->src_offset[i];
		uint64_t va = rbuffer->gpu_address + offset;

		/* Start from the number of bytes between the offset and
		 * width0 of the buffer.
		 */
		int64_t num_records = (int64_t)rbuffer->b.b.width0 - offset;
		if (sctx->b.chip_class != VI && vb->stride) {
			/* Round up by rounding down and adding 1 */
			num_records = (num_records - velems->format_size[i]) /
				      vb->stride + 1;
		}
		assert(num_records >= 0 && num_records <= UINT_MAX);

		/* Dword 0: low address bits; dword 1: high address bits and
		 * stride; dword 2: num_records; dword 3: per-element word
		 * taken from the vertex elements state.
		 */
		desc[0] = va;
		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
			  S_008F04_STRIDE(vb->stride);
		desc[2] = num_records;
		desc[3] = velems->rsrc_word3[i];

		/* Presumably first_vb_use_mask marks the first element that
		 * uses each vertex buffer, so each buffer is added to the
		 * buffer list only once - confirm against where it is built.
		 */
		if (first_vb_use_mask & (1 << i)) {
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
					      (struct r600_resource*)vb->buffer.resource,
					      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
		}
	}

	/* Don't flush the const cache. It would have a very negative effect
	 * on performance (confirmed by testing). New descriptors are always
	 * uploaded to a fresh new buffer, so I don't think flushing the const
	 * cache is needed. */
	si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
	sctx->vertex_buffers_dirty = false;
	sctx->vertex_buffer_pointer_dirty = true;
	sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
	return true;
}
1103 
1104 
1105 /* CONSTANT BUFFERS */
1106 
1107 static unsigned
si_const_and_shader_buffer_descriptors_idx(unsigned shader)1108 si_const_and_shader_buffer_descriptors_idx(unsigned shader)
1109 {
1110 	return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
1111 	       SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS;
1112 }
1113 
1114 static struct si_descriptors *
si_const_and_shader_buffer_descriptors(struct si_context * sctx,unsigned shader)1115 si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned shader)
1116 {
1117 	return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
1118 }
1119 
si_upload_const_buffer(struct si_context * sctx,struct r600_resource ** rbuffer,const uint8_t * ptr,unsigned size,uint32_t * const_offset)1120 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
1121 			    const uint8_t *ptr, unsigned size, uint32_t *const_offset)
1122 {
1123 	void *tmp;
1124 
1125 	u_upload_alloc(sctx->b.b.const_uploader, 0, size,
1126 		       si_optimal_tcc_alignment(sctx, size),
1127 		       const_offset,
1128 		       (struct pipe_resource**)rbuffer, &tmp);
1129 	if (*rbuffer)
1130 		util_memcpy_cpu_to_le32(tmp, ptr, size);
1131 }
1132 
/**
 * Bind a constant buffer to one slot of a buffer binding table and write
 * its descriptor.
 *
 * The reference held by the slot is dropped first. If \p input supplies a
 * buffer or user memory, a 4-dword descriptor is written and the buffer is
 * added to the gfx buffer list; otherwise the descriptor is zeroed and the
 * slot is disabled. The descriptor list is marked dirty in both cases.
 *
 * \param sctx             context
 * \param buffers          binding table that owns the slot's reference
 * \param descriptors_idx  index into sctx->descriptors of the list to update
 * \param slot             slot to update; asserted < descs->num_elements
 * \param input            buffer description, or NULL to unbind
 */
static void si_set_constant_buffer(struct si_context *sctx,
				   struct si_buffer_resources *buffers,
				   unsigned descriptors_idx,
				   uint slot, const struct pipe_constant_buffer *input)
{
	struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
	assert(slot < descs->num_elements);
	/* Release whatever was bound before; the slot is NULL from here on. */
	pipe_resource_reference(&buffers->buffers[slot], NULL);

	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
	 * with a NULL buffer). We need to use a dummy buffer instead. */
	if (sctx->b.chip_class == CIK &&
	    (!input || (!input->buffer && !input->user_buffer)))
		input = &sctx->null_const_buf;

	if (input && (input->buffer || input->user_buffer)) {
		struct pipe_resource *buffer = NULL;
		uint64_t va;

		/* Upload the user buffer if needed. */
		if (input->user_buffer) {
			unsigned buffer_offset;

			si_upload_const_buffer(sctx,
					       (struct r600_resource**)&buffer, input->user_buffer,
					       input->buffer_size, &buffer_offset);
			if (!buffer) {
				/* Just unbind on failure. */
				si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL);
				return;
			}
			/* input->buffer_offset is not used on this path; the
			 * offset comes from the upload allocation.
			 */
			va = r600_resource(buffer)->gpu_address + buffer_offset;
		} else {
			pipe_resource_reference(&buffer, input->buffer);
			va = r600_resource(buffer)->gpu_address + input->buffer_offset;
			/* Only track usage for non-user buffers. */
			r600_resource(buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
		}

		/* Set the descriptor. */
		uint32_t *desc = descs->list + slot*4;
		desc[0] = va;
		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
			  S_008F04_STRIDE(0);
		desc[2] = input->buffer_size;
		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);

		/* The slot was cleared above, so the local pointer is stored
		 * directly instead of taking another reference. For the user
		 * buffer path this presumably relies on the uploader handing
		 * back a referenced buffer - confirm.
		 */
		buffers->buffers[slot] = buffer;
		radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
						    (struct r600_resource*)buffer,
						    buffers->shader_usage_constbuf,
						    buffers->priority_constbuf, true);
		buffers->enabled_mask |= 1u << slot;
	} else {
		/* Clear the descriptor. */
		memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
		buffers->enabled_mask &= ~(1u << slot);
	}

	sctx->descriptors_dirty |= 1u << descriptors_idx;
}
1199 
/**
 * Bind \p input to slot \p slot of the RW buffer table, using the
 * SI_DESCS_RW_BUFFERS descriptor list.
 */
void si_set_rw_buffer(struct si_context *sctx,
		      uint slot, const struct pipe_constant_buffer *input)
{
	struct si_buffer_resources *rw = &sctx->rw_buffers;

	si_set_constant_buffer(sctx, rw, SI_DESCS_RW_BUFFERS, slot, input);
}
1206 
/**
 * Bind a constant buffer for one shader stage.
 *
 * The call is ignored when \p shader is not below SI_NUM_SHADERS. The API
 * slot is translated with si_get_constbuf_slot() before the stage's
 * combined constant/shader buffer table is updated.
 */
static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
					enum pipe_shader_type shader, uint slot,
					const struct pipe_constant_buffer *input)
{
	if (shader >= SI_NUM_SHADERS)
		return;

	struct si_context *sctx = (struct si_context *)ctx;
	struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
	unsigned descs_idx = si_const_and_shader_buffer_descriptors_idx(shader);

	si_set_constant_buffer(sctx, buffers, descs_idx,
			       si_get_constbuf_slot(slot), input);
}
1221 
/**
 * Read back the constant buffer bound at \p slot of shader stage \p shader.
 *
 * cbuf->user_buffer is always set to NULL; the buffer, offset and size
 * fields are filled in by si_get_buffer_from_descriptors().
 */
void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
				 uint slot, struct pipe_constant_buffer *cbuf)
{
	struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
	struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
	unsigned desc_slot = si_get_constbuf_slot(slot);

	cbuf->user_buffer = NULL;
	si_get_buffer_from_descriptors(buffers, descs, desc_slot,
				       &cbuf->buffer, &cbuf->buffer_offset,
				       &cbuf->buffer_size);
}
1232 
1233 /* SHADER BUFFERS */
1234 
si_set_shader_buffers(struct pipe_context * ctx,enum pipe_shader_type shader,unsigned start_slot,unsigned count,const struct pipe_shader_buffer * sbuffers)1235 static void si_set_shader_buffers(struct pipe_context *ctx,
1236 				  enum pipe_shader_type shader,
1237 				  unsigned start_slot, unsigned count,
1238 				  const struct pipe_shader_buffer *sbuffers)
1239 {
1240 	struct si_context *sctx = (struct si_context *)ctx;
1241 	struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
1242 	struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
1243 	unsigned i;
1244 
1245 	assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
1246 
1247 	for (i = 0; i < count; ++i) {
1248 		const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
1249 		struct r600_resource *buf;
1250 		unsigned slot = si_get_shaderbuf_slot(start_slot + i);
1251 		uint32_t *desc = descs->list + slot * 4;
1252 		uint64_t va;
1253 
1254 		if (!sbuffer || !sbuffer->buffer) {
1255 			pipe_resource_reference(&buffers->buffers[slot], NULL);
1256 			memset(desc, 0, sizeof(uint32_t) * 4);
1257 			buffers->enabled_mask &= ~(1u << slot);
1258 			sctx->descriptors_dirty |=
1259 				1u << si_const_and_shader_buffer_descriptors_idx(shader);
1260 			continue;
1261 		}
1262 
1263 		buf = (struct r600_resource *)sbuffer->buffer;
1264 		va = buf->gpu_address + sbuffer->buffer_offset;
1265 
1266 		desc[0] = va;
1267 		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1268 			  S_008F04_STRIDE(0);
1269 		desc[2] = sbuffer->buffer_size;
1270 		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1271 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1272 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1273 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1274 			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1275 			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1276 
1277 		pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
1278 		radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf,
1279 						    buffers->shader_usage,
1280 						    buffers->priority, true);
1281 		buf->bind_history |= PIPE_BIND_SHADER_BUFFER;
1282 
1283 		buffers->enabled_mask |= 1u << slot;
1284 		sctx->descriptors_dirty |=
1285 			1u << si_const_and_shader_buffer_descriptors_idx(shader);
1286 
1287 		util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
1288 			       sbuffer->buffer_offset + sbuffer->buffer_size);
1289 	}
1290 }
1291 
/**
 * Read back \p count shader buffer bindings of shader stage \p shader,
 * starting at \p start_slot, into \p sbuf (one entry per slot).
 */
void si_get_shader_buffers(struct si_context *sctx,
			   enum pipe_shader_type shader,
			   uint start_slot, uint count,
			   struct pipe_shader_buffer *sbuf)
{
	struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
	struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);

	for (unsigned n = 0; n < count; n++) {
		struct pipe_shader_buffer *out = &sbuf[n];
		unsigned desc_slot = si_get_shaderbuf_slot(start_slot + n);

		si_get_buffer_from_descriptors(buffers, descs, desc_slot,
					       &out->buffer, &out->buffer_offset,
					       &out->buffer_size);
	}
}
1308 
1309 /* RING BUFFERS */
1310 
/**
 * Bind a ring buffer to a slot of the RW buffer table and write its
 * descriptor, or clear the slot when \p buffer is NULL.
 *
 * \param ctx           the context (a struct si_context)
 * \param slot          RW buffer slot; asserted < the list's num_elements
 * \param buffer        buffer to bind, or NULL to clear the slot
 * \param stride        value for the descriptor STRIDE field; asserted to
 *                      fit in 14 bits
 * \param num_records   value for descriptor dword 2; multiplied by
 *                      \p stride on VI and later when \p stride is non-zero
 * \param add_tid       value for the ADD_TID_ENABLE field
 * \param swizzle       value for the SWIZZLE_ENABLE field
 * \param element_size  0, 2, 4, 8 or 16; encoded as 0, 0, 1, 2, 3
 * \param index_stride  0, 8, 16, 32 or 64; encoded as 0, 0, 1, 2, 3
 * \param offset        offset added to the buffer's GPU address
 */
void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
			struct pipe_resource *buffer,
			unsigned stride, unsigned num_records,
			bool add_tid, bool swizzle,
			unsigned element_size, unsigned index_stride, uint64_t offset)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_buffer_resources *buffers = &sctx->rw_buffers;
	struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];

	/* The stride field in the resource descriptor has 14 bits */
	assert(stride < (1 << 14));

	assert(slot < descs->num_elements);
	/* Release whatever was bound to the slot before. */
	pipe_resource_reference(&buffers->buffers[slot], NULL);

	if (buffer) {
		uint64_t va;

		va = r600_resource(buffer)->gpu_address + offset;

		/* Translate the element size into its descriptor encoding.
		 * An unsupported value asserts and then deliberately falls
		 * through to the encoding used for 0 and 2.
		 */
		switch (element_size) {
		default:
			assert(!"Unsupported ring buffer element size");
			/* fall through */
		case 0:
		case 2:
			element_size = 0;
			break;
		case 4:
			element_size = 1;
			break;
		case 8:
			element_size = 2;
			break;
		case 16:
			element_size = 3;
			break;
		}

		/* Same scheme for the index stride: unsupported values assert
		 * and fall through to the encoding used for 0 and 8.
		 */
		switch (index_stride) {
		default:
			assert(!"Unsupported ring buffer index stride");
			/* fall through */
		case 0:
		case 8:
			index_stride = 0;
			break;
		case 16:
			index_stride = 1;
			break;
		case 32:
			index_stride = 2;
			break;
		case 64:
			index_stride = 3;
			break;
		}

		/* On VI and later a strided buffer gets num_records scaled by
		 * the stride (presumably the field is in bytes there - confirm).
		 */
		if (sctx->b.chip_class >= VI && stride)
			num_records *= stride;

		/* Set the descriptor. */
		uint32_t *desc = descs->list + slot*4;
		desc[0] = va;
		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
			  S_008F04_STRIDE(stride) |
			  S_008F04_SWIZZLE_ENABLE(swizzle);
		desc[2] = num_records;
		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
			  S_008F0C_INDEX_STRIDE(index_stride) |
			  S_008F0C_ADD_TID_ENABLE(add_tid);

		/* The ELEMENT_SIZE field is only written before GFX9; on GFX9
		 * the encoded value is merely checked when swizzling.
		 */
		if (sctx->b.chip_class >= GFX9)
			assert(!swizzle || element_size == 1); /* always 4 bytes on GFX9 */
		else
			desc[3] |= S_008F0C_ELEMENT_SIZE(element_size);

		pipe_resource_reference(&buffers->buffers[slot], buffer);
		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				      (struct r600_resource*)buffer,
				      buffers->shader_usage, buffers->priority);
		buffers->enabled_mask |= 1u << slot;
	} else {
		/* Clear the descriptor. */
		memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
		buffers->enabled_mask &= ~(1u << slot);
	}

	sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
}
1405 
/**
 * Re-point a buffer descriptor at \p new_buf while keeping the offset it
 * had relative to the old buffer address \p old_buf_va.
 *
 * \p ctx is not used by this function.
 */
static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
					uint32_t *desc, uint64_t old_buf_va,
					struct pipe_resource *new_buf)
{
	/* Address currently encoded in the descriptor. */
	uint64_t desc_va = si_desc_extract_buffer_address(desc);

	assert(old_buf_va <= desc_va);

	/* Same offset, new buffer. */
	si_set_buf_desc_address(r600_resource(new_buf), desc_va - old_buf_va,
				desc);
}
1420 
1421 /* INTERNAL CONST BUFFERS */
1422 
si_set_polygon_stipple(struct pipe_context * ctx,const struct pipe_poly_stipple * state)1423 static void si_set_polygon_stipple(struct pipe_context *ctx,
1424 				   const struct pipe_poly_stipple *state)
1425 {
1426 	struct si_context *sctx = (struct si_context *)ctx;
1427 	struct pipe_constant_buffer cb = {};
1428 	unsigned stipple[32];
1429 	int i;
1430 
1431 	for (i = 0; i < 32; i++)
1432 		stipple[i] = util_bitreverse(state->stipple[i]);
1433 
1434 	cb.user_buffer = stipple;
1435 	cb.buffer_size = sizeof(stipple);
1436 
1437 	si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
1438 }
1439 
1440 /* TEXTURE METADATA ENABLE/DISABLE */
1441 
1442 static void
si_resident_handles_update_needs_color_decompress(struct si_context * sctx)1443 si_resident_handles_update_needs_color_decompress(struct si_context *sctx)
1444 {
1445 	util_dynarray_clear(&sctx->resident_tex_needs_color_decompress);
1446 	util_dynarray_clear(&sctx->resident_img_needs_color_decompress);
1447 
1448 	util_dynarray_foreach(&sctx->resident_tex_handles,
1449 			      struct si_texture_handle *, tex_handle) {
1450 		struct pipe_resource *res = (*tex_handle)->view->texture;
1451 		struct r600_texture *rtex;
1452 
1453 		if (!res || res->target == PIPE_BUFFER)
1454 			continue;
1455 
1456 		rtex = (struct r600_texture *)res;
1457 		if (!color_needs_decompression(rtex))
1458 			continue;
1459 
1460 		util_dynarray_append(&sctx->resident_tex_needs_color_decompress,
1461 				     struct si_texture_handle *, *tex_handle);
1462 	}
1463 
1464 	util_dynarray_foreach(&sctx->resident_img_handles,
1465 			      struct si_image_handle *, img_handle) {
1466 		struct pipe_image_view *view = &(*img_handle)->view;
1467 		struct pipe_resource *res = view->resource;
1468 		struct r600_texture *rtex;
1469 
1470 		if (!res || res->target == PIPE_BUFFER)
1471 			continue;
1472 
1473 		rtex = (struct r600_texture *)res;
1474 		if (!color_needs_decompression(rtex))
1475 			continue;
1476 
1477 		util_dynarray_append(&sctx->resident_img_needs_color_decompress,
1478 				     struct si_image_handle *, *img_handle);
1479 	}
1480 }
1481 
1482 /* CMASK can be enabled (for fast clear) and disabled (for texture export)
1483  * while the texture is bound, possibly by a different context. In that case,
1484  * call this function to update needs_*_decompress_masks.
1485  */
si_update_needs_color_decompress_masks(struct si_context * sctx)1486 void si_update_needs_color_decompress_masks(struct si_context *sctx)
1487 {
1488 	for (int i = 0; i < SI_NUM_SHADERS; ++i) {
1489 		si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]);
1490 		si_images_update_needs_color_decompress_mask(&sctx->images[i]);
1491 		si_update_shader_needs_decompress_mask(sctx, i);
1492 	}
1493 
1494 	si_resident_handles_update_needs_color_decompress(sctx);
1495 }
1496 
1497 /* BUFFER DISCARD/INVALIDATION */
1498 
1499 /** Reset descriptors of buffer resources after \p buf has been invalidated. */
si_reset_buffer_resources(struct si_context * sctx,struct si_buffer_resources * buffers,unsigned descriptors_idx,unsigned slot_mask,struct pipe_resource * buf,uint64_t old_va,enum radeon_bo_usage usage,enum radeon_bo_priority priority)1500 static void si_reset_buffer_resources(struct si_context *sctx,
1501 				      struct si_buffer_resources *buffers,
1502 				      unsigned descriptors_idx,
1503 				      unsigned slot_mask,
1504 				      struct pipe_resource *buf,
1505 				      uint64_t old_va,
1506 				      enum radeon_bo_usage usage,
1507 				      enum radeon_bo_priority priority)
1508 {
1509 	struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1510 	unsigned mask = buffers->enabled_mask & slot_mask;
1511 
1512 	while (mask) {
1513 		unsigned i = u_bit_scan(&mask);
1514 		if (buffers->buffers[i] == buf) {
1515 			si_desc_reset_buffer_offset(&sctx->b.b,
1516 						    descs->list + i*4,
1517 						    old_va, buf);
1518 			sctx->descriptors_dirty |= 1u << descriptors_idx;
1519 
1520 			radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1521 							    (struct r600_resource *)buf,
1522 							    usage, priority, true);
1523 		}
1524 	}
1525 }
1526 
si_rebind_buffer(struct pipe_context * ctx,struct pipe_resource * buf,uint64_t old_va)1527 static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf,
1528 			     uint64_t old_va)
1529 {
1530 	struct si_context *sctx = (struct si_context*)ctx;
1531 	struct r600_resource *rbuffer = r600_resource(buf);
1532 	unsigned i, shader;
1533 	unsigned num_elems = sctx->vertex_elements ?
1534 				       sctx->vertex_elements->count : 0;
1535 
1536 	/* We changed the buffer, now we need to bind it where the old one
1537 	 * was bound. This consists of 2 things:
1538 	 *   1) Updating the resource descriptor and dirtying it.
1539 	 *   2) Adding a relocation to the CS, so that it's usable.
1540 	 */
1541 
1542 	/* Vertex buffers. */
1543 	if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
1544 		for (i = 0; i < num_elems; i++) {
1545 			int vb = sctx->vertex_elements->vertex_buffer_index[i];
1546 
1547 			if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
1548 				continue;
1549 			if (!sctx->vertex_buffer[vb].buffer.resource)
1550 				continue;
1551 
1552 			if (sctx->vertex_buffer[vb].buffer.resource == buf) {
1553 				sctx->vertex_buffers_dirty = true;
1554 				break;
1555 			}
1556 		}
1557 	}
1558 
1559 	/* Streamout buffers. (other internal buffers can't be invalidated) */
1560 	if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
1561 		for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
1562 			struct si_buffer_resources *buffers = &sctx->rw_buffers;
1563 			struct si_descriptors *descs =
1564 				&sctx->descriptors[SI_DESCS_RW_BUFFERS];
1565 
1566 			if (buffers->buffers[i] != buf)
1567 				continue;
1568 
1569 			si_desc_reset_buffer_offset(ctx, descs->list + i*4,
1570 						    old_va, buf);
1571 			sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1572 
1573 			radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1574 							    rbuffer, buffers->shader_usage,
1575 							    RADEON_PRIO_SHADER_RW_BUFFER,
1576 							    true);
1577 
1578 			/* Update the streamout state. */
1579 			if (sctx->streamout.begin_emitted)
1580 				si_emit_streamout_end(sctx);
1581 			sctx->streamout.append_bitmask =
1582 					sctx->streamout.enabled_mask;
1583 			si_streamout_buffers_dirty(sctx);
1584 		}
1585 	}
1586 
1587 	/* Constant and shader buffers. */
1588 	if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
1589 		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1590 			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
1591 						  si_const_and_shader_buffer_descriptors_idx(shader),
1592 						  u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
1593 						  buf, old_va,
1594 						  sctx->const_and_shader_buffers[shader].shader_usage_constbuf,
1595 						  sctx->const_and_shader_buffers[shader].priority_constbuf);
1596 	}
1597 
1598 	if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
1599 		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1600 			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
1601 						  si_const_and_shader_buffer_descriptors_idx(shader),
1602 						  u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
1603 						  buf, old_va,
1604 						  sctx->const_and_shader_buffers[shader].shader_usage,
1605 						  sctx->const_and_shader_buffers[shader].priority);
1606 	}
1607 
1608 	if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
1609 		/* Texture buffers - update bindings. */
1610 		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1611 			struct si_samplers *samplers = &sctx->samplers[shader];
1612 			struct si_descriptors *descs =
1613 				si_sampler_and_image_descriptors(sctx, shader);
1614 			unsigned mask = samplers->enabled_mask;
1615 
1616 			while (mask) {
1617 				unsigned i = u_bit_scan(&mask);
1618 				if (samplers->views[i]->texture == buf) {
1619 					unsigned desc_slot = si_get_sampler_slot(i);
1620 
1621 					si_desc_reset_buffer_offset(ctx,
1622 								    descs->list +
1623 								    desc_slot * 16 + 4,
1624 								    old_va, buf);
1625 					sctx->descriptors_dirty |=
1626 						1u << si_sampler_and_image_descriptors_idx(shader);
1627 
1628 					radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1629 									    rbuffer, RADEON_USAGE_READ,
1630 									    RADEON_PRIO_SAMPLER_BUFFER,
1631 									    true);
1632 				}
1633 			}
1634 		}
1635 	}
1636 
1637 	/* Shader images */
1638 	if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
1639 		for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
1640 			struct si_images *images = &sctx->images[shader];
1641 			struct si_descriptors *descs =
1642 				si_sampler_and_image_descriptors(sctx, shader);
1643 			unsigned mask = images->enabled_mask;
1644 
1645 			while (mask) {
1646 				unsigned i = u_bit_scan(&mask);
1647 
1648 				if (images->views[i].resource == buf) {
1649 					unsigned desc_slot = si_get_image_slot(i);
1650 
1651 					if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
1652 						si_mark_image_range_valid(&images->views[i]);
1653 
1654 					si_desc_reset_buffer_offset(
1655 						ctx, descs->list + desc_slot * 8 + 4,
1656 						old_va, buf);
1657 					sctx->descriptors_dirty |=
1658 						1u << si_sampler_and_image_descriptors_idx(shader);
1659 
1660 					radeon_add_to_buffer_list_check_mem(
1661 						&sctx->b, &sctx->b.gfx, rbuffer,
1662 						RADEON_USAGE_READWRITE,
1663 						RADEON_PRIO_SAMPLER_BUFFER, true);
1664 				}
1665 			}
1666 		}
1667 	}
1668 
1669 	/* Bindless texture handles */
1670 	if (rbuffer->texture_handle_allocated) {
1671 		struct si_descriptors *descs = &sctx->bindless_descriptors;
1672 
1673 		util_dynarray_foreach(&sctx->resident_tex_handles,
1674 				      struct si_texture_handle *, tex_handle) {
1675 			struct pipe_sampler_view *view = (*tex_handle)->view;
1676 			unsigned desc_slot = (*tex_handle)->desc_slot;
1677 
1678 			if (view->texture == buf) {
1679 				si_set_buf_desc_address(rbuffer,
1680 							view->u.buf.offset,
1681 							descs->list +
1682 							desc_slot * 16 + 4);
1683 
1684 				(*tex_handle)->desc_dirty = true;
1685 				sctx->bindless_descriptors_dirty = true;
1686 
1687 				radeon_add_to_buffer_list_check_mem(
1688 					&sctx->b, &sctx->b.gfx, rbuffer,
1689 					RADEON_USAGE_READ,
1690 					RADEON_PRIO_SAMPLER_BUFFER, true);
1691 			}
1692 		}
1693 	}
1694 
1695 	/* Bindless image handles */
1696 	if (rbuffer->image_handle_allocated) {
1697 		struct si_descriptors *descs = &sctx->bindless_descriptors;
1698 
1699 		util_dynarray_foreach(&sctx->resident_img_handles,
1700 				      struct si_image_handle *, img_handle) {
1701 			struct pipe_image_view *view = &(*img_handle)->view;
1702 			unsigned desc_slot = (*img_handle)->desc_slot;
1703 
1704 			if (view->resource == buf) {
1705 				if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1706 					si_mark_image_range_valid(view);
1707 
1708 				si_set_buf_desc_address(rbuffer,
1709 							view->u.buf.offset,
1710 							descs->list +
1711 							desc_slot * 16 + 4);
1712 
1713 				(*img_handle)->desc_dirty = true;
1714 				sctx->bindless_descriptors_dirty = true;
1715 
1716 				radeon_add_to_buffer_list_check_mem(
1717 					&sctx->b, &sctx->b.gfx, rbuffer,
1718 					RADEON_USAGE_READWRITE,
1719 					RADEON_PRIO_SAMPLER_BUFFER, true);
1720 			}
1721 		}
1722 	}
1723 }
1724 
1725 /* Reallocate a buffer a update all resource bindings where the buffer is
1726  * bound.
1727  *
1728  * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
1729  * idle by discarding its contents. Apps usually tell us when to do this using
1730  * map_buffer flags, for example.
1731  */
si_invalidate_buffer(struct pipe_context * ctx,struct pipe_resource * buf)1732 static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
1733 {
1734 	struct si_context *sctx = (struct si_context*)ctx;
1735 	struct r600_resource *rbuffer = r600_resource(buf);
1736 	uint64_t old_va = rbuffer->gpu_address;
1737 
1738 	/* Reallocate the buffer in the same pipe_resource. */
1739 	si_alloc_resource(sctx->screen, rbuffer);
1740 
1741 	si_rebind_buffer(ctx, buf, old_va);
1742 }
1743 
si_upload_bindless_descriptor(struct si_context * sctx,unsigned desc_slot,unsigned num_dwords)1744 static void si_upload_bindless_descriptor(struct si_context *sctx,
1745 					  unsigned desc_slot,
1746 					  unsigned num_dwords)
1747 {
1748 	struct si_descriptors *desc = &sctx->bindless_descriptors;
1749 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
1750 	unsigned desc_slot_offset = desc_slot * 16;
1751 	uint32_t *data;
1752 	uint64_t va;
1753 
1754 	data = desc->list + desc_slot_offset;
1755 	va = desc->gpu_address + desc_slot_offset * 4;
1756 
1757 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0));
1758 	radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) |
1759 		    S_370_WR_CONFIRM(1) |
1760 		    S_370_ENGINE_SEL(V_370_ME));
1761 	radeon_emit(cs, va);
1762 	radeon_emit(cs, va >> 32);
1763 	radeon_emit_array(cs, data, num_dwords);
1764 }
1765 
si_upload_bindless_descriptors(struct si_context * sctx)1766 static void si_upload_bindless_descriptors(struct si_context *sctx)
1767 {
1768 	if (!sctx->bindless_descriptors_dirty)
1769 		return;
1770 
1771 	/* Wait for graphics/compute to be idle before updating the resident
1772 	 * descriptors directly in memory, in case the GPU is using them.
1773 	 */
1774 	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
1775 			 SI_CONTEXT_CS_PARTIAL_FLUSH;
1776 	si_emit_cache_flush(sctx);
1777 
1778 	util_dynarray_foreach(&sctx->resident_tex_handles,
1779 			      struct si_texture_handle *, tex_handle) {
1780 		unsigned desc_slot = (*tex_handle)->desc_slot;
1781 
1782 		if (!(*tex_handle)->desc_dirty)
1783 			continue;
1784 
1785 		si_upload_bindless_descriptor(sctx, desc_slot, 16);
1786 		(*tex_handle)->desc_dirty = false;
1787 	}
1788 
1789 	util_dynarray_foreach(&sctx->resident_img_handles,
1790 			      struct si_image_handle *, img_handle) {
1791 		unsigned desc_slot = (*img_handle)->desc_slot;
1792 
1793 		if (!(*img_handle)->desc_dirty)
1794 			continue;
1795 
1796 		si_upload_bindless_descriptor(sctx, desc_slot, 8);
1797 		(*img_handle)->desc_dirty = false;
1798 	}
1799 
1800 	/* Invalidate L1 because it doesn't know that L2 changed. */
1801 	sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1;
1802 	si_emit_cache_flush(sctx);
1803 
1804 	sctx->bindless_descriptors_dirty = false;
1805 }
1806 
1807 /* Update mutable image descriptor fields of all resident textures. */
si_update_bindless_texture_descriptor(struct si_context * sctx,struct si_texture_handle * tex_handle)1808 static void si_update_bindless_texture_descriptor(struct si_context *sctx,
1809 						  struct si_texture_handle *tex_handle)
1810 {
1811 	struct si_sampler_view *sview = (struct si_sampler_view *)tex_handle->view;
1812 	struct si_descriptors *desc = &sctx->bindless_descriptors;
1813 	unsigned desc_slot_offset = tex_handle->desc_slot * 16;
1814 	uint32_t desc_list[16];
1815 
1816 	if (sview->base.texture->target == PIPE_BUFFER)
1817 		return;
1818 
1819 	memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list));
1820 	si_set_sampler_view_desc(sctx, sview, &tex_handle->sstate,
1821 				 desc->list + desc_slot_offset);
1822 
1823 	if (memcmp(desc_list, desc->list + desc_slot_offset,
1824 		   sizeof(desc_list))) {
1825 		tex_handle->desc_dirty = true;
1826 		sctx->bindless_descriptors_dirty = true;
1827 	}
1828 }
1829 
si_update_bindless_image_descriptor(struct si_context * sctx,struct si_image_handle * img_handle)1830 static void si_update_bindless_image_descriptor(struct si_context *sctx,
1831 						struct si_image_handle *img_handle)
1832 {
1833 	struct si_descriptors *desc = &sctx->bindless_descriptors;
1834 	unsigned desc_slot_offset = img_handle->desc_slot * 16;
1835 	struct pipe_image_view *view = &img_handle->view;
1836 	uint32_t desc_list[8];
1837 
1838 	if (view->resource->target == PIPE_BUFFER)
1839 		return;
1840 
1841 	memcpy(desc_list, desc->list + desc_slot_offset,
1842 	       sizeof(desc_list));
1843 	si_set_shader_image_desc(sctx, view, true,
1844 				 desc->list + desc_slot_offset);
1845 
1846 	if (memcmp(desc_list, desc->list + desc_slot_offset,
1847 		   sizeof(desc_list))) {
1848 		img_handle->desc_dirty = true;
1849 		sctx->bindless_descriptors_dirty = true;
1850 	}
1851 }
1852 
si_update_all_resident_texture_descriptors(struct si_context * sctx)1853 static void si_update_all_resident_texture_descriptors(struct si_context *sctx)
1854 {
1855 	util_dynarray_foreach(&sctx->resident_tex_handles,
1856 			      struct si_texture_handle *, tex_handle) {
1857 		si_update_bindless_texture_descriptor(sctx, *tex_handle);
1858 	}
1859 
1860 	util_dynarray_foreach(&sctx->resident_img_handles,
1861 			      struct si_image_handle *, img_handle) {
1862 		si_update_bindless_image_descriptor(sctx, *img_handle);
1863 	}
1864 
1865 	si_upload_bindless_descriptors(sctx);
1866 }
1867 
1868 /* Update mutable image descriptor fields of all bound textures. */
si_update_all_texture_descriptors(struct si_context * sctx)1869 void si_update_all_texture_descriptors(struct si_context *sctx)
1870 {
1871 	unsigned shader;
1872 
1873 	for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1874 		struct si_samplers *samplers = &sctx->samplers[shader];
1875 		struct si_images *images = &sctx->images[shader];
1876 		unsigned mask;
1877 
1878 		/* Images. */
1879 		mask = images->enabled_mask;
1880 		while (mask) {
1881 			unsigned i = u_bit_scan(&mask);
1882 			struct pipe_image_view *view = &images->views[i];
1883 
1884 			if (!view->resource ||
1885 			    view->resource->target == PIPE_BUFFER)
1886 				continue;
1887 
1888 			si_set_shader_image(sctx, shader, i, view, true);
1889 		}
1890 
1891 		/* Sampler views. */
1892 		mask = samplers->enabled_mask;
1893 		while (mask) {
1894 			unsigned i = u_bit_scan(&mask);
1895 			struct pipe_sampler_view *view = samplers->views[i];
1896 
1897 			if (!view ||
1898 			    !view->texture ||
1899 			    view->texture->target == PIPE_BUFFER)
1900 				continue;
1901 
1902 			si_set_sampler_view(sctx, shader, i,
1903 					    samplers->views[i], true);
1904 		}
1905 
1906 		si_update_shader_needs_decompress_mask(sctx, shader);
1907 	}
1908 
1909 	si_update_all_resident_texture_descriptors(sctx);
1910 }
1911 
1912 /* SHADER USER DATA */
1913 
si_mark_shader_pointers_dirty(struct si_context * sctx,unsigned shader)1914 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
1915 					  unsigned shader)
1916 {
1917 	sctx->shader_pointers_dirty |=
1918 		u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
1919 				  SI_NUM_SHADER_DESCS);
1920 
1921 	if (shader == PIPE_SHADER_VERTEX)
1922 		sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
1923 
1924 	si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
1925 }
1926 
si_shader_pointers_begin_new_cs(struct si_context * sctx)1927 static void si_shader_pointers_begin_new_cs(struct si_context *sctx)
1928 {
1929 	sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
1930 	sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
1931 	si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
1932 	sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
1933 	sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
1934 }
1935 
1936 /* Set a base register address for user data constants in the given shader.
1937  * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
1938  */
si_set_user_data_base(struct si_context * sctx,unsigned shader,uint32_t new_base)1939 static void si_set_user_data_base(struct si_context *sctx,
1940 				  unsigned shader, uint32_t new_base)
1941 {
1942 	uint32_t *base = &sctx->shader_pointers.sh_base[shader];
1943 
1944 	if (*base != new_base) {
1945 		*base = new_base;
1946 
1947 		if (new_base) {
1948 			si_mark_shader_pointers_dirty(sctx, shader);
1949 
1950 			if (shader == PIPE_SHADER_VERTEX)
1951 				sctx->last_vs_state = ~0;
1952 		}
1953 	}
1954 }
1955 
1956 /* This must be called when these shaders are changed from non-NULL to NULL
1957  * and vice versa:
1958  * - geometry shader
1959  * - tessellation control shader
1960  * - tessellation evaluation shader
1961  */
si_shader_change_notify(struct si_context * sctx)1962 void si_shader_change_notify(struct si_context *sctx)
1963 {
1964 	/* VS can be bound as VS, ES, or LS. */
1965 	if (sctx->tes_shader.cso) {
1966 		if (sctx->b.chip_class >= GFX9) {
1967 			si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1968 					      R_00B430_SPI_SHADER_USER_DATA_LS_0);
1969 		} else {
1970 			si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1971 					      R_00B530_SPI_SHADER_USER_DATA_LS_0);
1972 		}
1973 	} else if (sctx->gs_shader.cso) {
1974 		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1975 				      R_00B330_SPI_SHADER_USER_DATA_ES_0);
1976 	} else {
1977 		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1978 				      R_00B130_SPI_SHADER_USER_DATA_VS_0);
1979 	}
1980 
1981 	/* TES can be bound as ES, VS, or not bound. */
1982 	if (sctx->tes_shader.cso) {
1983 		if (sctx->gs_shader.cso)
1984 			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
1985 					      R_00B330_SPI_SHADER_USER_DATA_ES_0);
1986 		else
1987 			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
1988 					      R_00B130_SPI_SHADER_USER_DATA_VS_0);
1989 	} else {
1990 		si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
1991 	}
1992 }
1993 
si_emit_shader_pointer_head(struct radeon_winsys_cs * cs,struct si_descriptors * desc,unsigned sh_base,unsigned pointer_count)1994 static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs,
1995 					struct si_descriptors *desc,
1996 					unsigned sh_base,
1997 					unsigned pointer_count)
1998 {
1999 	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0));
2000 	radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
2001 }
2002 
si_emit_shader_pointer_body(struct radeon_winsys_cs * cs,struct si_descriptors * desc)2003 static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs,
2004 					struct si_descriptors *desc)
2005 {
2006 	uint64_t va = desc->gpu_address;
2007 
2008 	radeon_emit(cs, va);
2009 	radeon_emit(cs, va >> 32);
2010 }
2011 
si_emit_shader_pointer(struct si_context * sctx,struct si_descriptors * desc,unsigned sh_base)2012 static void si_emit_shader_pointer(struct si_context *sctx,
2013 				   struct si_descriptors *desc,
2014 				   unsigned sh_base)
2015 {
2016 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2017 
2018 	si_emit_shader_pointer_head(cs, desc, sh_base, 1);
2019 	si_emit_shader_pointer_body(cs, desc);
2020 }
2021 
si_emit_consecutive_shader_pointers(struct si_context * sctx,unsigned pointer_mask,unsigned sh_base)2022 static void si_emit_consecutive_shader_pointers(struct si_context *sctx,
2023 						unsigned pointer_mask,
2024 						unsigned sh_base)
2025 {
2026 	if (!sh_base)
2027 		return;
2028 
2029 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2030 	unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
2031 
2032 	while (mask) {
2033 		int start, count;
2034 		u_bit_scan_consecutive_range(&mask, &start, &count);
2035 
2036 		struct si_descriptors *descs = &sctx->descriptors[start];
2037 
2038 		si_emit_shader_pointer_head(cs, descs, sh_base, count);
2039 		for (int i = 0; i < count; i++)
2040 			si_emit_shader_pointer_body(cs, descs + i);
2041 	}
2042 }
2043 
si_emit_global_shader_pointers(struct si_context * sctx,struct si_descriptors * descs)2044 static void si_emit_global_shader_pointers(struct si_context *sctx,
2045 					   struct si_descriptors *descs)
2046 {
2047 	if (sctx->b.chip_class == GFX9) {
2048 		/* Broadcast it to all shader stages. */
2049 		si_emit_shader_pointer(sctx, descs,
2050 				       R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
2051 		return;
2052 	}
2053 
2054 	si_emit_shader_pointer(sctx, descs,
2055 			       R_00B030_SPI_SHADER_USER_DATA_PS_0);
2056 	si_emit_shader_pointer(sctx, descs,
2057 			       R_00B130_SPI_SHADER_USER_DATA_VS_0);
2058 	si_emit_shader_pointer(sctx, descs,
2059 			       R_00B330_SPI_SHADER_USER_DATA_ES_0);
2060 	si_emit_shader_pointer(sctx, descs,
2061 			       R_00B230_SPI_SHADER_USER_DATA_GS_0);
2062 	si_emit_shader_pointer(sctx, descs,
2063 			       R_00B430_SPI_SHADER_USER_DATA_HS_0);
2064 	si_emit_shader_pointer(sctx, descs,
2065 			       R_00B530_SPI_SHADER_USER_DATA_LS_0);
2066 }
2067 
si_emit_graphics_shader_pointers(struct si_context * sctx,struct r600_atom * atom)2068 void si_emit_graphics_shader_pointers(struct si_context *sctx,
2069                                       struct r600_atom *atom)
2070 {
2071 	uint32_t *sh_base = sctx->shader_pointers.sh_base;
2072 
2073 	if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
2074 		si_emit_global_shader_pointers(sctx,
2075 					       &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
2076 	}
2077 
2078 	si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
2079 					    sh_base[PIPE_SHADER_VERTEX]);
2080 	si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
2081 					    sh_base[PIPE_SHADER_TESS_CTRL]);
2082 	si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
2083 					    sh_base[PIPE_SHADER_TESS_EVAL]);
2084 	si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
2085 					    sh_base[PIPE_SHADER_GEOMETRY]);
2086 	si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
2087 					    sh_base[PIPE_SHADER_FRAGMENT]);
2088 
2089 	sctx->shader_pointers_dirty &=
2090 		~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
2091 
2092 	if (sctx->vertex_buffer_pointer_dirty) {
2093 		si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
2094 				       sh_base[PIPE_SHADER_VERTEX]);
2095 		sctx->vertex_buffer_pointer_dirty = false;
2096 	}
2097 
2098 	if (sctx->graphics_bindless_pointer_dirty) {
2099 		si_emit_global_shader_pointers(sctx,
2100 					       &sctx->bindless_descriptors);
2101 		sctx->graphics_bindless_pointer_dirty = false;
2102 	}
2103 }
2104 
si_emit_compute_shader_pointers(struct si_context * sctx)2105 void si_emit_compute_shader_pointers(struct si_context *sctx)
2106 {
2107 	unsigned base = R_00B900_COMPUTE_USER_DATA_0;
2108 
2109 	si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
2110 					    R_00B900_COMPUTE_USER_DATA_0);
2111 	sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);
2112 
2113 	if (sctx->compute_bindless_pointer_dirty) {
2114 		si_emit_shader_pointer(sctx, &sctx->bindless_descriptors, base);
2115 		sctx->compute_bindless_pointer_dirty = false;
2116 	}
2117 }
2118 
2119 /* BINDLESS */
2120 
si_init_bindless_descriptors(struct si_context * sctx,struct si_descriptors * desc,unsigned shader_userdata_index,unsigned num_elements)2121 static void si_init_bindless_descriptors(struct si_context *sctx,
2122 					 struct si_descriptors *desc,
2123 					 unsigned shader_userdata_index,
2124 					 unsigned num_elements)
2125 {
2126 	MAYBE_UNUSED unsigned desc_slot;
2127 
2128 	si_init_descriptors(desc, shader_userdata_index, 16, num_elements);
2129 	sctx->bindless_descriptors.num_active_slots = num_elements;
2130 
2131 	/* The first bindless descriptor is stored at slot 1, because 0 is not
2132 	 * considered to be a valid handle.
2133 	 */
2134 	sctx->num_bindless_descriptors = 1;
2135 
2136 	/* Track which bindless slots are used (or not). */
2137 	util_idalloc_init(&sctx->bindless_used_slots);
2138 	util_idalloc_resize(&sctx->bindless_used_slots, num_elements);
2139 
2140 	/* Reserve slot 0 because it's an invalid handle for bindless. */
2141 	desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
2142 	assert(desc_slot == 0);
2143 }
2144 
si_release_bindless_descriptors(struct si_context * sctx)2145 static void si_release_bindless_descriptors(struct si_context *sctx)
2146 {
2147 	si_release_descriptors(&sctx->bindless_descriptors);
2148 	util_idalloc_fini(&sctx->bindless_used_slots);
2149 }
2150 
si_get_first_free_bindless_slot(struct si_context * sctx)2151 static unsigned si_get_first_free_bindless_slot(struct si_context *sctx)
2152 {
2153 	struct si_descriptors *desc = &sctx->bindless_descriptors;
2154 	unsigned desc_slot;
2155 
2156 	desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
2157 	if (desc_slot >= desc->num_elements) {
2158 		/* The array of bindless descriptors is full, resize it. */
2159 		unsigned slot_size = desc->element_dw_size * 4;
2160 		unsigned new_num_elements = desc->num_elements * 2;
2161 
2162 		desc->list = REALLOC(desc->list, desc->num_elements * slot_size,
2163 				     new_num_elements * slot_size);
2164 		desc->num_elements = new_num_elements;
2165 		desc->num_active_slots = new_num_elements;
2166 	}
2167 
2168 	assert(desc_slot);
2169 	return desc_slot;
2170 }
2171 
2172 static unsigned
si_create_bindless_descriptor(struct si_context * sctx,uint32_t * desc_list,unsigned size)2173 si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list,
2174 			      unsigned size)
2175 {
2176 	struct si_descriptors *desc = &sctx->bindless_descriptors;
2177 	unsigned desc_slot, desc_slot_offset;
2178 
2179 	/* Find a free slot. */
2180 	desc_slot = si_get_first_free_bindless_slot(sctx);
2181 
2182 	/* For simplicity, sampler and image bindless descriptors use fixed
2183 	 * 16-dword slots for now. Image descriptors only need 8-dword but this
2184 	 * doesn't really matter because no real apps use image handles.
2185 	 */
2186 	desc_slot_offset = desc_slot * 16;
2187 
2188 	/* Copy the descriptor into the array. */
2189 	memcpy(desc->list + desc_slot_offset, desc_list, size);
2190 
2191 	/* Re-upload the whole array of bindless descriptors into a new buffer.
2192 	 */
2193 	if (!si_upload_descriptors(sctx, desc))
2194 		return 0;
2195 
2196 	/* Make sure to re-emit the shader pointers for all stages. */
2197 	sctx->graphics_bindless_pointer_dirty = true;
2198 	sctx->compute_bindless_pointer_dirty = true;
2199 
2200 	return desc_slot;
2201 }
2202 
si_update_bindless_buffer_descriptor(struct si_context * sctx,unsigned desc_slot,struct pipe_resource * resource,uint64_t offset,bool * desc_dirty)2203 static void si_update_bindless_buffer_descriptor(struct si_context *sctx,
2204 						 unsigned desc_slot,
2205 						 struct pipe_resource *resource,
2206 						 uint64_t offset,
2207 						 bool *desc_dirty)
2208 {
2209 	struct si_descriptors *desc = &sctx->bindless_descriptors;
2210 	struct r600_resource *buf = r600_resource(resource);
2211 	unsigned desc_slot_offset = desc_slot * 16;
2212 	uint32_t *desc_list = desc->list + desc_slot_offset + 4;
2213 	uint64_t old_desc_va;
2214 
2215 	assert(resource->target == PIPE_BUFFER);
2216 
2217 	/* Retrieve the old buffer addr from the descriptor. */
2218 	old_desc_va = si_desc_extract_buffer_address(desc_list);
2219 
2220 	if (old_desc_va != buf->gpu_address + offset) {
2221 		/* The buffer has been invalidated when the handle wasn't
2222 		 * resident, update the descriptor and the dirty flag.
2223 		 */
2224 		si_set_buf_desc_address(buf, offset, &desc_list[0]);
2225 
2226 		*desc_dirty = true;
2227 	}
2228 }
2229 
si_create_texture_handle(struct pipe_context * ctx,struct pipe_sampler_view * view,const struct pipe_sampler_state * state)2230 static uint64_t si_create_texture_handle(struct pipe_context *ctx,
2231 					 struct pipe_sampler_view *view,
2232 					 const struct pipe_sampler_state *state)
2233 {
2234 	struct si_sampler_view *sview = (struct si_sampler_view *)view;
2235 	struct si_context *sctx = (struct si_context *)ctx;
2236 	struct si_texture_handle *tex_handle;
2237 	struct si_sampler_state *sstate;
2238 	uint32_t desc_list[16];
2239 	uint64_t handle;
2240 
2241 	tex_handle = CALLOC_STRUCT(si_texture_handle);
2242 	if (!tex_handle)
2243 		return 0;
2244 
2245 	memset(desc_list, 0, sizeof(desc_list));
2246 	si_init_descriptor_list(&desc_list[0], 16, 1, null_texture_descriptor);
2247 
2248 	sstate = ctx->create_sampler_state(ctx, state);
2249 	if (!sstate) {
2250 		FREE(tex_handle);
2251 		return 0;
2252 	}
2253 
2254 	si_set_sampler_view_desc(sctx, sview, sstate, &desc_list[0]);
2255 	memcpy(&tex_handle->sstate, sstate, sizeof(*sstate));
2256 	ctx->delete_sampler_state(ctx, sstate);
2257 
2258 	tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
2259 							      sizeof(desc_list));
2260 	if (!tex_handle->desc_slot) {
2261 		FREE(tex_handle);
2262 		return 0;
2263 	}
2264 
2265 	handle = tex_handle->desc_slot;
2266 
2267 	if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)handle,
2268 				     tex_handle)) {
2269 		FREE(tex_handle);
2270 		return 0;
2271 	}
2272 
2273 	pipe_sampler_view_reference(&tex_handle->view, view);
2274 
2275 	r600_resource(sview->base.texture)->texture_handle_allocated = true;
2276 
2277 	return handle;
2278 }
2279 
si_delete_texture_handle(struct pipe_context * ctx,uint64_t handle)2280 static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle)
2281 {
2282 	struct si_context *sctx = (struct si_context *)ctx;
2283 	struct si_texture_handle *tex_handle;
2284 	struct hash_entry *entry;
2285 
2286 	entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle);
2287 	if (!entry)
2288 		return;
2289 
2290 	tex_handle = (struct si_texture_handle *)entry->data;
2291 
2292 	/* Allow this descriptor slot to be re-used. */
2293 	util_idalloc_free(&sctx->bindless_used_slots, tex_handle->desc_slot);
2294 
2295 	pipe_sampler_view_reference(&tex_handle->view, NULL);
2296 	_mesa_hash_table_remove(sctx->tex_handles, entry);
2297 	FREE(tex_handle);
2298 }
2299 
si_make_texture_handle_resident(struct pipe_context * ctx,uint64_t handle,bool resident)2300 static void si_make_texture_handle_resident(struct pipe_context *ctx,
2301 					    uint64_t handle, bool resident)
2302 {
2303 	struct si_context *sctx = (struct si_context *)ctx;
2304 	struct si_texture_handle *tex_handle;
2305 	struct si_sampler_view *sview;
2306 	struct hash_entry *entry;
2307 
2308 	entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle);
2309 	if (!entry)
2310 		return;
2311 
2312 	tex_handle = (struct si_texture_handle *)entry->data;
2313 	sview = (struct si_sampler_view *)tex_handle->view;
2314 
2315 	if (resident) {
2316 		if (sview->base.texture->target != PIPE_BUFFER) {
2317 			struct r600_texture *rtex =
2318 				(struct r600_texture *)sview->base.texture;
2319 
2320 			if (depth_needs_decompression(rtex)) {
2321 				util_dynarray_append(
2322 					&sctx->resident_tex_needs_depth_decompress,
2323 					struct si_texture_handle *,
2324 					tex_handle);
2325 			}
2326 
2327 			if (color_needs_decompression(rtex)) {
2328 				util_dynarray_append(
2329 					&sctx->resident_tex_needs_color_decompress,
2330 					struct si_texture_handle *,
2331 					tex_handle);
2332 			}
2333 
2334 			if (rtex->dcc_offset &&
2335 			    p_atomic_read(&rtex->framebuffers_bound))
2336 				sctx->need_check_render_feedback = true;
2337 
2338 			si_update_bindless_texture_descriptor(sctx, tex_handle);
2339 		} else {
2340 			si_update_bindless_buffer_descriptor(sctx,
2341 							     tex_handle->desc_slot,
2342 							     sview->base.texture,
2343 							     sview->base.u.buf.offset,
2344 							     &tex_handle->desc_dirty);
2345 		}
2346 
2347 		/* Re-upload the descriptor if it has been updated while it
2348 		 * wasn't resident.
2349 		 */
2350 		if (tex_handle->desc_dirty)
2351 			sctx->bindless_descriptors_dirty = true;
2352 
2353 		/* Add the texture handle to the per-context list. */
2354 		util_dynarray_append(&sctx->resident_tex_handles,
2355 				     struct si_texture_handle *, tex_handle);
2356 
2357 		/* Add the buffers to the current CS in case si_begin_new_cs()
2358 		 * is not going to be called.
2359 		 */
2360 		si_sampler_view_add_buffer(sctx, sview->base.texture,
2361 					   RADEON_USAGE_READ,
2362 					   sview->is_stencil_sampler, false);
2363 	} else {
2364 		/* Remove the texture handle from the per-context list. */
2365 		util_dynarray_delete_unordered(&sctx->resident_tex_handles,
2366 					       struct si_texture_handle *,
2367 					       tex_handle);
2368 
2369 		if (sview->base.texture->target != PIPE_BUFFER) {
2370 			util_dynarray_delete_unordered(
2371 				&sctx->resident_tex_needs_depth_decompress,
2372 				struct si_texture_handle *, tex_handle);
2373 
2374 			util_dynarray_delete_unordered(
2375 				&sctx->resident_tex_needs_color_decompress,
2376 				struct si_texture_handle *, tex_handle);
2377 		}
2378 	}
2379 }
2380 
si_create_image_handle(struct pipe_context * ctx,const struct pipe_image_view * view)2381 static uint64_t si_create_image_handle(struct pipe_context *ctx,
2382 				       const struct pipe_image_view *view)
2383 {
2384 	struct si_context *sctx = (struct si_context *)ctx;
2385 	struct si_image_handle *img_handle;
2386 	uint32_t desc_list[8];
2387 	uint64_t handle;
2388 
2389 	if (!view || !view->resource)
2390 		return 0;
2391 
2392 	img_handle = CALLOC_STRUCT(si_image_handle);
2393 	if (!img_handle)
2394 		return 0;
2395 
2396 	memset(desc_list, 0, sizeof(desc_list));
2397 	si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor);
2398 
2399 	si_set_shader_image_desc(sctx, view, false, &desc_list[0]);
2400 
2401 	img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
2402 							      sizeof(desc_list));
2403 	if (!img_handle->desc_slot) {
2404 		FREE(img_handle);
2405 		return 0;
2406 	}
2407 
2408 	handle = img_handle->desc_slot;
2409 
2410 	if (!_mesa_hash_table_insert(sctx->img_handles, (void *)handle,
2411 				     img_handle)) {
2412 		FREE(img_handle);
2413 		return 0;
2414 	}
2415 
2416 	util_copy_image_view(&img_handle->view, view);
2417 
2418 	r600_resource(view->resource)->image_handle_allocated = true;
2419 
2420 	return handle;
2421 }
2422 
si_delete_image_handle(struct pipe_context * ctx,uint64_t handle)2423 static void si_delete_image_handle(struct pipe_context *ctx, uint64_t handle)
2424 {
2425 	struct si_context *sctx = (struct si_context *)ctx;
2426 	struct si_image_handle *img_handle;
2427 	struct hash_entry *entry;
2428 
2429 	entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle);
2430 	if (!entry)
2431 		return;
2432 
2433 	img_handle = (struct si_image_handle *)entry->data;
2434 
2435 	util_copy_image_view(&img_handle->view, NULL);
2436 	_mesa_hash_table_remove(sctx->img_handles, entry);
2437 	FREE(img_handle);
2438 }
2439 
si_make_image_handle_resident(struct pipe_context * ctx,uint64_t handle,unsigned access,bool resident)2440 static void si_make_image_handle_resident(struct pipe_context *ctx,
2441 					  uint64_t handle, unsigned access,
2442 					  bool resident)
2443 {
2444 	struct si_context *sctx = (struct si_context *)ctx;
2445 	struct si_image_handle *img_handle;
2446 	struct pipe_image_view *view;
2447 	struct r600_resource *res;
2448 	struct hash_entry *entry;
2449 
2450 	entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle);
2451 	if (!entry)
2452 		return;
2453 
2454 	img_handle = (struct si_image_handle *)entry->data;
2455 	view = &img_handle->view;
2456 	res = (struct r600_resource *)view->resource;
2457 
2458 	if (resident) {
2459 		if (res->b.b.target != PIPE_BUFFER) {
2460 			struct r600_texture *rtex = (struct r600_texture *)res;
2461 			unsigned level = view->u.tex.level;
2462 
2463 			if (color_needs_decompression(rtex)) {
2464 				util_dynarray_append(
2465 					&sctx->resident_img_needs_color_decompress,
2466 					struct si_image_handle *,
2467 					img_handle);
2468 			}
2469 
2470 			if (vi_dcc_enabled(rtex, level) &&
2471 			    p_atomic_read(&rtex->framebuffers_bound))
2472 				sctx->need_check_render_feedback = true;
2473 
2474 			si_update_bindless_image_descriptor(sctx, img_handle);
2475 		} else {
2476 			si_update_bindless_buffer_descriptor(sctx,
2477 							     img_handle->desc_slot,
2478 							     view->resource,
2479 							     view->u.buf.offset,
2480 							     &img_handle->desc_dirty);
2481 		}
2482 
2483 		/* Re-upload the descriptor if it has been updated while it
2484 		 * wasn't resident.
2485 		 */
2486 		if (img_handle->desc_dirty)
2487 			sctx->bindless_descriptors_dirty = true;
2488 
2489 		/* Add the image handle to the per-context list. */
2490 		util_dynarray_append(&sctx->resident_img_handles,
2491 				     struct si_image_handle *, img_handle);
2492 
2493 		/* Add the buffers to the current CS in case si_begin_new_cs()
2494 		 * is not going to be called.
2495 		 */
2496 		si_sampler_view_add_buffer(sctx, view->resource,
2497 					   (access & PIPE_IMAGE_ACCESS_WRITE) ?
2498 					   RADEON_USAGE_READWRITE :
2499 					   RADEON_USAGE_READ, false, false);
2500 	} else {
2501 		/* Remove the image handle from the per-context list. */
2502 		util_dynarray_delete_unordered(&sctx->resident_img_handles,
2503 					       struct si_image_handle *,
2504 					       img_handle);
2505 
2506 		if (res->b.b.target != PIPE_BUFFER) {
2507 			util_dynarray_delete_unordered(
2508 				&sctx->resident_img_needs_color_decompress,
2509 				struct si_image_handle *,
2510 				img_handle);
2511 		}
2512 	}
2513 }
2514 
2515 
si_all_resident_buffers_begin_new_cs(struct si_context * sctx)2516 void si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
2517 {
2518 	unsigned num_resident_tex_handles, num_resident_img_handles;
2519 
2520 	num_resident_tex_handles = sctx->resident_tex_handles.size /
2521 				   sizeof(struct si_texture_handle *);
2522 	num_resident_img_handles = sctx->resident_img_handles.size /
2523 				   sizeof(struct si_image_handle *);
2524 
2525 	/* Add all resident texture handles. */
2526 	util_dynarray_foreach(&sctx->resident_tex_handles,
2527 			      struct si_texture_handle *, tex_handle) {
2528 		struct si_sampler_view *sview =
2529 			(struct si_sampler_view *)(*tex_handle)->view;
2530 
2531 		si_sampler_view_add_buffer(sctx, sview->base.texture,
2532 					   RADEON_USAGE_READ,
2533 					   sview->is_stencil_sampler, false);
2534 	}
2535 
2536 	/* Add all resident image handles. */
2537 	util_dynarray_foreach(&sctx->resident_img_handles,
2538 			      struct si_image_handle *, img_handle) {
2539 		struct pipe_image_view *view = &(*img_handle)->view;
2540 
2541 		si_sampler_view_add_buffer(sctx, view->resource,
2542 					   RADEON_USAGE_READWRITE,
2543 					   false, false);
2544 	}
2545 
2546 	sctx->b.num_resident_handles += num_resident_tex_handles +
2547 					num_resident_img_handles;
2548 }
2549 
2550 /* INIT/DEINIT/UPLOAD */
2551 
si_init_all_descriptors(struct si_context * sctx)2552 void si_init_all_descriptors(struct si_context *sctx)
2553 {
2554 	int i;
2555 
2556 	STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
2557 	STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
2558 
2559 	for (i = 0; i < SI_NUM_SHADERS; i++) {
2560 		bool gfx9_tcs = false;
2561 		bool gfx9_gs = false;
2562 		unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
2563 		unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
2564 		struct si_descriptors *desc;
2565 
2566 		if (sctx->b.chip_class >= GFX9) {
2567 			gfx9_tcs = i == PIPE_SHADER_TESS_CTRL;
2568 			gfx9_gs = i == PIPE_SHADER_GEOMETRY;
2569 		}
2570 
2571 		desc = si_const_and_shader_buffer_descriptors(sctx, i);
2572 		si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc,
2573 					 num_buffer_slots,
2574 					 gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
2575 					 gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
2576 						   SI_SGPR_CONST_AND_SHADER_BUFFERS,
2577 					 RADEON_USAGE_READWRITE,
2578 					 RADEON_USAGE_READ,
2579 					 RADEON_PRIO_SHADER_RW_BUFFER,
2580 					 RADEON_PRIO_CONST_BUFFER);
2581 		desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
2582 
2583 		desc = si_sampler_and_image_descriptors(sctx, i);
2584 		si_init_descriptors(desc,
2585 				    gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
2586 				    gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :
2587 					      SI_SGPR_SAMPLERS_AND_IMAGES,
2588 				    16, num_sampler_slots);
2589 
2590 		int j;
2591 		for (j = 0; j < SI_NUM_IMAGES; j++)
2592 			memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
2593 		for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++)
2594 			memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
2595 	}
2596 
2597 	si_init_buffer_resources(&sctx->rw_buffers,
2598 				 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
2599 				 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
2600 				 /* The second set of usage/priority is used by
2601 				  * const buffers in RW buffer slots. */
2602 				 RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
2603 				 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER);
2604 	sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS;
2605 
2606 	si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
2607 			    4, SI_NUM_VERTEX_BUFFERS);
2608 	FREE(sctx->vertex_buffers.list); /* not used */
2609 	sctx->vertex_buffers.list = NULL;
2610 
2611 	/* Initialize an array of 1024 bindless descriptors, when the limit is
2612 	 * reached, just make it larger and re-upload the whole array.
2613 	 */
2614 	si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
2615 				     SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
2616 				     1024);
2617 
2618 	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
2619 
2620 	/* Set pipe_context functions. */
2621 	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
2622 	sctx->b.b.set_shader_images = si_set_shader_images;
2623 	sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
2624 	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
2625 	sctx->b.b.set_shader_buffers = si_set_shader_buffers;
2626 	sctx->b.b.set_sampler_views = si_set_sampler_views;
2627 	sctx->b.b.create_texture_handle = si_create_texture_handle;
2628 	sctx->b.b.delete_texture_handle = si_delete_texture_handle;
2629 	sctx->b.b.make_texture_handle_resident = si_make_texture_handle_resident;
2630 	sctx->b.b.create_image_handle = si_create_image_handle;
2631 	sctx->b.b.delete_image_handle = si_delete_image_handle;
2632 	sctx->b.b.make_image_handle_resident = si_make_image_handle_resident;
2633 	sctx->b.invalidate_buffer = si_invalidate_buffer;
2634 	sctx->b.rebind_buffer = si_rebind_buffer;
2635 
2636 	/* Shader user data. */
2637 	si_init_atom(sctx, &sctx->shader_pointers.atom, &sctx->atoms.s.shader_pointers,
2638 		     si_emit_graphics_shader_pointers);
2639 
2640 	/* Set default and immutable mappings. */
2641 	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
2642 
2643 	if (sctx->b.chip_class >= GFX9) {
2644 		si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
2645 				      R_00B430_SPI_SHADER_USER_DATA_LS_0);
2646 		si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
2647 				      R_00B330_SPI_SHADER_USER_DATA_ES_0);
2648 	} else {
2649 		si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
2650 				      R_00B430_SPI_SHADER_USER_DATA_HS_0);
2651 		si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
2652 				      R_00B230_SPI_SHADER_USER_DATA_GS_0);
2653 	}
2654 	si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
2655 }
2656 
si_upload_shader_descriptors(struct si_context * sctx,unsigned mask)2657 static bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask)
2658 {
2659 	unsigned dirty = sctx->descriptors_dirty & mask;
2660 
2661 	/* Assume nothing will go wrong: */
2662 	sctx->shader_pointers_dirty |= dirty;
2663 
2664 	while (dirty) {
2665 		unsigned i = u_bit_scan(&dirty);
2666 
2667 		if (!si_upload_descriptors(sctx, &sctx->descriptors[i]))
2668 			return false;
2669 	}
2670 
2671 	sctx->descriptors_dirty &= ~mask;
2672 
2673 	si_upload_bindless_descriptors(sctx);
2674 
2675 	return true;
2676 }
2677 
si_upload_graphics_shader_descriptors(struct si_context * sctx)2678 bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
2679 {
2680 	const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
2681 	return si_upload_shader_descriptors(sctx, mask);
2682 }
2683 
si_upload_compute_shader_descriptors(struct si_context * sctx)2684 bool si_upload_compute_shader_descriptors(struct si_context *sctx)
2685 {
2686 	/* Does not update rw_buffers as that is not needed for compute shaders
2687 	 * and the input buffer is using the same SGPR's anyway.
2688 	 */
2689 	const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE,
2690 						SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
2691 	return si_upload_shader_descriptors(sctx, mask);
2692 }
2693 
si_release_all_descriptors(struct si_context * sctx)2694 void si_release_all_descriptors(struct si_context *sctx)
2695 {
2696 	int i;
2697 
2698 	for (i = 0; i < SI_NUM_SHADERS; i++) {
2699 		si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
2700 					    si_const_and_shader_buffer_descriptors(sctx, i));
2701 		si_release_sampler_views(&sctx->samplers[i]);
2702 		si_release_image_views(&sctx->images[i]);
2703 	}
2704 	si_release_buffer_resources(&sctx->rw_buffers,
2705 				    &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
2706 	for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++)
2707 		pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]);
2708 
2709 	for (i = 0; i < SI_NUM_DESCS; ++i)
2710 		si_release_descriptors(&sctx->descriptors[i]);
2711 
2712 	sctx->vertex_buffers.list = NULL; /* points into a mapped buffer */
2713 	si_release_descriptors(&sctx->vertex_buffers);
2714 	si_release_bindless_descriptors(sctx);
2715 }
2716 
si_all_descriptors_begin_new_cs(struct si_context * sctx)2717 void si_all_descriptors_begin_new_cs(struct si_context *sctx)
2718 {
2719 	int i;
2720 
2721 	for (i = 0; i < SI_NUM_SHADERS; i++) {
2722 		si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
2723 		si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]);
2724 		si_image_views_begin_new_cs(sctx, &sctx->images[i]);
2725 	}
2726 	si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
2727 	si_vertex_buffers_begin_new_cs(sctx);
2728 
2729 	for (i = 0; i < SI_NUM_DESCS; ++i)
2730 		si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);
2731 	si_descriptors_begin_new_cs(sctx, &sctx->bindless_descriptors);
2732 
2733 	si_shader_pointers_begin_new_cs(sctx);
2734 }
2735 
si_set_active_descriptors(struct si_context * sctx,unsigned desc_idx,uint64_t new_active_mask)2736 void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
2737 			       uint64_t new_active_mask)
2738 {
2739 	struct si_descriptors *desc = &sctx->descriptors[desc_idx];
2740 
2741 	/* Ignore no-op updates and updates that disable all slots. */
2742 	if (!new_active_mask ||
2743 	    new_active_mask == u_bit_consecutive64(desc->first_active_slot,
2744 						   desc->num_active_slots))
2745 		return;
2746 
2747 	int first, count;
2748 	u_bit_scan_consecutive_range64(&new_active_mask, &first, &count);
2749 	assert(new_active_mask == 0);
2750 
2751 	/* Upload/dump descriptors if slots are being enabled. */
2752 	if (first < desc->first_active_slot ||
2753 	    first + count > desc->first_active_slot + desc->num_active_slots)
2754 		sctx->descriptors_dirty |= 1u << desc_idx;
2755 
2756 	desc->first_active_slot = first;
2757 	desc->num_active_slots = count;
2758 }
2759 
si_set_active_descriptors_for_shader(struct si_context * sctx,struct si_shader_selector * sel)2760 void si_set_active_descriptors_for_shader(struct si_context *sctx,
2761 					  struct si_shader_selector *sel)
2762 {
2763 	if (!sel)
2764 		return;
2765 
2766 	si_set_active_descriptors(sctx,
2767 		si_const_and_shader_buffer_descriptors_idx(sel->type),
2768 		sel->active_const_and_shader_buffers);
2769 	si_set_active_descriptors(sctx,
2770 		si_sampler_and_image_descriptors_idx(sel->type),
2771 		sel->active_samplers_and_images);
2772 }
2773