/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

24 #ifndef SI_STATE_H
25 #define SI_STATE_H
26 
27 #include "si_pm4.h"
28 #include "radeon/r600_pipe_common.h"
29 
30 #include "pipebuffer/pb_slab.h"
31 
/* Shader stage counts, expressed as "last PIPE_SHADER_* index + 1". */
#define SI_NUM_GRAPHICS_SHADERS (PIPE_SHADER_TESS_EVAL+1)
#define SI_NUM_SHADERS (PIPE_SHADER_COMPUTE+1)

/* Fixed per-shader resource limits used to size the tables in this header. */
#define SI_MAX_ATTRIBS			16
#define SI_NUM_VERTEX_BUFFERS		SI_MAX_ATTRIBS
#define SI_NUM_SAMPLERS			32 /* OpenGL textures units per shader */
#define SI_NUM_CONST_BUFFERS		16
#define SI_NUM_IMAGES			16
#define SI_NUM_SHADER_BUFFERS		16

/* Forward declarations; this header does not need the full definitions. */
struct si_screen;
struct si_shader;
struct si_shader_selector;

46 struct si_state_blend {
47 	struct si_pm4_state	pm4;
48 	uint32_t		cb_target_mask;
49 	/* Set 0xf or 0x0 (4 bits) per render target if the following is
50 	 * true. ANDed with spi_shader_col_format.
51 	 */
52 	unsigned		cb_target_enabled_4bit;
53 	unsigned		blend_enable_4bit;
54 	unsigned		need_src_alpha_4bit;
55 	unsigned		commutative_4bit;
56 	bool			alpha_to_coverage:1;
57 	bool			alpha_to_one:1;
58 	bool			dual_src_blend:1;
59 	bool			logicop_enable:1;
60 };
61 
62 struct si_state_rasterizer {
63 	struct si_pm4_state	pm4;
64 	/* poly offset states for 16-bit, 24-bit, and 32-bit zbuffers */
65 	struct si_pm4_state	*pm4_poly_offset;
66 	unsigned		pa_sc_line_stipple;
67 	unsigned		pa_cl_clip_cntl;
68 	float			line_width;
69 	float			max_point_size;
70 	unsigned		sprite_coord_enable:8;
71 	unsigned		clip_plane_enable:8;
72 	unsigned		flatshade:1;
73 	unsigned		two_side:1;
74 	unsigned		multisample_enable:1;
75 	unsigned		force_persample_interp:1;
76 	unsigned		line_stipple_enable:1;
77 	unsigned		poly_stipple_enable:1;
78 	unsigned		line_smooth:1;
79 	unsigned		poly_smooth:1;
80 	unsigned		uses_poly_offset:1;
81 	unsigned		clamp_fragment_color:1;
82 	unsigned		clamp_vertex_color:1;
83 	unsigned		rasterizer_discard:1;
84 	unsigned		scissor_enable:1;
85 	unsigned		clip_halfz:1;
86 };
87 
/* Stencil value/write masks kept alongside the stencil reference state.
 * Each array has two entries -- presumably one per face (front/back);
 * TODO confirm against the code that fills them in.
 */
struct si_dsa_stencil_ref_part {
	uint8_t			valuemask[2];
	uint8_t			writemask[2];
};

/* Three independent 1-bit guarantees about how the depth/stencil result
 * depends on the order in which fragments arrive. */
struct si_dsa_order_invariance {
	/** Whether the final result in Z/S buffers is guaranteed to be
	 * invariant under changes to the order in which fragments arrive. */
	bool zs:1;

	/** Whether the set of fragments that pass the combined Z/S test is
	 * guaranteed to be invariant under changes to the order in which
	 * fragments arrive. */
	bool pass_set:1;

	/** Whether the last fragment that passes the combined Z/S test at each
	 * sample is guaranteed to be invariant under changes to the order in
	 * which fragments arrive. */
	bool pass_last:1;
};

109 struct si_state_dsa {
110 	struct si_pm4_state		pm4;
111 	struct si_dsa_stencil_ref_part	stencil_ref;
112 
113 	/* 0 = without stencil buffer, 1 = when both Z and S buffers are present */
114 	struct si_dsa_order_invariance	order_invariance[2];
115 
116 	ubyte				alpha_func:3;
117 	bool				depth_enabled:1;
118 	bool				depth_write_enabled:1;
119 	bool				stencil_enabled:1;
120 	bool				stencil_write_enabled:1;
121 	bool				db_can_write:1;
122 
123 };
124 
125 struct si_stencil_ref {
126 	struct r600_atom		atom;
127 	struct pipe_stencil_ref		state;
128 	struct si_dsa_stencil_ref_part	dsa_part;
129 };
130 
131 struct si_vertex_elements
132 {
133 	uint32_t			instance_divisors[SI_MAX_ATTRIBS];
134 	uint32_t			rsrc_word3[SI_MAX_ATTRIBS];
135 	uint16_t			src_offset[SI_MAX_ATTRIBS];
136 	uint8_t				fix_fetch[SI_MAX_ATTRIBS];
137 	uint8_t				format_size[SI_MAX_ATTRIBS];
138 	uint8_t				vertex_buffer_index[SI_MAX_ATTRIBS];
139 
140 	uint8_t				count;
141 	bool				uses_instance_divisors;
142 
143 	uint16_t			first_vb_use_mask;
144 	/* Vertex buffer descriptor list size aligned for optimal prefetch. */
145 	uint16_t			desc_list_byte_size;
146 	uint16_t			instance_divisor_is_one; /* bitmask of inputs */
147 	uint16_t			instance_divisor_is_fetched;  /* bitmask of inputs */
148 };
149 
/* The set of PM4 state pointers, addressable either by name or by index.
 *
 * "array" is a zero-length array (a compiler extension) that overlays the
 * named pointers, so array[i] is the i-th named member. The order of the
 * named members therefore defines the indices computed by
 * si_pm4_block_idx().
 */
union si_state {
	struct {
		struct si_state_blend		*blend;
		struct si_state_rasterizer	*rasterizer;
		struct si_state_dsa		*dsa;
		struct si_pm4_state		*poly_offset;
		struct si_pm4_state		*ls;
		struct si_pm4_state		*hs;
		struct si_pm4_state		*es;
		struct si_pm4_state		*gs;
		struct si_pm4_state		*vgt_shader_config;
		struct si_pm4_state		*vs;
		struct si_pm4_state		*ps;
	} named;
	struct si_pm4_state	*array[0];
};

/* Number of pointers in union si_state (array contributes no size). */
#define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))

/* The set of state atoms, addressable either by name ("s") or by index
 * through the overlaid zero-length "array" (a compiler extension). */
union si_state_atoms {
	struct {
		/* The order matters. */
		struct r600_atom *render_cond;
		struct r600_atom *streamout_begin;
		struct r600_atom *streamout_enable; /* must be after streamout_begin */
		struct r600_atom *framebuffer;
		struct r600_atom *msaa_sample_locs;
		struct r600_atom *db_render_state;
		struct r600_atom *dpbb_state;
		struct r600_atom *msaa_config;
		struct r600_atom *sample_mask;
		struct r600_atom *cb_render_state;
		struct r600_atom *blend_color;
		struct r600_atom *clip_regs;
		struct r600_atom *clip_state;
		struct r600_atom *shader_pointers;
		struct r600_atom *scissors;
		struct r600_atom *viewports;
		struct r600_atom *stencil_ref;
		struct r600_atom *spi_map;
		struct r600_atom *scratch_state;
	} s;
	struct r600_atom *array[0];
};

/* Number of pointers in union si_state_atoms (array contributes no size). */
#define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct r600_atom*))

197 struct si_shader_data {
198 	struct r600_atom	atom;
199 	uint32_t		sh_base[SI_NUM_SHADERS];
200 };
201 
/* Private read-write buffer slots.
 * Values are assigned sequentially from 0; SI_NUM_RW_BUFFERS is the count. */
enum {
	SI_ES_RING_ESGS,
	SI_GS_RING_ESGS,

	SI_RING_GSVS,

	SI_VS_STREAMOUT_BUF0,
	SI_VS_STREAMOUT_BUF1,
	SI_VS_STREAMOUT_BUF2,
	SI_VS_STREAMOUT_BUF3,

	SI_HS_CONST_DEFAULT_TESS_LEVELS,
	SI_VS_CONST_INSTANCE_DIVISORS,
	SI_VS_CONST_CLIP_PLANES,
	SI_PS_CONST_POLY_STIPPLE,
	SI_PS_CONST_SAMPLE_POSITIONS,

	SI_NUM_RW_BUFFERS,
};

/* Indices into sctx->descriptors, laid out so that gfx and compute pipelines
 * are contiguous:
 *
 *  0 - rw buffers
 *  1 - vertex const and shader buffers
 *  2 - vertex samplers and images
 *  3 - fragment const and shader buffers
 *   ...
 *  11 - compute const and shader buffers
 *  12 - compute samplers and images
 *
 * The enum below names the descriptor lists that exist per shader stage;
 * SI_NUM_SHADER_DESCS is their count.
 */
enum {
	SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS,
	SI_SHADER_DESCS_SAMPLERS_AND_IMAGES,
	SI_NUM_SHADER_DESCS,
};

/* Descriptor list indices: list 0 holds the rw buffers, then each shader
 * stage owns SI_NUM_SHADER_DESCS consecutive lists starting at
 * SI_DESCS_FIRST_SHADER, ordered by PIPE_SHADER_* index. */
#define SI_DESCS_RW_BUFFERS            0
#define SI_DESCS_FIRST_SHADER          1
#define SI_DESCS_FIRST_COMPUTE         (SI_DESCS_FIRST_SHADER + \
                                        PIPE_SHADER_COMPUTE * SI_NUM_SHADER_DESCS)
#define SI_NUM_DESCS                   (SI_DESCS_FIRST_SHADER + \
                                        SI_NUM_SHADERS * SI_NUM_SHADER_DESCS)

/* Passes the first list index of stage PIPE_SHADER_<name> and the per-stage
 * list count to u_bit_consecutive() to form a mask of that stage's lists. */
#define SI_DESCS_SHADER_MASK(name) \
	u_bit_consecutive(SI_DESCS_FIRST_SHADER + \
			  PIPE_SHADER_##name * SI_NUM_SHADER_DESCS, \
			  SI_NUM_SHADER_DESCS)

252 /* This represents descriptors in memory, such as buffer resources,
253  * image resources, and sampler states.
254  */
255 struct si_descriptors {
256 	/* The list of descriptors in malloc'd memory. */
257 	uint32_t *list;
258 	/* The list in mapped GPU memory. */
259 	uint32_t *gpu_list;
260 
261 	/* The buffer where the descriptors have been uploaded. */
262 	struct r600_resource *buffer;
263 	uint64_t gpu_address;
264 
265 	/* The maximum number of descriptors. */
266 	uint32_t num_elements;
267 
268 	/* Slots that are used by currently-bound shaders.
269 	 * It determines which slots are uploaded.
270 	 */
271 	uint32_t first_active_slot;
272 	uint32_t num_active_slots;
273 
274 	/* The SGPR index where the 64-bit pointer to the descriptor array will
275 	 * be stored. */
276 	ubyte shader_userdata_offset;
277 	/* The size of one descriptor. */
278 	ubyte element_dw_size;
279 	/* If there is only one slot enabled, bind it directly instead of
280 	 * uploading descriptors. -1 if disabled. */
281 	signed char slot_index_to_bind_directly;
282 };
283 
284 struct si_buffer_resources {
285 	struct pipe_resource		**buffers; /* this has num_buffers elements */
286 
287 	enum radeon_bo_usage		shader_usage:4; /* READ, WRITE, or READWRITE */
288 	enum radeon_bo_usage		shader_usage_constbuf:4;
289 	enum radeon_bo_priority		priority:6;
290 	enum radeon_bo_priority		priority_constbuf:6;
291 
292 	/* The i-th bit is set if that element is enabled (non-NULL resource). */
293 	unsigned			enabled_mask;
294 };
295 
/* Slot index of a named member of union si_state, i.e. its position in the
 * overlaid pointer array. */
#define si_pm4_block_idx(member) \
	(offsetof(union si_state, named.member) / sizeof(struct si_pm4_state *))

/* True if the queued pointer for "member" differs from the emitted one.
 * "sctx" is evaluated more than once. */
#define si_pm4_state_changed(sctx, member) \
	((sctx)->queued.named.member != (sctx)->emitted.named.member)

/* True if a non-NULL state is queued for "member" and it differs from the
 * emitted one. "sctx" is evaluated more than once. */
#define si_pm4_state_enabled_and_changed(sctx, member) \
	((sctx)->queued.named.member && si_pm4_state_changed(sctx, member))

/* Queue "value" as the state for "member" and set the member's bit in
 * dirty_states. The bit is built from an unsigned constant so the shift
 * stays well-defined for every slot index below 32. */
#define si_pm4_bind_state(sctx, member, value) \
	do { \
		(sctx)->queued.named.member = (value); \
		(sctx)->dirty_states |= 1u << si_pm4_block_idx(member); \
	} while(0)

/* Clear the queued pointer for "member" if it currently is "value", then
 * pass "value" (cast to struct si_pm4_state *) and the member's slot index
 * to si_pm4_free_state(). "sctx" and "value" are evaluated more than once. */
#define si_pm4_delete_state(sctx, member, value) \
	do { \
		if ((sctx)->queued.named.member == (value)) { \
			(sctx)->queued.named.member = NULL; \
		} \
		si_pm4_free_state(sctx, (struct si_pm4_state *)(value), \
				  si_pm4_block_idx(member)); \
	} while(0)

320 /* si_descriptors.c */
321 void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
322 				    struct r600_texture *tex,
323 				    const struct legacy_surf_level *base_level_info,
324 				    unsigned base_level, unsigned first_level,
325 				    unsigned block_width, bool is_stencil,
326 				    uint32_t *state);
327 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
328 				 uint slot, struct pipe_constant_buffer *cbuf);
329 void si_get_shader_buffers(struct si_context *sctx,
330 			   enum pipe_shader_type shader,
331 			   uint start_slot, uint count,
332 			   struct pipe_shader_buffer *sbuf);
333 void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
334 			struct pipe_resource *buffer,
335 			unsigned stride, unsigned num_records,
336 			bool add_tid, bool swizzle,
337 			unsigned element_size, unsigned index_stride, uint64_t offset);
338 void si_init_all_descriptors(struct si_context *sctx);
339 bool si_upload_vertex_buffer_descriptors(struct si_context *sctx);
340 bool si_upload_graphics_shader_descriptors(struct si_context *sctx);
341 bool si_upload_compute_shader_descriptors(struct si_context *sctx);
342 void si_release_all_descriptors(struct si_context *sctx);
343 void si_all_descriptors_begin_new_cs(struct si_context *sctx);
344 void si_all_resident_buffers_begin_new_cs(struct si_context *sctx);
345 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
346 			    const uint8_t *ptr, unsigned size, uint32_t *const_offset);
347 void si_update_all_texture_descriptors(struct si_context *sctx);
348 void si_shader_change_notify(struct si_context *sctx);
349 void si_update_needs_color_decompress_masks(struct si_context *sctx);
350 void si_emit_graphics_shader_pointers(struct si_context *sctx,
351                                       struct r600_atom *atom);
352 void si_emit_compute_shader_pointers(struct si_context *sctx);
353 void si_set_rw_buffer(struct si_context *sctx,
354 		      uint slot, const struct pipe_constant_buffer *input);
355 void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
356 			       uint64_t new_active_mask);
357 void si_set_active_descriptors_for_shader(struct si_context *sctx,
358 					  struct si_shader_selector *sel);
359 bool si_bindless_descriptor_can_reclaim_slab(void *priv,
360 					     struct pb_slab_entry *entry);
361 struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap,
362 						  unsigned entry_size,
363 						  unsigned group_index);
364 void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab);
365 
/* si_state.c */
struct si_shader_selector;

/* "emit_func" is a callback taking the context and the atom. */
void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
		  struct r600_atom **list_elem,
		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state));
void si_init_state_functions(struct si_context *sctx);
void si_init_screen_state_functions(struct si_screen *sscreen);
void
si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
			  enum pipe_format format,
			  unsigned offset, unsigned size,
			  uint32_t *state);
void
si_make_texture_descriptor(struct si_screen *screen,
			   struct r600_texture *tex,
			   bool sampler,
			   enum pipe_texture_target target,
			   enum pipe_format pipe_format,
			   const unsigned char state_swizzle[4],
			   unsigned first_level, unsigned last_level,
			   unsigned first_layer, unsigned last_layer,
			   unsigned width, unsigned height, unsigned depth,
			   uint32_t *state,
			   uint32_t *fmask_state);
struct pipe_sampler_view *
si_create_sampler_view_custom(struct pipe_context *ctx,
			      struct pipe_resource *texture,
			      const struct pipe_sampler_view *state,
			      unsigned width0, unsigned height0,
			      unsigned force_level);
void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);

/* si_state_binning.c */
void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state);

/* si_state_shaders.c */
bool si_update_shaders(struct si_context *sctx);
void si_init_shader_functions(struct si_context *sctx);
bool si_init_shader_cache(struct si_screen *sscreen);
void si_destroy_shader_cache(struct si_screen *sscreen);
/* Takes two output pointers: a 32-bit mask for const and shader buffers
 * and a 64-bit mask for samplers and images. */
void si_get_active_slot_masks(const struct tgsi_shader_info *info,
			      uint32_t *const_and_shader_buffers,
			      uint64_t *samplers_and_images);
void *si_get_blit_vs(struct si_context *sctx, enum blitter_attrib_type type,
		     unsigned num_layers);

413 /* si_state_draw.c */
414 void si_init_ia_multi_vgt_param_table(struct si_context *sctx);
415 void si_emit_cache_flush(struct si_context *sctx);
416 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo);
417 void si_draw_rectangle(struct blitter_context *blitter,
418 		       void *vertex_elements_cso,
419 		       blitter_get_vs_func get_vs,
420 		       int x1, int y1, int x2, int y2,
421 		       float depth, unsigned num_instances,
422 		       enum blitter_attrib_type type,
423 		       const union blitter_attrib *attrib);
424 void si_trace_emit(struct si_context *sctx);
425 
/* si_state_msaa.c */
void si_init_msaa_functions(struct si_context *sctx);
void si_emit_sample_locations(struct radeon_winsys_cs *cs, int nr_samples);

/* si_state_streamout.c */
void si_streamout_buffers_dirty(struct si_context *sctx);
void si_emit_streamout_end(struct si_context *sctx);
void si_update_prims_generated_query_state(struct si_context *sctx,
					   unsigned type, int diff);
void si_init_streamout_functions(struct si_context *sctx);


438 static inline unsigned
si_tile_mode_index(struct r600_texture * rtex,unsigned level,bool stencil)439 si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
440 {
441 	if (stencil)
442 		return rtex->surface.u.legacy.stencil_tiling_index[level];
443 	else
444 		return rtex->surface.u.legacy.tiling_index[level];
445 }
446 
si_get_constbuf_slot(unsigned slot)447 static inline unsigned si_get_constbuf_slot(unsigned slot)
448 {
449 	/* Constant buffers are in slots [16..31], ascending */
450 	return SI_NUM_SHADER_BUFFERS + slot;
451 }
452 
si_get_shaderbuf_slot(unsigned slot)453 static inline unsigned si_get_shaderbuf_slot(unsigned slot)
454 {
455 	/* shader buffers are in slots [15..0], descending */
456 	return SI_NUM_SHADER_BUFFERS - 1 - slot;
457 }
458 
si_get_sampler_slot(unsigned slot)459 static inline unsigned si_get_sampler_slot(unsigned slot)
460 {
461 	/* samplers are in slots [8..39], ascending */
462 	return SI_NUM_IMAGES / 2 + slot;
463 }
464 
si_get_image_slot(unsigned slot)465 static inline unsigned si_get_image_slot(unsigned slot)
466 {
467 	/* images are in slots [15..0] (sampler slots [7..0]), descending */
468 	return SI_NUM_IMAGES - 1 - slot;
469 }
470 
471 #endif
472