1 /*
2  * Copyright © 2019 Valve Corporation.
3  * Copyright © 2016 Red Hat.
4  * Copyright © 2016 Bas Nieuwenhuizen
5  *
6  * based in part on anv driver which is:
7  * Copyright © 2015 Intel Corporation
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining a
10  * copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice (including the next
17  * paragraph) shall be included in all copies or substantial portions of the
18  * Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26  * IN THE SOFTWARE.
27  */
28 
29 #include "radv_private.h"
30 #include "radv_shader.h"
31 #include "radv_shader_args.h"
32 
33 static void
set_loc(struct radv_userdata_info * ud_info,uint8_t * sgpr_idx,uint8_t num_sgprs)34 set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx,
35 	uint8_t num_sgprs)
36 {
37 	ud_info->sgpr_idx = *sgpr_idx;
38 	ud_info->num_sgprs = num_sgprs;
39 	*sgpr_idx += num_sgprs;
40 }
41 
42 static void
set_loc_shader(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx,uint8_t num_sgprs)43 set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx,
44 	       uint8_t num_sgprs)
45 {
46 	struct radv_userdata_info *ud_info =
47 		&args->shader_info->user_sgprs_locs.shader_data[idx];
48 	assert(ud_info);
49 
50 	set_loc(ud_info, sgpr_idx, num_sgprs);
51 }
52 
53 static void
set_loc_shader_ptr(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx)54 set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
55 {
56 	bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
57 
58 	set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
59 }
60 
61 static void
set_loc_desc(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx)62 set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
63 {
64 	struct radv_userdata_locations *locs =
65 		&args->shader_info->user_sgprs_locs;
66 	struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
67 	assert(ud_info);
68 
69 	set_loc(ud_info, sgpr_idx, 1);
70 
71 	locs->descriptor_sets_enabled |= 1u << idx;
72 }
73 
/* Result of the user SGPR budgeting done by allocate_user_sgprs(). */
struct user_sgpr_info {
	/* Set when there are fewer free user SGPRs than used descriptor
	 * sets, in which case a single pointer argument is declared for
	 * the descriptor sets instead of one per set.
	 */
	bool indirect_all_descriptor_sets;

	/* User SGPRs still free after the fixed arguments and descriptor
	 * set pointers; read when sizing inline push constants.
	 */
	uint8_t remaining_sgprs;
};
78 
/**
 * Tell whether a view index SGPR has to be declared for the given stage.
 *
 * It is always needed when the shader info reports
 * needs_multiview_view_index. Otherwise it depends on the pipeline key's
 * has_multiview_view_index flag combined with the as_es/as_ls/as_ngg
 * output mode of the stage.
 */
static bool needs_view_index_sgpr(struct radv_shader_args *args,
				  gl_shader_stage stage)
{
	const bool used_by_shader =
		args->shader_info->needs_multiview_view_index;
	const bool key_has_multiview =
		args->options->key.has_multiview_view_index;
	const bool as_es = args->options->key.vs_common_out.as_es;
	const bool as_ls = args->options->key.vs_common_out.as_ls;
	const bool as_ngg = args->options->key.vs_common_out.as_ngg;

	switch (stage) {
	case MESA_SHADER_VERTEX:
		return used_by_shader ||
		       (!as_es && !as_ls && key_has_multiview);
	case MESA_SHADER_TESS_EVAL:
		return used_by_shader || (!as_es && key_has_multiview);
	case MESA_SHADER_TESS_CTRL:
		return used_by_shader;
	case MESA_SHADER_GEOMETRY:
		return used_by_shader || (as_ngg && key_has_multiview);
	default:
		return false;
	}
}
107 
108 static uint8_t
count_vs_user_sgprs(struct radv_shader_args * args)109 count_vs_user_sgprs(struct radv_shader_args *args)
110 {
111 	uint8_t count = 0;
112 
113 	if (args->shader_info->vs.has_vertex_buffers)
114 		count++;
115 	count += args->shader_info->vs.needs_draw_id ? 3 : 2;
116 
117 	return count;
118 }
119 
allocate_inline_push_consts(struct radv_shader_args * args,struct user_sgpr_info * user_sgpr_info)120 static void allocate_inline_push_consts(struct radv_shader_args *args,
121 					struct user_sgpr_info *user_sgpr_info)
122 {
123 	uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
124 
125 	/* Only supported if shaders use push constants. */
126 	if (args->shader_info->min_push_constant_used == UINT8_MAX)
127 		return;
128 
129 	/* Only supported if shaders don't have indirect push constants. */
130 	if (args->shader_info->has_indirect_push_constants)
131 		return;
132 
133 	/* Only supported for 32-bit push constants. */
134 	if (!args->shader_info->has_only_32bit_push_constants)
135 		return;
136 
137 	uint8_t num_push_consts =
138 		(args->shader_info->max_push_constant_used -
139 		 args->shader_info->min_push_constant_used) / 4;
140 
141 	/* Check if the number of user SGPRs is large enough. */
142 	if (num_push_consts < remaining_sgprs) {
143 		args->shader_info->num_inline_push_consts = num_push_consts;
144 	} else {
145 		args->shader_info->num_inline_push_consts = remaining_sgprs;
146 	}
147 
148 	/* Clamp to the maximum number of allowed inlined push constants. */
149 	if (args->shader_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
150 		args->shader_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
151 
152 	if (args->shader_info->num_inline_push_consts == num_push_consts &&
153 	    !args->shader_info->loads_dynamic_offsets) {
154 		/* Disable the default push constants path if all constants are
155 		 * inlined and if shaders don't use dynamic descriptors.
156 		 */
157 		args->shader_info->loads_push_constants = false;
158 	}
159 
160 	args->shader_info->base_inline_push_consts =
161 		args->shader_info->min_push_constant_used / 4;
162 }
163 
/**
 * Work out the user SGPR budget for a shader stage.
 *
 * Counts the user SGPRs taken by fixed arguments, then decides whether
 * the used descriptor sets each get their own SGPR or have to go through
 * a single indirect pointer. The result is written to user_sgpr_info
 * (which is fully reinitialized) and is finally used to size the inline
 * push constants.
 */
static void allocate_user_sgprs(struct radv_shader_args *args,
				gl_shader_stage stage,
				bool has_previous_stage,
				gl_shader_stage previous_stage,
				bool needs_view_index,
				struct user_sgpr_info *user_sgpr_info)
{
	/* 2 user sgprs will always be allocated for scratch/rings */
	uint8_t num_used = 2;

	memset(user_sgpr_info, 0, sizeof(*user_sgpr_info));

	switch (stage) {
	case MESA_SHADER_COMPUTE:
		if (args->shader_info->cs.uses_grid_size)
			num_used += 3;
		break;
	case MESA_SHADER_FRAGMENT:
		num_used += args->shader_info->ps.needs_sample_positions;
		break;
	case MESA_SHADER_VERTEX:
		if (!args->is_gs_copy_shader)
			num_used += count_vs_user_sgprs(args);
		break;
	case MESA_SHADER_TESS_CTRL:
	case MESA_SHADER_GEOMETRY:
		/* The VS arguments are only counted when a VS is the previous stage. */
		if (has_previous_stage && previous_stage == MESA_SHADER_VERTEX)
			num_used += count_vs_user_sgprs(args);
		break;
	default:
		break;
	}

	if (needs_view_index)
		num_used++;
	if (args->shader_info->loads_push_constants)
		num_used++;
	if (args->shader_info->so.num_outputs)
		num_used++;

	const uint32_t total_sgprs =
		(args->options->chip_class >= GFX9 &&
		 stage != MESA_SHADER_COMPUTE) ? 32 : 16;
	const uint32_t free_sgprs = total_sgprs - num_used;
	const uint32_t num_desc_set =
		util_bitcount(args->shader_info->desc_set_used_mask);
	const bool sets_fit = free_sgprs >= num_desc_set;

	/* Without room for one SGPR per set, a single SGPR is spent on the
	 * indirect descriptor set pointer instead.
	 */
	user_sgpr_info->indirect_all_descriptor_sets = !sets_fit;
	user_sgpr_info->remaining_sgprs =
		free_sgprs - (sets_fit ? num_desc_set : 1);

	allocate_inline_push_consts(args, user_sgpr_info);
}
232 
233 static void
declare_global_input_sgprs(struct radv_shader_args * args,const struct user_sgpr_info * user_sgpr_info)234 declare_global_input_sgprs(struct radv_shader_args *args,
235 			   const struct user_sgpr_info *user_sgpr_info)
236 {
237 	/* 1 for each descriptor set */
238 	if (!user_sgpr_info->indirect_all_descriptor_sets) {
239 		uint32_t mask = args->shader_info->desc_set_used_mask;
240 
241 		while (mask) {
242 			int i = u_bit_scan(&mask);
243 
244 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR,
245 				   &args->descriptor_sets[i]);
246 		}
247 	} else {
248 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR,
249 			   &args->descriptor_sets[0]);
250 	}
251 
252 	if (args->shader_info->loads_push_constants) {
253 		/* 1 for push constants and dynamic descriptors */
254 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR,
255 			   &args->ac.push_constants);
256 	}
257 
258 	for (unsigned i = 0; i < args->shader_info->num_inline_push_consts; i++) {
259 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
260 			   &args->ac.inline_push_consts[i]);
261 	}
262 	args->ac.num_inline_push_consts = args->shader_info->num_inline_push_consts;
263 	args->ac.base_inline_push_consts = args->shader_info->base_inline_push_consts;
264 
265 	if (args->shader_info->so.num_outputs) {
266 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
267 			   &args->streamout_buffers);
268 	}
269 }
270 
271 static void
declare_vs_specific_input_sgprs(struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage)272 declare_vs_specific_input_sgprs(struct radv_shader_args *args,
273 				gl_shader_stage stage,
274 				bool has_previous_stage,
275 				gl_shader_stage previous_stage)
276 {
277 	if (!args->is_gs_copy_shader &&
278 	    (stage == MESA_SHADER_VERTEX ||
279 	     (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
280 		if (args->shader_info->vs.has_vertex_buffers) {
281 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
282 				   &args->vertex_buffers);
283 		}
284 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
285 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
286 		if (args->shader_info->vs.needs_draw_id) {
287 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
288 		}
289 	}
290 }
291 
292 static void
declare_vs_input_vgprs(struct radv_shader_args * args)293 declare_vs_input_vgprs(struct radv_shader_args *args)
294 {
295 	ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
296 	if (!args->is_gs_copy_shader) {
297 		if (args->options->key.vs_common_out.as_ls) {
298 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->rel_auto_id);
299 			if (args->options->chip_class >= GFX10) {
300 				ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
301 				ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
302 			} else {
303 				ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
304 				ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
305 			}
306 		} else {
307 			if (args->options->chip_class >= GFX10) {
308 				if (args->options->key.vs_common_out.as_ngg) {
309 					ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
310 					ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
311 					ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
312 				} else {
313 					ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
314 					ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id);
315 					ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
316 				}
317 			} else {
318 				ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
319 				ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id);
320 				ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
321 			}
322 		}
323 	}
324 }
325 
326 static void
declare_streamout_sgprs(struct radv_shader_args * args,gl_shader_stage stage)327 declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
328 {
329 	int i;
330 
331 	if (args->options->use_ngg_streamout) {
332 		if (stage == MESA_SHADER_TESS_EVAL)
333 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
334 		return;
335 	}
336 
337 	/* Streamout SGPRs. */
338 	if (args->shader_info->so.num_outputs) {
339 		assert(stage == MESA_SHADER_VERTEX ||
340 		       stage == MESA_SHADER_TESS_EVAL);
341 
342 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_config);
343 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_write_idx);
344 	} else if (stage == MESA_SHADER_TESS_EVAL) {
345 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
346 	}
347 
348 	/* A streamout buffer offset is loaded if the stride is non-zero. */
349 	for (i = 0; i < 4; i++) {
350 		if (!args->shader_info->so.strides[i])
351 			continue;
352 
353 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_offset[i]);
354 	}
355 }
356 
357 static void
declare_tes_input_vgprs(struct radv_shader_args * args)358 declare_tes_input_vgprs(struct radv_shader_args *args)
359 {
360 	ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_u);
361 	ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_v);
362 	ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->tes_rel_patch_id);
363 	ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
364 }
365 
366 static void
set_global_input_locs(struct radv_shader_args * args,const struct user_sgpr_info * user_sgpr_info,uint8_t * user_sgpr_idx)367 set_global_input_locs(struct radv_shader_args *args,
368 		      const struct user_sgpr_info *user_sgpr_info,
369 		      uint8_t *user_sgpr_idx)
370 {
371 	uint32_t mask = args->shader_info->desc_set_used_mask;
372 
373 	if (!user_sgpr_info->indirect_all_descriptor_sets) {
374 		while (mask) {
375 			int i = u_bit_scan(&mask);
376 
377 			set_loc_desc(args, i, user_sgpr_idx);
378 		}
379 	} else {
380 		set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS,
381 				   user_sgpr_idx);
382 
383 		args->shader_info->need_indirect_descriptor_sets = true;
384 	}
385 
386 	if (args->shader_info->loads_push_constants) {
387 		set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
388 	}
389 
390 	if (args->shader_info->num_inline_push_consts) {
391 		set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
392 			       args->shader_info->num_inline_push_consts);
393 	}
394 
395 	if (args->streamout_buffers.used) {
396 		set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS,
397 				   user_sgpr_idx);
398 	}
399 }
400 
401 static void
set_vs_specific_input_locs(struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage,uint8_t * user_sgpr_idx)402 set_vs_specific_input_locs(struct radv_shader_args *args,
403 			   gl_shader_stage stage, bool has_previous_stage,
404 			   gl_shader_stage previous_stage,
405 			   uint8_t *user_sgpr_idx)
406 {
407 	if (!args->is_gs_copy_shader &&
408 	    (stage == MESA_SHADER_VERTEX ||
409 	     (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
410 		if (args->shader_info->vs.has_vertex_buffers) {
411 			set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS,
412 					   user_sgpr_idx);
413 		}
414 
415 		unsigned vs_num = 2;
416 		if (args->shader_info->vs.needs_draw_id)
417 			vs_num++;
418 
419 		set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE,
420 			       user_sgpr_idx, vs_num);
421 	}
422 }
423 
424 /* Returns whether the stage is a stage that can be directly before the GS */
is_pre_gs_stage(gl_shader_stage stage)425 static bool is_pre_gs_stage(gl_shader_stage stage)
426 {
427 	return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
428 }
429 
430 void
radv_declare_shader_args(struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage)431 radv_declare_shader_args(struct radv_shader_args *args,
432 			 gl_shader_stage stage,
433 			 bool has_previous_stage,
434 			 gl_shader_stage previous_stage)
435 {
436 	struct user_sgpr_info user_sgpr_info;
437 	bool needs_view_index = needs_view_index_sgpr(args, stage);
438 
439 	if (args->options->chip_class >= GFX10) {
440 		if (is_pre_gs_stage(stage) && args->options->key.vs_common_out.as_ngg) {
441 			/* On GFX10, VS is merged into GS for NGG. */
442 			previous_stage = stage;
443 			stage = MESA_SHADER_GEOMETRY;
444 			has_previous_stage = true;
445 		}
446 	}
447 
448 	for (int i = 0; i < MAX_SETS; i++)
449 		args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
450 	for (int i = 0; i < AC_UD_MAX_UD; i++)
451 		args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
452 
453 
454 	allocate_user_sgprs(args, stage, has_previous_stage,
455 			    previous_stage, needs_view_index, &user_sgpr_info);
456 
457 	if (args->options->explicit_scratch_args) {
458 		ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR,
459 			   &args->ring_offsets);
460 	}
461 
462 	switch (stage) {
463 	case MESA_SHADER_COMPUTE:
464 		declare_global_input_sgprs(args, &user_sgpr_info);
465 
466 		if (args->shader_info->cs.uses_grid_size) {
467 			ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT,
468 				   &args->ac.num_work_groups);
469 		}
470 
471 		for (int i = 0; i < 3; i++) {
472 			if (args->shader_info->cs.uses_block_id[i]) {
473 				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
474 					   &args->ac.workgroup_ids[i]);
475 			}
476 		}
477 
478 		if (args->shader_info->cs.uses_local_invocation_idx) {
479 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
480 				   &args->ac.tg_size);
481 		}
482 
483 		if (args->options->explicit_scratch_args) {
484 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
485 				   &args->scratch_offset);
486 		}
487 
488 		ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT,
489 			   &args->ac.local_invocation_ids);
490 		break;
491 	case MESA_SHADER_VERTEX:
492 		declare_global_input_sgprs(args, &user_sgpr_info);
493 
494 		declare_vs_specific_input_sgprs(args, stage, has_previous_stage,
495 						previous_stage);
496 
497 		if (needs_view_index) {
498 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
499 				   &args->ac.view_index);
500 		}
501 
502 		if (args->options->key.vs_common_out.as_es) {
503 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
504 				&args->es2gs_offset);
505 		} else if (args->options->key.vs_common_out.as_ls) {
506 			/* no extra parameters */
507 		} else {
508 			declare_streamout_sgprs(args, stage);
509 		}
510 
511 		if (args->options->explicit_scratch_args) {
512 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
513 				   &args->scratch_offset);
514 		}
515 
516 		declare_vs_input_vgprs(args);
517 		break;
518 	case MESA_SHADER_TESS_CTRL:
519 		if (has_previous_stage) {
520 			// First 6 system regs
521 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
522 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
523 				   &args->merged_wave_info);
524 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
525 				   &args->tess_factor_offset);
526 
527 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
528 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
529 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
530 
531 			declare_global_input_sgprs(args, &user_sgpr_info);
532 
533 			declare_vs_specific_input_sgprs(args, stage,
534 							has_previous_stage,
535 							previous_stage);
536 
537 			if (needs_view_index) {
538 				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
539 					   &args->ac.view_index);
540 			}
541 
542 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
543 				  &args->ac.tcs_patch_id);
544 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
545 				   &args->ac.tcs_rel_ids);
546 
547 			declare_vs_input_vgprs(args);
548 		} else {
549 			declare_global_input_sgprs(args, &user_sgpr_info);
550 
551 			if (needs_view_index) {
552 				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
553 					   &args->ac.view_index);
554 			}
555 
556 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
557 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
558 				   &args->tess_factor_offset);
559 			if (args->options->explicit_scratch_args) {
560 				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
561 					   &args->scratch_offset);
562 			}
563 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
564 				   &args->ac.tcs_patch_id);
565 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
566 				   &args->ac.tcs_rel_ids);
567 		}
568 		break;
569 	case MESA_SHADER_TESS_EVAL:
570 		declare_global_input_sgprs(args, &user_sgpr_info);
571 
572 		if (needs_view_index)
573 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
574 				&args->ac.view_index);
575 
576 		if (args->options->key.vs_common_out.as_es) {
577 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
578 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
579 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
580 				&args->es2gs_offset);
581 		} else {
582 			declare_streamout_sgprs(args, stage);
583 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
584 		}
585 		if (args->options->explicit_scratch_args) {
586 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
587 				   &args->scratch_offset);
588 		}
589 		declare_tes_input_vgprs(args);
590 		break;
591 	case MESA_SHADER_GEOMETRY:
592 		if (has_previous_stage) {
593 			// First 6 system regs
594 			if (args->options->key.vs_common_out.as_ngg) {
595 				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
596 					&args->gs_tg_info);
597 			} else {
598 				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
599 					&args->gs2vs_offset);
600 			}
601 
602 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
603 				   &args->merged_wave_info);
604 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
605 
606 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
607 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
608 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
609 
610 			declare_global_input_sgprs(args, &user_sgpr_info);
611 
612 			if (previous_stage != MESA_SHADER_TESS_EVAL) {
613 				declare_vs_specific_input_sgprs(args, stage,
614 								has_previous_stage,
615 								previous_stage);
616 			}
617 
618 			if (needs_view_index) {
619 				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
620 					   &args->ac.view_index);
621 			}
622 
623 			if (args->options->key.vs_common_out.as_ngg) {
624 				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
625 					   &args->ngg_gs_state);
626 			}
627 
628 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
629 				   &args->gs_vtx_offset[0]);
630 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
631 				   &args->gs_vtx_offset[2]);
632 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
633 				   &args->ac.gs_prim_id);
634 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
635 				   &args->ac.gs_invocation_id);
636 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
637 				   &args->gs_vtx_offset[4]);
638 
639 			if (previous_stage == MESA_SHADER_VERTEX) {
640 				declare_vs_input_vgprs(args);
641 			} else {
642 				declare_tes_input_vgprs(args);
643 			}
644 		} else {
645 			declare_global_input_sgprs(args, &user_sgpr_info);
646 
647 			if (needs_view_index) {
648 				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
649 					   &args->ac.view_index);
650 			}
651 
652 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset);
653 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id);
654 			if (args->options->explicit_scratch_args) {
655 				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
656 					   &args->scratch_offset);
657 			}
658 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
659 				   &args->gs_vtx_offset[0]);
660 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
661 				   &args->gs_vtx_offset[1]);
662 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
663 				   &args->ac.gs_prim_id);
664 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
665 				   &args->gs_vtx_offset[2]);
666 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
667 				   &args->gs_vtx_offset[3]);
668 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
669 				   &args->gs_vtx_offset[4]);
670 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
671 				   &args->gs_vtx_offset[5]);
672 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
673 				   &args->ac.gs_invocation_id);
674 		}
675 		break;
676 	case MESA_SHADER_FRAGMENT:
677 		declare_global_input_sgprs(args, &user_sgpr_info);
678 
679 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
680 		if (args->options->explicit_scratch_args) {
681 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
682 				   &args->scratch_offset);
683 		}
684 		ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
685 		ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
686 		ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
687 		ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
688 		ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
689 		ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
690 		ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
691 		ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);  /* line stipple tex */
692 		ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
693 		ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
694 		ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
695 		ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
696 		ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
697 		ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
698 		ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
699 		ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);  /* fixed pt */
700 		break;
701 	default:
702 		unreachable("Shader stage not implemented");
703 	}
704 
705 	args->shader_info->num_input_vgprs = 0;
706 	args->shader_info->num_input_sgprs = 2;
707 	args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
708 	args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;
709 
710 	uint8_t user_sgpr_idx = 0;
711 
712 	set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
713 			   &user_sgpr_idx);
714 
715 	/* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
716 	 * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
717 	if (has_previous_stage)
718 		user_sgpr_idx = 0;
719 
720 	set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);
721 
722 	switch (stage) {
723 	case MESA_SHADER_COMPUTE:
724 		if (args->shader_info->cs.uses_grid_size) {
725 			set_loc_shader(args, AC_UD_CS_GRID_SIZE,
726 				       &user_sgpr_idx, 3);
727 		}
728 		break;
729 	case MESA_SHADER_VERTEX:
730 		set_vs_specific_input_locs(args, stage, has_previous_stage,
731 					   previous_stage, &user_sgpr_idx);
732 		if (args->ac.view_index.used)
733 			set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
734 		break;
735 	case MESA_SHADER_TESS_CTRL:
736 		set_vs_specific_input_locs(args, stage, has_previous_stage,
737 					   previous_stage, &user_sgpr_idx);
738 		if (args->ac.view_index.used)
739 			set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
740 		break;
741 	case MESA_SHADER_TESS_EVAL:
742 		if (args->ac.view_index.used)
743 			set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
744 		break;
745 	case MESA_SHADER_GEOMETRY:
746 		if (has_previous_stage) {
747 			if (previous_stage == MESA_SHADER_VERTEX)
748 				set_vs_specific_input_locs(args, stage,
749 							   has_previous_stage,
750 							   previous_stage,
751 							   &user_sgpr_idx);
752 		}
753 		if (args->ac.view_index.used)
754 			set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
755 
756 		if (args->ngg_gs_state.used)
757 			set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
758 		break;
759 	case MESA_SHADER_FRAGMENT:
760 		break;
761 	default:
762 		unreachable("Shader stage not implemented");
763 	}
764 
765 	args->shader_info->num_user_sgprs = user_sgpr_idx;
766 }
767 
768