1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "si_shader_internal.h"
25 #include "si_pipe.h"
26 
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39 
40 #include <stdio.h>
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43 
/* Numeric calling-convention identifiers, one per AMDGPU shader stage.
 *
 * NOTE(review): these presumably mirror LLVM's AMDGPU_* calling-convention
 * IDs and must match the LLVM version in use -- confirm against LLVM.
 */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
	RADEON_LLVM_AMDGPU_HS = 93,
};
51 
/**
 * Attach an integer-valued target-dependent attribute to function \p F.
 *
 * LLVMAddTargetDependentFunctionAttr takes the attribute value as text, so
 * \p value is formatted as a decimal string first.
 */
void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
{
	char value_text[16];

	snprintf(value_text, sizeof(value_text), "%i", value);
	LLVMAddTargetDependentFunctionAttr(F, name, value_text);
}
59 
/* State handed to si_diagnostic_handler through the LLVM context. */
struct si_llvm_diagnostics {
	/* Debug callback that receives every diagnostic message. */
	struct pipe_debug_callback *debug;
	/* Set to 1 by the handler when an error-severity diagnostic arrives. */
	unsigned retval;
};
64 
/**
 * LLVM diagnostic callback.
 *
 * Forwards every diagnostic to the debug callback stored in the
 * si_llvm_diagnostics passed as \p context. Error-severity diagnostics are
 * additionally printed to stderr and flagged in the state's retval.
 */
static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
{
	struct si_llvm_diagnostics *state = context;
	const LLVMDiagnosticSeverity sev = LLVMGetDiagInfoSeverity(di);
	char *msg = LLVMGetDiagInfoDescription(di);
	const char *label;

	if (sev == LLVMDSError)
		label = "error";
	else if (sev == LLVMDSWarning)
		label = "warning";
	else if (sev == LLVMDSRemark)
		label = "remark";
	else if (sev == LLVMDSNote)
		label = "note";
	else
		label = "unknown";

	pipe_debug_message(state->debug, SHADER_INFO,
			   "LLVM diagnostic (%s): %s", label, msg);

	/* Only errors mark the compile as failed. */
	if (sev == LLVMDSError) {
		state->retval = 1;
		fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", msg);
	}

	/* The description string was handed to us and must be released. */
	LLVMDisposeMessage(msg);
}
99 
100 /**
101  * Compile an LLVM module to machine code.
102  *
103  * @returns 0 for success, 1 for failure
104  */
si_llvm_compile(LLVMModuleRef M,struct ac_shader_binary * binary,LLVMTargetMachineRef tm,struct pipe_debug_callback * debug)105 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
106 			 LLVMTargetMachineRef tm,
107 			 struct pipe_debug_callback *debug)
108 {
109 	struct si_llvm_diagnostics diag;
110 	char *err;
111 	LLVMContextRef llvm_ctx;
112 	LLVMMemoryBufferRef out_buffer;
113 	unsigned buffer_size;
114 	const char *buffer_data;
115 	LLVMBool mem_err;
116 
117 	diag.debug = debug;
118 	diag.retval = 0;
119 
120 	/* Setup Diagnostic Handler*/
121 	llvm_ctx = LLVMGetModuleContext(M);
122 
123 	LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
124 
125 	/* Compile IR*/
126 	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
127 								 &out_buffer);
128 
129 	/* Process Errors/Warnings */
130 	if (mem_err) {
131 		fprintf(stderr, "%s: %s", __FUNCTION__, err);
132 		pipe_debug_message(debug, SHADER_INFO,
133 				   "LLVM emit error: %s", err);
134 		FREE(err);
135 		diag.retval = 1;
136 		goto out;
137 	}
138 
139 	/* Extract Shader Code*/
140 	buffer_size = LLVMGetBufferSize(out_buffer);
141 	buffer_data = LLVMGetBufferStart(out_buffer);
142 
143 	if (!ac_elf_read(buffer_data, buffer_size, binary)) {
144 		fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
145 		diag.retval = 1;
146 	}
147 
148 	/* Clean up */
149 	LLVMDisposeMemoryBuffer(out_buffer);
150 
151 out:
152 	if (diag.retval != 0)
153 		pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
154 	return diag.retval;
155 }
156 
tgsi2llvmtype(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type type)157 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
158 			  enum tgsi_opcode_type type)
159 {
160 	struct si_shader_context *ctx = si_shader_context(bld_base);
161 
162 	switch (type) {
163 	case TGSI_TYPE_UNSIGNED:
164 	case TGSI_TYPE_SIGNED:
165 		return ctx->ac.i32;
166 	case TGSI_TYPE_UNSIGNED64:
167 	case TGSI_TYPE_SIGNED64:
168 		return ctx->ac.i64;
169 	case TGSI_TYPE_DOUBLE:
170 		return ctx->ac.f64;
171 	case TGSI_TYPE_UNTYPED:
172 	case TGSI_TYPE_FLOAT:
173 		return ctx->ac.f32;
174 	default: break;
175 	}
176 	return 0;
177 }
178 
/**
 * Bitcast \p value to the LLVM type that corresponds to TGSI type \p type.
 *
 * If the TGSI type has no LLVM mapping, \p value is returned unchanged.
 */
LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
		     enum tgsi_opcode_type type, LLVMValueRef value)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMTypeRef target = tgsi2llvmtype(bld_base, type);

	if (!target)
		return value;

	return LLVMBuildBitCast(ctx->ac.builder, value, target, "");
}
190 
191 /**
192  * Return a value that is equal to the given i32 \p index if it lies in [0,num)
193  * or an undefined value in the same interval otherwise.
194  */
si_llvm_bound_index(struct si_shader_context * ctx,LLVMValueRef index,unsigned num)195 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
196 				 LLVMValueRef index,
197 				 unsigned num)
198 {
199 	LLVMBuilderRef builder = ctx->ac.builder;
200 	LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
201 	LLVMValueRef cc;
202 
203 	if (util_is_power_of_two(num)) {
204 		index = LLVMBuildAnd(builder, index, c_max, "");
205 	} else {
206 		/* In theory, this MAX pattern should result in code that is
207 		 * as good as the bit-wise AND above.
208 		 *
209 		 * In practice, LLVM generates worse code (at the time of
210 		 * writing), because its value tracking is not strong enough.
211 		 */
212 		cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
213 		index = LLVMBuildSelect(builder, cc, index, c_max, "");
214 	}
215 
216 	return index;
217 }
218 
/**
 * Build a 4-component shuffle of \p value that selects the source lanes
 * given by the four swizzle arguments (in x, y, z, w order).
 */
static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
				 LLVMValueRef value,
				 unsigned swizzle_x,
				 unsigned swizzle_y,
				 unsigned swizzle_z,
				 unsigned swizzle_w)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const unsigned lanes[4] = { swizzle_x, swizzle_y, swizzle_z, swizzle_w };
	LLVMValueRef mask[4];
	unsigned i;

	for (i = 0; i < 4; i++)
		mask[i] = LLVMConstInt(ctx->i32, lanes[i], 0);

	/* The second shuffle operand is undef; only lanes of 'value' are
	 * expected to be selected.
	 */
	return LLVMBuildShuffleVector(ctx->ac.builder,
				      value,
				      LLVMGetUndef(LLVMTypeOf(value)),
				      LLVMConstVector(mask, 4), "");
}
239 
240 /**
241  * Return the description of the array covering the given temporary register
242  * index.
243  */
244 static unsigned
get_temp_array_id(struct lp_build_tgsi_context * bld_base,unsigned reg_index,const struct tgsi_ind_register * reg)245 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
246 		  unsigned reg_index,
247 		  const struct tgsi_ind_register *reg)
248 {
249 	struct si_shader_context *ctx = si_shader_context(bld_base);
250 	unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
251 	unsigned i;
252 
253 	if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
254 		return reg->ArrayID;
255 
256 	for (i = 0; i < num_arrays; i++) {
257 		const struct tgsi_array_info *array = &ctx->temp_arrays[i];
258 
259 		if (reg_index >= array->range.First && reg_index <= array->range.Last)
260 			return i + 1;
261 	}
262 
263 	return 0;
264 }
265 
266 static struct tgsi_declaration_range
get_array_range(struct lp_build_tgsi_context * bld_base,unsigned File,unsigned reg_index,const struct tgsi_ind_register * reg)267 get_array_range(struct lp_build_tgsi_context *bld_base,
268 		unsigned File, unsigned reg_index,
269 		const struct tgsi_ind_register *reg)
270 {
271 	struct si_shader_context *ctx = si_shader_context(bld_base);
272 	struct tgsi_declaration_range range;
273 
274 	if (File == TGSI_FILE_TEMPORARY) {
275 		unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
276 		if (array_id)
277 			return ctx->temp_arrays[array_id - 1].range;
278 	}
279 
280 	range.First = 0;
281 	range.Last = bld_base->info->file_max[File];
282 	return range;
283 }
284 
285 /**
286  * For indirect registers, construct a pointer directly to the requested
287  * element using getelementptr if possible.
288  *
289  * Returns NULL if the insertelement/extractelement fallback for array access
290  * must be used.
291  */
292 static LLVMValueRef
get_pointer_into_array(struct si_shader_context * ctx,unsigned file,unsigned swizzle,unsigned reg_index,const struct tgsi_ind_register * reg_indirect)293 get_pointer_into_array(struct si_shader_context *ctx,
294 		       unsigned file,
295 		       unsigned swizzle,
296 		       unsigned reg_index,
297 		       const struct tgsi_ind_register *reg_indirect)
298 {
299 	unsigned array_id;
300 	struct tgsi_array_info *array;
301 	LLVMBuilderRef builder = ctx->ac.builder;
302 	LLVMValueRef idxs[2];
303 	LLVMValueRef index;
304 	LLVMValueRef alloca;
305 
306 	if (file != TGSI_FILE_TEMPORARY)
307 		return NULL;
308 
309 	array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
310 	if (!array_id)
311 		return NULL;
312 
313 	alloca = ctx->temp_array_allocas[array_id - 1];
314 	if (!alloca)
315 		return NULL;
316 
317 	array = &ctx->temp_arrays[array_id - 1];
318 
319 	if (!(array->writemask & (1 << swizzle)))
320 		return ctx->undef_alloca;
321 
322 	index = si_get_indirect_index(ctx, reg_indirect, 1,
323 				      reg_index - ctx->temp_arrays[array_id - 1].range.First);
324 
325 	/* Ensure that the index is within a valid range, to guard against
326 	 * VM faults and overwriting critical data (e.g. spilled resource
327 	 * descriptors).
328 	 *
329 	 * TODO It should be possible to avoid the additional instructions
330 	 * if LLVM is changed so that it guarantuees:
331 	 * 1. the scratch space descriptor isolates the current wave (this
332 	 *    could even save the scratch offset SGPR at the cost of an
333 	 *    additional SALU instruction)
334 	 * 2. the memory for allocas must be allocated at the _end_ of the
335 	 *    scratch space (after spilled registers)
336 	 */
337 	index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
338 
339 	index = LLVMBuildMul(
340 		builder, index,
341 		LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
342 		"");
343 	index = LLVMBuildAdd(
344 		builder, index,
345 		LLVMConstInt(ctx->i32,
346 			     util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
347 		"");
348 	idxs[0] = ctx->i32_0;
349 	idxs[1] = index;
350 	return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
351 }
352 
353 LLVMValueRef
si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context * bld_base,LLVMTypeRef type,LLVMValueRef ptr,LLVMValueRef ptr2)354 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
355 			 LLVMTypeRef type,
356 			 LLVMValueRef ptr,
357 			 LLVMValueRef ptr2)
358 {
359 	struct si_shader_context *ctx = si_shader_context(bld_base);
360 	LLVMValueRef result;
361 
362 	result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
363 
364 	result = LLVMBuildInsertElement(ctx->ac.builder,
365 					result,
366 					ac_to_integer(&ctx->ac, ptr),
367 					ctx->i32_0, "");
368 	result = LLVMBuildInsertElement(ctx->ac.builder,
369 					result,
370 					ac_to_integer(&ctx->ac, ptr2),
371 					ctx->i32_1, "");
372 	return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
373 }
374 
375 static LLVMValueRef
emit_array_fetch(struct lp_build_tgsi_context * bld_base,unsigned File,enum tgsi_opcode_type type,struct tgsi_declaration_range range,unsigned swizzle)376 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
377 		 unsigned File, enum tgsi_opcode_type type,
378 		 struct tgsi_declaration_range range,
379 		 unsigned swizzle)
380 {
381 	struct si_shader_context *ctx = si_shader_context(bld_base);
382 	unsigned i, size = range.Last - range.First + 1;
383 	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
384 	LLVMValueRef result = LLVMGetUndef(vec);
385 
386 	struct tgsi_full_src_register tmp_reg = {};
387 	tmp_reg.Register.File = File;
388 
389 	for (i = 0; i < size; ++i) {
390 		tmp_reg.Register.Index = i + range.First;
391 		LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
392 		result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
393 			LLVMConstInt(ctx->i32, i, 0), "array_vector");
394 	}
395 	return result;
396 }
397 
398 static LLVMValueRef
load_value_from_array(struct lp_build_tgsi_context * bld_base,unsigned file,enum tgsi_opcode_type type,unsigned swizzle,unsigned reg_index,const struct tgsi_ind_register * reg_indirect)399 load_value_from_array(struct lp_build_tgsi_context *bld_base,
400 		      unsigned file,
401 		      enum tgsi_opcode_type type,
402 		      unsigned swizzle,
403 		      unsigned reg_index,
404 		      const struct tgsi_ind_register *reg_indirect)
405 {
406 	struct si_shader_context *ctx = si_shader_context(bld_base);
407 	LLVMBuilderRef builder = ctx->ac.builder;
408 	LLVMValueRef ptr;
409 
410 	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
411 	if (ptr) {
412 		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
413 		if (tgsi_type_is_64bit(type)) {
414 			LLVMValueRef ptr_hi, val_hi;
415 			ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
416 			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
417 			val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
418 						       val, val_hi);
419 		}
420 
421 		return val;
422 	} else {
423 		struct tgsi_declaration_range range =
424 			get_array_range(bld_base, file, reg_index, reg_indirect);
425 		LLVMValueRef index =
426 			si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
427 		LLVMValueRef array =
428 			emit_array_fetch(bld_base, file, type, range, swizzle);
429 		return LLVMBuildExtractElement(builder, array, index, "");
430 	}
431 }
432 
433 static void
store_value_to_array(struct lp_build_tgsi_context * bld_base,LLVMValueRef value,unsigned file,unsigned chan_index,unsigned reg_index,const struct tgsi_ind_register * reg_indirect)434 store_value_to_array(struct lp_build_tgsi_context *bld_base,
435 		     LLVMValueRef value,
436 		     unsigned file,
437 		     unsigned chan_index,
438 		     unsigned reg_index,
439 		     const struct tgsi_ind_register *reg_indirect)
440 {
441 	struct si_shader_context *ctx = si_shader_context(bld_base);
442 	LLVMBuilderRef builder = ctx->ac.builder;
443 	LLVMValueRef ptr;
444 
445 	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
446 	if (ptr) {
447 		LLVMBuildStore(builder, value, ptr);
448 	} else {
449 		unsigned i, size;
450 		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
451 		LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
452 		LLVMValueRef array =
453 			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
454 		LLVMValueRef temp_ptr;
455 
456 		array = LLVMBuildInsertElement(builder, array, value, index, "");
457 
458 		size = range.Last - range.First + 1;
459 		for (i = 0; i < size; ++i) {
460 			switch(file) {
461 			case TGSI_FILE_OUTPUT:
462 				temp_ptr = ctx->outputs[i + range.First][chan_index];
463 				break;
464 
465 			case TGSI_FILE_TEMPORARY:
466 				if (range.First + i >= ctx->temps_count)
467 					continue;
468 				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
469 				break;
470 
471 			default:
472 				continue;
473 			}
474 			value = LLVMBuildExtractElement(builder, array,
475 				LLVMConstInt(ctx->i32, i, 0), "");
476 			LLVMBuildStore(builder, value, temp_ptr);
477 		}
478 	}
479 }
480 
481 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
482  * reload them at each use. This must be true if the shader is using
483  * derivatives and KILL, because KILL can leave the WQM and then a lazy
484  * input load isn't in the WQM anymore.
485  */
si_preload_fs_inputs(struct si_shader_context * ctx)486 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
487 {
488 	struct si_shader_selector *sel = ctx->shader->selector;
489 
490 	return sel->info.uses_derivatives &&
491 	       sel->info.uses_kill;
492 }
493 
494 static LLVMValueRef
get_output_ptr(struct lp_build_tgsi_context * bld_base,unsigned index,unsigned chan)495 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
496 	       unsigned chan)
497 {
498 	struct si_shader_context *ctx = si_shader_context(bld_base);
499 
500 	assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
501 	return ctx->outputs[index][chan];
502 }
503 
/**
 * Fetch one channel (or all channels) of a TGSI source register as an LLVM
 * value.
 *
 * \param bld_base  TGSI build context
 * \param reg       source register to read
 * \param type      TGSI type the result is converted to; for 64-bit types
 *                  channels \p swizzle and \p swizzle + 1 are combined
 * \param swizzle   channel to fetch, or ~0 to gather all TGSI_NUM_CHANNELS
 *                  channels into a single value
 *
 * Handles indirect registers as well as the IMMEDIATE, INPUT, TEMPORARY and
 * OUTPUT files. Any other register file yields an undef value of the
 * requested type.
 */
LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				const struct tgsi_full_src_register *reg,
				enum tgsi_opcode_type type,
				unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef result = NULL, ptr, ptr2;

	/* ~0 requests every channel: fetch each one recursively and gather. */
	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(&ctx->gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	/* Indirectly addressed registers go through the array helpers. */
	if (reg->Register.Indirect) {
		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
				swizzle, reg->Register.Index, &reg->Indirect);
		return bitcast(bld_base, type, load);
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		/* Immediates are stored in ctx->imms, so the result is built
		 * with constant expressions only.
		 */
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		if (tgsi_type_is_64bit(type)) {
			result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
							ctx->i32_0);
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
							ctx->i32_1);
			return LLVMConstBitCast(result, ctype);
		} else {
			return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
		}
	}

	case TGSI_FILE_INPUT: {
		unsigned index = reg->Register.Index;
		LLVMValueRef input[4];

		/* I don't think doing this for vertex shaders is beneficial.
		 * For those, we want to make sure the VMEM loads are executed
		 * only once. Fragment shaders don't care much, because
		 * v_interp instructions are much cheaper than VMEM loads.
		 */
		/* Fragment shaders without preloading reload the input here;
		 * everything else uses the values stored in ctx->inputs.
		 */
		if (!si_preload_fs_inputs(ctx) &&
		    ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
		else
			memcpy(input, &ctx->inputs[index * 4], sizeof(input));

		result = input[swizzle];

		if (tgsi_type_is_64bit(type)) {
			ptr = result;
			ptr2 = input[swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							ptr, ptr2);
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
		/* Temporaries beyond the allocated count read as undef. */
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		if (tgsi_type_is_64bit(type)) {
			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		/* Outputs are read back from their storage pointers. */
		ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
		if (tgsi_type_is_64bit(type)) {
			ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		/* Register files not handled here yield undef. */
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	/* 32-bit results are converted to the requested type last. */
	return bitcast(bld_base, type, result);
}
602 
fetch_system_value(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type type,unsigned swizzle)603 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
604 				       const struct tgsi_full_src_register *reg,
605 				       enum tgsi_opcode_type type,
606 				       unsigned swizzle)
607 {
608 	struct si_shader_context *ctx = si_shader_context(bld_base);
609 	LLVMBuilderRef builder = ctx->ac.builder;
610 	LLVMValueRef cval = ctx->system_values[reg->Register.Index];
611 
612 	if (tgsi_type_is_64bit(type)) {
613 		LLVMValueRef lo, hi;
614 
615 		assert(swizzle == 0 || swizzle == 2);
616 
617 		lo = LLVMBuildExtractElement(
618 			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
619 		hi = LLVMBuildExtractElement(
620 			builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
621 
622 		return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
623 						lo, hi);
624 	}
625 
626 	if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
627 		cval = LLVMBuildExtractElement(
628 			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
629 	} else {
630 		assert(swizzle == 0);
631 	}
632 
633 	return bitcast(bld_base, type, cval);
634 }
635 
/**
 * Handle one TGSI declaration by creating backing storage for, or loading
 * the values of, the declared register range.
 *
 * - ADDRESS:      one i32 slot per channel via lp_build_alloca_undef.
 * - TEMPORARY:    one f32 slot per channel, or pointers into a single array
 *                 slot when the declaration is an array kept in memory.
 * - INPUT:        records a per-index copy of the declaration and loads the
 *                 input unless this is a fragment shader without preloading.
 * - SYSTEM_VALUE: calls si_load_system_value for every declared index.
 * - OUTPUT:       one f32 slot per channel, created once per index.
 * - MEMORY:       forwarded to si_declare_compute_memory.
 *
 * Declarations for any other register file are ignored.
 */
static void emit_declaration(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_declaration *decl)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	unsigned first, last, i;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		 unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				 ctx->addrs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->i32, "");
			}
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
	{
		/* name stays empty unless DEBUG is defined. */
		char name[16] = "";
		LLVMValueRef array_alloca = NULL;
		unsigned decl_size;
		unsigned writemask = decl->Declaration.UsageMask;
		first = decl->Range.First;
		last = decl->Range.Last;
		/* Number of channels covered by the declaration (4 per register). */
		decl_size = 4 * ((last - first) + 1);

		if (decl->Declaration.Array) {
			unsigned id = decl->Array.ArrayID - 1;
			unsigned array_size;

			/* Narrow the stored writemask to the channels this
			 * declaration uses; array_size counts only those.
			 */
			writemask &= ctx->temp_arrays[id].writemask;
			ctx->temp_arrays[id].writemask = writemask;
			array_size = ((last - first) + 1) * util_bitcount(writemask);

			/* If the array has more than 16 elements, store it
			 * in memory using an alloca that spans the entire
			 * array.
			 *
			 * Otherwise, store each array element individually.
			 * We will then generate vectors (per-channel, up to
			 * <16 x float> if the usagemask is a single bit) for
			 * indirect addressing.
			 *
			 * Note that 16 is the number of vector elements that
			 * LLVM will store in a register, so theoretically an
			 * array with up to 4 * 16 = 64 elements could be
			 * handled this way, but whether that's a good idea
			 * depends on VGPR register pressure elsewhere.
			 *
			 * FIXME: We shouldn't need to have the non-alloca
			 * code path for arrays. LLVM should be smart enough to
			 * promote allocas into registers when profitable.
			 */
			if (array_size > 16 ||
			    !ctx->screen->llvm_has_working_vgpr_indexing) {
				array_alloca = lp_build_alloca_undef(&ctx->gallivm,
					LLVMArrayType(ctx->f32,
						      array_size), "array");
				ctx->temp_array_allocas[id] = array_alloca;
			}
		}

		/* The temps table is allocated lazily, on the first TEMPORARY
		 * declaration, and sized for the whole register file.
		 */
		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		if (!array_alloca) {
			/* Individual storage: one f32 slot per channel. */
			for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "TEMP%d.%c",
					 first + i / 4, "xyzw"[i % 4]);
#endif
				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
					lp_build_alloca_undef(&ctx->gallivm,
							      ctx->f32,
							      name);
			}
		} else {
			LLVMValueRef idxs[2] = {
				ctx->i32_0,
				NULL
			};
			/* Next free element in the packed array storage. */
			unsigned j = 0;

			if (writemask != TGSI_WRITEMASK_XYZW &&
			    !ctx->undef_alloca) {
				/* Create a dummy alloca. We use it so that we
				 * have a pointer that is safe to load from if
				 * a shader ever reads from a channel that
				 * it never writes to.
				 */
				ctx->undef_alloca = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->f32, "undef");
			}

			/* Written channels get consecutive array elements;
			 * all other channels point at the dummy slot.
			 */
			for (i = 0; i < decl_size; ++i) {
				LLVMValueRef ptr;
				if (writemask & (1 << (i % 4))) {
#ifdef DEBUG
					snprintf(name, sizeof(name), "TEMP%d.%c",
						 first + i / 4, "xyzw"[i % 4]);
#endif
					idxs[1] = LLVMConstInt(ctx->i32, j, 0);
					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
					j++;
				} else {
					ptr = ctx->undef_alloca;
				}
				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
			}
		}
		break;
	}
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			/* Skip indices whose declaration was already recorded. */
			if (ctx->load_input &&
			    ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
				/* Store a single-index copy of the declaration. */
				ctx->input_decls[idx] = *decl;
				ctx->input_decls[idx].Range.First = idx;
				ctx->input_decls[idx].Range.Last = idx;
				ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;

				/* Fragment shaders without preloading load
				 * their inputs at each use instead.
				 */
				if (si_preload_fs_inputs(ctx) ||
				    bld_base->info->processor != PIPE_SHADER_FRAGMENT)
					ctx->load_input(ctx, idx, &ctx->input_decls[idx],
							&ctx->inputs[idx * 4]);
			}
		}
	}
	break;

	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			si_load_system_value(ctx, idx, decl);
		}
	}
	break;

	case TGSI_FILE_OUTPUT:
	{
		/* name stays empty unless DEBUG is defined. */
		char name[16] = "";
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			/* Storage for this output already exists. */
			if (ctx->outputs[idx][0])
				continue;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "OUT%d.%c",
					 idx, "xyzw"[chan % 4]);
#endif
				ctx->outputs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->f32, name);
			}
		}
		break;
	}

	case TGSI_FILE_MEMORY:
		si_declare_compute_memory(ctx, decl);
		break;

	default:
		break;
	}
}
814 
/**
 * Store the per-channel results of a TGSI instruction into destination
 * register \p index of \p inst.
 *
 * \param bld_base  TGSI build context
 * \param inst      instruction whose destination is written
 * \param info      opcode info; only forwarded when the store is re-dispatched
 * \param index     which destination register of \p inst to write
 * \param dst       per-channel values. If dst[0] is a vector, it is split
 *                  into channels and the store is re-dispatched through
 *                  bld_base->emit_store.
 *
 * Only channels enabled in the destination writemask are written. Direct
 * stores handle the OUTPUT and TEMPORARY files; ADDRESS registers are
 * handled separately; indirect stores go through store_value_to_array.
 */
void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
			const struct tgsi_full_instruction *inst,
			const struct tgsi_opcode_info *info,
			unsigned index,
			LLVMValueRef dst[4])
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
	bool is_vec_store = false;
	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	/* A vector result is split into its written channels and stored
	 * again as scalars.
	 */
	if (is_vec_store) {
		LLVMValueRef values[4] = {};
		uint32_t writemask = reg->Register.WriteMask;
		while (writemask) {
			unsigned chan = u_bit_scan(&writemask);
			/* This local shadows the 'index' parameter inside the loop only. */
			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
			values[chan]  = LLVMBuildExtractElement(ctx->ac.builder,
							dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, index, values);
		return;
	}

	uint32_t writemask = reg->Register.WriteMask;
	while (writemask) {
		unsigned chan_index = u_bit_scan(&writemask);
		LLVMValueRef value = dst[chan_index];

		/* A 64-bit value covers two channels, so the odd channel of
		 * each pair is skipped.
		 */
		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
			continue;
		if (inst->Instruction.Saturate)
			value = ac_build_clamp(&ctx->ac, value);

		/* Address registers are stored without the float conversion. */
		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}

		if (!tgsi_type_is_64bit(dtype))
			value = ac_to_float(&ctx->ac, value);

		if (reg->Register.Indirect) {
			unsigned file = reg->Register.File;
			unsigned reg_index = reg->Register.Index;
			store_value_to_array(bld_base, value, file, chan_index,
					     reg_index, &reg->Indirect);
		} else {
			/* Look up the storage pointer(s); 64-bit values also
			 * need the pointer of the following channel.
			 */
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
				break;

			case TGSI_FILE_TEMPORARY:
			{
				/* Temporaries beyond the allocated count are skipped. */
				if (reg->Register.Index >= ctx->temps_count)
					continue;

				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];

				break;
			}
			default:
				/* Other files: stop storing altogether. */
				return;
			}
			if (!tgsi_type_is_64bit(dtype))
				LLVMBuildStore(builder, value, temp_ptr);
			else {
				/* Split the 64-bit value into two 32-bit halves
				 * and store them to consecutive channels.
				 */
				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
								    LLVMVectorType(ctx->i32, 2), "");
				LLVMValueRef val2;
				value = LLVMBuildExtractElement(builder, ptr,
								ctx->i32_0, "");
				val2 = LLVMBuildExtractElement(builder, ptr,
							       ctx->i32_1, "");

				LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
				LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
			}
		}
	}
}
909 
/**
 * Convert a TGSI program counter into the number used to label
 * control-flow blocks.
 *
 * The result is pc minus one, so that the label matches the number of the
 * corresponding opcode in the TGSI dump (for example, an if block gets the
 * same suffix as the instruction number of its TGSI IF).
 */
static int get_line(int pc)
{
	const int tgsi_line = pc - 1;

	return tgsi_line;
}
918 
bgnloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)919 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
920 			 struct lp_build_tgsi_context *bld_base,
921 			 struct lp_build_emit_data *emit_data)
922 {
923 	struct si_shader_context *ctx = si_shader_context(bld_base);
924 	ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
925 }
926 
brk_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)927 static void brk_emit(const struct lp_build_tgsi_action *action,
928 		     struct lp_build_tgsi_context *bld_base,
929 		     struct lp_build_emit_data *emit_data)
930 {
931 	struct si_shader_context *ctx = si_shader_context(bld_base);
932 	ac_build_break(&ctx->ac);
933 }
934 
cont_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)935 static void cont_emit(const struct lp_build_tgsi_action *action,
936 		      struct lp_build_tgsi_context *bld_base,
937 		      struct lp_build_emit_data *emit_data)
938 {
939 	struct si_shader_context *ctx = si_shader_context(bld_base);
940 	ac_build_continue(&ctx->ac);
941 }
942 
else_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)943 static void else_emit(const struct lp_build_tgsi_action *action,
944 		      struct lp_build_tgsi_context *bld_base,
945 		      struct lp_build_emit_data *emit_data)
946 {
947 	struct si_shader_context *ctx = si_shader_context(bld_base);
948 	ac_build_else(&ctx->ac, get_line(bld_base->pc));
949 }
950 
endif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)951 static void endif_emit(const struct lp_build_tgsi_action *action,
952 		       struct lp_build_tgsi_context *bld_base,
953 		       struct lp_build_emit_data *emit_data)
954 {
955 	struct si_shader_context *ctx = si_shader_context(bld_base);
956 	ac_build_endif(&ctx->ac, get_line(bld_base->pc));
957 }
958 
endloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)959 static void endloop_emit(const struct lp_build_tgsi_action *action,
960 			 struct lp_build_tgsi_context *bld_base,
961 			 struct lp_build_emit_data *emit_data)
962 {
963 	struct si_shader_context *ctx = si_shader_context(bld_base);
964 	ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
965 }
966 
if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)967 static void if_emit(const struct lp_build_tgsi_action *action,
968 		    struct lp_build_tgsi_context *bld_base,
969 		    struct lp_build_emit_data *emit_data)
970 {
971 	struct si_shader_context *ctx = si_shader_context(bld_base);
972 	ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
973 }
974 
uif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)975 static void uif_emit(const struct lp_build_tgsi_action *action,
976 		     struct lp_build_tgsi_context *bld_base,
977 		     struct lp_build_emit_data *emit_data)
978 {
979 	struct si_shader_context *ctx = si_shader_context(bld_base);
980 	ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
981 }
982 
emit_immediate(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_immediate * imm)983 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
984 			   const struct tgsi_full_immediate *imm)
985 {
986 	unsigned i;
987 	struct si_shader_context *ctx = si_shader_context(bld_base);
988 
989 	for (i = 0; i < 4; ++i) {
990 		ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
991 				LLVMConstInt(ctx->i32, imm->u[i].Uint, false   );
992 	}
993 
994 	ctx->imms_num++;
995 }
996 
/**
 * Initialize a shader context for LLVM code generation.
 *
 * The whole context is zeroed first, so whatever it held before is
 * discarded without being released. The function then creates a fresh
 * LLVM context, a module named "tgsi" targeting "amdgcn--" with the data
 * layout of the given target machine, and an IR builder; sets up the
 * gallivm/TGSI build contexts; registers the control-flow emit handlers;
 * and caches commonly used LLVM types and constants in the context.
 *
 * \param ctx      context to initialize (overwritten completely)
 * \param sscreen  screen; supplies the debug flags and chip class/family
 * \param tm       target machine; stored in ctx and used for the data layout
 */
void si_llvm_context_init(struct si_shader_context *ctx,
			  struct si_screen *sscreen,
			  LLVMTargetMachineRef tm)
{
	struct lp_type type;

	/* Initialize the gallivm object:
	 * We are only using the module, context, and builder fields of this struct.
	 * This should be enough for us to be able to pass our gallivm struct to the
	 * helper functions in the gallivm module.
	 */
	memset(ctx, 0, sizeof(*ctx));
	ctx->screen = sscreen;
	ctx->tm = tm;

	ctx->gallivm.context = LLVMContextCreate();
	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
						ctx->gallivm.context);
	LLVMSetTarget(ctx->gallivm.module, "amdgcn--");

	/* Copy the target machine's data layout into the module; the
	 * temporary layout object and its string form are released again
	 * right away.
	 */
	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
	LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
	LLVMDisposeTargetData(data_layout);
	LLVMDisposeMessage(data_layout_str);

	/* The UNSAFE_MATH debug flag selects the float mode of the builder. */
	bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
	enum ac_float_mode float_mode =
		unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
				AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;

	ctx->gallivm.builder = ac_create_builder(ctx->gallivm.context,
						 float_mode);

	/* The ac context shares the module and builder created above. */
	ac_llvm_context_init(&ctx->ac, ctx->gallivm.context,
			     sscreen->info.chip_class, sscreen->info.family);
	ctx->ac.module = ctx->gallivm.module;
	ctx->ac.builder = ctx->gallivm.builder;

	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;

	/* Scalar (length 1) signed 32-bit float as the base type. */
	type.floating = true;
	type.fixed = false;
	type.sign = true;
	type.norm = false;
	type.width = 32;
	type.length = 1;

	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
	/* Same type with the width doubled to 64 bits for the
	 * double/uint64/int64 build contexts.
	 */
	type.width *= 2;
	lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));

	bld_base->soa = 1;
	bld_base->emit_swizzle = emit_swizzle;
	bld_base->emit_declaration = emit_declaration;
	bld_base->emit_immediate = emit_immediate;

	/* metadata allowing 2.5 ULP */
	/* (6 is the length of the string "fpmath".) */
	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->ac.context,
						       "fpmath", 6);
	LLVMValueRef arg = LLVMConstReal(ctx->ac.f32, 2.5);
	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->ac.context,
						     &arg, 1);

	/* Control-flow opcodes are handled by the emit functions of this file. */
	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;

	si_shader_context_init_alu(&ctx->bld_base);
	si_shader_context_init_mem(ctx);

	/* Cache frequently used LLVM types... */
	ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
	ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
	ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
	ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
	ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
	ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
	ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);

	/* ...and the i32 constants 0 and 1. */
	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
}
1092 
1093 /* Set the context to a certain TGSI shader. Can be called repeatedly
1094  * to change the shader. */
si_llvm_context_set_tgsi(struct si_shader_context * ctx,struct si_shader * shader)1095 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1096 			      struct si_shader *shader)
1097 {
1098 	const struct tgsi_shader_info *info = NULL;
1099 	const struct tgsi_token *tokens = NULL;
1100 
1101 	if (shader && shader->selector) {
1102 		info = &shader->selector->info;
1103 		tokens = shader->selector->tokens;
1104 	}
1105 
1106 	ctx->shader = shader;
1107 	ctx->type = info ? info->processor : -1;
1108 	ctx->bld_base.info = info;
1109 
1110 	/* Clean up the old contents. */
1111 	FREE(ctx->temp_arrays);
1112 	ctx->temp_arrays = NULL;
1113 	FREE(ctx->temp_array_allocas);
1114 	ctx->temp_array_allocas = NULL;
1115 
1116 	FREE(ctx->imms);
1117 	ctx->imms = NULL;
1118 	ctx->imms_num = 0;
1119 
1120 	FREE(ctx->temps);
1121 	ctx->temps = NULL;
1122 	ctx->temps_count = 0;
1123 
1124 	if (!info || !tokens)
1125 		return;
1126 
1127 	if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1128 		int size = info->array_max[TGSI_FILE_TEMPORARY];
1129 
1130 		ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1131 		ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1132 
1133 		tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1134 				 ctx->temp_arrays);
1135 	}
1136 	if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1137 		int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1138 		ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1139 	}
1140 
1141 	/* Re-set these to start with a clean slate. */
1142 	ctx->bld_base.num_instructions = 0;
1143 	ctx->bld_base.pc = 0;
1144 	memset(ctx->outputs, 0, sizeof(ctx->outputs));
1145 
1146 	ctx->bld_base.emit_store = si_llvm_emit_store;
1147 	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1148 	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1149 	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1150 	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1151 	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1152 
1153 	ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
1154 	ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
1155 	ctx->num_samplers = util_last_bit(info->samplers_declared);
1156 	ctx->num_images = util_last_bit(info->images_declared);
1157 }
1158 
/**
 * Create the main LLVM function of the shader and position the builder at
 * the end of its first basic block ("main_body").
 *
 * The created function is stored in ctx->main_fn and its return type in
 * ctx->return_type. The AMDGPU calling convention is chosen from ctx->type;
 * on GFX9 and newer, shaders compiled as LS use the TESS_CTRL convention
 * and shaders compiled as ES use the GEOMETRY convention, because those
 * stages are merged. ctx->shader is dereferenced on that path, so it must
 * be set.
 *
 * \param ctx               shader context
 * \param name              name of the LLVM function
 * \param return_types      element types of the returned struct
 * \param num_return_elems  number of return elements; 0 means the function
 *                          returns void
 * \param ParamTypes        parameter types of the function
 * \param ParamCount        number of parameters
 */
void si_llvm_create_func(struct si_shader_context *ctx,
			 const char *name,
			 LLVMTypeRef *return_types, unsigned num_return_elems,
			 LLVMTypeRef *ParamTypes, unsigned ParamCount)
{
	LLVMTypeRef main_fn_type, ret_type;
	LLVMBasicBlockRef main_fn_body;
	enum si_llvm_calling_convention call_conv;
	unsigned real_shader_type;

	/* Multiple return values are wrapped in a packed struct. */
	if (num_return_elems)
		ret_type = LLVMStructTypeInContext(ctx->ac.context,
						   return_types,
						   num_return_elems, true);
	else
		ret_type = ctx->voidt;

	/* Setup the function */
	ctx->return_type = ret_type;
	main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
	ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
	main_fn_body = LLVMAppendBasicBlockInContext(ctx->ac.context,
			ctx->main_fn, "main_body");
	LLVMPositionBuilderAtEnd(ctx->ac.builder, main_fn_body);

	real_shader_type = ctx->type;

	/* LS is merged into HS (TCS), and ES is merged into GS. */
	if (ctx->screen->info.chip_class >= GFX9) {
		if (ctx->shader->key.as_ls)
			real_shader_type = PIPE_SHADER_TESS_CTRL;
		else if (ctx->shader->key.as_es)
			real_shader_type = PIPE_SHADER_GEOMETRY;
	}

	switch (real_shader_type) {
	case PIPE_SHADER_VERTEX:
	case PIPE_SHADER_TESS_EVAL:
		call_conv = RADEON_LLVM_AMDGPU_VS;
		break;
	case PIPE_SHADER_TESS_CTRL:
		/* The HS convention is only used with LLVM 5.0 and newer. */
		call_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS :
						  RADEON_LLVM_AMDGPU_VS;
		break;
	case PIPE_SHADER_GEOMETRY:
		call_conv = RADEON_LLVM_AMDGPU_GS;
		break;
	case PIPE_SHADER_FRAGMENT:
		call_conv = RADEON_LLVM_AMDGPU_PS;
		break;
	case PIPE_SHADER_COMPUTE:
		call_conv = RADEON_LLVM_AMDGPU_CS;
		break;
	default:
		unreachable("Unhandled shader type");
	}

	LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
}
1218 
si_llvm_optimize_module(struct si_shader_context * ctx)1219 void si_llvm_optimize_module(struct si_shader_context *ctx)
1220 {
1221 	struct gallivm_state *gallivm = &ctx->gallivm;
1222 	const char *triple = LLVMGetTarget(gallivm->module);
1223 	LLVMTargetLibraryInfoRef target_library_info;
1224 
1225 	/* Dump LLVM IR before any optimization passes */
1226 	if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
1227 	    si_can_dump_shader(ctx->screen, ctx->type))
1228 		LLVMDumpModule(ctx->gallivm.module);
1229 
1230 	/* Create the pass manager */
1231 	gallivm->passmgr = LLVMCreatePassManager();
1232 
1233 	target_library_info = gallivm_create_target_library_info(triple);
1234 	LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1235 
1236 	if (si_extra_shader_checks(ctx->screen, ctx->type))
1237 		LLVMAddVerifierPass(gallivm->passmgr);
1238 
1239 	LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1240 
1241 	/* This pass should eliminate all the load and store instructions */
1242 	LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1243 
1244 	/* Add some optimization passes */
1245 	LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1246 	LLVMAddLICMPass(gallivm->passmgr);
1247 	LLVMAddAggressiveDCEPass(gallivm->passmgr);
1248 	LLVMAddCFGSimplificationPass(gallivm->passmgr);
1249 #if HAVE_LLVM >= 0x0400
1250 	/* This is recommended by the instruction combining pass. */
1251 	LLVMAddEarlyCSEMemSSAPass(gallivm->passmgr);
1252 #endif
1253 	LLVMAddInstructionCombiningPass(gallivm->passmgr);
1254 
1255 	/* Run the pass */
1256 	LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1257 
1258 	LLVMDisposeBuilder(ctx->ac.builder);
1259 	LLVMDisposePassManager(gallivm->passmgr);
1260 	gallivm_dispose_target_library_info(target_library_info);
1261 }
1262 
si_llvm_dispose(struct si_shader_context * ctx)1263 void si_llvm_dispose(struct si_shader_context *ctx)
1264 {
1265 	LLVMDisposeModule(ctx->gallivm.module);
1266 	LLVMContextDispose(ctx->gallivm.context);
1267 	FREE(ctx->temp_arrays);
1268 	ctx->temp_arrays = NULL;
1269 	FREE(ctx->temp_array_allocas);
1270 	ctx->temp_array_allocas = NULL;
1271 	FREE(ctx->temps);
1272 	ctx->temps = NULL;
1273 	ctx->temps_count = 0;
1274 	FREE(ctx->imms);
1275 	ctx->imms = NULL;
1276 	ctx->imms_num = 0;
1277 	ac_llvm_context_dispose(&ctx->ac);
1278 }
1279