1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "si_shader_internal.h"
25 #include "si_pipe.h"
26 #include "sid.h"
27 #include "gallivm/lp_bld_arit.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_intr.h"
30 #include "tgsi/tgsi_build.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_util.h"
33 
34 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
35 				struct lp_build_tgsi_context *bld_base,
36 				struct lp_build_emit_data *emit_data);
37 
38 static const struct lp_build_tgsi_action tex_action;
39 
40 /**
41  * Given a v8i32 resource descriptor for a buffer, extract the size of the
42  * buffer in number of elements and return it as an i32.
43  */
get_buffer_size(struct lp_build_tgsi_context * bld_base,LLVMValueRef descriptor)44 static LLVMValueRef get_buffer_size(
45 	struct lp_build_tgsi_context *bld_base,
46 	LLVMValueRef descriptor)
47 {
48 	struct si_shader_context *ctx = si_shader_context(bld_base);
49 	LLVMBuilderRef builder = ctx->ac.builder;
50 	LLVMValueRef size =
51 		LLVMBuildExtractElement(builder, descriptor,
52 					LLVMConstInt(ctx->i32, 2, 0), "");
53 
54 	if (ctx->screen->info.chip_class == VI) {
55 		/* On VI, the descriptor contains the size in bytes,
56 		 * but TXQ must return the size in elements.
57 		 * The stride is always non-zero for resources using TXQ.
58 		 */
59 		LLVMValueRef stride =
60 			LLVMBuildExtractElement(builder, descriptor,
61 						ctx->i32_1, "");
62 		stride = LLVMBuildLShr(builder, stride,
63 				       LLVMConstInt(ctx->i32, 16, 0), "");
64 		stride = LLVMBuildAnd(builder, stride,
65 				      LLVMConstInt(ctx->i32, 0x3FFF, 0), "");
66 
67 		size = LLVMBuildUDiv(builder, size, stride, "");
68 	}
69 
70 	return size;
71 }
72 
73 static LLVMValueRef
shader_buffer_fetch_rsrc(struct si_shader_context * ctx,const struct tgsi_full_src_register * reg,bool ubo)74 shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
75 			 const struct tgsi_full_src_register *reg,
76 			 bool ubo)
77 {
78 	LLVMValueRef index;
79 
80 	if (!reg->Register.Indirect) {
81 		index = LLVMConstInt(ctx->i32, reg->Register.Index, false);
82 	} else {
83 		index = si_get_indirect_index(ctx, &reg->Indirect,
84 					      1, reg->Register.Index);
85 	}
86 
87 	if (ubo)
88 		return ctx->abi.load_ubo(&ctx->abi, index);
89 	else
90 		return ctx->abi.load_ssbo(&ctx->abi, index, false);
91 }
92 
tgsi_is_array_image(unsigned target)93 static bool tgsi_is_array_image(unsigned target)
94 {
95 	return target == TGSI_TEXTURE_3D ||
96 	       target == TGSI_TEXTURE_CUBE ||
97 	       target == TGSI_TEXTURE_1D_ARRAY ||
98 	       target == TGSI_TEXTURE_2D_ARRAY ||
99 	       target == TGSI_TEXTURE_CUBE_ARRAY ||
100 	       target == TGSI_TEXTURE_2D_ARRAY_MSAA;
101 }
102 
103 /**
104  * Given a 256-bit resource descriptor, force the DCC enable bit to off.
105  *
106  * At least on Tonga, executing image stores on images with DCC enabled and
107  * non-trivial can eventually lead to lockups. This can occur when an
108  * application binds an image as read-only but then uses a shader that writes
109  * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
110  * program termination) in this case, but it doesn't cost much to be a bit
111  * nicer: disabling DCC in the shader still leads to undefined results but
112  * avoids the lockup.
113  */
force_dcc_off(struct si_shader_context * ctx,LLVMValueRef rsrc)114 static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
115 				  LLVMValueRef rsrc)
116 {
117 	if (ctx->screen->info.chip_class <= CIK) {
118 		return rsrc;
119 	} else {
120 		LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
121 		LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
122 		LLVMValueRef tmp;
123 
124 		tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
125 		tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
126 		return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
127 	}
128 }
129 
/**
 * Load an image or buffer descriptor from a descriptor list.
 *
 * For AC_DESC_BUFFER the list is reinterpreted as an array of v4i32 and
 * element (2 * index + 1) is loaded. For AC_DESC_IMAGE the list is indexed
 * directly with \p index.
 *
 * \param list       pointer to the descriptor list
 * \param index      descriptor slot
 * \param desc_type  AC_DESC_BUFFER or AC_DESC_IMAGE
 * \param dcc_off    if true and an image descriptor is loaded, pass the
 *                   result through force_dcc_off()
 */
LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
				LLVMValueRef list, LLVMValueRef index,
				enum ac_descriptor_type desc_type, bool dcc_off)
{
	LLVMBuilderRef b = ctx->ac.builder;
	LLVMValueRef desc;

	if (desc_type != AC_DESC_BUFFER) {
		assert(desc_type == AC_DESC_IMAGE);
	} else {
		LLVMValueRef two = LLVMConstInt(ctx->i32, 2, 0);

		index = LLVMBuildMul(b, index, two, "");
		index = LLVMBuildAdd(b, index, ctx->i32_1, "");
		list = LLVMBuildPointerCast(b, list,
					    si_const_array(ctx->v4i32, 0), "");
	}

	desc = ac_build_load_to_sgpr(&ctx->ac, list, index);

	if (dcc_off && desc_type == AC_DESC_IMAGE)
		return force_dcc_off(ctx, desc);

	return desc;
}
153 
154 /**
155  * Load the resource descriptor for \p image.
156  */
157 static void
image_fetch_rsrc(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * image,bool is_store,unsigned target,LLVMValueRef * rsrc)158 image_fetch_rsrc(
159 	struct lp_build_tgsi_context *bld_base,
160 	const struct tgsi_full_src_register *image,
161 	bool is_store, unsigned target,
162 	LLVMValueRef *rsrc)
163 {
164 	struct si_shader_context *ctx = si_shader_context(bld_base);
165 	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
166 					     ctx->param_samplers_and_images);
167 	LLVMValueRef index;
168 	bool dcc_off = is_store;
169 
170 	if (!image->Register.Indirect) {
171 		const struct tgsi_shader_info *info = bld_base->info;
172 		unsigned images_writemask = info->images_store |
173 					    info->images_atomic;
174 
175 		index = LLVMConstInt(ctx->i32,
176 				     si_get_image_slot(image->Register.Index), 0);
177 
178 		if (images_writemask & (1 << image->Register.Index))
179 			dcc_off = true;
180 	} else {
181 		/* From the GL_ARB_shader_image_load_store extension spec:
182 		 *
183 		 *    If a shader performs an image load, store, or atomic
184 		 *    operation using an image variable declared as an array,
185 		 *    and if the index used to select an individual element is
186 		 *    negative or greater than or equal to the size of the
187 		 *    array, the results of the operation are undefined but may
188 		 *    not lead to termination.
189 		 */
190 		index = si_get_bounded_indirect_index(ctx, &image->Indirect,
191 						      image->Register.Index,
192 						      ctx->num_images);
193 		index = LLVMBuildSub(ctx->ac.builder,
194 				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
195 				     index, "");
196 	}
197 
198 	if (image->Register.File != TGSI_FILE_IMAGE) {
199 		/* Bindless descriptors are accessible from a different pair of
200 		 * user SGPR indices.
201 		 */
202 		rsrc_ptr = LLVMGetParam(ctx->main_fn,
203 					ctx->param_bindless_samplers_and_images);
204 		index = lp_build_emit_fetch_src(bld_base, image,
205 						TGSI_TYPE_UNSIGNED, 0);
206 
207 		/* For simplicity, bindless image descriptors use fixed
208 		 * 16-dword slots for now.
209 		 */
210 		index = LLVMBuildMul(ctx->ac.builder, index,
211 				     LLVMConstInt(ctx->i32, 2, 0), "");
212 	}
213 
214 	*rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
215 				   target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
216 				   dcc_off);
217 }
218 
/**
 * Fetch the integer coordinates of an image access and pack them into the
 * address operand expected by the image/buffer intrinsics.
 *
 * \param inst  the memory instruction
 * \param src   index of the source operand that holds the coordinates
 * \param desc  descriptor of the accessed image; only read on GFX9 for
 *              2D targets, to obtain BASE_ARRAY
 * \return a scalar when there is a single coordinate, otherwise a 2- or
 *         4-element vector (3 coordinates are padded with undef)
 */
static LLVMValueRef image_fetch_coords(
		struct lp_build_tgsi_context *bld_base,
		const struct tgsi_full_instruction *inst,
		unsigned src, LLVMValueRef desc)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	const unsigned target = inst->Memory.Texture;
	unsigned count = tgsi_util_get_texture_coord_dim(target);
	LLVMValueRef addr[4];
	unsigned i;

	for (i = 0; i < count; i++) {
		LLVMValueRef raw = lp_build_emit_fetch(bld_base, inst, src, i);

		addr[i] = ac_to_integer(&ctx->ac, raw);
	}

	if (ctx->screen->info.chip_class >= GFX9) {
		switch (target) {
		case TGSI_TEXTURE_1D:
			/* 1D textures are allocated and used as 2D on GFX9. */
			addr[1] = ctx->i32_0;
			count++;
			break;
		case TGSI_TEXTURE_1D_ARRAY:
			/* Same as above; the layer moves to the 3rd slot. */
			addr[2] = addr[1];
			addr[1] = ctx->i32_0;
			count++;
			break;
		case TGSI_TEXTURE_2D: {
			/* The hw can't bind a slice of a 3D image as a 2D
			 * image, because it ignores BASE_ARRAY if the target
			 * is 3D. The workaround is to read BASE_ARRAY and set
			 * it as the 3rd address operand for all 2D images.
			 */
			LLVMValueRef dword5 = LLVMConstInt(ctx->i32, 5, 0);
			LLVMValueRef base_array_mask =
				LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
			LLVMValueRef layer;

			layer = LLVMBuildExtractElement(builder, desc, dword5, "");
			layer = LLVMBuildAnd(builder, layer, base_array_mask, "");

			addr[2] = layer;
			count++;
			break;
		}
		default:
			break;
		}
	}

	if (count == 1)
		return addr[0];

	if (count == 3) {
		/* LLVM has difficulties lowering 3-element vectors. */
		addr[3] = bld_base->uint_bld.undef;
		count = 4;
	}

	return lp_build_gather_values(&ctx->gallivm, addr, count);
}
276 
277 /**
278  * Append the extra mode bits that are used by image load and store.
279  */
image_append_args(struct si_shader_context * ctx,struct lp_build_emit_data * emit_data,unsigned target,bool atomic,bool force_glc)280 static void image_append_args(
281 		struct si_shader_context *ctx,
282 		struct lp_build_emit_data * emit_data,
283 		unsigned target,
284 		bool atomic,
285 		bool force_glc)
286 {
287 	const struct tgsi_full_instruction *inst = emit_data->inst;
288 	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
289 	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
290 	LLVMValueRef r128 = i1false;
291 	LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
292 	LLVMValueRef glc =
293 		force_glc ||
294 		inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
295 		i1true : i1false;
296 	LLVMValueRef slc = i1false;
297 	LLVMValueRef lwe = i1false;
298 
299 	if (atomic || (HAVE_LLVM <= 0x0309)) {
300 		emit_data->args[emit_data->arg_count++] = r128;
301 		emit_data->args[emit_data->arg_count++] = da;
302 		if (!atomic) {
303 			emit_data->args[emit_data->arg_count++] = glc;
304 		}
305 		emit_data->args[emit_data->arg_count++] = slc;
306 		return;
307 	}
308 
309 	/* HAVE_LLVM >= 0x0400 */
310 	emit_data->args[emit_data->arg_count++] = glc;
311 	emit_data->args[emit_data->arg_count++] = slc;
312 	emit_data->args[emit_data->arg_count++] = lwe;
313 	emit_data->args[emit_data->arg_count++] = da;
314 }
315 
316 /**
317  * Append the resource and indexing arguments for buffer intrinsics.
318  *
319  * \param rsrc the v4i32 buffer resource
320  * \param index index into the buffer (stride-based)
321  * \param offset byte offset into the buffer
322  */
buffer_append_args(struct si_shader_context * ctx,struct lp_build_emit_data * emit_data,LLVMValueRef rsrc,LLVMValueRef index,LLVMValueRef offset,bool atomic,bool force_glc)323 static void buffer_append_args(
324 		struct si_shader_context *ctx,
325 		struct lp_build_emit_data *emit_data,
326 		LLVMValueRef rsrc,
327 		LLVMValueRef index,
328 		LLVMValueRef offset,
329 		bool atomic,
330 		bool force_glc)
331 {
332 	const struct tgsi_full_instruction *inst = emit_data->inst;
333 	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
334 	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
335 
336 	emit_data->args[emit_data->arg_count++] = rsrc;
337 	emit_data->args[emit_data->arg_count++] = index; /* vindex */
338 	emit_data->args[emit_data->arg_count++] = offset; /* voffset */
339 	if (!atomic) {
340 		emit_data->args[emit_data->arg_count++] =
341 			force_glc ||
342 			inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
343 			i1true : i1false; /* glc */
344 	}
345 	emit_data->args[emit_data->arg_count++] = i1false; /* slc */
346 }
347 
load_fetch_args(struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)348 static void load_fetch_args(
349 		struct lp_build_tgsi_context * bld_base,
350 		struct lp_build_emit_data * emit_data)
351 {
352 	struct si_shader_context *ctx = si_shader_context(bld_base);
353 	const struct tgsi_full_instruction * inst = emit_data->inst;
354 	unsigned target = inst->Memory.Texture;
355 	LLVMValueRef rsrc;
356 
357 	emit_data->dst_type = ctx->v4f32;
358 
359 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
360 		   inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
361 		LLVMValueRef offset;
362 		LLVMValueRef tmp;
363 
364 		bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
365 		rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);
366 
367 		tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
368 		offset = ac_to_integer(&ctx->ac, tmp);
369 
370 		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
371 				   offset, false, false);
372 	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
373 		   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
374 		LLVMValueRef coords;
375 
376 		image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
377 		coords = image_fetch_coords(bld_base, inst, 1, rsrc);
378 
379 		if (target == TGSI_TEXTURE_BUFFER) {
380 			buffer_append_args(ctx, emit_data, rsrc, coords,
381 					   ctx->i32_0, false, false);
382 		} else {
383 			emit_data->args[0] = coords;
384 			emit_data->args[1] = rsrc;
385 			emit_data->args[2] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
386 			emit_data->arg_count = 3;
387 
388 			image_append_args(ctx, emit_data, target, false, false);
389 		}
390 	}
391 }
392 
load_emit_buffer(struct si_shader_context * ctx,struct lp_build_emit_data * emit_data,bool can_speculate,bool allow_smem)393 static void load_emit_buffer(struct si_shader_context *ctx,
394 			     struct lp_build_emit_data *emit_data,
395 			     bool can_speculate, bool allow_smem)
396 {
397 	const struct tgsi_full_instruction *inst = emit_data->inst;
398 	uint writemask = inst->Dst[0].Register.WriteMask;
399 	uint count = util_last_bit(writemask);
400 	LLVMValueRef *args = emit_data->args;
401 
402 	/* Don't use SMEM for shader buffer loads, because LLVM doesn't
403 	 * select SMEM for SI.load.const with a non-constant offset, and
404 	 * constant offsets practically don't exist with shader buffers.
405 	 *
406 	 * Also, SI.load.const doesn't use inst_offset when it's lowered
407 	 * to VMEM, so we just end up with more VALU instructions in the end
408 	 * and no benefit.
409 	 *
410 	 * TODO: Remove this line once LLVM can select SMEM with a non-constant
411 	 *       offset, and can derive inst_offset when VMEM is selected.
412 	 *       After that, si_memory_barrier should invalidate sL1 for shader
413 	 *       buffers.
414 	 */
415 
416 	assert(LLVMConstIntGetZExtValue(args[1]) == 0); /* vindex */
417 	emit_data->output[emit_data->chan] =
418 		ac_build_buffer_load(&ctx->ac, args[0], count, NULL,
419 				     args[2], NULL, 0,
420 				     LLVMConstIntGetZExtValue(args[3]),
421 				     LLVMConstIntGetZExtValue(args[4]),
422 				     can_speculate, allow_smem);
423 }
424 
get_memory_ptr(struct si_shader_context * ctx,const struct tgsi_full_instruction * inst,LLVMTypeRef type,int arg)425 static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
426                                    const struct tgsi_full_instruction *inst,
427                                    LLVMTypeRef type, int arg)
428 {
429 	LLVMBuilderRef builder = ctx->ac.builder;
430 	LLVMValueRef offset, ptr;
431 	int addr_space;
432 
433 	offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0);
434 	offset = ac_to_integer(&ctx->ac, offset);
435 
436 	ptr = ctx->ac.lds;
437 	ptr = LLVMBuildGEP(builder, ptr, &offset, 1, "");
438 	addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
439 	ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), "");
440 
441 	return ptr;
442 }
443 
load_emit_memory(struct si_shader_context * ctx,struct lp_build_emit_data * emit_data)444 static void load_emit_memory(
445 		struct si_shader_context *ctx,
446 		struct lp_build_emit_data *emit_data)
447 {
448 	const struct tgsi_full_instruction *inst = emit_data->inst;
449 	unsigned writemask = inst->Dst[0].Register.WriteMask;
450 	LLVMValueRef channels[4], ptr, derived_ptr, index;
451 	int chan;
452 
453 	ptr = get_memory_ptr(ctx, inst, ctx->f32, 1);
454 
455 	for (chan = 0; chan < 4; ++chan) {
456 		if (!(writemask & (1 << chan))) {
457 			channels[chan] = LLVMGetUndef(ctx->f32);
458 			continue;
459 		}
460 
461 		index = LLVMConstInt(ctx->i32, chan, 0);
462 		derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
463 		channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
464 	}
465 	emit_data->output[emit_data->chan] = lp_build_gather_values(&ctx->gallivm, channels, 4);
466 }
467 
468 /**
469  * Return true if the memory accessed by a LOAD or STORE instruction is
470  * read-only or write-only, respectively.
471  *
472  * \param shader_buffers_reverse_access_mask
473  *	For LOAD, set this to (store | atomic) slot usage in the shader.
474  *	For STORE, set this to (load | atomic) slot usage in the shader.
475  * \param images_reverse_access_mask  Same as above, but for images.
476  */
is_oneway_access_only(const struct tgsi_full_instruction * inst,const struct tgsi_shader_info * info,unsigned shader_buffers_reverse_access_mask,unsigned images_reverse_access_mask)477 static bool is_oneway_access_only(const struct tgsi_full_instruction *inst,
478 				  const struct tgsi_shader_info *info,
479 				  unsigned shader_buffers_reverse_access_mask,
480 				  unsigned images_reverse_access_mask)
481 {
482 	/* RESTRICT means NOALIAS.
483 	 * If there are no writes, we can assume the accessed memory is read-only.
484 	 * If there are no reads, we can assume the accessed memory is write-only.
485 	 */
486 	if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT) {
487 		unsigned reverse_access_mask;
488 
489 		if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
490 			reverse_access_mask = shader_buffers_reverse_access_mask;
491 		} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
492 			reverse_access_mask = info->images_buffers &
493 					      images_reverse_access_mask;
494 		} else {
495 			reverse_access_mask = ~info->images_buffers &
496 					      images_reverse_access_mask;
497 		}
498 
499 		if (inst->Src[0].Register.Indirect) {
500 			if (!reverse_access_mask)
501 				return true;
502 		} else {
503 			if (!(reverse_access_mask &
504 			      (1u << inst->Src[0].Register.Index)))
505 				return true;
506 		}
507 	}
508 
509 	/* If there are no buffer writes (for both shader buffers & image
510 	 * buffers), it implies that buffer memory is read-only.
511 	 * If there are no buffer reads (for both shader buffers & image
512 	 * buffers), it implies that buffer memory is write-only.
513 	 *
514 	 * Same for the case when there are no writes/reads for non-buffer
515 	 * images.
516 	 */
517 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
518 	    (inst->Memory.Texture == TGSI_TEXTURE_BUFFER &&
519 	     (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
520 	      tgsi_is_bindless_image_file(inst->Src[0].Register.File)))) {
521 		if (!shader_buffers_reverse_access_mask &&
522 		    !(info->images_buffers & images_reverse_access_mask))
523 			return true;
524 	} else {
525 		if (!(~info->images_buffers & images_reverse_access_mask))
526 			return true;
527 	}
528 	return false;
529 }
530 
load_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)531 static void load_emit(
532 		const struct lp_build_tgsi_action *action,
533 		struct lp_build_tgsi_context *bld_base,
534 		struct lp_build_emit_data *emit_data)
535 {
536 	struct si_shader_context *ctx = si_shader_context(bld_base);
537 	LLVMBuilderRef builder = ctx->ac.builder;
538 	const struct tgsi_full_instruction * inst = emit_data->inst;
539 	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
540 	char intrinsic_name[64];
541 	bool can_speculate = false;
542 
543 	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
544 		load_emit_memory(ctx, emit_data);
545 		return;
546 	}
547 
548 	if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
549 		load_emit_buffer(ctx, emit_data, true, true);
550 		return;
551 	}
552 
553 	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
554 		ac_build_waitcnt(&ctx->ac, VM_CNT);
555 
556 	can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) &&
557 			  is_oneway_access_only(inst, info,
558 						info->shader_buffers_store |
559 						info->shader_buffers_atomic,
560 						info->images_store |
561 						info->images_atomic);
562 
563 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
564 		load_emit_buffer(ctx, emit_data, can_speculate, false);
565 		return;
566 	}
567 
568 	if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
569 		emit_data->output[emit_data->chan] =
570 			lp_build_intrinsic(
571 				builder, "llvm.amdgcn.buffer.load.format.v4f32", emit_data->dst_type,
572 				emit_data->args, emit_data->arg_count,
573 				ac_get_load_intr_attribs(can_speculate));
574 	} else {
575 		ac_get_image_intr_name("llvm.amdgcn.image.load",
576 				       emit_data->dst_type,		/* vdata */
577 				       LLVMTypeOf(emit_data->args[0]), /* coords */
578 				       LLVMTypeOf(emit_data->args[1]), /* rsrc */
579 				       intrinsic_name, sizeof(intrinsic_name));
580 
581 		emit_data->output[emit_data->chan] =
582 			lp_build_intrinsic(
583 				builder, intrinsic_name, emit_data->dst_type,
584 				emit_data->args, emit_data->arg_count,
585 				ac_get_load_intr_attribs(can_speculate));
586 	}
587 }
588 
store_fetch_args(struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)589 static void store_fetch_args(
590 		struct lp_build_tgsi_context * bld_base,
591 		struct lp_build_emit_data * emit_data)
592 {
593 	struct si_shader_context *ctx = si_shader_context(bld_base);
594 	const struct tgsi_full_instruction * inst = emit_data->inst;
595 	struct tgsi_full_src_register memory;
596 	LLVMValueRef chans[4];
597 	LLVMValueRef data;
598 	LLVMValueRef rsrc;
599 	unsigned chan;
600 
601 	emit_data->dst_type = ctx->voidt;
602 
603 	for (chan = 0; chan < 4; ++chan) {
604 		chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
605 	}
606 	data = lp_build_gather_values(&ctx->gallivm, chans, 4);
607 
608 	emit_data->args[emit_data->arg_count++] = data;
609 
610 	memory = tgsi_full_src_register_from_dst(&inst->Dst[0]);
611 
612 	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
613 		LLVMValueRef offset;
614 		LLVMValueRef tmp;
615 
616 		rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false);
617 
618 		tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
619 		offset = ac_to_integer(&ctx->ac, tmp);
620 
621 		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
622 				   offset, false, false);
623 	} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
624 		   tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
625 		unsigned target = inst->Memory.Texture;
626 		LLVMValueRef coords;
627 
628 		/* 8bit/16bit TC L1 write corruption bug on SI.
629 		 * All store opcodes not aligned to a dword are affected.
630 		 *
631 		 * The only way to get unaligned stores in radeonsi is through
632 		 * shader images.
633 		 */
634 		bool force_glc = ctx->screen->info.chip_class == SI;
635 
636 		image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
637 		coords = image_fetch_coords(bld_base, inst, 0, rsrc);
638 
639 		if (target == TGSI_TEXTURE_BUFFER) {
640 			buffer_append_args(ctx, emit_data, rsrc, coords,
641 					   ctx->i32_0, false, force_glc);
642 		} else {
643 			emit_data->args[1] = coords;
644 			emit_data->args[2] = rsrc;
645 			emit_data->args[3] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
646 			emit_data->arg_count = 4;
647 
648 			image_append_args(ctx, emit_data, target, false, force_glc);
649 		}
650 	}
651 }
652 
store_emit_buffer(struct si_shader_context * ctx,struct lp_build_emit_data * emit_data,bool writeonly_memory)653 static void store_emit_buffer(
654 		struct si_shader_context *ctx,
655 		struct lp_build_emit_data *emit_data,
656 		bool writeonly_memory)
657 {
658 	const struct tgsi_full_instruction *inst = emit_data->inst;
659 	LLVMBuilderRef builder = ctx->ac.builder;
660 	LLVMValueRef base_data = emit_data->args[0];
661 	LLVMValueRef base_offset = emit_data->args[3];
662 	unsigned writemask = inst->Dst[0].Register.WriteMask;
663 
664 	while (writemask) {
665 		int start, count;
666 		const char *intrinsic_name;
667 		LLVMValueRef data;
668 		LLVMValueRef offset;
669 		LLVMValueRef tmp;
670 
671 		u_bit_scan_consecutive_range(&writemask, &start, &count);
672 
673 		/* Due to an LLVM limitation, split 3-element writes
674 		 * into a 2-element and a 1-element write. */
675 		if (count == 3) {
676 			writemask |= 1 << (start + 2);
677 			count = 2;
678 		}
679 
680 		if (count == 4) {
681 			data = base_data;
682 			intrinsic_name = "llvm.amdgcn.buffer.store.v4f32";
683 		} else if (count == 2) {
684 			LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
685 
686 			tmp = LLVMBuildExtractElement(
687 				builder, base_data,
688 				LLVMConstInt(ctx->i32, start, 0), "");
689 			data = LLVMBuildInsertElement(
690 				builder, LLVMGetUndef(v2f32), tmp,
691 				ctx->i32_0, "");
692 
693 			tmp = LLVMBuildExtractElement(
694 				builder, base_data,
695 				LLVMConstInt(ctx->i32, start + 1, 0), "");
696 			data = LLVMBuildInsertElement(
697 				builder, data, tmp, ctx->i32_1, "");
698 
699 			intrinsic_name = "llvm.amdgcn.buffer.store.v2f32";
700 		} else {
701 			assert(count == 1);
702 			data = LLVMBuildExtractElement(
703 				builder, base_data,
704 				LLVMConstInt(ctx->i32, start, 0), "");
705 			intrinsic_name = "llvm.amdgcn.buffer.store.f32";
706 		}
707 
708 		offset = base_offset;
709 		if (start != 0) {
710 			offset = LLVMBuildAdd(
711 				builder, offset,
712 				LLVMConstInt(ctx->i32, start * 4, 0), "");
713 		}
714 
715 		emit_data->args[0] = data;
716 		emit_data->args[3] = offset;
717 
718 		lp_build_intrinsic(
719 			builder, intrinsic_name, emit_data->dst_type,
720 			emit_data->args, emit_data->arg_count,
721 			ac_get_store_intr_attribs(writeonly_memory));
722 	}
723 }
724 
store_emit_memory(struct si_shader_context * ctx,struct lp_build_emit_data * emit_data)725 static void store_emit_memory(
726 		struct si_shader_context *ctx,
727 		struct lp_build_emit_data *emit_data)
728 {
729 	const struct tgsi_full_instruction *inst = emit_data->inst;
730 	LLVMBuilderRef builder = ctx->ac.builder;
731 	unsigned writemask = inst->Dst[0].Register.WriteMask;
732 	LLVMValueRef ptr, derived_ptr, data, index;
733 	int chan;
734 
735 	ptr = get_memory_ptr(ctx, inst, ctx->f32, 0);
736 
737 	for (chan = 0; chan < 4; ++chan) {
738 		if (!(writemask & (1 << chan))) {
739 			continue;
740 		}
741 		data = lp_build_emit_fetch(&ctx->bld_base, inst, 1, chan);
742 		index = LLVMConstInt(ctx->i32, chan, 0);
743 		derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
744 		LLVMBuildStore(builder, data, derived_ptr);
745 	}
746 }
747 
store_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)748 static void store_emit(
749 		const struct lp_build_tgsi_action *action,
750 		struct lp_build_tgsi_context *bld_base,
751 		struct lp_build_emit_data *emit_data)
752 {
753 	struct si_shader_context *ctx = si_shader_context(bld_base);
754 	LLVMBuilderRef builder = ctx->ac.builder;
755 	const struct tgsi_full_instruction * inst = emit_data->inst;
756 	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
757 	unsigned target = inst->Memory.Texture;
758 	char intrinsic_name[64];
759 	bool writeonly_memory = false;
760 
761 	if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
762 		store_emit_memory(ctx, emit_data);
763 		return;
764 	}
765 
766 	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
767 		ac_build_waitcnt(&ctx->ac, VM_CNT);
768 
769 	writeonly_memory = is_oneway_access_only(inst, info,
770 						 info->shader_buffers_load |
771 						 info->shader_buffers_atomic,
772 						 info->images_load |
773 						 info->images_atomic);
774 
775 	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
776 		store_emit_buffer(ctx, emit_data, writeonly_memory);
777 		return;
778 	}
779 
780 	if (target == TGSI_TEXTURE_BUFFER) {
781 		emit_data->output[emit_data->chan] = lp_build_intrinsic(
782 			builder, "llvm.amdgcn.buffer.store.format.v4f32",
783 			emit_data->dst_type, emit_data->args,
784 			emit_data->arg_count,
785 			ac_get_store_intr_attribs(writeonly_memory));
786 	} else {
787 		ac_get_image_intr_name("llvm.amdgcn.image.store",
788 				       LLVMTypeOf(emit_data->args[0]), /* vdata */
789 				       LLVMTypeOf(emit_data->args[1]), /* coords */
790 				       LLVMTypeOf(emit_data->args[2]), /* rsrc */
791 				       intrinsic_name, sizeof(intrinsic_name));
792 
793 		emit_data->output[emit_data->chan] =
794 			lp_build_intrinsic(
795 				builder, intrinsic_name, emit_data->dst_type,
796 				emit_data->args, emit_data->arg_count,
797 				ac_get_store_intr_attribs(writeonly_memory));
798 	}
799 }
800 
atomic_fetch_args(struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)801 static void atomic_fetch_args(
802 		struct lp_build_tgsi_context * bld_base,
803 		struct lp_build_emit_data * emit_data)
804 {
805 	struct si_shader_context *ctx = si_shader_context(bld_base);
806 	const struct tgsi_full_instruction * inst = emit_data->inst;
807 	LLVMValueRef data1, data2;
808 	LLVMValueRef rsrc;
809 	LLVMValueRef tmp;
810 
811 	emit_data->dst_type = ctx->f32;
812 
813 	tmp = lp_build_emit_fetch(bld_base, inst, 2, 0);
814 	data1 = ac_to_integer(&ctx->ac, tmp);
815 
816 	if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
817 		tmp = lp_build_emit_fetch(bld_base, inst, 3, 0);
818 		data2 = ac_to_integer(&ctx->ac, tmp);
819 	}
820 
821 	/* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
822 	 * of arguments, which is reversed relative to TGSI (and GLSL)
823 	 */
824 	if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
825 		emit_data->args[emit_data->arg_count++] = data2;
826 	emit_data->args[emit_data->arg_count++] = data1;
827 
828 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
829 		LLVMValueRef offset;
830 
831 		rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
832 
833 		tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
834 		offset = ac_to_integer(&ctx->ac, tmp);
835 
836 		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
837 				   offset, true, false);
838 	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
839 		   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
840 		unsigned target = inst->Memory.Texture;
841 		LLVMValueRef coords;
842 
843 		image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
844 		coords = image_fetch_coords(bld_base, inst, 1, rsrc);
845 
846 		if (target == TGSI_TEXTURE_BUFFER) {
847 			buffer_append_args(ctx, emit_data, rsrc, coords,
848 					   ctx->i32_0, true, false);
849 		} else {
850 			emit_data->args[emit_data->arg_count++] = coords;
851 			emit_data->args[emit_data->arg_count++] = rsrc;
852 
853 			image_append_args(ctx, emit_data, target, true, false);
854 		}
855 	}
856 }
857 
atomic_emit_memory(struct si_shader_context * ctx,struct lp_build_emit_data * emit_data)858 static void atomic_emit_memory(struct si_shader_context *ctx,
859                                struct lp_build_emit_data *emit_data) {
860 	LLVMBuilderRef builder = ctx->ac.builder;
861 	const struct tgsi_full_instruction * inst = emit_data->inst;
862 	LLVMValueRef ptr, result, arg;
863 
864 	ptr = get_memory_ptr(ctx, inst, ctx->i32, 1);
865 
866 	arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0);
867 	arg = ac_to_integer(&ctx->ac, arg);
868 
869 	if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
870 		LLVMValueRef new_data;
871 		new_data = lp_build_emit_fetch(&ctx->bld_base,
872 		                               inst, 3, 0);
873 
874 		new_data = ac_to_integer(&ctx->ac, new_data);
875 
876 		result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data,
877 		                       LLVMAtomicOrderingSequentiallyConsistent,
878 		                       LLVMAtomicOrderingSequentiallyConsistent,
879 		                       false);
880 
881 		result = LLVMBuildExtractValue(builder, result, 0, "");
882 	} else {
883 		LLVMAtomicRMWBinOp op;
884 
885 		switch(inst->Instruction.Opcode) {
886 			case TGSI_OPCODE_ATOMUADD:
887 				op = LLVMAtomicRMWBinOpAdd;
888 				break;
889 			case TGSI_OPCODE_ATOMXCHG:
890 				op = LLVMAtomicRMWBinOpXchg;
891 				break;
892 			case TGSI_OPCODE_ATOMAND:
893 				op = LLVMAtomicRMWBinOpAnd;
894 				break;
895 			case TGSI_OPCODE_ATOMOR:
896 				op = LLVMAtomicRMWBinOpOr;
897 				break;
898 			case TGSI_OPCODE_ATOMXOR:
899 				op = LLVMAtomicRMWBinOpXor;
900 				break;
901 			case TGSI_OPCODE_ATOMUMIN:
902 				op = LLVMAtomicRMWBinOpUMin;
903 				break;
904 			case TGSI_OPCODE_ATOMUMAX:
905 				op = LLVMAtomicRMWBinOpUMax;
906 				break;
907 			case TGSI_OPCODE_ATOMIMIN:
908 				op = LLVMAtomicRMWBinOpMin;
909 				break;
910 			case TGSI_OPCODE_ATOMIMAX:
911 				op = LLVMAtomicRMWBinOpMax;
912 				break;
913 			default:
914 				unreachable("unknown atomic opcode");
915 		}
916 
917 		result = LLVMBuildAtomicRMW(builder, op, ptr, arg,
918 		                       LLVMAtomicOrderingSequentiallyConsistent,
919 		                       false);
920 	}
921 	emit_data->output[emit_data->chan] = LLVMBuildBitCast(builder, result, emit_data->dst_type, "");
922 }
923 
atomic_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)924 static void atomic_emit(
925 		const struct lp_build_tgsi_action *action,
926 		struct lp_build_tgsi_context *bld_base,
927 		struct lp_build_emit_data *emit_data)
928 {
929 	struct si_shader_context *ctx = si_shader_context(bld_base);
930 	LLVMBuilderRef builder = ctx->ac.builder;
931 	const struct tgsi_full_instruction * inst = emit_data->inst;
932 	char intrinsic_name[40];
933 	LLVMValueRef tmp;
934 
935 	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
936 		atomic_emit_memory(ctx, emit_data);
937 		return;
938 	}
939 
940 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
941 	    inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
942 		snprintf(intrinsic_name, sizeof(intrinsic_name),
943 			 "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
944 	} else {
945 		LLVMValueRef coords;
946 		char coords_type[8];
947 
948 		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
949 			coords = emit_data->args[2];
950 		else
951 			coords = emit_data->args[1];
952 
953 		ac_build_type_name_for_intr(LLVMTypeOf(coords), coords_type, sizeof(coords_type));
954 		snprintf(intrinsic_name, sizeof(intrinsic_name),
955 			 "llvm.amdgcn.image.atomic.%s.%s",
956 			 action->intr_name, coords_type);
957 	}
958 
959 	tmp = lp_build_intrinsic(
960 		builder, intrinsic_name, ctx->i32,
961 		emit_data->args, emit_data->arg_count, 0);
962 	emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
963 }
964 
/**
 * Fill an ac_image_args structure and stash it inside emit_data->args.
 *
 * \param param  address components; padded in place with undef values up to
 *               the next power of two, so the array must have room for that
 * \param count  number of valid entries in \p param
 * \param dmask  value stored in the dmask field
 *
 * The emit callbacks later memcpy the structure back out of emit_data->args.
 */
static void set_tex_fetch_args(struct si_shader_context *ctx,
			       struct lp_build_emit_data *emit_data,
			       unsigned target,
			       LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
			       LLVMValueRef *param, unsigned count,
			       unsigned dmask)
{
	const unsigned padded_count = util_next_power_of_two(count);
	LLVMValueRef addr;

	/* Pad to power of two vector */
	for (; count < padded_count; count++)
		param[count] = LLVMGetUndef(ctx->i32);

	/* A single component is passed as a scalar, otherwise as a vector. */
	addr = count > 1 ?
		lp_build_gather_values(&ctx->gallivm, param, count) :
		param[0];

	struct ac_image_args args = {
		.addr = addr,
		.resource = res_ptr,
		.sampler = samp_ptr,
		.dmask = dmask,
		.unorm = target == TGSI_TEXTURE_RECT ||
			 target == TGSI_TEXTURE_SHADOWRECT,
		.da = tgsi_is_array_sampler(target),
	};

	/* Ugly, but we seem to have no other choice right now. */
	STATIC_ASSERT(sizeof(args) <= sizeof(emit_data->args));
	memcpy(emit_data->args, &args, sizeof(args));
}
994 
/**
 * Post-process the vector returned by a resinfo query.
 *
 * - GFX9+, 1D arrays: element 2 is copied into element 1.
 * - Cube arrays: element 2 is divided by 6.
 *
 * \return the adjusted vector (or \p out unchanged for other targets)
 */
static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
				unsigned target, LLVMValueRef out)
{
	LLVMBuilderRef builder = ctx->ac.builder;
	const bool is_1d_array = target == TGSI_TEXTURE_1D_ARRAY ||
				 target == TGSI_TEXTURE_SHADOW1D_ARRAY;
	const bool is_cube_array = target == TGSI_TEXTURE_CUBE_ARRAY ||
				   target == TGSI_TEXTURE_SHADOWCUBE_ARRAY;
	LLVMValueRef idx2 = LLVMConstInt(ctx->i32, 2, 0);

	/* 1D textures are allocated and used as 2D on GFX9. */
	if (ctx->screen->info.chip_class >= GFX9 && is_1d_array) {
		LLVMValueRef layers =
			LLVMBuildExtractElement(builder, out, idx2, "");

		out = LLVMBuildInsertElement(builder, out, layers,
					     ctx->i32_1, "");
	}

	/* Divide the number of layers by 6 to get the number of cubes. */
	if (is_cube_array) {
		LLVMValueRef num_layers =
			LLVMBuildExtractElement(builder, out, idx2, "");
		LLVMValueRef num_cubes =
			LLVMBuildSDiv(builder, num_layers,
				      LLVMConstInt(ctx->i32, 6, 0), "");

		out = LLVMBuildInsertElement(builder, out, num_cubes, idx2, "");
	}

	return out;
}
1023 
resq_fetch_args(struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)1024 static void resq_fetch_args(
1025 		struct lp_build_tgsi_context * bld_base,
1026 		struct lp_build_emit_data * emit_data)
1027 {
1028 	struct si_shader_context *ctx = si_shader_context(bld_base);
1029 	const struct tgsi_full_instruction *inst = emit_data->inst;
1030 	const struct tgsi_full_src_register *reg = &inst->Src[0];
1031 
1032 	emit_data->dst_type = ctx->v4i32;
1033 
1034 	if (reg->Register.File == TGSI_FILE_BUFFER) {
1035 		emit_data->args[0] = shader_buffer_fetch_rsrc(ctx, reg, false);
1036 		emit_data->arg_count = 1;
1037 	} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
1038 		image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture,
1039 				 &emit_data->args[0]);
1040 		emit_data->arg_count = 1;
1041 	} else {
1042 		LLVMValueRef res_ptr;
1043 		unsigned image_target;
1044 
1045 		if (inst->Memory.Texture == TGSI_TEXTURE_3D)
1046 			image_target = TGSI_TEXTURE_2D_ARRAY;
1047 		else
1048 			image_target = inst->Memory.Texture;
1049 
1050 		image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture,
1051 				 &res_ptr);
1052 		set_tex_fetch_args(ctx, emit_data, image_target,
1053 				   res_ptr, NULL, &ctx->i32_0, 1,
1054 				   0xf);
1055 	}
1056 }
1057 
resq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)1058 static void resq_emit(
1059 		const struct lp_build_tgsi_action *action,
1060 		struct lp_build_tgsi_context *bld_base,
1061 		struct lp_build_emit_data *emit_data)
1062 {
1063 	struct si_shader_context *ctx = si_shader_context(bld_base);
1064 	LLVMBuilderRef builder = ctx->ac.builder;
1065 	const struct tgsi_full_instruction *inst = emit_data->inst;
1066 	LLVMValueRef out;
1067 
1068 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
1069 		out = LLVMBuildExtractElement(builder, emit_data->args[0],
1070 					      LLVMConstInt(ctx->i32, 2, 0), "");
1071 	} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
1072 		out = get_buffer_size(bld_base, emit_data->args[0]);
1073 	} else {
1074 		struct ac_image_args args;
1075 
1076 		memcpy(&args, emit_data->args, sizeof(args)); /* ugly */
1077 		args.opcode = ac_image_get_resinfo;
1078 		out = ac_build_image_opcode(&ctx->ac, &args);
1079 
1080 		out = fix_resinfo(ctx, inst->Memory.Texture, out);
1081 	}
1082 
1083 	emit_data->output[emit_data->chan] = out;
1084 }
1085 
1086 /**
1087  * Load an image view, fmask view. or sampler state descriptor.
1088  */
si_load_sampler_desc(struct si_shader_context * ctx,LLVMValueRef list,LLVMValueRef index,enum ac_descriptor_type type)1089 LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
1090 				  LLVMValueRef list, LLVMValueRef index,
1091 				  enum ac_descriptor_type type)
1092 {
1093 	LLVMBuilderRef builder = ctx->ac.builder;
1094 
1095 	switch (type) {
1096 	case AC_DESC_IMAGE:
1097 		/* The image is at [0:7]. */
1098 		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
1099 		break;
1100 	case AC_DESC_BUFFER:
1101 		/* The buffer is in [4:7]. */
1102 		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
1103 		index = LLVMBuildAdd(builder, index, ctx->i32_1, "");
1104 		list = LLVMBuildPointerCast(builder, list,
1105 					    si_const_array(ctx->v4i32, 0), "");
1106 		break;
1107 	case AC_DESC_FMASK:
1108 		/* The FMASK is at [8:15]. */
1109 		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
1110 		index = LLVMBuildAdd(builder, index, ctx->i32_1, "");
1111 		break;
1112 	case AC_DESC_SAMPLER:
1113 		/* The sampler state is at [12:15]. */
1114 		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
1115 		index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
1116 		list = LLVMBuildPointerCast(builder, list,
1117 					    si_const_array(ctx->v4i32, 0), "");
1118 		break;
1119 	}
1120 
1121 	return ac_build_load_to_sgpr(&ctx->ac, list, index);
1122 }
1123 
1124 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
1125  *
1126  * SI-CI:
1127  *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
1128  *   filtering manually. The driver sets img7 to a mask clearing
1129  *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
1130  *     s_and_b32 samp0, samp0, img7
1131  *
1132  * VI:
1133  *   The ANISO_OVERRIDE sampler field enables this fix in TA.
1134  */
sici_fix_sampler_aniso(struct si_shader_context * ctx,LLVMValueRef res,LLVMValueRef samp)1135 static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
1136 					   LLVMValueRef res, LLVMValueRef samp)
1137 {
1138 	LLVMValueRef img7, samp0;
1139 
1140 	if (ctx->screen->info.chip_class >= VI)
1141 		return samp;
1142 
1143 	img7 = LLVMBuildExtractElement(ctx->ac.builder, res,
1144 				       LLVMConstInt(ctx->i32, 7, 0), "");
1145 	samp0 = LLVMBuildExtractElement(ctx->ac.builder, samp,
1146 					ctx->i32_0, "");
1147 	samp0 = LLVMBuildAnd(ctx->ac.builder, samp0, img7, "");
1148 	return LLVMBuildInsertElement(ctx->ac.builder, samp, samp0,
1149 				      ctx->i32_0, "");
1150 }
1151 
/**
 * Load the descriptors needed by a texture instruction.
 *
 * The sampler operand is the last source register of the instruction.
 *
 * \param res_ptr    receives the buffer descriptor for TGSI_TEXTURE_BUFFER,
 *                   the image descriptor otherwise
 * \param samp_ptr   optional; receives the sampler state for targets that
 *                   are neither MSAA nor buffer, NULL otherwise
 * \param fmask_ptr  optional; receives the FMASK descriptor for MSAA
 *                   targets, NULL otherwise
 */
static void tex_fetch_ptrs(
	struct lp_build_tgsi_context *bld_base,
	struct lp_build_emit_data *emit_data,
	LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_instruction *inst = emit_data->inst;
	const unsigned target = inst->Texture.Texture;
	const bool is_buffer = target == TGSI_TEXTURE_BUFFER;
	const bool is_msaa = target == TGSI_TEXTURE_2D_MSAA ||
			     target == TGSI_TEXTURE_2D_ARRAY_MSAA;
	const struct tgsi_full_src_register *reg =
		&inst->Src[inst->Instruction.NumSrcRegs - 1];
	LLVMValueRef list = LLVMGetParam(ctx->main_fn,
					 ctx->param_samplers_and_images);
	LLVMValueRef index;

	if (!reg->Register.Indirect) {
		index = LLVMConstInt(ctx->i32,
				     si_get_sampler_slot(reg->Register.Index), 0);
	} else {
		index = si_get_bounded_indirect_index(ctx,
						      &reg->Indirect,
						      reg->Register.Index,
						      ctx->num_samplers);
		index = LLVMBuildAdd(ctx->ac.builder, index,
				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
	}

	if (reg->Register.File != TGSI_FILE_SAMPLER) {
		/* Bindless descriptors are accessible from a different pair of
		 * user SGPR indices; the index is read from the register itself.
		 */
		list = LLVMGetParam(ctx->main_fn,
				    ctx->param_bindless_samplers_and_images);
		index = lp_build_emit_fetch_src(bld_base, reg,
						TGSI_TYPE_UNSIGNED, 0);
	}

	*res_ptr = si_load_sampler_desc(ctx, list, index,
					is_buffer ? AC_DESC_BUFFER
						  : AC_DESC_IMAGE);

	/* MSAA targets only provide an FMASK, buffers provide neither an
	 * FMASK nor a sampler state, everything else only a sampler state.
	 */
	if (fmask_ptr) {
		*fmask_ptr = is_msaa ?
			si_load_sampler_desc(ctx, list, index, AC_DESC_FMASK) :
			NULL;
	}

	if (samp_ptr) {
		if (!is_msaa && !is_buffer) {
			LLVMValueRef samp =
				si_load_sampler_desc(ctx, list, index,
						     AC_DESC_SAMPLER);

			*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, samp);
		} else {
			*samp_ptr = NULL;
		}
	}
}
1213 
txq_fetch_args(struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)1214 static void txq_fetch_args(
1215 	struct lp_build_tgsi_context *bld_base,
1216 	struct lp_build_emit_data *emit_data)
1217 {
1218 	struct si_shader_context *ctx = si_shader_context(bld_base);
1219 	const struct tgsi_full_instruction *inst = emit_data->inst;
1220 	unsigned target = inst->Texture.Texture;
1221 	LLVMValueRef res_ptr;
1222 	LLVMValueRef address;
1223 
1224 	tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL);
1225 
1226 	if (target == TGSI_TEXTURE_BUFFER) {
1227 		/* Read the size from the buffer descriptor directly. */
1228 		emit_data->args[0] = get_buffer_size(bld_base, res_ptr);
1229 		return;
1230 	}
1231 
1232 	/* Textures - set the mip level. */
1233 	address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
1234 
1235 	set_tex_fetch_args(ctx, emit_data, target, res_ptr,
1236 			   NULL, &address, 1, 0xf);
1237 }
1238 
txq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)1239 static void txq_emit(const struct lp_build_tgsi_action *action,
1240 		     struct lp_build_tgsi_context *bld_base,
1241 		     struct lp_build_emit_data *emit_data)
1242 {
1243 	struct si_shader_context *ctx = si_shader_context(bld_base);
1244 	struct ac_image_args args;
1245 	unsigned target = emit_data->inst->Texture.Texture;
1246 
1247 	if (target == TGSI_TEXTURE_BUFFER) {
1248 		/* Just return the buffer size. */
1249 		emit_data->output[emit_data->chan] = emit_data->args[0];
1250 		return;
1251 	}
1252 
1253 	memcpy(&args, emit_data->args, sizeof(args)); /* ugly */
1254 
1255 	args.opcode = ac_image_get_resinfo;
1256 	LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args);
1257 
1258 	emit_data->output[emit_data->chan] = fix_resinfo(ctx, target, result);
1259 }
1260 
tex_fetch_args(struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)1261 static void tex_fetch_args(
1262 	struct lp_build_tgsi_context *bld_base,
1263 	struct lp_build_emit_data *emit_data)
1264 {
1265 	struct si_shader_context *ctx = si_shader_context(bld_base);
1266 	const struct tgsi_full_instruction *inst = emit_data->inst;
1267 	unsigned opcode = inst->Instruction.Opcode;
1268 	unsigned target = inst->Texture.Texture;
1269 	LLVMValueRef coords[5], derivs[6];
1270 	LLVMValueRef address[16];
1271 	unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
1272 	int ref_pos = tgsi_util_get_shadow_ref_src_index(target);
1273 	unsigned count = 0;
1274 	unsigned chan;
1275 	unsigned num_deriv_channels = 0;
1276 	bool has_offset = inst->Texture.NumOffsets > 0;
1277 	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
1278 	unsigned dmask = 0xf;
1279 
1280 	tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
1281 
1282 	if (target == TGSI_TEXTURE_BUFFER) {
1283 		emit_data->dst_type = ctx->v4f32;
1284 		emit_data->args[0] = res_ptr;
1285 		emit_data->args[1] = ctx->i32_0;
1286 		emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
1287 		emit_data->arg_count = 3;
1288 		return;
1289 	}
1290 
1291 	/* Fetch and project texture coordinates */
1292 	coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
1293 	for (chan = 0; chan < 3; chan++) {
1294 		coords[chan] = lp_build_emit_fetch(bld_base,
1295 						   emit_data->inst, 0,
1296 						   chan);
1297 		if (opcode == TGSI_OPCODE_TXP)
1298 			coords[chan] = lp_build_emit_llvm_binary(bld_base,
1299 								 TGSI_OPCODE_DIV,
1300 								 coords[chan],
1301 								 coords[3]);
1302 	}
1303 
1304 	if (opcode == TGSI_OPCODE_TXP)
1305 		coords[3] = ctx->ac.f32_1;
1306 
1307 	/* Pack offsets. */
1308 	if (has_offset &&
1309 	    opcode != TGSI_OPCODE_TXF &&
1310 	    opcode != TGSI_OPCODE_TXF_LZ) {
1311 		/* The offsets are six-bit signed integers packed like this:
1312 		 *   X=[5:0], Y=[13:8], and Z=[21:16].
1313 		 */
1314 		LLVMValueRef offset[3], pack;
1315 
1316 		assert(inst->Texture.NumOffsets == 1);
1317 
1318 		for (chan = 0; chan < 3; chan++) {
1319 			offset[chan] = lp_build_emit_fetch_texoffset(bld_base,
1320 								     emit_data->inst, 0, chan);
1321 			offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
1322 						    LLVMConstInt(ctx->i32, 0x3f, 0), "");
1323 			if (chan)
1324 				offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
1325 							    LLVMConstInt(ctx->i32, chan*8, 0), "");
1326 		}
1327 
1328 		pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
1329 		pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
1330 		address[count++] = pack;
1331 	}
1332 
1333 	/* Pack LOD bias value */
1334 	if (opcode == TGSI_OPCODE_TXB)
1335 		address[count++] = coords[3];
1336 	if (opcode == TGSI_OPCODE_TXB2)
1337 		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
1338 
1339 	/* Pack depth comparison value */
1340 	if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
1341 		LLVMValueRef z;
1342 
1343 		if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
1344 			z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
1345 		} else {
1346 			assert(ref_pos >= 0);
1347 			z = coords[ref_pos];
1348 		}
1349 
1350 		/* Section 8.23.1 (Depth Texture Comparison Mode) of the
1351 		 * OpenGL 4.5 spec says:
1352 		 *
1353 		 *    "If the texture’s internal format indicates a fixed-point
1354 		 *     depth texture, then D_t and D_ref are clamped to the
1355 		 *     range [0, 1]; otherwise no clamping is performed."
1356 		 *
1357 		 * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
1358 		 * so the depth comparison value isn't clamped for Z16 and
1359 		 * Z24 anymore. Do it manually here.
1360 		 */
1361 		if (ctx->screen->info.chip_class >= VI) {
1362 			LLVMValueRef upgraded;
1363 			LLVMValueRef clamped;
1364 			upgraded = LLVMBuildExtractElement(ctx->ac.builder, samp_ptr,
1365 							   LLVMConstInt(ctx->i32, 3, false), "");
1366 			upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
1367 						 LLVMConstInt(ctx->i32, 29, false), "");
1368 			upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->i1, "");
1369 			clamped = ac_build_clamp(&ctx->ac, z);
1370 			z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, "");
1371 		}
1372 
1373 		address[count++] = z;
1374 	}
1375 
1376 	/* Pack user derivatives */
1377 	if (opcode == TGSI_OPCODE_TXD) {
1378 		int param, num_src_deriv_channels, num_dst_deriv_channels;
1379 
1380 		switch (target) {
1381 		case TGSI_TEXTURE_3D:
1382 			num_src_deriv_channels = 3;
1383 			num_dst_deriv_channels = 3;
1384 			num_deriv_channels = 3;
1385 			break;
1386 		case TGSI_TEXTURE_2D:
1387 		case TGSI_TEXTURE_SHADOW2D:
1388 		case TGSI_TEXTURE_RECT:
1389 		case TGSI_TEXTURE_SHADOWRECT:
1390 		case TGSI_TEXTURE_2D_ARRAY:
1391 		case TGSI_TEXTURE_SHADOW2D_ARRAY:
1392 			num_src_deriv_channels = 2;
1393 			num_dst_deriv_channels = 2;
1394 			num_deriv_channels = 2;
1395 			break;
1396 		case TGSI_TEXTURE_CUBE:
1397 		case TGSI_TEXTURE_SHADOWCUBE:
1398 		case TGSI_TEXTURE_CUBE_ARRAY:
1399 		case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1400 			/* Cube derivatives will be converted to 2D. */
1401 			num_src_deriv_channels = 3;
1402 			num_dst_deriv_channels = 3;
1403 			num_deriv_channels = 2;
1404 			break;
1405 		case TGSI_TEXTURE_1D:
1406 		case TGSI_TEXTURE_SHADOW1D:
1407 		case TGSI_TEXTURE_1D_ARRAY:
1408 		case TGSI_TEXTURE_SHADOW1D_ARRAY:
1409 			num_src_deriv_channels = 1;
1410 
1411 			/* 1D textures are allocated and used as 2D on GFX9. */
1412 			if (ctx->screen->info.chip_class >= GFX9) {
1413 				num_dst_deriv_channels = 2;
1414 				num_deriv_channels = 2;
1415 			} else {
1416 				num_dst_deriv_channels = 1;
1417 				num_deriv_channels = 1;
1418 			}
1419 			break;
1420 		default:
1421 			unreachable("invalid target");
1422 		}
1423 
1424 		for (param = 0; param < 2; param++) {
1425 			for (chan = 0; chan < num_src_deriv_channels; chan++)
1426 				derivs[param * num_dst_deriv_channels + chan] =
1427 					lp_build_emit_fetch(bld_base, inst, param+1, chan);
1428 
1429 			/* Fill in the rest with zeros. */
1430 			for (chan = num_src_deriv_channels;
1431 			     chan < num_dst_deriv_channels; chan++)
1432 				derivs[param * num_dst_deriv_channels + chan] =
1433 					ctx->ac.f32_0;
1434 		}
1435 	}
1436 
1437 	if (target == TGSI_TEXTURE_CUBE ||
1438 	    target == TGSI_TEXTURE_CUBE_ARRAY ||
1439 	    target == TGSI_TEXTURE_SHADOWCUBE ||
1440 	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
1441 		ac_prepare_cube_coords(&ctx->ac,
1442 				       opcode == TGSI_OPCODE_TXD,
1443 				       target == TGSI_TEXTURE_CUBE_ARRAY ||
1444 				       target == TGSI_TEXTURE_SHADOWCUBE_ARRAY,
1445 				       opcode == TGSI_OPCODE_LODQ,
1446 				       coords, derivs);
1447 	} else if (tgsi_is_array_sampler(target) &&
1448 		   opcode != TGSI_OPCODE_TXF &&
1449 		   opcode != TGSI_OPCODE_TXF_LZ &&
1450 		   ctx->screen->info.chip_class <= VI) {
1451 		unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2;
1452 		coords[array_coord] =
1453 			ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32,
1454 					   &coords[array_coord], 1, 0);
1455 	}
1456 
1457 	if (opcode == TGSI_OPCODE_TXD)
1458 		for (int i = 0; i < num_deriv_channels * 2; i++)
1459 			address[count++] = derivs[i];
1460 
1461 	/* Pack texture coordinates */
1462 	address[count++] = coords[0];
1463 	if (num_coords > 1)
1464 		address[count++] = coords[1];
1465 	if (num_coords > 2)
1466 		address[count++] = coords[2];
1467 
1468 	/* 1D textures are allocated and used as 2D on GFX9. */
1469 	if (ctx->screen->info.chip_class >= GFX9) {
1470 		LLVMValueRef filler;
1471 
1472 		/* Use 0.5, so that we don't sample the border color. */
1473 		if (opcode == TGSI_OPCODE_TXF ||
1474 		    opcode == TGSI_OPCODE_TXF_LZ)
1475 			filler = ctx->i32_0;
1476 		else
1477 			filler = LLVMConstReal(ctx->f32, 0.5);
1478 
1479 		if (target == TGSI_TEXTURE_1D ||
1480 		    target == TGSI_TEXTURE_SHADOW1D) {
1481 			address[count++] = filler;
1482 		} else if (target == TGSI_TEXTURE_1D_ARRAY ||
1483 			   target == TGSI_TEXTURE_SHADOW1D_ARRAY) {
1484 			address[count] = address[count - 1];
1485 			address[count - 1] = filler;
1486 			count++;
1487 		}
1488 	}
1489 
1490 	/* Pack LOD or sample index */
1491 	if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
1492 		address[count++] = coords[3];
1493 	else if (opcode == TGSI_OPCODE_TXL2)
1494 		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
1495 
1496 	if (count > 16) {
1497 		assert(!"Cannot handle more than 16 texture address parameters");
1498 		count = 16;
1499 	}
1500 
1501 	for (chan = 0; chan < count; chan++)
1502 		address[chan] = ac_to_integer(&ctx->ac, address[chan]);
1503 
1504 	/* Adjust the sample index according to FMASK.
1505 	 *
1506 	 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
1507 	 * which is the identity mapping. Each nibble says which physical sample
1508 	 * should be fetched to get that sample.
1509 	 *
1510 	 * For example, 0x11111100 means there are only 2 samples stored and
1511 	 * the second sample covers 3/4 of the pixel. When reading samples 0
1512 	 * and 1, return physical sample 0 (determined by the first two 0s
1513 	 * in FMASK), otherwise return physical sample 1.
1514 	 *
1515 	 * The sample index should be adjusted as follows:
1516 	 *   sample_index = (fmask >> (sample_index * 4)) & 0xF;
1517 	 */
1518 	if (target == TGSI_TEXTURE_2D_MSAA ||
1519 	    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
1520 		struct lp_build_emit_data txf_emit_data = *emit_data;
1521 		LLVMValueRef txf_address[4];
1522 		/* We only need .xy for non-arrays, and .xyz for arrays. */
1523 		unsigned txf_count = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3;
1524 		struct tgsi_full_instruction inst = {};
1525 
1526 		memcpy(txf_address, address, sizeof(txf_address));
1527 
1528 		/* Read FMASK using TXF_LZ. */
1529 		inst.Instruction.Opcode = TGSI_OPCODE_TXF_LZ;
1530 		inst.Texture.Texture = target;
1531 		txf_emit_data.inst = &inst;
1532 		txf_emit_data.chan = 0;
1533 		set_tex_fetch_args(ctx, &txf_emit_data,
1534 				   target, fmask_ptr, NULL,
1535 				   txf_address, txf_count, 0xf);
1536 		build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data);
1537 
1538 		/* Initialize some constants. */
1539 		LLVMValueRef four = LLVMConstInt(ctx->i32, 4, 0);
1540 		LLVMValueRef F = LLVMConstInt(ctx->i32, 0xF, 0);
1541 
1542 		/* Apply the formula. */
1543 		LLVMValueRef fmask =
1544 			LLVMBuildExtractElement(ctx->ac.builder,
1545 						txf_emit_data.output[0],
1546 						ctx->i32_0, "");
1547 
1548 		unsigned sample_chan = txf_count; /* the sample index is last */
1549 
1550 		LLVMValueRef sample_index4 =
1551 			LLVMBuildMul(ctx->ac.builder, address[sample_chan], four, "");
1552 
1553 		LLVMValueRef shifted_fmask =
1554 			LLVMBuildLShr(ctx->ac.builder, fmask, sample_index4, "");
1555 
1556 		LLVMValueRef final_sample =
1557 			LLVMBuildAnd(ctx->ac.builder, shifted_fmask, F, "");
1558 
1559 		/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
1560 		 * resource descriptor is 0 (invalid),
1561 		 */
1562 		LLVMValueRef fmask_desc =
1563 			LLVMBuildBitCast(ctx->ac.builder, fmask_ptr,
1564 					 ctx->v8i32, "");
1565 
1566 		LLVMValueRef fmask_word1 =
1567 			LLVMBuildExtractElement(ctx->ac.builder, fmask_desc,
1568 						ctx->i32_1, "");
1569 
1570 		LLVMValueRef word1_is_nonzero =
1571 			LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
1572 				      fmask_word1, ctx->i32_0, "");
1573 
1574 		/* Replace the MSAA sample index. */
1575 		address[sample_chan] =
1576 			LLVMBuildSelect(ctx->ac.builder, word1_is_nonzero,
1577 					final_sample, address[sample_chan], "");
1578 	}
1579 
1580 	if (opcode == TGSI_OPCODE_TXF ||
1581 	    opcode == TGSI_OPCODE_TXF_LZ) {
1582 		/* add tex offsets */
1583 		if (inst->Texture.NumOffsets) {
1584 			struct lp_build_context *uint_bld = &bld_base->uint_bld;
1585 			const struct tgsi_texture_offset *off = inst->TexOffsets;
1586 
1587 			assert(inst->Texture.NumOffsets == 1);
1588 
1589 			switch (target) {
1590 			case TGSI_TEXTURE_3D:
1591 				address[2] = lp_build_add(uint_bld, address[2],
1592 						ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ]);
1593 				/* fall through */
1594 			case TGSI_TEXTURE_2D:
1595 			case TGSI_TEXTURE_SHADOW2D:
1596 			case TGSI_TEXTURE_RECT:
1597 			case TGSI_TEXTURE_SHADOWRECT:
1598 			case TGSI_TEXTURE_2D_ARRAY:
1599 			case TGSI_TEXTURE_SHADOW2D_ARRAY:
1600 				address[1] =
1601 					lp_build_add(uint_bld, address[1],
1602 						ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY]);
1603 				/* fall through */
1604 			case TGSI_TEXTURE_1D:
1605 			case TGSI_TEXTURE_SHADOW1D:
1606 			case TGSI_TEXTURE_1D_ARRAY:
1607 			case TGSI_TEXTURE_SHADOW1D_ARRAY:
1608 				address[0] =
1609 					lp_build_add(uint_bld, address[0],
1610 						ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX]);
1611 				break;
1612 				/* texture offsets do not apply to other texture targets */
1613 			}
1614 		}
1615 	}
1616 
1617 	if (opcode == TGSI_OPCODE_TG4) {
1618 		unsigned gather_comp = 0;
1619 
1620 		/* DMASK was repurposed for GATHER4. 4 components are always
1621 		 * returned and DMASK works like a swizzle - it selects
1622 		 * the component to fetch. The only valid DMASK values are
1623 		 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
1624 		 * (red,red,red,red) etc.) The ISA document doesn't mention
1625 		 * this.
1626 		 */
1627 
1628 		/* Get the component index from src1.x for Gather4. */
1629 		if (!tgsi_is_shadow_target(target)) {
1630 			LLVMValueRef comp_imm;
1631 			struct tgsi_src_register src1 = inst->Src[1].Register;
1632 
1633 			assert(src1.File == TGSI_FILE_IMMEDIATE);
1634 
1635 			comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX];
1636 			gather_comp = LLVMConstIntGetZExtValue(comp_imm);
1637 			gather_comp = CLAMP(gather_comp, 0, 3);
1638 		}
1639 
1640 		dmask = 1 << gather_comp;
1641 	}
1642 
1643 	set_tex_fetch_args(ctx, emit_data, target, res_ptr,
1644 			   samp_ptr, address, count, dmask);
1645 }
1646 
1647 /* Gather4 should follow the same rules as bilinear filtering, but the hardware
1648  * incorrectly forces nearest filtering if the texture format is integer.
1649  * The only effect it has on Gather4, which always returns 4 texels for
1650  * bilinear filtering, is that the final coordinates are off by 0.5 of
1651  * the texel size.
1652  *
1653  * The workaround is to subtract 0.5 from the unnormalized coordinates,
1654  * or (0.5 / size) from the normalized coordinates.
1655  *
1656  * However, cube textures with 8_8_8_8 data formats require a different
1657  * workaround of overriding the num format to USCALED/SSCALED. This would lose
1658  * precision in 32-bit data formats, so it needs to be applied dynamically at
1659  * runtime. In this case, return an i1 value that indicates whether the
1660  * descriptor was overridden (and hence a fixup of the sampler result is needed).
1661  */
1662 static LLVMValueRef
si_lower_gather4_integer(struct si_shader_context * ctx,struct ac_image_args * args,unsigned target,enum tgsi_return_type return_type)1663 si_lower_gather4_integer(struct si_shader_context *ctx,
1664 			 struct ac_image_args *args,
1665 			 unsigned target,
1666 			 enum tgsi_return_type return_type)
1667 {
1668 	LLVMBuilderRef builder = ctx->ac.builder;
1669 	LLVMValueRef wa_8888 = NULL;
1670 	LLVMValueRef coord = args->addr;
1671 	LLVMValueRef half_texel[2];
1672 	/* Texture coordinates start after:
1673 	 *   {offset, bias, z-compare, derivatives}
1674 	 * Only the offset and z-compare can occur here.
1675 	 */
1676 	unsigned coord_vgpr_index = (int)args->offset + (int)args->compare;
1677 	int c;
1678 
1679 	assert(return_type == TGSI_RETURN_TYPE_SINT ||
1680 	       return_type == TGSI_RETURN_TYPE_UINT);
1681 
1682 	if (target == TGSI_TEXTURE_CUBE ||
1683 	    target == TGSI_TEXTURE_CUBE_ARRAY) {
1684 		LLVMValueRef formats;
1685 		LLVMValueRef data_format;
1686 		LLVMValueRef wa_formats;
1687 
1688 		formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, "");
1689 
1690 		data_format = LLVMBuildLShr(builder, formats,
1691 					    LLVMConstInt(ctx->i32, 20, false), "");
1692 		data_format = LLVMBuildAnd(builder, data_format,
1693 					   LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
1694 		wa_8888 = LLVMBuildICmp(
1695 			builder, LLVMIntEQ, data_format,
1696 			LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
1697 			"");
1698 
1699 		uint32_t wa_num_format =
1700 			return_type == TGSI_RETURN_TYPE_UINT ?
1701 			S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_USCALED) :
1702 			S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_SSCALED);
1703 		wa_formats = LLVMBuildAnd(builder, formats,
1704 					  LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false),
1705 					  "");
1706 		wa_formats = LLVMBuildOr(builder, wa_formats,
1707 					LLVMConstInt(ctx->i32, wa_num_format, false), "");
1708 
1709 		formats = LLVMBuildSelect(builder, wa_8888, wa_formats, formats, "");
1710 		args->resource = LLVMBuildInsertElement(
1711 			builder, args->resource, formats, ctx->i32_1, "");
1712 	}
1713 
1714 	if (target == TGSI_TEXTURE_RECT ||
1715 	    target == TGSI_TEXTURE_SHADOWRECT) {
1716 		assert(!wa_8888);
1717 		half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
1718 	} else {
1719 		struct tgsi_full_instruction txq_inst = {};
1720 		struct lp_build_emit_data txq_emit_data = {};
1721 		struct lp_build_if_state if_ctx;
1722 
1723 		if (wa_8888) {
1724 			/* Skip the texture size query entirely if we don't need it. */
1725 			lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, ""));
1726 		}
1727 
1728 		/* Query the texture size. */
1729 		txq_inst.Texture.Texture = target;
1730 		txq_emit_data.inst = &txq_inst;
1731 		txq_emit_data.dst_type = ctx->v4i32;
1732 		set_tex_fetch_args(ctx, &txq_emit_data, target,
1733 				   args->resource, NULL, &ctx->i32_0,
1734 				   1, 0xf);
1735 		txq_emit(NULL, &ctx->bld_base, &txq_emit_data);
1736 
1737 		/* Compute -0.5 / size. */
1738 		for (c = 0; c < 2; c++) {
1739 			half_texel[c] =
1740 				LLVMBuildExtractElement(builder, txq_emit_data.output[0],
1741 							LLVMConstInt(ctx->i32, c, 0), "");
1742 			half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, "");
1743 			half_texel[c] =
1744 				lp_build_emit_llvm_unary(&ctx->bld_base,
1745 							 TGSI_OPCODE_RCP, half_texel[c]);
1746 			half_texel[c] = LLVMBuildFMul(builder, half_texel[c],
1747 						      LLVMConstReal(ctx->f32, -0.5), "");
1748 		}
1749 
1750 		if (wa_8888) {
1751 			lp_build_endif(&if_ctx);
1752 
1753 			LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block };
1754 
1755 			for (c = 0; c < 2; c++) {
1756 				LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 };
1757 				half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2,
1758 							     values, bb);
1759 			}
1760 		}
1761 	}
1762 
1763 	for (c = 0; c < 2; c++) {
1764 		LLVMValueRef tmp;
1765 		LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
1766 
1767 		tmp = LLVMBuildExtractElement(builder, coord, index, "");
1768 		tmp = ac_to_float(&ctx->ac, tmp);
1769 		tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
1770 		tmp = ac_to_integer(&ctx->ac, tmp);
1771 		coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
1772 	}
1773 
1774 	args->addr = coord;
1775 
1776 	return wa_8888;
1777 }
1778 
1779 /* The second half of the cube texture 8_8_8_8 integer workaround: adjust the
1780  * result after the gather operation.
1781  */
1782 static LLVMValueRef
si_fix_gather4_integer_result(struct si_shader_context * ctx,LLVMValueRef result,enum tgsi_return_type return_type,LLVMValueRef wa)1783 si_fix_gather4_integer_result(struct si_shader_context *ctx,
1784 			   LLVMValueRef result,
1785 			   enum tgsi_return_type return_type,
1786 			   LLVMValueRef wa)
1787 {
1788 	LLVMBuilderRef builder = ctx->ac.builder;
1789 
1790 	assert(return_type == TGSI_RETURN_TYPE_SINT ||
1791 	       return_type == TGSI_RETURN_TYPE_UINT);
1792 
1793 	for (unsigned chan = 0; chan < 4; ++chan) {
1794 		LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false);
1795 		LLVMValueRef value;
1796 		LLVMValueRef wa_value;
1797 
1798 		value = LLVMBuildExtractElement(builder, result, chanv, "");
1799 
1800 		if (return_type == TGSI_RETURN_TYPE_UINT)
1801 			wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, "");
1802 		else
1803 			wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, "");
1804 		wa_value = ac_to_float(&ctx->ac, wa_value);
1805 		value = LLVMBuildSelect(builder, wa, wa_value, value, "");
1806 
1807 		result = LLVMBuildInsertElement(builder, result, value, chanv, "");
1808 	}
1809 
1810 	return result;
1811 }
1812 
build_tex_intrinsic(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)1813 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
1814 				struct lp_build_tgsi_context *bld_base,
1815 				struct lp_build_emit_data *emit_data)
1816 {
1817 	struct si_shader_context *ctx = si_shader_context(bld_base);
1818 	const struct tgsi_full_instruction *inst = emit_data->inst;
1819 	struct ac_image_args args;
1820 	unsigned opcode = inst->Instruction.Opcode;
1821 	unsigned target = inst->Texture.Texture;
1822 
1823 	if (target == TGSI_TEXTURE_BUFFER) {
1824 		emit_data->output[emit_data->chan] =
1825 			ac_build_buffer_load_format(&ctx->ac,
1826 						    emit_data->args[0],
1827 						    emit_data->args[2],
1828 						    emit_data->args[1],
1829 						    true);
1830 		return;
1831 	}
1832 
1833 	memcpy(&args, emit_data->args, sizeof(args)); /* ugly */
1834 
1835 	args.opcode = ac_image_sample;
1836 	args.compare = tgsi_is_shadow_target(target);
1837 	args.offset = inst->Texture.NumOffsets > 0;
1838 
1839 	switch (opcode) {
1840 	case TGSI_OPCODE_TXF:
1841 	case TGSI_OPCODE_TXF_LZ:
1842 		args.opcode = opcode == TGSI_OPCODE_TXF_LZ ||
1843 			      target == TGSI_TEXTURE_2D_MSAA ||
1844 			      target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
1845 				      ac_image_load : ac_image_load_mip;
1846 		args.compare = false;
1847 		args.offset = false;
1848 		break;
1849 	case TGSI_OPCODE_LODQ:
1850 		args.opcode = ac_image_get_lod;
1851 		args.compare = false;
1852 		args.offset = false;
1853 		break;
1854 	case TGSI_OPCODE_TEX:
1855 	case TGSI_OPCODE_TEX2:
1856 	case TGSI_OPCODE_TXP:
1857 		if (ctx->type != PIPE_SHADER_FRAGMENT)
1858 			args.level_zero = true;
1859 		break;
1860 	case TGSI_OPCODE_TEX_LZ:
1861 		args.level_zero = true;
1862 		break;
1863 	case TGSI_OPCODE_TXB:
1864 	case TGSI_OPCODE_TXB2:
1865 		assert(ctx->type == PIPE_SHADER_FRAGMENT);
1866 		args.bias = true;
1867 		break;
1868 	case TGSI_OPCODE_TXL:
1869 	case TGSI_OPCODE_TXL2:
1870 		args.lod = true;
1871 		break;
1872 	case TGSI_OPCODE_TXD:
1873 		args.deriv = true;
1874 		break;
1875 	case TGSI_OPCODE_TG4:
1876 		args.opcode = ac_image_gather4;
1877 		args.level_zero = true;
1878 		break;
1879 	default:
1880 		assert(0);
1881 		return;
1882 	}
1883 
1884 	/* The hardware needs special lowering for Gather4 with integer formats. */
1885 	LLVMValueRef gather4_int_result_workaround = NULL;
1886 
1887 	if (ctx->screen->info.chip_class <= VI &&
1888 	    opcode == TGSI_OPCODE_TG4) {
1889 		assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);
1890 
1891 		if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
1892 		    inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
1893 			gather4_int_result_workaround =
1894 				si_lower_gather4_integer(ctx, &args, target,
1895 							 inst->Texture.ReturnType);
1896 		}
1897 	}
1898 
1899 	LLVMValueRef result =
1900 		ac_build_image_opcode(&ctx->ac, &args);
1901 
1902 	if (gather4_int_result_workaround) {
1903 		result = si_fix_gather4_integer_result(ctx, result,
1904 						       inst->Texture.ReturnType,
1905 						       gather4_int_result_workaround);
1906 	}
1907 
1908 	emit_data->output[emit_data->chan] = result;
1909 }
1910 
si_llvm_emit_txqs(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)1911 static void si_llvm_emit_txqs(
1912 	const struct lp_build_tgsi_action *action,
1913 	struct lp_build_tgsi_context *bld_base,
1914 	struct lp_build_emit_data *emit_data)
1915 {
1916 	struct si_shader_context *ctx = si_shader_context(bld_base);
1917 	LLVMValueRef res, samples;
1918 	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
1919 
1920 	tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
1921 
1922 
1923 	/* Read the samples from the descriptor directly. */
1924 	res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->v8i32, "");
1925 	samples = LLVMBuildExtractElement(ctx->ac.builder, res,
1926 					  LLVMConstInt(ctx->i32, 3, 0), "");
1927 	samples = LLVMBuildLShr(ctx->ac.builder, samples,
1928 				LLVMConstInt(ctx->i32, 16, 0), "");
1929 	samples = LLVMBuildAnd(ctx->ac.builder, samples,
1930 			       LLVMConstInt(ctx->i32, 0xf, 0), "");
1931 	samples = LLVMBuildShl(ctx->ac.builder, ctx->i32_1,
1932 			       samples, "");
1933 
1934 	emit_data->output[emit_data->chan] = samples;
1935 }
1936 
1937 static const struct lp_build_tgsi_action tex_action = {
1938 	.fetch_args = tex_fetch_args,
1939 	.emit = build_tex_intrinsic,
1940 };
1941 
1942 /**
1943  * Setup actions for TGSI memory opcode, including texture opcodes.
1944  */
si_shader_context_init_mem(struct si_shader_context * ctx)1945 void si_shader_context_init_mem(struct si_shader_context *ctx)
1946 {
1947 	struct lp_build_tgsi_context *bld_base;
1948 	struct lp_build_tgsi_action tmpl = {};
1949 
1950 	bld_base = &ctx->bld_base;
1951 
1952 	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
1953 	bld_base->op_actions[TGSI_OPCODE_TEX_LZ] = tex_action;
1954 	bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
1955 	bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
1956 	bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action;
1957 	bld_base->op_actions[TGSI_OPCODE_TXD] = tex_action;
1958 	bld_base->op_actions[TGSI_OPCODE_TXF] = tex_action;
1959 	bld_base->op_actions[TGSI_OPCODE_TXF_LZ] = tex_action;
1960 	bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
1961 	bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
1962 	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
1963 	bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = txq_fetch_args;
1964 	bld_base->op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
1965 	bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
1966 	bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
1967 	bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
1968 
1969 	bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args;
1970 	bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
1971 	bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args;
1972 	bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
1973 	bld_base->op_actions[TGSI_OPCODE_RESQ].fetch_args = resq_fetch_args;
1974 	bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
1975 
1976 	tmpl.fetch_args = atomic_fetch_args;
1977 	tmpl.emit = atomic_emit;
1978 	bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl;
1979 	bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add";
1980 	bld_base->op_actions[TGSI_OPCODE_ATOMXCHG] = tmpl;
1981 	bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap";
1982 	bld_base->op_actions[TGSI_OPCODE_ATOMCAS] = tmpl;
1983 	bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap";
1984 	bld_base->op_actions[TGSI_OPCODE_ATOMAND] = tmpl;
1985 	bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and";
1986 	bld_base->op_actions[TGSI_OPCODE_ATOMOR] = tmpl;
1987 	bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or";
1988 	bld_base->op_actions[TGSI_OPCODE_ATOMXOR] = tmpl;
1989 	bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor";
1990 	bld_base->op_actions[TGSI_OPCODE_ATOMUMIN] = tmpl;
1991 	bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin";
1992 	bld_base->op_actions[TGSI_OPCODE_ATOMUMAX] = tmpl;
1993 	bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax";
1994 	bld_base->op_actions[TGSI_OPCODE_ATOMIMIN] = tmpl;
1995 	bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin";
1996 	bld_base->op_actions[TGSI_OPCODE_ATOMIMAX] = tmpl;
1997 	bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax";
1998 }
1999