1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include "draw_llvm.h"
29 
30 #include "draw_context.h"
31 #include "draw_vs.h"
32 #include "draw_gs.h"
33 
34 #include "gallivm/lp_bld_arit.h"
35 #include "gallivm/lp_bld_arit_overflow.h"
36 #include "gallivm/lp_bld_bitarit.h"
37 #include "gallivm/lp_bld_gather.h"
38 #include "gallivm/lp_bld_logic.h"
39 #include "gallivm/lp_bld_const.h"
40 #include "gallivm/lp_bld_coro.h"
41 #include "gallivm/lp_bld_swizzle.h"
42 #include "gallivm/lp_bld_struct.h"
43 #include "gallivm/lp_bld_type.h"
44 #include "gallivm/lp_bld_flow.h"
45 #include "gallivm/lp_bld_debug.h"
46 #include "gallivm/lp_bld_tgsi.h"
47 #include "gallivm/lp_bld_nir.h"
48 #include "gallivm/lp_bld_printf.h"
49 #include "gallivm/lp_bld_intr.h"
50 #include "gallivm/lp_bld_init.h"
51 #include "gallivm/lp_bld_type.h"
52 #include "gallivm/lp_bld_pack.h"
53 #include "gallivm/lp_bld_format.h"
54 #include "gallivm/lp_bld_misc.h"
55 #include "tgsi/tgsi_exec.h"
56 #include "tgsi/tgsi_dump.h"
57 
58 #include "util/u_math.h"
59 #include "util/u_pointer.h"
60 #include "util/u_string.h"
61 #include "util/simple_list.h"
62 #include "nir_serialize.h"
63 #include "util/mesa-sha1.h"
64 #define DEBUG_STORE 0
65 
66 
/*
 * Forward declaration.  The function has internal linkage, so its
 * definition lives elsewhere in this translation unit; declaring it here
 * lets code that precedes the definition call it.
 */
static void
draw_llvm_generate(struct draw_llvm *llvm,
                   struct draw_llvm_variant *var);
69 
70 
71 struct draw_gs_llvm_iface {
72    struct lp_build_gs_iface base;
73 
74    struct draw_gs_llvm_variant *variant;
75    LLVMValueRef input;
76 };
77 
/**
 * Reinterpret a generic GS interface pointer as the draw-specific wrapper.
 *
 * This is a bare pointer cast with no checking; it is only meaningful when
 * \p iface addresses the `base` member at the start of a
 * struct draw_gs_llvm_iface.
 */
static inline const struct draw_gs_llvm_iface *
draw_gs_llvm_iface(const struct lp_build_gs_iface *iface)
{
   const struct draw_gs_llvm_iface *gs_iface =
      (const struct draw_gs_llvm_iface *)iface;

   return gs_iface;
}
83 
84 struct draw_tcs_llvm_iface {
85    struct lp_build_tcs_iface base;
86 
87    struct draw_tcs_llvm_variant *variant;
88    LLVMValueRef input;
89    LLVMValueRef output;
90 };
91 
/**
 * Reinterpret a generic TCS interface pointer as the draw-specific wrapper.
 *
 * This is a bare pointer cast with no checking; it is only meaningful when
 * \p iface addresses the `base` member at the start of a
 * struct draw_tcs_llvm_iface.
 */
static inline const struct draw_tcs_llvm_iface *
draw_tcs_llvm_iface(const struct lp_build_tcs_iface *iface)
{
   const struct draw_tcs_llvm_iface *tcs_iface =
      (const struct draw_tcs_llvm_iface *)iface;

   return tcs_iface;
}
97 
98 struct draw_tes_llvm_iface {
99    struct lp_build_tes_iface base;
100 
101    struct draw_tes_llvm_variant *variant;
102    LLVMValueRef input;
103 };
104 
/**
 * Reinterpret a generic TES interface pointer as the draw-specific wrapper.
 *
 * This is a bare pointer cast with no checking; it is only meaningful when
 * \p iface addresses the `base` member at the start of a
 * struct draw_tes_llvm_iface.
 */
static inline const struct draw_tes_llvm_iface *
draw_tes_llvm_iface(const struct lp_build_tes_iface *iface)
{
   const struct draw_tes_llvm_iface *tes_iface =
      (const struct draw_tes_llvm_iface *)iface;

   return tes_iface;
}
110 
111 /**
112  * Create LLVM type for draw_vertex_buffer.
113  */
114 static LLVMTypeRef
create_jit_dvbuffer_type(struct gallivm_state * gallivm,const char * struct_name)115 create_jit_dvbuffer_type(struct gallivm_state *gallivm,
116                          const char *struct_name)
117 {
118    LLVMTargetDataRef target = gallivm->target;
119    LLVMTypeRef dvbuffer_type;
120    LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];
121    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
122 
123    elem_types[DRAW_JIT_DVBUFFER_MAP] =
124       LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
125    elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;
126 
127    dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
128                                            ARRAY_SIZE(elem_types), 0);
129 
130    (void) target; /* silence unused var warning for non-debug build */
131    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
132                           target, dvbuffer_type,
133                           DRAW_JIT_DVBUFFER_MAP);
134    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,
135                           target, dvbuffer_type,
136                           DRAW_JIT_DVBUFFER_SIZE);
137 
138    return dvbuffer_type;
139 }
140 
141 /**
142  * Create LLVM type for struct draw_jit_texture
143  */
144 static LLVMTypeRef
create_jit_texture_type(struct gallivm_state * gallivm,const char * struct_name)145 create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)
146 {
147    LLVMTargetDataRef target = gallivm->target;
148    LLVMTypeRef texture_type;
149    LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
150    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
151 
152    elem_types[DRAW_JIT_TEXTURE_WIDTH]  =
153    elem_types[DRAW_JIT_TEXTURE_HEIGHT] =
154    elem_types[DRAW_JIT_TEXTURE_DEPTH] =
155    elem_types[DRAW_JIT_TEXTURE_NUM_SAMPLES] =
156    elem_types[DRAW_JIT_TEXTURE_SAMPLE_STRIDE] =
157    elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] =
158    elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type;
159    elem_types[DRAW_JIT_TEXTURE_BASE] =
160       LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
161    elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
162    elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
163    elem_types[DRAW_JIT_TEXTURE_MIP_OFFSETS] =
164       LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS);
165 
166    texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
167                                           ARRAY_SIZE(elem_types), 0);
168 
169    (void) target; /* silence unused var warning for non-debug build */
170    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
171                           target, texture_type,
172                           DRAW_JIT_TEXTURE_WIDTH);
173    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
174                           target, texture_type,
175                           DRAW_JIT_TEXTURE_HEIGHT);
176    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
177                           target, texture_type,
178                           DRAW_JIT_TEXTURE_DEPTH);
179    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, base,
180                           target, texture_type,
181                           DRAW_JIT_TEXTURE_BASE);
182    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
183                           target, texture_type,
184                           DRAW_JIT_TEXTURE_ROW_STRIDE);
185    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
186                           target, texture_type,
187                           DRAW_JIT_TEXTURE_IMG_STRIDE);
188    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level,
189                           target, texture_type,
190                           DRAW_JIT_TEXTURE_FIRST_LEVEL);
191    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
192                           target, texture_type,
193                           DRAW_JIT_TEXTURE_LAST_LEVEL);
194    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, mip_offsets,
195                           target, texture_type,
196                           DRAW_JIT_TEXTURE_MIP_OFFSETS);
197    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, num_samples,
198                           target, texture_type,
199                           DRAW_JIT_TEXTURE_NUM_SAMPLES);
200    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, sample_stride,
201                           target, texture_type,
202                           DRAW_JIT_TEXTURE_SAMPLE_STRIDE);
203 
204    LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type);
205 
206    return texture_type;
207 }
208 
209 
210 /**
211  * Create LLVM type for struct draw_jit_sampler
212  */
213 static LLVMTypeRef
create_jit_sampler_type(struct gallivm_state * gallivm,const char * struct_name)214 create_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name)
215 {
216    LLVMTargetDataRef target = gallivm->target;
217    LLVMTypeRef sampler_type;
218    LLVMTypeRef elem_types[DRAW_JIT_SAMPLER_NUM_FIELDS];
219 
220    elem_types[DRAW_JIT_SAMPLER_MIN_LOD] =
221    elem_types[DRAW_JIT_SAMPLER_MAX_LOD] =
222    elem_types[DRAW_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context);
223    elem_types[DRAW_JIT_SAMPLER_BORDER_COLOR] =
224       LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
225 
226    sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types,
227                                           ARRAY_SIZE(elem_types), 0);
228 
229    (void) target; /* silence unused var warning for non-debug build */
230    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod,
231                           target, sampler_type,
232                           DRAW_JIT_SAMPLER_MIN_LOD);
233    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, max_lod,
234                           target, sampler_type,
235                           DRAW_JIT_SAMPLER_MAX_LOD);
236    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, lod_bias,
237                           target, sampler_type,
238                           DRAW_JIT_SAMPLER_LOD_BIAS);
239    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, border_color,
240                           target, sampler_type,
241                           DRAW_JIT_SAMPLER_BORDER_COLOR);
242 
243    LP_CHECK_STRUCT_SIZE(struct draw_jit_sampler, target, sampler_type);
244 
245    return sampler_type;
246 }
247 
248 /**
249  * Create LLVM type for struct draw_jit_texture
250  */
251 static LLVMTypeRef
create_jit_image_type(struct gallivm_state * gallivm,const char * struct_name)252 create_jit_image_type(struct gallivm_state *gallivm, const char *struct_name)
253 {
254    LLVMTargetDataRef target = gallivm->target;
255    LLVMTypeRef image_type;
256    LLVMTypeRef elem_types[DRAW_JIT_IMAGE_NUM_FIELDS];
257    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
258 
259    elem_types[DRAW_JIT_IMAGE_WIDTH]  =
260    elem_types[DRAW_JIT_IMAGE_HEIGHT] =
261    elem_types[DRAW_JIT_IMAGE_DEPTH] =
262    elem_types[DRAW_JIT_IMAGE_ROW_STRIDE] =
263    elem_types[DRAW_JIT_IMAGE_IMG_STRIDE] =
264    elem_types[DRAW_JIT_IMAGE_NUM_SAMPLES] =
265    elem_types[DRAW_JIT_IMAGE_SAMPLE_STRIDE] = int32_type;
266    elem_types[DRAW_JIT_IMAGE_BASE] =
267       LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
268 
269    image_type = LLVMStructTypeInContext(gallivm->context, elem_types,
270                                           ARRAY_SIZE(elem_types), 0);
271 
272    (void) target; /* silence unused var warning for non-debug build */
273    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, width,
274                           target, image_type,
275                           DRAW_JIT_IMAGE_WIDTH);
276    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, height,
277                           target, image_type,
278                           DRAW_JIT_IMAGE_HEIGHT);
279    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, depth,
280                           target, image_type,
281                           DRAW_JIT_IMAGE_DEPTH);
282    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, base,
283                           target, image_type,
284                           DRAW_JIT_IMAGE_BASE);
285    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, row_stride,
286                           target, image_type,
287                           DRAW_JIT_IMAGE_ROW_STRIDE);
288    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, img_stride,
289                           target, image_type,
290                           DRAW_JIT_IMAGE_IMG_STRIDE);
291    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, num_samples,
292                           target, image_type,
293                           DRAW_JIT_IMAGE_NUM_SAMPLES);
294    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, sample_stride,
295                           target, image_type,
296                           DRAW_JIT_IMAGE_SAMPLE_STRIDE);
297 
298    LP_CHECK_STRUCT_SIZE(struct draw_jit_image, target, image_type);
299 
300    return image_type;
301 }
302 
303 /**
304  * Create LLVM type for struct draw_jit_context
305  */
306 static LLVMTypeRef
create_jit_context_type(struct gallivm_state * gallivm,LLVMTypeRef texture_type,LLVMTypeRef sampler_type,LLVMTypeRef image_type,const char * struct_name)307 create_jit_context_type(struct gallivm_state *gallivm,
308                         LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
309                         LLVMTypeRef image_type,
310                         const char *struct_name)
311 {
312    LLVMTargetDataRef target = gallivm->target;
313    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
314    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
315    LLVMTypeRef elem_types[DRAW_JIT_CTX_NUM_FIELDS];
316    LLVMTypeRef context_type;
317 
318    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* vs_constants */
319                                  LP_MAX_TGSI_CONST_BUFFERS);
320    elem_types[1] = LLVMArrayType(int_type, /* num_vs_constants */
321                                  LP_MAX_TGSI_CONST_BUFFERS);
322    elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
323                                                  DRAW_TOTAL_CLIP_PLANES), 0);
324    elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */
325    elem_types[4] = LLVMArrayType(texture_type,
326                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
327    elem_types[5] = LLVMArrayType(sampler_type,
328                                  PIPE_MAX_SAMPLERS); /* samplers */
329    elem_types[6] = LLVMArrayType(image_type,
330                                  PIPE_MAX_SHADER_IMAGES); /* images */
331    elem_types[7] = LLVMArrayType(LLVMPointerType(int_type, 0), /* vs_ssbo */
332                                  LP_MAX_TGSI_SHADER_BUFFERS);
333    elem_types[8] = LLVMArrayType(int_type, /* num_vs_ssbos */
334                                  LP_MAX_TGSI_SHADER_BUFFERS);
335    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
336                                           ARRAY_SIZE(elem_types), 0);
337 
338    (void) target; /* silence unused var warning for non-debug build */
339    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
340                           target, context_type, DRAW_JIT_CTX_CONSTANTS);
341    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_constants,
342                           target, context_type, DRAW_JIT_CTX_NUM_CONSTANTS);
343    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
344                           target, context_type, DRAW_JIT_CTX_PLANES);
345    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, viewports,
346                           target, context_type, DRAW_JIT_CTX_VIEWPORT);
347    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
348                           target, context_type,
349                           DRAW_JIT_CTX_TEXTURES);
350    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, samplers,
351                           target, context_type,
352                           DRAW_JIT_CTX_SAMPLERS);
353    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, images,
354                           target, context_type, DRAW_JIT_CTX_IMAGES);
355    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_ssbos,
356                           target, context_type, DRAW_JIT_CTX_SSBOS);
357    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_ssbos,
358                           target, context_type, DRAW_JIT_CTX_NUM_SSBOS);
359    LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
360                         target, context_type);
361 
362    return context_type;
363 }
364 
365 
366 /**
367  * Create LLVM type for struct draw_gs_jit_context
368  */
369 static LLVMTypeRef
create_gs_jit_context_type(struct gallivm_state * gallivm,unsigned vector_length,LLVMTypeRef texture_type,LLVMTypeRef sampler_type,LLVMTypeRef image_type,const char * struct_name)370 create_gs_jit_context_type(struct gallivm_state *gallivm,
371                            unsigned vector_length,
372                            LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
373                            LLVMTypeRef image_type,
374                            const char *struct_name)
375 {
376    LLVMTargetDataRef target = gallivm->target;
377    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
378    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
379    LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];
380    LLVMTypeRef context_type;
381 
382    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
383                                  LP_MAX_TGSI_CONST_BUFFERS);
384    elem_types[1] = LLVMArrayType(int_type, /* num_constants */
385                                  LP_MAX_TGSI_CONST_BUFFERS);
386    elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
387                                                  DRAW_TOTAL_CLIP_PLANES), 0);
388    elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */
389 
390    elem_types[4] = LLVMArrayType(texture_type,
391                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
392    elem_types[5] = LLVMArrayType(sampler_type,
393                                  PIPE_MAX_SAMPLERS); /* samplers */
394    elem_types[6] = LLVMArrayType(image_type,
395                                  PIPE_MAX_SHADER_IMAGES); /* images */
396    elem_types[7] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
397    elem_types[8] = LLVMPointerType(LLVMVectorType(int_type,
398                                                   vector_length), 0);
399    elem_types[9] = LLVMPointerType(LLVMVectorType(int_type,
400                                                   vector_length), 0);
401 
402    elem_types[10] = LLVMArrayType(LLVMPointerType(int_type, 0), /* ssbos */
403                                  LP_MAX_TGSI_SHADER_BUFFERS);
404    elem_types[11] = LLVMArrayType(int_type, /* num_ssbos */
405                                  LP_MAX_TGSI_SHADER_BUFFERS);
406 
407    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
408                                           ARRAY_SIZE(elem_types), 0);
409 
410    (void) target; /* silence unused var warning for non-debug build */
411    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants,
412                           target, context_type, DRAW_GS_JIT_CTX_CONSTANTS);
413    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, num_constants,
414                           target, context_type, DRAW_GS_JIT_CTX_NUM_CONSTANTS);
415    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
416                           target, context_type, DRAW_GS_JIT_CTX_PLANES);
417    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewports,
418                           target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);
419    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures,
420                           target, context_type,
421                           DRAW_GS_JIT_CTX_TEXTURES);
422    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers,
423                           target, context_type,
424                           DRAW_GS_JIT_CTX_SAMPLERS);
425    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
426                           target, context_type,
427                           DRAW_GS_JIT_CTX_PRIM_LENGTHS);
428    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
429                           target, context_type,
430                           DRAW_GS_JIT_CTX_EMITTED_VERTICES);
431    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
432                           target, context_type,
433                           DRAW_GS_JIT_CTX_EMITTED_PRIMS);
434    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, ssbos,
435                           target, context_type, DRAW_GS_JIT_CTX_SSBOS);
436    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, num_ssbos,
437                           target, context_type, DRAW_GS_JIT_CTX_NUM_SSBOS);
438    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, images,
439                           target, context_type, DRAW_GS_JIT_CTX_IMAGES);
440    LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
441                         target, context_type);
442 
443    return context_type;
444 }
445 
446 
447 static LLVMTypeRef
create_gs_jit_input_type(struct gallivm_state * gallivm)448 create_gs_jit_input_type(struct gallivm_state *gallivm)
449 {
450    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
451    LLVMTypeRef input_array;
452 
453    input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
454    input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
455    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
456    input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
457 
458    return input_array;
459 }
460 
461 /**
462  * Create LLVM type for struct pipe_vertex_buffer
463  */
464 static LLVMTypeRef
create_jit_vertex_buffer_type(struct gallivm_state * gallivm,const char * struct_name)465 create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
466                               const char *struct_name)
467 {
468    LLVMTargetDataRef target = gallivm->target;
469    LLVMTypeRef elem_types[4];
470    LLVMTypeRef vb_type;
471 
472    elem_types[0] = LLVMInt16TypeInContext(gallivm->context);
473    elem_types[1] = LLVMInt8TypeInContext(gallivm->context);
474    elem_types[2] = LLVMInt32TypeInContext(gallivm->context);
475    elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
476 
477    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
478                                      ARRAY_SIZE(elem_types), 0);
479 
480    (void) target; /* silence unused var warning for non-debug build */
481    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
482                           target, vb_type, 0);
483    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, is_user_buffer,
484                           target, vb_type, 1);
485    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
486                           target, vb_type, 2);
487    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer.resource,
488                           target, vb_type, 3);
489 
490    LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
491 
492    return vb_type;
493 }
494 
495 
496 /**
497  * Create LLVM type for struct vertex_header;
498  */
499 static LLVMTypeRef
create_jit_vertex_header(struct gallivm_state * gallivm,int data_elems)500 create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
501 {
502    LLVMTargetDataRef target = gallivm->target;
503    LLVMTypeRef elem_types[3];
504    LLVMTypeRef vertex_header;
505    char struct_name[24];
506 
507    snprintf(struct_name, 23, "vertex_header%d", data_elems);
508 
509    elem_types[DRAW_JIT_VERTEX_VERTEX_ID]  = LLVMIntTypeInContext(gallivm->context, 32);
510    elem_types[DRAW_JIT_VERTEX_CLIP_POS]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
511    elem_types[DRAW_JIT_VERTEX_DATA]  = LLVMArrayType(elem_types[1], data_elems);
512 
513    vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
514                                            ARRAY_SIZE(elem_types), 0);
515 
516    /* these are bit-fields and we can't take address of them
517       LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
518       target, vertex_header,
519       DRAW_JIT_VERTEX_CLIPMASK);
520       LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
521       target, vertex_header,
522       DRAW_JIT_VERTEX_EDGEFLAG);
523       LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
524       target, vertex_header,
525       DRAW_JIT_VERTEX_PAD);
526       LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
527       target, vertex_header,
528       DRAW_JIT_VERTEX_VERTEX_ID);
529    */
530    (void) target; /* silence unused var warning for non-debug build */
531    LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip_pos,
532                           target, vertex_header,
533                           DRAW_JIT_VERTEX_CLIP_POS);
534    LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
535                           target, vertex_header,
536                           DRAW_JIT_VERTEX_DATA);
537 
538    assert(LLVMABISizeOfType(target, vertex_header) ==
539           offsetof(struct vertex_header, data[data_elems]));
540 
541    return vertex_header;
542 }
543 
544 /**
545  * Create LLVM type for struct draw_tcs_jit_context
546  */
547 static LLVMTypeRef
create_tcs_jit_context_type(struct gallivm_state * gallivm,unsigned vector_length,LLVMTypeRef texture_type,LLVMTypeRef sampler_type,LLVMTypeRef image_type,const char * struct_name)548 create_tcs_jit_context_type(struct gallivm_state *gallivm,
549                             unsigned vector_length,
550                             LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
551                             LLVMTypeRef image_type,
552                             const char *struct_name)
553 {
554    LLVMTargetDataRef target = gallivm->target;
555    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
556    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
557    LLVMTypeRef elem_types[DRAW_TCS_JIT_CTX_NUM_FIELDS];
558    LLVMTypeRef context_type;
559 
560    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
561                                  LP_MAX_TGSI_CONST_BUFFERS);
562    elem_types[1] = LLVMArrayType(int_type, /* num_constants */
563                                  LP_MAX_TGSI_CONST_BUFFERS);
564    elem_types[2] = LLVMInt32TypeInContext(gallivm->context);
565    elem_types[3] = LLVMInt32TypeInContext(gallivm->context);
566 
567    elem_types[4] = LLVMArrayType(texture_type,
568                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
569    elem_types[5] = LLVMArrayType(sampler_type,
570                                  PIPE_MAX_SAMPLERS); /* samplers */
571    elem_types[6] = LLVMArrayType(image_type,
572                                  PIPE_MAX_SHADER_IMAGES); /* images */
573 
574    elem_types[7] = LLVMArrayType(LLVMPointerType(int_type, 0), /* ssbos */
575                                  LP_MAX_TGSI_SHADER_BUFFERS);
576    elem_types[8] = LLVMArrayType(int_type, /* num_ssbos */
577                                  LP_MAX_TGSI_SHADER_BUFFERS);
578 
579    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
580                                           ARRAY_SIZE(elem_types), 0);
581 
582    (void) target; /* silence unused var warning for non-debug build */
583    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, constants,
584                           target, context_type, DRAW_TCS_JIT_CTX_CONSTANTS);
585    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, num_constants,
586                           target, context_type, DRAW_TCS_JIT_CTX_NUM_CONSTANTS);
587    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, textures,
588                           target, context_type,
589                           DRAW_TCS_JIT_CTX_TEXTURES);
590    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, samplers,
591                           target, context_type,
592                           DRAW_TCS_JIT_CTX_SAMPLERS);
593    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, ssbos,
594                           target, context_type, DRAW_TCS_JIT_CTX_SSBOS);
595    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, num_ssbos,
596                           target, context_type, DRAW_TCS_JIT_CTX_NUM_SSBOS);
597    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, images,
598                           target, context_type, DRAW_TCS_JIT_CTX_IMAGES);
599    LP_CHECK_STRUCT_SIZE(struct draw_tcs_jit_context,
600                         target, context_type);
601 
602    return context_type;
603 }
604 
605 static LLVMTypeRef
create_tcs_jit_input_type(struct gallivm_state * gallivm)606 create_tcs_jit_input_type(struct gallivm_state *gallivm)
607 {
608    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
609    LLVMTypeRef input_array;
610 
611    input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
612    input_array = LLVMArrayType(input_array, NUM_TCS_INPUTS); /* num attrs per vertex */
613    input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
614 
615    return input_array;
616 }
617 
618 static LLVMTypeRef
create_tcs_jit_output_type(struct gallivm_state * gallivm)619 create_tcs_jit_output_type(struct gallivm_state *gallivm)
620 {
621    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
622    LLVMTypeRef output_array;
623 
624    output_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
625    output_array = LLVMArrayType(output_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
626    output_array = LLVMPointerType(output_array, 0); /* num vertices per prim */
627 
628    return output_array;
629 }
630 
631 static LLVMTypeRef
create_tes_jit_input_type(struct gallivm_state * gallivm)632 create_tes_jit_input_type(struct gallivm_state *gallivm)
633 {
634    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
635    LLVMTypeRef input_array;
636 
637    input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
638    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
639    input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
640 
641    return input_array;
642 }
643 
644 /**
645  * Create LLVM type for struct draw_tes_jit_context
646  */
647 static LLVMTypeRef
create_tes_jit_context_type(struct gallivm_state * gallivm,unsigned vector_length,LLVMTypeRef texture_type,LLVMTypeRef sampler_type,LLVMTypeRef image_type,const char * struct_name)648 create_tes_jit_context_type(struct gallivm_state *gallivm,
649                             unsigned vector_length,
650                             LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
651                             LLVMTypeRef image_type,
652                             const char *struct_name)
653 {
654    LLVMTargetDataRef target = gallivm->target;
655    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
656    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
657    LLVMTypeRef elem_types[DRAW_TCS_JIT_CTX_NUM_FIELDS];
658    LLVMTypeRef context_type;
659 
660    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
661                                  LP_MAX_TGSI_CONST_BUFFERS);
662    elem_types[1] = LLVMArrayType(int_type, /* num_constants */
663                                  LP_MAX_TGSI_CONST_BUFFERS);
664    elem_types[2] = LLVMInt32TypeInContext(gallivm->context);
665    elem_types[3] = LLVMInt32TypeInContext(gallivm->context);
666 
667    elem_types[4] = LLVMArrayType(texture_type,
668                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
669    elem_types[5] = LLVMArrayType(sampler_type,
670                                  PIPE_MAX_SAMPLERS); /* samplers */
671    elem_types[6] = LLVMArrayType(image_type,
672                                  PIPE_MAX_SHADER_IMAGES); /* images */
673 
674    elem_types[7] = LLVMArrayType(LLVMPointerType(int_type, 0), /* ssbos */
675                                  LP_MAX_TGSI_SHADER_BUFFERS);
676    elem_types[8] = LLVMArrayType(int_type, /* num_ssbos */
677                                  LP_MAX_TGSI_SHADER_BUFFERS);
678 
679    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
680                                           ARRAY_SIZE(elem_types), 0);
681 
682    (void) target; /* silence unused var warning for non-debug build */
683    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, constants,
684                           target, context_type, DRAW_TCS_JIT_CTX_CONSTANTS);
685    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, num_constants,
686                           target, context_type, DRAW_TCS_JIT_CTX_NUM_CONSTANTS);
687    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, textures,
688                           target, context_type,
689                           DRAW_TCS_JIT_CTX_TEXTURES);
690    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, samplers,
691                           target, context_type,
692                           DRAW_TCS_JIT_CTX_SAMPLERS);
693    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, ssbos,
694                           target, context_type, DRAW_TCS_JIT_CTX_SSBOS);
695    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, num_ssbos,
696                           target, context_type, DRAW_TCS_JIT_CTX_NUM_SSBOS);
697    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, images,
698                           target, context_type, DRAW_TCS_JIT_CTX_IMAGES);
699    LP_CHECK_STRUCT_SIZE(struct draw_tes_jit_context,
700                         target, context_type);
701 
702    return context_type;
703 }
704 
705 /**
706  * Create LLVM types for various structures.
707  */
708 static void
create_jit_types(struct draw_llvm_variant * variant)709 create_jit_types(struct draw_llvm_variant *variant)
710 {
711    struct gallivm_state *gallivm = variant->gallivm;
712    LLVMTypeRef texture_type, sampler_type, context_type, buffer_type,
713       vb_type, image_type;
714 
715    texture_type = create_jit_texture_type(gallivm, "texture");
716    sampler_type = create_jit_sampler_type(gallivm, "sampler");
717    image_type = create_jit_image_type(gallivm, "image");
718 
719    context_type = create_jit_context_type(gallivm, texture_type, sampler_type,
720                                           image_type,
721                                           "draw_jit_context");
722    variant->context_ptr_type = LLVMPointerType(context_type, 0);
723 
724    buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");
725    variant->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
726 
727    vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
728    variant->vb_ptr_type = LLVMPointerType(vb_type, 0);
729 }
730 
731 
732 static LLVMTypeRef
get_context_ptr_type(struct draw_llvm_variant * variant)733 get_context_ptr_type(struct draw_llvm_variant *variant)
734 {
735    if (!variant->context_ptr_type)
736       create_jit_types(variant);
737    return variant->context_ptr_type;
738 }
739 
740 
741 static LLVMTypeRef
get_buffer_ptr_type(struct draw_llvm_variant * variant)742 get_buffer_ptr_type(struct draw_llvm_variant *variant)
743 {
744    if (!variant->buffer_ptr_type)
745       create_jit_types(variant);
746    return variant->buffer_ptr_type;
747 }
748 
749 
750 static LLVMTypeRef
get_vb_ptr_type(struct draw_llvm_variant * variant)751 get_vb_ptr_type(struct draw_llvm_variant *variant)
752 {
753    if (!variant->vb_ptr_type)
754       create_jit_types(variant);
755    return variant->vb_ptr_type;
756 }
757 
758 static LLVMTypeRef
get_vertex_header_ptr_type(struct draw_llvm_variant * variant)759 get_vertex_header_ptr_type(struct draw_llvm_variant *variant)
760 {
761    if (!variant->vertex_header_ptr_type)
762       create_jit_types(variant);
763    return variant->vertex_header_ptr_type;
764 }
765 
766 
767 /**
768  * Create per-context LLVM info.
769  */
770 struct draw_llvm *
draw_llvm_create(struct draw_context * draw,LLVMContextRef context)771 draw_llvm_create(struct draw_context *draw, LLVMContextRef context)
772 {
773    struct draw_llvm *llvm;
774 
775    if (!lp_build_init())
776       return NULL;
777 
778    llvm = CALLOC_STRUCT( draw_llvm );
779    if (!llvm)
780       return NULL;
781 
782    llvm->draw = draw;
783 
784    llvm->context = context;
785    if (!llvm->context) {
786       llvm->context = LLVMContextCreate();
787       llvm->context_owned = true;
788    }
789    if (!llvm->context)
790       goto fail;
791 
792    llvm->nr_variants = 0;
793    make_empty_list(&llvm->vs_variants_list);
794 
795    llvm->nr_gs_variants = 0;
796    make_empty_list(&llvm->gs_variants_list);
797 
798    llvm->nr_tcs_variants = 0;
799    make_empty_list(&llvm->tcs_variants_list);
800 
801    llvm->nr_tes_variants = 0;
802    make_empty_list(&llvm->tes_variants_list);
803 
804    return llvm;
805 
806 fail:
807    draw_llvm_destroy(llvm);
808    return NULL;
809 }
810 
811 
812 /**
813  * Free per-context LLVM info.
814  */
815 void
draw_llvm_destroy(struct draw_llvm * llvm)816 draw_llvm_destroy(struct draw_llvm *llvm)
817 {
818    if (llvm->context_owned)
819       LLVMContextDispose(llvm->context);
820    llvm->context = NULL;
821 
822    /* XXX free other draw_llvm data? */
823    FREE(llvm);
824 }
825 
826 static void
draw_get_ir_cache_key(struct nir_shader * nir,const void * key,size_t key_size,uint32_t val_32bit,unsigned char ir_sha1_cache_key[20])827 draw_get_ir_cache_key(struct nir_shader *nir,
828                       const void *key, size_t key_size,
829                       uint32_t val_32bit,
830                       unsigned char ir_sha1_cache_key[20])
831 {
832    struct blob blob = { 0 };
833    unsigned ir_size;
834    void *ir_binary;
835 
836    blob_init(&blob);
837    nir_serialize(&blob, nir, true);
838    ir_binary = blob.data;
839    ir_size = blob.size;
840 
841    struct mesa_sha1 ctx;
842    _mesa_sha1_init(&ctx);
843    _mesa_sha1_update(&ctx, key, key_size);
844    _mesa_sha1_update(&ctx, ir_binary, ir_size);
845    _mesa_sha1_update(&ctx, &val_32bit, 4);
846    _mesa_sha1_final(&ctx, ir_sha1_cache_key);
847 
848    blob_finish(&blob);
849 }
850 
851 /**
852  * Create LLVM-generated code for a vertex shader.
853  */
854 struct draw_llvm_variant *
draw_llvm_create_variant(struct draw_llvm * llvm,unsigned num_inputs,const struct draw_llvm_variant_key * key)855 draw_llvm_create_variant(struct draw_llvm *llvm,
856                          unsigned num_inputs,
857                          const struct draw_llvm_variant_key *key)
858 {
859    struct draw_llvm_variant *variant;
860    struct llvm_vertex_shader *shader =
861       llvm_vertex_shader(llvm->draw->vs.vertex_shader);
862    LLVMTypeRef vertex_header;
863    char module_name[64];
864    unsigned char ir_sha1_cache_key[20];
865    struct lp_cached_code cached = { 0 };
866    bool needs_caching = false;
867    variant = MALLOC(sizeof *variant +
868                     shader->variant_key_size -
869                     sizeof variant->key);
870    if (!variant)
871       return NULL;
872 
873    variant->llvm = llvm;
874    variant->shader = shader;
875    memcpy(&variant->key, key, shader->variant_key_size);
876 
877    snprintf(module_name, sizeof(module_name), "draw_llvm_vs_variant%u",
878             variant->shader->variants_cached);
879 
880    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
881       draw_get_ir_cache_key(shader->base.state.ir.nir,
882                             key,
883                             shader->variant_key_size,
884                             num_inputs,
885                             ir_sha1_cache_key);
886 
887       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
888                                          &cached,
889                                          ir_sha1_cache_key);
890       if (!cached.data_size)
891          needs_caching = true;
892    }
893    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
894 
895    create_jit_types(variant);
896 
897    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
898       if (llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_TGSI)
899          tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0);
900       else
901          nir_print_shader(llvm->draw->vs.vertex_shader->state.ir.nir, stderr);
902       draw_llvm_dump_variant_key(&variant->key);
903    }
904 
905    vertex_header = create_jit_vertex_header(variant->gallivm, num_inputs);
906 
907    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
908 
909    draw_llvm_generate(llvm, variant);
910 
911    gallivm_compile_module(variant->gallivm);
912 
913    variant->jit_func = (draw_jit_vert_func)
914          gallivm_jit_function(variant->gallivm, variant->function);
915 
916    if (needs_caching)
917       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
918                                            &cached,
919                                            ir_sha1_cache_key);
920    gallivm_free_ir(variant->gallivm);
921 
922    variant->list_item_global.base = variant;
923    variant->list_item_local.base = variant;
924    /*variant->no = */shader->variants_created++;
925    variant->list_item_global.base = variant;
926 
927    return variant;
928 }
929 
930 
931 static void
generate_vs(struct draw_llvm_variant * variant,LLVMBuilderRef builder,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],const LLVMValueRef (* inputs)[TGSI_NUM_CHANNELS],const struct lp_bld_tgsi_system_values * system_values,LLVMValueRef context_ptr,const struct lp_build_sampler_soa * draw_sampler,const struct lp_build_image_soa * draw_image,boolean clamp_vertex_color,struct lp_build_mask_context * bld_mask)932 generate_vs(struct draw_llvm_variant *variant,
933             LLVMBuilderRef builder,
934             struct lp_type vs_type,
935             LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
936             const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
937             const struct lp_bld_tgsi_system_values *system_values,
938             LLVMValueRef context_ptr,
939             const struct lp_build_sampler_soa *draw_sampler,
940             const struct lp_build_image_soa *draw_image,
941             boolean clamp_vertex_color,
942             struct lp_build_mask_context *bld_mask)
943 {
944    struct draw_llvm *llvm = variant->llvm;
945    const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
946    LLVMValueRef consts_ptr =
947       draw_jit_context_vs_constants(variant->gallivm, context_ptr);
948    LLVMValueRef num_consts_ptr =
949       draw_jit_context_num_vs_constants(variant->gallivm, context_ptr);
950    LLVMValueRef ssbos_ptr =
951       draw_jit_context_vs_ssbos(variant->gallivm, context_ptr);
952    LLVMValueRef num_ssbos_ptr =
953       draw_jit_context_num_vs_ssbos(variant->gallivm, context_ptr);
954 
955    struct lp_build_tgsi_params params;
956    memset(&params, 0, sizeof(params));
957 
958    params.type = vs_type;
959    params.mask = bld_mask;
960    params.consts_ptr = consts_ptr;
961    params.const_sizes_ptr = num_consts_ptr;
962    params.system_values = system_values;
963    params.inputs = inputs;
964    params.context_ptr = context_ptr;
965    params.sampler = draw_sampler;
966    params.info = &llvm->draw->vs.vertex_shader->info;
967    params.ssbo_ptr = ssbos_ptr;
968    params.ssbo_sizes_ptr = num_ssbos_ptr;
969    params.image = draw_image;
970 
971    if (llvm->draw->vs.vertex_shader->state.ir.nir &&
972        llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_NIR)
973       lp_build_nir_soa(variant->gallivm,
974                        llvm->draw->vs.vertex_shader->state.ir.nir,
975                        &params,
976                        outputs);
977    else
978       lp_build_tgsi_soa(variant->gallivm,
979                         tokens,
980                         &params,
981                         outputs);
982 
983    {
984       LLVMValueRef out;
985       unsigned chan, attrib;
986       struct lp_build_context bld;
987       struct tgsi_shader_info* info = &llvm->draw->vs.vertex_shader->info;
988       lp_build_context_init(&bld, variant->gallivm, vs_type);
989 
990       for (attrib = 0; attrib < info->num_outputs; ++attrib) {
991          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
992             if (outputs[attrib][chan]) {
993                switch (info->output_semantic_name[attrib]) {
994                case TGSI_SEMANTIC_COLOR:
995                case TGSI_SEMANTIC_BCOLOR:
996                   if (clamp_vertex_color) {
997                      out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
998                      out = lp_build_clamp(&bld, out, bld.zero, bld.one);
999                      LLVMBuildStore(builder, out, outputs[attrib][chan]);
1000                   }
1001                   break;
1002                }
1003             }
1004          }
1005       }
1006    }
1007 }
1008 
1009 
1010 static void
fetch_instanced(struct gallivm_state * gallivm,const struct util_format_description * format_desc,struct lp_type vs_type,LLVMValueRef vb_stride,LLVMValueRef map_ptr,LLVMValueRef buffer_size_adj,LLVMValueRef * inputs,LLVMValueRef index)1011 fetch_instanced(struct gallivm_state *gallivm,
1012                 const struct util_format_description *format_desc,
1013                 struct lp_type vs_type,
1014                 LLVMValueRef vb_stride,
1015                 LLVMValueRef map_ptr,
1016                 LLVMValueRef buffer_size_adj,
1017                 LLVMValueRef *inputs,
1018                 LLVMValueRef index)
1019 {
1020    LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);
1021    LLVMTypeRef aosf_t, aosi_t;
1022    LLVMValueRef zero = LLVMConstNull(i32_t);
1023    LLVMBuilderRef builder = gallivm->builder;
1024    LLVMValueRef stride, buffer_overflowed, aos, index_valid;
1025    unsigned i;
1026 
1027    aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
1028    aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
1029 
1030    /* This mul can overflow. Wraparound is ok. */
1031    stride = LLVMBuildMul(builder, vb_stride, index, "");
1032 
1033    buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
1034                                      stride, buffer_size_adj,
1035                                      "buffer_overflowed");
1036 
1037    if (0) {
1038       lp_build_print_value(gallivm, "   instance index = ", index);
1039       lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
1040    }
1041 
1042    index_valid = LLVMBuildNot(builder, buffer_overflowed, "");
1043    index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");
1044    stride = LLVMBuildAnd(builder, stride, index_valid, "");
1045 
1046    aos = lp_build_fetch_rgba_aos(gallivm,
1047                                  format_desc,
1048                                  lp_float32_vec4_type(),
1049                                  FALSE,
1050                                  map_ptr,
1051                                  stride, zero, zero,
1052                                  NULL);
1053 
1054    index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);
1055    aos = LLVMBuildBitCast(builder, aos, aosi_t, "");
1056    aos = LLVMBuildAnd(builder, aos, index_valid, "");
1057    aos = LLVMBuildBitCast(builder, aos, aosf_t, "");
1058 
1059    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1060       LLVMValueRef index = lp_build_const_int32(gallivm, i);
1061       inputs[i] = lp_build_extract_broadcast(gallivm,
1062                                              lp_float32_vec4_type(),
1063                                              vs_type, aos, index);
1064    }
1065 }
1066 
1067 
1068 static void
fetch_vector(struct gallivm_state * gallivm,const struct util_format_description * format_desc,struct lp_type vs_type,LLVMValueRef vb_stride,LLVMValueRef map_ptr,LLVMValueRef buffer_size_adj,LLVMValueRef * inputs,LLVMValueRef indices)1069 fetch_vector(struct gallivm_state *gallivm,
1070              const struct util_format_description *format_desc,
1071              struct lp_type vs_type,
1072              LLVMValueRef vb_stride,
1073              LLVMValueRef map_ptr,
1074              LLVMValueRef buffer_size_adj,
1075              LLVMValueRef *inputs,
1076              LLVMValueRef indices)
1077 {
1078    LLVMBuilderRef builder = gallivm->builder;
1079    struct lp_build_context blduivec;
1080    struct lp_type fetch_type = vs_type;
1081    LLVMValueRef offset, valid_mask;
1082    unsigned i;
1083 
1084    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
1085 
1086    vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride);
1087    buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj);
1088 
1089    /* This mul can overflow. Wraparound is ok. */
1090    offset = lp_build_mul(&blduivec, vb_stride, indices);
1091 
1092    valid_mask = lp_build_compare(gallivm, blduivec.type,
1093                                  PIPE_FUNC_LESS, offset, buffer_size_adj);
1094 
1095    /* not valid elements use offset 0 */
1096    offset = LLVMBuildAnd(builder, offset, valid_mask, "");
1097 
1098    if (0) {
1099       lp_build_print_value(gallivm, "   indices = ", indices);
1100       lp_build_print_value(gallivm, "   offsets = ", offset);
1101       lp_build_print_value(gallivm, "   valid_mask = ", valid_mask);
1102    }
1103 
1104    /*
1105     * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.
1106     * This should always produce better code.
1107     */
1108 
1109    /* The type handling is annoying here... */
1110    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
1111        format_desc->channel[0].pure_integer) {
1112       if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
1113          fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);
1114       }
1115       else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1116          fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);
1117       }
1118    }
1119 
1120    lp_build_fetch_rgba_soa(gallivm, format_desc,
1121                            fetch_type, FALSE, map_ptr, offset,
1122                            blduivec.zero, blduivec.zero,
1123                            NULL, inputs);
1124 
1125    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1126       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
1127                                    lp_build_vec_type(gallivm, vs_type), "");
1128    }
1129 
1130    /* out-of-bound fetches return all zeros */
1131    for (i = 0; i < format_desc->nr_channels; i++) {
1132       inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");
1133       inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");
1134       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
1135                                    lp_build_vec_type(gallivm, vs_type), "");
1136    }
1137 }
1138 
1139 
1140 static void
store_aos(struct gallivm_state * gallivm,LLVMValueRef io_ptr,LLVMValueRef index,LLVMValueRef value)1141 store_aos(struct gallivm_state *gallivm,
1142           LLVMValueRef io_ptr,
1143           LLVMValueRef index,
1144           LLVMValueRef value)
1145 {
1146    LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);
1147    LLVMBuilderRef builder = gallivm->builder;
1148    LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr);
1149    LLVMValueRef indices[3];
1150 
1151    indices[0] = lp_build_const_int32(gallivm, 0);
1152    indices[1] = index;
1153    indices[2] = lp_build_const_int32(gallivm, 0);
1154 
1155    data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
1156    data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
1157 
1158 #if DEBUG_STORE
1159    lp_build_printf(gallivm, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
1160 #endif
1161 
1162    /* Unaligned store due to the vertex header */
1163    LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
1164 }
1165 
1166 /**
1167  * Adjust the mask to architecture endianess. The mask will the store in struct:
1168  *
1169  * struct vertex_header {
1170  *    unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
1171  *    unsigned edgeflag:1;
1172  *    unsigned pad:1;
1173  *    unsigned vertex_id:16;
1174  *    [...]
1175  * }
1176  *
1177  * On little-endian machine nothing needs to done, however on bit-endian machine
1178  * the mask's fields need to be adjusted with the algorithm:
1179  *
1180  * uint32_t reverse (uint32_t x)
1181  * {
1182  *   return (x >> 16) |              // vertex_id
1183  *          ((x & 0x3fff) << 18) |   // clipmask
1184  *          ((x & 0x4000) << 3) |    // edgeflag
1185  *          ((x & 0x8000) << 1);     // pad
1186  * }
1187  */
1188 static LLVMValueRef
adjust_mask(struct gallivm_state * gallivm,LLVMValueRef mask)1189 adjust_mask(struct gallivm_state *gallivm,
1190             LLVMValueRef mask)
1191 {
1192 #if UTIL_ARCH_BIG_ENDIAN
1193    LLVMBuilderRef builder = gallivm->builder;
1194    LLVMValueRef vertex_id;
1195    LLVMValueRef clipmask;
1196    LLVMValueRef pad;
1197    LLVMValueRef edgeflag;
1198 
1199    vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
1200    clipmask  = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
1201    clipmask  = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
1202    if (0) {
1203       pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
1204       pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 1), "");
1205    }
1206    edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
1207    edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 3), "");
1208 
1209    mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
1210    if (0) {
1211       mask = LLVMBuildOr(builder, mask, pad, "");
1212    }
1213    mask = LLVMBuildOr(builder, mask, edgeflag, "");
1214 #endif
1215    return mask;
1216 }
1217 
1218 static void
store_aos_array(struct gallivm_state * gallivm,struct lp_type soa_type,LLVMValueRef io_ptr,LLVMValueRef * indices,LLVMValueRef * aos,int attrib,int num_outputs,LLVMValueRef clipmask,boolean need_edgeflag)1219 store_aos_array(struct gallivm_state *gallivm,
1220                 struct lp_type soa_type,
1221                 LLVMValueRef io_ptr,
1222                 LLVMValueRef *indices,
1223                 LLVMValueRef* aos,
1224                 int attrib,
1225                 int num_outputs,
1226                 LLVMValueRef clipmask,
1227                 boolean need_edgeflag)
1228 {
1229    LLVMBuilderRef builder = gallivm->builder;
1230    LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
1231    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
1232    LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32];
1233    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1234    int vector_length = soa_type.length;
1235    int i;
1236 
1237    debug_assert(TGSI_NUM_CHANNELS == 4);
1238 
1239    for (i = 0; i < vector_length; i++) {
1240       linear_inds[i] = lp_build_const_int32(gallivm, i);
1241       if (indices) {
1242          inds[i] = indices[i];
1243       } else {
1244          inds[i] = linear_inds[i];
1245       }
1246       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
1247    }
1248 
1249    if (attrib == 0) {
1250       /* store vertex header for each of the n vertices */
1251       LLVMValueRef val, cliptmp;
1252       int vertex_id_pad_edgeflag;
1253 
1254       /* If this assertion fails, it means we need to update the bit twidding
1255        * code here.  See struct vertex_header in draw_private.h.
1256        */
1257       assert(DRAW_TOTAL_CLIP_PLANES==14);
1258       /* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */
1259       if (!need_edgeflag) {
1260          vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
1261       }
1262       else {
1263          vertex_id_pad_edgeflag = (0xffff << 16);
1264       }
1265       val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),
1266                                    vertex_id_pad_edgeflag);
1267       /* OR with the clipmask */
1268       cliptmp = LLVMBuildOr(builder, val, clipmask, "");
1269       for (i = 0; i < vector_length; i++) {
1270          LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptrs[i]);
1271          val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], "");
1272          val = adjust_mask(gallivm, val);
1273 #if DEBUG_STORE
1274          lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
1275                          io_ptrs[i], inds[i], val);
1276 #endif
1277          LLVMBuildStore(builder, val, id_ptr);
1278       }
1279    }
1280 
1281    /* store for each of the n vertices */
1282    for (i = 0; i < vector_length; i++) {
1283       store_aos(gallivm, io_ptrs[i], attr_index, aos[i]);
1284    }
1285 }
1286 
1287 
1288 static void
convert_to_aos(struct gallivm_state * gallivm,LLVMValueRef io,LLVMValueRef * indices,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],LLVMValueRef clipmask,int num_outputs,struct lp_type soa_type,boolean need_edgeflag)1289 convert_to_aos(struct gallivm_state *gallivm,
1290                LLVMValueRef io,
1291                LLVMValueRef *indices,
1292                LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1293                LLVMValueRef clipmask,
1294                int num_outputs,
1295                struct lp_type soa_type,
1296                boolean need_edgeflag)
1297 {
1298    LLVMBuilderRef builder = gallivm->builder;
1299    unsigned chan, attrib, i;
1300 
1301 #if DEBUG_STORE
1302    lp_build_printf(gallivm, "   # storing begin\n");
1303 #endif
1304    for (attrib = 0; attrib < num_outputs; ++attrib) {
1305       LLVMValueRef soa[TGSI_NUM_CHANNELS];
1306       LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
1307       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1308          if (outputs[attrib][chan]) {
1309             LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
1310             lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
1311 #if DEBUG_STORE
1312             lp_build_printf(gallivm, "output %d : %d ",
1313                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
1314                                          attrib, 0),
1315                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
1316                                          chan, 0));
1317             lp_build_print_value(gallivm, "val = ", out);
1318             {
1319                LLVMValueRef iv =
1320                   LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
1321 
1322                lp_build_print_value(gallivm, "  ival = ", iv);
1323             }
1324 #endif
1325             soa[chan] = out;
1326          }
1327          else {
1328             soa[chan] = 0;
1329          }
1330       }
1331 
1332 
1333       if (soa_type.length == TGSI_NUM_CHANNELS) {
1334          lp_build_transpose_aos(gallivm, soa_type, soa, aos);
1335       } else {
1336          lp_build_transpose_aos(gallivm, soa_type, soa, soa);
1337 
1338          for (i = 0; i < soa_type.length; ++i) {
1339             aos[i] = lp_build_extract_range(gallivm,
1340                                             soa[i % TGSI_NUM_CHANNELS],
1341                                             (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
1342                                             TGSI_NUM_CHANNELS);
1343          }
1344       }
1345 
1346       store_aos_array(gallivm,
1347                       soa_type,
1348                       io, indices,
1349                       aos,
1350                       attrib,
1351                       num_outputs,
1352                       clipmask,
1353                       need_edgeflag);
1354    }
1355 #if DEBUG_STORE
1356    lp_build_printf(gallivm, "   # storing end\n");
1357 #endif
1358 }
1359 
1360 
1361 /**
1362  * Stores original vertex positions in clip coordinates
1363  */
1364 static void
store_clip(struct gallivm_state * gallivm,const struct lp_type vs_type,LLVMValueRef io_ptr,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],int idx)1365 store_clip(struct gallivm_state *gallivm,
1366            const struct lp_type vs_type,
1367            LLVMValueRef io_ptr,
1368            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1369            int idx)
1370 {
1371    LLVMBuilderRef builder = gallivm->builder;
1372    LLVMValueRef soa[4];
1373    LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
1374    LLVMValueRef indices[2];
1375    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1376    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
1377    LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1378    LLVMTypeRef clip_ptr_type =
1379       LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context),
1380                                      4), 0);
1381    int i, j;
1382 
1383    indices[0] =
1384    indices[1] = lp_build_const_int32(gallivm, 0);
1385 
1386    for (i = 0; i < vs_type.length; i++) {
1387       inds[i] = lp_build_const_int32(gallivm, i);
1388       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
1389    }
1390 
1391    soa[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 .. xn*/
1392    soa[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 .. yn*/
1393    soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/
1394    soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/
1395 
1396    for (i = 0; i < vs_type.length; i++) {
1397       clip_ptrs[i] = draw_jit_header_clip_pos(gallivm, io_ptrs[i]);
1398    }
1399 
1400    lp_build_transpose_aos(gallivm, vs_type, soa, soa);
1401    for (i = 0; i < vs_type.length; ++i) {
1402       aos[i] = lp_build_extract_range(gallivm,
1403                                       soa[i % TGSI_NUM_CHANNELS],
1404                                       (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
1405                                       TGSI_NUM_CHANNELS);
1406    }
1407 
1408    for (j = 0; j < vs_type.length; j++) {
1409       LLVMValueRef clip_ptr;
1410 
1411       clip_ptr = LLVMBuildGEP(builder, clip_ptrs[j], indices, 2, "clipo");
1412       clip_ptr = LLVMBuildPointerCast(builder, clip_ptr, clip_ptr_type, "");
1413 
1414       /* Unaligned store */
1415       LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
1416    }
1417 }
1418 
1419 
1420 /**
1421  * Transforms the outputs for viewport mapping
1422  */
1423 static void
generate_viewport(struct draw_llvm_variant * variant,LLVMBuilderRef builder,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],LLVMValueRef context_ptr)1424 generate_viewport(struct draw_llvm_variant *variant,
1425                   LLVMBuilderRef builder,
1426                   struct lp_type vs_type,
1427                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1428                   LLVMValueRef context_ptr)
1429 {
1430    int i;
1431    struct gallivm_state *gallivm = variant->gallivm;
1432    struct lp_type f32_type = vs_type;
1433    const unsigned pos = variant->llvm->draw->vs.position_output;
1434    LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1435    LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn*/
1436    LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
1437    LLVMValueRef vp_ptr = draw_jit_context_viewports(gallivm, context_ptr);
1438 
1439    /* We treat pipe_viewport_state as a float array */
1440    const int scale_index_offset = offsetof(struct pipe_viewport_state, scale) / sizeof(float);
1441    const int trans_index_offset = offsetof(struct pipe_viewport_state, translate) / sizeof(float);
1442 
1443    /* for 1/w convention*/
1444    out3 = LLVMBuildFDiv(builder, const1, out3, "");
1445    LLVMBuildStore(builder, out3, outputs[pos][3]);
1446 
1447    /* Viewport Mapping */
1448    for (i=0; i<3; i++) {
1449       LLVMValueRef out = LLVMBuildLoad(builder, outputs[pos][i], ""); /*x0 x1 .. xn*/
1450       LLVMValueRef scale;
1451       LLVMValueRef trans;
1452       LLVMValueRef scale_i;
1453       LLVMValueRef trans_i;
1454       LLVMValueRef index;
1455 
1456       index = lp_build_const_int32(gallivm, i + scale_index_offset);
1457       scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
1458 
1459       index = lp_build_const_int32(gallivm, i + trans_index_offset);
1460       trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
1461 
1462       scale = lp_build_broadcast(gallivm, vs_type_llvm,
1463                                  LLVMBuildLoad(builder, scale_i, "scale"));
1464       trans = lp_build_broadcast(gallivm, vs_type_llvm,
1465                                  LLVMBuildLoad(builder, trans_i, "trans"));
1466 
1467       /* divide by w */
1468       out = LLVMBuildFMul(builder, out, out3, "");
1469       /* mult by scale, add translation */
1470       out = lp_build_fmuladd(builder, out, scale, trans);
1471 
1472       /* store transformed outputs */
1473       LLVMBuildStore(builder, out, outputs[pos][i]);
1474    }
1475 
1476 }
1477 
1478 
1479 /**
1480  * Returns clipmask as nxi32 bitmask for the n vertices
1481  */
1482 static LLVMValueRef
generate_clipmask(struct draw_llvm * llvm,struct gallivm_state * gallivm,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],struct draw_llvm_variant_key * key,LLVMValueRef context_ptr,boolean * have_clipdist)1483 generate_clipmask(struct draw_llvm *llvm,
1484                   struct gallivm_state *gallivm,
1485                   struct lp_type vs_type,
1486                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1487                   struct draw_llvm_variant_key *key,
1488                   LLVMValueRef context_ptr,
1489                   boolean *have_clipdist)
1490 {
1491    LLVMBuilderRef builder = gallivm->builder;
1492    LLVMValueRef mask; /* stores the <nxi32> clipmasks */
1493    LLVMValueRef test, temp;
1494    LLVMValueRef zero, shift;
1495    LLVMValueRef pos_x, pos_y, pos_z, pos_w;
1496    LLVMValueRef cv_x, cv_y, cv_z, cv_w;
1497    LLVMValueRef plane1, planes, plane_ptr, sum;
1498    struct lp_type f32_type = vs_type;
1499    struct lp_type i32_type = lp_int_type(vs_type);
1500    const unsigned pos = llvm->draw->vs.position_output;
1501    const unsigned cv = llvm->draw->vs.clipvertex_output;
1502    int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
1503    boolean have_cd = false;
1504    boolean clip_user = key->clip_user;
1505    unsigned ucp_enable = key->ucp_enable;
1506    unsigned cd[2];
1507 
1508    cd[0] = llvm->draw->vs.ccdistance_output[0];
1509    cd[1] = llvm->draw->vs.ccdistance_output[1];
1510 
1511    if (cd[0] != pos || cd[1] != pos)
1512       have_cd = true;
1513 
1514    if (num_written_clipdistance && !clip_user) {
1515       clip_user = true;
1516       ucp_enable = (1 << num_written_clipdistance) - 1;
1517    }
1518 
1519    mask = lp_build_const_int_vec(gallivm, i32_type, 0);
1520    temp = lp_build_const_int_vec(gallivm, i32_type, 0);
1521    zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
1522    shift = lp_build_const_int_vec(gallivm, i32_type, 1);    /* 1 1 1 1 */
1523 
1524    /*
1525     * load clipvertex and position from correct locations.
1526     * if they are the same just load them once.
1527     */
1528    pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 .. xn */
1529    pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 .. yn */
1530    pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 .. zn */
1531    pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn */
1532 
1533    if (clip_user && cv != pos) {
1534       cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 .. xn */
1535       cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 .. yn */
1536       cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 .. zn */
1537       cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 .. wn */
1538    } else {
1539       cv_x = pos_x;
1540       cv_y = pos_y;
1541       cv_z = pos_z;
1542       cv_w = pos_w;
1543    }
1544 
1545    /*
1546     * Be careful with the comparisons and NaNs (using llvm's unordered
1547     * comparisons here).
1548     */
1549    /* Cliptest, for hardwired planes */
1550    /*
1551     * XXX should take guardband into account (currently not in key).
1552     * Otherwise might run the draw pipeline stages for nothing.
1553     */
1554    if (key->clip_xy) {
1555       /* plane 1 */
1556       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
1557       temp = shift;
1558       test = LLVMBuildAnd(builder, test, temp, "");
1559       mask = test;
1560 
1561       /* plane 2 */
1562       test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
1563       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1564       temp = LLVMBuildShl(builder, temp, shift, "");
1565       test = LLVMBuildAnd(builder, test, temp, "");
1566       mask = LLVMBuildOr(builder, mask, test, "");
1567 
1568       /* plane 3 */
1569       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
1570       temp = LLVMBuildShl(builder, temp, shift, "");
1571       test = LLVMBuildAnd(builder, test, temp, "");
1572       mask = LLVMBuildOr(builder, mask, test, "");
1573 
1574       /* plane 4 */
1575       test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
1576       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1577       temp = LLVMBuildShl(builder, temp, shift, "");
1578       test = LLVMBuildAnd(builder, test, temp, "");
1579       mask = LLVMBuildOr(builder, mask, test, "");
1580    }
1581 
1582    if (key->clip_z) {
1583       temp = lp_build_const_int_vec(gallivm, i32_type, 16);
1584       if (key->clip_halfz) {
1585          /* plane 5 */
1586          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
1587          test = LLVMBuildAnd(builder, test, temp, "");
1588          mask = LLVMBuildOr(builder, mask, test, "");
1589       }
1590       else {
1591          /* plane 5 */
1592          test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
1593          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1594          test = LLVMBuildAnd(builder, test, temp, "");
1595          mask = LLVMBuildOr(builder, mask, test, "");
1596       }
1597       /* plane 6 */
1598       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
1599       temp = LLVMBuildShl(builder, temp, shift, "");
1600       test = LLVMBuildAnd(builder, test, temp, "");
1601       mask = LLVMBuildOr(builder, mask, test, "");
1602    }
1603 
1604    if (clip_user) {
1605       LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
1606       LLVMValueRef indices[3];
1607       LLVMValueRef is_nan_or_inf;
1608 
1609       /* userclip planes */
1610       while (ucp_enable) {
1611          unsigned plane_idx = ffs(ucp_enable)-1;
1612          ucp_enable &= ~(1 << plane_idx);
1613          plane_idx += 6;
1614 
1615          if (have_cd && num_written_clipdistance) {
1616             LLVMValueRef clipdist;
1617             int i;
1618             i = plane_idx - 6;
1619 
1620             *have_clipdist = TRUE;
1621             if (i < 4) {
1622                clipdist = LLVMBuildLoad(builder, outputs[cd[0]][i], "");
1623             } else {
1624                clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], "");
1625             }
1626             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
1627             is_nan_or_inf = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);
1628             test = LLVMBuildOr(builder, test, is_nan_or_inf, "");
1629             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1630             test = LLVMBuildAnd(builder, test, temp, "");
1631             mask = LLVMBuildOr(builder, mask, test, "");
1632          } else {
1633             LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1634             indices[0] = lp_build_const_int32(gallivm, 0);
1635             indices[1] = lp_build_const_int32(gallivm, plane_idx);
1636 
1637             indices[2] = lp_build_const_int32(gallivm, 0);
1638             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1639             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
1640             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1641             sum = LLVMBuildFMul(builder, planes, cv_x, "");
1642 
1643             indices[2] = lp_build_const_int32(gallivm, 1);
1644             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1645             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
1646             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1647             sum = lp_build_fmuladd(builder, planes, cv_y, sum);
1648 
1649             indices[2] = lp_build_const_int32(gallivm, 2);
1650             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1651             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
1652             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1653             sum = lp_build_fmuladd(builder, planes, cv_z, sum);
1654 
1655             indices[2] = lp_build_const_int32(gallivm, 3);
1656             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1657             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
1658             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1659             sum = lp_build_fmuladd(builder, planes, cv_w, sum);
1660 
1661             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
1662             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1663             test = LLVMBuildAnd(builder, test, temp, "");
1664             mask = LLVMBuildOr(builder, mask, test, "");
1665          }
1666       }
1667    }
1668    if (key->need_edgeflags) {
1669       /*
1670        * This isn't really part of clipmask but stored the same in vertex
1671        * header later, so do it here.
1672        */
1673       unsigned edge_attr = llvm->draw->vs.edgeflag_output;
1674       LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
1675       LLVMValueRef edgeflag = LLVMBuildLoad(builder, outputs[edge_attr][0], "");
1676       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
1677       temp = lp_build_const_int_vec(gallivm, i32_type,
1678                                     1LL << DRAW_TOTAL_CLIP_PLANES);
1679       test = LLVMBuildAnd(builder, test, temp, "");
1680       mask = LLVMBuildOr(builder, mask, test, "");
1681    }
1682    return mask;
1683 }
1684 
1685 
1686 /**
1687  * Returns boolean if any clipping has occurred
1688  * Used zero/one i8 value to represent boolean
1689  */
1690 static LLVMValueRef
clipmask_booli8(struct gallivm_state * gallivm,const struct lp_type vs_type,LLVMValueRef clipmask_bool_ptr,boolean edgeflag_in_clipmask)1691 clipmask_booli8(struct gallivm_state *gallivm,
1692                 const struct lp_type vs_type,
1693                 LLVMValueRef clipmask_bool_ptr,
1694                 boolean edgeflag_in_clipmask)
1695 {
1696    LLVMBuilderRef builder = gallivm->builder;
1697    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
1698    LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
1699    LLVMValueRef ret;
1700    struct lp_build_context bldivec;
1701 
1702    lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));
1703 
1704    /*
1705     * We need to invert the edgeflag bit from the clipmask here
1706     * (because the result is really if we want to run the pipeline or not
1707     * and we (may) need it if edgeflag was 0).
1708     */
1709    if (edgeflag_in_clipmask) {
1710       LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type,
1711                                                  1LL << DRAW_TOTAL_CLIP_PLANES);
1712       clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
1713    }
1714 
1715    /*
1716     * XXX: probably should mask off bits from the mask which come from
1717     * vertices which were beyond the count (i.e. indices_valid for
1718     * linear fetches, for elts ones we don't have the correct mask
1719     * right now). Otherwise might run the pipeline for nothing,
1720     * though everything should still work.
1721     */
1722    ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool);
1723    ret = LLVMBuildZExt(builder, ret, int8_type, "");
1724    return ret;
1725 }
1726 
1727 static LLVMValueRef
draw_gs_llvm_fetch_input(const struct lp_build_gs_iface * gs_iface,struct lp_build_context * bld,boolean is_vindex_indirect,LLVMValueRef vertex_index,boolean is_aindex_indirect,LLVMValueRef attrib_index,LLVMValueRef swizzle_index)1728 draw_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
1729                          struct lp_build_context * bld,
1730                          boolean is_vindex_indirect,
1731                          LLVMValueRef vertex_index,
1732                          boolean is_aindex_indirect,
1733                          LLVMValueRef attrib_index,
1734                          LLVMValueRef swizzle_index)
1735 {
1736    const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
1737    struct gallivm_state *gallivm = bld->gallivm;
1738    LLVMBuilderRef builder = gallivm->builder;
1739    LLVMValueRef indices[3];
1740    LLVMValueRef res;
1741    struct lp_type type = bld->type;
1742 
1743    if (is_vindex_indirect || is_aindex_indirect) {
1744       int i;
1745       res = bld->zero;
1746       for (i = 0; i < type.length; ++i) {
1747          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
1748          LLVMValueRef vert_chan_index = vertex_index;
1749          LLVMValueRef attr_chan_index = attrib_index;
1750          LLVMValueRef channel_vec, value;
1751 
1752          if (is_vindex_indirect) {
1753             vert_chan_index = LLVMBuildExtractElement(builder,
1754                                                       vertex_index, idx, "");
1755          }
1756          if (is_aindex_indirect) {
1757             attr_chan_index = LLVMBuildExtractElement(builder,
1758                                                       attrib_index, idx, "");
1759          }
1760 
1761          indices[0] = vert_chan_index;
1762          indices[1] = attr_chan_index;
1763          indices[2] = swizzle_index;
1764 
1765          channel_vec = LLVMBuildGEP(builder, gs->input, indices, 3, "");
1766          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
1767          value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
1768 
1769          res = LLVMBuildInsertElement(builder, res, value, idx, "");
1770       }
1771    } else {
1772       indices[0] = vertex_index;
1773       indices[1] = attrib_index;
1774       indices[2] = swizzle_index;
1775 
1776       res = LLVMBuildGEP(builder, gs->input, indices, 3, "");
1777       res = LLVMBuildLoad(builder, res, "");
1778    }
1779 
1780    return res;
1781 }
1782 
1783 static void
draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface * gs_base,struct lp_build_context * bld,LLVMValueRef (* outputs)[4],LLVMValueRef emitted_vertices_vec,LLVMValueRef mask_vec,LLVMValueRef stream_id)1784 draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
1785                          struct lp_build_context * bld,
1786                          LLVMValueRef (*outputs)[4],
1787                          LLVMValueRef emitted_vertices_vec,
1788                          LLVMValueRef mask_vec, LLVMValueRef stream_id)
1789 {
1790    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1791    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1792    struct gallivm_state *gallivm = variant->gallivm;
1793    LLVMBuilderRef builder = gallivm->builder;
1794    struct lp_type gs_type = bld->type;
1795    LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
1796                                                   lp_int_type(gs_type), 0);
1797    LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
1798    LLVMValueRef next_prim_offset =
1799       lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
1800    LLVMValueRef io = variant->io_ptr;
1801    unsigned i;
1802    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
1803 
1804    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1805    for (i = 0; i < gs_type.length; ++i) {
1806       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1807       LLVMValueRef currently_emitted =
1808          LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
1809       indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
1810       indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
1811       indices[i] = LLVMBuildSelect(builder, LLVMBuildExtractElement(builder, cond, ind, ""), indices[i],
1812                                    lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary - 1), "");
1813    }
1814 
1815    LLVMValueRef stream_idx = LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), "");
1816    LLVMValueRef cnd = LLVMBuildICmp(builder, LLVMIntULT, stream_idx, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1817    struct lp_build_if_state if_ctx;
1818    lp_build_if(&if_ctx, gallivm, cnd);
1819    io = lp_build_pointer_get(builder, io, LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), ""));
1820 
1821    convert_to_aos(gallivm, io, indices,
1822                   outputs, clipmask,
1823                   gs_info->num_outputs, gs_type,
1824                   FALSE);
1825    lp_build_endif(&if_ctx);
1826 }
1827 
1828 static void
draw_gs_llvm_end_primitive(const struct lp_build_gs_iface * gs_base,struct lp_build_context * bld,LLVMValueRef total_emitted_vertices_vec_ptr,LLVMValueRef verts_per_prim_vec,LLVMValueRef emitted_prims_vec,LLVMValueRef mask_vec,unsigned stream)1829 draw_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
1830                            struct lp_build_context * bld,
1831                            LLVMValueRef total_emitted_vertices_vec_ptr,
1832                            LLVMValueRef verts_per_prim_vec,
1833                            LLVMValueRef emitted_prims_vec,
1834                            LLVMValueRef mask_vec, unsigned stream)
1835 {
1836    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1837    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1838    struct gallivm_state *gallivm = variant->gallivm;
1839    LLVMBuilderRef builder = gallivm->builder;
1840    LLVMValueRef prim_lengts_ptr =
1841       draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr);
1842    unsigned i;
1843 
1844    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1845    for (i = 0; i < bld->type.length; ++i) {
1846       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1847       LLVMValueRef prims_emitted =
1848          LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
1849       LLVMValueRef store_ptr;
1850       LLVMValueRef num_vertices =
1851          LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
1852 
1853       LLVMValueRef this_cond = LLVMBuildExtractElement(gallivm->builder, cond, ind, "");
1854       struct lp_build_if_state ifthen;
1855       lp_build_if(&ifthen, gallivm, this_cond);
1856       prims_emitted = LLVMBuildMul(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1857       prims_emitted = LLVMBuildAdd(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, stream), "");
1858       store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, "");
1859       store_ptr = LLVMBuildLoad(builder, store_ptr, "");
1860       store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, "");
1861       LLVMBuildStore(builder, num_vertices, store_ptr);
1862       lp_build_endif(&ifthen);
1863    }
1864 }
1865 
1866 static void
draw_gs_llvm_epilogue(const struct lp_build_gs_iface * gs_base,LLVMValueRef total_emitted_vertices_vec,LLVMValueRef emitted_prims_vec,unsigned stream)1867 draw_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
1868                       LLVMValueRef total_emitted_vertices_vec,
1869                       LLVMValueRef emitted_prims_vec, unsigned stream)
1870 {
1871    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1872    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1873    struct gallivm_state *gallivm = variant->gallivm;
1874    LLVMBuilderRef builder = gallivm->builder;
1875    LLVMValueRef emitted_verts_ptr =
1876       draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr);
1877    LLVMValueRef emitted_prims_ptr =
1878       draw_gs_jit_emitted_prims(gallivm, variant->context_ptr);
1879    LLVMValueRef stream_val = lp_build_const_int32(gallivm, stream);
1880 
1881    emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &stream_val, 1, "");
1882    emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &stream_val, 1, "");
1883 
1884    LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
1885    LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
1886 }
1887 
1888 static void
draw_llvm_generate(struct draw_llvm * llvm,struct draw_llvm_variant * variant)1889 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1890 {
1891    struct gallivm_state *gallivm = variant->gallivm;
1892    LLVMContextRef context = gallivm->context;
1893    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1894    LLVMTypeRef arg_types[12];
1895    unsigned num_arg_types = ARRAY_SIZE(arg_types);
1896    LLVMTypeRef func_type;
1897    LLVMValueRef context_ptr;
1898    LLVMBasicBlockRef block;
1899    LLVMBuilderRef builder;
1900    char func_name[64];
1901    struct lp_type vs_type;
1902    LLVMValueRef count, fetch_elts, start_or_maxelt;
1903    LLVMValueRef vertex_id_offset;
1904    LLVMValueRef stride, step, io_itr;
1905    LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;
1906    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1907    LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
1908    LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
1909    LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
1910    LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
1911    LLVMValueRef fake_buf_ptr, fake_buf;
1912 
1913    struct draw_context *draw = llvm->draw;
1914    const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1915    unsigned i, j;
1916    struct lp_build_context bld, blduivec;
1917    struct lp_build_loop_state lp_loop;
1918    struct lp_build_if_state if_ctx;
1919    const int vector_length = lp_native_vector_width / 32;
1920    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1921    struct lp_build_sampler_soa *sampler = 0;
1922    struct lp_build_image_soa *image = NULL;
1923    LLVMValueRef ret, clipmask_bool_ptr;
1924    struct draw_llvm_variant_key *key = &variant->key;
1925    /* If geometry shader is present we need to skip both the viewport
1926     * transformation and clipping otherwise the inputs to the geometry
1927     * shader will be incorrect.
1928     * The code can't handle vp transform when vs writes vp index neither
1929     * (though this would be fixable here, but couldn't just broadcast
1930     * the values).
1931     */
1932    const boolean bypass_viewport = key->has_gs_or_tes || key->bypass_viewport ||
1933                                    vs_info->writes_viewport_index;
1934    const boolean enable_cliptest = !key->has_gs_or_tes && (key->clip_xy ||
1935                                                     key->clip_z ||
1936                                                     key->clip_user ||
1937                                                     key->need_edgeflags);
1938    LLVMValueRef variant_func;
1939    const unsigned pos = draw->vs.position_output;
1940    const unsigned cv = draw->vs.clipvertex_output;
1941    boolean have_clipdist = FALSE;
1942    struct lp_bld_tgsi_system_values system_values;
1943 
1944    memset(&system_values, 0, sizeof(system_values));
1945    memset(&outputs, 0, sizeof(outputs));
1946    snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant");
1947 
1948    i = 0;
1949    arg_types[i++] = get_context_ptr_type(variant);       /* context */
1950    arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
1951    arg_types[i++] = get_buffer_ptr_type(variant);        /* vbuffers */
1952    arg_types[i++] = int32_type;                          /* count */
1953    arg_types[i++] = int32_type;                          /* start/fetch_elt_max */
1954    arg_types[i++] = int32_type;                          /* stride */
1955    arg_types[i++] = get_vb_ptr_type(variant);            /* pipe_vertex_buffer's */
1956    arg_types[i++] = int32_type;                          /* instance_id */
1957    arg_types[i++] = int32_type;                          /* vertex_id_offset */
1958    arg_types[i++] = int32_type;                          /* start_instance */
1959    arg_types[i++] = LLVMPointerType(int32_type, 0);      /* fetch_elts  */
1960    arg_types[i++] = int32_type;                          /* draw_id */
1961 
1962    func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
1963                                 arg_types, num_arg_types, 0);
1964 
1965    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
1966    variant->function = variant_func;
1967 
1968    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
1969    for (i = 0; i < num_arg_types; ++i)
1970       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1971          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
1972 
1973    if (gallivm->cache && gallivm->cache->data_size)
1974       return;
1975    context_ptr               = LLVMGetParam(variant_func, 0);
1976    io_ptr                    = LLVMGetParam(variant_func, 1);
1977    vbuffers_ptr              = LLVMGetParam(variant_func, 2);
1978    count                     = LLVMGetParam(variant_func, 3);
1979    /*
1980     * XXX: the maxelt part is unused. Not really useful, since we cannot
1981     * get index buffer overflows due to vsplit (which provides its own
1982     * elts buffer, with a different size than what's passed in here).
1983     */
1984    start_or_maxelt           = LLVMGetParam(variant_func, 4);
1985    /*
1986     * XXX: stride is actually unused. The stride we use is strictly calculated
1987     * from the number of outputs (including the draw_extra outputs).
1988     * Should probably fix some day (we need a new vs just because of extra
1989     * outputs which the generated vs won't touch).
1990     */
1991    stride                    = LLVMGetParam(variant_func, 5);
1992    vb_ptr                    = LLVMGetParam(variant_func, 6);
1993    system_values.instance_id = LLVMGetParam(variant_func, 7);
1994    vertex_id_offset          = LLVMGetParam(variant_func, 8);
1995    system_values.base_instance = LLVMGetParam(variant_func, 9);
1996    fetch_elts                = LLVMGetParam(variant_func, 10);
1997    system_values.draw_id     = LLVMGetParam(variant_func, 11);
1998 
1999    lp_build_name(context_ptr, "context");
2000    lp_build_name(io_ptr, "io");
2001    lp_build_name(vbuffers_ptr, "vbuffers");
2002    lp_build_name(count, "count");
2003    lp_build_name(start_or_maxelt, "start_or_maxelt");
2004    lp_build_name(stride, "stride");
2005    lp_build_name(vb_ptr, "vb");
2006    lp_build_name(system_values.instance_id, "instance_id");
2007    lp_build_name(vertex_id_offset, "vertex_id_offset");
2008    lp_build_name(system_values.base_instance, "start_instance");
2009    lp_build_name(fetch_elts, "fetch_elts");
2010    lp_build_name(system_values.draw_id, "draw_id");
2011 
2012    /*
2013     * Function body
2014     */
2015 
2016    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2017    builder = gallivm->builder;
2018    LLVMPositionBuilderAtEnd(builder, block);
2019 
2020    memset(&vs_type, 0, sizeof vs_type);
2021    vs_type.floating = TRUE; /* floating point values */
2022    vs_type.sign = TRUE;     /* values are signed */
2023    vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
2024    vs_type.width = 32;      /* 32-bit float */
2025    vs_type.length = vector_length;
2026 
2027    lp_build_context_init(&bld, gallivm, lp_type_uint(32));
2028    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
2029 
2030    /* hold temporary "bool" clipmask */
2031    clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, "");
2032 
2033    fake_buf = lp_build_alloca_undef(gallivm,
2034                  LLVMVectorType(LLVMInt64TypeInContext(context), 4), "");
2035    fake_buf = LLVMBuildBitCast(builder, fake_buf,
2036                  LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
2037    fake_buf_ptr = LLVMBuildGEP(builder, fake_buf, &bld.zero, 1, "");
2038 
2039    /* code generated texture sampling */
2040    sampler = draw_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key), key->nr_samplers);
2041 
2042    image = draw_llvm_image_soa_create(draw_llvm_variant_key_images(key),
2043                                       key->nr_images);
2044 
2045    step = lp_build_const_int32(gallivm, vector_length);
2046 
2047    ind_vec = blduivec.undef;
2048    for (i = 0; i < vs_type.length; i++) {
2049       LLVMValueRef index = lp_build_const_int32(gallivm, i);
2050       ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");
2051    }
2052 
2053    have_elts = LLVMBuildICmp(builder, LLVMIntNE,
2054                              LLVMConstPointerNull(arg_types[10]), fetch_elts, "");
2055 
2056    fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max");
2057    fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);
2058    /*
2059     * Only needed for non-indexed path.
2060     */
2061    start_vec = lp_build_broadcast_scalar(&blduivec, start_or_maxelt);
2062 
2063    /*
2064     * Pre-calculate everything which is constant per shader invocation.
2065     */
2066    for (j = 0; j < key->nr_vertex_elements; ++j) {
2067       LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr;
2068       LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit;
2069       struct pipe_vertex_element *velem = &key->vertex_element[j];
2070       LLVMValueRef vb_index =
2071          lp_build_const_int32(gallivm, velem->vertex_buffer_index);
2072       LLVMValueRef bsize = lp_build_const_int32(gallivm,
2073                                                 util_format_get_blocksize(velem->src_format));
2074       LLVMValueRef src_offset = lp_build_const_int32(gallivm,
2075                                                      velem->src_offset);
2076       struct lp_build_if_state if_ctx;
2077 
2078       if (velem->src_format != PIPE_FORMAT_NONE) {
2079          vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &vb_index, 1, "");
2080          vb_info = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
2081          vb_stride[j] = draw_jit_vbuffer_stride(gallivm, vb_info);
2082          vb_stride[j] = LLVMBuildZExt(gallivm->builder, vb_stride[j],
2083                                       LLVMInt32TypeInContext(context), "");
2084          vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vb_info);
2085          map_ptr[j] = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr);
2086          buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
2087 
2088          ofbit = NULL;
2089          /*
2090           * We'll set buffer_size_adj to zero if we have of, so it will
2091           * always overflow later automatically without having to keep ofbit.
2092           * Overflows (with normal wraparound) doing the actual offset
2093           * calculation should be ok, just not for the buffer size calc.
2094           * It would also be possible to detect such overflows and return
2095           * zeros if that happens, but this would be more complex.
2096           */
2097          buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset);
2098          tmp = lp_build_sub(&bld, bsize, bld.one);
2099          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
2100                                                      &ofbit);
2101          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],
2102                                                      buf_offset, &ofbit);
2103 
2104          /*
2105           * We can't easily set fake vertex buffers outside the generated code.
2106           * Hence, set fake vertex buffers here instead basically, so fetch
2107           * code can always fetch using offset 0, eliminating all control flow
2108           * inside the main loop.
2109           * (Alternatively, could have control flow per vector skipping fetch
2110           * if ofbit is true.)
2111           */
2112          if (velem->instance_divisor) {
2113             /*
2114              * Index is equal to the start instance plus the number of current
2115              * instance divided by the divisor. In this case we compute it as:
2116              * index = start_instance + (instance_id  / divisor).
2117              * Note we could actually do the fetch here, outside the loop -
2118              * it's all constant, hopefully llvm recognizes this.
2119              */
2120             LLVMValueRef current_instance;
2121             current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
2122                                              lp_build_const_int32(gallivm,
2123                                                                   velem->instance_divisor),
2124                                              "instance_divisor");
2125             instance_index[j] = lp_build_uadd_overflow(gallivm, system_values.base_instance,
2126                                                        current_instance, &ofbit);
2127          }
2128 
2129          buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,
2130                                               buffer_size_adj[j], "");
2131 
2132          temp_ptr = lp_build_alloca_undef(gallivm,
2133                        LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
2134 
2135          lp_build_if(&if_ctx, gallivm, ofbit);
2136          {
2137             LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
2138          }
2139          lp_build_else(&if_ctx);
2140          {
2141             map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, "");
2142             LLVMBuildStore(builder, map_ptr[j], temp_ptr);
2143          }
2144          lp_build_endif(&if_ctx);
2145          map_ptr[j] = LLVMBuildLoad(builder, temp_ptr, "map_ptr");
2146 
2147          if (0) {
2148             lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",
2149                             lp_build_const_int32(gallivm, j),
2150                             vb_index, vb_stride[j]);
2151             lp_build_printf(gallivm,
2152                             "   vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n",
2153                             vb_buffer_offset, src_offset, buf_offset);
2154             lp_build_printf(gallivm, "   buffer size = %u, blocksize = %u\n",
2155                             buffer_size, bsize);
2156             lp_build_printf(gallivm, "   instance_id = %u\n", system_values.instance_id);
2157          }
2158       }
2159    }
2160 
2161    lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
2162    {
2163       LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
2164       LLVMValueRef io;
2165       LLVMValueRef clipmask;   /* holds the clipmask value */
2166       LLVMValueRef true_index_array, index_store;
2167       const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
2168 
2169       io_itr = lp_loop.counter;
2170 
2171       io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
2172 #if DEBUG_STORE
2173       lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
2174                       io_itr, io, lp_loop.counter);
2175 #endif
2176 
2177       true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter);
2178       true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");
2179 
2180       LLVMValueRef exec_mask = lp_build_cmp(&blduivec, PIPE_FUNC_LEQUAL, true_index_array, fetch_max);
2181       /*
2182        * Limit indices to fetch_max, otherwise might try to access indices
2183        * beyond index buffer (or rather vsplit elt buffer) size.
2184        * Could probably safely (?) skip this for non-indexed draws and
2185        * simplify things minimally (by removing it could combine the ind_vec
2186        * and start_vec adds). I think the only effect for non-indexed draws will
2187        * be that for the invalid elements they will be all fetched from the
2188        * same location as the last valid one, but noone should really care.
2189        */
2190       true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);
2191 
2192       index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");
2193 
2194       lp_build_if(&if_ctx, gallivm, have_elts);
2195       {
2196          /*
2197           * Note: you'd expect some comparison/clamp against fetch_elt_max
2198           * here.
2199           * There used to be one here but it was incorrect: overflow was
2200           * detected if index > fetch_elt_max - but the correct condition
2201           * would be index >= fetch_elt_max (since this is just size of elts
2202           * buffer / element size).
2203           * Using the correct condition however will cause failures - due to
2204           * vsplit/vcache code which rebases indices. So, as an example, if
2205           * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will
2206           * replace all invalid indices with 0 - which in case of elt_bias
2207           * not being zero will get a different fetch index than the valid
2208           * index 0. So, just rely on vsplit code preventing out-of-bounds
2209           * fetches. This is also why it's safe to do elts fetch even if there
2210           * was no index buffer bound - the real buffer is never seen here, at
2211           * least not if there are index buffer overflows...
2212           */
2213 
2214          /*
2215           * XXX should not have to do this, as scale can be handled
2216           * natively by loads (hits asserts though).
2217           */
2218          tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);
2219          fetch_elts = LLVMBuildBitCast(builder, fetch_elts,
2220                                        LLVMPointerType(LLVMInt8TypeInContext(context),
2221                                                        0), "");
2222          tmp = lp_build_gather(gallivm, vs_type.length,
2223                                32, bld.type, TRUE,
2224                                fetch_elts, tmp, FALSE);
2225          LLVMBuildStore(builder, tmp, index_store);
2226       }
2227       lp_build_else(&if_ctx);
2228       {
2229          tmp = LLVMBuildAdd(builder, true_index_array, start_vec, "");
2230          LLVMBuildStore(builder, tmp, index_store);
2231       }
2232       lp_build_endif(&if_ctx);
2233 
2234       true_index_array = LLVMBuildLoad(builder, index_store, "");
2235 
2236       for (j = 0; j < key->nr_vertex_elements; ++j) {
2237          struct pipe_vertex_element *velem = &key->vertex_element[j];
2238          const struct util_format_description *format_desc =
2239             util_format_description(velem->src_format);
2240 
2241          if (format_desc->format == PIPE_FORMAT_NONE) {
2242             for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
2243                inputs[j][i] = lp_build_zero(gallivm, vs_type);
2244             }
2245          }
2246          else if (velem->instance_divisor) {
2247             fetch_instanced(gallivm, format_desc, vs_type,
2248                             vb_stride[j], map_ptr[j],
2249                             buffer_size_adj[j],
2250                             inputs[j], instance_index[j]);
2251          }
2252          else {
2253             fetch_vector(gallivm, format_desc, vs_type,
2254                          vb_stride[j], map_ptr[j],
2255                          buffer_size_adj[j],
2256                          inputs[j], true_index_array);
2257          }
2258       }
2259 
2260       struct lp_build_mask_context mask;
2261 
2262       lp_build_mask_begin(&mask, gallivm, vs_type, exec_mask);
2263       /* In the paths with elts vertex id has to be unaffected by the
2264        * index bias and because indices inside our elements array have
2265        * already had index bias applied we need to subtract it here to
2266        * get back to the original index.
2267        * in the linear paths vertex id has to be unaffected by the
2268        * original start index and because we abuse the 'start' variable
2269        * to either represent the actual start index or the index at which
2270        * the primitive was split (we split rendering into chunks of at
2271        * most 4095-vertices) we need to back out the original start
2272        * index out of our vertex id here.
2273        * for ARB_shader_draw_parameters, base_vertex should be 0 for non-indexed draws.
2274        */
2275       LLVMValueRef base_vertex = lp_build_select(&bld, have_elts, vertex_id_offset, lp_build_const_int32(gallivm, 0));;
2276       system_values.basevertex = lp_build_broadcast_scalar(&blduivec, base_vertex);
2277       system_values.vertex_id = true_index_array;
2278       system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array,
2279                                                     lp_build_broadcast_scalar(&blduivec, vertex_id_offset), "");
2280 
2281       ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
2282       generate_vs(variant,
2283                   builder,
2284                   vs_type,
2285                   outputs,
2286                   ptr_aos,
2287                   &system_values,
2288                   context_ptr,
2289                   sampler,
2290                   image,
2291                   key->clamp_vertex_color,
2292                   &mask);
2293 
2294       lp_build_mask_end(&mask);
2295       if (pos != -1 && cv != -1) {
2296          /* store original positions in clip before further manipulation */
2297          store_clip(gallivm, vs_type, io, outputs, pos);
2298 
2299          /* do cliptest */
2300          if (enable_cliptest) {
2301             LLVMValueRef temp = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
2302             /* allocate clipmask, assign it integer type */
2303             clipmask = generate_clipmask(llvm,
2304                                          gallivm,
2305                                          vs_type,
2306                                          outputs,
2307                                          key,
2308                                          context_ptr, &have_clipdist);
2309             temp = LLVMBuildOr(builder, clipmask, temp, "");
2310             /* store temporary clipping boolean value */
2311             LLVMBuildStore(builder, temp, clipmask_bool_ptr);
2312          }
2313          else {
2314             clipmask = blduivec.zero;
2315          }
2316 
2317          /* do viewport mapping */
2318          if (!bypass_viewport) {
2319             generate_viewport(variant, builder, vs_type, outputs, context_ptr);
2320          }
2321       }
2322       else {
2323          clipmask = blduivec.zero;
2324       }
2325 
2326       /* store clipmask in vertex header,
2327        * original positions in clip
2328        * and transformed positions in data
2329        */
2330       convert_to_aos(gallivm, io, NULL, outputs, clipmask,
2331                      vs_info->num_outputs, vs_type,
2332                      enable_cliptest && key->need_edgeflags);
2333    }
2334    lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
2335 
2336    sampler->destroy(sampler);
2337    image->destroy(image);
2338 
2339    /* return clipping boolean value for function */
2340    ret = clipmask_booli8(gallivm, vs_type, clipmask_bool_ptr,
2341                          enable_cliptest && key->need_edgeflags);
2342 
2343    LLVMBuildRet(builder, ret);
2344 
2345    gallivm_verify_function(gallivm, variant_func);
2346 }
2347 
2348 
2349 struct draw_llvm_variant_key *
draw_llvm_make_variant_key(struct draw_llvm * llvm,char * store)2350 draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2351 {
2352    unsigned i;
2353    struct draw_llvm_variant_key *key;
2354    struct draw_sampler_static_state *draw_sampler;
2355    struct draw_image_static_state *draw_image;
2356 
2357    key = (struct draw_llvm_variant_key *)store;
2358 
2359    memset(key, 0, offsetof(struct draw_llvm_variant_key, vertex_element[0]));
2360 
2361    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color; /**/
2362 
2363    /* will have to rig this up properly later */
2364    key->clip_xy = llvm->draw->clip_xy;
2365    key->clip_z = llvm->draw->clip_z;
2366    key->clip_user = llvm->draw->clip_user;
2367    key->bypass_viewport = llvm->draw->bypass_viewport;
2368    key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
2369    /* XXX assumes edgeflag output not at 0 */
2370    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
2371    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
2372    key->has_gs_or_tes = llvm->draw->gs.geometry_shader != NULL || llvm->draw->tes.tess_eval_shader != NULL;
2373    key->num_outputs = draw_total_vs_outputs(llvm->draw);
2374 
2375    /* All variants of this shader will have the same value for
2376     * nr_samplers.  Not yet trying to compact away holes in the
2377     * sampler array.
2378     */
2379    key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2380    if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2381       key->nr_sampler_views =
2382          llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2383    }
2384    else {
2385       key->nr_sampler_views = key->nr_samplers;
2386    }
2387 
2388    key->nr_images = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
2389 
2390    /* Presumably all variants of the shader should have the same
2391     * number of vertex elements - ie the number of shader inputs.
2392     * NOTE: we NEED to store the needed number of needed inputs
2393     * here, not the number of provided elements to match keysize
2394     * (and the offset of sampler state in the key).
2395     * If we have excess number of vertex elements, this is valid,
2396     * but the excess ones don't matter.
2397     * If we don't have enough vertex elements (which looks not really
2398     * valid but we'll handle it gracefully) fill out missing ones with
2399     * zero (we'll recognize these later by PIPE_FORMAT_NONE).
2400     */
2401    key->nr_vertex_elements =
2402       llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;
2403 
2404    if (llvm->draw->pt.nr_vertex_elements < key->nr_vertex_elements) {
2405       debug_printf("draw: vs with %d inputs but only have %d vertex elements\n",
2406                    key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements);
2407       memset(key->vertex_element, 0,
2408              sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
2409    }
2410    memcpy(key->vertex_element,
2411           llvm->draw->pt.vertex_element,
2412           sizeof(struct pipe_vertex_element) *
2413              MIN2(key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements));
2414 
2415    draw_sampler = draw_llvm_variant_key_samplers(key);
2416    memset(draw_sampler, 0,
2417           MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
2418 
2419    for (i = 0 ; i < key->nr_samplers; i++) {
2420       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2421                                       llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);
2422    }
2423    for (i = 0 ; i < key->nr_sampler_views; i++) {
2424       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2425                                       llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);
2426    }
2427 
2428    draw_image = draw_llvm_variant_key_images(key);
2429    memset(draw_image, 0,
2430           key->nr_images * sizeof *draw_image);
2431    for (i = 0; i < key->nr_images; i++) {
2432       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
2433                                             llvm->draw->images[PIPE_SHADER_VERTEX][i]);
2434    }
2435    return key;
2436 }
2437 
2438 
2439 void
draw_llvm_dump_variant_key(struct draw_llvm_variant_key * key)2440 draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
2441 {
2442    unsigned i;
2443    struct draw_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);
2444    struct draw_image_static_state *image = draw_llvm_variant_key_images(key);
2445    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
2446    debug_printf("clip_xy = %u\n", key->clip_xy);
2447    debug_printf("clip_z = %u\n", key->clip_z);
2448    debug_printf("clip_user = %u\n", key->clip_user);
2449    debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
2450    debug_printf("clip_halfz = %u\n", key->clip_halfz);
2451    debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
2452    debug_printf("has_gs_or_tes = %u\n", key->has_gs_or_tes);
2453    debug_printf("ucp_enable = %u\n", key->ucp_enable);
2454 
2455    for (i = 0 ; i < key->nr_vertex_elements; i++) {
2456       debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);
2457       debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);
2458       debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);
2459       debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));
2460    }
2461 
2462    for (i = 0 ; i < key->nr_sampler_views; i++) {
2463       debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format));
2464    }
2465 
2466    for (i = 0 ; i < key->nr_images; i++)
2467       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
2468 }
2469 
2470 
2471 void
draw_llvm_set_mapped_texture(struct draw_context * draw,enum pipe_shader_type shader_stage,unsigned sview_idx,uint32_t width,uint32_t height,uint32_t depth,uint32_t first_level,uint32_t last_level,uint32_t num_samples,uint32_t sample_stride,const void * base_ptr,uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])2472 draw_llvm_set_mapped_texture(struct draw_context *draw,
2473                              enum pipe_shader_type shader_stage,
2474                              unsigned sview_idx,
2475                              uint32_t width, uint32_t height, uint32_t depth,
2476                              uint32_t first_level, uint32_t last_level,
2477                              uint32_t num_samples,
2478                              uint32_t sample_stride,
2479                              const void *base_ptr,
2480                              uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
2481                              uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
2482                              uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
2483 {
2484    unsigned j;
2485    struct draw_jit_texture *jit_tex;
2486 
2487    switch (shader_stage) {
2488    case PIPE_SHADER_VERTEX:
2489       assert(sview_idx < ARRAY_SIZE(draw->llvm->jit_context.textures));
2490       jit_tex = &draw->llvm->jit_context.textures[sview_idx];
2491       break;
2492    case PIPE_SHADER_GEOMETRY:
2493       assert(sview_idx < ARRAY_SIZE(draw->llvm->gs_jit_context.textures));
2494       jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx];
2495       break;
2496    case PIPE_SHADER_TESS_CTRL:
2497       assert(sview_idx < ARRAY_SIZE(draw->llvm->tcs_jit_context.textures));
2498       jit_tex = &draw->llvm->tcs_jit_context.textures[sview_idx];
2499       break;
2500    case PIPE_SHADER_TESS_EVAL:
2501       assert(sview_idx < ARRAY_SIZE(draw->llvm->tes_jit_context.textures));
2502       jit_tex = &draw->llvm->tes_jit_context.textures[sview_idx];
2503       break;
2504    default:
2505       assert(0);
2506       return;
2507    }
2508 
2509    jit_tex->width = width;
2510    jit_tex->height = height;
2511    jit_tex->depth = depth;
2512    jit_tex->first_level = first_level;
2513    jit_tex->last_level = last_level;
2514    jit_tex->base = base_ptr;
2515    jit_tex->num_samples = num_samples;
2516    jit_tex->sample_stride = sample_stride;
2517 
2518    for (j = first_level; j <= last_level; j++) {
2519       jit_tex->mip_offsets[j] = mip_offsets[j];
2520       jit_tex->row_stride[j] = row_stride[j];
2521       jit_tex->img_stride[j] = img_stride[j];
2522    }
2523 }
2524 
2525 void
draw_llvm_set_mapped_image(struct draw_context * draw,enum pipe_shader_type shader_stage,unsigned idx,uint32_t width,uint32_t height,uint32_t depth,const void * base_ptr,uint32_t row_stride,uint32_t img_stride,uint32_t num_samples,uint32_t sample_stride)2526 draw_llvm_set_mapped_image(struct draw_context *draw,
2527                            enum pipe_shader_type shader_stage,
2528                            unsigned idx,
2529                            uint32_t width, uint32_t height, uint32_t depth,
2530                            const void *base_ptr,
2531                            uint32_t row_stride,
2532                            uint32_t img_stride,
2533                            uint32_t num_samples,
2534                            uint32_t sample_stride)
2535 {
2536    struct draw_jit_image *jit_image;
2537 
2538    switch (shader_stage) {
2539    case PIPE_SHADER_VERTEX:
2540       assert(idx < ARRAY_SIZE(draw->llvm->jit_context.images));
2541       jit_image = &draw->llvm->jit_context.images[idx];
2542       break;
2543    case PIPE_SHADER_GEOMETRY:
2544       assert(idx < ARRAY_SIZE(draw->llvm->gs_jit_context.images));
2545       jit_image = &draw->llvm->gs_jit_context.images[idx];
2546       break;
2547    case PIPE_SHADER_TESS_CTRL:
2548       assert(idx < ARRAY_SIZE(draw->llvm->tcs_jit_context.images));
2549       jit_image = &draw->llvm->tcs_jit_context.images[idx];
2550       break;
2551    case PIPE_SHADER_TESS_EVAL:
2552       assert(idx < ARRAY_SIZE(draw->llvm->tes_jit_context.images));
2553       jit_image = &draw->llvm->tes_jit_context.images[idx];
2554       break;
2555    default:
2556       assert(0);
2557       return;
2558    }
2559 
2560    jit_image->width = width;
2561    jit_image->height = height;
2562    jit_image->depth = depth;
2563    jit_image->base = base_ptr;
2564 
2565    jit_image->row_stride = row_stride;
2566    jit_image->img_stride = img_stride;
2567    jit_image->num_samples = num_samples;
2568    jit_image->sample_stride = sample_stride;
2569 }
2570 
2571 
2572 void
draw_llvm_set_sampler_state(struct draw_context * draw,enum pipe_shader_type shader_type)2573 draw_llvm_set_sampler_state(struct draw_context *draw,
2574                             enum pipe_shader_type shader_type)
2575 {
2576    unsigned i;
2577 
2578    switch (shader_type) {
2579    case PIPE_SHADER_VERTEX:
2580       for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) {
2581          struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i];
2582 
2583          if (draw->samplers[PIPE_SHADER_VERTEX][i]) {
2584             const struct pipe_sampler_state *s
2585                = draw->samplers[PIPE_SHADER_VERTEX][i];
2586             jit_sam->min_lod = s->min_lod;
2587             jit_sam->max_lod = s->max_lod;
2588             jit_sam->lod_bias = s->lod_bias;
2589             COPY_4V(jit_sam->border_color, s->border_color.f);
2590          }
2591       }
2592       break;
2593    case PIPE_SHADER_GEOMETRY:
2594       for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) {
2595          struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i];
2596 
2597          if (draw->samplers[PIPE_SHADER_GEOMETRY][i]) {
2598             const struct pipe_sampler_state *s
2599                = draw->samplers[PIPE_SHADER_GEOMETRY][i];
2600             jit_sam->min_lod = s->min_lod;
2601             jit_sam->max_lod = s->max_lod;
2602             jit_sam->lod_bias = s->lod_bias;
2603             COPY_4V(jit_sam->border_color, s->border_color.f);
2604          }
2605       }
2606       break;
2607    case PIPE_SHADER_TESS_CTRL:
2608       for (i = 0; i < draw->num_samplers[PIPE_SHADER_TESS_CTRL]; i++) {
2609          struct draw_jit_sampler *jit_sam = &draw->llvm->tcs_jit_context.samplers[i];
2610 
2611          if (draw->samplers[PIPE_SHADER_TESS_CTRL][i]) {
2612             const struct pipe_sampler_state *s
2613                = draw->samplers[PIPE_SHADER_TESS_CTRL][i];
2614             jit_sam->min_lod = s->min_lod;
2615             jit_sam->max_lod = s->max_lod;
2616             jit_sam->lod_bias = s->lod_bias;
2617             COPY_4V(jit_sam->border_color, s->border_color.f);
2618          }
2619       }
2620       break;
2621    case PIPE_SHADER_TESS_EVAL:
2622       for (i = 0; i < draw->num_samplers[PIPE_SHADER_TESS_EVAL]; i++) {
2623          struct draw_jit_sampler *jit_sam = &draw->llvm->tes_jit_context.samplers[i];
2624 
2625          if (draw->samplers[PIPE_SHADER_TESS_EVAL][i]) {
2626             const struct pipe_sampler_state *s
2627                = draw->samplers[PIPE_SHADER_TESS_EVAL][i];
2628             jit_sam->min_lod = s->min_lod;
2629             jit_sam->max_lod = s->max_lod;
2630             jit_sam->lod_bias = s->lod_bias;
2631             COPY_4V(jit_sam->border_color, s->border_color.f);
2632          }
2633       }
2634       break;
2635    default:
2636       assert(0);
2637       break;
2638    }
2639 }
2640 
2641 
2642 void
draw_llvm_destroy_variant(struct draw_llvm_variant * variant)2643 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
2644 {
2645    struct draw_llvm *llvm = variant->llvm;
2646 
2647    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2648       debug_printf("Deleting VS variant: %u vs variants,\t%u total variants\n",
2649                     variant->shader->variants_cached, llvm->nr_variants);
2650    }
2651 
2652    gallivm_destroy(variant->gallivm);
2653 
2654    remove_from_list(&variant->list_item_local);
2655    variant->shader->variants_cached--;
2656    remove_from_list(&variant->list_item_global);
2657    llvm->nr_variants--;
2658    FREE(variant);
2659 }
2660 
2661 
2662 /**
2663  * Create LLVM types for various structures.
2664  */
2665 static void
create_gs_jit_types(struct draw_gs_llvm_variant * var)2666 create_gs_jit_types(struct draw_gs_llvm_variant *var)
2667 {
2668    struct gallivm_state *gallivm = var->gallivm;
2669    LLVMTypeRef texture_type, sampler_type, image_type, context_type;
2670 
2671    texture_type = create_jit_texture_type(gallivm, "texture");
2672    sampler_type = create_jit_sampler_type(gallivm, "sampler");
2673    image_type = create_jit_image_type(gallivm, "image");
2674 
2675    context_type = create_gs_jit_context_type(gallivm,
2676                                              var->shader->base.vector_length,
2677                                              texture_type, sampler_type,
2678                                              image_type,
2679                                              "draw_gs_jit_context");
2680    var->context_ptr_type = LLVMPointerType(context_type, 0);
2681 
2682    var->input_array_type = create_gs_jit_input_type(gallivm);
2683 }
2684 
2685 static LLVMTypeRef
get_gs_context_ptr_type(struct draw_gs_llvm_variant * variant)2686 get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
2687 {
2688    if (!variant->context_ptr_type)
2689       create_gs_jit_types(variant);
2690    return variant->context_ptr_type;
2691 }
2692 
2693 static LLVMValueRef
generate_mask_value(struct draw_gs_llvm_variant * variant,struct lp_type gs_type)2694 generate_mask_value(struct draw_gs_llvm_variant *variant,
2695                     struct lp_type gs_type)
2696 {
2697    struct gallivm_state *gallivm = variant->gallivm;
2698    LLVMBuilderRef builder = gallivm->builder;
2699    struct lp_type mask_type = lp_int_type(gs_type);
2700    LLVMValueRef num_prims;
2701    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
2702    unsigned i;
2703 
2704    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
2705                                   variant->num_prims);
2706    for (i = 0; i < gs_type.length; i++) {
2707       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2708       mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
2709    }
2710    mask_val = lp_build_compare(gallivm, mask_type,
2711                                PIPE_FUNC_GREATER, num_prims, mask_val);
2712 
2713    return mask_val;
2714 }
2715 
2716 static void
draw_gs_llvm_generate(struct draw_llvm * llvm,struct draw_gs_llvm_variant * variant)2717 draw_gs_llvm_generate(struct draw_llvm *llvm,
2718                       struct draw_gs_llvm_variant *variant)
2719 {
2720    struct gallivm_state *gallivm = variant->gallivm;
2721    LLVMContextRef context = gallivm->context;
2722    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
2723    LLVMTypeRef arg_types[7];
2724    LLVMTypeRef func_type;
2725    LLVMValueRef variant_func;
2726    LLVMValueRef context_ptr;
2727    LLVMValueRef prim_id_ptr;
2728    LLVMBasicBlockRef block;
2729    LLVMBuilderRef builder;
2730    LLVMValueRef io_ptr, input_array, num_prims, mask_val;
2731    struct lp_build_sampler_soa *sampler = 0;
2732    struct lp_build_image_soa *image = NULL;
2733    struct lp_build_context bld;
2734    struct lp_bld_tgsi_system_values system_values;
2735    char func_name[64];
2736    struct lp_type gs_type;
2737    unsigned i;
2738    struct draw_gs_llvm_iface gs_iface;
2739    const struct tgsi_token *tokens = variant->shader->base.state.tokens;
2740    LLVMValueRef consts_ptr, num_consts_ptr;
2741    LLVMValueRef ssbos_ptr, num_ssbos_ptr;
2742    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2743    struct lp_build_mask_context mask;
2744    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
2745    unsigned vector_length = variant->shader->base.vector_length;
2746 
2747    memset(&system_values, 0, sizeof(system_values));
2748    memset(&outputs, 0, sizeof(outputs));
2749 
2750    snprintf(func_name, sizeof(func_name), "draw_llvm_gs_variant");
2751 
2752    assert(variant->vertex_header_ptr_type);
2753 
2754    arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
2755    arg_types[1] = variant->input_array_type;           /* input */
2756    arg_types[2] = LLVMPointerType(variant->vertex_header_ptr_type, 0);     /* vertex_header */
2757    arg_types[3] = int32_type;                          /* num_prims */
2758    arg_types[4] = int32_type;                          /* instance_id */
2759    arg_types[5] = LLVMPointerType(
2760       LLVMVectorType(int32_type, vector_length), 0);   /* prim_id_ptr */
2761    arg_types[6] = int32_type;
2762 
2763    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
2764 
2765    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
2766 
2767    variant->function = variant_func;
2768 
2769    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2770 
2771    for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
2772       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
2773          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
2774 
2775    if (gallivm->cache && gallivm->cache->data_size)
2776       return;
2777    context_ptr               = LLVMGetParam(variant_func, 0);
2778    input_array               = LLVMGetParam(variant_func, 1);
2779    io_ptr                    = LLVMGetParam(variant_func, 2);
2780    num_prims                 = LLVMGetParam(variant_func, 3);
2781    system_values.instance_id = LLVMGetParam(variant_func, 4);
2782    prim_id_ptr               = LLVMGetParam(variant_func, 5);
2783    system_values.invocation_id = LLVMGetParam(variant_func, 6);
2784 
2785    lp_build_name(context_ptr, "context");
2786    lp_build_name(input_array, "input");
2787    lp_build_name(io_ptr, "io");
2788    lp_build_name(num_prims, "num_prims");
2789    lp_build_name(system_values.instance_id, "instance_id");
2790    lp_build_name(prim_id_ptr, "prim_id_ptr");
2791    lp_build_name(system_values.invocation_id, "invocation_id");
2792 
2793    variant->context_ptr = context_ptr;
2794    variant->io_ptr = io_ptr;
2795    variant->num_prims = num_prims;
2796 
2797    gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;
2798    gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;
2799    gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;
2800    gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;
2801    gs_iface.input = input_array;
2802    gs_iface.variant = variant;
2803 
2804    /*
2805     * Function body
2806     */
2807 
2808    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2809    builder = gallivm->builder;
2810    LLVMPositionBuilderAtEnd(builder, block);
2811 
2812    lp_build_context_init(&bld, gallivm, lp_type_int(32));
2813 
2814    memset(&gs_type, 0, sizeof gs_type);
2815    gs_type.floating = TRUE; /* floating point values */
2816    gs_type.sign = TRUE;     /* values are signed */
2817    gs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
2818    gs_type.width = 32;      /* 32-bit float */
2819    gs_type.length = vector_length;
2820 
2821    consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr);
2822    num_consts_ptr =
2823       draw_gs_jit_context_num_constants(variant->gallivm, context_ptr);
2824 
2825    ssbos_ptr = draw_gs_jit_context_ssbos(variant->gallivm, context_ptr);
2826    num_ssbos_ptr =
2827       draw_gs_jit_context_num_ssbos(variant->gallivm, context_ptr);
2828 
2829    /* code generated texture sampling */
2830    sampler = draw_llvm_sampler_soa_create(variant->key.samplers, variant->key.nr_samplers);
2831    image = draw_llvm_image_soa_create(draw_gs_llvm_variant_key_images(&variant->key),
2832                                       variant->key.nr_images);
2833    mask_val = generate_mask_value(variant, gs_type);
2834    lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
2835 
2836    if (gs_info->uses_primid) {
2837       system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id");
2838    }
2839 
2840    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2841       if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2842          tgsi_dump(tokens, 0);
2843       else
2844          nir_print_shader(llvm->draw->gs.geometry_shader->state.ir.nir, stderr);
2845       draw_gs_llvm_dump_variant_key(&variant->key);
2846    }
2847 
2848    struct lp_build_tgsi_params params;
2849    memset(&params, 0, sizeof(params));
2850 
2851    params.type = gs_type;
2852    params.mask = &mask;
2853    params.consts_ptr = consts_ptr;
2854    params.const_sizes_ptr = num_consts_ptr;
2855    params.system_values = &system_values;
2856    params.context_ptr = context_ptr;
2857    params.sampler = sampler;
2858    params.info = &llvm->draw->gs.geometry_shader->info;
2859    params.gs_iface = (const struct lp_build_gs_iface *)&gs_iface;
2860    params.ssbo_ptr = ssbos_ptr;
2861    params.ssbo_sizes_ptr = num_ssbos_ptr;
2862    params.image = image;
2863    params.gs_vertex_streams = variant->shader->base.num_vertex_streams;
2864 
2865    if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2866       lp_build_tgsi_soa(variant->gallivm,
2867                         tokens,
2868                         &params,
2869                         outputs);
2870    else
2871       lp_build_nir_soa(variant->gallivm,
2872                        llvm->draw->gs.geometry_shader->state.ir.nir,
2873                        &params,
2874                        outputs);
2875 
2876    sampler->destroy(sampler);
2877    image->destroy(image);
2878 
2879    lp_build_mask_end(&mask);
2880 
2881    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
2882 
2883    gallivm_verify_function(gallivm, variant_func);
2884 }
2885 
2886 struct draw_gs_llvm_variant *
draw_gs_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_gs_llvm_variant_key * key)2887 draw_gs_llvm_create_variant(struct draw_llvm *llvm,
2888                             unsigned num_outputs,
2889                             const struct draw_gs_llvm_variant_key *key)
2890 {
2891    struct draw_gs_llvm_variant *variant;
2892    struct llvm_geometry_shader *shader =
2893       llvm_geometry_shader(llvm->draw->gs.geometry_shader);
2894    LLVMTypeRef vertex_header;
2895    char module_name[64];
2896    unsigned char ir_sha1_cache_key[20];
2897    struct lp_cached_code cached = { 0 };
2898    bool needs_caching = false;
2899 
2900    variant = MALLOC(sizeof *variant +
2901                     shader->variant_key_size -
2902                     sizeof variant->key);
2903    if (!variant)
2904       return NULL;
2905 
2906    variant->llvm = llvm;
2907    variant->shader = shader;
2908 
2909    snprintf(module_name, sizeof(module_name), "draw_llvm_gs_variant%u",
2910             variant->shader->variants_cached);
2911 
2912    memcpy(&variant->key, key, shader->variant_key_size);
2913 
2914    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
2915       draw_get_ir_cache_key(shader->base.state.ir.nir,
2916                             key,
2917                             shader->variant_key_size,
2918                             num_outputs,
2919                             ir_sha1_cache_key);
2920 
2921       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
2922                                          &cached,
2923                                          ir_sha1_cache_key);
2924       if (!cached.data_size)
2925          needs_caching = true;
2926    }
2927    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
2928 
2929    create_gs_jit_types(variant);
2930 
2931    vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);
2932 
2933    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
2934 
2935    draw_gs_llvm_generate(llvm, variant);
2936 
2937    gallivm_compile_module(variant->gallivm);
2938 
2939    variant->jit_func = (draw_gs_jit_func)
2940          gallivm_jit_function(variant->gallivm, variant->function);
2941 
2942    if (needs_caching)
2943       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
2944                                            &cached,
2945                                            ir_sha1_cache_key);
2946    gallivm_free_ir(variant->gallivm);
2947 
2948    variant->list_item_global.base = variant;
2949    variant->list_item_local.base = variant;
2950    /*variant->no = */shader->variants_created++;
2951    variant->list_item_global.base = variant;
2952 
2953    return variant;
2954 }
2955 
2956 void
draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant * variant)2957 draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
2958 {
2959    struct draw_llvm *llvm = variant->llvm;
2960 
2961    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2962       debug_printf("Deleting GS variant: %u gs variants,\t%u total variants\n",
2963                     variant->shader->variants_cached, llvm->nr_gs_variants);
2964    }
2965 
2966    gallivm_destroy(variant->gallivm);
2967 
2968    remove_from_list(&variant->list_item_local);
2969    variant->shader->variants_cached--;
2970    remove_from_list(&variant->list_item_global);
2971    llvm->nr_gs_variants--;
2972    FREE(variant);
2973 }
2974 
2975 struct draw_gs_llvm_variant_key *
draw_gs_llvm_make_variant_key(struct draw_llvm * llvm,char * store)2976 draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2977 {
2978    unsigned i;
2979    struct draw_gs_llvm_variant_key *key;
2980    struct draw_sampler_static_state *draw_sampler;
2981    struct draw_image_static_state *draw_image;
2982 
2983    key = (struct draw_gs_llvm_variant_key *)store;
2984 
2985    memset(key, 0, offsetof(struct draw_gs_llvm_variant_key, samplers[0]));
2986 
2987    key->num_outputs = draw_total_gs_outputs(llvm->draw);
2988 
2989    /* All variants of this shader will have the same value for
2990     * nr_samplers.  Not yet trying to compact away holes in the
2991     * sampler array.
2992     */
2993    key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2994    if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2995       key->nr_sampler_views =
2996          llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2997    }
2998    else {
2999       key->nr_sampler_views = key->nr_samplers;
3000    }
3001 
3002    key->nr_images = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
3003 
3004    draw_sampler = key->samplers;
3005 
3006    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3007 
3008    for (i = 0 ; i < key->nr_samplers; i++) {
3009       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3010                                       llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
3011    }
3012    for (i = 0 ; i < key->nr_sampler_views; i++) {
3013       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3014                                       llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
3015    }
3016 
3017    draw_image = draw_gs_llvm_variant_key_images(key);
3018    memset(draw_image, 0,
3019           key->nr_images * sizeof *draw_image);
3020    for (i = 0; i < key->nr_images; i++) {
3021       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3022                                             llvm->draw->images[PIPE_SHADER_GEOMETRY][i]);
3023    }
3024    return key;
3025 }
3026 
3027 void
draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key * key)3028 draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
3029 {
3030    unsigned i;
3031    struct draw_sampler_static_state *sampler = key->samplers;
3032    struct draw_image_static_state *image = draw_gs_llvm_variant_key_images(key);
3033    for (i = 0 ; i < key->nr_sampler_views; i++) {
3034       debug_printf("sampler[%i].src_format = %s\n", i,
3035                    util_format_name(sampler[i].texture_state.format));
3036    }
3037 
3038    for (i = 0 ; i < key->nr_images; i++)
3039       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3040 
3041 }
3042 
3043 static void
create_tcs_jit_types(struct draw_tcs_llvm_variant * var)3044 create_tcs_jit_types(struct draw_tcs_llvm_variant *var)
3045 {
3046    struct gallivm_state *gallivm = var->gallivm;
3047    LLVMTypeRef texture_type, sampler_type, image_type, context_type;
3048 
3049    texture_type = create_jit_texture_type(gallivm, "texture");
3050    sampler_type = create_jit_sampler_type(gallivm, "sampler");
3051    image_type = create_jit_image_type(gallivm, "image");
3052 
3053    context_type = create_tcs_jit_context_type(gallivm,
3054                                               0,
3055                                               texture_type, sampler_type,
3056                                               image_type,
3057                                               "draw_tcs_jit_context");
3058    var->input_array_type = create_tcs_jit_input_type(gallivm);
3059    var->output_array_type = create_tcs_jit_output_type(gallivm);
3060    var->context_ptr_type = LLVMPointerType(context_type, 0);
3061 }
3062 
3063 static LLVMTypeRef
get_tcs_context_ptr_type(struct draw_tcs_llvm_variant * variant)3064 get_tcs_context_ptr_type(struct draw_tcs_llvm_variant *variant)
3065 {
3066    if (!variant->context_ptr_type)
3067       create_tcs_jit_types(variant);
3068    return variant->context_ptr_type;
3069 }
3070 
3071 static LLVMValueRef
draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,boolean is_vindex_indirect,LLVMValueRef vertex_index,boolean is_aindex_indirect,LLVMValueRef attrib_index,boolean is_sindex_indirect,LLVMValueRef swizzle_index)3072 draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface *tes_iface,
3073                                struct lp_build_context *bld,
3074                                boolean is_vindex_indirect,
3075                                LLVMValueRef vertex_index,
3076                                boolean is_aindex_indirect,
3077                                LLVMValueRef attrib_index,
3078                                boolean is_sindex_indirect,
3079                                LLVMValueRef swizzle_index)
3080 {
3081    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
3082    struct gallivm_state *gallivm = bld->gallivm;
3083    LLVMBuilderRef builder = gallivm->builder;
3084    LLVMValueRef indices[3];
3085    LLVMValueRef res;
3086    struct lp_type type = bld->type;
3087 
3088    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3089       int i;
3090 
3091       res = bld->zero;
3092       for (i = 0; i < type.length; ++i) {
3093          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3094          LLVMValueRef vert_chan_index = vertex_index;
3095          LLVMValueRef attr_chan_index = attrib_index;
3096          LLVMValueRef swiz_chan_index = swizzle_index;
3097          LLVMValueRef channel_vec;
3098 
3099          if (is_vindex_indirect) {
3100             vert_chan_index = LLVMBuildExtractElement(builder,
3101                                                       vertex_index, idx, "");
3102          }
3103          if (is_aindex_indirect) {
3104             attr_chan_index = LLVMBuildExtractElement(builder,
3105                                                       attrib_index, idx, "");
3106          }
3107          if (is_sindex_indirect) {
3108             swiz_chan_index = LLVMBuildExtractElement(builder,
3109                                                       swizzle_index, idx, "");
3110          }
3111 
3112          indices[0] = vert_chan_index;
3113          indices[1] = attr_chan_index;
3114          indices[2] = swiz_chan_index;
3115 
3116          channel_vec = LLVMBuildGEP(builder, tcs->input, indices, 3, "");
3117          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
3118 
3119          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3120       }
3121    } else {
3122       indices[0] = vertex_index;
3123       indices[1] = attrib_index;
3124       indices[2] = swizzle_index;
3125 
3126       res = LLVMBuildGEP(builder, tcs->input, indices, 3, "");
3127       res = LLVMBuildLoad(builder, res, "");
3128       res = lp_build_broadcast_scalar(bld, res);
3129    }
3130    return res;
3131 }
3132 
3133 static LLVMValueRef
draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,boolean is_vindex_indirect,LLVMValueRef vertex_index,boolean is_aindex_indirect,LLVMValueRef attrib_index,boolean is_sindex_indirect,LLVMValueRef swizzle_index,uint32_t name)3134 draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface *tes_iface,
3135                                 struct lp_build_context *bld,
3136                                 boolean is_vindex_indirect,
3137                                 LLVMValueRef vertex_index,
3138                                 boolean is_aindex_indirect,
3139                                 LLVMValueRef attrib_index,
3140                                 boolean is_sindex_indirect,
3141                                 LLVMValueRef swizzle_index,
3142                                 uint32_t name)
3143 {
3144    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
3145    struct gallivm_state *gallivm = bld->gallivm;
3146    LLVMBuilderRef builder = gallivm->builder;
3147    LLVMValueRef indices[3];
3148    LLVMValueRef res;
3149    struct lp_type type = bld->type;
3150 
3151    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3152       int i;
3153 
3154       res = bld->zero;
3155       for (i = 0; i < type.length; ++i) {
3156          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3157          LLVMValueRef vert_chan_index = vertex_index;
3158          LLVMValueRef attr_chan_index = attrib_index;
3159          LLVMValueRef swiz_chan_index = swizzle_index;
3160          LLVMValueRef channel_vec;
3161 
3162          if (is_vindex_indirect) {
3163             vert_chan_index = LLVMBuildExtractElement(builder,
3164                                                       vertex_index, idx, "");
3165          }
3166          if (is_aindex_indirect) {
3167             attr_chan_index = LLVMBuildExtractElement(builder,
3168                                                       attrib_index, idx, "");
3169          }
3170          if (is_sindex_indirect) {
3171             swiz_chan_index = LLVMBuildExtractElement(builder,
3172                                                       swizzle_index, idx, "");
3173          }
3174 
3175          indices[0] = vert_chan_index;
3176          indices[1] = attr_chan_index;
3177          indices[2] = swiz_chan_index;
3178 
3179          channel_vec = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
3180          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
3181 
3182          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3183       }
3184    } else {
3185       indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
3186       indices[1] = attrib_index;
3187       indices[2] = swizzle_index;
3188 
3189       res = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
3190       res = LLVMBuildLoad(builder, res, "");
3191       res = lp_build_broadcast_scalar(bld, res);
3192    }
3193    return res;
3194 }
3195 
3196 static void
draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,unsigned name,boolean is_vindex_indirect,LLVMValueRef vertex_index,boolean is_aindex_indirect,LLVMValueRef attrib_index,boolean is_sindex_indirect,LLVMValueRef swizzle_index,LLVMValueRef value,LLVMValueRef mask_vec)3197 draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface *tes_iface,
3198                                 struct lp_build_context *bld,
3199                                 unsigned name,
3200                                 boolean is_vindex_indirect,
3201                                 LLVMValueRef vertex_index,
3202                                 boolean is_aindex_indirect,
3203                                 LLVMValueRef attrib_index,
3204                                 boolean is_sindex_indirect,
3205                                 LLVMValueRef swizzle_index,
3206                                 LLVMValueRef value,
3207                                 LLVMValueRef mask_vec)
3208 {
3209    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
3210    struct gallivm_state *gallivm = bld->gallivm;
3211    LLVMBuilderRef builder = gallivm->builder;
3212    LLVMValueRef indices[3];
3213    LLVMValueRef res;
3214    struct lp_type type = bld->type;
3215 
3216    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3217       int i;
3218 
3219       for (i = 0; i < type.length; ++i) {
3220          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3221          LLVMValueRef vert_chan_index = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
3222          LLVMValueRef attr_chan_index = attrib_index;
3223          LLVMValueRef swiz_chan_index = swizzle_index;
3224          LLVMValueRef channel_vec;
3225 
3226          if (is_vindex_indirect) {
3227             vert_chan_index = LLVMBuildExtractElement(builder,
3228                                                       vertex_index, idx, "");
3229          }
3230          if (is_aindex_indirect) {
3231             attr_chan_index = LLVMBuildExtractElement(builder,
3232                                                       attrib_index, idx, "");
3233          }
3234 
3235          if (is_sindex_indirect) {
3236             swiz_chan_index = LLVMBuildExtractElement(builder,
3237                                                       swizzle_index, idx, "");
3238          }
3239 
3240          indices[0] = vert_chan_index;
3241          indices[1] = attr_chan_index;
3242          indices[2] = swiz_chan_index;
3243 
3244          channel_vec = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
3245 
3246          res = LLVMBuildExtractElement(builder, value, idx, "");
3247 
3248          struct lp_build_if_state ifthen;
3249          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
3250          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
3251          lp_build_if(&ifthen, gallivm, cond);
3252          LLVMBuildStore(builder, res, channel_vec);
3253          lp_build_endif(&ifthen);
3254       }
3255    } else {
3256       indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
3257       indices[1] = attrib_index;
3258       indices[2] = swizzle_index;
3259 
3260       res = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
3261       for (unsigned i = 0; i < type.length; ++i) {
3262          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3263          LLVMValueRef val = LLVMBuildExtractElement(builder, value, idx, "");
3264 
3265          struct lp_build_if_state ifthen;
3266          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
3267          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
3268          lp_build_if(&ifthen, gallivm, cond);
3269          LLVMBuildStore(builder, val, res);
3270          lp_build_endif(&ifthen);
3271       }
3272    }
3273 }
3274 
3275 
3276 static LLVMValueRef
generate_tcs_mask_value(struct draw_tcs_llvm_variant * variant,struct lp_type tcs_type,LLVMValueRef limit,LLVMValueRef loop_counter)3277 generate_tcs_mask_value(struct draw_tcs_llvm_variant *variant,
3278                         struct lp_type tcs_type, LLVMValueRef limit, LLVMValueRef loop_counter)
3279 {
3280    struct gallivm_state *gallivm = variant->gallivm;
3281    LLVMBuilderRef builder = gallivm->builder;
3282    struct lp_type mask_type = lp_int_type(tcs_type);
3283    LLVMValueRef num_vecs;
3284    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
3285    unsigned i;
3286 
3287    num_vecs = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
3288    for (i = 0; i < tcs_type.length; i++) {
3289       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3290       mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
3291    }
3292    mask_val = lp_build_compare(gallivm, mask_type,
3293                                PIPE_FUNC_GREATER, num_vecs, mask_val);
3294 
3295    return mask_val;
3296 }
3297 
3298 static void
draw_tcs_llvm_generate(struct draw_llvm * llvm,struct draw_tcs_llvm_variant * variant)3299 draw_tcs_llvm_generate(struct draw_llvm *llvm,
3300                        struct draw_tcs_llvm_variant *variant)
3301 {
3302    struct gallivm_state *gallivm = variant->gallivm;
3303    LLVMContextRef context = gallivm->context;
3304    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
3305    LLVMTypeRef arg_types[6];
3306    LLVMTypeRef func_type, coro_func_type;
3307    LLVMValueRef variant_func, variant_coro;
3308    LLVMValueRef context_ptr;
3309    LLVMValueRef input_array, output_array, prim_id, patch_vertices_in;
3310    LLVMValueRef mask_val;
3311    LLVMBasicBlockRef block;
3312    LLVMBuilderRef builder;
3313    struct lp_build_context bld, bldvec;
3314    struct lp_build_sampler_soa *sampler = 0;
3315    struct lp_build_image_soa *image = NULL;
3316    struct lp_bld_tgsi_system_values system_values;
3317    char func_name[64], func_name_coro[64];
3318    unsigned i;
3319    struct draw_tcs_llvm_iface tcs_iface;
3320    struct lp_build_mask_context mask;
3321    LLVMValueRef consts_ptr, num_consts_ptr;
3322    LLVMValueRef ssbos_ptr, num_ssbos_ptr;
3323    struct lp_type tcs_type;
3324    unsigned vector_length = variant->shader->base.vector_length;
3325 
3326    memset(&system_values, 0, sizeof(system_values));
3327 
3328    snprintf(func_name, sizeof(func_name), "draw_llvm_tcs_variant");
3329 
3330    snprintf(func_name_coro, sizeof(func_name_coro), "draw_llvm_tcs_coro_variant");
3331 
3332    arg_types[0] = get_tcs_context_ptr_type(variant);    /* context */
3333    arg_types[1] = variant->input_array_type;           /* input */
3334    arg_types[2] = variant->output_array_type;
3335    arg_types[3] = int32_type;
3336    arg_types[4] = int32_type;
3337    arg_types[5] = int32_type; /* coroutine only */
3338 
3339    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types) - 1, 0);
3340 
3341    coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), arg_types, ARRAY_SIZE(arg_types), 0);
3342 
3343    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
3344 
3345    variant_coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
3346 
3347    variant->function = variant_func;
3348    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
3349 
3350    LLVMSetFunctionCallConv(variant_coro, LLVMCCallConv);
3351 
3352    for (i = 0; i < ARRAY_SIZE(arg_types); ++i) {
3353       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
3354          lp_add_function_attr(variant_coro, i + 1, LP_FUNC_ATTR_NOALIAS);
3355          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
3356       }
3357    }
3358 
3359    if (gallivm->cache && gallivm->cache->data_size)
3360       return;
3361    context_ptr               = LLVMGetParam(variant_func, 0);
3362    input_array               = LLVMGetParam(variant_func, 1);
3363    output_array              = LLVMGetParam(variant_func, 2);
3364    prim_id                   = LLVMGetParam(variant_func, 3);
3365    patch_vertices_in         = LLVMGetParam(variant_func, 4);
3366 
3367    lp_build_name(context_ptr, "context");
3368    lp_build_name(input_array, "input");
3369    lp_build_name(output_array, "output");
3370    lp_build_name(prim_id, "prim_id");
3371    lp_build_name(patch_vertices_in, "patch_vertices_in");
3372 
3373    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
3374    builder = gallivm->builder;
3375    LLVMPositionBuilderAtEnd(builder, block);
3376 
3377    lp_build_context_init(&bld, gallivm, lp_type_int(32));
3378 
3379    memset(&tcs_type, 0, sizeof tcs_type);
3380    tcs_type.floating = TRUE; /* floating point values */
3381    tcs_type.sign = TRUE;     /* values are signed */
3382    tcs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
3383    tcs_type.width = 32;      /* 32-bit float */
3384    tcs_type.length = vector_length;
3385 
3386    lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tcs_type));
3387 
3388    LLVMValueRef count = lp_build_const_int32(gallivm, variant->shader->base.vertices_out);
3389    LLVMValueRef step = lp_build_const_int32(gallivm, vector_length);
3390 
3391    struct lp_build_loop_state loop_state[2];
3392    LLVMValueRef num_inner_loop;
3393    unsigned count_align = util_align_npot(variant->shader->base.vertices_out, tcs_type.length);
3394    num_inner_loop = lp_build_const_int32(gallivm, count_align / tcs_type.length);
3395    LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
3396    LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_inner_loop, "coro_hdls");
3397    unsigned end_coroutine = INT_MAX;
3398    lp_build_loop_begin(&loop_state[1], gallivm,
3399                        lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
3400    lp_build_loop_begin(&loop_state[0], gallivm,
3401                        lp_build_const_int32(gallivm, 0)); /* inner loop */
3402    {
3403       LLVMValueRef args[6];
3404       args[0] = context_ptr;
3405       args[1] = input_array;
3406       args[2] = output_array;
3407       args[3] = prim_id;
3408       args[4] = patch_vertices_in;
3409       args[5] = loop_state[0].counter;
3410       LLVMValueRef coro_entry = LLVMBuildGEP(builder, coro_hdls, &loop_state[0].counter, 1, "");
3411       LLVMValueRef coro_hdl = LLVMBuildLoad(builder, coro_entry, "coro_hdl");
3412 
3413       struct lp_build_if_state ifstate;
3414       LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntEQ, loop_state[1].counter,
3415                                        lp_build_const_int32(gallivm, 0), "");
3416       /* first time here - call the coroutine function entry point */
3417       lp_build_if(&ifstate, gallivm, cmp);
3418       LLVMValueRef coro_ret = LLVMBuildCall(builder, variant_coro, args, 6, "");
3419       LLVMBuildStore(builder, coro_ret, coro_entry);
3420       lp_build_else(&ifstate);
3421       /* subsequent calls for this invocation - check if done. */
3422       LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
3423       struct lp_build_if_state ifstate2;
3424       lp_build_if(&ifstate2, gallivm, coro_done);
3425       /* if done destroy and force loop exit */
3426       lp_build_coro_destroy(gallivm, coro_hdl);
3427       lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
3428       lp_build_else(&ifstate2);
3429       /* otherwise resume the coroutine */
3430       lp_build_coro_resume(gallivm, coro_hdl);
3431       lp_build_endif(&ifstate2);
3432       lp_build_endif(&ifstate);
3433       lp_build_loop_force_reload_counter(&loop_state[1]);
3434    }
3435    lp_build_loop_end_cond(&loop_state[0],
3436                           num_inner_loop,
3437                           NULL,  LLVMIntUGE);
3438    lp_build_loop_end_cond(&loop_state[1],
3439                           lp_build_const_int32(gallivm, end_coroutine),
3440                           NULL, LLVMIntEQ);
3441    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
3442 
3443    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "entry");
3444    LLVMPositionBuilderAtEnd(builder, block);
3445 
3446    context_ptr = LLVMGetParam(variant_coro, 0);
3447    input_array = LLVMGetParam(variant_coro, 1);
3448    output_array = LLVMGetParam(variant_coro, 2);
3449    prim_id = LLVMGetParam(variant_coro, 3);
3450    patch_vertices_in = LLVMGetParam(variant_coro, 4);
3451 
3452    consts_ptr = draw_tcs_jit_context_constants(variant->gallivm, context_ptr);
3453    num_consts_ptr =
3454       draw_tcs_jit_context_num_constants(variant->gallivm, context_ptr);
3455 
3456    ssbos_ptr = draw_tcs_jit_context_ssbos(variant->gallivm, context_ptr);
3457    num_ssbos_ptr =
3458       draw_tcs_jit_context_num_ssbos(variant->gallivm, context_ptr);
3459    sampler = draw_llvm_sampler_soa_create(variant->key.samplers, variant->key.nr_samplers);
3460    image = draw_llvm_image_soa_create(draw_tcs_llvm_variant_key_images(&variant->key),
3461                                       variant->key.nr_images);
3462 
3463    LLVMValueRef counter = LLVMGetParam(variant_coro, 5);
3464    LLVMValueRef invocvec = LLVMGetUndef(LLVMVectorType(int32_type, vector_length));
3465    for (i = 0; i < vector_length; i++) {
3466       LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), lp_build_const_int32(gallivm, i), "");
3467       invocvec = LLVMBuildInsertElement(builder, invocvec, idx, idx, "");
3468    }
3469 
3470    system_values.invocation_id = invocvec;
3471    system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
3472    system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
3473    tcs_iface.input = input_array;
3474    tcs_iface.output = output_array;
3475    tcs_iface.base.emit_fetch_input = draw_tcs_llvm_emit_fetch_input;
3476    tcs_iface.base.emit_fetch_output = draw_tcs_llvm_emit_fetch_output;
3477    tcs_iface.base.emit_store_output = draw_tcs_llvm_emit_store_output;
3478 
3479 
3480    {
3481       LLVMValueRef coro_id = lp_build_coro_id(gallivm);
3482       LLVMValueRef coro_hdl = lp_build_coro_begin_alloc_mem(gallivm, coro_id);
3483 
3484       mask_val = generate_tcs_mask_value(variant, tcs_type, count, LLVMBuildMul(builder, counter, step, ""));
3485       lp_build_mask_begin(&mask, gallivm, tcs_type, mask_val);
3486 
3487       struct lp_build_coro_suspend_info coro_info;
3488 
3489       LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "suspend");
3490       LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "cleanup");
3491 
3492       coro_info.suspend = sus_block;
3493       coro_info.cleanup = clean_block;
3494 
3495       struct lp_build_tgsi_params params;
3496       memset(&params, 0, sizeof(params));
3497 
3498       params.type = tcs_type;
3499       params.mask = &mask;
3500       params.consts_ptr = consts_ptr;
3501       params.const_sizes_ptr = num_consts_ptr;
3502       params.system_values = &system_values;
3503       params.context_ptr = context_ptr;
3504       params.sampler = sampler;
3505       params.info = &llvm->draw->tcs.tess_ctrl_shader->info;
3506       params.ssbo_ptr = ssbos_ptr;
3507       params.ssbo_sizes_ptr = num_ssbos_ptr;
3508       params.image = image;
3509       params.coro = &coro_info;
3510       params.tcs_iface = &tcs_iface.base;
3511 
3512       lp_build_nir_soa(variant->gallivm,
3513                        llvm->draw->tcs.tess_ctrl_shader->state.ir.nir,
3514                        &params, NULL);
3515 
3516       lp_build_mask_end(&mask);
3517 
3518       lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
3519       LLVMPositionBuilderAtEnd(builder, clean_block);
3520 
3521       lp_build_coro_free_mem(gallivm, coro_id, coro_hdl);
3522 
3523       LLVMBuildBr(builder, sus_block);
3524       LLVMPositionBuilderAtEnd(builder, sus_block);
3525 
3526       lp_build_coro_end(gallivm, coro_hdl);
3527       LLVMBuildRet(builder, coro_hdl);
3528    }
3529 
3530    sampler->destroy(sampler);
3531    image->destroy(image);
3532    gallivm_verify_function(gallivm, variant_func);
3533    gallivm_verify_function(gallivm, variant_coro);
3534 }
3535 
3536 struct draw_tcs_llvm_variant *
draw_tcs_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_tcs_llvm_variant_key * key)3537 draw_tcs_llvm_create_variant(struct draw_llvm *llvm,
3538                              unsigned num_outputs,
3539                              const struct draw_tcs_llvm_variant_key *key)
3540 {
3541    struct draw_tcs_llvm_variant *variant;
3542    struct llvm_tess_ctrl_shader *shader = llvm_tess_ctrl_shader(llvm->draw->tcs.tess_ctrl_shader);
3543    char module_name[64];
3544    unsigned char ir_sha1_cache_key[20];
3545    struct lp_cached_code cached = { 0 };
3546    bool needs_caching = false;
3547 
3548    variant = MALLOC(sizeof *variant +
3549                     shader->variant_key_size - sizeof variant->key);
3550    if (!variant)
3551       return NULL;
3552 
3553    variant->llvm = llvm;
3554    variant->shader = shader;
3555 
3556    snprintf(module_name, sizeof(module_name), "draw_llvm_tcs_variant%u",
3557             variant->shader->variants_cached);
3558 
3559    memcpy(&variant->key, key, shader->variant_key_size);
3560 
3561    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
3562       draw_get_ir_cache_key(shader->base.state.ir.nir,
3563                             key,
3564                             shader->variant_key_size,
3565                             num_outputs,
3566                             ir_sha1_cache_key);
3567 
3568       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
3569                                          &cached,
3570                                          ir_sha1_cache_key);
3571       if (!cached.data_size)
3572          needs_caching = true;
3573    }
3574 
3575    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
3576 
3577    create_tcs_jit_types(variant);
3578 
3579    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3580       nir_print_shader(llvm->draw->tcs.tess_ctrl_shader->state.ir.nir, stderr);
3581       draw_tcs_llvm_dump_variant_key(&variant->key);
3582    }
3583 
3584    lp_build_coro_declare_malloc_hooks(variant->gallivm);
3585    draw_tcs_llvm_generate(llvm, variant);
3586 
3587    gallivm_compile_module(variant->gallivm);
3588 
3589    lp_build_coro_add_malloc_hooks(variant->gallivm);
3590    variant->jit_func = (draw_tcs_jit_func)
3591       gallivm_jit_function(variant->gallivm, variant->function);
3592 
3593    if (needs_caching)
3594       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
3595                                            &cached,
3596                                            ir_sha1_cache_key);
3597    gallivm_free_ir(variant->gallivm);
3598 
3599    variant->list_item_global.base = variant;
3600    variant->list_item_local.base = variant;
3601    /*variant->no = */shader->variants_created++;
3602    variant->list_item_global.base = variant;
3603 
3604    return variant;
3605 }
3606 
3607 void
draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant * variant)3608 draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant *variant)
3609 {
3610    struct draw_llvm *llvm = variant->llvm;
3611 
3612    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3613       debug_printf("Deleting TCS variant: %u tcs variants,\t%u total variants\n",
3614                     variant->shader->variants_cached, llvm->nr_tcs_variants);
3615    }
3616 
3617    gallivm_destroy(variant->gallivm);
3618 
3619    remove_from_list(&variant->list_item_local);
3620    variant->shader->variants_cached--;
3621    remove_from_list(&variant->list_item_global);
3622    llvm->nr_tcs_variants--;
3623    FREE(variant);
3624 }
3625 
3626 struct draw_tcs_llvm_variant_key *
draw_tcs_llvm_make_variant_key(struct draw_llvm * llvm,char * store)3627 draw_tcs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
3628 {
3629    unsigned i;
3630    struct draw_tcs_llvm_variant_key *key;
3631    struct draw_sampler_static_state *draw_sampler;
3632    struct draw_image_static_state *draw_image;
3633 
3634    key = (struct draw_tcs_llvm_variant_key *)store;
3635 
3636    memset(key, 0, offsetof(struct draw_tcs_llvm_variant_key, samplers[0]));
3637 
3638    /* All variants of this shader will have the same value for
3639     * nr_samplers.  Not yet trying to compact away holes in the
3640     * sampler array.
3641     */
3642    key->nr_samplers = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
3643    if (llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
3644       key->nr_sampler_views =
3645          llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
3646    }
3647    else {
3648       key->nr_sampler_views = key->nr_samplers;
3649    }
3650 
3651    key->nr_images = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
3652 
3653    draw_sampler = key->samplers;
3654 
3655    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3656 
3657    for (i = 0 ; i < key->nr_samplers; i++) {
3658       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3659                                       llvm->draw->samplers[PIPE_SHADER_TESS_CTRL][i]);
3660    }
3661    for (i = 0 ; i < key->nr_sampler_views; i++) {
3662       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3663                                       llvm->draw->sampler_views[PIPE_SHADER_TESS_CTRL][i]);
3664    }
3665 
3666    draw_image = draw_tcs_llvm_variant_key_images(key);
3667    memset(draw_image, 0,
3668           key->nr_images * sizeof *draw_image);
3669    for (i = 0; i < key->nr_images; i++) {
3670       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3671                                             llvm->draw->images[PIPE_SHADER_TESS_CTRL][i]);
3672    }
3673    return key;
3674 }
3675 
3676 void
draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key * key)3677 draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key *key)
3678 {
3679    unsigned i;
3680    struct draw_sampler_static_state *sampler = key->samplers;
3681    struct draw_image_static_state *image = draw_tcs_llvm_variant_key_images(key);
3682    for (i = 0 ; i < key->nr_sampler_views; i++) {
3683       debug_printf("sampler[%i].src_format = %s\n", i,
3684                    util_format_name(sampler[i].texture_state.format));
3685    }
3686 
3687    for (i = 0 ; i < key->nr_images; i++)
3688       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3689 
3690 }
3691 
3692 static void
create_tes_jit_types(struct draw_tes_llvm_variant * var)3693 create_tes_jit_types(struct draw_tes_llvm_variant *var)
3694 {
3695    struct gallivm_state *gallivm = var->gallivm;
3696    LLVMTypeRef texture_type, sampler_type, image_type, context_type;
3697 
3698    texture_type = create_jit_texture_type(gallivm, "texture");
3699    sampler_type = create_jit_sampler_type(gallivm, "sampler");
3700    image_type = create_jit_image_type(gallivm, "image");
3701 
3702    context_type = create_tes_jit_context_type(gallivm,
3703                                               0,
3704                                               texture_type, sampler_type,
3705                                               image_type,
3706                                               "draw_tes_jit_context");
3707    var->context_ptr_type = LLVMPointerType(context_type, 0);
3708 
3709    var->input_array_type = create_tes_jit_input_type(gallivm);
3710 }
3711 
3712 static LLVMTypeRef
get_tes_context_ptr_type(struct draw_tes_llvm_variant * variant)3713 get_tes_context_ptr_type(struct draw_tes_llvm_variant *variant)
3714 {
3715    if (!variant->context_ptr_type)
3716       create_tes_jit_types(variant);
3717    return variant->context_ptr_type;
3718 }
3719 
3720 static LLVMValueRef
generate_tes_mask_value(struct draw_tes_llvm_variant * variant,struct lp_type tes_type,LLVMValueRef limit,LLVMValueRef loop_counter)3721 generate_tes_mask_value(struct draw_tes_llvm_variant *variant,
3722                         struct lp_type tes_type, LLVMValueRef limit, LLVMValueRef loop_counter)
3723 {
3724    struct gallivm_state *gallivm = variant->gallivm;
3725    LLVMBuilderRef builder = gallivm->builder;
3726    struct lp_type mask_type = lp_int_type(tes_type);
3727    LLVMValueRef num_prims;
3728    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
3729    unsigned i;
3730 
3731    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
3732    for (i = 0; i < tes_type.length; i++) {
3733       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3734       mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
3735    }
3736    mask_val = lp_build_compare(gallivm, mask_type,
3737                                PIPE_FUNC_GREATER, num_prims, mask_val);
3738 
3739    return mask_val;
3740 }
3741 
3742 static LLVMValueRef
draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface * tes_iface,struct lp_build_context * bld,boolean is_vindex_indirect,LLVMValueRef vertex_index,boolean is_aindex_indirect,LLVMValueRef attrib_index,boolean is_sindex_indirect,LLVMValueRef swizzle_index)3743 draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface *tes_iface,
3744                                  struct lp_build_context *bld,
3745                                  boolean is_vindex_indirect,
3746                                  LLVMValueRef vertex_index,
3747                                  boolean is_aindex_indirect,
3748                                  LLVMValueRef attrib_index,
3749                                  boolean is_sindex_indirect,
3750                                  LLVMValueRef swizzle_index)
3751 {
3752    const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3753    struct gallivm_state *gallivm = bld->gallivm;
3754    LLVMBuilderRef builder = gallivm->builder;
3755    LLVMValueRef indices[3];
3756    LLVMValueRef res;
3757    struct lp_type type = bld->type;
3758 
3759    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3760       int i;
3761 
3762       res = bld->zero;
3763 
3764       for (i = 0; i < type.length; ++i) {
3765          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3766          LLVMValueRef vert_chan_index = vertex_index;
3767          LLVMValueRef attr_chan_index = attrib_index;
3768          LLVMValueRef swiz_chan_index = swizzle_index;
3769          LLVMValueRef channel_vec;
3770 
3771          if (is_vindex_indirect) {
3772             vert_chan_index = LLVMBuildExtractElement(builder,
3773                                                       vertex_index, idx, "");
3774          }
3775          if (is_aindex_indirect) {
3776             attr_chan_index = LLVMBuildExtractElement(builder,
3777                                                       attrib_index, idx, "");
3778          }
3779          if (is_sindex_indirect) {
3780             swiz_chan_index = LLVMBuildExtractElement(builder,
3781                                                       swizzle_index, idx, "");
3782          }
3783 
3784          indices[0] = vert_chan_index;
3785          indices[1] = attr_chan_index;
3786          indices[2] = swiz_chan_index;
3787 
3788          channel_vec = LLVMBuildGEP(builder, tes->input, indices, 3, "");
3789          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
3790 
3791          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3792       }
3793    } else {
3794       indices[0] = vertex_index;
3795       indices[1] = attrib_index;
3796       indices[2] = swizzle_index;
3797 
3798       res = LLVMBuildGEP(builder, tes->input, indices, 3, "");
3799       res = LLVMBuildLoad(builder, res, "");
3800       res = lp_build_broadcast_scalar(bld, res);
3801    }
3802    return res;
3803 }
3804 
3805 static LLVMValueRef
draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface * tes_iface,struct lp_build_context * bld,boolean is_aindex_indirect,LLVMValueRef attrib_index,LLVMValueRef swizzle_index)3806 draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
3807                                 struct lp_build_context *bld,
3808                                 boolean is_aindex_indirect,
3809                                 LLVMValueRef attrib_index,
3810                                 LLVMValueRef swizzle_index)
3811 {
3812    const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3813    struct gallivm_state *gallivm = bld->gallivm;
3814    LLVMBuilderRef builder = gallivm->builder;
3815    LLVMValueRef indices[3];
3816    LLVMValueRef res;
3817    struct lp_type type = bld->type;
3818 
3819    if (is_aindex_indirect) {
3820       int i;
3821 
3822       res = bld->zero;
3823 
3824       for (i = 0; i < type.length; ++i) {
3825          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3826          LLVMValueRef attr_chan_index = attrib_index;
3827          LLVMValueRef channel_vec;
3828 
3829          if (is_aindex_indirect) {
3830             attr_chan_index = LLVMBuildExtractElement(builder,
3831                                                       attrib_index, idx, "");
3832          }
3833 
3834          indices[0] = lp_build_const_int32(gallivm, 0);
3835          indices[1] = attr_chan_index;
3836          indices[2] = swizzle_index;
3837 
3838          channel_vec = LLVMBuildGEP(builder, tes->input, indices, 3, "");
3839          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
3840 
3841          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3842       }
3843    } else {
3844       indices[0] = lp_build_const_int32(gallivm, 0);
3845       indices[1] = attrib_index;
3846       indices[2] = swizzle_index;
3847 
3848       res = LLVMBuildGEP(builder, tes->input, indices, 3, "");
3849       res = LLVMBuildLoad(builder, res, "");
3850       res = lp_build_broadcast_scalar(bld, res);
3851    }
3852    return res;
3853 }
3854 
/**
 * Emit the LLVM IR for one tessellation evaluation shader variant.
 *
 * Declares the function "draw_llvm_tes_variant" in the variant's gallivm
 * module and records it in variant->function.  Unless the gallivm cache
 * already holds data, the body is emitted as well: a loop that walks the
 * tessellation coordinates vector_length at a time, runs the NIR shader
 * through lp_build_nir_soa() and passes the outputs to convert_to_aos().
 *
 * The generated function takes ten arguments (context, input array, io
 * pointer, prim_id, num_tess_coord, two tess_coord pointers, tess_outer,
 * tess_inner, patch_vertices_in) and returns a 32-bit zero.
 *
 * \param llvm     draw LLVM state; used to reach the bound TES
 *                 (llvm->draw->tes.tess_eval_shader)
 * \param variant  variant being generated; its gallivm state, key, shader
 *                 and LLVM type fields are read and variant->function is
 *                 written
 */
static void
draw_tes_llvm_generate(struct draw_llvm *llvm,
                       struct draw_tes_llvm_variant *variant)
{
   struct gallivm_state *gallivm = variant->gallivm;
   LLVMContextRef context = gallivm->context;
   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
   LLVMTypeRef flt_type = LLVMFloatTypeInContext(context);
   LLVMTypeRef arg_types[10];
   LLVMTypeRef func_type;
   LLVMValueRef variant_func;
   LLVMValueRef context_ptr;
   LLVMValueRef tess_coord[2], io_ptr, input_array, num_tess_coord;
   LLVMValueRef tess_inner, tess_outer, prim_id, patch_vertices_in;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;
   LLVMValueRef mask_val;
   struct lp_build_context bld, bldvec;
   struct lp_build_sampler_soa *sampler = 0;
   struct lp_build_image_soa *image = NULL;
   struct lp_bld_tgsi_system_values system_values;
   char func_name[64];
   unsigned i;
   struct draw_tes_llvm_iface tes_iface;
   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
   struct lp_build_mask_context mask;
   LLVMValueRef consts_ptr, num_consts_ptr;
   LLVMValueRef ssbos_ptr, num_ssbos_ptr;
   LLVMValueRef step;
   struct lp_type tes_type;
   unsigned vector_length = variant->shader->base.vector_length;

   memset(&system_values, 0, sizeof(system_values));
   memset(&outputs, 0, sizeof(outputs));

   snprintf(func_name, sizeof(func_name), "draw_llvm_tes_variant");

   /* Signature of the generated function; the slot order here must match
    * the LLVMGetParam() indices used further down.
    */
   arg_types[0] = get_tes_context_ptr_type(variant);    /* context */
   arg_types[1] = variant->input_array_type;           /* input */
   arg_types[2] = variant->vertex_header_ptr_type;     /* io */
   arg_types[3] = int32_type;                          /* prim_id */
   arg_types[4] = int32_type;                          /* num_tess_coord */
   arg_types[5] = LLVMPointerType(flt_type, 0);        /* tess_coord[0] */
   arg_types[6] = LLVMPointerType(flt_type, 0);        /* tess_coord[1] */
   arg_types[7] = LLVMPointerType(LLVMArrayType(flt_type, 4), 0); /* tess_outer */
   arg_types[8] = LLVMPointerType(LLVMArrayType(flt_type, 2), 0); /* tess_inner */
   arg_types[9] = int32_type;                          /* patch_vertices_in */

   func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
   variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);

   variant->function = variant_func;
   LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);

   /* Tag every pointer argument as noalias.  The attribute index passed is
    * i + 1 (NOTE(review): presumably because index 0 designates the return
    * value - confirm against lp_add_function_attr()).
    */
   for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);

   /* The gallivm cache already holds data: stop after the declaration and
    * emit no body.
    */
   if (gallivm->cache && gallivm->cache->data_size)
      return;
   context_ptr               = LLVMGetParam(variant_func, 0);
   input_array               = LLVMGetParam(variant_func, 1);
   io_ptr                    = LLVMGetParam(variant_func, 2);
   prim_id                   = LLVMGetParam(variant_func, 3);
   num_tess_coord            = LLVMGetParam(variant_func, 4);
   tess_coord[0]             = LLVMGetParam(variant_func, 5);
   tess_coord[1]             = LLVMGetParam(variant_func, 6);
   tess_outer                = LLVMGetParam(variant_func, 7);
   tess_inner                = LLVMGetParam(variant_func, 8);
   patch_vertices_in         = LLVMGetParam(variant_func, 9);

   lp_build_name(context_ptr, "context");
   lp_build_name(input_array, "input");
   lp_build_name(io_ptr, "io");
   lp_build_name(prim_id, "prim_id");
   lp_build_name(num_tess_coord, "num_tess_coord");
   lp_build_name(tess_coord[0], "tess_coord[0]");
   lp_build_name(tess_coord[1], "tess_coord[1]");
   lp_build_name(tess_outer, "tess_outer");
   lp_build_name(tess_inner, "tess_inner");
   lp_build_name(patch_vertices_in, "patch_vertices_in");

   /* Callbacks the shader translator uses to fetch TES inputs. */
   tes_iface.base.fetch_vertex_input = draw_tes_llvm_fetch_vertex_input;
   tes_iface.base.fetch_patch_input = draw_tes_llvm_fetch_patch_input;
   tes_iface.input = input_array;
   tes_iface.variant = variant;

   block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
   builder = gallivm->builder;
   LLVMPositionBuilderAtEnd(builder, block);

   /* Scalar 32-bit integer context; supplies the loop's start value. */
   lp_build_context_init(&bld, gallivm, lp_type_int(32));

   memset(&tes_type, 0, sizeof tes_type);
   tes_type.floating = TRUE; /* floating point values */
   tes_type.sign = TRUE;     /* values are signed */
   tes_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
   tes_type.width = 32;      /* 32-bit float */
   tes_type.length = vector_length;

   /* Integer vector context with the same length as tes_type; used to
    * broadcast the scalar system values.
    */
   lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tes_type));
   consts_ptr = draw_tes_jit_context_constants(variant->gallivm, context_ptr);
   num_consts_ptr =
      draw_tes_jit_context_num_constants(variant->gallivm, context_ptr);

   ssbos_ptr = draw_tes_jit_context_ssbos(variant->gallivm, context_ptr);
   num_ssbos_ptr =
      draw_tes_jit_context_num_ssbos(variant->gallivm, context_ptr);
   sampler = draw_llvm_sampler_soa_create(variant->key.samplers, variant->key.nr_samplers);
   image = draw_llvm_image_soa_create(draw_tes_llvm_variant_key_images(&variant->key),
                                      variant->key.nr_images);
   /* The loop counter advances by one vector's worth of coordinates. */
   step = lp_build_const_int32(gallivm, vector_length);

   /* Loop-invariant system values are set up once, before the loop. */
   system_values.tess_outer = LLVMBuildLoad(builder, tess_outer, "");
   system_values.tess_inner = LLVMBuildLoad(builder, tess_inner, "");

   system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);

   system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
   struct lp_build_loop_state lp_loop;
   lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
   {
      LLVMValueRef io;

      /* io = &io_ptr[counter]: output slot for lane 0 of this pass. */
      io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, "");
      /* Lanes whose index counter + lane is below num_tess_coord are
       * enabled in the mask.
       */
      mask_val = generate_tes_mask_value(variant, tes_type, num_tess_coord, lp_loop.counter);
      lp_build_mask_begin(&mask, gallivm, tes_type, mask_val);

      /* Build tess_coord as an array of three float vectors.  Channels 0
       * and 1 are read from the tess_coord[] argument arrays; channel 2 is
       * 1 - coord0 - coord1 for triangles and 0 otherwise.
       * NOTE(review): these loads are issued for every lane, including
       * lanes the mask disables - confirm the coordinate arrays are padded
       * to a multiple of vector_length.
       */
      system_values.tess_coord = LLVMGetUndef(LLVMArrayType(LLVMVectorType(flt_type, vector_length), 3));
      for (i = 0; i < 3; i++) {
         LLVMValueRef tess_coord_chan = LLVMGetUndef(LLVMVectorType(flt_type, vector_length));
         for (unsigned j = 0; j < vector_length; j++) {
            LLVMValueRef idx = LLVMBuildAdd(builder, lp_loop.counter, lp_build_const_int32(gallivm, j), "");
            LLVMValueRef tc_val;
            if (i == 2) {
               if (variant->shader->base.prim_mode == PIPE_PRIM_TRIANGLES) {
                  tc_val = lp_build_const_float(gallivm, 1.0);
                  tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get(builder, tess_coord[0], idx), "");
                  tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get(builder, tess_coord[1], idx), "");
               } else
                  tc_val = lp_build_const_float(gallivm, 0.0);
            } else
               tc_val = lp_build_pointer_get(builder, tess_coord[i], idx);

            tess_coord_chan = LLVMBuildInsertElement(builder, tess_coord_chan, tc_val, lp_build_const_int32(gallivm, j), "");
         }
         system_values.tess_coord = LLVMBuildInsertValue(builder, system_values.tess_coord, tess_coord_chan, i, "");
      }

      struct lp_build_tgsi_params params;
      memset(&params, 0, sizeof(params));

      params.type = tes_type;
      params.mask = &mask;
      params.consts_ptr = consts_ptr;
      params.const_sizes_ptr = num_consts_ptr;
      params.system_values = &system_values;
      params.context_ptr = context_ptr;
      params.sampler = sampler;
      params.info = &llvm->draw->tes.tess_eval_shader->info;
      params.ssbo_ptr = ssbos_ptr;
      params.ssbo_sizes_ptr = num_ssbos_ptr;
      params.image = image;
      params.tes_iface = &tes_iface.base;

      /* Translate the NIR shader; results are returned through outputs. */
      lp_build_nir_soa(variant->gallivm,
                       llvm->draw->tes.tess_eval_shader->state.ir.nir,
                       &params,
                       outputs);

      lp_build_mask_end(&mask);
      /* An all-zero clip mask is passed along with the outputs. */
      LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
                                                     lp_int_type(tes_type), 0);

      convert_to_aos(gallivm, io, NULL, outputs, clipmask,
                     params.info->num_outputs, tes_type, FALSE);
   }
   /* Advance by step and compare against num_tess_coord with an unsigned
    * >= test (NOTE(review): presumably the exit condition - confirm against
    * lp_build_loop_end_cond()).
    */
   lp_build_loop_end_cond(&lp_loop, num_tess_coord, step, LLVMIntUGE);
   sampler->destroy(sampler);
   image->destroy(image);

   LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
   gallivm_verify_function(gallivm, variant_func);
}
4039 
4040 struct draw_tes_llvm_variant *
draw_tes_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_tes_llvm_variant_key * key)4041 draw_tes_llvm_create_variant(struct draw_llvm *llvm,
4042                              unsigned num_outputs,
4043                              const struct draw_tes_llvm_variant_key *key)
4044 {
4045    struct draw_tes_llvm_variant *variant;
4046    struct llvm_tess_eval_shader *shader = llvm_tess_eval_shader(llvm->draw->tes.tess_eval_shader);
4047    LLVMTypeRef vertex_header;
4048    char module_name[64];
4049    unsigned char ir_sha1_cache_key[20];
4050    struct lp_cached_code cached = { 0 };
4051    bool needs_caching = false;
4052 
4053    variant = MALLOC(sizeof *variant +
4054                     shader->variant_key_size - sizeof variant->key);
4055    if (!variant)
4056       return NULL;
4057 
4058    variant->llvm = llvm;
4059    variant->shader = shader;
4060 
4061    snprintf(module_name, sizeof(module_name), "draw_llvm_tes_variant%u",
4062             variant->shader->variants_cached);
4063 
4064    memcpy(&variant->key, key, shader->variant_key_size);
4065    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
4066       draw_get_ir_cache_key(shader->base.state.ir.nir,
4067                             key,
4068                             shader->variant_key_size,
4069                             num_outputs,
4070                             ir_sha1_cache_key);
4071 
4072       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
4073                                          &cached,
4074                                          ir_sha1_cache_key);
4075       if (!cached.data_size)
4076          needs_caching = true;
4077    }
4078    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
4079 
4080    create_tes_jit_types(variant);
4081 
4082    vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);
4083 
4084    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
4085 
4086    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
4087       nir_print_shader(llvm->draw->tes.tess_eval_shader->state.ir.nir, stderr);
4088       draw_tes_llvm_dump_variant_key(&variant->key);
4089    }
4090 
4091    draw_tes_llvm_generate(llvm, variant);
4092 
4093    gallivm_compile_module(variant->gallivm);
4094 
4095    variant->jit_func = (draw_tes_jit_func)
4096       gallivm_jit_function(variant->gallivm, variant->function);
4097 
4098    if (needs_caching)
4099       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
4100                                            &cached,
4101                                            ir_sha1_cache_key);
4102    gallivm_free_ir(variant->gallivm);
4103 
4104    variant->list_item_global.base = variant;
4105    variant->list_item_local.base = variant;
4106    /*variant->no = */shader->variants_created++;
4107    variant->list_item_global.base = variant;
4108 
4109    return variant;
4110 }
4111 
4112 void
draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant * variant)4113 draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant *variant)
4114 {
4115    struct draw_llvm *llvm = variant->llvm;
4116 
4117    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
4118       debug_printf("Deleting TES variant: %u tes variants,\t%u total variants\n",
4119                     variant->shader->variants_cached, llvm->nr_tes_variants);
4120    }
4121 
4122    gallivm_destroy(variant->gallivm);
4123 
4124    remove_from_list(&variant->list_item_local);
4125    variant->shader->variants_cached--;
4126    remove_from_list(&variant->list_item_global);
4127    llvm->nr_tes_variants--;
4128    FREE(variant);
4129 }
4130 
4131 struct draw_tes_llvm_variant_key *
draw_tes_llvm_make_variant_key(struct draw_llvm * llvm,char * store)4132 draw_tes_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
4133 {
4134    unsigned i;
4135    struct draw_tes_llvm_variant_key *key;
4136    struct draw_sampler_static_state *draw_sampler;
4137    struct draw_image_static_state *draw_image;
4138 
4139    key = (struct draw_tes_llvm_variant_key *)store;
4140 
4141    memset(key, 0, offsetof(struct draw_tes_llvm_variant_key, samplers[0]));
4142 
4143    /* All variants of this shader will have the same value for
4144     * nr_samplers.  Not yet trying to compact away holes in the
4145     * sampler array.
4146     */
4147    key->nr_samplers = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
4148    if (llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
4149       key->nr_sampler_views =
4150          llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
4151    }
4152    else {
4153       key->nr_sampler_views = key->nr_samplers;
4154    }
4155 
4156    key->nr_images = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
4157 
4158    draw_sampler = key->samplers;
4159 
4160    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
4161 
4162    for (i = 0 ; i < key->nr_samplers; i++) {
4163       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
4164                                       llvm->draw->samplers[PIPE_SHADER_TESS_EVAL][i]);
4165    }
4166    for (i = 0 ; i < key->nr_sampler_views; i++) {
4167       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
4168                                       llvm->draw->sampler_views[PIPE_SHADER_TESS_EVAL][i]);
4169    }
4170 
4171    draw_image = draw_tes_llvm_variant_key_images(key);
4172    memset(draw_image, 0,
4173           key->nr_images * sizeof *draw_image);
4174    for (i = 0; i < key->nr_images; i++) {
4175       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
4176                                             llvm->draw->images[PIPE_SHADER_TESS_EVAL][i]);
4177    }
4178    return key;
4179 }
4180 
4181 void
draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key * key)4182 draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key *key)
4183 {
4184    unsigned i;
4185    struct draw_sampler_static_state *sampler = key->samplers;
4186    struct draw_image_static_state *image = draw_tes_llvm_variant_key_images(key);
4187    for (i = 0 ; i < key->nr_sampler_views; i++) {
4188       debug_printf("sampler[%i].src_format = %s\n", i,
4189                    util_format_name(sampler[i].texture_state.format));
4190    }
4191 
4192    for (i = 0 ; i < key->nr_images; i++)
4193       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
4194 
4195 }
4196