1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * @file
 * Code generation for the whole fragment pipeline.
32  *
33  * The fragment pipeline consists of the following stages:
34  * - early depth test
35  * - fragment shader
36  * - alpha test
37  * - depth/stencil test
38  * - blending
39  *
40  * This file has only the glue to assemble the fragment pipeline.  The actual
41  * plumbing of converting Gallium state into LLVM IR is done elsewhere, in the
42  * lp_bld_*.[ch] files, and in a complete generic and reusable way. Here we
43  * muster the LLVM JIT execution engine to create a function that follows an
44  * established binary interface and that can be called from C directly.
45  *
46  * A big source of complexity here is that we often want to run different
 * stages with different precisions and data types. For example,
48  * the fragment shader needs typically to be done in floats, but the
49  * depth/stencil test and blending is better done in the type that most closely
50  * matches the depth/stencil and color buffer respectively.
51  *
52  * Since the width of a SIMD vector register stays the same regardless of the
53  * element type, different types imply different number of elements, so we must
54  * code generate more instances of the stages with larger types to be able to
55  * feed/consume the stages with smaller types.
56  *
57  * @author Jose Fonseca <jfonseca@vmware.com>
58  */
59 
60 #include <limits.h>
61 #include "pipe/p_defines.h"
62 #include "util/u_inlines.h"
63 #include "util/u_memory.h"
64 #include "util/u_pointer.h"
65 #include "util/u_format.h"
66 #include "util/u_dump.h"
67 #include "util/u_string.h"
68 #include "util/u_simple_list.h"
69 #include "os/os_time.h"
70 #include "pipe/p_shader_tokens.h"
71 #include "draw/draw_context.h"
72 #include "tgsi/tgsi_dump.h"
73 #include "tgsi/tgsi_scan.h"
74 #include "tgsi/tgsi_parse.h"
75 #include "gallivm/lp_bld_type.h"
76 #include "gallivm/lp_bld_const.h"
77 #include "gallivm/lp_bld_conv.h"
78 #include "gallivm/lp_bld_init.h"
79 #include "gallivm/lp_bld_intr.h"
80 #include "gallivm/lp_bld_logic.h"
81 #include "gallivm/lp_bld_tgsi.h"
82 #include "gallivm/lp_bld_swizzle.h"
83 #include "gallivm/lp_bld_flow.h"
84 #include "gallivm/lp_bld_debug.h"
85 
86 #include "lp_bld_alpha.h"
87 #include "lp_bld_blend.h"
88 #include "lp_bld_depth.h"
89 #include "lp_bld_interp.h"
90 #include "lp_context.h"
91 #include "lp_debug.h"
92 #include "lp_perf.h"
93 #include "lp_setup.h"
94 #include "lp_state.h"
95 #include "lp_tex_sample.h"
96 #include "lp_flush.h"
97 #include "lp_state_fs.h"
98 
99 
/**
 * Fragment shader number, used for debugging.
 * Starts at zero (objects with static storage are zero-initialised).
 */
static unsigned fs_no;
102 
103 
104 /**
105  * Expand the relevant bits of mask_input to a n*4-dword mask for the
106  * n*four pixels in n 2x2 quads.  This will set the n*four elements of the
107  * quad mask vector to 0 or ~0.
108  * Grouping is 01, 23 for 2 quad mode hence only 0 and 2 are valid
109  * quad arguments with fs length 8.
110  *
111  * \param first_quad  which quad(s) of the quad group to test, in [0,3]
112  * \param mask_input  bitwise mask for the whole 4x4 stamp
113  */
114 static LLVMValueRef
generate_quad_mask(struct gallivm_state * gallivm,struct lp_type fs_type,unsigned first_quad,LLVMValueRef mask_input)115 generate_quad_mask(struct gallivm_state *gallivm,
116                    struct lp_type fs_type,
117                    unsigned first_quad,
118                    LLVMValueRef mask_input) /* int32 */
119 {
120    LLVMBuilderRef builder = gallivm->builder;
121    struct lp_type mask_type;
122    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
123    LLVMValueRef bits[16];
124    LLVMValueRef mask;
125    int shift, i;
126 
127    /*
128     * XXX: We'll need a different path for 16 x u8
129     */
130    assert(fs_type.width == 32);
131    assert(fs_type.length <= Elements(bits));
132    mask_type = lp_int_type(fs_type);
133 
134    /*
135     * mask_input >>= (quad * 4)
136     */
137    switch (first_quad) {
138    case 0:
139       shift = 0;
140       break;
141    case 1:
142       assert(fs_type.length == 4);
143       shift = 2;
144       break;
145    case 2:
146       shift = 8;
147       break;
148    case 3:
149       assert(fs_type.length == 4);
150       shift = 10;
151       break;
152    default:
153       assert(0);
154       shift = 0;
155    }
156 
157    mask_input = LLVMBuildLShr(builder,
158                               mask_input,
159                               LLVMConstInt(i32t, shift, 0),
160                               "");
161 
162    /*
163     * mask = { mask_input & (1 << i), for i in [0,3] }
164     */
165    mask = lp_build_broadcast(gallivm,
166                              lp_build_vec_type(gallivm, mask_type),
167                              mask_input);
168 
169    for (i = 0; i < fs_type.length / 4; i++) {
170       unsigned j = 2 * (i % 2) + (i / 2) * 8;
171       bits[4*i + 0] = LLVMConstInt(i32t, 1 << (j + 0), 0);
172       bits[4*i + 1] = LLVMConstInt(i32t, 1 << (j + 1), 0);
173       bits[4*i + 2] = LLVMConstInt(i32t, 1 << (j + 4), 0);
174       bits[4*i + 3] = LLVMConstInt(i32t, 1 << (j + 5), 0);
175    }
176    mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, fs_type.length), "");
177 
178    /*
179     * mask = mask != 0 ? ~0 : 0
180     */
181    mask = lp_build_compare(gallivm,
182                            mask_type, PIPE_FUNC_NOTEQUAL,
183                            mask,
184                            lp_build_const_int_vec(gallivm, mask_type, 0));
185 
186    return mask;
187 }
188 
189 
/*
 * Bit flags combined into the local 'depth_mode' value, selecting where
 * the depth/stencil test and the depth/stencil write are emitted relative
 * to the fragment shader body.
 */
#define EARLY_DEPTH_TEST  (1 << 0)  /* test before running the shader */
#define LATE_DEPTH_TEST   (1 << 1)  /* test after running the shader */
#define EARLY_DEPTH_WRITE (1 << 2)  /* write right after the early test */
#define LATE_DEPTH_WRITE  (1 << 3)  /* write after the shader has run */
194 
195 static int
find_output_by_semantic(const struct tgsi_shader_info * info,unsigned semantic,unsigned index)196 find_output_by_semantic( const struct tgsi_shader_info *info,
197 			 unsigned semantic,
198 			 unsigned index )
199 {
200    int i;
201 
202    for (i = 0; i < info->num_outputs; i++)
203       if (info->output_semantic_name[i] == semantic &&
204 	  info->output_semantic_index[i] == index)
205 	 return i;
206 
207    return -1;
208 }
209 
210 
211 /**
212  * Generate the fragment shader, depth/stencil test, and alpha tests.
213  * \param i  which quad in the tile, in range [0,3]
214  * \param partial_mask  if 1, do mask_input testing
215  */
216 static void
generate_fs(struct gallivm_state * gallivm,struct lp_fragment_shader * shader,const struct lp_fragment_shader_variant_key * key,LLVMBuilderRef builder,struct lp_type type,LLVMValueRef context_ptr,unsigned i,struct lp_build_interp_soa_context * interp,struct lp_build_sampler_soa * sampler,LLVMValueRef * pmask,LLVMValueRef (* color)[4],LLVMValueRef depth_ptr,LLVMValueRef facing,unsigned partial_mask,LLVMValueRef mask_input,LLVMValueRef counter)217 generate_fs(struct gallivm_state *gallivm,
218             struct lp_fragment_shader *shader,
219             const struct lp_fragment_shader_variant_key *key,
220             LLVMBuilderRef builder,
221             struct lp_type type,
222             LLVMValueRef context_ptr,
223             unsigned i,
224             struct lp_build_interp_soa_context *interp,
225             struct lp_build_sampler_soa *sampler,
226             LLVMValueRef *pmask,
227             LLVMValueRef (*color)[4],
228             LLVMValueRef depth_ptr,
229             LLVMValueRef facing,
230             unsigned partial_mask,
231             LLVMValueRef mask_input,
232             LLVMValueRef counter)
233 {
234    const struct util_format_description *zs_format_desc = NULL;
235    const struct tgsi_token *tokens = shader->base.tokens;
236    LLVMTypeRef vec_type;
237    LLVMValueRef consts_ptr;
238    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
239    LLVMValueRef z;
240    LLVMValueRef zs_value = NULL;
241    LLVMValueRef stencil_refs[2];
242    struct lp_build_mask_context mask;
243    boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
244                             shader->info.base.num_inputs < 3 &&
245                             shader->info.base.num_instructions < 8);
246    unsigned attrib;
247    unsigned chan;
248    unsigned cbuf;
249    unsigned depth_mode;
250    struct lp_bld_tgsi_system_values system_values;
251 
252    memset(&system_values, 0, sizeof(system_values));
253 
254    if (key->depth.enabled ||
255        key->stencil[0].enabled ||
256        key->stencil[1].enabled) {
257 
258       zs_format_desc = util_format_description(key->zsbuf_format);
259       assert(zs_format_desc);
260 
261       if (!shader->info.base.writes_z) {
262          if (key->alpha.enabled || shader->info.base.uses_kill)
263             /* With alpha test and kill, can do the depth test early
264              * and hopefully eliminate some quads.  But need to do a
265              * special deferred depth write once the final mask value
266              * is known.
267              */
268             depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
269          else
270             depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
271       }
272       else {
273          depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
274       }
275 
276       if (!(key->depth.enabled && key->depth.writemask) &&
277           !(key->stencil[0].enabled && key->stencil[0].writemask))
278          depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
279    }
280    else {
281       depth_mode = 0;
282    }
283 
284    assert(i < 4);
285 
286    stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr);
287    stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr);
288 
289    vec_type = lp_build_vec_type(gallivm, type);
290 
291    consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
292 
293    memset(outputs, 0, sizeof outputs);
294 
295    /* Declare the color and z variables */
296    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
297       for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
298          color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color");
299       }
300    }
301 
302    /* do triangle edge testing */
303    if (partial_mask) {
304       *pmask = generate_quad_mask(gallivm, type,
305                                   i*type.length/4, mask_input);
306    }
307    else {
308       *pmask = lp_build_const_int_vec(gallivm, type, ~0);
309    }
310 
311    /* 'mask' will control execution based on quad's pixel alive/killed state */
312    lp_build_mask_begin(&mask, gallivm, type, *pmask);
313 
314    if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
315       lp_build_mask_check(&mask);
316 
317    lp_build_interp_soa_update_pos(interp, gallivm, i*type.length/4);
318    z = interp->pos[2];
319 
320    if (depth_mode & EARLY_DEPTH_TEST) {
321       lp_build_depth_stencil_test(gallivm,
322                                   &key->depth,
323                                   key->stencil,
324                                   type,
325                                   zs_format_desc,
326                                   &mask,
327                                   stencil_refs,
328                                   z,
329                                   depth_ptr, facing,
330                                   &zs_value,
331                                   !simple_shader);
332 
333       if (depth_mode & EARLY_DEPTH_WRITE) {
334          lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
335       }
336    }
337 
338    lp_build_interp_soa_update_inputs(interp, gallivm, i*type.length/4);
339 
340    /* Build the actual shader */
341    lp_build_tgsi_soa(gallivm, tokens, type, &mask,
342                      consts_ptr, &system_values,
343                      interp->pos, interp->inputs,
344                      outputs, sampler, &shader->info.base);
345 
346    /* Alpha test */
347    if (key->alpha.enabled) {
348       int color0 = find_output_by_semantic(&shader->info.base,
349                                            TGSI_SEMANTIC_COLOR,
350                                            0);
351 
352       if (color0 != -1 && outputs[color0][3]) {
353          const struct util_format_description *cbuf_format_desc;
354          LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
355          LLVMValueRef alpha_ref_value;
356 
357          alpha_ref_value = lp_jit_context_alpha_ref_value(gallivm, context_ptr);
358          alpha_ref_value = lp_build_broadcast(gallivm, vec_type, alpha_ref_value);
359 
360          cbuf_format_desc = util_format_description(key->cbuf_format[0]);
361 
362          lp_build_alpha_test(gallivm, key->alpha.func, type, cbuf_format_desc,
363                              &mask, alpha, alpha_ref_value,
364                              (depth_mode & LATE_DEPTH_TEST) != 0);
365       }
366    }
367 
368    /* Late Z test */
369    if (depth_mode & LATE_DEPTH_TEST) {
370       int pos0 = find_output_by_semantic(&shader->info.base,
371                                          TGSI_SEMANTIC_POSITION,
372                                          0);
373 
374       if (pos0 != -1 && outputs[pos0][2]) {
375          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
376       }
377 
378       lp_build_depth_stencil_test(gallivm,
379                                   &key->depth,
380                                   key->stencil,
381                                   type,
382                                   zs_format_desc,
383                                   &mask,
384                                   stencil_refs,
385                                   z,
386                                   depth_ptr, facing,
387                                   &zs_value,
388                                   !simple_shader);
389       /* Late Z write */
390       if (depth_mode & LATE_DEPTH_WRITE) {
391          lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
392       }
393    }
394    else if ((depth_mode & EARLY_DEPTH_TEST) &&
395             (depth_mode & LATE_DEPTH_WRITE))
396    {
397       /* Need to apply a reduced mask to the depth write.  Reload the
398        * depth value, update from zs_value with the new mask value and
399        * write that out.
400        */
401       lp_build_deferred_depth_write(gallivm,
402                                     type,
403                                     zs_format_desc,
404                                     &mask,
405                                     depth_ptr,
406                                     zs_value);
407    }
408 
409 
410    /* Color write  */
411    for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
412    {
413       if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR &&
414           shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
415       {
416          unsigned cbuf = shader->info.base.output_semantic_index[attrib];
417          for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
418             if(outputs[attrib][chan]) {
419                /* XXX: just initialize outputs to point at colors[] and
420                 * skip this.
421                 */
422                LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
423                lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
424                LLVMBuildStore(builder, out, color[cbuf][chan]);
425             }
426          }
427       }
428    }
429 
430    if (counter)
431       lp_build_occlusion_count(gallivm, type,
432                                lp_build_mask_value(&mask), counter);
433 
434    *pmask = lp_build_mask_end(&mask);
435 }
436 
437 
438 /**
439  * Generate the fragment shader, depth/stencil test, and alpha tests.
440  */
441 static void
generate_fs_loop(struct gallivm_state * gallivm,struct lp_fragment_shader * shader,const struct lp_fragment_shader_variant_key * key,LLVMBuilderRef builder,struct lp_type type,LLVMValueRef context_ptr,LLVMValueRef num_loop,struct lp_build_interp_soa_context * interp,struct lp_build_sampler_soa * sampler,LLVMValueRef mask_store,LLVMValueRef (* out_color)[4],LLVMValueRef depth_ptr,unsigned depth_bits,LLVMValueRef facing,LLVMValueRef counter)442 generate_fs_loop(struct gallivm_state *gallivm,
443                  struct lp_fragment_shader *shader,
444                  const struct lp_fragment_shader_variant_key *key,
445                  LLVMBuilderRef builder,
446                  struct lp_type type,
447                  LLVMValueRef context_ptr,
448                  LLVMValueRef num_loop,
449                  struct lp_build_interp_soa_context *interp,
450                  struct lp_build_sampler_soa *sampler,
451                  LLVMValueRef mask_store,
452                  LLVMValueRef (*out_color)[4],
453                  LLVMValueRef depth_ptr,
454                  unsigned depth_bits,
455                  LLVMValueRef facing,
456                  LLVMValueRef counter)
457 {
458    const struct util_format_description *zs_format_desc = NULL;
459    const struct tgsi_token *tokens = shader->base.tokens;
460    LLVMTypeRef vec_type;
461    LLVMValueRef mask_ptr, mask_val;
462    LLVMValueRef consts_ptr;
463    LLVMValueRef z;
464    LLVMValueRef zs_value = NULL;
465    LLVMValueRef stencil_refs[2];
466    LLVMValueRef depth_ptr_i;
467    LLVMValueRef depth_offset;
468    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
469    struct lp_build_for_loop_state loop_state;
470    struct lp_build_mask_context mask;
471    boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
472                             shader->info.base.num_inputs < 3 &&
473                             shader->info.base.num_instructions < 8);
474    unsigned attrib;
475    unsigned chan;
476    unsigned cbuf;
477    unsigned depth_mode;
478 
479    struct lp_bld_tgsi_system_values system_values;
480 
481    memset(&system_values, 0, sizeof(system_values));
482 
483    if (key->depth.enabled ||
484        key->stencil[0].enabled ||
485        key->stencil[1].enabled) {
486 
487       zs_format_desc = util_format_description(key->zsbuf_format);
488       assert(zs_format_desc);
489 
490       if (!shader->info.base.writes_z) {
491          if (key->alpha.enabled || shader->info.base.uses_kill)
492             /* With alpha test and kill, can do the depth test early
493              * and hopefully eliminate some quads.  But need to do a
494              * special deferred depth write once the final mask value
495              * is known.
496              */
497             depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
498          else
499             depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
500       }
501       else {
502          depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
503       }
504 
505       if (!(key->depth.enabled && key->depth.writemask) &&
506           !(key->stencil[0].enabled && key->stencil[0].writemask))
507          depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
508    }
509    else {
510       depth_mode = 0;
511    }
512 
513 
514    stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr);
515    stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr);
516 
517    vec_type = lp_build_vec_type(gallivm, type);
518 
519    consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
520 
521    lp_build_for_loop_begin(&loop_state, gallivm,
522                            lp_build_const_int32(gallivm, 0),
523                            LLVMIntULT,
524                            num_loop,
525                            lp_build_const_int32(gallivm, 1));
526 
527    mask_ptr = LLVMBuildGEP(builder, mask_store,
528                            &loop_state.counter, 1, "mask_ptr");
529    mask_val = LLVMBuildLoad(builder, mask_ptr, "");
530 
531    depth_offset = LLVMBuildMul(builder, loop_state.counter,
532                                lp_build_const_int32(gallivm, depth_bits * type.length),
533                                "");
534 
535    depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
536 
537    memset(outputs, 0, sizeof outputs);
538 
539    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
540       for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
541          out_color[cbuf][chan] = lp_build_array_alloca(gallivm,
542                                                        lp_build_vec_type(gallivm,
543                                                                          type),
544                                                        num_loop, "color");
545       }
546    }
547 
548 
549 
550    /* 'mask' will control execution based on quad's pixel alive/killed state */
551    lp_build_mask_begin(&mask, gallivm, type, mask_val);
552 
553    if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
554       lp_build_mask_check(&mask);
555 
556    lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter);
557    z = interp->pos[2];
558 
559    if (depth_mode & EARLY_DEPTH_TEST) {
560       lp_build_depth_stencil_test(gallivm,
561                                   &key->depth,
562                                   key->stencil,
563                                   type,
564                                   zs_format_desc,
565                                   &mask,
566                                   stencil_refs,
567                                   z,
568                                   depth_ptr_i, facing,
569                                   &zs_value,
570                                   !simple_shader);
571 
572       if (depth_mode & EARLY_DEPTH_WRITE) {
573          lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value);
574       }
575    }
576 
577    lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter);
578 
579    /* Build the actual shader */
580    lp_build_tgsi_soa(gallivm, tokens, type, &mask,
581                      consts_ptr, &system_values,
582                      interp->pos, interp->inputs,
583                      outputs, sampler, &shader->info.base);
584 
585    /* Alpha test */
586    if (key->alpha.enabled) {
587       int color0 = find_output_by_semantic(&shader->info.base,
588                                            TGSI_SEMANTIC_COLOR,
589                                            0);
590 
591       if (color0 != -1 && outputs[color0][3]) {
592          const struct util_format_description *cbuf_format_desc;
593          LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
594          LLVMValueRef alpha_ref_value;
595 
596          alpha_ref_value = lp_jit_context_alpha_ref_value(gallivm, context_ptr);
597          alpha_ref_value = lp_build_broadcast(gallivm, vec_type, alpha_ref_value);
598 
599          cbuf_format_desc = util_format_description(key->cbuf_format[0]);
600 
601          lp_build_alpha_test(gallivm, key->alpha.func, type, cbuf_format_desc,
602                              &mask, alpha, alpha_ref_value,
603                              (depth_mode & LATE_DEPTH_TEST) != 0);
604       }
605    }
606 
607    /* Late Z test */
608    if (depth_mode & LATE_DEPTH_TEST) {
609       int pos0 = find_output_by_semantic(&shader->info.base,
610                                          TGSI_SEMANTIC_POSITION,
611                                          0);
612 
613       if (pos0 != -1 && outputs[pos0][2]) {
614          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
615       }
616 
617       lp_build_depth_stencil_test(gallivm,
618                                   &key->depth,
619                                   key->stencil,
620                                   type,
621                                   zs_format_desc,
622                                   &mask,
623                                   stencil_refs,
624                                   z,
625                                   depth_ptr_i, facing,
626                                   &zs_value,
627                                   !simple_shader);
628       /* Late Z write */
629       if (depth_mode & LATE_DEPTH_WRITE) {
630          lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value);
631       }
632    }
633    else if ((depth_mode & EARLY_DEPTH_TEST) &&
634             (depth_mode & LATE_DEPTH_WRITE))
635    {
636       /* Need to apply a reduced mask to the depth write.  Reload the
637        * depth value, update from zs_value with the new mask value and
638        * write that out.
639        */
640       lp_build_deferred_depth_write(gallivm,
641                                     type,
642                                     zs_format_desc,
643                                     &mask,
644                                     depth_ptr_i,
645                                     zs_value);
646    }
647 
648 
649    /* Color write  */
650    for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
651    {
652       if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR &&
653           shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
654       {
655          unsigned cbuf = shader->info.base.output_semantic_index[attrib];
656          for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
657             if(outputs[attrib][chan]) {
658                /* XXX: just initialize outputs to point at colors[] and
659                 * skip this.
660                 */
661                LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
662                LLVMValueRef color_ptr;
663                color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan],
664                                         &loop_state.counter, 1, "");
665                lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]);
666                LLVMBuildStore(builder, out, color_ptr);
667             }
668          }
669       }
670    }
671 
672    if (key->occlusion_count) {
673       lp_build_name(counter, "counter");
674       lp_build_occlusion_count(gallivm, type,
675                                lp_build_mask_value(&mask), counter);
676    }
677 
678    mask_val = lp_build_mask_end(&mask);
679    LLVMBuildStore(builder, mask_val, mask_ptr);
680    lp_build_for_loop_end(&loop_state);
681 }
682 
683 
684 /**
685  * Generate color blending and color output.
686  * \param rt  the render target index (to index blend, colormask state)
687  * \param type  the pixel color type
688  * \param context_ptr  pointer to the runtime JIT context
689  * \param mask  execution mask (active fragment/pixel mask)
690  * \param src  colors from the fragment shader
691  * \param dst_ptr  the destination color buffer pointer
692  */
693 static void
generate_blend(struct gallivm_state * gallivm,const struct pipe_blend_state * blend,unsigned rt,LLVMBuilderRef builder,struct lp_type type,LLVMValueRef context_ptr,LLVMValueRef mask,LLVMValueRef * src,LLVMValueRef dst_ptr,boolean do_branch)694 generate_blend(struct gallivm_state *gallivm,
695                const struct pipe_blend_state *blend,
696                unsigned rt,
697                LLVMBuilderRef builder,
698                struct lp_type type,
699                LLVMValueRef context_ptr,
700                LLVMValueRef mask,
701                LLVMValueRef *src,
702                LLVMValueRef dst_ptr,
703                boolean do_branch)
704 {
705    struct lp_build_context bld;
706    struct lp_build_mask_context mask_ctx;
707    LLVMTypeRef vec_type;
708    LLVMValueRef const_ptr;
709    LLVMValueRef con[4];
710    LLVMValueRef dst[4];
711    LLVMValueRef res[4];
712    unsigned chan;
713 
714    lp_build_context_init(&bld, gallivm, type);
715 
716    lp_build_mask_begin(&mask_ctx, gallivm, type, mask);
717    if (do_branch)
718       lp_build_mask_check(&mask_ctx);
719 
720    vec_type = lp_build_vec_type(gallivm, type);
721 
722    const_ptr = lp_jit_context_blend_color(gallivm, context_ptr);
723    const_ptr = LLVMBuildBitCast(builder, const_ptr,
724                                 LLVMPointerType(vec_type, 0), "");
725 
726    /* load constant blend color and colors from the dest color buffer */
727    for(chan = 0; chan < 4; ++chan) {
728       LLVMValueRef index = lp_build_const_int32(gallivm, chan);
729       con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
730 
731       dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
732 
733       lp_build_name(con[chan], "con.%c", "rgba"[chan]);
734       lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
735    }
736 
737    /* do blend */
738    lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res);
739 
740    /* store results to color buffer */
741    for(chan = 0; chan < 4; ++chan) {
742       if(blend->rt[rt].colormask & (1 << chan)) {
743          LLVMValueRef index = lp_build_const_int32(gallivm, chan);
744          lp_build_name(res[chan], "res.%c", "rgba"[chan]);
745          res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
746          LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
747       }
748    }
749 
750    lp_build_mask_end(&mask_ctx);
751 }
752 
753 
754 /**
755  * Generate the runtime callable function for the whole fragment pipeline.
756  * Note that the function which we generate operates on a block of 16
 * pixels at a time.  The block contains 2x2 quads.  Each quad contains
758  * 2x2 pixels.
759  */
760 static void
generate_fragment(struct llvmpipe_context * lp,struct lp_fragment_shader * shader,struct lp_fragment_shader_variant * variant,unsigned partial_mask)761 generate_fragment(struct llvmpipe_context *lp,
762                   struct lp_fragment_shader *shader,
763                   struct lp_fragment_shader_variant *variant,
764                   unsigned partial_mask)
765 {
766    struct gallivm_state *gallivm = variant->gallivm;
767    const struct lp_fragment_shader_variant_key *key = &variant->key;
768    struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
769    char func_name[256];
770    struct lp_type fs_type;
771    struct lp_type blend_type;
772    LLVMTypeRef fs_elem_type;
773    LLVMTypeRef blend_vec_type;
774    LLVMTypeRef arg_types[11];
775    LLVMTypeRef func_type;
776    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
777    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
778    LLVMValueRef context_ptr;
779    LLVMValueRef x;
780    LLVMValueRef y;
781    LLVMValueRef a0_ptr;
782    LLVMValueRef dadx_ptr;
783    LLVMValueRef dady_ptr;
784    LLVMValueRef color_ptr_ptr;
785    LLVMValueRef depth_ptr;
786    LLVMValueRef mask_input;
787    LLVMValueRef counter = NULL;
788    LLVMBasicBlockRef block;
789    LLVMBuilderRef builder;
790    struct lp_build_sampler_soa *sampler;
791    struct lp_build_interp_soa_context interp;
792    LLVMValueRef fs_mask[16 / 4];
793    LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
794    LLVMValueRef blend_mask;
795    LLVMValueRef function;
796    LLVMValueRef facing;
797    const struct util_format_description *zs_format_desc;
798    unsigned num_fs;
799    unsigned i;
800    unsigned chan;
801    unsigned cbuf;
802    boolean cbuf0_write_all;
803    boolean try_loop = TRUE;
804 
805    assert(lp_native_vector_width / 32 >= 4);
806 
807    /* Adjust color input interpolation according to flatshade state:
808     */
809    memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]);
810    for (i = 0; i < shader->info.base.num_inputs; i++) {
811       if (inputs[i].interp == LP_INTERP_COLOR) {
812 	 if (key->flatshade)
813 	    inputs[i].interp = LP_INTERP_CONSTANT;
814 	 else
815 	    inputs[i].interp = LP_INTERP_PERSPECTIVE;
816       }
817    }
818 
819    /* check if writes to cbuf[0] are to be copied to all cbufs */
820    cbuf0_write_all = FALSE;
821    for (i = 0;i < shader->info.base.num_properties; i++) {
822       if (shader->info.base.properties[i].name ==
823           TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
824          cbuf0_write_all = TRUE;
825          break;
826       }
827    }
828 
829    /* TODO: actually pick these based on the fs and color buffer
830     * characteristics. */
831 
832    memset(&fs_type, 0, sizeof fs_type);
833    fs_type.floating = TRUE;      /* floating point values */
834    fs_type.sign = TRUE;          /* values are signed */
835    fs_type.norm = FALSE;         /* values are not limited to [0,1] or [-1,1] */
836    fs_type.width = 32;           /* 32-bit float */
837    fs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */
838    num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
839 
840    memset(&blend_type, 0, sizeof blend_type);
841    blend_type.floating = FALSE; /* values are integers */
842    blend_type.sign = FALSE;     /* values are unsigned */
843    blend_type.norm = TRUE;      /* values are in [0,1] or [-1,1] */
844    blend_type.width = 8;        /* 8-bit ubyte values */
845    blend_type.length = 16;      /* 16 elements per vector */
846 
847    /*
848     * Generate the function prototype. Any change here must be reflected in
849     * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa.
850     */
851 
852    fs_elem_type = lp_build_elem_type(gallivm, fs_type);
853 
854    blend_vec_type = lp_build_vec_type(gallivm, blend_type);
855 
856    util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
857 		 shader->no, variant->no, partial_mask ? "partial" : "whole");
858 
859    arg_types[0] = variant->jit_context_ptr_type;       /* context */
860    arg_types[1] = int32_type;                          /* x */
861    arg_types[2] = int32_type;                          /* y */
862    arg_types[3] = int32_type;                          /* facing */
863    arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
864    arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
865    arg_types[6] = LLVMPointerType(fs_elem_type, 0);    /* dady */
866    arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0);  /* color */
867    arg_types[8] = LLVMPointerType(int8_type, 0);       /* depth */
868    arg_types[9] = int32_type;                          /* mask_input */
869    arg_types[10] = LLVMPointerType(int32_type, 0);     /* counter */
870 
871    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
872                                 arg_types, Elements(arg_types), 0);
873 
874    function = LLVMAddFunction(gallivm->module, func_name, func_type);
875    LLVMSetFunctionCallConv(function, LLVMCCallConv);
876 
877    variant->function[partial_mask] = function;
878 
879    /* XXX: need to propagate noalias down into color param now we are
880     * passing a pointer-to-pointer?
881     */
882    for(i = 0; i < Elements(arg_types); ++i)
883       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
884          LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
885 
886    context_ptr  = LLVMGetParam(function, 0);
887    x            = LLVMGetParam(function, 1);
888    y            = LLVMGetParam(function, 2);
889    facing       = LLVMGetParam(function, 3);
890    a0_ptr       = LLVMGetParam(function, 4);
891    dadx_ptr     = LLVMGetParam(function, 5);
892    dady_ptr     = LLVMGetParam(function, 6);
893    color_ptr_ptr = LLVMGetParam(function, 7);
894    depth_ptr    = LLVMGetParam(function, 8);
895    mask_input   = LLVMGetParam(function, 9);
896 
897    lp_build_name(context_ptr, "context");
898    lp_build_name(x, "x");
899    lp_build_name(y, "y");
900    lp_build_name(a0_ptr, "a0");
901    lp_build_name(dadx_ptr, "dadx");
902    lp_build_name(dady_ptr, "dady");
903    lp_build_name(color_ptr_ptr, "color_ptr_ptr");
904    lp_build_name(depth_ptr, "depth");
905    lp_build_name(mask_input, "mask_input");
906 
907    if (key->occlusion_count) {
908       counter = LLVMGetParam(function, 10);
909       lp_build_name(counter, "counter");
910    }
911 
912    /*
913     * Function body
914     */
915 
916    block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
917    builder = gallivm->builder;
918    assert(builder);
919    LLVMPositionBuilderAtEnd(builder, block);
920 
921    /* code generated texture sampling */
922    sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
923 
924    zs_format_desc = util_format_description(key->zsbuf_format);
925 
926    if (!try_loop) {
927       /*
928        * The shader input interpolation info is not explicitely baked in the
929        * shader key, but everything it derives from (TGSI, and flatshade) is
930        * already included in the shader key.
931        */
932       lp_build_interp_soa_init(&interp,
933                                gallivm,
934                                shader->info.base.num_inputs,
935                                inputs,
936                                builder, fs_type,
937                                FALSE,
938                                a0_ptr, dadx_ptr, dady_ptr,
939                                x, y);
940 
941       /* loop over quads in the block */
942       for(i = 0; i < num_fs; ++i) {
943          LLVMValueRef depth_offset = LLVMConstInt(int32_type,
944                                                   i*fs_type.length*zs_format_desc->block.bits/8,
945                                                   0);
946          LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
947          LLVMValueRef depth_ptr_i;
948 
949          depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
950 
951          generate_fs(gallivm,
952                      shader, key,
953                      builder,
954                      fs_type,
955                      context_ptr,
956                      i,
957                      &interp,
958                      sampler,
959                      &fs_mask[i], /* output */
960                      out_color,
961                      depth_ptr_i,
962                      facing,
963                      partial_mask,
964                      mask_input,
965                      counter);
966 
967          for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
968             for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
969                fs_out_color[cbuf][chan][i] =
970                   out_color[cbuf * !cbuf0_write_all][chan];
971       }
972    }
973    else {
974       unsigned depth_bits = zs_format_desc->block.bits/8;
975       LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
976       LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
977       LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
978                                                       num_loop, "mask_store");
979       LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
980 
981       /*
982        * The shader input interpolation info is not explicitely baked in the
983        * shader key, but everything it derives from (TGSI, and flatshade) is
984        * already included in the shader key.
985        */
986       lp_build_interp_soa_init(&interp,
987                                gallivm,
988                                shader->info.base.num_inputs,
989                                inputs,
990                                builder, fs_type,
991                                TRUE,
992                                a0_ptr, dadx_ptr, dady_ptr,
993                                x, y);
994 
995       for (i = 0; i < num_fs; i++) {
996          LLVMValueRef mask;
997          LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
998          LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store,
999                                               &indexi, 1, "mask_ptr");
1000 
1001          if (partial_mask) {
1002             mask = generate_quad_mask(gallivm, fs_type,
1003                                       i*fs_type.length/4, mask_input);
1004          }
1005          else {
1006             mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
1007          }
1008          LLVMBuildStore(builder, mask, mask_ptr);
1009       }
1010 
1011       generate_fs_loop(gallivm,
1012                        shader, key,
1013                        builder,
1014                        fs_type,
1015                        context_ptr,
1016                        num_loop,
1017                        &interp,
1018                        sampler,
1019                        mask_store, /* output */
1020                        color_store,
1021                        depth_ptr,
1022                        depth_bits,
1023                        facing,
1024                        counter);
1025 
1026       for (i = 0; i < num_fs; i++) {
1027          LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
1028          LLVMValueRef ptr = LLVMBuildGEP(builder, mask_store,
1029                                          &indexi, 1, "");
1030          fs_mask[i] = LLVMBuildLoad(builder, ptr, "mask");
1031          /* This is fucked up need to reorganize things */
1032          for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
1033             for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1034                ptr = LLVMBuildGEP(builder,
1035                                   color_store[cbuf * !cbuf0_write_all][chan],
1036                                   &indexi, 1, "");
1037                fs_out_color[cbuf][chan][i] = ptr;
1038             }
1039          }
1040       }
1041    }
1042 
1043    sampler->destroy(sampler);
1044 
1045    /* Loop over color outputs / color buffers to do blending.
1046     */
1047    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
1048       LLVMValueRef color_ptr;
1049       LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
1050       LLVMValueRef blend_in_color[TGSI_NUM_CHANNELS];
1051       unsigned rt;
1052 
1053       /*
1054        * Convert the fs's output color and mask to fit to the blending type.
1055        */
1056       for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1057          LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH];
1058 
1059          for (i = 0; i < num_fs; i++) {
1060             fs_color_vals[i] =
1061                LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals");
1062          }
1063 
1064          lp_build_conv(gallivm, fs_type, blend_type,
1065                        fs_color_vals,
1066                        num_fs,
1067                        &blend_in_color[chan], 1);
1068 
1069          lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
1070       }
1071 
1072       if (partial_mask || !variant->opaque) {
1073          lp_build_conv_mask(variant->gallivm, fs_type, blend_type,
1074                             fs_mask, num_fs,
1075                             &blend_mask, 1);
1076       } else {
1077          blend_mask = lp_build_const_int_vec(variant->gallivm, blend_type, ~0);
1078       }
1079 
1080       color_ptr = LLVMBuildLoad(builder,
1081                                 LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
1082                                 "");
1083       lp_build_name(color_ptr, "color_ptr%d", cbuf);
1084 
1085       /* which blend/colormask state to use */
1086       rt = key->blend.independent_blend_enable ? cbuf : 0;
1087 
1088       /*
1089        * Blending.
1090        */
1091       {
1092          /* Could the 4x4 have been killed?
1093           */
1094          boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) &&
1095                               !key->alpha.enabled &&
1096                               !shader->info.base.uses_kill);
1097 
1098          generate_blend(variant->gallivm,
1099                         &key->blend,
1100                         rt,
1101                         builder,
1102                         blend_type,
1103                         context_ptr,
1104                         blend_mask,
1105                         blend_in_color,
1106                         color_ptr,
1107                         do_branch);
1108       }
1109    }
1110 
1111    LLVMBuildRetVoid(builder);
1112 
1113    gallivm_verify_function(gallivm, function);
1114 
1115    variant->nr_instrs += lp_build_count_instructions(function);
1116 }
1117 
1118 
1119 static void
dump_fs_variant_key(const struct lp_fragment_shader_variant_key * key)1120 dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
1121 {
1122    unsigned i;
1123 
1124    debug_printf("fs variant %p:\n", (void *) key);
1125 
1126    if (key->flatshade) {
1127       debug_printf("flatshade = 1\n");
1128    }
1129    for (i = 0; i < key->nr_cbufs; ++i) {
1130       debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
1131    }
1132    if (key->depth.enabled) {
1133       debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format));
1134       debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE));
1135       debug_printf("depth.writemask = %u\n", key->depth.writemask);
1136    }
1137 
1138    for (i = 0; i < 2; ++i) {
1139       if (key->stencil[i].enabled) {
1140          debug_printf("stencil[%u].func = %s\n", i, util_dump_func(key->stencil[i].func, TRUE));
1141          debug_printf("stencil[%u].fail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].fail_op, TRUE));
1142          debug_printf("stencil[%u].zpass_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zpass_op, TRUE));
1143          debug_printf("stencil[%u].zfail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zfail_op, TRUE));
1144          debug_printf("stencil[%u].valuemask = 0x%x\n", i, key->stencil[i].valuemask);
1145          debug_printf("stencil[%u].writemask = 0x%x\n", i, key->stencil[i].writemask);
1146       }
1147    }
1148 
1149    if (key->alpha.enabled) {
1150       debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE));
1151    }
1152 
1153    if (key->occlusion_count) {
1154       debug_printf("occlusion_count = 1\n");
1155    }
1156 
1157    if (key->blend.logicop_enable) {
1158       debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE));
1159    }
1160    else if (key->blend.rt[0].blend_enable) {
1161       debug_printf("blend.rgb_func = %s\n",   util_dump_blend_func  (key->blend.rt[0].rgb_func, TRUE));
1162       debug_printf("blend.rgb_src_factor = %s\n",   util_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE));
1163       debug_printf("blend.rgb_dst_factor = %s\n",   util_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE));
1164       debug_printf("blend.alpha_func = %s\n",       util_dump_blend_func  (key->blend.rt[0].alpha_func, TRUE));
1165       debug_printf("blend.alpha_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE));
1166       debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE));
1167    }
1168    debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask);
1169    for (i = 0; i < key->nr_samplers; ++i) {
1170       debug_printf("sampler[%u] = \n", i);
1171       debug_printf("  .format = %s\n",
1172                    util_format_name(key->sampler[i].format));
1173       debug_printf("  .target = %s\n",
1174                    util_dump_tex_target(key->sampler[i].target, TRUE));
1175       debug_printf("  .pot = %u %u %u\n",
1176                    key->sampler[i].pot_width,
1177                    key->sampler[i].pot_height,
1178                    key->sampler[i].pot_depth);
1179       debug_printf("  .wrap = %s %s %s\n",
1180                    util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
1181                    util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
1182                    util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
1183       debug_printf("  .min_img_filter = %s\n",
1184                    util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
1185       debug_printf("  .min_mip_filter = %s\n",
1186                    util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
1187       debug_printf("  .mag_img_filter = %s\n",
1188                    util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
1189       if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
1190          debug_printf("  .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE));
1191       debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
1192       debug_printf("  .min_max_lod_equal = %u\n", key->sampler[i].min_max_lod_equal);
1193       debug_printf("  .lod_bias_non_zero = %u\n", key->sampler[i].lod_bias_non_zero);
1194       debug_printf("  .apply_min_lod = %u\n", key->sampler[i].apply_min_lod);
1195       debug_printf("  .apply_max_lod = %u\n", key->sampler[i].apply_max_lod);
1196    }
1197 }
1198 
1199 
1200 void
lp_debug_fs_variant(const struct lp_fragment_shader_variant * variant)1201 lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant)
1202 {
1203    debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n",
1204                 variant->shader->no, variant->no);
1205    tgsi_dump(variant->shader->base.tokens, 0);
1206    dump_fs_variant_key(&variant->key);
1207    debug_printf("variant->opaque = %u\n", variant->opaque);
1208    debug_printf("\n");
1209 }
1210 
1211 
1212 /**
1213  * Generate a new fragment shader variant from the shader code and
1214  * other state indicated by the key.
1215  */
1216 static struct lp_fragment_shader_variant *
generate_variant(struct llvmpipe_context * lp,struct lp_fragment_shader * shader,const struct lp_fragment_shader_variant_key * key)1217 generate_variant(struct llvmpipe_context *lp,
1218                  struct lp_fragment_shader *shader,
1219                  const struct lp_fragment_shader_variant_key *key)
1220 {
1221    struct lp_fragment_shader_variant *variant;
1222    const struct util_format_description *cbuf0_format_desc;
1223    boolean fullcolormask;
1224 
1225    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
1226    if(!variant)
1227       return NULL;
1228 
1229    variant->gallivm = gallivm_create();
1230    if (!variant->gallivm) {
1231       FREE(variant);
1232       return NULL;
1233    }
1234 
1235    variant->shader = shader;
1236    variant->list_item_global.base = variant;
1237    variant->list_item_local.base = variant;
1238    variant->no = shader->variants_created++;
1239 
1240    memcpy(&variant->key, key, shader->variant_key_size);
1241 
1242    /*
1243     * Determine whether we are touching all channels in the color buffer.
1244     */
1245    fullcolormask = FALSE;
1246    if (key->nr_cbufs == 1) {
1247       cbuf0_format_desc = util_format_description(key->cbuf_format[0]);
1248       fullcolormask = util_format_colormask_full(cbuf0_format_desc, key->blend.rt[0].colormask);
1249    }
1250 
1251    variant->opaque =
1252          !key->blend.logicop_enable &&
1253          !key->blend.rt[0].blend_enable &&
1254          fullcolormask &&
1255          !key->stencil[0].enabled &&
1256          !key->alpha.enabled &&
1257          !key->depth.enabled &&
1258          !shader->info.base.uses_kill
1259          ? TRUE : FALSE;
1260 
1261 
1262    if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
1263       lp_debug_fs_variant(variant);
1264    }
1265 
1266    lp_jit_init_types(variant);
1267 
1268    if (variant->jit_function[RAST_EDGE_TEST] == NULL)
1269       generate_fragment(lp, shader, variant, RAST_EDGE_TEST);
1270 
1271    if (variant->jit_function[RAST_WHOLE] == NULL) {
1272       if (variant->opaque) {
1273          /* Specialized shader, which doesn't need to read the color buffer. */
1274          generate_fragment(lp, shader, variant, RAST_WHOLE);
1275       }
1276    }
1277 
1278    /*
1279     * Compile everything
1280     */
1281 
1282    gallivm_compile_module(variant->gallivm);
1283 
1284    if (variant->function[RAST_EDGE_TEST]) {
1285       variant->jit_function[RAST_EDGE_TEST] = (lp_jit_frag_func)
1286             gallivm_jit_function(variant->gallivm,
1287                                  variant->function[RAST_EDGE_TEST]);
1288    }
1289 
1290    if (variant->function[RAST_WHOLE]) {
1291          variant->jit_function[RAST_WHOLE] = (lp_jit_frag_func)
1292                gallivm_jit_function(variant->gallivm,
1293                                     variant->function[RAST_WHOLE]);
1294    } else if (!variant->jit_function[RAST_WHOLE]) {
1295       variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
1296    }
1297 
1298    return variant;
1299 }
1300 
1301 
1302 static void *
llvmpipe_create_fs_state(struct pipe_context * pipe,const struct pipe_shader_state * templ)1303 llvmpipe_create_fs_state(struct pipe_context *pipe,
1304                          const struct pipe_shader_state *templ)
1305 {
1306    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1307    struct lp_fragment_shader *shader;
1308    int nr_samplers;
1309    int i;
1310 
1311    shader = CALLOC_STRUCT(lp_fragment_shader);
1312    if (!shader)
1313       return NULL;
1314 
1315    shader->no = fs_no++;
1316    make_empty_list(&shader->variants);
1317 
1318    /* get/save the summary info for this shader */
1319    lp_build_tgsi_info(templ->tokens, &shader->info);
1320 
1321    /* we need to keep a local copy of the tokens */
1322    shader->base.tokens = tgsi_dup_tokens(templ->tokens);
1323 
1324    shader->draw_data = draw_create_fragment_shader(llvmpipe->draw, templ);
1325    if (shader->draw_data == NULL) {
1326       FREE((void *) shader->base.tokens);
1327       FREE(shader);
1328       return NULL;
1329    }
1330 
1331    nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
1332 
1333    shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
1334 				     sampler[nr_samplers]);
1335 
1336    for (i = 0; i < shader->info.base.num_inputs; i++) {
1337       shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
1338       shader->inputs[i].cyl_wrap = shader->info.base.input_cylindrical_wrap[i];
1339 
1340       switch (shader->info.base.input_interpolate[i]) {
1341       case TGSI_INTERPOLATE_CONSTANT:
1342 	 shader->inputs[i].interp = LP_INTERP_CONSTANT;
1343 	 break;
1344       case TGSI_INTERPOLATE_LINEAR:
1345 	 shader->inputs[i].interp = LP_INTERP_LINEAR;
1346 	 break;
1347       case TGSI_INTERPOLATE_PERSPECTIVE:
1348 	 shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
1349 	 break;
1350       case TGSI_INTERPOLATE_COLOR:
1351 	 shader->inputs[i].interp = LP_INTERP_COLOR;
1352 	 break;
1353       default:
1354 	 assert(0);
1355 	 break;
1356       }
1357 
1358       switch (shader->info.base.input_semantic_name[i]) {
1359       case TGSI_SEMANTIC_FACE:
1360 	 shader->inputs[i].interp = LP_INTERP_FACING;
1361 	 break;
1362       case TGSI_SEMANTIC_POSITION:
1363 	 /* Position was already emitted above
1364 	  */
1365 	 shader->inputs[i].interp = LP_INTERP_POSITION;
1366 	 shader->inputs[i].src_index = 0;
1367 	 continue;
1368       }
1369 
1370       shader->inputs[i].src_index = i+1;
1371    }
1372 
1373    if (LP_DEBUG & DEBUG_TGSI) {
1374       unsigned attrib;
1375       debug_printf("llvmpipe: Create fragment shader #%u %p:\n",
1376                    shader->no, (void *) shader);
1377       tgsi_dump(templ->tokens, 0);
1378       debug_printf("usage masks:\n");
1379       for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) {
1380          unsigned usage_mask = shader->info.base.input_usage_mask[attrib];
1381          debug_printf("  IN[%u].%s%s%s%s\n",
1382                       attrib,
1383                       usage_mask & TGSI_WRITEMASK_X ? "x" : "",
1384                       usage_mask & TGSI_WRITEMASK_Y ? "y" : "",
1385                       usage_mask & TGSI_WRITEMASK_Z ? "z" : "",
1386                       usage_mask & TGSI_WRITEMASK_W ? "w" : "");
1387       }
1388       debug_printf("\n");
1389    }
1390 
1391    return shader;
1392 }
1393 
1394 
1395 static void
llvmpipe_bind_fs_state(struct pipe_context * pipe,void * fs)1396 llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
1397 {
1398    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1399 
1400    if (llvmpipe->fs == fs)
1401       return;
1402 
1403    draw_flush(llvmpipe->draw);
1404 
1405    llvmpipe->fs = (struct lp_fragment_shader *) fs;
1406 
1407    draw_bind_fragment_shader(llvmpipe->draw,
1408                              (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL));
1409 
1410    llvmpipe->dirty |= LP_NEW_FS;
1411 }
1412 
1413 
1414 /**
1415  * Remove shader variant from two lists: the shader's variant list
1416  * and the context's variant list.
1417  */
1418 void
llvmpipe_remove_shader_variant(struct llvmpipe_context * lp,struct lp_fragment_shader_variant * variant)1419 llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
1420                                struct lp_fragment_shader_variant *variant)
1421 {
1422    unsigned i;
1423 
1424    if (gallivm_debug & GALLIVM_DEBUG_IR) {
1425       debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached"
1426                    " #%u v total cached #%u\n",
1427                    variant->shader->no,
1428                    variant->no,
1429                    variant->shader->variants_created,
1430                    variant->shader->variants_cached,
1431                    lp->nr_fs_variants);
1432    }
1433 
1434    /* free all the variant's JIT'd functions */
1435    for (i = 0; i < Elements(variant->function); i++) {
1436       if (variant->function[i]) {
1437          gallivm_free_function(variant->gallivm,
1438                                variant->function[i],
1439                                variant->jit_function[i]);
1440       }
1441    }
1442 
1443    gallivm_destroy(variant->gallivm);
1444 
1445    /* remove from shader's list */
1446    remove_from_list(&variant->list_item_local);
1447    variant->shader->variants_cached--;
1448 
1449    /* remove from context's list */
1450    remove_from_list(&variant->list_item_global);
1451    lp->nr_fs_variants--;
1452    lp->nr_fs_instrs -= variant->nr_instrs;
1453 
1454    FREE(variant);
1455 }
1456 
1457 
1458 static void
llvmpipe_delete_fs_state(struct pipe_context * pipe,void * fs)1459 llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
1460 {
1461    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1462    struct lp_fragment_shader *shader = fs;
1463    struct lp_fs_variant_list_item *li;
1464 
1465    assert(fs != llvmpipe->fs);
1466 
1467    /*
1468     * XXX: we need to flush the context until we have some sort of reference
1469     * counting in fragment shaders as they may still be binned
1470     * Flushing alone might not sufficient we need to wait on it too.
1471     */
1472    llvmpipe_finish(pipe, __FUNCTION__);
1473 
1474    /* Delete all the variants */
1475    li = first_elem(&shader->variants);
1476    while(!at_end(&shader->variants, li)) {
1477       struct lp_fs_variant_list_item *next = next_elem(li);
1478       llvmpipe_remove_shader_variant(llvmpipe, li->base);
1479       li = next;
1480    }
1481 
1482    /* Delete draw module's data */
1483    draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data);
1484 
1485    assert(shader->variants_cached == 0);
1486    FREE((void *) shader->base.tokens);
1487    FREE(shader);
1488 }
1489 
1490 
1491 
1492 static void
llvmpipe_set_constant_buffer(struct pipe_context * pipe,uint shader,uint index,struct pipe_constant_buffer * cb)1493 llvmpipe_set_constant_buffer(struct pipe_context *pipe,
1494                              uint shader, uint index,
1495                              struct pipe_constant_buffer *cb)
1496 {
1497    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1498    struct pipe_resource *constants = cb ? cb->buffer : NULL;
1499    unsigned size;
1500    const void *data;
1501 
1502    if (cb && cb->user_buffer) {
1503       constants = llvmpipe_user_buffer_create(pipe->screen,
1504                                               (void *) cb->user_buffer,
1505                                               cb->buffer_size,
1506                                               PIPE_BIND_CONSTANT_BUFFER);
1507    }
1508 
1509    size = constants ? constants->width0 : 0;
1510    data = constants ? llvmpipe_resource_data(constants) : NULL;
1511 
1512    assert(shader < PIPE_SHADER_TYPES);
1513    assert(index < PIPE_MAX_CONSTANT_BUFFERS);
1514 
1515    if(llvmpipe->constants[shader][index] == constants)
1516       return;
1517 
1518    draw_flush(llvmpipe->draw);
1519 
1520    /* note: reference counting */
1521    pipe_resource_reference(&llvmpipe->constants[shader][index], constants);
1522 
1523    if(shader == PIPE_SHADER_VERTEX ||
1524       shader == PIPE_SHADER_GEOMETRY) {
1525       draw_set_mapped_constant_buffer(llvmpipe->draw, shader,
1526                                       index, data, size);
1527    }
1528 
1529    llvmpipe->dirty |= LP_NEW_CONSTANTS;
1530 
1531    if (cb && cb->user_buffer) {
1532       pipe_resource_reference(&constants, NULL);
1533    }
1534 }
1535 
1536 
1537 /**
1538  * Return the blend factor equivalent to a destination alpha of one.
1539  */
1540 static INLINE unsigned
force_dst_alpha_one(unsigned factor)1541 force_dst_alpha_one(unsigned factor)
1542 {
1543    switch(factor) {
1544    case PIPE_BLENDFACTOR_DST_ALPHA:
1545       return PIPE_BLENDFACTOR_ONE;
1546    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
1547       return PIPE_BLENDFACTOR_ZERO;
1548    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
1549       return PIPE_BLENDFACTOR_ZERO;
1550    }
1551 
1552    return factor;
1553 }
1554 
1555 
1556 /**
1557  * We need to generate several variants of the fragment pipeline to match
1558  * all the combinations of the contributing state atoms.
1559  *
1560  * TODO: there is actually no reason to tie this to context state -- the
1561  * generated code could be cached globally in the screen.
1562  */
1563 static void
make_variant_key(struct llvmpipe_context * lp,struct lp_fragment_shader * shader,struct lp_fragment_shader_variant_key * key)1564 make_variant_key(struct llvmpipe_context *lp,
1565                  struct lp_fragment_shader *shader,
1566                  struct lp_fragment_shader_variant_key *key)
1567 {
1568    unsigned i;
1569 
1570    memset(key, 0, shader->variant_key_size);
1571 
1572    if (lp->framebuffer.zsbuf) {
1573       if (lp->depth_stencil->depth.enabled) {
1574          key->zsbuf_format = lp->framebuffer.zsbuf->format;
1575          memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
1576       }
1577       if (lp->depth_stencil->stencil[0].enabled) {
1578          key->zsbuf_format = lp->framebuffer.zsbuf->format;
1579          memcpy(&key->stencil, &lp->depth_stencil->stencil, sizeof key->stencil);
1580       }
1581    }
1582 
1583    key->alpha.enabled = lp->depth_stencil->alpha.enabled;
1584    if(key->alpha.enabled)
1585       key->alpha.func = lp->depth_stencil->alpha.func;
1586    /* alpha.ref_value is passed in jit_context */
1587 
1588    key->flatshade = lp->rasterizer->flatshade;
1589    if (lp->active_query_count) {
1590       key->occlusion_count = TRUE;
1591    }
1592 
1593    if (lp->framebuffer.nr_cbufs) {
1594       memcpy(&key->blend, lp->blend, sizeof key->blend);
1595    }
1596 
1597    key->nr_cbufs = lp->framebuffer.nr_cbufs;
1598    for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
1599       enum pipe_format format = lp->framebuffer.cbufs[i]->format;
1600       struct pipe_rt_blend_state *blend_rt = &key->blend.rt[i];
1601       const struct util_format_description *format_desc;
1602 
1603       key->cbuf_format[i] = format;
1604 
1605       format_desc = util_format_description(format);
1606       assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
1607              format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
1608 
1609       blend_rt->colormask = lp->blend->rt[i].colormask;
1610 
1611       /*
1612        * Mask out color channels not present in the color buffer.
1613        */
1614       blend_rt->colormask &= util_format_colormask(format_desc);
1615 
1616       /*
1617        * Our swizzled render tiles always have an alpha channel, but the linear
1618        * render target format often does not, so force here the dst alpha to be
1619        * one.
1620        *
1621        * This is not a mere optimization. Wrong results will be produced if the
1622        * dst alpha is used, the dst format does not have alpha, and the previous
1623        * rendering was not flushed from the swizzled to linear buffer. For
1624        * example, NonPowTwo DCT.
1625        *
1626        * TODO: This should be generalized to all channels for better
1627        * performance, but only alpha causes correctness issues.
1628        *
1629        * Also, force rgb/alpha func/factors match, to make AoS blending easier.
1630        */
1631       if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W ||
1632 	  format_desc->swizzle[3] == format_desc->swizzle[0]) {
1633          blend_rt->rgb_src_factor   = force_dst_alpha_one(blend_rt->rgb_src_factor);
1634          blend_rt->rgb_dst_factor   = force_dst_alpha_one(blend_rt->rgb_dst_factor);
1635          blend_rt->alpha_func       = blend_rt->rgb_func;
1636          blend_rt->alpha_src_factor = blend_rt->rgb_src_factor;
1637          blend_rt->alpha_dst_factor = blend_rt->rgb_dst_factor;
1638       }
1639    }
1640 
1641    /* This value will be the same for all the variants of a given shader:
1642     */
1643    key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
1644 
1645    for(i = 0; i < key->nr_samplers; ++i) {
1646       if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
1647          lp_sampler_static_state(&key->sampler[i],
1648 				 lp->sampler_views[PIPE_SHADER_FRAGMENT][i],
1649 				 lp->samplers[PIPE_SHADER_FRAGMENT][i]);
1650       }
1651    }
1652 }
1653 
1654 
1655 
1656 /**
1657  * Update fragment shader state.  This is called just prior to drawing
1658  * something when some fragment-related state has changed.
1659  */
1660 void
llvmpipe_update_fs(struct llvmpipe_context * lp)1661 llvmpipe_update_fs(struct llvmpipe_context *lp)
1662 {
1663    struct lp_fragment_shader *shader = lp->fs;
1664    struct lp_fragment_shader_variant_key key;
1665    struct lp_fragment_shader_variant *variant = NULL;
1666    struct lp_fs_variant_list_item *li;
1667 
1668    make_variant_key(lp, shader, &key);
1669 
1670    /* Search the variants for one which matches the key */
1671    li = first_elem(&shader->variants);
1672    while(!at_end(&shader->variants, li)) {
1673       if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) {
1674          variant = li->base;
1675          break;
1676       }
1677       li = next_elem(li);
1678    }
1679 
1680    if (variant) {
1681       /* Move this variant to the head of the list to implement LRU
1682        * deletion of shader's when we have too many.
1683        */
1684       move_to_head(&lp->fs_variants_list, &variant->list_item_global);
1685    }
1686    else {
1687       /* variant not found, create it now */
1688       int64_t t0, t1, dt;
1689       unsigned i;
1690       unsigned variants_to_cull;
1691 
1692       if (0) {
1693          debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n",
1694                       lp->nr_fs_variants,
1695                       lp->nr_fs_instrs,
1696                       lp->nr_fs_variants ? lp->nr_fs_instrs / lp->nr_fs_variants : 0);
1697       }
1698 
1699       /* First, check if we've exceeded the max number of shader variants.
1700        * If so, free 25% of them (the least recently used ones).
1701        */
1702       variants_to_cull = lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 4 : 0;
1703 
1704       if (variants_to_cull ||
1705           lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
1706          struct pipe_context *pipe = &lp->pipe;
1707 
1708          /*
1709           * XXX: we need to flush the context until we have some sort of
1710           * reference counting in fragment shaders as they may still be binned
1711           * Flushing alone might not be sufficient we need to wait on it too.
1712           */
1713          llvmpipe_finish(pipe, __FUNCTION__);
1714 
1715          /*
1716           * We need to re-check lp->nr_fs_variants because an arbitrarliy large
1717           * number of shader variants (potentially all of them) could be
1718           * pending for destruction on flush.
1719           */
1720 
1721          for (i = 0; i < variants_to_cull || lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS; i++) {
1722             struct lp_fs_variant_list_item *item;
1723             if (is_empty_list(&lp->fs_variants_list)) {
1724                break;
1725             }
1726             item = last_elem(&lp->fs_variants_list);
1727             assert(item);
1728             assert(item->base);
1729             llvmpipe_remove_shader_variant(lp, item->base);
1730          }
1731       }
1732 
1733       /*
1734        * Generate the new variant.
1735        */
1736       t0 = os_time_get();
1737       variant = generate_variant(lp, shader, &key);
1738       t1 = os_time_get();
1739       dt = t1 - t0;
1740       LP_COUNT_ADD(llvm_compile_time, dt);
1741       LP_COUNT_ADD(nr_llvm_compiles, 2);  /* emit vs. omit in/out test */
1742 
1743       llvmpipe_variant_count++;
1744 
1745       /* Put the new variant into the list */
1746       if (variant) {
1747          insert_at_head(&shader->variants, &variant->list_item_local);
1748          insert_at_head(&lp->fs_variants_list, &variant->list_item_global);
1749          lp->nr_fs_variants++;
1750          lp->nr_fs_instrs += variant->nr_instrs;
1751          shader->variants_cached++;
1752       }
1753    }
1754 
1755    /* Bind this variant */
1756    lp_setup_set_fs_variant(lp->setup, variant);
1757 }
1758 
1759 
1760 
1761 
1762 
1763 
1764 
1765 void
llvmpipe_init_fs_funcs(struct llvmpipe_context * llvmpipe)1766 llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe)
1767 {
1768    llvmpipe->pipe.create_fs_state = llvmpipe_create_fs_state;
1769    llvmpipe->pipe.bind_fs_state   = llvmpipe_bind_fs_state;
1770    llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state;
1771 
1772    llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer;
1773 }
1774