• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
27  
/**
 * @file
 * Texture sampling -- AoS.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
 */
35  
36  #include "pipe/p_defines.h"
37  #include "pipe/p_state.h"
38  #include "util/u_debug.h"
39  #include "util/u_dump.h"
40  #include "util/u_memory.h"
41  #include "util/u_math.h"
42  #include "util/u_format.h"
43  #include "util/u_cpu_detect.h"
44  #include "lp_bld_debug.h"
45  #include "lp_bld_type.h"
46  #include "lp_bld_const.h"
47  #include "lp_bld_conv.h"
48  #include "lp_bld_arit.h"
49  #include "lp_bld_bitarit.h"
50  #include "lp_bld_logic.h"
51  #include "lp_bld_swizzle.h"
52  #include "lp_bld_pack.h"
53  #include "lp_bld_flow.h"
54  #include "lp_bld_gather.h"
55  #include "lp_bld_format.h"
56  #include "lp_bld_init.h"
57  #include "lp_bld_sample.h"
58  #include "lp_bld_sample_aos.h"
59  #include "lp_bld_quad.h"
60  
61  
62  /**
63   * Build LLVM code for texture coord wrapping, for nearest filtering,
64   * for scaled integer texcoords.
65   * \param block_length  is the length of the pixel block along the
66   *                      coordinate axis
67   * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
68   * \param length  the texture size along one dimension
69   * \param stride  pixel stride along the coordinate axis (in bytes)
70   * \param is_pot  if TRUE, length is a power of two
71   * \param wrap_mode  one of PIPE_TEX_WRAP_x
72   * \param out_offset  byte offset for the wrapped coordinate
73   * \param out_i  resulting sub-block pixel coordinate for coord0
74   */
75  static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef coord_f,LLVMValueRef length,LLVMValueRef stride,boolean is_pot,unsigned wrap_mode,LLVMValueRef * out_offset,LLVMValueRef * out_i)76  lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
77                                   unsigned block_length,
78                                   LLVMValueRef coord,
79                                   LLVMValueRef coord_f,
80                                   LLVMValueRef length,
81                                   LLVMValueRef stride,
82                                   boolean is_pot,
83                                   unsigned wrap_mode,
84                                   LLVMValueRef *out_offset,
85                                   LLVMValueRef *out_i)
86  {
87     struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
88     LLVMBuilderRef builder = bld->gallivm->builder;
89     LLVMValueRef length_minus_one;
90  
91     length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
92  
93     switch(wrap_mode) {
94     case PIPE_TEX_WRAP_REPEAT:
95        if(is_pot)
96           coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
97        else {
98           struct lp_build_context *coord_bld = &bld->coord_bld;
99           LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
100           coord = lp_build_fract_safe(coord_bld, coord_f);
101           coord = lp_build_mul(coord_bld, coord, length_f);
102           coord = lp_build_itrunc(coord_bld, coord);
103        }
104        break;
105  
106     case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
107        coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
108        coord = lp_build_min(int_coord_bld, coord, length_minus_one);
109        break;
110  
111     case PIPE_TEX_WRAP_CLAMP:
112     case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
113     case PIPE_TEX_WRAP_MIRROR_REPEAT:
114     case PIPE_TEX_WRAP_MIRROR_CLAMP:
115     case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
116     case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
117     default:
118        assert(0);
119     }
120  
121     lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
122                                    out_offset, out_i);
123  }
124  
125  
126  /**
127   * Build LLVM code for texture coord wrapping, for nearest filtering,
128   * for float texcoords.
129   * \param coord  the incoming texcoord (s,t,r or q)
130   * \param length  the texture size along one dimension
131   * \param is_pot  if TRUE, length is a power of two
132   * \param wrap_mode  one of PIPE_TEX_WRAP_x
133   * \param icoord  the texcoord after wrapping, as int
134   */
135  static void
lp_build_sample_wrap_nearest_float(struct lp_build_sample_context * bld,LLVMValueRef coord,LLVMValueRef length,boolean is_pot,unsigned wrap_mode,LLVMValueRef * icoord)136  lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
137                                     LLVMValueRef coord,
138                                     LLVMValueRef length,
139                                     boolean is_pot,
140                                     unsigned wrap_mode,
141                                     LLVMValueRef *icoord)
142  {
143     struct lp_build_context *coord_bld = &bld->coord_bld;
144     LLVMValueRef length_minus_one;
145  
146     switch(wrap_mode) {
147     case PIPE_TEX_WRAP_REPEAT:
148        /* take fraction, unnormalize */
149        coord = lp_build_fract_safe(coord_bld, coord);
150        coord = lp_build_mul(coord_bld, coord, length);
151        *icoord = lp_build_itrunc(coord_bld, coord);
152        break;
153     case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
154        length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
155        if (bld->static_state->normalized_coords) {
156           /* scale coord to length */
157           coord = lp_build_mul(coord_bld, coord, length);
158        }
159        coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
160                               length_minus_one);
161        *icoord = lp_build_itrunc(coord_bld, coord);
162        break;
163  
164     case PIPE_TEX_WRAP_CLAMP:
165     case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
166     case PIPE_TEX_WRAP_MIRROR_REPEAT:
167     case PIPE_TEX_WRAP_MIRROR_CLAMP:
168     case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
169     case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
170     default:
171        assert(0);
172     }
173  }
174  
175  
176  /**
177   * Build LLVM code for texture coord wrapping, for linear filtering,
178   * for scaled integer texcoords.
179   * \param block_length  is the length of the pixel block along the
180   *                      coordinate axis
181   * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
182   * \param length  the texture size along one dimension
183   * \param stride  pixel stride along the coordinate axis (in bytes)
184   * \param is_pot  if TRUE, length is a power of two
185   * \param wrap_mode  one of PIPE_TEX_WRAP_x
186   * \param offset0  resulting relative offset for coord0
187   * \param offset1  resulting relative offset for coord0 + 1
188   * \param i0  resulting sub-block pixel coordinate for coord0
189   * \param i1  resulting sub-block pixel coordinate for coord0 + 1
190   */
191  static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord0,LLVMValueRef * weight_i,LLVMValueRef coord_f,LLVMValueRef length,LLVMValueRef stride,boolean is_pot,unsigned wrap_mode,LLVMValueRef * offset0,LLVMValueRef * offset1,LLVMValueRef * i0,LLVMValueRef * i1)192  lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
193                                  unsigned block_length,
194                                  LLVMValueRef coord0,
195                                  LLVMValueRef *weight_i,
196                                  LLVMValueRef coord_f,
197                                  LLVMValueRef length,
198                                  LLVMValueRef stride,
199                                  boolean is_pot,
200                                  unsigned wrap_mode,
201                                  LLVMValueRef *offset0,
202                                  LLVMValueRef *offset1,
203                                  LLVMValueRef *i0,
204                                  LLVMValueRef *i1)
205  {
206     struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
207     LLVMBuilderRef builder = bld->gallivm->builder;
208     LLVMValueRef length_minus_one;
209     LLVMValueRef lmask, umask, mask;
210  
211     /*
212      * If the pixel block covers more than one pixel then there is no easy
213      * way to calculate offset1 relative to offset0. Instead, compute them
214      * independently. Otherwise, try to compute offset0 and offset1 with
215      * a single stride multiplication.
216      */
217  
218     length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
219  
220     if (block_length != 1) {
221        LLVMValueRef coord1;
222        switch(wrap_mode) {
223        case PIPE_TEX_WRAP_REPEAT:
224           if (is_pot) {
225              coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
226              coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
227              coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
228           }
229           else {
230              LLVMValueRef mask;
231              LLVMValueRef weight;
232              LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
233              lp_build_coord_repeat_npot_linear(bld, coord_f,
234                                                length, length_f,
235                                                &coord0, &weight);
236              mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
237                                      PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
238              coord1 = LLVMBuildAnd(builder,
239                                    lp_build_add(int_coord_bld, coord0,
240                                                 int_coord_bld->one),
241                                    mask, "");
242              weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
243              *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
244           }
245           break;
246  
247        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
248           coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
249           coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
250                                  length_minus_one);
251           coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
252                                  length_minus_one);
253           break;
254  
255        case PIPE_TEX_WRAP_CLAMP:
256        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
257        case PIPE_TEX_WRAP_MIRROR_REPEAT:
258        case PIPE_TEX_WRAP_MIRROR_CLAMP:
259        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
260        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
261        default:
262           assert(0);
263           coord0 = int_coord_bld->zero;
264           coord1 = int_coord_bld->zero;
265           break;
266        }
267        lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
268                                       offset0, i0);
269        lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
270                                       offset1, i1);
271        return;
272     }
273  
274     *i0 = int_coord_bld->zero;
275     *i1 = int_coord_bld->zero;
276  
277     switch(wrap_mode) {
278     case PIPE_TEX_WRAP_REPEAT:
279        if (is_pot) {
280           coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
281        }
282        else {
283           LLVMValueRef weight;
284           LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
285           lp_build_coord_repeat_npot_linear(bld, coord_f,
286                                             length, length_f,
287                                             &coord0, &weight);
288           weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
289           *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
290        }
291  
292        mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
293                                PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
294  
295        *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
296        *offset1 = LLVMBuildAnd(builder,
297                                lp_build_add(int_coord_bld, *offset0, stride),
298                                mask, "");
299        break;
300  
301     case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
302        /* XXX this might be slower than the separate path
303         * on some newer cpus. With sse41 this is 8 instructions vs. 7
304         * - at least on SNB this is almost certainly slower since
305         * min/max are cheaper than selects, and the muls aren't bad.
306         */
307        lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
308                                 PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
309        umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
310                                 PIPE_FUNC_LESS, coord0, length_minus_one);
311  
312        coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
313        coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
314  
315        mask = LLVMBuildAnd(builder, lmask, umask, "");
316  
317        *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
318        *offset1 = lp_build_add(int_coord_bld,
319                                *offset0,
320                                LLVMBuildAnd(builder, stride, mask, ""));
321        break;
322  
323     case PIPE_TEX_WRAP_CLAMP:
324     case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
325     case PIPE_TEX_WRAP_MIRROR_REPEAT:
326     case PIPE_TEX_WRAP_MIRROR_CLAMP:
327     case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
328     case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
329     default:
330        assert(0);
331        *offset0 = int_coord_bld->zero;
332        *offset1 = int_coord_bld->zero;
333        break;
334     }
335  }
336  
337  
338  /**
339   * Build LLVM code for texture coord wrapping, for linear filtering,
340   * for float texcoords.
341   * \param block_length  is the length of the pixel block along the
342   *                      coordinate axis
343   * \param coord  the incoming texcoord (s,t,r or q)
344   * \param length  the texture size along one dimension
345   * \param is_pot  if TRUE, length is a power of two
346   * \param wrap_mode  one of PIPE_TEX_WRAP_x
347   * \param coord0  the first texcoord after wrapping, as int
348   * \param coord1  the second texcoord after wrapping, as int
349   * \param weight  the filter weight as int (0-255)
350   * \param force_nearest  if this coord actually uses nearest filtering
351   */
352  static void
lp_build_sample_wrap_linear_float(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef length,boolean is_pot,unsigned wrap_mode,LLVMValueRef * coord0,LLVMValueRef * coord1,LLVMValueRef * weight,unsigned force_nearest)353  lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
354                                    unsigned block_length,
355                                    LLVMValueRef coord,
356                                    LLVMValueRef length,
357                                    boolean is_pot,
358                                    unsigned wrap_mode,
359                                    LLVMValueRef *coord0,
360                                    LLVMValueRef *coord1,
361                                    LLVMValueRef *weight,
362                                    unsigned force_nearest)
363  {
364     struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
365     struct lp_build_context *coord_bld = &bld->coord_bld;
366     LLVMBuilderRef builder = bld->gallivm->builder;
367     LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
368     LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
369  
370     switch(wrap_mode) {
371     case PIPE_TEX_WRAP_REPEAT:
372        if (is_pot) {
373           /* mul by size and subtract 0.5 */
374           coord = lp_build_mul(coord_bld, coord, length);
375           if (!force_nearest)
376              coord = lp_build_sub(coord_bld, coord, half);
377           *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
378           /* convert to int, compute lerp weight */
379           lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
380           *coord1 = lp_build_ifloor(coord_bld, *coord1);
381           /* repeat wrap */
382           length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
383           *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
384           *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
385        }
386        else {
387           LLVMValueRef mask;
388           /* wrap with normalized floats is just fract */
389           coord = lp_build_fract(coord_bld, coord);
390           /* unnormalize */
391           coord = lp_build_mul(coord_bld, coord, length);
392           /*
393            * we avoided the 0.5/length division, have to fix up wrong
394            * edge cases with selects
395            */
396           *coord1 = lp_build_add(coord_bld, coord, half);
397           coord = lp_build_sub(coord_bld, coord, half);
398           *weight = lp_build_fract(coord_bld, coord);
399           mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
400                                   PIPE_FUNC_LESS, coord, coord_bld->zero);
401           *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
402           *coord0 = lp_build_itrunc(coord_bld, *coord0);
403           mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
404                                   PIPE_FUNC_LESS, *coord1, length);
405           *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
406           *coord1 = lp_build_itrunc(coord_bld, *coord1);
407        }
408        break;
409     case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
410        if (bld->static_state->normalized_coords) {
411           /* mul by tex size */
412           coord = lp_build_mul(coord_bld, coord, length);
413        }
414        /* subtract 0.5 */
415        if (!force_nearest) {
416           coord = lp_build_sub(coord_bld, coord, half);
417        }
418        /* clamp to [0, length - 1] */
419        coord = lp_build_min(coord_bld, coord, length_minus_one);
420        coord = lp_build_max(coord_bld, coord, coord_bld->zero);
421        *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
422        /* convert to int, compute lerp weight */
423        lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
424        /* coord1 = min(coord1, length-1) */
425        *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
426        *coord1 = lp_build_itrunc(coord_bld, *coord1);
427        break;
428     default:
429        assert(0);
430        *coord0 = int_coord_bld->zero;
431        *coord1 = int_coord_bld->zero;
432        *weight = coord_bld->zero;
433        break;
434     }
435     *weight = lp_build_mul_imm(coord_bld, *weight, 256);
436     *weight = lp_build_itrunc(coord_bld, *weight);
437     return;
438  }
439  
440  
441  /**
442   * Fetch texels for image with nearest sampling.
443   * Return filtered color as two vectors of 16-bit fixed point values.
444   */
445  static void
lp_build_sample_fetch_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef data_ptr,LLVMValueRef offset,LLVMValueRef x_subcoord,LLVMValueRef y_subcoord,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)446  lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
447                                      LLVMValueRef data_ptr,
448                                      LLVMValueRef offset,
449                                      LLVMValueRef x_subcoord,
450                                      LLVMValueRef y_subcoord,
451                                      LLVMValueRef *colors_lo,
452                                      LLVMValueRef *colors_hi)
453  {
454     /*
455      * Fetch the pixels as 4 x 32bit (rgba order might differ):
456      *
457      *   rgba0 rgba1 rgba2 rgba3
458      *
459      * bit cast them into 16 x u8
460      *
461      *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
462      *
463      * unpack them into two 8 x i16:
464      *
465      *   r0 g0 b0 a0 r1 g1 b1 a1
466      *   r2 g2 b2 a2 r3 g3 b3 a3
467      *
468      * The higher 8 bits of the resulting elements will be zero.
469      */
470     LLVMBuilderRef builder = bld->gallivm->builder;
471     LLVMValueRef rgba8;
472     struct lp_build_context h16, u8n;
473     LLVMTypeRef u8n_vec_type;
474  
475     lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
476     lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
477     u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
478  
479     if (util_format_is_rgba8_variant(bld->format_desc)) {
480        /*
481         * Given the format is a rgba8, just read the pixels as is,
482         * without any swizzling. Swizzling will be done later.
483         */
484        rgba8 = lp_build_gather(bld->gallivm,
485                                bld->texel_type.length,
486                                bld->format_desc->block.bits,
487                                bld->texel_type.width,
488                                data_ptr, offset);
489  
490        rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
491     }
492     else {
493        rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
494                                        bld->format_desc,
495                                        u8n.type,
496                                        data_ptr, offset,
497                                        x_subcoord,
498                                        y_subcoord);
499     }
500  
501     /* Expand one 4*rgba8 to two 2*rgba16 */
502     lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
503                      rgba8,
504                      colors_lo, colors_hi);
505  }
506  
507  
508  /**
509   * Sample a single texture image with nearest sampling.
510   * If sampling a cube texture, r = cube face in [0,5].
511   * Return filtered color as two vectors of 16-bit fixed point values.
512   */
513  static void
lp_build_sample_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)514  lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
515                                LLVMValueRef int_size,
516                                LLVMValueRef row_stride_vec,
517                                LLVMValueRef img_stride_vec,
518                                LLVMValueRef data_ptr,
519                                LLVMValueRef s,
520                                LLVMValueRef t,
521                                LLVMValueRef r,
522                                LLVMValueRef *colors_lo,
523                                LLVMValueRef *colors_hi)
524  {
525     const unsigned dims = bld->dims;
526     LLVMBuilderRef builder = bld->gallivm->builder;
527     struct lp_build_context i32;
528     LLVMTypeRef i32_vec_type;
529     LLVMValueRef i32_c8;
530     LLVMValueRef width_vec, height_vec, depth_vec;
531     LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
532     LLVMValueRef s_float, t_float = NULL, r_float = NULL;
533     LLVMValueRef x_stride;
534     LLVMValueRef x_offset, offset;
535     LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
536  
537     lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
538  
539     i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
540  
541     lp_build_extract_image_sizes(bld,
542                                  bld->int_size_type,
543                                  bld->int_coord_type,
544                                  int_size,
545                                  &width_vec,
546                                  &height_vec,
547                                  &depth_vec);
548  
549     s_float = s; t_float = t; r_float = r;
550  
551     if (bld->static_state->normalized_coords) {
552        LLVMValueRef scaled_size;
553        LLVMValueRef flt_size;
554  
555        /* scale size by 256 (8 fractional bits) */
556        scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
557  
558        flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
559  
560        lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
561     }
562     else {
563        /* scale coords by 256 (8 fractional bits) */
564        s = lp_build_mul_imm(&bld->coord_bld, s, 256);
565        if (dims >= 2)
566           t = lp_build_mul_imm(&bld->coord_bld, t, 256);
567        if (dims >= 3)
568           r = lp_build_mul_imm(&bld->coord_bld, r, 256);
569     }
570  
571     /* convert float to int */
572     s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
573     if (dims >= 2)
574        t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
575     if (dims >= 3)
576        r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
577  
578     /* compute floor (shift right 8) */
579     i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
580     s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
581     if (dims >= 2)
582        t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
583     if (dims >= 3)
584        r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
585  
586     /* get pixel, row, image strides */
587     x_stride = lp_build_const_vec(bld->gallivm,
588                                   bld->int_coord_bld.type,
589                                   bld->format_desc->block.bits/8);
590  
591     /* Do texcoord wrapping, compute texel offset */
592     lp_build_sample_wrap_nearest_int(bld,
593                                      bld->format_desc->block.width,
594                                      s_ipart, s_float,
595                                      width_vec, x_stride,
596                                      bld->static_state->pot_width,
597                                      bld->static_state->wrap_s,
598                                      &x_offset, &x_subcoord);
599     offset = x_offset;
600     if (dims >= 2) {
601        LLVMValueRef y_offset;
602        lp_build_sample_wrap_nearest_int(bld,
603                                         bld->format_desc->block.height,
604                                         t_ipart, t_float,
605                                         height_vec, row_stride_vec,
606                                         bld->static_state->pot_height,
607                                         bld->static_state->wrap_t,
608                                         &y_offset, &y_subcoord);
609        offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
610        if (dims >= 3) {
611           LLVMValueRef z_offset;
612           lp_build_sample_wrap_nearest_int(bld,
613                                            1, /* block length (depth) */
614                                            r_ipart, r_float,
615                                            depth_vec, img_stride_vec,
616                                            bld->static_state->pot_depth,
617                                            bld->static_state->wrap_r,
618                                            &z_offset, &z_subcoord);
619           offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
620        }
621        else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
622           LLVMValueRef z_offset;
623           /* The r coord is the cube face in [0,5] */
624           z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
625           offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
626        }
627     }
628  
629     lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
630                                         x_subcoord, y_subcoord,
631                                         colors_lo, colors_hi);
632  }
633  
634  
635  /**
636   * Sample a single texture image with nearest sampling.
637   * If sampling a cube texture, r = cube face in [0,5].
638   * Return filtered color as two vectors of 16-bit fixed point values.
639   * Does address calcs (except offsets) with floats.
640   * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
641   */
642  static void
lp_build_sample_image_nearest_afloat(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)643  lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
644                                       LLVMValueRef int_size,
645                                       LLVMValueRef row_stride_vec,
646                                       LLVMValueRef img_stride_vec,
647                                       LLVMValueRef data_ptr,
648                                       LLVMValueRef s,
649                                       LLVMValueRef t,
650                                       LLVMValueRef r,
651                                       LLVMValueRef *colors_lo,
652                                       LLVMValueRef *colors_hi)
653     {
654     const unsigned dims = bld->dims;
655     LLVMValueRef width_vec, height_vec, depth_vec;
656     LLVMValueRef offset;
657     LLVMValueRef x_subcoord, y_subcoord;
658     LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
659     LLVMValueRef flt_size;
660  
661     flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
662  
663     lp_build_extract_image_sizes(bld,
664                                  bld->float_size_type,
665                                  bld->coord_type,
666                                  flt_size,
667                                  &width_vec,
668                                  &height_vec,
669                                  &depth_vec);
670  
671     /* Do texcoord wrapping */
672     lp_build_sample_wrap_nearest_float(bld,
673                                        s, width_vec,
674                                        bld->static_state->pot_width,
675                                        bld->static_state->wrap_s,
676                                        &x_icoord);
677  
678     if (dims >= 2) {
679        lp_build_sample_wrap_nearest_float(bld,
680                                           t, height_vec,
681                                           bld->static_state->pot_height,
682                                           bld->static_state->wrap_t,
683                                           &y_icoord);
684  
685        if (dims >= 3) {
686           lp_build_sample_wrap_nearest_float(bld,
687                                              r, depth_vec,
688                                              bld->static_state->pot_depth,
689                                              bld->static_state->wrap_r,
690                                              &z_icoord);
691        }
692        else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
693           z_icoord = r;
694        }
695     }
696  
697     /*
698      * From here on we deal with ints, and we should split up the 256bit
699      * vectors manually for better generated code.
700      */
701  
702     /*
703      * compute texel offsets -
704      * cannot do offset calc with floats, difficult for block-based formats,
705      * and not enough precision anyway.
706      */
707     lp_build_sample_offset(&bld->int_coord_bld,
708                            bld->format_desc,
709                            x_icoord, y_icoord,
710                            z_icoord,
711                            row_stride_vec, img_stride_vec,
712                            &offset,
713                            &x_subcoord, &y_subcoord);
714  
715     lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
716                                         x_subcoord, y_subcoord,
717                                         colors_lo, colors_hi);
718  }
719  
720  
721  /**
722   * Fetch texels for image with linear sampling.
723   * Return filtered color as two vectors of 16-bit fixed point values.
724   */
725  static void
lp_build_sample_fetch_image_linear(struct lp_build_sample_context * bld,LLVMValueRef data_ptr,LLVMValueRef offset[2][2][2],LLVMValueRef x_subcoord[2],LLVMValueRef y_subcoord[2],LLVMValueRef s_fpart,LLVMValueRef t_fpart,LLVMValueRef r_fpart,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)726  lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
727                                     LLVMValueRef data_ptr,
728                                     LLVMValueRef offset[2][2][2],
729                                     LLVMValueRef x_subcoord[2],
730                                     LLVMValueRef y_subcoord[2],
731                                     LLVMValueRef s_fpart,
732                                     LLVMValueRef t_fpart,
733                                     LLVMValueRef r_fpart,
734                                     LLVMValueRef *colors_lo,
735                                     LLVMValueRef *colors_hi)
736  {
737     const unsigned dims = bld->dims;
738     LLVMBuilderRef builder = bld->gallivm->builder;
739     struct lp_build_context h16, u8n;
740     LLVMTypeRef h16_vec_type, u8n_vec_type;
741     LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
742     LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
743     LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
744     LLVMValueRef shuffle_lo, shuffle_hi;
745     LLVMValueRef s_fpart_lo, s_fpart_hi;
746     LLVMValueRef t_fpart_lo = NULL, t_fpart_hi = NULL;
747     LLVMValueRef r_fpart_lo = NULL, r_fpart_hi = NULL;
748     LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
749     LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
750     LLVMValueRef packed_lo, packed_hi;
751     unsigned i, j, k;
752     unsigned numj, numk;
753  
754     lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
755     lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
756     h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
757     u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
758  
759     /*
760      * Transform 4 x i32 in
761      *
762      *   s_fpart = {s0, s1, s2, s3}
763      *
764      * into 8 x i16
765      *
766      *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
767      *
768      * into two 8 x i16
769      *
770      *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
771      *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
772      *
773      * and likewise for t_fpart. There is no risk of loosing precision here
774      * since the fractional parts only use the lower 8bits.
775      */
776     s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
777     if (dims >= 2)
778        t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
779     if (dims >= 3)
780        r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
781  
782     for (j = 0; j < h16.type.length; j += 4) {
783  #ifdef PIPE_ARCH_LITTLE_ENDIAN
784        unsigned subindex = 0;
785  #else
786        unsigned subindex = 1;
787  #endif
788        LLVMValueRef index;
789  
790        index = LLVMConstInt(elem_type, j/2 + subindex, 0);
791        for (i = 0; i < 4; ++i)
792           shuffles_lo[j + i] = index;
793  
794        index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
795        for (i = 0; i < 4; ++i)
796           shuffles_hi[j + i] = index;
797     }
798  
799     shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
800     shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
801  
802     s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
803                                         shuffle_lo, "");
804     s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
805                                         shuffle_hi, "");
806     if (dims >= 2) {
807        t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
808                                            shuffle_lo, "");
809        t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
810                                            shuffle_hi, "");
811     }
812     if (dims >= 3) {
813        r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
814                                            shuffle_lo, "");
815        r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
816                                            shuffle_hi, "");
817     }
818  
819     /*
820      * Fetch the pixels as 4 x 32bit (rgba order might differ):
821      *
822      *   rgba0 rgba1 rgba2 rgba3
823      *
824      * bit cast them into 16 x u8
825      *
826      *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
827      *
828      * unpack them into two 8 x i16:
829      *
830      *   r0 g0 b0 a0 r1 g1 b1 a1
831      *   r2 g2 b2 a2 r3 g3 b3 a3
832      *
833      * The higher 8 bits of the resulting elements will be zero.
834      */
835     numj = 1 + (dims >= 2);
836     numk = 1 + (dims >= 3);
837  
838     for (k = 0; k < numk; k++) {
839        for (j = 0; j < numj; j++) {
840           for (i = 0; i < 2; i++) {
841              LLVMValueRef rgba8;
842  
843              if (util_format_is_rgba8_variant(bld->format_desc)) {
844                 /*
845                  * Given the format is a rgba8, just read the pixels as is,
846                  * without any swizzling. Swizzling will be done later.
847                  */
848                 rgba8 = lp_build_gather(bld->gallivm,
849                                         bld->texel_type.length,
850                                         bld->format_desc->block.bits,
851                                         bld->texel_type.width,
852                                         data_ptr, offset[k][j][i]);
853  
854                 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
855              }
856              else {
857                 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
858                                                 bld->format_desc,
859                                                 u8n.type,
860                                                 data_ptr, offset[k][j][i],
861                                                 x_subcoord[i],
862                                                 y_subcoord[j]);
863              }
864  
865              /* Expand one 4*rgba8 to two 2*rgba16 */
866              lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
867                               rgba8,
868                               &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
869           }
870        }
871     }
872  
873     /*
874      * Linear interpolation with 8.8 fixed point.
875      */
876     if (bld->static_state->force_nearest_s) {
877        /* special case 1-D lerp */
878        packed_lo = lp_build_lerp(&h16,
879                                  t_fpart_lo,
880                                  neighbors_lo[0][0][0],
881                                  neighbors_lo[0][0][1]);
882  
883        packed_hi = lp_build_lerp(&h16,
884                                  t_fpart_hi,
885                                  neighbors_hi[0][1][0],
886                                  neighbors_hi[0][1][0]);
887     }
888     else if (bld->static_state->force_nearest_t) {
889        /* special case 1-D lerp */
890        packed_lo = lp_build_lerp(&h16,
891                                  s_fpart_lo,
892                                  neighbors_lo[0][0][0],
893                                  neighbors_lo[0][0][1]);
894  
895        packed_hi = lp_build_lerp(&h16,
896                                  s_fpart_hi,
897                                  neighbors_hi[0][0][0],
898                                  neighbors_hi[0][0][1]);
899     }
900     else {
901        /* general 1/2/3-D lerping */
902        if (dims == 1) {
903           packed_lo = lp_build_lerp(&h16,
904                                     s_fpart_lo,
905                                     neighbors_lo[0][0][0],
906                                     neighbors_lo[0][0][1]);
907  
908           packed_hi = lp_build_lerp(&h16,
909                                     s_fpart_hi,
910                                     neighbors_hi[0][0][0],
911                                     neighbors_hi[0][0][1]);
912        }
913        else {
914           /* 2-D lerp */
915           packed_lo = lp_build_lerp_2d(&h16,
916                                        s_fpart_lo, t_fpart_lo,
917                                        neighbors_lo[0][0][0],
918                                        neighbors_lo[0][0][1],
919                                        neighbors_lo[0][1][0],
920                                        neighbors_lo[0][1][1]);
921  
922           packed_hi = lp_build_lerp_2d(&h16,
923                                        s_fpart_hi, t_fpart_hi,
924                                        neighbors_hi[0][0][0],
925                                        neighbors_hi[0][0][1],
926                                        neighbors_hi[0][1][0],
927                                        neighbors_hi[0][1][1]);
928  
929           if (dims >= 3) {
930              LLVMValueRef packed_lo2, packed_hi2;
931  
932              /* lerp in the second z slice */
933              packed_lo2 = lp_build_lerp_2d(&h16,
934                                            s_fpart_lo, t_fpart_lo,
935                                            neighbors_lo[1][0][0],
936                                            neighbors_lo[1][0][1],
937                                            neighbors_lo[1][1][0],
938                                            neighbors_lo[1][1][1]);
939  
940              packed_hi2 = lp_build_lerp_2d(&h16,
941                                            s_fpart_hi, t_fpart_hi,
942                                            neighbors_hi[1][0][0],
943                                            neighbors_hi[1][0][1],
944                                            neighbors_hi[1][1][0],
945                                            neighbors_hi[1][1][1]);
946              /* interp between two z slices */
947              packed_lo = lp_build_lerp(&h16, r_fpart_lo,
948                                        packed_lo, packed_lo2);
949              packed_hi = lp_build_lerp(&h16, r_fpart_hi,
950                                        packed_hi, packed_hi2);
951           }
952        }
953     }
954  
955     *colors_lo = packed_lo;
956     *colors_hi = packed_hi;
957  }
958  
959  /**
960   * Sample a single texture image with (bi-)(tri-)linear sampling.
961   * Return filtered color as two vectors of 16-bit fixed point values.
962   */
963  static void
lp_build_sample_image_linear(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)964  lp_build_sample_image_linear(struct lp_build_sample_context *bld,
965                               LLVMValueRef int_size,
966                               LLVMValueRef row_stride_vec,
967                               LLVMValueRef img_stride_vec,
968                               LLVMValueRef data_ptr,
969                               LLVMValueRef s,
970                               LLVMValueRef t,
971                               LLVMValueRef r,
972                               LLVMValueRef *colors_lo,
973                               LLVMValueRef *colors_hi)
974  {
975     const unsigned dims = bld->dims;
976     LLVMBuilderRef builder = bld->gallivm->builder;
977     struct lp_build_context i32;
978     LLVMTypeRef i32_vec_type;
979     LLVMValueRef i32_c8, i32_c128, i32_c255;
980     LLVMValueRef width_vec, height_vec, depth_vec;
981     LLVMValueRef s_ipart, s_fpart, s_float;
982     LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
983     LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
984     LLVMValueRef x_stride, y_stride, z_stride;
985     LLVMValueRef x_offset0, x_offset1;
986     LLVMValueRef y_offset0, y_offset1;
987     LLVMValueRef z_offset0, z_offset1;
988     LLVMValueRef offset[2][2][2]; /* [z][y][x] */
989     LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
990     unsigned x, y, z;
991  
992     lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
993  
994     i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
995  
996     lp_build_extract_image_sizes(bld,
997                                  bld->int_size_type,
998                                  bld->int_coord_type,
999                                  int_size,
1000                                  &width_vec,
1001                                  &height_vec,
1002                                  &depth_vec);
1003  
1004     s_float = s; t_float = t; r_float = r;
1005  
1006     if (bld->static_state->normalized_coords) {
1007        LLVMValueRef scaled_size;
1008        LLVMValueRef flt_size;
1009  
1010        /* scale size by 256 (8 fractional bits) */
1011        scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
1012  
1013        flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
1014  
1015        lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
1016     }
1017     else {
1018        /* scale coords by 256 (8 fractional bits) */
1019        s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1020        if (dims >= 2)
1021           t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1022        if (dims >= 3)
1023           r = lp_build_mul_imm(&bld->coord_bld, r, 256);
1024     }
1025  
1026     /* convert float to int */
1027     s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1028     if (dims >= 2)
1029        t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1030     if (dims >= 3)
1031        r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
1032  
1033     /* subtract 0.5 (add -128) */
1034     i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
1035     if (!bld->static_state->force_nearest_s) {
1036        s = LLVMBuildAdd(builder, s, i32_c128, "");
1037     }
1038     if (dims >= 2 && !bld->static_state->force_nearest_t) {
1039        t = LLVMBuildAdd(builder, t, i32_c128, "");
1040     }
1041     if (dims >= 3) {
1042        r = LLVMBuildAdd(builder, r, i32_c128, "");
1043     }
1044  
1045     /* compute floor (shift right 8) */
1046     i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
1047     s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1048     if (dims >= 2)
1049        t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1050     if (dims >= 3)
1051        r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
1052  
1053     /* compute fractional part (AND with 0xff) */
1054     i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
1055     s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1056     if (dims >= 2)
1057        t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1058     if (dims >= 3)
1059        r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
1060  
1061     /* get pixel, row and image strides */
1062     x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
1063                                   bld->format_desc->block.bits/8);
1064     y_stride = row_stride_vec;
1065     z_stride = img_stride_vec;
1066  
1067     /* do texcoord wrapping and compute texel offsets */
1068     lp_build_sample_wrap_linear_int(bld,
1069                                     bld->format_desc->block.width,
1070                                     s_ipart, &s_fpart, s_float,
1071                                     width_vec, x_stride,
1072                                     bld->static_state->pot_width,
1073                                     bld->static_state->wrap_s,
1074                                     &x_offset0, &x_offset1,
1075                                     &x_subcoord[0], &x_subcoord[1]);
1076     for (z = 0; z < 2; z++) {
1077        for (y = 0; y < 2; y++) {
1078           offset[z][y][0] = x_offset0;
1079           offset[z][y][1] = x_offset1;
1080        }
1081     }
1082  
1083     if (dims >= 2) {
1084        lp_build_sample_wrap_linear_int(bld,
1085                                        bld->format_desc->block.height,
1086                                        t_ipart, &t_fpart, t_float,
1087                                        height_vec, y_stride,
1088                                        bld->static_state->pot_height,
1089                                        bld->static_state->wrap_t,
1090                                        &y_offset0, &y_offset1,
1091                                        &y_subcoord[0], &y_subcoord[1]);
1092  
1093        for (z = 0; z < 2; z++) {
1094           for (x = 0; x < 2; x++) {
1095              offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
1096                                             offset[z][0][x], y_offset0);
1097              offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
1098                                             offset[z][1][x], y_offset1);
1099           }
1100        }
1101     }
1102  
1103     if (dims >= 3) {
1104        lp_build_sample_wrap_linear_int(bld,
1105                                        bld->format_desc->block.height,
1106                                        r_ipart, &r_fpart, r_float,
1107                                        depth_vec, z_stride,
1108                                        bld->static_state->pot_depth,
1109                                        bld->static_state->wrap_r,
1110                                        &z_offset0, &z_offset1,
1111                                        &z_subcoord[0], &z_subcoord[1]);
1112        for (y = 0; y < 2; y++) {
1113           for (x = 0; x < 2; x++) {
1114              offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1115                                             offset[0][y][x], z_offset0);
1116              offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
1117                                             offset[1][y][x], z_offset1);
1118           }
1119        }
1120     }
1121     else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1122        LLVMValueRef z_offset;
1123        z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
1124        for (y = 0; y < 2; y++) {
1125           for (x = 0; x < 2; x++) {
1126              /* The r coord is the cube face in [0,5] */
1127              offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1128                                             offset[0][y][x], z_offset);
1129           }
1130        }
1131     }
1132  
1133     lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
1134                                        x_subcoord, y_subcoord,
1135                                        s_fpart, t_fpart, r_fpart,
1136                                        colors_lo, colors_hi);
1137  }
1138  
1139  
1140  /**
1141   * Sample a single texture image with (bi-)(tri-)linear sampling.
1142   * Return filtered color as two vectors of 16-bit fixed point values.
1143   * Does address calcs (except offsets) with floats.
1144   * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
1145   */
1146  static void
lp_build_sample_image_linear_afloat(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)1147  lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
1148                                      LLVMValueRef int_size,
1149                                      LLVMValueRef row_stride_vec,
1150                                      LLVMValueRef img_stride_vec,
1151                                      LLVMValueRef data_ptr,
1152                                      LLVMValueRef s,
1153                                      LLVMValueRef t,
1154                                      LLVMValueRef r,
1155                                      LLVMValueRef *colors_lo,
1156                                      LLVMValueRef *colors_hi)
1157  {
1158     const unsigned dims = bld->dims;
1159     LLVMValueRef width_vec, height_vec, depth_vec;
1160     LLVMValueRef s_fpart;
1161     LLVMValueRef t_fpart = NULL;
1162     LLVMValueRef r_fpart = NULL;
1163     LLVMValueRef x_stride, y_stride, z_stride;
1164     LLVMValueRef x_offset0, x_offset1;
1165     LLVMValueRef y_offset0, y_offset1;
1166     LLVMValueRef z_offset0, z_offset1;
1167     LLVMValueRef offset[2][2][2]; /* [z][y][x] */
1168     LLVMValueRef x_subcoord[2], y_subcoord[2];
1169     LLVMValueRef flt_size;
1170     LLVMValueRef x_icoord0, x_icoord1;
1171     LLVMValueRef y_icoord0, y_icoord1;
1172     LLVMValueRef z_icoord0, z_icoord1;
1173     unsigned x, y, z;
1174  
1175     flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
1176  
1177     lp_build_extract_image_sizes(bld,
1178                                  bld->float_size_type,
1179                                  bld->coord_type,
1180                                  flt_size,
1181                                  &width_vec,
1182                                  &height_vec,
1183                                  &depth_vec);
1184  
1185     /* do texcoord wrapping and compute texel offsets */
1186     lp_build_sample_wrap_linear_float(bld,
1187                                       bld->format_desc->block.width,
1188                                       s, width_vec,
1189                                       bld->static_state->pot_width,
1190                                       bld->static_state->wrap_s,
1191                                       &x_icoord0, &x_icoord1,
1192                                       &s_fpart,
1193                                       bld->static_state->force_nearest_s);
1194  
1195     if (dims >= 2) {
1196        lp_build_sample_wrap_linear_float(bld,
1197                                          bld->format_desc->block.height,
1198                                          t, height_vec,
1199                                          bld->static_state->pot_height,
1200                                          bld->static_state->wrap_t,
1201                                          &y_icoord0, &y_icoord1,
1202                                          &t_fpart,
1203                                          bld->static_state->force_nearest_t);
1204  
1205        if (dims >= 3) {
1206           lp_build_sample_wrap_linear_float(bld,
1207                                             bld->format_desc->block.height,
1208                                             r, depth_vec,
1209                                             bld->static_state->pot_depth,
1210                                             bld->static_state->wrap_r,
1211                                             &z_icoord0, &z_icoord1,
1212                                             &r_fpart, 0);
1213        }
1214     }
1215  
1216     /*
1217      * From here on we deal with ints, and we should split up the 256bit
1218      * vectors manually for better generated code.
1219      */
1220  
1221     /* get pixel, row and image strides */
1222     x_stride = lp_build_const_vec(bld->gallivm,
1223                                   bld->int_coord_bld.type,
1224                                   bld->format_desc->block.bits/8);
1225     y_stride = row_stride_vec;
1226     z_stride = img_stride_vec;
1227  
1228     /*
1229      * compute texel offset -
1230      * cannot do offset calc with floats, difficult for block-based formats,
1231      * and not enough precision anyway.
1232      */
1233     lp_build_sample_partial_offset(&bld->int_coord_bld,
1234                                    bld->format_desc->block.width,
1235                                    x_icoord0, x_stride,
1236                                    &x_offset0, &x_subcoord[0]);
1237     lp_build_sample_partial_offset(&bld->int_coord_bld,
1238                                    bld->format_desc->block.width,
1239                                    x_icoord1, x_stride,
1240                                    &x_offset1, &x_subcoord[1]);
1241     for (z = 0; z < 2; z++) {
1242        for (y = 0; y < 2; y++) {
1243           offset[z][y][0] = x_offset0;
1244           offset[z][y][1] = x_offset1;
1245        }
1246     }
1247  
1248     if (dims >= 2) {
1249        lp_build_sample_partial_offset(&bld->int_coord_bld,
1250                                       bld->format_desc->block.height,
1251                                       y_icoord0, y_stride,
1252                                       &y_offset0, &y_subcoord[0]);
1253        lp_build_sample_partial_offset(&bld->int_coord_bld,
1254                                       bld->format_desc->block.height,
1255                                       y_icoord1, y_stride,
1256                                       &y_offset1, &y_subcoord[1]);
1257        for (z = 0; z < 2; z++) {
1258           for (x = 0; x < 2; x++) {
1259              offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
1260                                             offset[z][0][x], y_offset0);
1261              offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
1262                                             offset[z][1][x], y_offset1);
1263           }
1264        }
1265     }
1266  
1267     if (dims >= 3) {
1268        LLVMValueRef z_subcoord[2];
1269        lp_build_sample_partial_offset(&bld->int_coord_bld,
1270                                       1,
1271                                       z_icoord0, z_stride,
1272                                       &z_offset0, &z_subcoord[0]);
1273        lp_build_sample_partial_offset(&bld->int_coord_bld,
1274                                       1,
1275                                       z_icoord1, z_stride,
1276                                       &z_offset1, &z_subcoord[1]);
1277        for (y = 0; y < 2; y++) {
1278           for (x = 0; x < 2; x++) {
1279              offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1280                                             offset[0][y][x], z_offset0);
1281              offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
1282                                             offset[1][y][x], z_offset1);
1283           }
1284        }
1285     }
1286     else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1287        LLVMValueRef z_offset;
1288        z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
1289        for (y = 0; y < 2; y++) {
1290           for (x = 0; x < 2; x++) {
1291              /* The r coord is the cube face in [0,5] */
1292              offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1293                                             offset[0][y][x], z_offset);
1294           }
1295        }
1296     }
1297  
1298     lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
1299                                        x_subcoord, y_subcoord,
1300                                        s_fpart, t_fpart, r_fpart,
1301                                        colors_lo, colors_hi);
1302  }
1303  
1304  
1305  /**
1306   * Sample the texture/mipmap using given image filter and mip filter.
1307   * data0_ptr and data1_ptr point to the two mipmap levels to sample
1308   * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1309   * If we're using nearest miplevel sampling the '1' values will be null/unused.
1310   */
1311  static void
lp_build_sample_mipmap(struct lp_build_sample_context * bld,unsigned img_filter,unsigned mip_filter,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef colors_lo_var,LLVMValueRef colors_hi_var)1312  lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1313                         unsigned img_filter,
1314                         unsigned mip_filter,
1315                         LLVMValueRef s,
1316                         LLVMValueRef t,
1317                         LLVMValueRef r,
1318                         LLVMValueRef ilevel0,
1319                         LLVMValueRef ilevel1,
1320                         LLVMValueRef lod_fpart,
1321                         LLVMValueRef colors_lo_var,
1322                         LLVMValueRef colors_hi_var)
1323  {
1324     LLVMBuilderRef builder = bld->gallivm->builder;
1325     LLVMValueRef size0;
1326     LLVMValueRef size1;
1327     LLVMValueRef row_stride0_vec = NULL;
1328     LLVMValueRef row_stride1_vec = NULL;
1329     LLVMValueRef img_stride0_vec = NULL;
1330     LLVMValueRef img_stride1_vec = NULL;
1331     LLVMValueRef data_ptr0;
1332     LLVMValueRef data_ptr1;
1333     LLVMValueRef colors0_lo, colors0_hi;
1334     LLVMValueRef colors1_lo, colors1_hi;
1335  
1336     /* sample the first mipmap level */
1337     lp_build_mipmap_level_sizes(bld, ilevel0,
1338                                 &size0,
1339                                 &row_stride0_vec, &img_stride0_vec);
1340     data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1341     if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
1342        if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1343           lp_build_sample_image_nearest_afloat(bld,
1344                                                size0,
1345                                                row_stride0_vec, img_stride0_vec,
1346                                                data_ptr0, s, t, r,
1347                                                &colors0_lo, &colors0_hi);
1348        }
1349        else {
1350           assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1351           lp_build_sample_image_linear_afloat(bld,
1352                                               size0,
1353                                               row_stride0_vec, img_stride0_vec,
1354                                               data_ptr0, s, t, r,
1355                                               &colors0_lo, &colors0_hi);
1356        }
1357     }
1358     else {
1359        if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1360           lp_build_sample_image_nearest(bld,
1361                                         size0,
1362                                         row_stride0_vec, img_stride0_vec,
1363                                         data_ptr0, s, t, r,
1364                                         &colors0_lo, &colors0_hi);
1365        }
1366        else {
1367           assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1368           lp_build_sample_image_linear(bld,
1369                                        size0,
1370                                        row_stride0_vec, img_stride0_vec,
1371                                        data_ptr0, s, t, r,
1372                                        &colors0_lo, &colors0_hi);
1373        }
1374     }
1375  
1376     /* Store the first level's colors in the output variables */
1377     LLVMBuildStore(builder, colors0_lo, colors_lo_var);
1378     LLVMBuildStore(builder, colors0_hi, colors_hi_var);
1379  
1380     if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1381        LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
1382                                                       bld->perquadf_bld.type, 256.0);
1383        LLVMTypeRef i32vec_type = lp_build_vec_type(bld->gallivm, bld->perquadi_bld.type);
1384        struct lp_build_if_state if_ctx;
1385        LLVMValueRef need_lerp;
1386        unsigned num_quads = bld->coord_bld.type.length / 4;
1387        unsigned i;
1388  
1389        lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
1390        lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
1391  
1392        /* need_lerp = lod_fpart > 0 */
1393        if (num_quads == 1) {
1394           need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
1395                                     lod_fpart, bld->perquadi_bld.zero,
1396                                     "need_lerp");
1397        }
1398        else {
1399           /*
1400            * We'll do mip filtering if any of the quads need it.
1401            * It might be better to split the vectors here and only fetch/filter
1402            * quads which need it.
1403            */
1404           /*
1405            * We need to clamp lod_fpart here since we can get negative
1406            * values which would screw up filtering if not all
1407            * lod_fpart values have same sign.
1408            * We can however then skip the greater than comparison.
1409            */
1410           lod_fpart = lp_build_max(&bld->perquadi_bld, lod_fpart,
1411                                    bld->perquadi_bld.zero);
1412           need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, lod_fpart);
1413        }
1414  
1415        lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1416        {
1417           struct lp_build_context h16_bld;
1418  
1419           lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
1420  
1421           /* sample the second mipmap level */
1422           lp_build_mipmap_level_sizes(bld, ilevel1,
1423                                       &size1,
1424                                       &row_stride1_vec, &img_stride1_vec);
1425           data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1426  
1427           if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
1428              if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1429                 lp_build_sample_image_nearest_afloat(bld,
1430                                                      size1,
1431                                                      row_stride1_vec, img_stride1_vec,
1432                                                      data_ptr1, s, t, r,
1433                                                      &colors1_lo, &colors1_hi);
1434              }
1435              else {
1436                 lp_build_sample_image_linear_afloat(bld,
1437                                                     size1,
1438                                                     row_stride1_vec, img_stride1_vec,
1439                                                     data_ptr1, s, t, r,
1440                                                     &colors1_lo, &colors1_hi);
1441              }
1442           }
1443           else {
1444              if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1445                 lp_build_sample_image_nearest(bld,
1446                                               size1,
1447                                               row_stride1_vec, img_stride1_vec,
1448                                               data_ptr1, s, t, r,
1449                                               &colors1_lo, &colors1_hi);
1450              }
1451              else {
1452                 lp_build_sample_image_linear(bld,
1453                                              size1,
1454                                              row_stride1_vec, img_stride1_vec,
1455                                              data_ptr1, s, t, r,
1456                                              &colors1_lo, &colors1_hi);
1457              }
1458           }
1459  
1460           /* interpolate samples from the two mipmap levels */
1461  
1462           if (num_quads == 1) {
1463              lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
1464              lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
1465  
1466  #if HAVE_LLVM == 0x208
1467              /* This is a work-around for a bug in LLVM 2.8.
1468               * Evidently, something goes wrong in the construction of the
1469               * lod_fpart short[8] vector.  Adding this no-effect shuffle seems
1470               * to force the vector to be properly constructed.
1471               * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
1472               */
1473              {
1474                 LLVMValueRef shuffles[8], shuffle;
1475                 assert(h16_bld.type.length <= Elements(shuffles));
1476                 for (i = 0; i < h16_bld.type.length; i++)
1477                    shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
1478                 shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
1479                 lod_fpart = LLVMBuildShuffleVector(builder,
1480                                                    lod_fpart, lod_fpart,
1481                                                    shuffle, "");
1482              }
1483  #endif
1484  
1485              colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
1486                                         colors0_lo, colors1_lo);
1487              colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
1488                                         colors0_hi, colors1_hi);
1489           }
1490           else {
1491              LLVMValueRef lod_parts[LP_MAX_VECTOR_LENGTH/16];
1492              struct lp_type perquadi16_type = bld->perquadi_bld.type;
1493              perquadi16_type.width /= 2;
1494              perquadi16_type.length *= 2;
1495              lod_fpart = LLVMBuildBitCast(builder, lod_fpart,
1496                                           lp_build_vec_type(bld->gallivm,
1497                                                             perquadi16_type), "");
1498              /* XXX this only works for exactly 2 quads. More quads need shuffle */
1499              assert(num_quads == 2);
1500              for (i = 0; i < num_quads; i++) {
1501                 LLVMValueRef indexi2 = lp_build_const_int32(bld->gallivm, i*2);
1502                 lod_parts[i] = lp_build_extract_broadcast(bld->gallivm,
1503                                                           perquadi16_type,
1504                                                           h16_bld.type,
1505                                                           lod_fpart,
1506                                                           indexi2);
1507              }
1508              colors0_lo = lp_build_lerp(&h16_bld, lod_parts[0],
1509                                         colors0_lo, colors1_lo);
1510              colors0_hi = lp_build_lerp(&h16_bld, lod_parts[1],
1511                                         colors0_hi, colors1_hi);
1512           }
1513  
1514           LLVMBuildStore(builder, colors0_lo, colors_lo_var);
1515           LLVMBuildStore(builder, colors0_hi, colors_hi_var);
1516        }
1517        lp_build_endif(&if_ctx);
1518     }
1519  }
1520  
1521  
1522  
1523  /**
1524   * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
1525   * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
1526   * but only limited texture coord wrap modes.
1527   */
1528  void
lp_build_sample_aos(struct lp_build_sample_context * bld,unsigned unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef lod_ipart,LLVMValueRef lod_fpart,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef texel_out[4])1529  lp_build_sample_aos(struct lp_build_sample_context *bld,
1530                      unsigned unit,
1531                      LLVMValueRef s,
1532                      LLVMValueRef t,
1533                      LLVMValueRef r,
1534                      LLVMValueRef lod_ipart,
1535                      LLVMValueRef lod_fpart,
1536                      LLVMValueRef ilevel0,
1537                      LLVMValueRef ilevel1,
1538                      LLVMValueRef texel_out[4])
1539  {
1540     struct lp_build_context *int_bld = &bld->int_bld;
1541     LLVMBuilderRef builder = bld->gallivm->builder;
1542     const unsigned mip_filter = bld->static_state->min_mip_filter;
1543     const unsigned min_filter = bld->static_state->min_img_filter;
1544     const unsigned mag_filter = bld->static_state->mag_img_filter;
1545     const unsigned dims = bld->dims;
1546     LLVMValueRef packed, packed_lo, packed_hi;
1547     LLVMValueRef unswizzled[4];
1548     struct lp_build_context h16_bld;
1549  
1550     /* we only support the common/simple wrap modes at this time */
1551     assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
1552     if (dims >= 2)
1553        assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
1554     if (dims >= 3)
1555        assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
1556  
1557  
1558     /* make 16-bit fixed-pt builder context */
1559     lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
1560  
1561     /*
1562      * Get/interpolate texture colors.
1563      */
1564  
1565     packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
1566     packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");
1567  
1568     if (min_filter == mag_filter) {
1569        /* no need to distinguish between minification and magnification */
1570        lp_build_sample_mipmap(bld,
1571                               min_filter, mip_filter,
1572                               s, t, r,
1573                               ilevel0, ilevel1, lod_fpart,
1574                               packed_lo, packed_hi);
1575     }
1576     else {
1577        /* Emit conditional to choose min image filter or mag image filter
1578         * depending on the lod being > 0 or <= 0, respectively.
1579         */
1580        struct lp_build_if_state if_ctx;
1581        LLVMValueRef minify;
1582  
1583        /* minify = lod >= 0.0 */
1584        minify = LLVMBuildICmp(builder, LLVMIntSGE,
1585                               lod_ipart, int_bld->zero, "");
1586  
1587        lp_build_if(&if_ctx, bld->gallivm, minify);
1588        {
1589           /* Use the minification filter */
1590           lp_build_sample_mipmap(bld,
1591                                  min_filter, mip_filter,
1592                                  s, t, r,
1593                                  ilevel0, ilevel1, lod_fpart,
1594                                  packed_lo, packed_hi);
1595        }
1596        lp_build_else(&if_ctx);
1597        {
1598           /* Use the magnification filter */
1599           lp_build_sample_mipmap(bld,
1600                                  mag_filter, PIPE_TEX_MIPFILTER_NONE,
1601                                  s, t, r,
1602                                  ilevel0, NULL, NULL,
1603                                  packed_lo, packed_hi);
1604        }
1605        lp_build_endif(&if_ctx);
1606     }
1607  
1608     /*
1609      * combine the values stored in 'packed_lo' and 'packed_hi' variables
1610      * into 'packed'
1611      */
1612     packed = lp_build_pack2(bld->gallivm,
1613                             h16_bld.type, lp_type_unorm(8, bld->vector_width),
1614                             LLVMBuildLoad(builder, packed_lo, ""),
1615                             LLVMBuildLoad(builder, packed_hi, ""));
1616  
1617     /*
1618      * Convert to SoA and swizzle.
1619      */
1620     lp_build_rgba8_to_f32_soa(bld->gallivm,
1621                               bld->texel_type,
1622                               packed, unswizzled);
1623  
1624     if (util_format_is_rgba8_variant(bld->format_desc)) {
1625        lp_build_format_swizzle_soa(bld->format_desc,
1626                                    &bld->texel_bld,
1627                                    unswizzled, texel_out);
1628     }
1629     else {
1630        texel_out[0] = unswizzled[0];
1631        texel_out[1] = unswizzled[1];
1632        texel_out[2] = unswizzled[2];
1633        texel_out[3] = unswizzled[3];
1634     }
1635  }
1636