1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Texture sampling -- AoS.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  * @author Brian Paul <brianp@vmware.com>
34  */
35 
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_bitarit.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_swizzle.h"
52 #include "lp_bld_pack.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_format.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_quad.h"
60 
61 
62 /**
63  * Build LLVM code for texture coord wrapping, for nearest filtering,
64  * for scaled integer texcoords.
65  * \param block_length  is the length of the pixel block along the
66  *                      coordinate axis
67  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
68  * \param length  the texture size along one dimension
69  * \param stride  pixel stride along the coordinate axis (in bytes)
70  * \param is_pot  if TRUE, length is a power of two
71  * \param wrap_mode  one of PIPE_TEX_WRAP_x
72  * \param out_offset  byte offset for the wrapped coordinate
73  * \param out_i  resulting sub-block pixel coordinate for coord0
74  */
75 static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef coord_f,LLVMValueRef length,LLVMValueRef stride,boolean is_pot,unsigned wrap_mode,LLVMValueRef * out_offset,LLVMValueRef * out_i)76 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
77                                  unsigned block_length,
78                                  LLVMValueRef coord,
79                                  LLVMValueRef coord_f,
80                                  LLVMValueRef length,
81                                  LLVMValueRef stride,
82                                  boolean is_pot,
83                                  unsigned wrap_mode,
84                                  LLVMValueRef *out_offset,
85                                  LLVMValueRef *out_i)
86 {
87    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
88    LLVMBuilderRef builder = bld->gallivm->builder;
89    LLVMValueRef length_minus_one;
90 
91    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
92 
93    switch(wrap_mode) {
94    case PIPE_TEX_WRAP_REPEAT:
95       if(is_pot)
96          coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
97       else {
98          struct lp_build_context *coord_bld = &bld->coord_bld;
99          LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
100          coord = lp_build_fract_safe(coord_bld, coord_f);
101          coord = lp_build_mul(coord_bld, coord, length_f);
102          coord = lp_build_itrunc(coord_bld, coord);
103       }
104       break;
105 
106    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
107       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
108       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
109       break;
110 
111    case PIPE_TEX_WRAP_CLAMP:
112    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
113    case PIPE_TEX_WRAP_MIRROR_REPEAT:
114    case PIPE_TEX_WRAP_MIRROR_CLAMP:
115    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
116    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
117    default:
118       assert(0);
119    }
120 
121    lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
122                                   out_offset, out_i);
123 }
124 
125 
126 /**
127  * Build LLVM code for texture coord wrapping, for nearest filtering,
128  * for float texcoords.
129  * \param coord  the incoming texcoord (s,t,r or q)
130  * \param length  the texture size along one dimension
131  * \param is_pot  if TRUE, length is a power of two
132  * \param wrap_mode  one of PIPE_TEX_WRAP_x
133  * \param icoord  the texcoord after wrapping, as int
134  */
135 static void
lp_build_sample_wrap_nearest_float(struct lp_build_sample_context * bld,LLVMValueRef coord,LLVMValueRef length,boolean is_pot,unsigned wrap_mode,LLVMValueRef * icoord)136 lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
137                                    LLVMValueRef coord,
138                                    LLVMValueRef length,
139                                    boolean is_pot,
140                                    unsigned wrap_mode,
141                                    LLVMValueRef *icoord)
142 {
143    struct lp_build_context *coord_bld = &bld->coord_bld;
144    LLVMValueRef length_minus_one;
145 
146    switch(wrap_mode) {
147    case PIPE_TEX_WRAP_REPEAT:
148       /* take fraction, unnormalize */
149       coord = lp_build_fract_safe(coord_bld, coord);
150       coord = lp_build_mul(coord_bld, coord, length);
151       *icoord = lp_build_itrunc(coord_bld, coord);
152       break;
153    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
154       length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
155       if (bld->static_state->normalized_coords) {
156          /* scale coord to length */
157          coord = lp_build_mul(coord_bld, coord, length);
158       }
159       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
160                              length_minus_one);
161       *icoord = lp_build_itrunc(coord_bld, coord);
162       break;
163 
164    case PIPE_TEX_WRAP_CLAMP:
165    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
166    case PIPE_TEX_WRAP_MIRROR_REPEAT:
167    case PIPE_TEX_WRAP_MIRROR_CLAMP:
168    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
169    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
170    default:
171       assert(0);
172    }
173 }
174 
175 
176 /**
177  * Build LLVM code for texture coord wrapping, for linear filtering,
178  * for scaled integer texcoords.
179  * \param block_length  is the length of the pixel block along the
180  *                      coordinate axis
181  * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
182  * \param length  the texture size along one dimension
183  * \param stride  pixel stride along the coordinate axis (in bytes)
184  * \param is_pot  if TRUE, length is a power of two
185  * \param wrap_mode  one of PIPE_TEX_WRAP_x
186  * \param offset0  resulting relative offset for coord0
187  * \param offset1  resulting relative offset for coord0 + 1
188  * \param i0  resulting sub-block pixel coordinate for coord0
189  * \param i1  resulting sub-block pixel coordinate for coord0 + 1
190  */
191 static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord0,LLVMValueRef * weight_i,LLVMValueRef coord_f,LLVMValueRef length,LLVMValueRef stride,boolean is_pot,unsigned wrap_mode,LLVMValueRef * offset0,LLVMValueRef * offset1,LLVMValueRef * i0,LLVMValueRef * i1)192 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
193                                 unsigned block_length,
194                                 LLVMValueRef coord0,
195                                 LLVMValueRef *weight_i,
196                                 LLVMValueRef coord_f,
197                                 LLVMValueRef length,
198                                 LLVMValueRef stride,
199                                 boolean is_pot,
200                                 unsigned wrap_mode,
201                                 LLVMValueRef *offset0,
202                                 LLVMValueRef *offset1,
203                                 LLVMValueRef *i0,
204                                 LLVMValueRef *i1)
205 {
206    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
207    LLVMBuilderRef builder = bld->gallivm->builder;
208    LLVMValueRef length_minus_one;
209    LLVMValueRef lmask, umask, mask;
210 
211    /*
212     * If the pixel block covers more than one pixel then there is no easy
213     * way to calculate offset1 relative to offset0. Instead, compute them
214     * independently. Otherwise, try to compute offset0 and offset1 with
215     * a single stride multiplication.
216     */
217 
218    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
219 
220    if (block_length != 1) {
221       LLVMValueRef coord1;
222       switch(wrap_mode) {
223       case PIPE_TEX_WRAP_REPEAT:
224          if (is_pot) {
225             coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
226             coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
227             coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
228          }
229          else {
230             LLVMValueRef mask;
231             LLVMValueRef weight;
232             LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
233             lp_build_coord_repeat_npot_linear(bld, coord_f,
234                                               length, length_f,
235                                               &coord0, &weight);
236             mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
237                                     PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
238             coord1 = LLVMBuildAnd(builder,
239                                   lp_build_add(int_coord_bld, coord0,
240                                                int_coord_bld->one),
241                                   mask, "");
242             weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
243             *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
244          }
245          break;
246 
247       case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
248          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
249          coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
250                                 length_minus_one);
251          coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
252                                 length_minus_one);
253          break;
254 
255       case PIPE_TEX_WRAP_CLAMP:
256       case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
257       case PIPE_TEX_WRAP_MIRROR_REPEAT:
258       case PIPE_TEX_WRAP_MIRROR_CLAMP:
259       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
260       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
261       default:
262          assert(0);
263          coord0 = int_coord_bld->zero;
264          coord1 = int_coord_bld->zero;
265          break;
266       }
267       lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
268                                      offset0, i0);
269       lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
270                                      offset1, i1);
271       return;
272    }
273 
274    *i0 = int_coord_bld->zero;
275    *i1 = int_coord_bld->zero;
276 
277    switch(wrap_mode) {
278    case PIPE_TEX_WRAP_REPEAT:
279       if (is_pot) {
280          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
281       }
282       else {
283          LLVMValueRef weight;
284          LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
285          lp_build_coord_repeat_npot_linear(bld, coord_f,
286                                            length, length_f,
287                                            &coord0, &weight);
288          weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
289          *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
290       }
291 
292       mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
293                               PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
294 
295       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
296       *offset1 = LLVMBuildAnd(builder,
297                               lp_build_add(int_coord_bld, *offset0, stride),
298                               mask, "");
299       break;
300 
301    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
302       /* XXX this might be slower than the separate path
303        * on some newer cpus. With sse41 this is 8 instructions vs. 7
304        * - at least on SNB this is almost certainly slower since
305        * min/max are cheaper than selects, and the muls aren't bad.
306        */
307       lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
308                                PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
309       umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
310                                PIPE_FUNC_LESS, coord0, length_minus_one);
311 
312       coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
313       coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
314 
315       mask = LLVMBuildAnd(builder, lmask, umask, "");
316 
317       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
318       *offset1 = lp_build_add(int_coord_bld,
319                               *offset0,
320                               LLVMBuildAnd(builder, stride, mask, ""));
321       break;
322 
323    case PIPE_TEX_WRAP_CLAMP:
324    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
325    case PIPE_TEX_WRAP_MIRROR_REPEAT:
326    case PIPE_TEX_WRAP_MIRROR_CLAMP:
327    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
328    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
329    default:
330       assert(0);
331       *offset0 = int_coord_bld->zero;
332       *offset1 = int_coord_bld->zero;
333       break;
334    }
335 }
336 
337 
338 /**
339  * Build LLVM code for texture coord wrapping, for linear filtering,
340  * for float texcoords.
341  * \param block_length  is the length of the pixel block along the
342  *                      coordinate axis
343  * \param coord  the incoming texcoord (s,t,r or q)
344  * \param length  the texture size along one dimension
345  * \param is_pot  if TRUE, length is a power of two
346  * \param wrap_mode  one of PIPE_TEX_WRAP_x
347  * \param coord0  the first texcoord after wrapping, as int
348  * \param coord1  the second texcoord after wrapping, as int
349  * \param weight  the filter weight as int (0-255)
350  * \param force_nearest  if this coord actually uses nearest filtering
351  */
352 static void
lp_build_sample_wrap_linear_float(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef length,boolean is_pot,unsigned wrap_mode,LLVMValueRef * coord0,LLVMValueRef * coord1,LLVMValueRef * weight,unsigned force_nearest)353 lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
354                                   unsigned block_length,
355                                   LLVMValueRef coord,
356                                   LLVMValueRef length,
357                                   boolean is_pot,
358                                   unsigned wrap_mode,
359                                   LLVMValueRef *coord0,
360                                   LLVMValueRef *coord1,
361                                   LLVMValueRef *weight,
362                                   unsigned force_nearest)
363 {
364    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
365    struct lp_build_context *coord_bld = &bld->coord_bld;
366    LLVMBuilderRef builder = bld->gallivm->builder;
367    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
368    LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
369 
370    switch(wrap_mode) {
371    case PIPE_TEX_WRAP_REPEAT:
372       if (is_pot) {
373          /* mul by size and subtract 0.5 */
374          coord = lp_build_mul(coord_bld, coord, length);
375          if (!force_nearest)
376             coord = lp_build_sub(coord_bld, coord, half);
377          *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
378          /* convert to int, compute lerp weight */
379          lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
380          *coord1 = lp_build_ifloor(coord_bld, *coord1);
381          /* repeat wrap */
382          length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
383          *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
384          *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
385       }
386       else {
387          LLVMValueRef mask;
388          /* wrap with normalized floats is just fract */
389          coord = lp_build_fract(coord_bld, coord);
390          /* unnormalize */
391          coord = lp_build_mul(coord_bld, coord, length);
392          /*
393           * we avoided the 0.5/length division, have to fix up wrong
394           * edge cases with selects
395           */
396          *coord1 = lp_build_add(coord_bld, coord, half);
397          coord = lp_build_sub(coord_bld, coord, half);
398          *weight = lp_build_fract(coord_bld, coord);
399          mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
400                                  PIPE_FUNC_LESS, coord, coord_bld->zero);
401          *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
402          *coord0 = lp_build_itrunc(coord_bld, *coord0);
403          mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
404                                  PIPE_FUNC_LESS, *coord1, length);
405          *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
406          *coord1 = lp_build_itrunc(coord_bld, *coord1);
407       }
408       break;
409    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
410       if (bld->static_state->normalized_coords) {
411          /* mul by tex size */
412          coord = lp_build_mul(coord_bld, coord, length);
413       }
414       /* subtract 0.5 */
415       if (!force_nearest) {
416          coord = lp_build_sub(coord_bld, coord, half);
417       }
418       /* clamp to [0, length - 1] */
419       coord = lp_build_min(coord_bld, coord, length_minus_one);
420       coord = lp_build_max(coord_bld, coord, coord_bld->zero);
421       *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
422       /* convert to int, compute lerp weight */
423       lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
424       /* coord1 = min(coord1, length-1) */
425       *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
426       *coord1 = lp_build_itrunc(coord_bld, *coord1);
427       break;
428    default:
429       assert(0);
430       *coord0 = int_coord_bld->zero;
431       *coord1 = int_coord_bld->zero;
432       *weight = coord_bld->zero;
433       break;
434    }
435    *weight = lp_build_mul_imm(coord_bld, *weight, 256);
436    *weight = lp_build_itrunc(coord_bld, *weight);
437    return;
438 }
439 
440 
441 /**
442  * Fetch texels for image with nearest sampling.
443  * Return filtered color as two vectors of 16-bit fixed point values.
444  */
445 static void
lp_build_sample_fetch_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef data_ptr,LLVMValueRef offset,LLVMValueRef x_subcoord,LLVMValueRef y_subcoord,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)446 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
447                                     LLVMValueRef data_ptr,
448                                     LLVMValueRef offset,
449                                     LLVMValueRef x_subcoord,
450                                     LLVMValueRef y_subcoord,
451                                     LLVMValueRef *colors_lo,
452                                     LLVMValueRef *colors_hi)
453 {
454    /*
455     * Fetch the pixels as 4 x 32bit (rgba order might differ):
456     *
457     *   rgba0 rgba1 rgba2 rgba3
458     *
459     * bit cast them into 16 x u8
460     *
461     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
462     *
463     * unpack them into two 8 x i16:
464     *
465     *   r0 g0 b0 a0 r1 g1 b1 a1
466     *   r2 g2 b2 a2 r3 g3 b3 a3
467     *
468     * The higher 8 bits of the resulting elements will be zero.
469     */
470    LLVMBuilderRef builder = bld->gallivm->builder;
471    LLVMValueRef rgba8;
472    struct lp_build_context h16, u8n;
473    LLVMTypeRef u8n_vec_type;
474 
475    lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
476    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
477    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
478 
479    if (util_format_is_rgba8_variant(bld->format_desc)) {
480       /*
481        * Given the format is a rgba8, just read the pixels as is,
482        * without any swizzling. Swizzling will be done later.
483        */
484       rgba8 = lp_build_gather(bld->gallivm,
485                               bld->texel_type.length,
486                               bld->format_desc->block.bits,
487                               bld->texel_type.width,
488                               data_ptr, offset);
489 
490       rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
491    }
492    else {
493       rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
494                                       bld->format_desc,
495                                       u8n.type,
496                                       data_ptr, offset,
497                                       x_subcoord,
498                                       y_subcoord);
499    }
500 
501    /* Expand one 4*rgba8 to two 2*rgba16 */
502    lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
503                     rgba8,
504                     colors_lo, colors_hi);
505 }
506 
507 
508 /**
509  * Sample a single texture image with nearest sampling.
510  * If sampling a cube texture, r = cube face in [0,5].
511  * Return filtered color as two vectors of 16-bit fixed point values.
512  */
513 static void
lp_build_sample_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)514 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
515                               LLVMValueRef int_size,
516                               LLVMValueRef row_stride_vec,
517                               LLVMValueRef img_stride_vec,
518                               LLVMValueRef data_ptr,
519                               LLVMValueRef s,
520                               LLVMValueRef t,
521                               LLVMValueRef r,
522                               LLVMValueRef *colors_lo,
523                               LLVMValueRef *colors_hi)
524 {
525    const unsigned dims = bld->dims;
526    LLVMBuilderRef builder = bld->gallivm->builder;
527    struct lp_build_context i32;
528    LLVMTypeRef i32_vec_type;
529    LLVMValueRef i32_c8;
530    LLVMValueRef width_vec, height_vec, depth_vec;
531    LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
532    LLVMValueRef s_float, t_float = NULL, r_float = NULL;
533    LLVMValueRef x_stride;
534    LLVMValueRef x_offset, offset;
535    LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
536 
537    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
538 
539    i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
540 
541    lp_build_extract_image_sizes(bld,
542                                 bld->int_size_type,
543                                 bld->int_coord_type,
544                                 int_size,
545                                 &width_vec,
546                                 &height_vec,
547                                 &depth_vec);
548 
549    s_float = s; t_float = t; r_float = r;
550 
551    if (bld->static_state->normalized_coords) {
552       LLVMValueRef scaled_size;
553       LLVMValueRef flt_size;
554 
555       /* scale size by 256 (8 fractional bits) */
556       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
557 
558       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
559 
560       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
561    }
562    else {
563       /* scale coords by 256 (8 fractional bits) */
564       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
565       if (dims >= 2)
566          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
567       if (dims >= 3)
568          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
569    }
570 
571    /* convert float to int */
572    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
573    if (dims >= 2)
574       t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
575    if (dims >= 3)
576       r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
577 
578    /* compute floor (shift right 8) */
579    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
580    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
581    if (dims >= 2)
582       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
583    if (dims >= 3)
584       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
585 
586    /* get pixel, row, image strides */
587    x_stride = lp_build_const_vec(bld->gallivm,
588                                  bld->int_coord_bld.type,
589                                  bld->format_desc->block.bits/8);
590 
591    /* Do texcoord wrapping, compute texel offset */
592    lp_build_sample_wrap_nearest_int(bld,
593                                     bld->format_desc->block.width,
594                                     s_ipart, s_float,
595                                     width_vec, x_stride,
596                                     bld->static_state->pot_width,
597                                     bld->static_state->wrap_s,
598                                     &x_offset, &x_subcoord);
599    offset = x_offset;
600    if (dims >= 2) {
601       LLVMValueRef y_offset;
602       lp_build_sample_wrap_nearest_int(bld,
603                                        bld->format_desc->block.height,
604                                        t_ipart, t_float,
605                                        height_vec, row_stride_vec,
606                                        bld->static_state->pot_height,
607                                        bld->static_state->wrap_t,
608                                        &y_offset, &y_subcoord);
609       offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
610       if (dims >= 3) {
611          LLVMValueRef z_offset;
612          lp_build_sample_wrap_nearest_int(bld,
613                                           1, /* block length (depth) */
614                                           r_ipart, r_float,
615                                           depth_vec, img_stride_vec,
616                                           bld->static_state->pot_depth,
617                                           bld->static_state->wrap_r,
618                                           &z_offset, &z_subcoord);
619          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
620       }
621       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
622          LLVMValueRef z_offset;
623          /* The r coord is the cube face in [0,5] */
624          z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
625          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
626       }
627    }
628 
629    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
630                                        x_subcoord, y_subcoord,
631                                        colors_lo, colors_hi);
632 }
633 
634 
635 /**
636  * Sample a single texture image with nearest sampling.
637  * If sampling a cube texture, r = cube face in [0,5].
638  * Return filtered color as two vectors of 16-bit fixed point values.
639  * Does address calcs (except offsets) with floats.
640  * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
641  */
642 static void
lp_build_sample_image_nearest_afloat(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)643 lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
644                                      LLVMValueRef int_size,
645                                      LLVMValueRef row_stride_vec,
646                                      LLVMValueRef img_stride_vec,
647                                      LLVMValueRef data_ptr,
648                                      LLVMValueRef s,
649                                      LLVMValueRef t,
650                                      LLVMValueRef r,
651                                      LLVMValueRef *colors_lo,
652                                      LLVMValueRef *colors_hi)
653    {
654    const unsigned dims = bld->dims;
655    LLVMValueRef width_vec, height_vec, depth_vec;
656    LLVMValueRef offset;
657    LLVMValueRef x_subcoord, y_subcoord;
658    LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
659    LLVMValueRef flt_size;
660 
661    flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
662 
663    lp_build_extract_image_sizes(bld,
664                                 bld->float_size_type,
665                                 bld->coord_type,
666                                 flt_size,
667                                 &width_vec,
668                                 &height_vec,
669                                 &depth_vec);
670 
671    /* Do texcoord wrapping */
672    lp_build_sample_wrap_nearest_float(bld,
673                                       s, width_vec,
674                                       bld->static_state->pot_width,
675                                       bld->static_state->wrap_s,
676                                       &x_icoord);
677 
678    if (dims >= 2) {
679       lp_build_sample_wrap_nearest_float(bld,
680                                          t, height_vec,
681                                          bld->static_state->pot_height,
682                                          bld->static_state->wrap_t,
683                                          &y_icoord);
684 
685       if (dims >= 3) {
686          lp_build_sample_wrap_nearest_float(bld,
687                                             r, depth_vec,
688                                             bld->static_state->pot_depth,
689                                             bld->static_state->wrap_r,
690                                             &z_icoord);
691       }
692       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
693          z_icoord = r;
694       }
695    }
696 
697    /*
698     * From here on we deal with ints, and we should split up the 256bit
699     * vectors manually for better generated code.
700     */
701 
702    /*
703     * compute texel offsets -
704     * cannot do offset calc with floats, difficult for block-based formats,
705     * and not enough precision anyway.
706     */
707    lp_build_sample_offset(&bld->int_coord_bld,
708                           bld->format_desc,
709                           x_icoord, y_icoord,
710                           z_icoord,
711                           row_stride_vec, img_stride_vec,
712                           &offset,
713                           &x_subcoord, &y_subcoord);
714 
715    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
716                                        x_subcoord, y_subcoord,
717                                        colors_lo, colors_hi);
718 }
719 
720 
721 /**
722  * Fetch texels for image with linear sampling.
723  * Return filtered color as two vectors of 16-bit fixed point values.
724  */
725 static void
lp_build_sample_fetch_image_linear(struct lp_build_sample_context * bld,LLVMValueRef data_ptr,LLVMValueRef offset[2][2][2],LLVMValueRef x_subcoord[2],LLVMValueRef y_subcoord[2],LLVMValueRef s_fpart,LLVMValueRef t_fpart,LLVMValueRef r_fpart,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)726 lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
727                                    LLVMValueRef data_ptr,
728                                    LLVMValueRef offset[2][2][2],
729                                    LLVMValueRef x_subcoord[2],
730                                    LLVMValueRef y_subcoord[2],
731                                    LLVMValueRef s_fpart,
732                                    LLVMValueRef t_fpart,
733                                    LLVMValueRef r_fpart,
734                                    LLVMValueRef *colors_lo,
735                                    LLVMValueRef *colors_hi)
736 {
737    const unsigned dims = bld->dims;
738    LLVMBuilderRef builder = bld->gallivm->builder;
739    struct lp_build_context h16, u8n;
740    LLVMTypeRef h16_vec_type, u8n_vec_type;
741    LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
742    LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
743    LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
744    LLVMValueRef shuffle_lo, shuffle_hi;
745    LLVMValueRef s_fpart_lo, s_fpart_hi;
746    LLVMValueRef t_fpart_lo = NULL, t_fpart_hi = NULL;
747    LLVMValueRef r_fpart_lo = NULL, r_fpart_hi = NULL;
748    LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
749    LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
750    LLVMValueRef packed_lo, packed_hi;
751    unsigned i, j, k;
752    unsigned numj, numk;
753 
754    lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
755    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
756    h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
757    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
758 
759    /*
760     * Transform 4 x i32 in
761     *
762     *   s_fpart = {s0, s1, s2, s3}
763     *
764     * into 8 x i16
765     *
766     *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
767     *
768     * into two 8 x i16
769     *
770     *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
771     *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
772     *
773     * and likewise for t_fpart. There is no risk of loosing precision here
774     * since the fractional parts only use the lower 8bits.
775     */
776    s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
777    if (dims >= 2)
778       t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
779    if (dims >= 3)
780       r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
781 
782    for (j = 0; j < h16.type.length; j += 4) {
783 #ifdef PIPE_ARCH_LITTLE_ENDIAN
784       unsigned subindex = 0;
785 #else
786       unsigned subindex = 1;
787 #endif
788       LLVMValueRef index;
789 
790       index = LLVMConstInt(elem_type, j/2 + subindex, 0);
791       for (i = 0; i < 4; ++i)
792          shuffles_lo[j + i] = index;
793 
794       index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
795       for (i = 0; i < 4; ++i)
796          shuffles_hi[j + i] = index;
797    }
798 
799    shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
800    shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
801 
802    s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
803                                        shuffle_lo, "");
804    s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
805                                        shuffle_hi, "");
806    if (dims >= 2) {
807       t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
808                                           shuffle_lo, "");
809       t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
810                                           shuffle_hi, "");
811    }
812    if (dims >= 3) {
813       r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
814                                           shuffle_lo, "");
815       r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
816                                           shuffle_hi, "");
817    }
818 
819    /*
820     * Fetch the pixels as 4 x 32bit (rgba order might differ):
821     *
822     *   rgba0 rgba1 rgba2 rgba3
823     *
824     * bit cast them into 16 x u8
825     *
826     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
827     *
828     * unpack them into two 8 x i16:
829     *
830     *   r0 g0 b0 a0 r1 g1 b1 a1
831     *   r2 g2 b2 a2 r3 g3 b3 a3
832     *
833     * The higher 8 bits of the resulting elements will be zero.
834     */
835    numj = 1 + (dims >= 2);
836    numk = 1 + (dims >= 3);
837 
838    for (k = 0; k < numk; k++) {
839       for (j = 0; j < numj; j++) {
840          for (i = 0; i < 2; i++) {
841             LLVMValueRef rgba8;
842 
843             if (util_format_is_rgba8_variant(bld->format_desc)) {
844                /*
845                 * Given the format is a rgba8, just read the pixels as is,
846                 * without any swizzling. Swizzling will be done later.
847                 */
848                rgba8 = lp_build_gather(bld->gallivm,
849                                        bld->texel_type.length,
850                                        bld->format_desc->block.bits,
851                                        bld->texel_type.width,
852                                        data_ptr, offset[k][j][i]);
853 
854                rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
855             }
856             else {
857                rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
858                                                bld->format_desc,
859                                                u8n.type,
860                                                data_ptr, offset[k][j][i],
861                                                x_subcoord[i],
862                                                y_subcoord[j]);
863             }
864 
865             /* Expand one 4*rgba8 to two 2*rgba16 */
866             lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
867                              rgba8,
868                              &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
869          }
870       }
871    }
872 
873    /*
874     * Linear interpolation with 8.8 fixed point.
875     */
876    if (bld->static_state->force_nearest_s) {
877       /* special case 1-D lerp */
878       packed_lo = lp_build_lerp(&h16,
879                                 t_fpart_lo,
880                                 neighbors_lo[0][0][0],
881                                 neighbors_lo[0][0][1]);
882 
883       packed_hi = lp_build_lerp(&h16,
884                                 t_fpart_hi,
885                                 neighbors_hi[0][1][0],
886                                 neighbors_hi[0][1][0]);
887    }
888    else if (bld->static_state->force_nearest_t) {
889       /* special case 1-D lerp */
890       packed_lo = lp_build_lerp(&h16,
891                                 s_fpart_lo,
892                                 neighbors_lo[0][0][0],
893                                 neighbors_lo[0][0][1]);
894 
895       packed_hi = lp_build_lerp(&h16,
896                                 s_fpart_hi,
897                                 neighbors_hi[0][0][0],
898                                 neighbors_hi[0][0][1]);
899    }
900    else {
901       /* general 1/2/3-D lerping */
902       if (dims == 1) {
903          packed_lo = lp_build_lerp(&h16,
904                                    s_fpart_lo,
905                                    neighbors_lo[0][0][0],
906                                    neighbors_lo[0][0][1]);
907 
908          packed_hi = lp_build_lerp(&h16,
909                                    s_fpart_hi,
910                                    neighbors_hi[0][0][0],
911                                    neighbors_hi[0][0][1]);
912       }
913       else {
914          /* 2-D lerp */
915          packed_lo = lp_build_lerp_2d(&h16,
916                                       s_fpart_lo, t_fpart_lo,
917                                       neighbors_lo[0][0][0],
918                                       neighbors_lo[0][0][1],
919                                       neighbors_lo[0][1][0],
920                                       neighbors_lo[0][1][1]);
921 
922          packed_hi = lp_build_lerp_2d(&h16,
923                                       s_fpart_hi, t_fpart_hi,
924                                       neighbors_hi[0][0][0],
925                                       neighbors_hi[0][0][1],
926                                       neighbors_hi[0][1][0],
927                                       neighbors_hi[0][1][1]);
928 
929          if (dims >= 3) {
930             LLVMValueRef packed_lo2, packed_hi2;
931 
932             /* lerp in the second z slice */
933             packed_lo2 = lp_build_lerp_2d(&h16,
934                                           s_fpart_lo, t_fpart_lo,
935                                           neighbors_lo[1][0][0],
936                                           neighbors_lo[1][0][1],
937                                           neighbors_lo[1][1][0],
938                                           neighbors_lo[1][1][1]);
939 
940             packed_hi2 = lp_build_lerp_2d(&h16,
941                                           s_fpart_hi, t_fpart_hi,
942                                           neighbors_hi[1][0][0],
943                                           neighbors_hi[1][0][1],
944                                           neighbors_hi[1][1][0],
945                                           neighbors_hi[1][1][1]);
946             /* interp between two z slices */
947             packed_lo = lp_build_lerp(&h16, r_fpart_lo,
948                                       packed_lo, packed_lo2);
949             packed_hi = lp_build_lerp(&h16, r_fpart_hi,
950                                       packed_hi, packed_hi2);
951          }
952       }
953    }
954 
955    *colors_lo = packed_lo;
956    *colors_hi = packed_hi;
957 }
958 
959 /**
960  * Sample a single texture image with (bi-)(tri-)linear sampling.
961  * Return filtered color as two vectors of 16-bit fixed point values.
962  */
963 static void
lp_build_sample_image_linear(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)964 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
965                              LLVMValueRef int_size,
966                              LLVMValueRef row_stride_vec,
967                              LLVMValueRef img_stride_vec,
968                              LLVMValueRef data_ptr,
969                              LLVMValueRef s,
970                              LLVMValueRef t,
971                              LLVMValueRef r,
972                              LLVMValueRef *colors_lo,
973                              LLVMValueRef *colors_hi)
974 {
975    const unsigned dims = bld->dims;
976    LLVMBuilderRef builder = bld->gallivm->builder;
977    struct lp_build_context i32;
978    LLVMTypeRef i32_vec_type;
979    LLVMValueRef i32_c8, i32_c128, i32_c255;
980    LLVMValueRef width_vec, height_vec, depth_vec;
981    LLVMValueRef s_ipart, s_fpart, s_float;
982    LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
983    LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
984    LLVMValueRef x_stride, y_stride, z_stride;
985    LLVMValueRef x_offset0, x_offset1;
986    LLVMValueRef y_offset0, y_offset1;
987    LLVMValueRef z_offset0, z_offset1;
988    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
989    LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
990    unsigned x, y, z;
991 
992    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
993 
994    i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
995 
996    lp_build_extract_image_sizes(bld,
997                                 bld->int_size_type,
998                                 bld->int_coord_type,
999                                 int_size,
1000                                 &width_vec,
1001                                 &height_vec,
1002                                 &depth_vec);
1003 
1004    s_float = s; t_float = t; r_float = r;
1005 
1006    if (bld->static_state->normalized_coords) {
1007       LLVMValueRef scaled_size;
1008       LLVMValueRef flt_size;
1009 
1010       /* scale size by 256 (8 fractional bits) */
1011       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
1012 
1013       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
1014 
1015       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
1016    }
1017    else {
1018       /* scale coords by 256 (8 fractional bits) */
1019       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1020       if (dims >= 2)
1021          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1022       if (dims >= 3)
1023          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
1024    }
1025 
1026    /* convert float to int */
1027    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1028    if (dims >= 2)
1029       t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1030    if (dims >= 3)
1031       r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
1032 
1033    /* subtract 0.5 (add -128) */
1034    i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
1035    if (!bld->static_state->force_nearest_s) {
1036       s = LLVMBuildAdd(builder, s, i32_c128, "");
1037    }
1038    if (dims >= 2 && !bld->static_state->force_nearest_t) {
1039       t = LLVMBuildAdd(builder, t, i32_c128, "");
1040    }
1041    if (dims >= 3) {
1042       r = LLVMBuildAdd(builder, r, i32_c128, "");
1043    }
1044 
1045    /* compute floor (shift right 8) */
1046    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
1047    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1048    if (dims >= 2)
1049       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1050    if (dims >= 3)
1051       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
1052 
1053    /* compute fractional part (AND with 0xff) */
1054    i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
1055    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1056    if (dims >= 2)
1057       t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1058    if (dims >= 3)
1059       r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
1060 
1061    /* get pixel, row and image strides */
1062    x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
1063                                  bld->format_desc->block.bits/8);
1064    y_stride = row_stride_vec;
1065    z_stride = img_stride_vec;
1066 
1067    /* do texcoord wrapping and compute texel offsets */
1068    lp_build_sample_wrap_linear_int(bld,
1069                                    bld->format_desc->block.width,
1070                                    s_ipart, &s_fpart, s_float,
1071                                    width_vec, x_stride,
1072                                    bld->static_state->pot_width,
1073                                    bld->static_state->wrap_s,
1074                                    &x_offset0, &x_offset1,
1075                                    &x_subcoord[0], &x_subcoord[1]);
1076    for (z = 0; z < 2; z++) {
1077       for (y = 0; y < 2; y++) {
1078          offset[z][y][0] = x_offset0;
1079          offset[z][y][1] = x_offset1;
1080       }
1081    }
1082 
1083    if (dims >= 2) {
1084       lp_build_sample_wrap_linear_int(bld,
1085                                       bld->format_desc->block.height,
1086                                       t_ipart, &t_fpart, t_float,
1087                                       height_vec, y_stride,
1088                                       bld->static_state->pot_height,
1089                                       bld->static_state->wrap_t,
1090                                       &y_offset0, &y_offset1,
1091                                       &y_subcoord[0], &y_subcoord[1]);
1092 
1093       for (z = 0; z < 2; z++) {
1094          for (x = 0; x < 2; x++) {
1095             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
1096                                            offset[z][0][x], y_offset0);
1097             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
1098                                            offset[z][1][x], y_offset1);
1099          }
1100       }
1101    }
1102 
1103    if (dims >= 3) {
1104       lp_build_sample_wrap_linear_int(bld,
1105                                       bld->format_desc->block.height,
1106                                       r_ipart, &r_fpart, r_float,
1107                                       depth_vec, z_stride,
1108                                       bld->static_state->pot_depth,
1109                                       bld->static_state->wrap_r,
1110                                       &z_offset0, &z_offset1,
1111                                       &z_subcoord[0], &z_subcoord[1]);
1112       for (y = 0; y < 2; y++) {
1113          for (x = 0; x < 2; x++) {
1114             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1115                                            offset[0][y][x], z_offset0);
1116             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
1117                                            offset[1][y][x], z_offset1);
1118          }
1119       }
1120    }
1121    else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1122       LLVMValueRef z_offset;
1123       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
1124       for (y = 0; y < 2; y++) {
1125          for (x = 0; x < 2; x++) {
1126             /* The r coord is the cube face in [0,5] */
1127             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1128                                            offset[0][y][x], z_offset);
1129          }
1130       }
1131    }
1132 
1133    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
1134                                       x_subcoord, y_subcoord,
1135                                       s_fpart, t_fpart, r_fpart,
1136                                       colors_lo, colors_hi);
1137 }
1138 
1139 
1140 /**
1141  * Sample a single texture image with (bi-)(tri-)linear sampling.
1142  * Return filtered color as two vectors of 16-bit fixed point values.
1143  * Does address calcs (except offsets) with floats.
1144  * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
1145  */
1146 static void
lp_build_sample_image_linear_afloat(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef * colors_lo,LLVMValueRef * colors_hi)1147 lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
1148                                     LLVMValueRef int_size,
1149                                     LLVMValueRef row_stride_vec,
1150                                     LLVMValueRef img_stride_vec,
1151                                     LLVMValueRef data_ptr,
1152                                     LLVMValueRef s,
1153                                     LLVMValueRef t,
1154                                     LLVMValueRef r,
1155                                     LLVMValueRef *colors_lo,
1156                                     LLVMValueRef *colors_hi)
1157 {
1158    const unsigned dims = bld->dims;
1159    LLVMValueRef width_vec, height_vec, depth_vec;
1160    LLVMValueRef s_fpart;
1161    LLVMValueRef t_fpart = NULL;
1162    LLVMValueRef r_fpart = NULL;
1163    LLVMValueRef x_stride, y_stride, z_stride;
1164    LLVMValueRef x_offset0, x_offset1;
1165    LLVMValueRef y_offset0, y_offset1;
1166    LLVMValueRef z_offset0, z_offset1;
1167    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
1168    LLVMValueRef x_subcoord[2], y_subcoord[2];
1169    LLVMValueRef flt_size;
1170    LLVMValueRef x_icoord0, x_icoord1;
1171    LLVMValueRef y_icoord0, y_icoord1;
1172    LLVMValueRef z_icoord0, z_icoord1;
1173    unsigned x, y, z;
1174 
1175    flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
1176 
1177    lp_build_extract_image_sizes(bld,
1178                                 bld->float_size_type,
1179                                 bld->coord_type,
1180                                 flt_size,
1181                                 &width_vec,
1182                                 &height_vec,
1183                                 &depth_vec);
1184 
1185    /* do texcoord wrapping and compute texel offsets */
1186    lp_build_sample_wrap_linear_float(bld,
1187                                      bld->format_desc->block.width,
1188                                      s, width_vec,
1189                                      bld->static_state->pot_width,
1190                                      bld->static_state->wrap_s,
1191                                      &x_icoord0, &x_icoord1,
1192                                      &s_fpart,
1193                                      bld->static_state->force_nearest_s);
1194 
1195    if (dims >= 2) {
1196       lp_build_sample_wrap_linear_float(bld,
1197                                         bld->format_desc->block.height,
1198                                         t, height_vec,
1199                                         bld->static_state->pot_height,
1200                                         bld->static_state->wrap_t,
1201                                         &y_icoord0, &y_icoord1,
1202                                         &t_fpart,
1203                                         bld->static_state->force_nearest_t);
1204 
1205       if (dims >= 3) {
1206          lp_build_sample_wrap_linear_float(bld,
1207                                            bld->format_desc->block.height,
1208                                            r, depth_vec,
1209                                            bld->static_state->pot_depth,
1210                                            bld->static_state->wrap_r,
1211                                            &z_icoord0, &z_icoord1,
1212                                            &r_fpart, 0);
1213       }
1214    }
1215 
1216    /*
1217     * From here on we deal with ints, and we should split up the 256bit
1218     * vectors manually for better generated code.
1219     */
1220 
1221    /* get pixel, row and image strides */
1222    x_stride = lp_build_const_vec(bld->gallivm,
1223                                  bld->int_coord_bld.type,
1224                                  bld->format_desc->block.bits/8);
1225    y_stride = row_stride_vec;
1226    z_stride = img_stride_vec;
1227 
1228    /*
1229     * compute texel offset -
1230     * cannot do offset calc with floats, difficult for block-based formats,
1231     * and not enough precision anyway.
1232     */
1233    lp_build_sample_partial_offset(&bld->int_coord_bld,
1234                                   bld->format_desc->block.width,
1235                                   x_icoord0, x_stride,
1236                                   &x_offset0, &x_subcoord[0]);
1237    lp_build_sample_partial_offset(&bld->int_coord_bld,
1238                                   bld->format_desc->block.width,
1239                                   x_icoord1, x_stride,
1240                                   &x_offset1, &x_subcoord[1]);
1241    for (z = 0; z < 2; z++) {
1242       for (y = 0; y < 2; y++) {
1243          offset[z][y][0] = x_offset0;
1244          offset[z][y][1] = x_offset1;
1245       }
1246    }
1247 
1248    if (dims >= 2) {
1249       lp_build_sample_partial_offset(&bld->int_coord_bld,
1250                                      bld->format_desc->block.height,
1251                                      y_icoord0, y_stride,
1252                                      &y_offset0, &y_subcoord[0]);
1253       lp_build_sample_partial_offset(&bld->int_coord_bld,
1254                                      bld->format_desc->block.height,
1255                                      y_icoord1, y_stride,
1256                                      &y_offset1, &y_subcoord[1]);
1257       for (z = 0; z < 2; z++) {
1258          for (x = 0; x < 2; x++) {
1259             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
1260                                            offset[z][0][x], y_offset0);
1261             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
1262                                            offset[z][1][x], y_offset1);
1263          }
1264       }
1265    }
1266 
1267    if (dims >= 3) {
1268       LLVMValueRef z_subcoord[2];
1269       lp_build_sample_partial_offset(&bld->int_coord_bld,
1270                                      1,
1271                                      z_icoord0, z_stride,
1272                                      &z_offset0, &z_subcoord[0]);
1273       lp_build_sample_partial_offset(&bld->int_coord_bld,
1274                                      1,
1275                                      z_icoord1, z_stride,
1276                                      &z_offset1, &z_subcoord[1]);
1277       for (y = 0; y < 2; y++) {
1278          for (x = 0; x < 2; x++) {
1279             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1280                                            offset[0][y][x], z_offset0);
1281             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
1282                                            offset[1][y][x], z_offset1);
1283          }
1284       }
1285    }
1286    else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1287       LLVMValueRef z_offset;
1288       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
1289       for (y = 0; y < 2; y++) {
1290          for (x = 0; x < 2; x++) {
1291             /* The r coord is the cube face in [0,5] */
1292             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1293                                            offset[0][y][x], z_offset);
1294          }
1295       }
1296    }
1297 
1298    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
1299                                       x_subcoord, y_subcoord,
1300                                       s_fpart, t_fpart, r_fpart,
1301                                       colors_lo, colors_hi);
1302 }
1303 
1304 
1305 /**
1306  * Sample the texture/mipmap using given image filter and mip filter.
1307  * data0_ptr and data1_ptr point to the two mipmap levels to sample
1308  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1309  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1310  */
1311 static void
lp_build_sample_mipmap(struct lp_build_sample_context * bld,unsigned img_filter,unsigned mip_filter,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef colors_lo_var,LLVMValueRef colors_hi_var)1312 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1313                        unsigned img_filter,
1314                        unsigned mip_filter,
1315                        LLVMValueRef s,
1316                        LLVMValueRef t,
1317                        LLVMValueRef r,
1318                        LLVMValueRef ilevel0,
1319                        LLVMValueRef ilevel1,
1320                        LLVMValueRef lod_fpart,
1321                        LLVMValueRef colors_lo_var,
1322                        LLVMValueRef colors_hi_var)
1323 {
1324    LLVMBuilderRef builder = bld->gallivm->builder;
1325    LLVMValueRef size0;
1326    LLVMValueRef size1;
1327    LLVMValueRef row_stride0_vec = NULL;
1328    LLVMValueRef row_stride1_vec = NULL;
1329    LLVMValueRef img_stride0_vec = NULL;
1330    LLVMValueRef img_stride1_vec = NULL;
1331    LLVMValueRef data_ptr0;
1332    LLVMValueRef data_ptr1;
1333    LLVMValueRef colors0_lo, colors0_hi;
1334    LLVMValueRef colors1_lo, colors1_hi;
1335 
1336    /* sample the first mipmap level */
1337    lp_build_mipmap_level_sizes(bld, ilevel0,
1338                                &size0,
1339                                &row_stride0_vec, &img_stride0_vec);
1340    data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1341    if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
1342       if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1343          lp_build_sample_image_nearest_afloat(bld,
1344                                               size0,
1345                                               row_stride0_vec, img_stride0_vec,
1346                                               data_ptr0, s, t, r,
1347                                               &colors0_lo, &colors0_hi);
1348       }
1349       else {
1350          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1351          lp_build_sample_image_linear_afloat(bld,
1352                                              size0,
1353                                              row_stride0_vec, img_stride0_vec,
1354                                              data_ptr0, s, t, r,
1355                                              &colors0_lo, &colors0_hi);
1356       }
1357    }
1358    else {
1359       if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1360          lp_build_sample_image_nearest(bld,
1361                                        size0,
1362                                        row_stride0_vec, img_stride0_vec,
1363                                        data_ptr0, s, t, r,
1364                                        &colors0_lo, &colors0_hi);
1365       }
1366       else {
1367          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1368          lp_build_sample_image_linear(bld,
1369                                       size0,
1370                                       row_stride0_vec, img_stride0_vec,
1371                                       data_ptr0, s, t, r,
1372                                       &colors0_lo, &colors0_hi);
1373       }
1374    }
1375 
1376    /* Store the first level's colors in the output variables */
1377    LLVMBuildStore(builder, colors0_lo, colors_lo_var);
1378    LLVMBuildStore(builder, colors0_hi, colors_hi_var);
1379 
1380    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1381       LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
1382                                                      bld->perquadf_bld.type, 256.0);
1383       LLVMTypeRef i32vec_type = lp_build_vec_type(bld->gallivm, bld->perquadi_bld.type);
1384       struct lp_build_if_state if_ctx;
1385       LLVMValueRef need_lerp;
1386       unsigned num_quads = bld->coord_bld.type.length / 4;
1387       unsigned i;
1388 
1389       lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
1390       lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
1391 
1392       /* need_lerp = lod_fpart > 0 */
1393       if (num_quads == 1) {
1394          need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
1395                                    lod_fpart, bld->perquadi_bld.zero,
1396                                    "need_lerp");
1397       }
1398       else {
1399          /*
1400           * We'll do mip filtering if any of the quads need it.
1401           * It might be better to split the vectors here and only fetch/filter
1402           * quads which need it.
1403           */
1404          /*
1405           * We need to clamp lod_fpart here since we can get negative
1406           * values which would screw up filtering if not all
1407           * lod_fpart values have same sign.
1408           * We can however then skip the greater than comparison.
1409           */
1410          lod_fpart = lp_build_max(&bld->perquadi_bld, lod_fpart,
1411                                   bld->perquadi_bld.zero);
1412          need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, lod_fpart);
1413       }
1414 
1415       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1416       {
1417          struct lp_build_context h16_bld;
1418 
1419          lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
1420 
1421          /* sample the second mipmap level */
1422          lp_build_mipmap_level_sizes(bld, ilevel1,
1423                                      &size1,
1424                                      &row_stride1_vec, &img_stride1_vec);
1425          data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1426 
1427          if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
1428             if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1429                lp_build_sample_image_nearest_afloat(bld,
1430                                                     size1,
1431                                                     row_stride1_vec, img_stride1_vec,
1432                                                     data_ptr1, s, t, r,
1433                                                     &colors1_lo, &colors1_hi);
1434             }
1435             else {
1436                lp_build_sample_image_linear_afloat(bld,
1437                                                    size1,
1438                                                    row_stride1_vec, img_stride1_vec,
1439                                                    data_ptr1, s, t, r,
1440                                                    &colors1_lo, &colors1_hi);
1441             }
1442          }
1443          else {
1444             if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1445                lp_build_sample_image_nearest(bld,
1446                                              size1,
1447                                              row_stride1_vec, img_stride1_vec,
1448                                              data_ptr1, s, t, r,
1449                                              &colors1_lo, &colors1_hi);
1450             }
1451             else {
1452                lp_build_sample_image_linear(bld,
1453                                             size1,
1454                                             row_stride1_vec, img_stride1_vec,
1455                                             data_ptr1, s, t, r,
1456                                             &colors1_lo, &colors1_hi);
1457             }
1458          }
1459 
1460          /* interpolate samples from the two mipmap levels */
1461 
1462          if (num_quads == 1) {
1463             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
1464             lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
1465 
1466 #if HAVE_LLVM == 0x208
1467             /* This is a work-around for a bug in LLVM 2.8.
1468              * Evidently, something goes wrong in the construction of the
1469              * lod_fpart short[8] vector.  Adding this no-effect shuffle seems
1470              * to force the vector to be properly constructed.
1471              * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
1472              */
1473             {
1474                LLVMValueRef shuffles[8], shuffle;
1475                assert(h16_bld.type.length <= Elements(shuffles));
1476                for (i = 0; i < h16_bld.type.length; i++)
1477                   shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
1478                shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
1479                lod_fpart = LLVMBuildShuffleVector(builder,
1480                                                   lod_fpart, lod_fpart,
1481                                                   shuffle, "");
1482             }
1483 #endif
1484 
1485             colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
1486                                        colors0_lo, colors1_lo);
1487             colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
1488                                        colors0_hi, colors1_hi);
1489          }
1490          else {
1491             LLVMValueRef lod_parts[LP_MAX_VECTOR_LENGTH/16];
1492             struct lp_type perquadi16_type = bld->perquadi_bld.type;
1493             perquadi16_type.width /= 2;
1494             perquadi16_type.length *= 2;
1495             lod_fpart = LLVMBuildBitCast(builder, lod_fpart,
1496                                          lp_build_vec_type(bld->gallivm,
1497                                                            perquadi16_type), "");
1498             /* XXX this only works for exactly 2 quads. More quads need shuffle */
1499             assert(num_quads == 2);
1500             for (i = 0; i < num_quads; i++) {
1501                LLVMValueRef indexi2 = lp_build_const_int32(bld->gallivm, i*2);
1502                lod_parts[i] = lp_build_extract_broadcast(bld->gallivm,
1503                                                          perquadi16_type,
1504                                                          h16_bld.type,
1505                                                          lod_fpart,
1506                                                          indexi2);
1507             }
1508             colors0_lo = lp_build_lerp(&h16_bld, lod_parts[0],
1509                                        colors0_lo, colors1_lo);
1510             colors0_hi = lp_build_lerp(&h16_bld, lod_parts[1],
1511                                        colors0_hi, colors1_hi);
1512          }
1513 
1514          LLVMBuildStore(builder, colors0_lo, colors_lo_var);
1515          LLVMBuildStore(builder, colors0_hi, colors_hi_var);
1516       }
1517       lp_build_endif(&if_ctx);
1518    }
1519 }
1520 
1521 
1522 
1523 /**
1524  * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
1525  * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
1526  * but only limited texture coord wrap modes.
1527  */
1528 void
lp_build_sample_aos(struct lp_build_sample_context * bld,unsigned unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef lod_ipart,LLVMValueRef lod_fpart,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef texel_out[4])1529 lp_build_sample_aos(struct lp_build_sample_context *bld,
1530                     unsigned unit,
1531                     LLVMValueRef s,
1532                     LLVMValueRef t,
1533                     LLVMValueRef r,
1534                     LLVMValueRef lod_ipart,
1535                     LLVMValueRef lod_fpart,
1536                     LLVMValueRef ilevel0,
1537                     LLVMValueRef ilevel1,
1538                     LLVMValueRef texel_out[4])
1539 {
1540    struct lp_build_context *int_bld = &bld->int_bld;
1541    LLVMBuilderRef builder = bld->gallivm->builder;
1542    const unsigned mip_filter = bld->static_state->min_mip_filter;
1543    const unsigned min_filter = bld->static_state->min_img_filter;
1544    const unsigned mag_filter = bld->static_state->mag_img_filter;
1545    const unsigned dims = bld->dims;
1546    LLVMValueRef packed, packed_lo, packed_hi;
1547    LLVMValueRef unswizzled[4];
1548    struct lp_build_context h16_bld;
1549 
1550    /* we only support the common/simple wrap modes at this time */
1551    assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
1552    if (dims >= 2)
1553       assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
1554    if (dims >= 3)
1555       assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
1556 
1557 
1558    /* make 16-bit fixed-pt builder context */
1559    lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
1560 
1561    /*
1562     * Get/interpolate texture colors.
1563     */
1564 
1565    packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
1566    packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");
1567 
1568    if (min_filter == mag_filter) {
1569       /* no need to distinguish between minification and magnification */
1570       lp_build_sample_mipmap(bld,
1571                              min_filter, mip_filter,
1572                              s, t, r,
1573                              ilevel0, ilevel1, lod_fpart,
1574                              packed_lo, packed_hi);
1575    }
1576    else {
1577       /* Emit conditional to choose min image filter or mag image filter
1578        * depending on the lod being > 0 or <= 0, respectively.
1579        */
1580       struct lp_build_if_state if_ctx;
1581       LLVMValueRef minify;
1582 
1583       /* minify = lod >= 0.0 */
1584       minify = LLVMBuildICmp(builder, LLVMIntSGE,
1585                              lod_ipart, int_bld->zero, "");
1586 
1587       lp_build_if(&if_ctx, bld->gallivm, minify);
1588       {
1589          /* Use the minification filter */
1590          lp_build_sample_mipmap(bld,
1591                                 min_filter, mip_filter,
1592                                 s, t, r,
1593                                 ilevel0, ilevel1, lod_fpart,
1594                                 packed_lo, packed_hi);
1595       }
1596       lp_build_else(&if_ctx);
1597       {
1598          /* Use the magnification filter */
1599          lp_build_sample_mipmap(bld,
1600                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1601                                 s, t, r,
1602                                 ilevel0, NULL, NULL,
1603                                 packed_lo, packed_hi);
1604       }
1605       lp_build_endif(&if_ctx);
1606    }
1607 
1608    /*
1609     * combine the values stored in 'packed_lo' and 'packed_hi' variables
1610     * into 'packed'
1611     */
1612    packed = lp_build_pack2(bld->gallivm,
1613                            h16_bld.type, lp_type_unorm(8, bld->vector_width),
1614                            LLVMBuildLoad(builder, packed_lo, ""),
1615                            LLVMBuildLoad(builder, packed_hi, ""));
1616 
1617    /*
1618     * Convert to SoA and swizzle.
1619     */
1620    lp_build_rgba8_to_f32_soa(bld->gallivm,
1621                              bld->texel_type,
1622                              packed, unswizzled);
1623 
1624    if (util_format_is_rgba8_variant(bld->format_desc)) {
1625       lp_build_format_swizzle_soa(bld->format_desc,
1626                                   &bld->texel_bld,
1627                                   unswizzled, texel_out);
1628    }
1629    else {
1630       texel_out[0] = unswizzled[0];
1631       texel_out[1] = unswizzled[1];
1632       texel_out[2] = unswizzled[2];
1633       texel_out[3] = unswizzled[3];
1634    }
1635 }
1636