1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2008-2010 VMware, Inc. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * Texture sampling
31 *
32 * Authors:
33 * Brian Paul
34 * Keith Whitwell
35 */
36
37 #include "pipe/p_context.h"
38 #include "pipe/p_defines.h"
39 #include "pipe/p_shader_tokens.h"
40 #include "util/u_math.h"
41 #include "util/u_memory.h"
42 #include "sp_quad.h" /* only for #define QUAD_* tokens */
43 #include "sp_tex_sample.h"
44 #include "sp_tex_tile_cache.h"
45
46
/**
 * Set to one to help debug texture sampling.
 * When non-zero, the image filter functions in this file pass their
 * result to print_sample() after filtering.
 */
#define DEBUG_TEX 0
49
50
51 /*
52 * Return fractional part of 'f'. Used for computing interpolation weights.
53 * Need to be careful with negative values.
54 * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
55 * of improperly weighted linear-filtered textures.
56 * The tests/texwrap.c demo is a good test.
57 */
58 static INLINE float
frac(float f)59 frac(float f)
60 {
61 return f - floorf(f);
62 }
63
64
65
/**
 * Linear interpolation between two values.
 * \param a   interpolation weight; 0 selects v0, 1 selects v1 (up to rounding)
 * \param v0  value at a = 0
 * \param v1  value at a = 1
 * \return v0 + a * (v1 - v0)
 */
static INLINE float
lerp(float a, float v0, float v1)
{
   const float delta = v1 - v0;

   return v0 + a * delta;
}
74
75
76 /**
77 * Do 2D/bilinear interpolation of float values.
78 * v00, v10, v01 and v11 are typically four texture samples in a square/box.
79 * a and b are the horizontal and vertical interpolants.
80 * It's important that this function is inlined when compiled with
81 * optimization! If we find that's not true on some systems, convert
82 * to a macro.
83 */
84 static INLINE float
lerp_2d(float a,float b,float v00,float v10,float v01,float v11)85 lerp_2d(float a, float b,
86 float v00, float v10, float v01, float v11)
87 {
88 const float temp0 = lerp(a, v00, v10);
89 const float temp1 = lerp(a, v01, v11);
90 return lerp(b, temp0, temp1);
91 }
92
93
94 /**
95 * As above, but 3D interpolation of 8 values.
96 */
97 static INLINE float
lerp_3d(float a,float b,float c,float v000,float v100,float v010,float v110,float v001,float v101,float v011,float v111)98 lerp_3d(float a, float b, float c,
99 float v000, float v100, float v010, float v110,
100 float v001, float v101, float v011, float v111)
101 {
102 const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
103 const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
104 return lerp(c, temp0, temp1);
105 }
106
107
108
/**
 * Compute coord modulo size for the repeat wrap modes.
 *
 * C's '%' yields a negative remainder for a negative dividend, so the
 * remainder is shifted up by 'size' and reduced a second time.  This stays
 * branch-free and, unlike adding a fixed multiple of the size in unsigned
 * arithmetic, is correct for every negative coordinate and for sizes that
 * are not a power of two.
 *
 * \param coord  integer texel coordinate, may be negative
 * \param size   texture dimension in texels; must be non-zero and fit in
 *               an int
 * \return coord wrapped into [0, size-1]
 */
static INLINE int
repeat(int coord, unsigned size)
{
   const int isize = (int) size;
   const int rem = coord % isize;   /* in (-size, size) */

   /* unsigned add wraps a negative 'rem' up to rem + size without
    * risking signed overflow
    */
   return (int) (((unsigned) rem + size) % size);
}
120
121
/**
 * Nearest-filter wrap functions.
 * Each one applies a texture coord wrapping mode (PIPE_TEX_WRAP_x) to a
 * single texcoord (S or T or P) and returns the integer texture index.
 * \param s       the incoming texcoord
 * \param size    the texture image size
 * \param icoord  returns the integer texcoord
 */
/* Repeat wrap for nearest sampling: scale, floor, then wrap with repeat(). */
static void
wrap_nearest_repeat(float s, unsigned size, int *icoord)
{
   /* s limited to [0,1) */
   /* i limited to [0,size-1] */
   const int texel = util_ifloor(s * size);

   *icoord = repeat(texel, size);
}
139
140
/**
 * Clamp wrap for nearest sampling.
 * Coordinates at or below 0 select texel 0, coordinates at or above 1
 * select the last texel; anything in between is scaled by 'size' and
 * floored.
 */
static void
wrap_nearest_clamp(float s, unsigned size, int *icoord)
{
   /* s limited to [0,1] */
   /* i limited to [0,size-1] */
   if (s <= 0.0F) {
      *icoord = 0;
      return;
   }
   if (s >= 1.0F) {
      *icoord = size - 1;
      return;
   }
   *icoord = util_ifloor(s * size);
}
153
154
/**
 * Clamp-to-edge wrap for nearest sampling.
 * The coordinate is limited to [1/(2*size), 1 - 1/(2*size)] before being
 * converted to a texel index.
 */
static void
wrap_nearest_clamp_to_edge(float s, unsigned size, int *icoord)
{
   /* s limited to [lower,upper] */
   /* i limited to [0, size-1] */
   const float lower = 1.0F / (2.0F * size);
   const float upper = 1.0F - lower;
   int texel;

   if (s < lower) {
      texel = 0;
   }
   else if (s > upper) {
      texel = size - 1;
   }
   else {
      texel = util_ifloor(s * size);
   }
   *icoord = texel;
}
169
170
/**
 * Clamp-to-border wrap for nearest sampling.
 * Coordinates at or beyond [-1/(2*size), 1 + 1/(2*size)] produce the
 * out-of-range indices -1 and 'size'; the texel fetch functions return
 * the border color for out-of-range indices.
 */
static void
wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord)
{
   /* s limited to [lower,upper] */
   /* i limited to [-1, size] */
   const float lower = -1.0F / (2.0F * size);
   const float upper = 1.0F - lower;
   int texel;

   if (s <= lower) {
      texel = -1;
   }
   else if (s >= upper) {
      texel = size;
   }
   else {
      texel = util_ifloor(s * size);
   }
   *icoord = texel;
}
185
186
/**
 * Mirrored-repeat wrap for nearest sampling.
 * The fractional part of 's' is flipped (1 - frac) whenever floor(s) is
 * odd, then clamped to the edge range and converted to a texel index.
 */
static void
wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord)
{
   const float lower = 1.0F / (2.0F * size);
   const float upper = 1.0F - lower;
   const int odd_period = util_ifloor(s) & 1;
   const float fraction = frac(s);
   const float u = odd_period ? 1.0F - fraction : fraction;

   if (u < lower) {
      *icoord = 0;
      return;
   }
   if (u > upper) {
      *icoord = size - 1;
      return;
   }
   *icoord = util_ifloor(u * size);
}
203
204
/**
 * Mirror-clamp wrap for nearest sampling.
 * The coordinate is mirrored around zero with fabsf() and then clamped
 * as in wrap_nearest_clamp().
 */
static void
wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord)
{
   /* |s| limited to [0,1] */
   /* i limited to [0,size-1] */
   const float mirrored = fabsf(s);
   int texel;

   if (mirrored <= 0.0F) {
      texel = 0;
   }
   else if (mirrored >= 1.0F) {
      texel = size - 1;
   }
   else {
      texel = util_ifloor(mirrored * size);
   }
   *icoord = texel;
}
218
219
/**
 * Mirror-clamp-to-edge wrap for nearest sampling.
 * The coordinate is mirrored around zero with fabsf() and then limited to
 * [1/(2*size), 1 - 1/(2*size)] before conversion to a texel index.
 */
static void
wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int *icoord)
{
   /* |s| limited to [lower,upper] */
   /* i limited to [0, size-1] */
   const float lower = 1.0F / (2.0F * size);
   const float upper = 1.0F - lower;
   const float mirrored = fabsf(s);

   if (mirrored < lower) {
      *icoord = 0;
      return;
   }
   if (mirrored > upper) {
      *icoord = size - 1;
      return;
   }
   *icoord = util_ifloor(mirrored * size);
}
235
236
/**
 * Mirror-clamp-to-border wrap for nearest sampling.
 * The coordinate is mirrored around zero with fabsf(), so it is never
 * negative and can never fall below the (negative) lower border limit;
 * only the upper limit has to be tested.  Coordinates beyond the upper
 * limit yield the out-of-range index 'size'.
 *
 * \param s       the incoming texcoord
 * \param size    the texture image size
 * \param icoord  returns the integer texcoord
 */
static void
wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord)
{
   /* |s| limited to [0,max] */
   /* i limited to [0, size] */
   const float min = -1.0F / (2.0F * size);
   const float max = 1.0F - min;
   const float u = fabsf(s);

   if (u > max)
      *icoord = size;
   else
      *icoord = util_ifloor(u * size);
}
252
253
/**
 * Linear-filter wrap functions.
 * Used to compute texel locations for linear sampling; each function
 * implements one PIPE_TEX_WRAP_x mode for a single texcoord.
 * \param s        the texcoord
 * \param size     the texture image size
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (usually icoord0 + 1)
 * \param w        returns blend factor/weight between texture indices
 */
/* Repeat wrap for linear sampling: both indices are wrapped with repeat(). */
static void
wrap_linear_repeat(float s, unsigned size,
                   int *icoord0, int *icoord1, float *w)
{
   /* shift by half a texel so the weight is relative to texel centers */
   const float u = s * size - 0.5F;
   const int first = repeat(util_ifloor(u), size);

   *icoord0 = first;
   *icoord1 = repeat(first + 1, size);
   *w = frac(u);
}
273
274
/**
 * Clamp wrap for linear sampling.
 * The coordinate is clamped to [0,1]; the resulting indices are not
 * clamped, so they may be -1 or 'size'.
 */
static void
wrap_linear_clamp(float s, unsigned size,
                  int *icoord0, int *icoord1, float *w)
{
   const float clamped = CLAMP(s, 0.0F, 1.0F);
   const float u = clamped * size - 0.5f;
   const int first = util_ifloor(u);

   *icoord0 = first;
   *icoord1 = first + 1;
   *w = frac(u);
}
285
286
/**
 * Clamp-to-edge wrap for linear sampling.
 * The coordinate is clamped to [0,1] and both indices are then forced
 * into [0, size-1].
 */
static void
wrap_linear_clamp_to_edge(float s, unsigned size,
                          int *icoord0, int *icoord1, float *w)
{
   const float clamped = CLAMP(s, 0.0F, 1.0F);
   const float u = clamped * size - 0.5f;
   const int first = util_ifloor(u);
   const int second = first + 1;   /* taken before 'first' is clamped */

   *icoord0 = (first < 0) ? 0 : first;
   *icoord1 = (second >= (int) size) ? (int) (size - 1) : second;
   *w = frac(u);
}
301
302
/**
 * Clamp-to-border wrap for linear sampling.
 * The coordinate is clamped to [-1/(2*size), 1 + 1/(2*size)]; the
 * resulting indices are left unclamped so they may address the border.
 */
static void
wrap_linear_clamp_to_border(float s, unsigned size,
                            int *icoord0, int *icoord1, float *w)
{
   const float lower = -1.0F / (2.0F * size);
   const float upper = 1.0F - lower;
   const float clamped = CLAMP(s, lower, upper);
   const float u = clamped * size - 0.5f;
   const int first = util_ifloor(u);

   *icoord0 = first;
   *icoord1 = first + 1;
   *w = frac(u);
}
315
316
/**
 * Mirrored-repeat wrap for linear sampling.
 * The fractional part of 's' is flipped whenever floor(s) is odd; both
 * resulting indices are forced into [0, size-1].
 */
static void
wrap_linear_mirror_repeat(float s, unsigned size,
                          int *icoord0, int *icoord1, float *w)
{
   const int odd_period = util_ifloor(s) & 1;
   const float fraction = frac(s);
   const float mirrored = odd_period ? 1.0F - fraction : fraction;
   const float u = mirrored * size - 0.5F;
   const int first = util_ifloor(u);
   const int second = first + 1;   /* taken before 'first' is clamped */

   *icoord0 = (first < 0) ? 0 : first;
   *icoord1 = (second >= (int) size) ? (int) (size - 1) : second;
   *w = frac(u);
}
334
335
/**
 * Mirror-clamp wrap for linear sampling.
 * |s| is limited to at most 1 before scaling; the resulting indices are
 * not clamped.
 */
static void
wrap_linear_mirror_clamp(float s, unsigned size,
                         int *icoord0, int *icoord1, float *w)
{
   const float mirrored = fabsf(s);
   float u = (mirrored >= 1.0F) ? (float) size : mirrored * size;
   int first;

   u -= 0.5F;
   first = util_ifloor(u);

   *icoord0 = first;
   *icoord1 = first + 1;
   *w = frac(u);
}
350
351
/**
 * Mirror-clamp-to-edge wrap for linear sampling.
 * |s| is limited to at most 1 before scaling and both indices are forced
 * into [0, size-1].
 */
static void
wrap_linear_mirror_clamp_to_edge(float s, unsigned size,
                                 int *icoord0, int *icoord1, float *w)
{
   const float mirrored = fabsf(s);
   float u = (mirrored >= 1.0F) ? (float) size : mirrored * size;
   int first, second;

   u -= 0.5F;
   first = util_ifloor(u);
   second = first + 1;   /* taken before 'first' is clamped */

   *icoord0 = (first < 0) ? 0 : first;
   *icoord1 = (second >= (int) size) ? (int) (size - 1) : second;
   *w = frac(u);
}
370
371
/**
 * Mirror-clamp-to-border wrap for linear sampling.
 * The coordinate is mirrored around zero with fabsf(), so it is never
 * negative and cannot reach the (negative) lower border limit; only the
 * upper limit 1 + 1/(2*size) has to be applied.  The resulting indices
 * are left unclamped so they may address the border.
 *
 * \param s        the texcoord
 * \param size     the texture image size
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (icoord0 + 1)
 * \param w        returns blend factor/weight between texture indices
 */
static void
wrap_linear_mirror_clamp_to_border(float s, unsigned size,
                                   int *icoord0, int *icoord1, float *w)
{
   const float min = -1.0F / (2.0F * size);
   const float max = 1.0F - min;
   float u = fabsf(s);

   if (u >= max)
      u = max * size;
   else
      u *= size;
   u -= 0.5F;   /* shift so the weight is relative to texel centers */
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   *w = frac(u);
}
390
391
/**
 * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
 * The floored coordinate is clamped to [0, size-1].
 */
static void
wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord)
{
   const int texel = util_ifloor(s);

   *icoord = CLAMP(texel, 0, (int) size-1);
}
401
402
/**
 * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
 * The coordinate is clamped to [-0.5, size + 0.5] and then floored, so the
 * index may come out as -1 or 'size'.
 */
static void
wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord)
{
   const float clamped = CLAMP(s, -0.5F, (float) size + 0.5F);

   *icoord = util_ifloor(clamped);
}
411
412
/**
 * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
 * The coordinate is clamped to [0.5, size - 0.5] and then floored.
 */
static void
wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord)
{
   const float clamped = CLAMP(s, 0.5F, (float) size - 0.5F);

   *icoord = util_ifloor(clamped);
}
421
422
/**
 * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
 * The half-texel-shifted coordinate is clamped to [0, size-1].
 */
static void
wrap_linear_unorm_clamp(float s, unsigned size,
                        int *icoord0, int *icoord1, float *w)
{
   /* Not exactly what the spec says, but it matches NVIDIA output */
   const float u = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
   const int first = util_ifloor(u);

   *icoord0 = first;
   *icoord1 = first + 1;
   *w = frac(u);
}
436
437
/**
 * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
 * The coordinate is clamped to [-0.5, size + 0.5]; the second index is
 * additionally limited to size-1 while the first is left as computed.
 */
static void
wrap_linear_unorm_clamp_to_border(float s, unsigned size,
                                  int *icoord0, int *icoord1, float *w)
{
   const float clamped = CLAMP(s, -0.5F, (float) size + 0.5F);
   const float u = clamped - 0.5F;
   const int first = util_ifloor(u);
   const int second = first + 1;

   *icoord0 = first;
   *icoord1 = (second > (int) size - 1) ? (int) (size - 1) : second;
   *w = frac(u);
}
453
454
/**
 * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
 * The coordinate is clamped to [0.5, size - 0.5]; the second index is
 * additionally limited to size-1.
 */
static void
wrap_linear_unorm_clamp_to_edge(float s, unsigned size,
                                int *icoord0, int *icoord1, float *w)
{
   const float clamped = CLAMP(s, +0.5F, (float) size - 0.5F);
   const float u = clamped - 0.5F;
   const int first = util_ifloor(u);
   const int second = first + 1;

   *icoord0 = first;
   *icoord1 = (second > (int) size - 1) ? (int) (size - 1) : second;
   *w = frac(u);
}
470
471
472 /**
473 * Do coordinate to array index conversion. For array textures.
474 */
475 static INLINE void
wrap_array_layer(float coord,unsigned size,int * layer)476 wrap_array_layer(float coord, unsigned size, int *layer)
477 {
478 int c = util_ifloor(coord + 0.5F);
479 *layer = CLAMP(c, 0, size - 1);
480 }
481
482
483 /**
484 * Examine the quad's texture coordinates to compute the partial
485 * derivatives w.r.t X and Y, then compute lambda (level of detail).
486 */
487 static float
compute_lambda_1d(const struct sp_sampler_variant * samp,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE])488 compute_lambda_1d(const struct sp_sampler_variant *samp,
489 const float s[TGSI_QUAD_SIZE],
490 const float t[TGSI_QUAD_SIZE],
491 const float p[TGSI_QUAD_SIZE])
492 {
493 const struct pipe_resource *texture = samp->view->texture;
494 float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
495 float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
496 float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
497
498 return util_fast_log2(rho);
499 }
500
501
502 static float
compute_lambda_2d(const struct sp_sampler_variant * samp,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE])503 compute_lambda_2d(const struct sp_sampler_variant *samp,
504 const float s[TGSI_QUAD_SIZE],
505 const float t[TGSI_QUAD_SIZE],
506 const float p[TGSI_QUAD_SIZE])
507 {
508 const struct pipe_resource *texture = samp->view->texture;
509 float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
510 float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
511 float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
512 float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
513 float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
514 float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level);
515 float rho = MAX2(maxx, maxy);
516
517 return util_fast_log2(rho);
518 }
519
520
521 static float
compute_lambda_3d(const struct sp_sampler_variant * samp,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE])522 compute_lambda_3d(const struct sp_sampler_variant *samp,
523 const float s[TGSI_QUAD_SIZE],
524 const float t[TGSI_QUAD_SIZE],
525 const float p[TGSI_QUAD_SIZE])
526 {
527 const struct pipe_resource *texture = samp->view->texture;
528 float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
529 float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
530 float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
531 float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
532 float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
533 float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]);
534 float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
535 float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level);
536 float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, samp->view->u.tex.first_level);
537 float rho;
538
539 rho = MAX2(maxx, maxy);
540 rho = MAX2(rho, maxz);
541
542 return util_fast_log2(rho);
543 }
544
545
546 /**
547 * Compute lambda for a vertex texture sampler.
548 * Since there aren't derivatives to use, just return 0.
549 */
550 static float
compute_lambda_vert(const struct sp_sampler_variant * samp,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE])551 compute_lambda_vert(const struct sp_sampler_variant *samp,
552 const float s[TGSI_QUAD_SIZE],
553 const float t[TGSI_QUAD_SIZE],
554 const float p[TGSI_QUAD_SIZE])
555 {
556 return 0.0f;
557 }
558
559
560
/**
 * Get a texel from a texture, using the texture tile cache.
 *
 * \param addr  the template tex address containing cube, z, face info.
 * \param x     the x coord of texel within 2D image
 * \param y     the y coord of texel within 2D image
 * \return pointer to the texel's color values
 *
 * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 * sp_get_cached_tile_tex() function.
 */
572
573
574
575
576 static INLINE const float *
get_texel_2d_no_border(const struct sp_sampler_variant * samp,union tex_tile_address addr,int x,int y)577 get_texel_2d_no_border(const struct sp_sampler_variant *samp,
578 union tex_tile_address addr, int x, int y)
579 {
580 const struct softpipe_tex_cached_tile *tile;
581
582 addr.bits.x = x / TILE_SIZE;
583 addr.bits.y = y / TILE_SIZE;
584 y %= TILE_SIZE;
585 x %= TILE_SIZE;
586
587 tile = sp_get_cached_tile_tex(samp->cache, addr);
588
589 return &tile->data.color[y][x][0];
590 }
591
592
593 static INLINE const float *
get_texel_2d(const struct sp_sampler_variant * samp,union tex_tile_address addr,int x,int y)594 get_texel_2d(const struct sp_sampler_variant *samp,
595 union tex_tile_address addr, int x, int y)
596 {
597 const struct pipe_resource *texture = samp->view->texture;
598 unsigned level = addr.bits.level;
599
600 if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
601 y < 0 || y >= (int) u_minify(texture->height0, level)) {
602 return samp->sampler->border_color.f;
603 }
604 else {
605 return get_texel_2d_no_border( samp, addr, x, y );
606 }
607 }
608
609
610 /* Gather a quad of adjacent texels within a tile:
611 */
612 static INLINE void
get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_variant * samp,union tex_tile_address addr,unsigned x,unsigned y,const float * out[4])613 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_variant *samp,
614 union tex_tile_address addr,
615 unsigned x, unsigned y,
616 const float *out[4])
617 {
618 const struct softpipe_tex_cached_tile *tile;
619
620 addr.bits.x = x / TILE_SIZE;
621 addr.bits.y = y / TILE_SIZE;
622 y %= TILE_SIZE;
623 x %= TILE_SIZE;
624
625 tile = sp_get_cached_tile_tex(samp->cache, addr);
626
627 out[0] = &tile->data.color[y ][x ][0];
628 out[1] = &tile->data.color[y ][x+1][0];
629 out[2] = &tile->data.color[y+1][x ][0];
630 out[3] = &tile->data.color[y+1][x+1][0];
631 }
632
633
634 /* Gather a quad of potentially non-adjacent texels:
635 */
636 static INLINE void
get_texel_quad_2d_no_border(const struct sp_sampler_variant * samp,union tex_tile_address addr,int x0,int y0,int x1,int y1,const float * out[4])637 get_texel_quad_2d_no_border(const struct sp_sampler_variant *samp,
638 union tex_tile_address addr,
639 int x0, int y0,
640 int x1, int y1,
641 const float *out[4])
642 {
643 out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
644 out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
645 out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
646 out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
647 }
648
649 /* Can involve a lot of unnecessary checks for border color:
650 */
651 static INLINE void
get_texel_quad_2d(const struct sp_sampler_variant * samp,union tex_tile_address addr,int x0,int y0,int x1,int y1,const float * out[4])652 get_texel_quad_2d(const struct sp_sampler_variant *samp,
653 union tex_tile_address addr,
654 int x0, int y0,
655 int x1, int y1,
656 const float *out[4])
657 {
658 out[0] = get_texel_2d( samp, addr, x0, y0 );
659 out[1] = get_texel_2d( samp, addr, x1, y0 );
660 out[3] = get_texel_2d( samp, addr, x1, y1 );
661 out[2] = get_texel_2d( samp, addr, x0, y1 );
662 }
663
664
665
666 /* 3d variants:
667 */
668 static INLINE const float *
get_texel_3d_no_border(const struct sp_sampler_variant * samp,union tex_tile_address addr,int x,int y,int z)669 get_texel_3d_no_border(const struct sp_sampler_variant *samp,
670 union tex_tile_address addr, int x, int y, int z)
671 {
672 const struct softpipe_tex_cached_tile *tile;
673
674 addr.bits.x = x / TILE_SIZE;
675 addr.bits.y = y / TILE_SIZE;
676 addr.bits.z = z;
677 y %= TILE_SIZE;
678 x %= TILE_SIZE;
679
680 tile = sp_get_cached_tile_tex(samp->cache, addr);
681
682 return &tile->data.color[y][x][0];
683 }
684
685
686 static INLINE const float *
get_texel_3d(const struct sp_sampler_variant * samp,union tex_tile_address addr,int x,int y,int z)687 get_texel_3d(const struct sp_sampler_variant *samp,
688 union tex_tile_address addr, int x, int y, int z)
689 {
690 const struct pipe_resource *texture = samp->view->texture;
691 unsigned level = addr.bits.level;
692
693 if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
694 y < 0 || y >= (int) u_minify(texture->height0, level) ||
695 z < 0 || z >= (int) u_minify(texture->depth0, level)) {
696 return samp->sampler->border_color.f;
697 }
698 else {
699 return get_texel_3d_no_border( samp, addr, x, y, z );
700 }
701 }
702
703
704 /* Get texel pointer for 1D array texture */
705 static INLINE const float *
get_texel_1d_array(const struct sp_sampler_variant * samp,union tex_tile_address addr,int x,int y)706 get_texel_1d_array(const struct sp_sampler_variant *samp,
707 union tex_tile_address addr, int x, int y)
708 {
709 const struct pipe_resource *texture = samp->view->texture;
710 unsigned level = addr.bits.level;
711
712 if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
713 return samp->sampler->border_color.f;
714 }
715 else {
716 return get_texel_2d_no_border(samp, addr, x, y);
717 }
718 }
719
720
721 /* Get texel pointer for 2D array texture */
722 static INLINE const float *
get_texel_2d_array(const struct sp_sampler_variant * samp,union tex_tile_address addr,int x,int y,int layer)723 get_texel_2d_array(const struct sp_sampler_variant *samp,
724 union tex_tile_address addr, int x, int y, int layer)
725 {
726 const struct pipe_resource *texture = samp->view->texture;
727 unsigned level = addr.bits.level;
728
729 assert(layer < (int) texture->array_size);
730 assert(layer >= 0);
731
732 if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
733 y < 0 || y >= (int) u_minify(texture->height0, level)) {
734 return samp->sampler->border_color.f;
735 }
736 else {
737 return get_texel_3d_no_border(samp, addr, x, y, layer);
738 }
739 }
740
741
/**
 * Given the logbase2 of a mipmap's base level size and a mipmap level,
 * return the size (in texels) of that mipmap level.
 * For example, if level[0].width = 256 then base_pot will be 8.
 * If level = 2, then we'll return 64 (the width at level=2).
 * Return 1 if level > base_pot.
 *
 * \param base_pot  log2 of the base level size; base_pot - level must be
 *                  smaller than the bit width of unsigned
 * \param level     mipmap level
 */
static INLINE unsigned
pot_level_size(unsigned base_pot, unsigned level)
{
   /* unsigned literal: shifting a signed 1 into the sign bit is undefined */
   return (base_pot >= level) ? (1u << (base_pot - level)) : 1u;
}
754
755
756 static void
print_sample(const char * function,const float * rgba)757 print_sample(const char *function, const float *rgba)
758 {
759 debug_printf("%s %g %g %g %g\n",
760 function,
761 rgba[0], rgba[TGSI_NUM_CHANNELS], rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
762 }
763
764
/**
 * Debug helper: print the function name followed by all 16 values of a
 * rgba[channel][quad element] block in a single debug_printf() call.
 * The values are grouped per quad element: the four channels of element 0
 * first, then element 1, and so on.
 */
static void
print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
   debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
                function,
                rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
                rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
                rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
                rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
}
775
776 /* Some image-filter fastpaths:
777 */
778 static INLINE void
img_filter_2d_linear_repeat_POT(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)779 img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
780 float s,
781 float t,
782 float p,
783 unsigned level,
784 unsigned face_id,
785 enum tgsi_sampler_control control,
786 float *rgba)
787 {
788 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
789 unsigned xpot = pot_level_size(samp->xpot, level);
790 unsigned ypot = pot_level_size(samp->ypot, level);
791 unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
792 unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
793 union tex_tile_address addr;
794 int c;
795
796
797
798 float u = s * xpot - 0.5F;
799 float v = t * ypot - 0.5F;
800
801 int uflr = util_ifloor(u);
802 int vflr = util_ifloor(v);
803
804 float xw = u - (float)uflr;
805 float yw = v - (float)vflr;
806
807 int x0 = uflr & (xpot - 1);
808 int y0 = vflr & (ypot - 1);
809
810 const float *tx[4];
811
812 addr.value = 0;
813 addr.bits.level = level;
814
815 /* Can we fetch all four at once:
816 */
817 if (x0 < xmax && y0 < ymax) {
818 get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
819 }
820 else {
821 unsigned x1 = (x0 + 1) & (xpot - 1);
822 unsigned y1 = (y0 + 1) & (ypot - 1);
823 get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
824 }
825
826 /* interpolate R, G, B, A */
827 for (c = 0; c < TGSI_QUAD_SIZE; c++) {
828 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
829 tx[0][c], tx[1][c],
830 tx[2][c], tx[3][c]);
831 }
832
833 if (DEBUG_TEX) {
834 print_sample(__FUNCTION__, rgba);
835 }
836 }
837
838
839 static INLINE void
img_filter_2d_nearest_repeat_POT(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float rgba[TGSI_QUAD_SIZE])840 img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
841 float s,
842 float t,
843 float p,
844 unsigned level,
845 unsigned face_id,
846 enum tgsi_sampler_control control,
847 float rgba[TGSI_QUAD_SIZE])
848 {
849 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
850 unsigned xpot = pot_level_size(samp->xpot, level);
851 unsigned ypot = pot_level_size(samp->ypot, level);
852 const float *out;
853 union tex_tile_address addr;
854 int c;
855
856 float u = s * xpot;
857 float v = t * ypot;
858
859 int uflr = util_ifloor(u);
860 int vflr = util_ifloor(v);
861
862 int x0 = uflr & (xpot - 1);
863 int y0 = vflr & (ypot - 1);
864
865 addr.value = 0;
866 addr.bits.level = level;
867
868 out = get_texel_2d_no_border(samp, addr, x0, y0);
869 for (c = 0; c < TGSI_QUAD_SIZE; c++)
870 rgba[TGSI_NUM_CHANNELS*c] = out[c];
871
872 if (DEBUG_TEX) {
873 print_sample(__FUNCTION__, rgba);
874 }
875 }
876
877
878 static INLINE void
img_filter_2d_nearest_clamp_POT(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float rgba[TGSI_QUAD_SIZE])879 img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
880 float s,
881 float t,
882 float p,
883 unsigned level,
884 unsigned face_id,
885 enum tgsi_sampler_control control,
886 float rgba[TGSI_QUAD_SIZE])
887 {
888 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
889 unsigned xpot = pot_level_size(samp->xpot, level);
890 unsigned ypot = pot_level_size(samp->ypot, level);
891 union tex_tile_address addr;
892 int c;
893
894 float u = s * xpot;
895 float v = t * ypot;
896
897 int x0, y0;
898 const float *out;
899
900 addr.value = 0;
901 addr.bits.level = level;
902
903 x0 = util_ifloor(u);
904 if (x0 < 0)
905 x0 = 0;
906 else if (x0 > xpot - 1)
907 x0 = xpot - 1;
908
909 y0 = util_ifloor(v);
910 if (y0 < 0)
911 y0 = 0;
912 else if (y0 > ypot - 1)
913 y0 = ypot - 1;
914
915 out = get_texel_2d_no_border(samp, addr, x0, y0);
916 for (c = 0; c < TGSI_QUAD_SIZE; c++)
917 rgba[TGSI_NUM_CHANNELS*c] = out[c];
918
919 if (DEBUG_TEX) {
920 print_sample(__FUNCTION__, rgba);
921 }
922 }
923
924
925 static void
img_filter_1d_nearest(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float rgba[TGSI_QUAD_SIZE])926 img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
927 float s,
928 float t,
929 float p,
930 unsigned level,
931 unsigned face_id,
932 enum tgsi_sampler_control control,
933 float rgba[TGSI_QUAD_SIZE])
934 {
935 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
936 const struct pipe_resource *texture = samp->view->texture;
937 int width;
938 int x;
939 union tex_tile_address addr;
940 const float *out;
941 int c;
942
943 width = u_minify(texture->width0, level);
944
945 assert(width > 0);
946
947 addr.value = 0;
948 addr.bits.level = level;
949
950 samp->nearest_texcoord_s(s, width, &x);
951
952 out = get_texel_2d(samp, addr, x, 0);
953 for (c = 0; c < TGSI_QUAD_SIZE; c++)
954 rgba[TGSI_NUM_CHANNELS*c] = out[c];
955
956 if (DEBUG_TEX) {
957 print_sample(__FUNCTION__, rgba);
958 }
959 }
960
961
962 static void
img_filter_1d_array_nearest(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)963 img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler,
964 float s,
965 float t,
966 float p,
967 unsigned level,
968 unsigned face_id,
969 enum tgsi_sampler_control control,
970 float *rgba)
971 {
972 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
973 const struct pipe_resource *texture = samp->view->texture;
974 int width;
975 int x, layer;
976 union tex_tile_address addr;
977 const float *out;
978 int c;
979
980 width = u_minify(texture->width0, level);
981
982 assert(width > 0);
983
984 addr.value = 0;
985 addr.bits.level = level;
986
987 samp->nearest_texcoord_s(s, width, &x);
988 wrap_array_layer(t, texture->array_size, &layer);
989
990 out = get_texel_1d_array(samp, addr, x, layer);
991 for (c = 0; c < TGSI_QUAD_SIZE; c++)
992 rgba[TGSI_NUM_CHANNELS*c] = out[c];
993
994 if (DEBUG_TEX) {
995 print_sample(__FUNCTION__, rgba);
996 }
997 }
998
999
1000 static void
img_filter_2d_nearest(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)1001 img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
1002 float s,
1003 float t,
1004 float p,
1005 unsigned level,
1006 unsigned face_id,
1007 enum tgsi_sampler_control control,
1008 float *rgba)
1009 {
1010 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1011 const struct pipe_resource *texture = samp->view->texture;
1012 int width, height;
1013 int x, y;
1014 union tex_tile_address addr;
1015 const float *out;
1016 int c;
1017
1018 width = u_minify(texture->width0, level);
1019 height = u_minify(texture->height0, level);
1020
1021 assert(width > 0);
1022 assert(height > 0);
1023
1024 addr.value = 0;
1025 addr.bits.level = level;
1026
1027 samp->nearest_texcoord_s(s, width, &x);
1028 samp->nearest_texcoord_t(t, height, &y);
1029
1030 out = get_texel_2d(samp, addr, x, y);
1031 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1032 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1033
1034 if (DEBUG_TEX) {
1035 print_sample(__FUNCTION__, rgba);
1036 }
1037 }
1038
1039
1040 static void
img_filter_2d_array_nearest(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)1041 img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler,
1042 float s,
1043 float t,
1044 float p,
1045 unsigned level,
1046 unsigned face_id,
1047 enum tgsi_sampler_control control,
1048 float *rgba)
1049 {
1050 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1051 const struct pipe_resource *texture = samp->view->texture;
1052 int width, height;
1053 int x, y, layer;
1054 union tex_tile_address addr;
1055 const float *out;
1056 int c;
1057
1058 width = u_minify(texture->width0, level);
1059 height = u_minify(texture->height0, level);
1060
1061 assert(width > 0);
1062 assert(height > 0);
1063
1064 addr.value = 0;
1065 addr.bits.level = level;
1066
1067 samp->nearest_texcoord_s(s, width, &x);
1068 samp->nearest_texcoord_t(t, height, &y);
1069 wrap_array_layer(p, texture->array_size, &layer);
1070
1071 out = get_texel_2d_array(samp, addr, x, y, layer);
1072 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1073 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1074
1075 if (DEBUG_TEX) {
1076 print_sample(__FUNCTION__, rgba);
1077 }
1078 }
1079
1080
1081 static INLINE union tex_tile_address
face(union tex_tile_address addr,unsigned face)1082 face(union tex_tile_address addr, unsigned face )
1083 {
1084 addr.bits.face = face;
1085 return addr;
1086 }
1087
1088
1089 static void
img_filter_cube_nearest(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)1090 img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
1091 float s,
1092 float t,
1093 float p,
1094 unsigned level,
1095 unsigned face_id,
1096 enum tgsi_sampler_control control,
1097 float *rgba)
1098 {
1099 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1100 const struct pipe_resource *texture = samp->view->texture;
1101 int width, height;
1102 int x, y;
1103 union tex_tile_address addr;
1104 const float *out;
1105 int c;
1106
1107 width = u_minify(texture->width0, level);
1108 height = u_minify(texture->height0, level);
1109
1110 assert(width > 0);
1111 assert(height > 0);
1112
1113 addr.value = 0;
1114 addr.bits.level = level;
1115
1116 samp->nearest_texcoord_s(s, width, &x);
1117 samp->nearest_texcoord_t(t, height, &y);
1118
1119 out = get_texel_2d(samp, face(addr, face_id), x, y);
1120 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1121 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1122
1123 if (DEBUG_TEX) {
1124 print_sample(__FUNCTION__, rgba);
1125 }
1126 }
1127
1128
1129 static void
img_filter_3d_nearest(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)1130 img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
1131 float s,
1132 float t,
1133 float p,
1134 unsigned level,
1135 unsigned face_id,
1136 enum tgsi_sampler_control control,
1137 float *rgba)
1138 {
1139 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1140 const struct pipe_resource *texture = samp->view->texture;
1141 int width, height, depth;
1142 int x, y, z;
1143 union tex_tile_address addr;
1144 const float *out;
1145 int c;
1146
1147 width = u_minify(texture->width0, level);
1148 height = u_minify(texture->height0, level);
1149 depth = u_minify(texture->depth0, level);
1150
1151 assert(width > 0);
1152 assert(height > 0);
1153 assert(depth > 0);
1154
1155 samp->nearest_texcoord_s(s, width, &x);
1156 samp->nearest_texcoord_t(t, height, &y);
1157 samp->nearest_texcoord_p(p, depth, &z);
1158
1159 addr.value = 0;
1160 addr.bits.level = level;
1161
1162 out = get_texel_3d(samp, addr, x, y, z);
1163 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1164 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1165 }
1166
1167
1168 static void
img_filter_1d_linear(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)1169 img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
1170 float s,
1171 float t,
1172 float p,
1173 unsigned level,
1174 unsigned face_id,
1175 enum tgsi_sampler_control control,
1176 float *rgba)
1177 {
1178 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1179 const struct pipe_resource *texture = samp->view->texture;
1180 int width;
1181 int x0, x1;
1182 float xw; /* weights */
1183 union tex_tile_address addr;
1184 const float *tx0, *tx1;
1185 int c;
1186
1187 width = u_minify(texture->width0, level);
1188
1189 assert(width > 0);
1190
1191 addr.value = 0;
1192 addr.bits.level = level;
1193
1194 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1195
1196 tx0 = get_texel_2d(samp, addr, x0, 0);
1197 tx1 = get_texel_2d(samp, addr, x1, 0);
1198
1199 /* interpolate R, G, B, A */
1200 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1201 rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1202 }
1203
1204
1205 static void
img_filter_1d_array_linear(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)1206 img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler,
1207 float s,
1208 float t,
1209 float p,
1210 unsigned level,
1211 unsigned face_id,
1212 enum tgsi_sampler_control control,
1213 float *rgba)
1214 {
1215 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1216 const struct pipe_resource *texture = samp->view->texture;
1217 int width;
1218 int x0, x1, layer;
1219 float xw; /* weights */
1220 union tex_tile_address addr;
1221 const float *tx0, *tx1;
1222 int c;
1223
1224 width = u_minify(texture->width0, level);
1225
1226 assert(width > 0);
1227
1228 addr.value = 0;
1229 addr.bits.level = level;
1230
1231 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1232 wrap_array_layer(t, texture->array_size, &layer);
1233
1234 tx0 = get_texel_1d_array(samp, addr, x0, layer);
1235 tx1 = get_texel_1d_array(samp, addr, x1, layer);
1236
1237 /* interpolate R, G, B, A */
1238 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1239 rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1240 }
1241
1242
1243 static void
img_filter_2d_linear(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)1244 img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
1245 float s,
1246 float t,
1247 float p,
1248 unsigned level,
1249 unsigned face_id,
1250 enum tgsi_sampler_control control,
1251 float *rgba)
1252 {
1253 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1254 const struct pipe_resource *texture = samp->view->texture;
1255 int width, height;
1256 int x0, y0, x1, y1;
1257 float xw, yw; /* weights */
1258 union tex_tile_address addr;
1259 const float *tx0, *tx1, *tx2, *tx3;
1260 int c;
1261
1262 width = u_minify(texture->width0, level);
1263 height = u_minify(texture->height0, level);
1264
1265 assert(width > 0);
1266 assert(height > 0);
1267
1268 addr.value = 0;
1269 addr.bits.level = level;
1270
1271 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1272 samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1273
1274 tx0 = get_texel_2d(samp, addr, x0, y0);
1275 tx1 = get_texel_2d(samp, addr, x1, y0);
1276 tx2 = get_texel_2d(samp, addr, x0, y1);
1277 tx3 = get_texel_2d(samp, addr, x1, y1);
1278
1279 /* interpolate R, G, B, A */
1280 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1281 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1282 tx0[c], tx1[c],
1283 tx2[c], tx3[c]);
1284 }
1285
1286
1287 static void
img_filter_2d_array_linear(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)1288 img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler,
1289 float s,
1290 float t,
1291 float p,
1292 unsigned level,
1293 unsigned face_id,
1294 enum tgsi_sampler_control control,
1295 float *rgba)
1296 {
1297 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1298 const struct pipe_resource *texture = samp->view->texture;
1299 int width, height;
1300 int x0, y0, x1, y1, layer;
1301 float xw, yw; /* weights */
1302 union tex_tile_address addr;
1303 const float *tx0, *tx1, *tx2, *tx3;
1304 int c;
1305
1306 width = u_minify(texture->width0, level);
1307 height = u_minify(texture->height0, level);
1308
1309 assert(width > 0);
1310 assert(height > 0);
1311
1312 addr.value = 0;
1313 addr.bits.level = level;
1314
1315 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1316 samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1317 wrap_array_layer(p, texture->array_size, &layer);
1318
1319 tx0 = get_texel_2d_array(samp, addr, x0, y0, layer);
1320 tx1 = get_texel_2d_array(samp, addr, x1, y0, layer);
1321 tx2 = get_texel_2d_array(samp, addr, x0, y1, layer);
1322 tx3 = get_texel_2d_array(samp, addr, x1, y1, layer);
1323
1324 /* interpolate R, G, B, A */
1325 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1326 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1327 tx0[c], tx1[c],
1328 tx2[c], tx3[c]);
1329 }
1330
1331
1332 static void
img_filter_cube_linear(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)1333 img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
1334 float s,
1335 float t,
1336 float p,
1337 unsigned level,
1338 unsigned face_id,
1339 enum tgsi_sampler_control control,
1340 float *rgba)
1341 {
1342 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1343 const struct pipe_resource *texture = samp->view->texture;
1344 int width, height;
1345 int x0, y0, x1, y1;
1346 float xw, yw; /* weights */
1347 union tex_tile_address addr, addrj;
1348 const float *tx0, *tx1, *tx2, *tx3;
1349 int c;
1350
1351 width = u_minify(texture->width0, level);
1352 height = u_minify(texture->height0, level);
1353
1354 assert(width > 0);
1355 assert(height > 0);
1356
1357 addr.value = 0;
1358 addr.bits.level = level;
1359
1360 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1361 samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1362
1363 addrj = face(addr, face_id);
1364 tx0 = get_texel_2d(samp, addrj, x0, y0);
1365 tx1 = get_texel_2d(samp, addrj, x1, y0);
1366 tx2 = get_texel_2d(samp, addrj, x0, y1);
1367 tx3 = get_texel_2d(samp, addrj, x1, y1);
1368
1369 /* interpolate R, G, B, A */
1370 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1371 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1372 tx0[c], tx1[c],
1373 tx2[c], tx3[c]);
1374 }
1375
1376
1377 static void
img_filter_3d_linear(struct tgsi_sampler * tgsi_sampler,float s,float t,float p,unsigned level,unsigned face_id,enum tgsi_sampler_control control,float * rgba)1378 img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
1379 float s,
1380 float t,
1381 float p,
1382 unsigned level,
1383 unsigned face_id,
1384 enum tgsi_sampler_control control,
1385 float *rgba)
1386 {
1387 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1388 const struct pipe_resource *texture = samp->view->texture;
1389 int width, height, depth;
1390 int x0, x1, y0, y1, z0, z1;
1391 float xw, yw, zw; /* interpolation weights */
1392 union tex_tile_address addr;
1393 const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
1394 int c;
1395
1396 width = u_minify(texture->width0, level);
1397 height = u_minify(texture->height0, level);
1398 depth = u_minify(texture->depth0, level);
1399
1400 addr.value = 0;
1401 addr.bits.level = level;
1402
1403 assert(width > 0);
1404 assert(height > 0);
1405 assert(depth > 0);
1406
1407 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1408 samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1409 samp->linear_texcoord_p(p, depth, &z0, &z1, &zw);
1410
1411
1412 tx00 = get_texel_3d(samp, addr, x0, y0, z0);
1413 tx01 = get_texel_3d(samp, addr, x1, y0, z0);
1414 tx02 = get_texel_3d(samp, addr, x0, y1, z0);
1415 tx03 = get_texel_3d(samp, addr, x1, y1, z0);
1416
1417 tx10 = get_texel_3d(samp, addr, x0, y0, z1);
1418 tx11 = get_texel_3d(samp, addr, x1, y0, z1);
1419 tx12 = get_texel_3d(samp, addr, x0, y1, z1);
1420 tx13 = get_texel_3d(samp, addr, x1, y1, z1);
1421
1422 /* interpolate R, G, B, A */
1423 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1424 rgba[TGSI_NUM_CHANNELS*c] = lerp_3d(xw, yw, zw,
1425 tx00[c], tx01[c],
1426 tx02[c], tx03[c],
1427 tx10[c], tx11[c],
1428 tx12[c], tx13[c]);
1429 }
1430
1431
1432 /* Calculate level of detail for every fragment.
1433 * Note that lambda has already been biased by global LOD bias.
1434 */
1435 static INLINE void
compute_lod(const struct pipe_sampler_state * sampler,const float biased_lambda,const float lodbias[TGSI_QUAD_SIZE],float lod[TGSI_QUAD_SIZE])1436 compute_lod(const struct pipe_sampler_state *sampler,
1437 const float biased_lambda,
1438 const float lodbias[TGSI_QUAD_SIZE],
1439 float lod[TGSI_QUAD_SIZE])
1440 {
1441 uint i;
1442
1443 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1444 lod[i] = biased_lambda + lodbias[i];
1445 lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod);
1446 }
1447 }
1448
1449
1450 static void
mip_filter_linear(struct tgsi_sampler * tgsi_sampler,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE],const float c0[TGSI_QUAD_SIZE],enum tgsi_sampler_control control,float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])1451 mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
1452 const float s[TGSI_QUAD_SIZE],
1453 const float t[TGSI_QUAD_SIZE],
1454 const float p[TGSI_QUAD_SIZE],
1455 const float c0[TGSI_QUAD_SIZE],
1456 enum tgsi_sampler_control control,
1457 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1458 {
1459 struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1460 const struct pipe_resource *texture = samp->view->texture;
1461 int j;
1462 float lod[TGSI_QUAD_SIZE];
1463
1464 if (control == tgsi_sampler_lod_bias) {
1465 float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1466 compute_lod(samp->sampler, lambda, c0, lod);
1467 } else {
1468 assert(control == tgsi_sampler_lod_explicit);
1469
1470 memcpy(lod, c0, sizeof(lod));
1471 }
1472
1473 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1474 int level0 = samp->view->u.tex.first_level + (int)lod[j];
1475
1476 if (lod[j] < 0.0)
1477 samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1478
1479 else if (level0 >= texture->last_level)
1480 samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], texture->last_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1481
1482 else {
1483 float levelBlend = frac(lod[j]);
1484 float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1485 int c;
1486
1487 samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level0, samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][0]);
1488 samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level0+1, samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][1]);
1489
1490 for (c = 0; c < 4; c++) {
1491 rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
1492 }
1493 }
1494 }
1495
1496 if (DEBUG_TEX) {
1497 print_sample_4(__FUNCTION__, rgba);
1498 }
1499 }
1500
1501
1502 /**
1503 * Compute nearest mipmap level from texcoords.
1504 * Then sample the texture level for four elements of a quad.
1505 * \param c0 the LOD bias factors, or absolute LODs (depending on control)
1506 */
1507 static void
mip_filter_nearest(struct tgsi_sampler * tgsi_sampler,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE],const float c0[TGSI_QUAD_SIZE],enum tgsi_sampler_control control,float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])1508 mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
1509 const float s[TGSI_QUAD_SIZE],
1510 const float t[TGSI_QUAD_SIZE],
1511 const float p[TGSI_QUAD_SIZE],
1512 const float c0[TGSI_QUAD_SIZE],
1513 enum tgsi_sampler_control control,
1514 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1515 {
1516 struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1517 const struct pipe_resource *texture = samp->view->texture;
1518 float lod[TGSI_QUAD_SIZE];
1519 int j;
1520
1521 if (control == tgsi_sampler_lod_bias) {
1522 float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1523 compute_lod(samp->sampler, lambda, c0, lod);
1524 } else {
1525 assert(control == tgsi_sampler_lod_explicit);
1526
1527 memcpy(lod, c0, sizeof(lod));
1528 }
1529
1530 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1531 if (lod[j] < 0.0)
1532 samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1533 else {
1534 float level = samp->view->u.tex.first_level + (int)(lod[j] + 0.5F) ;
1535 level = MIN2(level, (int)texture->last_level);
1536 samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1537 }
1538 }
1539
1540 if (DEBUG_TEX) {
1541 print_sample_4(__FUNCTION__, rgba);
1542 }
1543 }
1544
1545
1546 static void
mip_filter_none(struct tgsi_sampler * tgsi_sampler,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE],const float c0[TGSI_QUAD_SIZE],enum tgsi_sampler_control control,float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])1547 mip_filter_none(struct tgsi_sampler *tgsi_sampler,
1548 const float s[TGSI_QUAD_SIZE],
1549 const float t[TGSI_QUAD_SIZE],
1550 const float p[TGSI_QUAD_SIZE],
1551 const float c0[TGSI_QUAD_SIZE],
1552 enum tgsi_sampler_control control,
1553 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1554 {
1555 struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1556 float lod[TGSI_QUAD_SIZE];
1557 int j;
1558
1559 if (control == tgsi_sampler_lod_bias) {
1560 float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1561 compute_lod(samp->sampler, lambda, c0, lod);
1562 } else {
1563 assert(control == tgsi_sampler_lod_explicit);
1564
1565 memcpy(lod, c0, sizeof(lod));
1566 }
1567
1568 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1569 if (lod[j] < 0.0) {
1570 samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1571 }
1572 else {
1573 samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1574 }
1575 }
1576 }
1577
1578
1579 static void
mip_filter_none_no_filter_select(struct tgsi_sampler * tgsi_sampler,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE],const float c0[TGSI_QUAD_SIZE],enum tgsi_sampler_control control,float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])1580 mip_filter_none_no_filter_select(struct tgsi_sampler *tgsi_sampler,
1581 const float s[TGSI_QUAD_SIZE],
1582 const float t[TGSI_QUAD_SIZE],
1583 const float p[TGSI_QUAD_SIZE],
1584 const float c0[TGSI_QUAD_SIZE],
1585 enum tgsi_sampler_control control,
1586 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1587 {
1588 struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1589 int j;
1590
1591 for (j = 0; j < TGSI_QUAD_SIZE; j++)
1592 samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1593 }
1594
1595
/* For anisotropic filtering */
#define WEIGHT_LUT_SIZE 1024

/* Filter weight table; NULL until create_filter_table() has succeeded. */
static float *weightLut = NULL;

/**
 * Creates the look-up table used to speed-up EWA sampling.
 *
 * Entry i holds exp(-alpha * r2) with alpha = 2 and
 * r2 = i / (WEIGHT_LUT_SIZE - 1), so r2 runs from 0 to 1 across the table.
 *
 * The table is built only once; later calls return immediately.  If the
 * allocation fails, weightLut is left NULL (instead of writing through a
 * NULL pointer) and a later call will retry.
 */
static void
create_filter_table(void)
{
   const float alpha = 2;
   float *lut;
   unsigned i;

   if (weightLut) {
      return;
   }

   lut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
   if (!lut) {
      return;
   }

   for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
      float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
      float weight = (float) exp(-alpha * r2);
      lut[i] = weight;
   }

   /* publish the table only once it is completely filled in */
   weightLut = lut;
}
1619
1620
/**
 * Elliptical weighted average (EWA) filter for producing high quality
 * anisotropic filtered results.
 * Based on the Higher Quality Elliptical Weighted Average Filter
 * published by Paul S. Heckbert in his Master's Thesis
 * "Fundamentals of Texture Mapping and Image Warping" (1989)
 *
 * For each fragment of the quad, the texels inside the bounding box of
 * the filter ellipse are visited; each texel that falls inside the
 * ellipse is fetched with samp->min_img_filter, weighted by a weightLut
 * entry and accumulated.  The result is the weighted sum divided by the
 * sum of the weights.
 *
 * \param s, t   per-fragment texcoords
 * \param p      per-fragment third texcoord, forwarded to min_img_filter
 * \param level  mipmap level to sample
 * \param control  not read by this function
 * \param dudx, dvdx, dudy, dvdy  texcoord derivatives supplied by the
 *               caller; they are multiplied by 1 / 2^level here
 * \param rgba   output, indexed [channel][fragment]
 *
 * NOTE(review): weightLut is dereferenced without a NULL check; this
 * presumably relies on create_filter_table() having succeeded earlier.
 */
static void
img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler,
                  const float s[TGSI_QUAD_SIZE],
                  const float t[TGSI_QUAD_SIZE],
                  const float p[TGSI_QUAD_SIZE],
                  unsigned level,
                  enum tgsi_sampler_control control,
                  const float dudx, const float dvdx,
                  const float dudy, const float dvdy,
                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   const struct pipe_resource *texture = samp->view->texture;

   /* ??? Won't the image filters blow up if level is negative?
    * NOTE(review): 'level' is unsigned, so the clamp below never changes
    * its value; a negative level computed by a caller would arrive here
    * as a very large unsigned number.
    */
   unsigned level0 = level > 0 ? level : 0;
   float scaling = 1.0 / (1 << level0);
   int width = u_minify(texture->width0, level0);
   int height = u_minify(texture->height0, level0);

   /* derivatives rescaled to the selected mipmap level */
   float ux = dudx * scaling;
   float vx = dvdx * scaling;
   float uy = dudy * scaling;
   float vy = dvdy * scaling;

   /* compute ellipse coefficients to bound the region:
    * A*x*x + B*x*y + C*y*y = F.
    */
   float A = vx*vx+vy*vy+1;
   float B = -2*(ux*vx+uy*vy);
   float C = ux*ux+uy*uy+1;
   float F = A*C-B*B/4.0;

   /* check if it is an ellipse */
   /* ASSERT(F > 0.0); */

   /* Compute the ellipse's (u,v) bounding box in texture space */
   float d = -B*B+4.0*C*A;
   float box_u = 2.0 / d * sqrt(d*C*F); /* box_u -> half of bbox width  */
   float box_v = 2.0 / d * sqrt(A*d*F); /* box_v -> half of bbox height */

   float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
   float s_buffer[TGSI_QUAD_SIZE];
   float t_buffer[TGSI_QUAD_SIZE];
   float weight_buffer[TGSI_QUAD_SIZE];
   unsigned buffer_next;
   int j;
   float den; /* sum of the weights of all texels inside the ellipse */
   float ddq; /* constant second difference of q along u */
   float U;   /* u offset of the first bounding-box column: u0 - tex_u */
   int v;

   /* Scale ellipse formula to directly index the Filter Lookup Table.
    * i.e. scale so that F = WEIGHT_LUT_SIZE-1
    */
   double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
   A *= formScale;
   B *= formScale;
   C *= formScale;
   /* F *= formScale; */ /* no need to scale F as we don't use it below here */

   /* For each quad, the du and dx values are the same and so the ellipse is
    * also the same. Note that texel/image access can only be performed using
    * a quad, i.e. it is not possible to get the pixel value for a single
    * tex coord. In order to have a better performance, the access is buffered
    * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
    * full, then the pixel values are read from the image.
    */
   ddq = 2 * A;

   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
      /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
       * and incrementally update the value of Ax^2+Bxy+Cy^2; when this
       * value, q, is less than F, we're inside the ellipse
       */
      float tex_u = -0.5F + s[j] * texture->width0 * scaling;
      float tex_v = -0.5F + t[j] * texture->height0 * scaling;

      /* integer texel bounding box of the ellipse around (tex_u, tex_v) */
      int u0 = (int) floorf(tex_u - box_u);
      int u1 = (int) ceilf(tex_u + box_u);
      int v0 = (int) floorf(tex_v - box_v);
      int v1 = (int) ceilf(tex_v + box_v);

      /* per-channel weighted sums for this fragment */
      float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
      buffer_next = 0;
      den = 0;
      U = u0 - tex_u;
      for (v = v0; v <= v1; ++v) {
         float V = v - tex_v;
         /* q is the ellipse function at the first texel of the row; dq is
          * its first difference, so the inner loop can step q by adding.
          */
         float dq = A * (2 * U + 1) + B * V;
         float q = (C * V + B * U) * V + A * U * U;

         int u;
         for (u = u0; u <= u1; ++u) {
            /* Note that the ellipse has been pre-scaled so F =
             * WEIGHT_LUT_SIZE - 1
             */
            if (q < WEIGHT_LUT_SIZE) {
               /* as a LUT is used, q must never be negative;
                * should not happen, though
                */
               const int qClamped = q >= 0.0F ? q : 0;
               float weight = weightLut[qClamped];

               /* queue the texel: texcoords are the integer texel position
                * divided by the level's width/height
                */
               weight_buffer[buffer_next] = weight;
               s_buffer[buffer_next] = u / ((float) width);
               t_buffer[buffer_next] = v / ((float) height);

               buffer_next++;
               if (buffer_next == TGSI_QUAD_SIZE) {
                  /* 4 texel coords are in the buffer -> read it now */
                  unsigned jj;
                  /* it is assumed that samp->min_img_filter is set to
                   * img_filter_2d_nearest or one of the
                   * accelerated img_filter_2d_nearest_XXX functions.
                   *
                   * NOTE(review): p is indexed with the buffer slot jj
                   * rather than the fragment index j; harmless if the
                   * filter ignores p, as the assumption above suggests —
                   * confirm.
                   */
                  for (jj = 0; jj < buffer_next; jj++) {
                     samp->min_img_filter(tgsi_sampler, s_buffer[jj], t_buffer[jj], p[jj], level, samp->faces[j],
                                          tgsi_sampler_lod_bias, &rgba_temp[0][jj]);
                     num[0] += weight_buffer[jj] * rgba_temp[0][jj];
                     num[1] += weight_buffer[jj] * rgba_temp[1][jj];
                     num[2] += weight_buffer[jj] * rgba_temp[2][jj];
                     num[3] += weight_buffer[jj] * rgba_temp[3][jj];
                  }

                  buffer_next = 0;
               }

               den += weight;
            }
            q += dq;
            dq += ddq;
         }
      }

      /* if the tex coord buffer contains unread values, we will read
       * them now.
       */
      if (buffer_next > 0) {
         unsigned jj;
         /* it is assumed that samp->min_img_filter is set to
          * img_filter_2d_nearest or one of the
          * accelerated img_filter_2d_nearest_XXX functions.
          */
         for (jj = 0; jj < buffer_next; jj++) {
            samp->min_img_filter(tgsi_sampler, s_buffer[jj], t_buffer[jj], p[jj], level, samp->faces[j],
                                 tgsi_sampler_lod_bias, &rgba_temp[0][jj]);
            num[0] += weight_buffer[jj] * rgba_temp[0][jj];
            num[1] += weight_buffer[jj] * rgba_temp[1][jj];
            num[2] += weight_buffer[jj] * rgba_temp[2][jj];
            num[3] += weight_buffer[jj] * rgba_temp[3][jj];
         }
      }

      if (den <= 0.0F) {
         /* Reaching this place would mean that no pixels intersected
          * the ellipse.  This should never happen because the filter
          * we use always intersects at least one pixel.
          */

         /* not enough pixels in resampling, resort to direct interpolation */
         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level, samp->faces[j],
                              tgsi_sampler_lod_bias, &rgba_temp[0][j]);
         den = 1;
         num[0] = rgba_temp[0][j];
         num[1] = rgba_temp[1][j];
         num[2] = rgba_temp[2][j];
         num[3] = rgba_temp[3][j];
      }

      /* normalize the weighted sums */
      rgba[0][j] = num[0] / den;
      rgba[1][j] = num[1] / den;
      rgba[2][j] = num[2] / den;
      rgba[3][j] = num[3] / den;
   }
}
1808
1809
1810 /**
1811 * Sample 2D texture using an anisotropic filter.
1812 */
1813 static void
mip_filter_linear_aniso(struct tgsi_sampler * tgsi_sampler,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE],const float c0[TGSI_QUAD_SIZE],enum tgsi_sampler_control control,float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])1814 mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler,
1815 const float s[TGSI_QUAD_SIZE],
1816 const float t[TGSI_QUAD_SIZE],
1817 const float p[TGSI_QUAD_SIZE],
1818 const float c0[TGSI_QUAD_SIZE],
1819 enum tgsi_sampler_control control,
1820 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1821 {
1822 struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1823 const struct pipe_resource *texture = samp->view->texture;
1824 int level0;
1825 float lambda;
1826 float lod[TGSI_QUAD_SIZE];
1827
1828 float s_to_u = u_minify(texture->width0, samp->view->u.tex.first_level);
1829 float t_to_v = u_minify(texture->height0, samp->view->u.tex.first_level);
1830 float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
1831 float dudy = (s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
1832 float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
1833 float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
1834
1835 if (control == tgsi_sampler_lod_bias) {
1836 /* note: instead of working with Px and Py, we will use the
1837 * squared length instead, to avoid sqrt.
1838 */
1839 float Px2 = dudx * dudx + dvdx * dvdx;
1840 float Py2 = dudy * dudy + dvdy * dvdy;
1841
1842 float Pmax2;
1843 float Pmin2;
1844 float e;
1845 const float maxEccentricity = samp->sampler->max_anisotropy * samp->sampler->max_anisotropy;
1846
1847 if (Px2 < Py2) {
1848 Pmax2 = Py2;
1849 Pmin2 = Px2;
1850 }
1851 else {
1852 Pmax2 = Px2;
1853 Pmin2 = Py2;
1854 }
1855
1856 /* if the eccentricity of the ellipse is too big, scale up the shorter
1857 * of the two vectors to limit the maximum amount of work per pixel
1858 */
1859 e = Pmax2 / Pmin2;
1860 if (e > maxEccentricity) {
1861 /* float s=e / maxEccentricity;
1862 minor[0] *= s;
1863 minor[1] *= s;
1864 Pmin2 *= s; */
1865 Pmin2 = Pmax2 / maxEccentricity;
1866 }
1867
1868 /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
1869 * this since 0.5*log(x) = log(sqrt(x))
1870 */
1871 lambda = 0.5F * util_fast_log2(Pmin2) + samp->sampler->lod_bias;
1872 compute_lod(samp->sampler, lambda, c0, lod);
1873 }
1874 else {
1875 assert(control == tgsi_sampler_lod_explicit);
1876
1877 memcpy(lod, c0, sizeof(lod));
1878 }
1879
1880 /* XXX: Take into account all lod values.
1881 */
1882 lambda = lod[0];
1883 level0 = samp->view->u.tex.first_level + (int)lambda;
1884
1885 /* If the ellipse covers the whole image, we can
1886 * simply return the average of the whole image.
1887 */
1888 if (level0 >= (int) texture->last_level) {
1889 int j;
1890 for (j = 0; j < TGSI_QUAD_SIZE; j++)
1891 samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], texture->last_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1892 }
1893 else {
1894 /* don't bother interpolating between multiple LODs; it doesn't
1895 * seem to be worth the extra running time.
1896 */
1897 img_filter_2d_ewa(tgsi_sampler, s, t, p, level0, tgsi_sampler_lod_bias,
1898 dudx, dvdx, dudy, dvdy, rgba);
1899 }
1900
1901 if (DEBUG_TEX) {
1902 print_sample_4(__FUNCTION__, rgba);
1903 }
1904 }
1905
1906
1907 /**
1908 * Specialized version of mip_filter_linear with hard-wired calls to
1909 * 2d lambda calculation and 2d_linear_repeat_POT img filters.
1910 */
1911 static void
mip_filter_linear_2d_linear_repeat_POT(struct tgsi_sampler * tgsi_sampler,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE],const float c0[TGSI_QUAD_SIZE],enum tgsi_sampler_control control,float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])1912 mip_filter_linear_2d_linear_repeat_POT(
1913 struct tgsi_sampler *tgsi_sampler,
1914 const float s[TGSI_QUAD_SIZE],
1915 const float t[TGSI_QUAD_SIZE],
1916 const float p[TGSI_QUAD_SIZE],
1917 const float c0[TGSI_QUAD_SIZE],
1918 enum tgsi_sampler_control control,
1919 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1920 {
1921 struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1922 const struct pipe_resource *texture = samp->view->texture;
1923 int j;
1924 float lambda;
1925 float lod[TGSI_QUAD_SIZE];
1926
1927 if (control == tgsi_sampler_lod_bias) {
1928 lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1929 compute_lod(samp->sampler, lambda, c0, lod);
1930 } else {
1931 assert(control == tgsi_sampler_lod_explicit);
1932
1933 memcpy(lod, c0, sizeof(lod));
1934 }
1935
1936 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1937 int level0 = samp->view->u.tex.first_level + (int)lod[j];
1938
1939 /* Catches both negative and large values of level0:
1940 */
1941 if ((unsigned)level0 >= texture->last_level) {
1942 if (level0 < 0)
1943 img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1944 else
1945 img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], samp->view->texture->last_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1946
1947 }
1948 else {
1949 float levelBlend = frac(lod[j]);
1950 float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1951 int c;
1952
1953 img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], level0, samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][0]);
1954 img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], level0+1, samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][1]);
1955
1956 for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1957 rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
1958 }
1959 }
1960
1961 if (DEBUG_TEX) {
1962 print_sample_4(__FUNCTION__, rgba);
1963 }
1964 }
1965
1966
1967 /**
1968 * Do shadow/depth comparisons.
1969 */
1970 static void
sample_compare(struct tgsi_sampler * tgsi_sampler,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE],const float c0[TGSI_QUAD_SIZE],enum tgsi_sampler_control control,float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])1971 sample_compare(struct tgsi_sampler *tgsi_sampler,
1972 const float s[TGSI_QUAD_SIZE],
1973 const float t[TGSI_QUAD_SIZE],
1974 const float p[TGSI_QUAD_SIZE],
1975 const float c0[TGSI_QUAD_SIZE],
1976 enum tgsi_sampler_control control,
1977 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1978 {
1979 struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1980 const struct pipe_sampler_state *sampler = samp->sampler;
1981 int j, k0, k1, k2, k3;
1982 float val;
1983 float pc0, pc1, pc2, pc3;
1984
1985 samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba);
1986
1987 /**
1988 * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
1989 * for 2D Array texture we need to use the 'c0' (aka Q).
1990 * When we sampled the depth texture, the depth value was put into all
1991 * RGBA channels. We look at the red channel here.
1992 */
1993
1994 if (samp->view->texture->target == PIPE_TEXTURE_2D_ARRAY ||
1995 samp->view->texture->target == PIPE_TEXTURE_CUBE) {
1996 pc0 = CLAMP(c0[0], 0.0F, 1.0F);
1997 pc1 = CLAMP(c0[1], 0.0F, 1.0F);
1998 pc2 = CLAMP(c0[2], 0.0F, 1.0F);
1999 pc3 = CLAMP(c0[3], 0.0F, 1.0F);
2000 } else {
2001 pc0 = CLAMP(p[0], 0.0F, 1.0F);
2002 pc1 = CLAMP(p[1], 0.0F, 1.0F);
2003 pc2 = CLAMP(p[2], 0.0F, 1.0F);
2004 pc3 = CLAMP(p[3], 0.0F, 1.0F);
2005 }
2006 /* compare four texcoords vs. four texture samples */
2007 switch (sampler->compare_func) {
2008 case PIPE_FUNC_LESS:
2009 k0 = pc0 < rgba[0][0];
2010 k1 = pc1 < rgba[0][1];
2011 k2 = pc2 < rgba[0][2];
2012 k3 = pc3 < rgba[0][3];
2013 break;
2014 case PIPE_FUNC_LEQUAL:
2015 k0 = pc0 <= rgba[0][0];
2016 k1 = pc1 <= rgba[0][1];
2017 k2 = pc2 <= rgba[0][2];
2018 k3 = pc3 <= rgba[0][3];
2019 break;
2020 case PIPE_FUNC_GREATER:
2021 k0 = pc0 > rgba[0][0];
2022 k1 = pc1 > rgba[0][1];
2023 k2 = pc2 > rgba[0][2];
2024 k3 = pc3 > rgba[0][3];
2025 break;
2026 case PIPE_FUNC_GEQUAL:
2027 k0 = pc0 >= rgba[0][0];
2028 k1 = pc1 >= rgba[0][1];
2029 k2 = pc2 >= rgba[0][2];
2030 k3 = pc3 >= rgba[0][3];
2031 break;
2032 case PIPE_FUNC_EQUAL:
2033 k0 = pc0 == rgba[0][0];
2034 k1 = pc1 == rgba[0][1];
2035 k2 = pc2 == rgba[0][2];
2036 k3 = pc3 == rgba[0][3];
2037 break;
2038 case PIPE_FUNC_NOTEQUAL:
2039 k0 = pc0 != rgba[0][0];
2040 k1 = pc1 != rgba[0][1];
2041 k2 = pc2 != rgba[0][2];
2042 k3 = pc3 != rgba[0][3];
2043 break;
2044 case PIPE_FUNC_ALWAYS:
2045 k0 = k1 = k2 = k3 = 1;
2046 break;
2047 case PIPE_FUNC_NEVER:
2048 k0 = k1 = k2 = k3 = 0;
2049 break;
2050 default:
2051 k0 = k1 = k2 = k3 = 0;
2052 assert(0);
2053 break;
2054 }
2055
2056 if (sampler->mag_img_filter == PIPE_TEX_FILTER_LINEAR) {
2057 /* convert four pass/fail values to an intensity in [0,1] */
2058 val = 0.25F * (k0 + k1 + k2 + k3);
2059
2060 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
2061 for (j = 0; j < 4; j++) {
2062 rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
2063 rgba[3][j] = 1.0F;
2064 }
2065 } else {
2066 for (j = 0; j < 4; j++) {
2067 rgba[0][j] = k0;
2068 rgba[1][j] = k1;
2069 rgba[2][j] = k2;
2070 rgba[3][j] = 1.0F;
2071 }
2072 }
2073 }
2074
2075
2076 /**
2077 * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
2078 * Put face info into the sampler faces[] array.
2079 */
2080 static void
sample_cube(struct tgsi_sampler * tgsi_sampler,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE],const float c0[TGSI_QUAD_SIZE],enum tgsi_sampler_control control,float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])2081 sample_cube(struct tgsi_sampler *tgsi_sampler,
2082 const float s[TGSI_QUAD_SIZE],
2083 const float t[TGSI_QUAD_SIZE],
2084 const float p[TGSI_QUAD_SIZE],
2085 const float c0[TGSI_QUAD_SIZE],
2086 enum tgsi_sampler_control control,
2087 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2088 {
2089 struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2090 unsigned j;
2091 float ssss[4], tttt[4];
2092
2093 /* Not actually used, but the intermediate steps that do the
2094 * dereferencing don't know it.
2095 */
2096 static const float pppp[4] = { 0, 0, 0, 0 };
2097
2098 /*
2099 major axis
2100 direction target sc tc ma
2101 ---------- ------------------------------- --- --- ---
2102 +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
2103 -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
2104 +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
2105 -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
2106 +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
2107 -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
2108 */
2109
2110 /* Choose the cube face and compute new s/t coords for the 2D face.
2111 *
2112 * Use the same cube face for all four pixels in the quad.
2113 *
2114 * This isn't ideal, but if we want to use a different cube face
2115 * per pixel in the quad, we'd have to also compute the per-face
2116 * LOD here too. That's because the four post-face-selection
2117 * texcoords are no longer related to each other (they're
2118 * per-face!) so we can't use subtraction to compute the partial
2119 * deriviates to compute the LOD. Doing so (near cube edges
2120 * anyway) gives us pretty much random values.
2121 */
2122 {
2123 /* use the average of the four pixel's texcoords to choose the face */
2124 const float rx = 0.25F * (s[0] + s[1] + s[2] + s[3]);
2125 const float ry = 0.25F * (t[0] + t[1] + t[2] + t[3]);
2126 const float rz = 0.25F * (p[0] + p[1] + p[2] + p[3]);
2127 const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
2128
2129 if (arx >= ary && arx >= arz) {
2130 float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
2131 uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
2132 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2133 const float ima = -0.5F / fabsf(s[j]);
2134 ssss[j] = sign * p[j] * ima + 0.5F;
2135 tttt[j] = t[j] * ima + 0.5F;
2136 samp->faces[j] = face;
2137 }
2138 }
2139 else if (ary >= arx && ary >= arz) {
2140 float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
2141 uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
2142 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2143 const float ima = -0.5F / fabsf(t[j]);
2144 ssss[j] = -s[j] * ima + 0.5F;
2145 tttt[j] = sign * -p[j] * ima + 0.5F;
2146 samp->faces[j] = face;
2147 }
2148 }
2149 else {
2150 float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
2151 uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
2152 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2153 const float ima = -0.5F / fabsf(p[j]);
2154 ssss[j] = sign * -s[j] * ima + 0.5F;
2155 tttt[j] = t[j] * ima + 0.5F;
2156 samp->faces[j] = face;
2157 }
2158 }
2159 }
2160
2161 /* In our little pipeline, the compare stage is next. If compare
2162 * is not active, this will point somewhere deeper into the
2163 * pipeline, eg. to mip_filter or even img_filter.
2164 */
2165 samp->compare(tgsi_sampler, ssss, tttt, pppp, c0, control, rgba);
2166 }
2167
2168
2169 static void
do_swizzling(const struct sp_sampler_variant * samp,float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])2170 do_swizzling(const struct sp_sampler_variant *samp,
2171 float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
2172 float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2173 {
2174 int j;
2175 const unsigned swizzle_r = samp->key.bits.swizzle_r;
2176 const unsigned swizzle_g = samp->key.bits.swizzle_g;
2177 const unsigned swizzle_b = samp->key.bits.swizzle_b;
2178 const unsigned swizzle_a = samp->key.bits.swizzle_a;
2179
2180 switch (swizzle_r) {
2181 case PIPE_SWIZZLE_ZERO:
2182 for (j = 0; j < 4; j++)
2183 out[0][j] = 0.0f;
2184 break;
2185 case PIPE_SWIZZLE_ONE:
2186 for (j = 0; j < 4; j++)
2187 out[0][j] = 1.0f;
2188 break;
2189 default:
2190 assert(swizzle_r < 4);
2191 for (j = 0; j < 4; j++)
2192 out[0][j] = in[swizzle_r][j];
2193 }
2194
2195 switch (swizzle_g) {
2196 case PIPE_SWIZZLE_ZERO:
2197 for (j = 0; j < 4; j++)
2198 out[1][j] = 0.0f;
2199 break;
2200 case PIPE_SWIZZLE_ONE:
2201 for (j = 0; j < 4; j++)
2202 out[1][j] = 1.0f;
2203 break;
2204 default:
2205 assert(swizzle_g < 4);
2206 for (j = 0; j < 4; j++)
2207 out[1][j] = in[swizzle_g][j];
2208 }
2209
2210 switch (swizzle_b) {
2211 case PIPE_SWIZZLE_ZERO:
2212 for (j = 0; j < 4; j++)
2213 out[2][j] = 0.0f;
2214 break;
2215 case PIPE_SWIZZLE_ONE:
2216 for (j = 0; j < 4; j++)
2217 out[2][j] = 1.0f;
2218 break;
2219 default:
2220 assert(swizzle_b < 4);
2221 for (j = 0; j < 4; j++)
2222 out[2][j] = in[swizzle_b][j];
2223 }
2224
2225 switch (swizzle_a) {
2226 case PIPE_SWIZZLE_ZERO:
2227 for (j = 0; j < 4; j++)
2228 out[3][j] = 0.0f;
2229 break;
2230 case PIPE_SWIZZLE_ONE:
2231 for (j = 0; j < 4; j++)
2232 out[3][j] = 1.0f;
2233 break;
2234 default:
2235 assert(swizzle_a < 4);
2236 for (j = 0; j < 4; j++)
2237 out[3][j] = in[swizzle_a][j];
2238 }
2239 }
2240
2241
2242 static void
sample_swizzle(struct tgsi_sampler * tgsi_sampler,const float s[TGSI_QUAD_SIZE],const float t[TGSI_QUAD_SIZE],const float p[TGSI_QUAD_SIZE],const float c0[TGSI_QUAD_SIZE],enum tgsi_sampler_control control,float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])2243 sample_swizzle(struct tgsi_sampler *tgsi_sampler,
2244 const float s[TGSI_QUAD_SIZE],
2245 const float t[TGSI_QUAD_SIZE],
2246 const float p[TGSI_QUAD_SIZE],
2247 const float c0[TGSI_QUAD_SIZE],
2248 enum tgsi_sampler_control control,
2249 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2250 {
2251 struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2252 float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2253
2254 samp->sample_target(tgsi_sampler, s, t, p, c0, control, rgba_temp);
2255
2256 do_swizzling(samp, rgba_temp, rgba);
2257 }
2258
2259
2260 static wrap_nearest_func
get_nearest_unorm_wrap(unsigned mode)2261 get_nearest_unorm_wrap(unsigned mode)
2262 {
2263 switch (mode) {
2264 case PIPE_TEX_WRAP_CLAMP:
2265 return wrap_nearest_unorm_clamp;
2266 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2267 return wrap_nearest_unorm_clamp_to_edge;
2268 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2269 return wrap_nearest_unorm_clamp_to_border;
2270 default:
2271 assert(0);
2272 return wrap_nearest_unorm_clamp;
2273 }
2274 }
2275
2276
2277 static wrap_nearest_func
get_nearest_wrap(unsigned mode)2278 get_nearest_wrap(unsigned mode)
2279 {
2280 switch (mode) {
2281 case PIPE_TEX_WRAP_REPEAT:
2282 return wrap_nearest_repeat;
2283 case PIPE_TEX_WRAP_CLAMP:
2284 return wrap_nearest_clamp;
2285 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2286 return wrap_nearest_clamp_to_edge;
2287 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2288 return wrap_nearest_clamp_to_border;
2289 case PIPE_TEX_WRAP_MIRROR_REPEAT:
2290 return wrap_nearest_mirror_repeat;
2291 case PIPE_TEX_WRAP_MIRROR_CLAMP:
2292 return wrap_nearest_mirror_clamp;
2293 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2294 return wrap_nearest_mirror_clamp_to_edge;
2295 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2296 return wrap_nearest_mirror_clamp_to_border;
2297 default:
2298 assert(0);
2299 return wrap_nearest_repeat;
2300 }
2301 }
2302
2303
2304 static wrap_linear_func
get_linear_unorm_wrap(unsigned mode)2305 get_linear_unorm_wrap(unsigned mode)
2306 {
2307 switch (mode) {
2308 case PIPE_TEX_WRAP_CLAMP:
2309 return wrap_linear_unorm_clamp;
2310 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2311 return wrap_linear_unorm_clamp_to_edge;
2312 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2313 return wrap_linear_unorm_clamp_to_border;
2314 default:
2315 assert(0);
2316 return wrap_linear_unorm_clamp;
2317 }
2318 }
2319
2320
2321 static wrap_linear_func
get_linear_wrap(unsigned mode)2322 get_linear_wrap(unsigned mode)
2323 {
2324 switch (mode) {
2325 case PIPE_TEX_WRAP_REPEAT:
2326 return wrap_linear_repeat;
2327 case PIPE_TEX_WRAP_CLAMP:
2328 return wrap_linear_clamp;
2329 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2330 return wrap_linear_clamp_to_edge;
2331 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2332 return wrap_linear_clamp_to_border;
2333 case PIPE_TEX_WRAP_MIRROR_REPEAT:
2334 return wrap_linear_mirror_repeat;
2335 case PIPE_TEX_WRAP_MIRROR_CLAMP:
2336 return wrap_linear_mirror_clamp;
2337 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2338 return wrap_linear_mirror_clamp_to_edge;
2339 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2340 return wrap_linear_mirror_clamp_to_border;
2341 default:
2342 assert(0);
2343 return wrap_linear_repeat;
2344 }
2345 }
2346
2347
2348 /**
2349 * Is swizzling needed for the given state key?
2350 */
2351 static INLINE bool
any_swizzle(union sp_sampler_key key)2352 any_swizzle(union sp_sampler_key key)
2353 {
2354 return (key.bits.swizzle_r != PIPE_SWIZZLE_RED ||
2355 key.bits.swizzle_g != PIPE_SWIZZLE_GREEN ||
2356 key.bits.swizzle_b != PIPE_SWIZZLE_BLUE ||
2357 key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA);
2358 }
2359
2360
2361 static compute_lambda_func
get_lambda_func(const union sp_sampler_key key)2362 get_lambda_func(const union sp_sampler_key key)
2363 {
2364 if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
2365 return compute_lambda_vert;
2366
2367 switch (key.bits.target) {
2368 case PIPE_TEXTURE_1D:
2369 case PIPE_TEXTURE_1D_ARRAY:
2370 return compute_lambda_1d;
2371 case PIPE_TEXTURE_2D:
2372 case PIPE_TEXTURE_2D_ARRAY:
2373 case PIPE_TEXTURE_RECT:
2374 case PIPE_TEXTURE_CUBE:
2375 return compute_lambda_2d;
2376 case PIPE_TEXTURE_3D:
2377 return compute_lambda_3d;
2378 default:
2379 assert(0);
2380 return compute_lambda_1d;
2381 }
2382 }
2383
2384
2385 static img_filter_func
get_img_filter(const union sp_sampler_key key,unsigned filter,const struct pipe_sampler_state * sampler)2386 get_img_filter(const union sp_sampler_key key,
2387 unsigned filter,
2388 const struct pipe_sampler_state *sampler)
2389 {
2390 switch (key.bits.target) {
2391 case PIPE_TEXTURE_1D:
2392 if (filter == PIPE_TEX_FILTER_NEAREST)
2393 return img_filter_1d_nearest;
2394 else
2395 return img_filter_1d_linear;
2396 break;
2397 case PIPE_TEXTURE_1D_ARRAY:
2398 if (filter == PIPE_TEX_FILTER_NEAREST)
2399 return img_filter_1d_array_nearest;
2400 else
2401 return img_filter_1d_array_linear;
2402 break;
2403 case PIPE_TEXTURE_2D:
2404 case PIPE_TEXTURE_RECT:
2405 /* Try for fast path:
2406 */
2407 if (key.bits.is_pot &&
2408 sampler->wrap_s == sampler->wrap_t &&
2409 sampler->normalized_coords)
2410 {
2411 switch (sampler->wrap_s) {
2412 case PIPE_TEX_WRAP_REPEAT:
2413 switch (filter) {
2414 case PIPE_TEX_FILTER_NEAREST:
2415 return img_filter_2d_nearest_repeat_POT;
2416 case PIPE_TEX_FILTER_LINEAR:
2417 return img_filter_2d_linear_repeat_POT;
2418 default:
2419 break;
2420 }
2421 break;
2422 case PIPE_TEX_WRAP_CLAMP:
2423 switch (filter) {
2424 case PIPE_TEX_FILTER_NEAREST:
2425 return img_filter_2d_nearest_clamp_POT;
2426 default:
2427 break;
2428 }
2429 }
2430 }
2431 /* Otherwise use default versions:
2432 */
2433 if (filter == PIPE_TEX_FILTER_NEAREST)
2434 return img_filter_2d_nearest;
2435 else
2436 return img_filter_2d_linear;
2437 break;
2438 case PIPE_TEXTURE_2D_ARRAY:
2439 if (filter == PIPE_TEX_FILTER_NEAREST)
2440 return img_filter_2d_array_nearest;
2441 else
2442 return img_filter_2d_array_linear;
2443 break;
2444 case PIPE_TEXTURE_CUBE:
2445 if (filter == PIPE_TEX_FILTER_NEAREST)
2446 return img_filter_cube_nearest;
2447 else
2448 return img_filter_cube_linear;
2449 break;
2450 case PIPE_TEXTURE_3D:
2451 if (filter == PIPE_TEX_FILTER_NEAREST)
2452 return img_filter_3d_nearest;
2453 else
2454 return img_filter_3d_linear;
2455 break;
2456 default:
2457 assert(0);
2458 return img_filter_1d_nearest;
2459 }
2460 }
2461
2462
2463 /**
2464 * Bind the given texture object and texture cache to the sampler variant.
2465 */
2466 void
sp_sampler_variant_bind_view(struct sp_sampler_variant * samp,struct softpipe_tex_tile_cache * tex_cache,const struct pipe_sampler_view * view)2467 sp_sampler_variant_bind_view( struct sp_sampler_variant *samp,
2468 struct softpipe_tex_tile_cache *tex_cache,
2469 const struct pipe_sampler_view *view )
2470 {
2471 const struct pipe_resource *texture = view->texture;
2472
2473 samp->view = view;
2474 samp->cache = tex_cache;
2475 samp->xpot = util_logbase2( texture->width0 );
2476 samp->ypot = util_logbase2( texture->height0 );
2477 }
2478
2479
/**
 * Release a sampler variant by passing it to FREE().
 */
void
sp_sampler_variant_destroy(struct sp_sampler_variant *samp)
{
   FREE(samp);
}
2485
2486
2487 static void
sample_get_dims(struct tgsi_sampler * tgsi_sampler,int level,int dims[4])2488 sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level,
2489 int dims[4])
2490 {
2491 struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2492 const struct pipe_sampler_view *view = samp->view;
2493 const struct pipe_resource *texture = view->texture;
2494
2495 /* undefined according to EXT_gpu_program */
2496 level += view->u.tex.first_level;
2497 if (level > view->u.tex.last_level)
2498 return;
2499
2500 dims[0] = u_minify(texture->width0, level);
2501
2502 switch(texture->target) {
2503 case PIPE_TEXTURE_1D_ARRAY:
2504 dims[1] = texture->array_size;
2505 /* fallthrough */
2506 case PIPE_TEXTURE_1D:
2507 case PIPE_BUFFER:
2508 return;
2509 case PIPE_TEXTURE_2D_ARRAY:
2510 dims[2] = texture->array_size;
2511 /* fallthrough */
2512 case PIPE_TEXTURE_2D:
2513 case PIPE_TEXTURE_CUBE:
2514 case PIPE_TEXTURE_RECT:
2515 dims[1] = u_minify(texture->height0, level);
2516 return;
2517 case PIPE_TEXTURE_3D:
2518 dims[1] = u_minify(texture->height0, level);
2519 dims[2] = u_minify(texture->depth0, level);
2520 return;
2521 default:
2522 assert(!"unexpected texture target in sample_get_dims()");
2523 return;
2524 }
2525 }
2526
2527 /**
2528 * This function is only used for getting unfiltered texels via the
2529 * TXF opcode. The GL spec says that out-of-bounds texel fetches
2530 * produce undefined results. Instead of crashing, lets just clamp
2531 * coords to the texture image size.
2532 */
2533 static void
sample_get_texels(struct tgsi_sampler * tgsi_sampler,const int v_i[TGSI_QUAD_SIZE],const int v_j[TGSI_QUAD_SIZE],const int v_k[TGSI_QUAD_SIZE],const int lod[TGSI_QUAD_SIZE],const int8_t offset[3],float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])2534 sample_get_texels(struct tgsi_sampler *tgsi_sampler,
2535 const int v_i[TGSI_QUAD_SIZE],
2536 const int v_j[TGSI_QUAD_SIZE],
2537 const int v_k[TGSI_QUAD_SIZE],
2538 const int lod[TGSI_QUAD_SIZE],
2539 const int8_t offset[3],
2540 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2541 {
2542 const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2543 union tex_tile_address addr;
2544 const struct pipe_resource *texture = samp->view->texture;
2545 int j, c;
2546 const float *tx;
2547 const bool need_swizzle = any_swizzle(samp->key);
2548 int width, height, depth, layers;
2549
2550 addr.value = 0;
2551 /* TODO write a better test for LOD */
2552 addr.bits.level = lod[0];
2553
2554 width = u_minify(texture->width0, addr.bits.level);
2555 height = u_minify(texture->height0, addr.bits.level);
2556 depth = u_minify(texture->depth0, addr.bits.level);
2557 layers = texture->array_size;
2558
2559 switch(texture->target) {
2560 case PIPE_TEXTURE_1D:
2561 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2562 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2563 tx = get_texel_2d(samp, addr, x, 0);
2564 for (c = 0; c < 4; c++) {
2565 rgba[c][j] = tx[c];
2566 }
2567 }
2568 break;
2569 case PIPE_TEXTURE_1D_ARRAY:
2570 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2571 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2572 int y = CLAMP(v_j[j], 0, layers - 1);
2573 tx = get_texel_1d_array(samp, addr, x, y);
2574 for (c = 0; c < 4; c++) {
2575 rgba[c][j] = tx[c];
2576 }
2577 }
2578 break;
2579 case PIPE_TEXTURE_2D:
2580 case PIPE_TEXTURE_RECT:
2581 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2582 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2583 int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2584 tx = get_texel_2d(samp, addr, x, y);
2585 for (c = 0; c < 4; c++) {
2586 rgba[c][j] = tx[c];
2587 }
2588 }
2589 break;
2590 case PIPE_TEXTURE_2D_ARRAY:
2591 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2592 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2593 int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2594 int layer = CLAMP(v_k[j], 0, layers - 1);
2595 tx = get_texel_2d_array(samp, addr, x, y, layer);
2596 for (c = 0; c < 4; c++) {
2597 rgba[c][j] = tx[c];
2598 }
2599 }
2600 break;
2601 case PIPE_TEXTURE_3D:
2602 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2603 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2604 int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2605 int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
2606
2607 tx = get_texel_3d(samp, addr, x, y, z);
2608 for (c = 0; c < 4; c++) {
2609 rgba[c][j] = tx[c];
2610 }
2611 }
2612 break;
2613 case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
2614 default:
2615 assert(!"Unknown or CUBE texture type in TXF processing\n");
2616 break;
2617 }
2618
2619 if (need_swizzle) {
2620 float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2621 memcpy(rgba_temp, rgba, sizeof(rgba_temp));
2622 do_swizzling(samp, rgba_temp, rgba);
2623 }
2624 }
2625
2626
2627 /**
2628 * Create a sampler variant for a given set of non-orthogonal state.
2629 */
2630 struct sp_sampler_variant *
sp_create_sampler_variant(const struct pipe_sampler_state * sampler,const union sp_sampler_key key)2631 sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
2632 const union sp_sampler_key key )
2633 {
2634 struct sp_sampler_variant *samp = CALLOC_STRUCT(sp_sampler_variant);
2635 if (!samp)
2636 return NULL;
2637
2638 samp->sampler = sampler;
2639 samp->key = key;
2640
2641 /* Note that (for instance) linear_texcoord_s and
2642 * nearest_texcoord_s may be active at the same time, if the
2643 * sampler min_img_filter differs from its mag_img_filter.
2644 */
2645 if (sampler->normalized_coords) {
2646 samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
2647 samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
2648 samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
2649
2650 samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
2651 samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
2652 samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
2653 }
2654 else {
2655 samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
2656 samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
2657 samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
2658
2659 samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
2660 samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
2661 samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
2662 }
2663
2664 samp->compute_lambda = get_lambda_func( key );
2665
2666 samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
2667 samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
2668
2669 switch (sampler->min_mip_filter) {
2670 case PIPE_TEX_MIPFILTER_NONE:
2671 if (sampler->min_img_filter == sampler->mag_img_filter)
2672 samp->mip_filter = mip_filter_none_no_filter_select;
2673 else
2674 samp->mip_filter = mip_filter_none;
2675 break;
2676
2677 case PIPE_TEX_MIPFILTER_NEAREST:
2678 samp->mip_filter = mip_filter_nearest;
2679 break;
2680
2681 case PIPE_TEX_MIPFILTER_LINEAR:
2682 if (key.bits.is_pot &&
2683 key.bits.target == PIPE_TEXTURE_2D &&
2684 sampler->min_img_filter == sampler->mag_img_filter &&
2685 sampler->normalized_coords &&
2686 sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
2687 sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
2688 sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
2689 samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
2690 }
2691 else {
2692 samp->mip_filter = mip_filter_linear;
2693 }
2694
2695 /* Anisotropic filtering extension. */
2696 if (sampler->max_anisotropy > 1) {
2697 samp->mip_filter = mip_filter_linear_aniso;
2698
2699 /* Override min_img_filter:
2700 * min_img_filter needs to be set to NEAREST since we need to access
2701 * each texture pixel as it is and weight it later; using linear
2702 * filters will have incorrect results.
2703 * By setting the filter to NEAREST here, we can avoid calling the
2704 * generic img_filter_2d_nearest in the anisotropic filter function,
2705 * making it possible to use one of the accelerated implementations
2706 */
2707 samp->min_img_filter = get_img_filter(key, PIPE_TEX_FILTER_NEAREST, sampler);
2708
2709 /* on first access create the lookup table containing the filter weights. */
2710 if (!weightLut) {
2711 create_filter_table();
2712 }
2713 }
2714
2715 break;
2716 }
2717
2718 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
2719 samp->compare = sample_compare;
2720 }
2721 else {
2722 /* Skip compare operation by promoting the mip_filter function
2723 * pointer:
2724 */
2725 samp->compare = samp->mip_filter;
2726 }
2727
2728 if (key.bits.target == PIPE_TEXTURE_CUBE) {
2729 samp->sample_target = sample_cube;
2730 }
2731 else {
2732 samp->faces[0] = 0;
2733 samp->faces[1] = 0;
2734 samp->faces[2] = 0;
2735 samp->faces[3] = 0;
2736
2737 /* Skip cube face determination by promoting the compare
2738 * function pointer:
2739 */
2740 samp->sample_target = samp->compare;
2741 }
2742
2743 if (any_swizzle(key)) {
2744 samp->base.get_samples = sample_swizzle;
2745 }
2746 else {
2747 samp->base.get_samples = samp->sample_target;
2748 }
2749
2750 samp->base.get_dims = sample_get_dims;
2751 samp->base.get_texel = sample_get_texels;
2752 return samp;
2753 }
2754