1 #include "rs_core.rsh"
2 #include "rs_structs.h"
3
4
/*
 * RGB565 unpacking helpers (bit layout taken from SkBitmap).
 * A packed 16-bit color keeps red in the top 5 bits, green in the middle
 * 6 bits and blue in the low 5 bits.
 */

/* Number of bits used by each packed channel. */
#define SK_R16_BITS 5
#define SK_G16_BITS 6
#define SK_B16_BITS 5

/* Bit offset of each channel inside the packed word. */
#define SK_B16_SHIFT 0
#define SK_G16_SHIFT (SK_B16_BITS)
#define SK_R16_SHIFT (SK_B16_BITS + SK_G16_BITS)

/* Mask selecting one channel after it has been shifted down to bit 0. */
#define SK_R16_MASK ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK ((1 << SK_B16_BITS) - 1)

/* Pull the raw (not yet widened) channel value out of a packed color. */
#define SkGetPackedR16(color) (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color) (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color) (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

/*
 * Widens a 5-bit red value to 8 bits. The value is shifted into the high
 * bits and its own top bits are copied into the vacated low bits, so that
 * 0 maps to 0 and 31 maps to 255.
 */
static inline unsigned SkR16ToR32(unsigned r) {
    const unsigned highPart = r << (8 - SK_R16_BITS);
    const unsigned lowPart = r >> (2 * SK_R16_BITS - 8);
    return highPart | lowPart;
}

/* Widens a 6-bit green value to 8 bits using the same bit replication. */
static inline unsigned SkG16ToG32(unsigned g) {
    const unsigned highPart = g << (8 - SK_G16_BITS);
    const unsigned lowPart = g >> (2 * SK_G16_BITS - 8);
    return highPart | lowPart;
}

/* Widens a 5-bit blue value to 8 bits using the same bit replication. */
static inline unsigned SkB16ToB32(unsigned b) {
    const unsigned highPart = b << (8 - SK_B16_BITS);
    const unsigned lowPart = b >> (2 * SK_B16_BITS - 8);
    return highPart | lowPart;
}

/* Extract one channel from a packed 565 color and widen it to 8 bits. */
#define SkPacked16ToR32(c) SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c) SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c) SkB16ToB32(SkGetPackedB16(c))
37
getFrom565(uint16_t color)38 static float3 getFrom565(uint16_t color) {
39 float3 result;
40 result.x = (float)SkPacked16ToR32(color);
41 result.y = (float)SkPacked16ToG32(color);
42 result.z = (float)SkPacked16ToB32(color);
43 return result;
44 }
45
46 /**
47 * Allocation sampling
48 */
/*
 * Reads the one-byte element at index x of a 1D buffer and returns it as a
 * float (0..255, not normalized).
 */
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, int32_t x) {
    return (float)p[x];
}
54
55 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,int32_t x)56 getElementAt2(const uint8_t *p, int32_t x) {
57 x *= 2;
58 float2 r = {p[x], p[x+1]};
59 return r;
60 }
61
62 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,int32_t x)63 getElementAt3(const uint8_t *p, int32_t x) {
64 x *= 4;
65 float3 r = {p[x], p[x+1], p[x+2]};
66 return r;
67 }
68
69 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,int32_t x)70 getElementAt4(const uint8_t *p, int32_t x) {
71 x *= 4;
72 const uchar4 *p2 = (const uchar4 *)&p[x];
73 return convert_float4(p2[0]);
74 }
75
76 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,int32_t x)77 getElementAt565(const uint8_t *p, int32_t x) {
78 x *= 2;
79 float3 r = getFrom565(((const uint16_t *)p)[0]);
80 return r;
81 }
82
/*
 * Reads the one-byte element at column x of row y in a 2D buffer whose rows
 * are `stride` bytes apart, returning it as a float (0..255).
 */
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    const uint8_t *row = p + y * stride;
    return (float)row[x];
}
89
90 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,size_t stride,int32_t x,int32_t y)91 getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
92 p += y * stride;
93 x *= 2;
94 float2 r = {p[x], p[x+1]};
95 return r;
96 }
97
98 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,size_t stride,int32_t x,int32_t y)99 getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
100 p += y * stride;
101 x *= 4;
102 float3 r = {p[x], p[x+1], p[x+2]};
103 return r;
104 }
105
106 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,size_t stride,int32_t x,int32_t y)107 getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
108 p += y * stride;
109 x *= 4;
110 float4 r = {p[x], p[x+1], p[x+2], p[x+3]};
111 return r;
112 }
113
114 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,size_t stride,int32_t x,int32_t y)115 getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
116 p += y * stride;
117 x *= 2;
118 float3 r = getFrom565(((const uint16_t *)p)[0]);
119 return r;
120 }
121
122
123
124
125
126 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)127 getSample_A(const uint8_t *p, int32_t iPixel,
128 int32_t next, float w0, float w1) {
129 float p0 = getElementAt1(p, iPixel);
130 float p1 = getElementAt1(p, next);
131 float r = p0 * w0 + p1 * w1;
132 r *= (1.f / 255.f);
133 float4 ret = {0.f, 0.f, 0.f, r};
134 return ret;
135 }
136 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)137 getSample_L(const uint8_t *p, int32_t iPixel,
138 int32_t next, float w0, float w1) {
139 float p0 = getElementAt1(p, iPixel);
140 float p1 = getElementAt1(p, next);
141 float r = p0 * w0 + p1 * w1;
142 r *= (1.f / 255.f);
143 float4 ret = {r, r, r, 1.f};
144 return ret;
145 }
146 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)147 getSample_LA(const uint8_t *p, int32_t iPixel,
148 int32_t next, float w0, float w1) {
149 float2 p0 = getElementAt2(p, iPixel);
150 float2 p1 = getElementAt2(p, next);
151 float2 r = p0 * w0 + p1 * w1;
152 r *= (1.f / 255.f);
153 float4 ret = {r.x, r.x, r.x, r.y};
154 return ret;
155 }
156 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)157 getSample_RGB(const uint8_t *p, int32_t iPixel,
158 int32_t next, float w0, float w1) {
159 float3 p0 = getElementAt3(p, iPixel);
160 float3 p1 = getElementAt3(p, next);
161 float3 r = p0 * w0 + p1 * w1;
162 r *= (1.f / 255.f);
163 float4 ret = {r.x, r.x, r.z, 1.f};
164 return ret;
165 }
166 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)167 getSample_565(const uint8_t *p, int32_t iPixel,
168 int32_t next, float w0, float w1) {
169 float3 p0 = getElementAt565(p, iPixel);
170 float3 p1 = getElementAt565(p, next);
171 float3 r = p0 * w0 + p1 * w1;
172 r *= (1.f / 255.f);
173 float4 ret = {r.x, r.x, r.z, 1.f};
174 return ret;
175 }
176 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)177 getSample_RGBA(const uint8_t *p, int32_t iPixel,
178 int32_t next, float w0, float w1) {
179 float4 p0 = getElementAt4(p, iPixel);
180 float4 p1 = getElementAt4(p, next);
181 float4 r = p0 * w0 + p1 * w1;
182 r *= (1.f / 255.f);
183 return r;
184 }
185
186
187 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)188 getSample_A(const uint8_t *p, size_t stride,
189 int locX, int locY, int nextX, int nextY,
190 float w0, float w1, float w2, float w3) {
191 float p0 = getElementAt1(p, stride, locX, locY);
192 float p1 = getElementAt1(p, stride, nextX, locY);
193 float p2 = getElementAt1(p, stride, locX, nextY);
194 float p3 = getElementAt1(p, stride, nextX, nextY);
195 float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
196 r *= (1.f / 255.f);
197 float4 ret = {0.f, 0.f, 0.f, r};
198 return ret;
199 }
200 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)201 getSample_L(const uint8_t *p, size_t stride,
202 int locX, int locY, int nextX, int nextY,
203 float w0, float w1, float w2, float w3) {
204 float p0 = getElementAt1(p, stride, locX, locY);
205 float p1 = getElementAt1(p, stride, nextX, locY);
206 float p2 = getElementAt1(p, stride, locX, nextY);
207 float p3 = getElementAt1(p, stride, nextX, nextY);
208 float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
209 r *= (1.f / 255.f);
210 float4 ret = {r, r, r, 1.f};
211 return ret;
212 }
213 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)214 getSample_LA(const uint8_t *p, size_t stride,
215 int locX, int locY, int nextX, int nextY,
216 float w0, float w1, float w2, float w3) {
217 float2 p0 = getElementAt2(p, stride, locX, locY);
218 float2 p1 = getElementAt2(p, stride, nextX, locY);
219 float2 p2 = getElementAt2(p, stride, locX, nextY);
220 float2 p3 = getElementAt2(p, stride, nextX, nextY);
221 float2 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
222 r *= (1.f / 255.f);
223 float4 ret = {r.x, r.x, r.x, r.y};
224 return ret;
225 }
226 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)227 getSample_RGB(const uint8_t *p, size_t stride,
228 int locX, int locY, int nextX, int nextY,
229 float w0, float w1, float w2, float w3) {
230 float4 p0 = getElementAt4(p, stride, locX, locY);
231 float4 p1 = getElementAt4(p, stride, nextX, locY);
232 float4 p2 = getElementAt4(p, stride, locX, nextY);
233 float4 p3 = getElementAt4(p, stride, nextX, nextY);
234 float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
235 r *= (1.f / 255.f);
236 float4 ret = {r.x, r.y, r.z, 1.f};
237 return ret;
238 }
239 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)240 getSample_RGBA(const uint8_t *p, size_t stride,
241 int locX, int locY, int nextX, int nextY,
242 float w0, float w1, float w2, float w3) {
243 float4 p0 = getElementAt4(p, stride, locX, locY);
244 float4 p1 = getElementAt4(p, stride, nextX, locY);
245 float4 p2 = getElementAt4(p, stride, locX, nextY);
246 float4 p3 = getElementAt4(p, stride, nextX, nextY);
247 float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
248 r *= (1.f / 255.f);
249 return r;
250 }
251 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)252 getSample_565(const uint8_t *p, size_t stride,
253 int locX, int locY, int nextX, int nextY,
254 float w0, float w1, float w2, float w3) {
255 float3 p0 = getElementAt565(p, stride, locX, locY);
256 float3 p1 = getElementAt565(p, stride, nextX, locY);
257 float3 p2 = getElementAt565(p, stride, locX, nextY);
258 float3 p3 = getElementAt565(p, stride, nextX, nextY);
259 float3 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
260 r *= (1.f / 255.f);
261 float4 ret;
262 ret.rgb = r;
263 ret.w = 1.f;
264 return ret;
265 }
266
267 static float4 __attribute__((overloadable))
getBilinearSample1D(const Allocation_t * alloc,float2 weights,uint32_t iPixel,uint32_t next,rs_data_kind dk,rs_data_type dt,uint32_t lod)268 getBilinearSample1D(const Allocation_t *alloc, float2 weights,
269 uint32_t iPixel, uint32_t next,
270 rs_data_kind dk, rs_data_type dt, uint32_t lod) {
271
272 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
273
274 switch(dk) {
275 case RS_KIND_PIXEL_RGBA:
276 return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
277 case RS_KIND_PIXEL_A:
278 return getSample_A(p, iPixel, next, weights.x, weights.y);
279 case RS_KIND_PIXEL_RGB:
280 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
281 return getSample_565(p, iPixel, next, weights.x, weights.y);
282 }
283 return getSample_RGB(p, iPixel, next, weights.x, weights.y);
284 case RS_KIND_PIXEL_L:
285 return getSample_L(p, iPixel, next, weights.x, weights.y);
286 case RS_KIND_PIXEL_LA:
287 return getSample_LA(p, iPixel, next, weights.x, weights.y);
288
289 default:
290 //__builtin_unreachable();
291 break;
292 }
293
294 //__builtin_unreachable();
295 return 0.f;
296 }
297
/*
 * Maps an integer texel coordinate into [0, size - 1] according to the
 * sampler wrap mode:
 *   RS_SAMPLER_WRAP            - coord modulo size, made non-negative;
 *   RS_SAMPLER_MIRRORED_REPEAT - coord folded into a period of 2 * size and
 *                                reflected when it lands in the second half;
 *   any other mode             - left as is.
 * The result is always clamped to [0, size - 1] before being returned.
 */
static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
    switch (wrap) {
    case RS_SAMPLER_WRAP:
        coord %= size;
        if (coord < 0) {
            coord += size;
        }
        break;
    case RS_SAMPLER_MIRRORED_REPEAT: {
        const int32_t period = size * 2;
        coord %= period;
        if (coord < 0) {
            coord += period;
        }
        // Second half of the period runs backwards.
        if (coord >= size) {
            coord = (period - 1) - coord;
        }
        break;
    }
    default:
        break;
    }

    const int32_t last = size - 1;
    return (uint32_t)max(0, min(coord, last));
}
316
317 static float4 __attribute__((overloadable))
getBilinearSample2D(const Allocation_t * alloc,float w0,float w1,float w2,float w3,int lx,int ly,int nx,int ny,rs_data_kind dk,rs_data_type dt,uint32_t lod)318 getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
319 int lx, int ly, int nx, int ny,
320 rs_data_kind dk, rs_data_type dt, uint32_t lod) {
321
322 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
323 size_t stride = alloc->mHal.drvState.lod[lod].stride;
324
325 switch(dk) {
326 case RS_KIND_PIXEL_RGBA:
327 return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
328 case RS_KIND_PIXEL_A:
329 return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
330 case RS_KIND_PIXEL_LA:
331 return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
332 case RS_KIND_PIXEL_RGB:
333 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
334 return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
335 }
336 return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
337 case RS_KIND_PIXEL_L:
338 return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
339
340 default:
341 break;
342 }
343
344 return 0.f;
345 }
346
347 static float4 __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint32_t iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)348 getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
349 rs_data_type dt, uint32_t lod) {
350
351 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
352
353 float4 result = {0.f, 0.f, 0.f, 255.f};
354
355 switch(dk) {
356 case RS_KIND_PIXEL_RGBA:
357 result = getElementAt4(p, iPixel);
358 break;
359 case RS_KIND_PIXEL_A:
360 result.w = getElementAt1(p, iPixel);
361 break;
362 case RS_KIND_PIXEL_LA:
363 result.zw = getElementAt2(p, iPixel);
364 result.xy = result.z;
365 break;
366 case RS_KIND_PIXEL_RGB:
367 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
368 result.xyz = getElementAt565(p, iPixel);
369 } else {
370 result.xyz = getElementAt3(p, iPixel);
371 }
372 break;
373 case RS_KIND_PIXEL_L:
374 result.xyz = getElementAt1(p, iPixel);
375
376 default:
377 //__builtin_unreachable();
378 break;
379 }
380
381 return result * 0.003921569f;
382 }
383
384 static float4 __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint2 iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)385 getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
386 rs_data_type dt, uint32_t lod) {
387
388 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
389 size_t stride = alloc->mHal.drvState.lod[lod].stride;
390
391 float4 result = {0.f, 0.f, 0.f, 255.f};
392
393 switch(dk) {
394 case RS_KIND_PIXEL_RGBA:
395 result = getElementAt4(p, stride, iPixel.x, iPixel.y);
396 break;
397 case RS_KIND_PIXEL_A:
398 result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
399 break;
400 case RS_KIND_PIXEL_LA:
401 result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
402 result.xy = result.z;
403 break;
404 case RS_KIND_PIXEL_RGB:
405 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
406 result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
407 } else {
408 result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
409 }
410 break;
411
412 default:
413 //__builtin_unreachable();
414 break;
415 }
416
417 return result * 0.003921569f;
418 }
419
420 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)421 sample_LOD_LinearPixel(const Allocation_t *alloc,
422 rs_data_kind dk, rs_data_type dt,
423 rs_sampler_value wrapS,
424 float uv, uint32_t lod) {
425
426 int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
427 float pixelUV = uv * (float)(sourceW);
428 int32_t iPixel = floor(pixelUV);
429 float frac = pixelUV - (float)iPixel;
430
431 if (frac < 0.5f) {
432 iPixel -= 1;
433 frac += 0.5f;
434 } else {
435 frac -= 0.5f;
436 }
437
438 float oneMinusFrac = 1.0f - frac;
439
440 float2 weights;
441 weights.x = oneMinusFrac;
442 weights.y = frac;
443
444 uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
445 uint32_t location = wrapI(wrapS, iPixel, sourceW);
446
447 return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
448 }
449
450 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)451 sample_LOD_NearestPixel(const Allocation_t *alloc,
452 rs_data_kind dk, rs_data_type dt,
453 rs_sampler_value wrapS,
454 float uv, uint32_t lod) {
455
456 int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
457 int32_t iPixel = floor(uv * (float)(sourceW));
458 uint32_t location = wrapI(wrapS, iPixel, sourceW);
459
460 return getNearestSample(alloc, location, dk, dt, lod);
461 }
462
463 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)464 sample_LOD_LinearPixel(const Allocation_t *alloc,
465 rs_data_kind dk, rs_data_type dt,
466 rs_sampler_value wrapS,
467 rs_sampler_value wrapT,
468 float2 uv, uint32_t lod) {
469
470 int sourceW = alloc->mHal.drvState.lod[lod].dimX;
471 int sourceH = alloc->mHal.drvState.lod[lod].dimY;
472
473 float pixelU = uv.x * sourceW;
474 float pixelV = uv.y * sourceH;
475 int iPixelU = floor(pixelU);
476 int iPixelV = floor(pixelV);
477 float fracU = pixelU - iPixelU;
478 float fracV = pixelV - iPixelV;
479
480 if (fracU < 0.5f) {
481 iPixelU -= 1;
482 fracU += 0.5f;
483 } else {
484 fracU -= 0.5f;
485 }
486 if (fracV < 0.5f) {
487 iPixelV -= 1;
488 fracV += 0.5f;
489 } else {
490 fracV -= 0.5f;
491 }
492 float oneMinusFracU = 1.0f - fracU;
493 float oneMinusFracV = 1.0f - fracV;
494
495 float w0 = oneMinusFracU * oneMinusFracV;
496 float w1 = fracU * oneMinusFracV;
497 float w2 = oneMinusFracU * fracV;
498 float w3 = fracU * fracV;
499
500 int nx = wrapI(wrapS, iPixelU + 1, sourceW);
501 int ny = wrapI(wrapT, iPixelV + 1, sourceH);
502 int lx = wrapI(wrapS, iPixelU, sourceW);
503 int ly = wrapI(wrapT, iPixelV, sourceH);
504
505 return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
506
507 }
508
509 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)510 sample_LOD_NearestPixel(const Allocation_t *alloc,
511 rs_data_kind dk, rs_data_type dt,
512 rs_sampler_value wrapS,
513 rs_sampler_value wrapT,
514 float2 uv, uint32_t lod) {
515 int sourceW = alloc->mHal.drvState.lod[lod].dimX;
516 int sourceH = alloc->mHal.drvState.lod[lod].dimY;
517
518 float2 dimF;
519 dimF.x = (float)(sourceW);
520 dimF.y = (float)(sourceH);
521 int2 iPixel = convert_int2(floor(uv * dimF));
522
523 uint2 location;
524 location.x = wrapI(wrapS, iPixel.x, sourceW);
525 location.y = wrapI(wrapT, iPixel.y, sourceH);
526 return getNearestSample(alloc, location, dk, dt, lod);
527 }
528
529 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float uv,float lod)530 rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
531
532 const Allocation_t *alloc = (const Allocation_t *)a.p;
533 const Sampler_t *prog = (Sampler_t *)s.p;
534 const Type_t *type = (Type_t *)alloc->mHal.state.type;
535 const Element_t *elem = type->mHal.state.element;
536 rs_data_kind dk = elem->mHal.state.dataKind;
537 rs_data_type dt = elem->mHal.state.dataType;
538 rs_sampler_value sampleMin = prog->mHal.state.minFilter;
539 rs_sampler_value sampleMag = prog->mHal.state.magFilter;
540 rs_sampler_value wrapS = prog->mHal.state.wrapS;
541
542 if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
543 return 0.f;
544 }
545
546 if (lod <= 0.0f) {
547 if (sampleMag == RS_SAMPLER_NEAREST) {
548 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
549 }
550 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, 0);
551 }
552
553 if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
554 uint32_t maxLOD = type->mHal.state.lodCount - 1;
555 lod = min(lod, (float)maxLOD);
556 uint32_t nearestLOD = (uint32_t)round(lod);
557 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, nearestLOD);
558 }
559
560 if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
561 uint32_t lod0 = (uint32_t)floor(lod);
562 uint32_t lod1 = (uint32_t)ceil(lod);
563 uint32_t maxLOD = type->mHal.state.lodCount - 1;
564 lod0 = min(lod0, maxLOD);
565 lod1 = min(lod1, maxLOD);
566 float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod0);
567 float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod1);
568 float frac = lod - (float)lod0;
569 return sample0 * (1.0f - frac) + sample1 * frac;
570 }
571
572 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
573 }
574
575 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float location)576 rsSample(rs_allocation a, rs_sampler s, float location) {
577 return rsSample(a, s, location, 0);
578 }
579
580
581 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv,float lod)582 rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
583
584 const Allocation_t *alloc = (const Allocation_t *)a.p;
585 const Sampler_t *prog = (Sampler_t *)s.p;
586 const Type_t *type = (Type_t *)alloc->mHal.state.type;
587 const Element_t *elem = type->mHal.state.element;
588 rs_data_kind dk = elem->mHal.state.dataKind;
589 rs_data_type dt = elem->mHal.state.dataType;
590 rs_sampler_value sampleMin = prog->mHal.state.minFilter;
591 rs_sampler_value sampleMag = prog->mHal.state.magFilter;
592 rs_sampler_value wrapS = prog->mHal.state.wrapS;
593 rs_sampler_value wrapT = prog->mHal.state.wrapT;
594
595 if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
596 return 0.f;
597 }
598
599 if (lod <= 0.0f) {
600 if (sampleMag == RS_SAMPLER_NEAREST) {
601 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
602 }
603 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
604 }
605
606 if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
607 uint32_t maxLOD = type->mHal.state.lodCount - 1;
608 lod = min(lod, (float)maxLOD);
609 uint32_t nearestLOD = (uint32_t)round(lod);
610 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, nearestLOD);
611 }
612
613 if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
614 uint32_t lod0 = (uint32_t)floor(lod);
615 uint32_t lod1 = (uint32_t)ceil(lod);
616 uint32_t maxLOD = type->mHal.state.lodCount - 1;
617 lod0 = min(lod0, maxLOD);
618 lod1 = min(lod1, maxLOD);
619 float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod0);
620 float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod1);
621 float frac = lod - (float)lod0;
622 return sample0 * (1.0f - frac) + sample1 * frac;
623 }
624
625 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
626 }
627
628 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv)629 rsSample(rs_allocation a, rs_sampler s, float2 uv) {
630
631 const Allocation_t *alloc = (const Allocation_t *)a.p;
632 const Sampler_t *prog = (Sampler_t *)s.p;
633 const Type_t *type = (Type_t *)alloc->mHal.state.type;
634 const Element_t *elem = type->mHal.state.element;
635 rs_data_kind dk = elem->mHal.state.dataKind;
636 rs_data_type dt = elem->mHal.state.dataType;
637 rs_sampler_value wrapS = prog->mHal.state.wrapS;
638 rs_sampler_value wrapT = prog->mHal.state.wrapT;
639
640 if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
641 return 0.f;
642 }
643
644 if (prog->mHal.state.magFilter == RS_SAMPLER_NEAREST) {
645 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
646 }
647 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
648 }
649