1 #include "rs_core.rsh"
2 #include "rs_structs.h"
3
4
// RGB565 packing constants (conversion bits taken from SkBitmap).

// Width of each channel in bits.
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

// Bit position of each channel inside the 16-bit word: blue occupies the
// lowest bits, green sits directly above blue, red directly above green.
#define SK_B16_SHIFT    0
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)

// All-ones masks exactly as wide as each channel.
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

// Extract the raw (un-widened) channel value from a packed 565 color.
#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
21
SkR16ToR32(unsigned r)22 static inline unsigned SkR16ToR32(unsigned r) {
23 return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
24 }
25
SkG16ToG32(unsigned g)26 static inline unsigned SkG16ToG32(unsigned g) {
27 return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
28 }
29
SkB16ToB32(unsigned b)30 static inline unsigned SkB16ToB32(unsigned b) {
31 return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
32 }
33
// Extract one channel from a packed 565 color and widen it to 8 bits.
#define SkPacked16ToR32(c)  (SkR16ToR32(SkGetPackedR16(c)))
#define SkPacked16ToG32(c)  (SkG16ToG32(SkGetPackedG16(c)))
#define SkPacked16ToB32(c)  (SkB16ToB32(SkGetPackedB16(c)))
37
getFrom565(uint16_t color)38 static float3 getFrom565(uint16_t color) {
39 float3 result;
40 result.x = (float)SkPacked16ToR32(color);
41 result.y = (float)SkPacked16ToG32(color);
42 result.z = (float)SkPacked16ToB32(color);
43 return result;
44 }
45
46 /**
47 * Allocation sampling
48 */
// Returns the one-byte element at index x of a 1D buffer, converted to float.
static inline float __attribute__((overloadable))
getElementAt1(const uint8_t *p, int32_t x) {
    return (float)p[x];
}
54
55 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,int32_t x)56 getElementAt2(const uint8_t *p, int32_t x) {
57 x *= 2;
58 float2 r = {p[x], p[x+1]};
59 return r;
60 }
61
62 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,int32_t x)63 getElementAt3(const uint8_t *p, int32_t x) {
64 x *= 4;
65 float3 r = {p[x], p[x+1], p[x+2]};
66 return r;
67 }
68
69 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,int32_t x)70 getElementAt4(const uint8_t *p, int32_t x) {
71 x *= 4;
72 const uchar4 *p2 = (const uchar4 *)&p[x];
73 return convert_float4(p2[0]);
74 }
75
76 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,int32_t x)77 getElementAt565(const uint8_t *p, int32_t x) {
78 x *= 2;
79 float3 r = getFrom565(((const uint16_t *)p)[0]);
80 return r;
81 }
82
// Returns the one-byte element at column x of row y, converted to float.
// Rows are `stride` bytes apart.
static inline float __attribute__((overloadable))
getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    const uint8_t *row = p + y * stride;
    return (float)row[x];
}
89
90 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,size_t stride,int32_t x,int32_t y)91 getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
92 p += y * stride;
93 x *= 2;
94 float2 r = {p[x], p[x+1]};
95 return r;
96 }
97
98 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,size_t stride,int32_t x,int32_t y)99 getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
100 p += y * stride;
101 x *= 4;
102 float3 r = {p[x], p[x+1], p[x+2]};
103 return r;
104 }
105
106 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,size_t stride,int32_t x,int32_t y)107 getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
108 p += y * stride;
109 x *= 4;
110 float4 r = {p[x], p[x+1], p[x+2], p[x+3]};
111 return r;
112 }
113
114 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,size_t stride,int32_t x,int32_t y)115 getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
116 p += y * stride;
117 x *= 2;
118 float3 r = getFrom565(((const uint16_t *)p)[0]);
119 return r;
120 }
121
122
123
124
125
126 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)127 getSample_A(const uint8_t *p, int32_t iPixel,
128 int32_t next, float w0, float w1) {
129 float p0 = getElementAt1(p, iPixel);
130 float p1 = getElementAt1(p, next);
131 float r = p0 * w0 + p1 * w1;
132 r *= (1.f / 255.f);
133 float4 ret = {0.f, 0.f, 0.f, r};
134 return ret;
135 }
136 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)137 getSample_L(const uint8_t *p, int32_t iPixel,
138 int32_t next, float w0, float w1) {
139 float p0 = getElementAt1(p, iPixel);
140 float p1 = getElementAt1(p, next);
141 float r = p0 * w0 + p1 * w1;
142 r *= (1.f / 255.f);
143 float4 ret = {r, r, r, 1.f};
144 return ret;
145 }
146 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)147 getSample_LA(const uint8_t *p, int32_t iPixel,
148 int32_t next, float w0, float w1) {
149 float2 p0 = getElementAt2(p, iPixel);
150 float2 p1 = getElementAt2(p, next);
151 float2 r = p0 * w0 + p1 * w1;
152 r *= (1.f / 255.f);
153 float4 ret = {r.x, r.x, r.x, r.y};
154 return ret;
155 }
156 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)157 getSample_RGB(const uint8_t *p, int32_t iPixel,
158 int32_t next, float w0, float w1) {
159 float3 p0 = getElementAt3(p, iPixel);
160 float3 p1 = getElementAt3(p, next);
161 float3 r = p0 * w0 + p1 * w1;
162 r *= (1.f / 255.f);
163 float4 ret = {r.x, r.x, r.z, 1.f};
164 return ret;
165 }
166 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)167 getSample_565(const uint8_t *p, int32_t iPixel,
168 int32_t next, float w0, float w1) {
169 float3 p0 = getElementAt565(p, iPixel);
170 float3 p1 = getElementAt565(p, next);
171 float3 r = p0 * w0 + p1 * w1;
172 r *= (1.f / 255.f);
173 float4 ret = {r.x, r.x, r.z, 1.f};
174 return ret;
175 }
176 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)177 getSample_RGBA(const uint8_t *p, int32_t iPixel,
178 int32_t next, float w0, float w1) {
179 float4 p0 = getElementAt4(p, iPixel);
180 float4 p1 = getElementAt4(p, next);
181 float4 r = p0 * w0 + p1 * w1;
182 r *= (1.f / 255.f);
183 return r;
184 }
185
186
187 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)188 getSample_A(const uint8_t *p, size_t stride,
189 int locX, int locY, int nextX, int nextY,
190 float w0, float w1, float w2, float w3) {
191 float p0 = getElementAt1(p, stride, locX, locY);
192 float p1 = getElementAt1(p, stride, nextX, locY);
193 float p2 = getElementAt1(p, stride, locX, nextY);
194 float p3 = getElementAt1(p, stride, nextX, nextY);
195 float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
196 r *= (1.f / 255.f);
197 float4 ret = {0.f, 0.f, 0.f, r};
198 return ret;
199 }
200 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)201 getSample_L(const uint8_t *p, size_t stride,
202 int locX, int locY, int nextX, int nextY,
203 float w0, float w1, float w2, float w3) {
204 float p0 = getElementAt1(p, stride, locX, locY);
205 float p1 = getElementAt1(p, stride, nextX, locY);
206 float p2 = getElementAt1(p, stride, locX, nextY);
207 float p3 = getElementAt1(p, stride, nextX, nextY);
208 float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
209 r *= (1.f / 255.f);
210 float4 ret = {r, r, r, 1.f};
211 return ret;
212 }
213 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)214 getSample_LA(const uint8_t *p, size_t stride,
215 int locX, int locY, int nextX, int nextY,
216 float w0, float w1, float w2, float w3) {
217 float2 p0 = getElementAt2(p, stride, locX, locY);
218 float2 p1 = getElementAt2(p, stride, nextX, locY);
219 float2 p2 = getElementAt2(p, stride, locX, nextY);
220 float2 p3 = getElementAt2(p, stride, nextX, nextY);
221 float2 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
222 r *= (1.f / 255.f);
223 float4 ret = {r.x, r.x, r.x, r.y};
224 return ret;
225 }
226 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)227 getSample_RGB(const uint8_t *p, size_t stride,
228 int locX, int locY, int nextX, int nextY,
229 float w0, float w1, float w2, float w3) {
230 float4 p0 = getElementAt4(p, stride, locX, locY);
231 float4 p1 = getElementAt4(p, stride, nextX, locY);
232 float4 p2 = getElementAt4(p, stride, locX, nextY);
233 float4 p3 = getElementAt4(p, stride, nextX, nextY);
234 float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
235 r *= (1.f / 255.f);
236 float4 ret = {r.x, r.y, r.z, 1.f};
237 return ret;
238 }
239 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)240 getSample_RGBA(const uint8_t *p, size_t stride,
241 int locX, int locY, int nextX, int nextY,
242 float w0, float w1, float w2, float w3) {
243 float4 p0 = getElementAt4(p, stride, locX, locY);
244 float4 p1 = getElementAt4(p, stride, nextX, locY);
245 float4 p2 = getElementAt4(p, stride, locX, nextY);
246 float4 p3 = getElementAt4(p, stride, nextX, nextY);
247 float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
248 r *= (1.f / 255.f);
249 return r;
250 }
251 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)252 getSample_565(const uint8_t *p, size_t stride,
253 int locX, int locY, int nextX, int nextY,
254 float w0, float w1, float w2, float w3) {
255 float3 p0 = getElementAt565(p, stride, locX, locY);
256 float3 p1 = getElementAt565(p, stride, nextX, locY);
257 float3 p2 = getElementAt565(p, stride, locX, nextY);
258 float3 p3 = getElementAt565(p, stride, nextX, nextY);
259 float3 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
260 r *= (1.f / 255.f);
261 float4 ret;
262 ret.rgb = r;
263 ret.w = 1.f;
264 return ret;
265 }
266
267 static float4 __attribute__((overloadable))
getBilinearSample1D(const Allocation_t * alloc,float2 weights,uint32_t iPixel,uint32_t next,rs_data_kind dk,rs_data_type dt,uint32_t lod)268 getBilinearSample1D(const Allocation_t *alloc, float2 weights,
269 uint32_t iPixel, uint32_t next,
270 rs_data_kind dk, rs_data_type dt, uint32_t lod) {
271
272 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
273
274 switch(dk) {
275 case RS_KIND_PIXEL_RGBA:
276 return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
277 case RS_KIND_PIXEL_A:
278 return getSample_A(p, iPixel, next, weights.x, weights.y);
279 case RS_KIND_PIXEL_RGB:
280 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
281 return getSample_565(p, iPixel, next, weights.x, weights.y);
282 }
283 return getSample_RGB(p, iPixel, next, weights.x, weights.y);
284 case RS_KIND_PIXEL_L:
285 return getSample_L(p, iPixel, next, weights.x, weights.y);
286 case RS_KIND_PIXEL_LA:
287 return getSample_LA(p, iPixel, next, weights.x, weights.y);
288
289 default:
290 //__builtin_unreachable();
291 break;
292 }
293
294 //__builtin_unreachable();
295 return 0.f;
296 }
297
// Maps an integer texel coordinate into [0, size - 1] according to the wrap
// mode. RS_SAMPLER_WRAP repeats the texture, RS_SAMPLER_MIRRORED_REPEAT
// repeats it with every other copy reversed, and any other mode simply clamps
// to the edge. The final clamp is applied in all cases.
static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
    if (wrap == RS_SAMPLER_WRAP) {
        coord %= size;
        // % keeps the sign of the dividend; shift negatives back in range.
        if (coord < 0) {
            coord += size;
        }
    }
    if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
        const int32_t period = size * 2;
        coord %= period;
        if (coord < 0) {
            coord += period;
        }
        // The second half of each period runs backwards.
        if (coord >= size) {
            coord = (period - 1) - coord;
        }
    }
    const int32_t lastIndex = size - 1;
    return (uint32_t)max(0, min(coord, lastIndex));
}
316
317 static float4 __attribute__((overloadable))
getBilinearSample2D(const Allocation_t * alloc,float w0,float w1,float w2,float w3,int lx,int ly,int nx,int ny,rs_data_kind dk,rs_data_type dt,uint32_t lod)318 getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
319 int lx, int ly, int nx, int ny,
320 rs_data_kind dk, rs_data_type dt, uint32_t lod) {
321
322 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
323 size_t stride = alloc->mHal.drvState.lod[lod].stride;
324
325 switch(dk) {
326 case RS_KIND_PIXEL_RGBA:
327 return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
328 case RS_KIND_PIXEL_A:
329 return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
330 case RS_KIND_PIXEL_LA:
331 return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
332 case RS_KIND_PIXEL_RGB:
333 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
334 return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
335 }
336 return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
337 case RS_KIND_PIXEL_L:
338 return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
339
340 default:
341 break;
342 }
343
344 return 0.f;
345 }
346
347 static float4 __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint32_t iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)348 getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
349 rs_data_type dt, uint32_t lod) {
350
351 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
352
353 float4 result = {0.f, 0.f, 0.f, 255.f};
354
355 switch(dk) {
356 case RS_KIND_PIXEL_RGBA:
357 result = getElementAt4(p, iPixel);
358 break;
359 case RS_KIND_PIXEL_A:
360 result.w = getElementAt1(p, iPixel);
361 break;
362 case RS_KIND_PIXEL_LA:
363 result.zw = getElementAt2(p, iPixel);
364 result.xy = result.z;
365 break;
366 case RS_KIND_PIXEL_RGB:
367 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
368 result.xyz = getElementAt565(p, iPixel);
369 } else {
370 result.xyz = getElementAt3(p, iPixel);
371 }
372 break;
373 case RS_KIND_PIXEL_L:
374 result.xyz = getElementAt1(p, iPixel);
375
376 default:
377 //__builtin_unreachable();
378 break;
379 }
380
381 return result * 0.003921569f;
382 }
383
384 static float4 __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint2 iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)385 getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
386 rs_data_type dt, uint32_t lod) {
387
388 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
389 size_t stride = alloc->mHal.drvState.lod[lod].stride;
390
391 float4 result = {0.f, 0.f, 0.f, 255.f};
392
393 switch(dk) {
394 case RS_KIND_PIXEL_RGBA:
395 result = getElementAt4(p, stride, iPixel.x, iPixel.y);
396 break;
397 case RS_KIND_PIXEL_A:
398 result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
399 break;
400 case RS_KIND_PIXEL_LA:
401 result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
402 result.xy = result.z;
403 break;
404 case RS_KIND_PIXEL_RGB:
405 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
406 result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
407 } else {
408 result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
409 }
410 break;
411
412 default:
413 //__builtin_unreachable();
414 break;
415 }
416
417 return result * 0.003921569f;
418 }
419
420 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)421 sample_LOD_LinearPixel(const Allocation_t *alloc,
422 rs_data_kind dk, rs_data_type dt,
423 rs_sampler_value wrapS,
424 float uv, uint32_t lod) {
425
426 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
427
428 int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
429 float pixelUV = uv * (float)(sourceW);
430 int32_t iPixel = floor(pixelUV);
431 float frac = pixelUV - (float)iPixel;
432
433 if (frac < 0.5f) {
434 iPixel -= 1;
435 frac += 0.5f;
436 } else {
437 frac -= 0.5f;
438 }
439
440 float oneMinusFrac = 1.0f - frac;
441
442 float2 weights;
443 weights.x = oneMinusFrac;
444 weights.y = frac;
445
446 uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
447 uint32_t location = wrapI(wrapS, iPixel, sourceW);
448
449 return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
450 }
451
452 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)453 sample_LOD_NearestPixel(const Allocation_t *alloc,
454 rs_data_kind dk, rs_data_type dt,
455 rs_sampler_value wrapS,
456 float uv, uint32_t lod) {
457
458 int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
459 int32_t iPixel = floor(uv * (float)(sourceW));
460 uint32_t location = wrapI(wrapS, iPixel, sourceW);
461
462 return getNearestSample(alloc, location, dk, dt, lod);
463 }
464
465 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)466 sample_LOD_LinearPixel(const Allocation_t *alloc,
467 rs_data_kind dk, rs_data_type dt,
468 rs_sampler_value wrapS,
469 rs_sampler_value wrapT,
470 float2 uv, uint32_t lod) {
471
472 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
473
474 int sourceW = alloc->mHal.drvState.lod[lod].dimX;
475 int sourceH = alloc->mHal.drvState.lod[lod].dimY;
476
477 float pixelU = uv.x * sourceW;
478 float pixelV = uv.y * sourceH;
479 int iPixelU = floor(pixelU);
480 int iPixelV = floor(pixelV);
481 float fracU = pixelU - iPixelU;
482 float fracV = pixelV - iPixelV;
483
484 if (fracU < 0.5f) {
485 iPixelU -= 1;
486 fracU += 0.5f;
487 } else {
488 fracU -= 0.5f;
489 }
490 if (fracV < 0.5f) {
491 iPixelV -= 1;
492 fracV += 0.5f;
493 } else {
494 fracV -= 0.5f;
495 }
496 float oneMinusFracU = 1.0f - fracU;
497 float oneMinusFracV = 1.0f - fracV;
498
499 float w0 = oneMinusFracU * oneMinusFracV;
500 float w1 = fracU * oneMinusFracV;
501 float w2 = oneMinusFracU * fracV;
502 float w3 = fracU * fracV;
503
504 int nx = wrapI(wrapS, iPixelU + 1, sourceW);
505 int ny = wrapI(wrapT, iPixelV + 1, sourceH);
506 int lx = wrapI(wrapS, iPixelU, sourceW);
507 int ly = wrapI(wrapT, iPixelV, sourceH);
508
509 return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
510
511 }
512
513 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)514 sample_LOD_NearestPixel(const Allocation_t *alloc,
515 rs_data_kind dk, rs_data_type dt,
516 rs_sampler_value wrapS,
517 rs_sampler_value wrapT,
518 float2 uv, uint32_t lod) {
519 int sourceW = alloc->mHal.drvState.lod[lod].dimX;
520 int sourceH = alloc->mHal.drvState.lod[lod].dimY;
521
522 float2 dimF;
523 dimF.x = (float)(sourceW);
524 dimF.y = (float)(sourceH);
525 int2 iPixel = convert_int2(floor(uv * dimF));
526
527 uint2 location;
528 location.x = wrapI(wrapS, iPixel.x, sourceW);
529 location.y = wrapI(wrapT, iPixel.y, sourceH);
530 return getNearestSample(alloc, location, dk, dt, lod);
531 }
532
533 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float uv,float lod)534 rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
535
536 const Allocation_t *alloc = (const Allocation_t *)a.p;
537 const Sampler_t *prog = (Sampler_t *)s.p;
538 const Type_t *type = (Type_t *)alloc->mHal.state.type;
539 const Element_t *elem = type->mHal.state.element;
540 rs_data_kind dk = elem->mHal.state.dataKind;
541 rs_data_type dt = elem->mHal.state.dataType;
542 rs_sampler_value sampleMin = prog->mHal.state.minFilter;
543 rs_sampler_value sampleMag = prog->mHal.state.magFilter;
544 rs_sampler_value wrapS = prog->mHal.state.wrapS;
545
546 if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
547 return 0.f;
548 }
549
550 if (lod <= 0.0f) {
551 if (sampleMag == RS_SAMPLER_NEAREST) {
552 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
553 }
554 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, 0);
555 }
556
557 if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
558 uint32_t maxLOD = type->mHal.state.lodCount - 1;
559 lod = min(lod, (float)maxLOD);
560 uint32_t nearestLOD = (uint32_t)round(lod);
561 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, nearestLOD);
562 }
563
564 if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
565 uint32_t lod0 = (uint32_t)floor(lod);
566 uint32_t lod1 = (uint32_t)ceil(lod);
567 uint32_t maxLOD = type->mHal.state.lodCount - 1;
568 lod0 = min(lod0, maxLOD);
569 lod1 = min(lod1, maxLOD);
570 float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod0);
571 float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod1);
572 float frac = lod - (float)lod0;
573 return sample0 * (1.0f - frac) + sample1 * frac;
574 }
575
576 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
577 }
578
579 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float location)580 rsSample(rs_allocation a, rs_sampler s, float location) {
581 return rsSample(a, s, location, 0);
582 }
583
584
585 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv,float lod)586 rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
587
588 const Allocation_t *alloc = (const Allocation_t *)a.p;
589 const Sampler_t *prog = (Sampler_t *)s.p;
590 const Type_t *type = (Type_t *)alloc->mHal.state.type;
591 const Element_t *elem = type->mHal.state.element;
592 rs_data_kind dk = elem->mHal.state.dataKind;
593 rs_data_type dt = elem->mHal.state.dataType;
594 rs_sampler_value sampleMin = prog->mHal.state.minFilter;
595 rs_sampler_value sampleMag = prog->mHal.state.magFilter;
596 rs_sampler_value wrapS = prog->mHal.state.wrapS;
597 rs_sampler_value wrapT = prog->mHal.state.wrapT;
598
599 if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
600 return 0.f;
601 }
602
603 if (lod <= 0.0f) {
604 if (sampleMag == RS_SAMPLER_NEAREST) {
605 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
606 }
607 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
608 }
609
610 if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
611 uint32_t maxLOD = type->mHal.state.lodCount - 1;
612 lod = min(lod, (float)maxLOD);
613 uint32_t nearestLOD = (uint32_t)round(lod);
614 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, nearestLOD);
615 }
616
617 if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
618 uint32_t lod0 = (uint32_t)floor(lod);
619 uint32_t lod1 = (uint32_t)ceil(lod);
620 uint32_t maxLOD = type->mHal.state.lodCount - 1;
621 lod0 = min(lod0, maxLOD);
622 lod1 = min(lod1, maxLOD);
623 float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod0);
624 float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod1);
625 float frac = lod - (float)lod0;
626 return sample0 * (1.0f - frac) + sample1 * frac;
627 }
628
629 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
630 }
631
632 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv)633 rsSample(rs_allocation a, rs_sampler s, float2 uv) {
634
635 const Allocation_t *alloc = (const Allocation_t *)a.p;
636 const Sampler_t *prog = (Sampler_t *)s.p;
637 const Type_t *type = (Type_t *)alloc->mHal.state.type;
638 const Element_t *elem = type->mHal.state.element;
639 rs_data_kind dk = elem->mHal.state.dataKind;
640 rs_data_type dt = elem->mHal.state.dataType;
641 rs_sampler_value wrapS = prog->mHal.state.wrapS;
642 rs_sampler_value wrapT = prog->mHal.state.wrapT;
643
644 if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
645 return 0.f;
646 }
647
648 if (prog->mHal.state.magFilter == RS_SAMPLER_NEAREST) {
649 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
650 }
651 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
652 }
653