1 #include "rs_core.rsh"
2 #include "rs_structs.h"
3 
4 
// RGB565 unpacking helpers (bit layout taken from SkBitmap).
//
// A packed 16-bit pixel holds red in the top 5 bits, green in the middle
// 6 bits and blue in the low 5 bits.

// Width of each channel, in bits.
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

// Position of each channel's least significant bit inside the packed value.
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_B16_SHIFT    0

// Mask that isolates a channel after it has been shifted down to bit 0.
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

// Extract the raw 5/6/5-bit channel value from a packed pixel.
#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

// Widen a 5-bit red value to 8 bits. The value is shifted into the high bits
// and its own top bits are replicated into the vacated low bits, so that
// 0 maps to 0 and 31 maps to 255.
static inline unsigned SkR16ToR32(unsigned r) {
    const unsigned high = r << (8 - SK_R16_BITS);
    const unsigned low = r >> (2 * SK_R16_BITS - 8);
    return high | low;
}

// Widen a 6-bit green value to 8 bits (0 -> 0, 63 -> 255) by bit replication.
static inline unsigned SkG16ToG32(unsigned g) {
    const unsigned high = g << (8 - SK_G16_BITS);
    const unsigned low = g >> (2 * SK_G16_BITS - 8);
    return high | low;
}

// Widen a 5-bit blue value to 8 bits (0 -> 0, 31 -> 255) by bit replication.
static inline unsigned SkB16ToB32(unsigned b) {
    const unsigned high = b << (8 - SK_B16_BITS);
    const unsigned low = b >> (2 * SK_B16_BITS - 8);
    return high | low;
}

// Extract one channel from a packed 565 pixel and widen it to 8 bits.
#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
37 
getFrom565(uint16_t color)38 static float3 getFrom565(uint16_t color) {
39     float3 result;
40     result.x = (float)SkPacked16ToR32(color);
41     result.y = (float)SkPacked16ToG32(color);
42     result.z = (float)SkPacked16ToB32(color);
43     return result;
44 }
45 
46 /**
47 * Allocation sampling
48 */
// Reads the single-byte element at index x of a 1D buffer and returns it as
// a float (unscaled, 0..255).
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, int32_t x) {
    return (float)p[x];
}
54 
55 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,int32_t x)56         getElementAt2(const uint8_t *p, int32_t x) {
57     x *= 2;
58     float2 r = {p[x], p[x+1]};
59     return r;
60 }
61 
62 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,int32_t x)63         getElementAt3(const uint8_t *p, int32_t x) {
64     x *= 4;
65     float3 r = {p[x], p[x+1], p[x+2]};
66     return r;
67 }
68 
69 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,int32_t x)70         getElementAt4(const uint8_t *p, int32_t x) {
71     x *= 4;
72     const uchar4 *p2 = (const uchar4 *)&p[x];
73     return convert_float4(p2[0]);
74 }
75 
76 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,int32_t x)77         getElementAt565(const uint8_t *p, int32_t x) {
78     x *= 2;
79     float3 r = getFrom565(((const uint16_t *)p)[0]);
80     return r;
81 }
82 
// Reads the single-byte element at column x of row y in a 2D buffer whose
// rows are `stride` bytes apart, and returns it as an unscaled float.
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    const uint8_t *row = p + y * stride;
    return (float)row[x];
}
89 
90 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,size_t stride,int32_t x,int32_t y)91         getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
92     p += y * stride;
93     x *= 2;
94     float2 r = {p[x], p[x+1]};
95     return r;
96 }
97 
98 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,size_t stride,int32_t x,int32_t y)99         getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
100     p += y * stride;
101     x *= 4;
102     float3 r = {p[x], p[x+1], p[x+2]};
103     return r;
104 }
105 
106 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,size_t stride,int32_t x,int32_t y)107         getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
108     p += y * stride;
109     x *= 4;
110     float4 r = {p[x], p[x+1], p[x+2], p[x+3]};
111     return r;
112 }
113 
114 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,size_t stride,int32_t x,int32_t y)115         getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
116     p += y * stride;
117     x *= 2;
118     float3 r = getFrom565(((const uint16_t *)p)[0]);
119     return r;
120 }
121 
122 
123 
124 
125 
126 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)127             getSample_A(const uint8_t *p, int32_t iPixel,
128                           int32_t next, float w0, float w1) {
129     float p0 = getElementAt1(p, iPixel);
130     float p1 = getElementAt1(p, next);
131     float r = p0 * w0 + p1 * w1;
132     r *= (1.f / 255.f);
133     float4 ret = {0.f, 0.f, 0.f, r};
134     return ret;
135 }
136 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)137             getSample_L(const uint8_t *p, int32_t iPixel,
138                           int32_t next, float w0, float w1) {
139     float p0 = getElementAt1(p, iPixel);
140     float p1 = getElementAt1(p, next);
141     float r = p0 * w0 + p1 * w1;
142     r *= (1.f / 255.f);
143     float4 ret = {r, r, r, 1.f};
144     return ret;
145 }
146 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)147             getSample_LA(const uint8_t *p, int32_t iPixel,
148                            int32_t next, float w0, float w1) {
149     float2 p0 = getElementAt2(p, iPixel);
150     float2 p1 = getElementAt2(p, next);
151     float2 r = p0 * w0 + p1 * w1;
152     r *= (1.f / 255.f);
153     float4 ret = {r.x, r.x, r.x, r.y};
154     return ret;
155 }
156 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)157             getSample_RGB(const uint8_t *p, int32_t iPixel,
158                             int32_t next, float w0, float w1) {
159     float3 p0 = getElementAt3(p, iPixel);
160     float3 p1 = getElementAt3(p, next);
161     float3 r = p0 * w0 + p1 * w1;
162     r *= (1.f / 255.f);
163     float4 ret = {r.x, r.x, r.z, 1.f};
164     return ret;
165 }
166 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)167             getSample_565(const uint8_t *p, int32_t iPixel,
168                            int32_t next, float w0, float w1) {
169     float3 p0 = getElementAt565(p, iPixel);
170     float3 p1 = getElementAt565(p, next);
171     float3 r = p0 * w0 + p1 * w1;
172     r *= (1.f / 255.f);
173     float4 ret = {r.x, r.x, r.z, 1.f};
174     return ret;
175 }
176 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)177             getSample_RGBA(const uint8_t *p, int32_t iPixel,
178                              int32_t next, float w0, float w1) {
179     float4 p0 = getElementAt4(p, iPixel);
180     float4 p1 = getElementAt4(p, next);
181     float4 r = p0 * w0 + p1 * w1;
182     r *= (1.f / 255.f);
183     return r;
184 }
185 
186 
187 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)188             getSample_A(const uint8_t *p, size_t stride,
189                           int locX, int locY, int nextX, int nextY,
190                           float w0, float w1, float w2, float w3) {
191     float p0 = getElementAt1(p, stride, locX, locY);
192     float p1 = getElementAt1(p, stride, nextX, locY);
193     float p2 = getElementAt1(p, stride, locX, nextY);
194     float p3 = getElementAt1(p, stride, nextX, nextY);
195     float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
196     r *= (1.f / 255.f);
197     float4 ret = {0.f, 0.f, 0.f, r};
198     return ret;
199 }
200 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)201             getSample_L(const uint8_t *p, size_t stride,
202                          int locX, int locY, int nextX, int nextY,
203                          float w0, float w1, float w2, float w3) {
204     float p0 = getElementAt1(p, stride, locX, locY);
205     float p1 = getElementAt1(p, stride, nextX, locY);
206     float p2 = getElementAt1(p, stride, locX, nextY);
207     float p3 = getElementAt1(p, stride, nextX, nextY);
208     float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
209     r *= (1.f / 255.f);
210     float4 ret = {r, r, r, 1.f};
211     return ret;
212 }
213 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)214             getSample_LA(const uint8_t *p, size_t stride,
215                          int locX, int locY, int nextX, int nextY,
216                          float w0, float w1, float w2, float w3) {
217     float2 p0 = getElementAt2(p, stride, locX, locY);
218     float2 p1 = getElementAt2(p, stride, nextX, locY);
219     float2 p2 = getElementAt2(p, stride, locX, nextY);
220     float2 p3 = getElementAt2(p, stride, nextX, nextY);
221     float2 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
222     r *= (1.f / 255.f);
223     float4 ret = {r.x, r.x, r.x, r.y};
224     return ret;
225 }
226 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)227             getSample_RGB(const uint8_t *p, size_t stride,
228                          int locX, int locY, int nextX, int nextY,
229                          float w0, float w1, float w2, float w3) {
230     float4 p0 = getElementAt4(p, stride, locX, locY);
231     float4 p1 = getElementAt4(p, stride, nextX, locY);
232     float4 p2 = getElementAt4(p, stride, locX, nextY);
233     float4 p3 = getElementAt4(p, stride, nextX, nextY);
234     float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
235     r *= (1.f / 255.f);
236     float4 ret = {r.x, r.y, r.z, 1.f};
237     return ret;
238 }
239 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)240             getSample_RGBA(const uint8_t *p, size_t stride,
241                          int locX, int locY, int nextX, int nextY,
242                          float w0, float w1, float w2, float w3) {
243     float4 p0 = getElementAt4(p, stride, locX, locY);
244     float4 p1 = getElementAt4(p, stride, nextX, locY);
245     float4 p2 = getElementAt4(p, stride, locX, nextY);
246     float4 p3 = getElementAt4(p, stride, nextX, nextY);
247     float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
248     r *= (1.f / 255.f);
249     return r;
250 }
251 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)252             getSample_565(const uint8_t *p, size_t stride,
253                          int locX, int locY, int nextX, int nextY,
254                          float w0, float w1, float w2, float w3) {
255     float3 p0 = getElementAt565(p, stride, locX, locY);
256     float3 p1 = getElementAt565(p, stride, nextX, locY);
257     float3 p2 = getElementAt565(p, stride, locX, nextY);
258     float3 p3 = getElementAt565(p, stride, nextX, nextY);
259     float3 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
260     r *= (1.f / 255.f);
261     float4 ret;
262     ret.rgb = r;
263     ret.w = 1.f;
264     return ret;
265 }
266 
267 static float4 __attribute__((overloadable))
getBilinearSample1D(const Allocation_t * alloc,float2 weights,uint32_t iPixel,uint32_t next,rs_data_kind dk,rs_data_type dt,uint32_t lod)268         getBilinearSample1D(const Allocation_t *alloc, float2 weights,
269                           uint32_t iPixel, uint32_t next,
270                           rs_data_kind dk, rs_data_type dt, uint32_t lod) {
271 
272      const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
273 
274      switch(dk) {
275      case RS_KIND_PIXEL_RGBA:
276          return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
277      case RS_KIND_PIXEL_A:
278          return getSample_A(p, iPixel, next, weights.x, weights.y);
279      case RS_KIND_PIXEL_RGB:
280          if (dt == RS_TYPE_UNSIGNED_5_6_5) {
281              return getSample_565(p, iPixel, next, weights.x, weights.y);
282          }
283          return getSample_RGB(p, iPixel, next, weights.x, weights.y);
284      case RS_KIND_PIXEL_L:
285          return getSample_L(p, iPixel, next, weights.x, weights.y);
286      case RS_KIND_PIXEL_LA:
287          return getSample_LA(p, iPixel, next, weights.x, weights.y);
288 
289      default:
290          //__builtin_unreachable();
291          break;
292      }
293 
294      //__builtin_unreachable();
295      return 0.f;
296 }
297 
// Maps an integer texel coordinate into [0, size - 1] according to the
// sampler wrap mode.
//
// RS_SAMPLER_WRAP            coordinate repeats with period `size`
// RS_SAMPLER_MIRRORED_REPEAT coordinate repeats with period 2 * size, with
//                            the second half of each period reflected
// any other mode             coordinate is clamped to the valid range
//
// The final clamp is applied in every mode.
static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
    switch (wrap) {
    case RS_SAMPLER_WRAP:
        coord %= size;
        // C's % keeps the sign of the dividend; shift negatives back in range.
        if (coord < 0) {
            coord += size;
        }
        break;
    case RS_SAMPLER_MIRRORED_REPEAT: {
        const int32_t period = size * 2;
        coord %= period;
        if (coord < 0) {
            coord += period;
        }
        // Second half of the period walks backwards through the texels.
        if (coord >= size) {
            coord = (period - 1) - coord;
        }
        break;
    }
    default:
        break;
    }
    return (uint32_t)max(0, min(coord, size - 1));
}
316 
317 static float4 __attribute__((overloadable))
getBilinearSample2D(const Allocation_t * alloc,float w0,float w1,float w2,float w3,int lx,int ly,int nx,int ny,rs_data_kind dk,rs_data_type dt,uint32_t lod)318         getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
319                           int lx, int ly, int nx, int ny,
320                           rs_data_kind dk, rs_data_type dt, uint32_t lod) {
321 
322     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
323     size_t stride = alloc->mHal.drvState.lod[lod].stride;
324 
325     switch(dk) {
326     case RS_KIND_PIXEL_RGBA:
327         return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
328     case RS_KIND_PIXEL_A:
329         return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
330     case RS_KIND_PIXEL_LA:
331         return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
332     case RS_KIND_PIXEL_RGB:
333         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
334             return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
335         }
336         return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
337     case RS_KIND_PIXEL_L:
338         return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
339 
340     default:
341         break;
342     }
343 
344     return 0.f;
345 }
346 
347 static float4  __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint32_t iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)348         getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
349                          rs_data_type dt, uint32_t lod) {
350 
351     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
352 
353     float4 result = {0.f, 0.f, 0.f, 255.f};
354 
355     switch(dk) {
356     case RS_KIND_PIXEL_RGBA:
357         result = getElementAt4(p, iPixel);
358         break;
359     case RS_KIND_PIXEL_A:
360         result.w = getElementAt1(p, iPixel);
361         break;
362     case RS_KIND_PIXEL_LA:
363         result.zw = getElementAt2(p, iPixel);
364         result.xy = result.z;
365         break;
366     case RS_KIND_PIXEL_RGB:
367         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
368             result.xyz = getElementAt565(p, iPixel);
369         } else {
370             result.xyz = getElementAt3(p, iPixel);
371         }
372         break;
373     case RS_KIND_PIXEL_L:
374         result.xyz = getElementAt1(p, iPixel);
375 
376     default:
377         //__builtin_unreachable();
378         break;
379     }
380 
381     return result * 0.003921569f;
382 }
383 
384 static float4  __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint2 iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)385         getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
386                          rs_data_type dt, uint32_t lod) {
387 
388     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
389     size_t stride = alloc->mHal.drvState.lod[lod].stride;
390 
391     float4 result = {0.f, 0.f, 0.f, 255.f};
392 
393     switch(dk) {
394     case RS_KIND_PIXEL_RGBA:
395         result = getElementAt4(p, stride, iPixel.x, iPixel.y);
396         break;
397     case RS_KIND_PIXEL_A:
398         result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
399         break;
400     case RS_KIND_PIXEL_LA:
401         result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
402         result.xy = result.z;
403         break;
404     case RS_KIND_PIXEL_RGB:
405         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
406             result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
407         } else {
408             result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
409         }
410         break;
411 
412     default:
413         //__builtin_unreachable();
414         break;
415     }
416 
417     return result * 0.003921569f;
418 }
419 
420 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)421         sample_LOD_LinearPixel(const Allocation_t *alloc,
422                                rs_data_kind dk, rs_data_type dt,
423                                rs_sampler_value wrapS,
424                                float uv, uint32_t lod) {
425 
426     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
427 
428     int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
429     float pixelUV = uv * (float)(sourceW);
430     int32_t iPixel = floor(pixelUV);
431     float frac = pixelUV - (float)iPixel;
432 
433     if (frac < 0.5f) {
434         iPixel -= 1;
435         frac += 0.5f;
436     } else {
437         frac -= 0.5f;
438     }
439 
440     float oneMinusFrac = 1.0f - frac;
441 
442     float2 weights;
443     weights.x = oneMinusFrac;
444     weights.y = frac;
445 
446     uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
447     uint32_t location = wrapI(wrapS, iPixel, sourceW);
448 
449     return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
450 }
451 
452 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)453         sample_LOD_NearestPixel(const Allocation_t *alloc,
454                                 rs_data_kind dk, rs_data_type dt,
455                                 rs_sampler_value wrapS,
456                                 float uv, uint32_t lod) {
457 
458     int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
459     int32_t iPixel = floor(uv * (float)(sourceW));
460     uint32_t location = wrapI(wrapS, iPixel, sourceW);
461 
462     return getNearestSample(alloc, location, dk, dt, lod);
463 }
464 
465 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)466         sample_LOD_LinearPixel(const Allocation_t *alloc,
467                                rs_data_kind dk, rs_data_type dt,
468                                rs_sampler_value wrapS,
469                                rs_sampler_value wrapT,
470                                float2 uv, uint32_t lod) {
471 
472     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
473 
474     int sourceW = alloc->mHal.drvState.lod[lod].dimX;
475     int sourceH = alloc->mHal.drvState.lod[lod].dimY;
476 
477     float pixelU = uv.x * sourceW;
478     float pixelV = uv.y * sourceH;
479     int iPixelU = floor(pixelU);
480     int iPixelV = floor(pixelV);
481     float fracU = pixelU - iPixelU;
482     float fracV = pixelV - iPixelV;
483 
484     if (fracU < 0.5f) {
485         iPixelU -= 1;
486         fracU += 0.5f;
487     } else {
488         fracU -= 0.5f;
489     }
490     if (fracV < 0.5f) {
491         iPixelV -= 1;
492         fracV += 0.5f;
493     } else {
494         fracV -= 0.5f;
495     }
496     float oneMinusFracU = 1.0f - fracU;
497     float oneMinusFracV = 1.0f - fracV;
498 
499     float w0 = oneMinusFracU * oneMinusFracV;
500     float w1 = fracU * oneMinusFracV;
501     float w2 = oneMinusFracU * fracV;
502     float w3 = fracU * fracV;
503 
504     int nx = wrapI(wrapS, iPixelU + 1, sourceW);
505     int ny = wrapI(wrapT, iPixelV + 1, sourceH);
506     int lx = wrapI(wrapS, iPixelU, sourceW);
507     int ly = wrapI(wrapT, iPixelV, sourceH);
508 
509     return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
510 
511 }
512 
513 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)514         sample_LOD_NearestPixel(const Allocation_t *alloc,
515                                 rs_data_kind dk, rs_data_type dt,
516                                 rs_sampler_value wrapS,
517                                 rs_sampler_value wrapT,
518                                 float2 uv, uint32_t lod) {
519     int sourceW = alloc->mHal.drvState.lod[lod].dimX;
520     int sourceH = alloc->mHal.drvState.lod[lod].dimY;
521 
522     float2 dimF;
523     dimF.x = (float)(sourceW);
524     dimF.y = (float)(sourceH);
525     int2 iPixel = convert_int2(floor(uv * dimF));
526 
527     uint2 location;
528     location.x = wrapI(wrapS, iPixel.x, sourceW);
529     location.y = wrapI(wrapT, iPixel.y, sourceH);
530     return getNearestSample(alloc, location, dk, dt, lod);
531 }
532 
533 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float uv,float lod)534         rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
535 
536     const Allocation_t *alloc = (const Allocation_t *)a.p;
537     const Sampler_t *prog = (Sampler_t *)s.p;
538     const Type_t *type = (Type_t *)alloc->mHal.state.type;
539     const Element_t *elem = type->mHal.state.element;
540     rs_data_kind dk = elem->mHal.state.dataKind;
541     rs_data_type dt = elem->mHal.state.dataType;
542     rs_sampler_value sampleMin = prog->mHal.state.minFilter;
543     rs_sampler_value sampleMag = prog->mHal.state.magFilter;
544     rs_sampler_value wrapS = prog->mHal.state.wrapS;
545 
546     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
547         return 0.f;
548     }
549 
550     if (lod <= 0.0f) {
551         if (sampleMag == RS_SAMPLER_NEAREST) {
552             return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
553         }
554         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, 0);
555     }
556 
557     if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
558         uint32_t maxLOD = type->mHal.state.lodCount - 1;
559         lod = min(lod, (float)maxLOD);
560         uint32_t nearestLOD = (uint32_t)round(lod);
561         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, nearestLOD);
562     }
563 
564     if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
565         uint32_t lod0 = (uint32_t)floor(lod);
566         uint32_t lod1 = (uint32_t)ceil(lod);
567         uint32_t maxLOD = type->mHal.state.lodCount - 1;
568         lod0 = min(lod0, maxLOD);
569         lod1 = min(lod1, maxLOD);
570         float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod0);
571         float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod1);
572         float frac = lod - (float)lod0;
573         return sample0 * (1.0f - frac) + sample1 * frac;
574     }
575 
576     return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
577 }
578 
579 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float location)580         rsSample(rs_allocation a, rs_sampler s, float location) {
581     return rsSample(a, s, location, 0);
582 }
583 
584 
585 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv,float lod)586         rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
587 
588     const Allocation_t *alloc = (const Allocation_t *)a.p;
589     const Sampler_t *prog = (Sampler_t *)s.p;
590     const Type_t *type = (Type_t *)alloc->mHal.state.type;
591     const Element_t *elem = type->mHal.state.element;
592     rs_data_kind dk = elem->mHal.state.dataKind;
593     rs_data_type dt = elem->mHal.state.dataType;
594     rs_sampler_value sampleMin = prog->mHal.state.minFilter;
595     rs_sampler_value sampleMag = prog->mHal.state.magFilter;
596     rs_sampler_value wrapS = prog->mHal.state.wrapS;
597     rs_sampler_value wrapT = prog->mHal.state.wrapT;
598 
599     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
600         return 0.f;
601     }
602 
603     if (lod <= 0.0f) {
604         if (sampleMag == RS_SAMPLER_NEAREST) {
605             return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
606         }
607         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
608     }
609 
610     if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
611         uint32_t maxLOD = type->mHal.state.lodCount - 1;
612         lod = min(lod, (float)maxLOD);
613         uint32_t nearestLOD = (uint32_t)round(lod);
614         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, nearestLOD);
615     }
616 
617     if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
618         uint32_t lod0 = (uint32_t)floor(lod);
619         uint32_t lod1 = (uint32_t)ceil(lod);
620         uint32_t maxLOD = type->mHal.state.lodCount - 1;
621         lod0 = min(lod0, maxLOD);
622         lod1 = min(lod1, maxLOD);
623         float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod0);
624         float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod1);
625         float frac = lod - (float)lod0;
626         return sample0 * (1.0f - frac) + sample1 * frac;
627     }
628 
629     return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
630 }
631 
632 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv)633         rsSample(rs_allocation a, rs_sampler s, float2 uv) {
634 
635     const Allocation_t *alloc = (const Allocation_t *)a.p;
636     const Sampler_t *prog = (Sampler_t *)s.p;
637     const Type_t *type = (Type_t *)alloc->mHal.state.type;
638     const Element_t *elem = type->mHal.state.element;
639     rs_data_kind dk = elem->mHal.state.dataKind;
640     rs_data_type dt = elem->mHal.state.dataType;
641     rs_sampler_value wrapS = prog->mHal.state.wrapS;
642     rs_sampler_value wrapT = prog->mHal.state.wrapT;
643 
644     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
645         return 0.f;
646     }
647 
648     if (prog->mHal.state.magFilter == RS_SAMPLER_NEAREST) {
649         return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
650     }
651     return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
652 }
653