1 #include "rs_core.rsh"
2 #include "rs_structs.h"
3 
4 
// RGB565 channel description (conversion bits taken from SkBitmap).
// Red occupies the top 5 bits, green the middle 6, blue the low 5.
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

// Position of the least significant bit of each channel in a packed pixel.
#define SK_B16_SHIFT    0
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_R16_SHIFT    (SK_G16_BITS + SK_B16_BITS)

// All-ones masks, one per channel, as wide as the channel itself.
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)

// Pull a single raw (unexpanded) channel value out of a packed pixel.
#define SkGetPackedR16(color)   (SK_R16_MASK & ((unsigned)(color) >> SK_R16_SHIFT))
#define SkGetPackedG16(color)   (SK_G16_MASK & ((unsigned)(color) >> SK_G16_SHIFT))
#define SkGetPackedB16(color)   (SK_B16_MASK & ((unsigned)(color) >> SK_B16_SHIFT))
21 
SkR16ToR32(unsigned r)22 static inline unsigned SkR16ToR32(unsigned r) {
23     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
24 }
25 
SkG16ToG32(unsigned g)26 static inline unsigned SkG16ToG32(unsigned g) {
27     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
28 }
29 
SkB16ToB32(unsigned b)30 static inline unsigned SkB16ToB32(unsigned b) {
31     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
32 }
33 
34 #define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
35 #define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
36 #define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
37 
getFrom565(uint16_t color)38 static float3 getFrom565(uint16_t color) {
39     float3 result;
40     result.x = (float)SkPacked16ToR32(color);
41     result.y = (float)SkPacked16ToG32(color);
42     result.z = (float)SkPacked16ToB32(color);
43     return result;
44 }
45 
46 /**
47 * Allocation sampling
48 */
// 1D fetch of a one-byte element: returns byte x of p widened to float.
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, int32_t x) {
    return (float)p[x];
}
54 
55 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,int32_t x)56         getElementAt2(const uint8_t *p, int32_t x) {
57     x *= 2;
58     float2 r = {p[x], p[x+1]};
59     return r;
60 }
61 
62 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,int32_t x)63         getElementAt3(const uint8_t *p, int32_t x) {
64     x *= 4;
65     float3 r = {p[x], p[x+1], p[x+2]};
66     return r;
67 }
68 
69 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,int32_t x)70         getElementAt4(const uint8_t *p, int32_t x) {
71     x *= 4;
72     const uchar4 *p2 = (const uchar4 *)&p[x];
73     return convert_float4(p2[0]);
74 }
75 
76 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,int32_t x)77         getElementAt565(const uint8_t *p, int32_t x) {
78     x *= 2;
79     float3 r = getFrom565(((const uint16_t *)p)[0]);
80     return r;
81 }
82 
// 2D fetch of a one-byte element: row y starts at byte offset y * stride,
// and the element is byte x of that row, widened to float.
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    const uint8_t *row = p + y * stride;
    return (float)row[x];
}
89 
90 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,size_t stride,int32_t x,int32_t y)91         getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
92     p += y * stride;
93     x *= 2;
94     float2 r = {p[x], p[x+1]};
95     return r;
96 }
97 
98 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,size_t stride,int32_t x,int32_t y)99         getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
100     p += y * stride;
101     x *= 4;
102     float3 r = {p[x], p[x+1], p[x+2]};
103     return r;
104 }
105 
106 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,size_t stride,int32_t x,int32_t y)107         getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
108     p += y * stride;
109     x *= 4;
110     float4 r = {p[x], p[x+1], p[x+2], p[x+3]};
111     return r;
112 }
113 
114 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,size_t stride,int32_t x,int32_t y)115         getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
116     p += y * stride;
117     x *= 2;
118     float3 r = getFrom565(((const uint16_t *)p)[0]);
119     return r;
120 }
121 
122 
123 
124 
125 
126 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)127             getSample_A(const uint8_t *p, int32_t iPixel,
128                           int32_t next, float w0, float w1) {
129     float p0 = getElementAt1(p, iPixel);
130     float p1 = getElementAt1(p, next);
131     float r = p0 * w0 + p1 * w1;
132     r *= (1.f / 255.f);
133     float4 ret = {0.f, 0.f, 0.f, r};
134     return ret;
135 }
136 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)137             getSample_L(const uint8_t *p, int32_t iPixel,
138                           int32_t next, float w0, float w1) {
139     float p0 = getElementAt1(p, iPixel);
140     float p1 = getElementAt1(p, next);
141     float r = p0 * w0 + p1 * w1;
142     r *= (1.f / 255.f);
143     float4 ret = {r, r, r, 1.f};
144     return ret;
145 }
146 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)147             getSample_LA(const uint8_t *p, int32_t iPixel,
148                            int32_t next, float w0, float w1) {
149     float2 p0 = getElementAt2(p, iPixel);
150     float2 p1 = getElementAt2(p, next);
151     float2 r = p0 * w0 + p1 * w1;
152     r *= (1.f / 255.f);
153     float4 ret = {r.x, r.x, r.x, r.y};
154     return ret;
155 }
156 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)157             getSample_RGB(const uint8_t *p, int32_t iPixel,
158                             int32_t next, float w0, float w1) {
159     float3 p0 = getElementAt3(p, iPixel);
160     float3 p1 = getElementAt3(p, next);
161     float3 r = p0 * w0 + p1 * w1;
162     r *= (1.f / 255.f);
163     float4 ret = {r.x, r.x, r.z, 1.f};
164     return ret;
165 }
166 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)167             getSample_565(const uint8_t *p, int32_t iPixel,
168                            int32_t next, float w0, float w1) {
169     float3 p0 = getElementAt565(p, iPixel);
170     float3 p1 = getElementAt565(p, next);
171     float3 r = p0 * w0 + p1 * w1;
172     r *= (1.f / 255.f);
173     float4 ret = {r.x, r.x, r.z, 1.f};
174     return ret;
175 }
176 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)177             getSample_RGBA(const uint8_t *p, int32_t iPixel,
178                              int32_t next, float w0, float w1) {
179     float4 p0 = getElementAt4(p, iPixel);
180     float4 p1 = getElementAt4(p, next);
181     float4 r = p0 * w0 + p1 * w1;
182     r *= (1.f / 255.f);
183     return r;
184 }
185 
186 
187 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)188             getSample_A(const uint8_t *p, size_t stride,
189                           int locX, int locY, int nextX, int nextY,
190                           float w0, float w1, float w2, float w3) {
191     float p0 = getElementAt1(p, stride, locX, locY);
192     float p1 = getElementAt1(p, stride, nextX, locY);
193     float p2 = getElementAt1(p, stride, locX, nextY);
194     float p3 = getElementAt1(p, stride, nextX, nextY);
195     float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
196     r *= (1.f / 255.f);
197     float4 ret = {0.f, 0.f, 0.f, r};
198     return ret;
199 }
200 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)201             getSample_L(const uint8_t *p, size_t stride,
202                          int locX, int locY, int nextX, int nextY,
203                          float w0, float w1, float w2, float w3) {
204     float p0 = getElementAt1(p, stride, locX, locY);
205     float p1 = getElementAt1(p, stride, nextX, locY);
206     float p2 = getElementAt1(p, stride, locX, nextY);
207     float p3 = getElementAt1(p, stride, nextX, nextY);
208     float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
209     r *= (1.f / 255.f);
210     float4 ret = {r, r, r, 1.f};
211     return ret;
212 }
213 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)214             getSample_LA(const uint8_t *p, size_t stride,
215                          int locX, int locY, int nextX, int nextY,
216                          float w0, float w1, float w2, float w3) {
217     float2 p0 = getElementAt2(p, stride, locX, locY);
218     float2 p1 = getElementAt2(p, stride, nextX, locY);
219     float2 p2 = getElementAt2(p, stride, locX, nextY);
220     float2 p3 = getElementAt2(p, stride, nextX, nextY);
221     float2 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
222     r *= (1.f / 255.f);
223     float4 ret = {r.x, r.x, r.x, r.y};
224     return ret;
225 }
226 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)227             getSample_RGB(const uint8_t *p, size_t stride,
228                          int locX, int locY, int nextX, int nextY,
229                          float w0, float w1, float w2, float w3) {
230     float4 p0 = getElementAt4(p, stride, locX, locY);
231     float4 p1 = getElementAt4(p, stride, nextX, locY);
232     float4 p2 = getElementAt4(p, stride, locX, nextY);
233     float4 p3 = getElementAt4(p, stride, nextX, nextY);
234     float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
235     r *= (1.f / 255.f);
236     float4 ret = {r.x, r.y, r.z, 1.f};
237     return ret;
238 }
239 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)240             getSample_RGBA(const uint8_t *p, size_t stride,
241                          int locX, int locY, int nextX, int nextY,
242                          float w0, float w1, float w2, float w3) {
243     float4 p0 = getElementAt4(p, stride, locX, locY);
244     float4 p1 = getElementAt4(p, stride, nextX, locY);
245     float4 p2 = getElementAt4(p, stride, locX, nextY);
246     float4 p3 = getElementAt4(p, stride, nextX, nextY);
247     float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
248     r *= (1.f / 255.f);
249     return r;
250 }
251 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)252             getSample_565(const uint8_t *p, size_t stride,
253                          int locX, int locY, int nextX, int nextY,
254                          float w0, float w1, float w2, float w3) {
255     float3 p0 = getElementAt565(p, stride, locX, locY);
256     float3 p1 = getElementAt565(p, stride, nextX, locY);
257     float3 p2 = getElementAt565(p, stride, locX, nextY);
258     float3 p3 = getElementAt565(p, stride, nextX, nextY);
259     float3 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
260     r *= (1.f / 255.f);
261     float4 ret;
262     ret.rgb = r;
263     ret.w = 1.f;
264     return ret;
265 }
266 
267 static float4 __attribute__((overloadable))
getBilinearSample1D(const Allocation_t * alloc,float2 weights,uint32_t iPixel,uint32_t next,rs_data_kind dk,rs_data_type dt,uint32_t lod)268         getBilinearSample1D(const Allocation_t *alloc, float2 weights,
269                           uint32_t iPixel, uint32_t next,
270                           rs_data_kind dk, rs_data_type dt, uint32_t lod) {
271 
272      const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
273 
274      switch(dk) {
275      case RS_KIND_PIXEL_RGBA:
276          return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
277      case RS_KIND_PIXEL_A:
278          return getSample_A(p, iPixel, next, weights.x, weights.y);
279      case RS_KIND_PIXEL_RGB:
280          if (dt == RS_TYPE_UNSIGNED_5_6_5) {
281              return getSample_565(p, iPixel, next, weights.x, weights.y);
282          }
283          return getSample_RGB(p, iPixel, next, weights.x, weights.y);
284      case RS_KIND_PIXEL_L:
285          return getSample_L(p, iPixel, next, weights.x, weights.y);
286      case RS_KIND_PIXEL_LA:
287          return getSample_LA(p, iPixel, next, weights.x, weights.y);
288 
289      default:
290          //__builtin_unreachable();
291          break;
292      }
293 
294      //__builtin_unreachable();
295      return 0.f;
296 }
297 
// Maps an integer texel coordinate into [0, size - 1] according to the
// sampler wrap mode:
//   RS_SAMPLER_WRAP            - coordinate repeats with period size.
//   RS_SAMPLER_MIRRORED_REPEAT - coordinate repeats with period 2 * size,
//                                the second half of each period reflected.
//   anything else              - coordinate is simply clamped.
// The final clamp is applied in every mode.
static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
    if (wrap == RS_SAMPLER_WRAP) {
        coord %= size;
        if (coord < 0) {
            // C remainder keeps the sign of the dividend; shift into range.
            coord += size;
        }
    } else if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
        const int32_t period = size * 2;
        coord %= period;
        if (coord < 0) {
            coord = period + coord;
        }
        if (coord >= size) {
            // Second half of the period runs backwards.
            coord = (period - 1) - coord;
        }
    }
    return (uint32_t)max(0, min(coord, size - 1));
}
316 
317 static float4 __attribute__((overloadable))
getBilinearSample2D(const Allocation_t * alloc,float w0,float w1,float w2,float w3,int lx,int ly,int nx,int ny,rs_data_kind dk,rs_data_type dt,uint32_t lod)318         getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
319                           int lx, int ly, int nx, int ny,
320                           rs_data_kind dk, rs_data_type dt, uint32_t lod) {
321 
322     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
323     size_t stride = alloc->mHal.drvState.lod[lod].stride;
324 
325     switch(dk) {
326     case RS_KIND_PIXEL_RGBA:
327         return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
328     case RS_KIND_PIXEL_A:
329         return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
330     case RS_KIND_PIXEL_LA:
331         return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
332     case RS_KIND_PIXEL_RGB:
333         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
334             return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
335         }
336         return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
337     case RS_KIND_PIXEL_L:
338         return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
339 
340     default:
341         break;
342     }
343 
344     return 0.f;
345 }
346 
347 static float4  __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint32_t iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)348         getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
349                          rs_data_type dt, uint32_t lod) {
350 
351     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
352 
353     float4 result = {0.f, 0.f, 0.f, 255.f};
354 
355     switch(dk) {
356     case RS_KIND_PIXEL_RGBA:
357         result = getElementAt4(p, iPixel);
358         break;
359     case RS_KIND_PIXEL_A:
360         result.w = getElementAt1(p, iPixel);
361         break;
362     case RS_KIND_PIXEL_LA:
363         result.zw = getElementAt2(p, iPixel);
364         result.xy = result.z;
365         break;
366     case RS_KIND_PIXEL_RGB:
367         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
368             result.xyz = getElementAt565(p, iPixel);
369         } else {
370             result.xyz = getElementAt3(p, iPixel);
371         }
372         break;
373     case RS_KIND_PIXEL_L:
374         result.xyz = getElementAt1(p, iPixel);
375 
376     default:
377         //__builtin_unreachable();
378         break;
379     }
380 
381     return result * 0.003921569f;
382 }
383 
384 static float4  __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint2 iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)385         getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
386                          rs_data_type dt, uint32_t lod) {
387 
388     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
389     size_t stride = alloc->mHal.drvState.lod[lod].stride;
390 
391     float4 result = {0.f, 0.f, 0.f, 255.f};
392 
393     switch(dk) {
394     case RS_KIND_PIXEL_RGBA:
395         result = getElementAt4(p, stride, iPixel.x, iPixel.y);
396         break;
397     case RS_KIND_PIXEL_A:
398         result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
399         break;
400     case RS_KIND_PIXEL_LA:
401         result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
402         result.xy = result.z;
403         break;
404     case RS_KIND_PIXEL_RGB:
405         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
406             result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
407         } else {
408             result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
409         }
410         break;
411 
412     default:
413         //__builtin_unreachable();
414         break;
415     }
416 
417     return result * 0.003921569f;
418 }
419 
420 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)421         sample_LOD_LinearPixel(const Allocation_t *alloc,
422                                rs_data_kind dk, rs_data_type dt,
423                                rs_sampler_value wrapS,
424                                float uv, uint32_t lod) {
425 
426     int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
427     float pixelUV = uv * (float)(sourceW);
428     int32_t iPixel = floor(pixelUV);
429     float frac = pixelUV - (float)iPixel;
430 
431     if (frac < 0.5f) {
432         iPixel -= 1;
433         frac += 0.5f;
434     } else {
435         frac -= 0.5f;
436     }
437 
438     float oneMinusFrac = 1.0f - frac;
439 
440     float2 weights;
441     weights.x = oneMinusFrac;
442     weights.y = frac;
443 
444     uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
445     uint32_t location = wrapI(wrapS, iPixel, sourceW);
446 
447     return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
448 }
449 
450 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)451         sample_LOD_NearestPixel(const Allocation_t *alloc,
452                                 rs_data_kind dk, rs_data_type dt,
453                                 rs_sampler_value wrapS,
454                                 float uv, uint32_t lod) {
455 
456     int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
457     int32_t iPixel = floor(uv * (float)(sourceW));
458     uint32_t location = wrapI(wrapS, iPixel, sourceW);
459 
460     return getNearestSample(alloc, location, dk, dt, lod);
461 }
462 
463 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)464         sample_LOD_LinearPixel(const Allocation_t *alloc,
465                                rs_data_kind dk, rs_data_type dt,
466                                rs_sampler_value wrapS,
467                                rs_sampler_value wrapT,
468                                float2 uv, uint32_t lod) {
469 
470     int sourceW = alloc->mHal.drvState.lod[lod].dimX;
471     int sourceH = alloc->mHal.drvState.lod[lod].dimY;
472 
473     float pixelU = uv.x * sourceW;
474     float pixelV = uv.y * sourceH;
475     int iPixelU = floor(pixelU);
476     int iPixelV = floor(pixelV);
477     float fracU = pixelU - iPixelU;
478     float fracV = pixelV - iPixelV;
479 
480     if (fracU < 0.5f) {
481         iPixelU -= 1;
482         fracU += 0.5f;
483     } else {
484         fracU -= 0.5f;
485     }
486     if (fracV < 0.5f) {
487         iPixelV -= 1;
488         fracV += 0.5f;
489     } else {
490         fracV -= 0.5f;
491     }
492     float oneMinusFracU = 1.0f - fracU;
493     float oneMinusFracV = 1.0f - fracV;
494 
495     float w0 = oneMinusFracU * oneMinusFracV;
496     float w1 = fracU * oneMinusFracV;
497     float w2 = oneMinusFracU * fracV;
498     float w3 = fracU * fracV;
499 
500     int nx = wrapI(wrapS, iPixelU + 1, sourceW);
501     int ny = wrapI(wrapT, iPixelV + 1, sourceH);
502     int lx = wrapI(wrapS, iPixelU, sourceW);
503     int ly = wrapI(wrapT, iPixelV, sourceH);
504 
505     return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
506 
507 }
508 
509 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)510         sample_LOD_NearestPixel(const Allocation_t *alloc,
511                                 rs_data_kind dk, rs_data_type dt,
512                                 rs_sampler_value wrapS,
513                                 rs_sampler_value wrapT,
514                                 float2 uv, uint32_t lod) {
515     int sourceW = alloc->mHal.drvState.lod[lod].dimX;
516     int sourceH = alloc->mHal.drvState.lod[lod].dimY;
517 
518     float2 dimF;
519     dimF.x = (float)(sourceW);
520     dimF.y = (float)(sourceH);
521     int2 iPixel = convert_int2(floor(uv * dimF));
522 
523     uint2 location;
524     location.x = wrapI(wrapS, iPixel.x, sourceW);
525     location.y = wrapI(wrapT, iPixel.y, sourceH);
526     return getNearestSample(alloc, location, dk, dt, lod);
527 }
528 
529 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float uv,float lod)530         rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
531 
532     const Allocation_t *alloc = (const Allocation_t *)a.p;
533     const Sampler_t *prog = (Sampler_t *)s.p;
534     const Type_t *type = (Type_t *)alloc->mHal.state.type;
535     const Element_t *elem = type->mHal.state.element;
536     rs_data_kind dk = elem->mHal.state.dataKind;
537     rs_data_type dt = elem->mHal.state.dataType;
538     rs_sampler_value sampleMin = prog->mHal.state.minFilter;
539     rs_sampler_value sampleMag = prog->mHal.state.magFilter;
540     rs_sampler_value wrapS = prog->mHal.state.wrapS;
541 
542     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
543         return 0.f;
544     }
545 
546     if (lod <= 0.0f) {
547         if (sampleMag == RS_SAMPLER_NEAREST) {
548             return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
549         }
550         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, 0);
551     }
552 
553     if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
554         uint32_t maxLOD = type->mHal.state.lodCount - 1;
555         lod = min(lod, (float)maxLOD);
556         uint32_t nearestLOD = (uint32_t)round(lod);
557         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, nearestLOD);
558     }
559 
560     if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
561         uint32_t lod0 = (uint32_t)floor(lod);
562         uint32_t lod1 = (uint32_t)ceil(lod);
563         uint32_t maxLOD = type->mHal.state.lodCount - 1;
564         lod0 = min(lod0, maxLOD);
565         lod1 = min(lod1, maxLOD);
566         float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod0);
567         float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod1);
568         float frac = lod - (float)lod0;
569         return sample0 * (1.0f - frac) + sample1 * frac;
570     }
571 
572     return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
573 }
574 
575 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float location)576         rsSample(rs_allocation a, rs_sampler s, float location) {
577     return rsSample(a, s, location, 0);
578 }
579 
580 
581 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv,float lod)582         rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
583 
584     const Allocation_t *alloc = (const Allocation_t *)a.p;
585     const Sampler_t *prog = (Sampler_t *)s.p;
586     const Type_t *type = (Type_t *)alloc->mHal.state.type;
587     const Element_t *elem = type->mHal.state.element;
588     rs_data_kind dk = elem->mHal.state.dataKind;
589     rs_data_type dt = elem->mHal.state.dataType;
590     rs_sampler_value sampleMin = prog->mHal.state.minFilter;
591     rs_sampler_value sampleMag = prog->mHal.state.magFilter;
592     rs_sampler_value wrapS = prog->mHal.state.wrapS;
593     rs_sampler_value wrapT = prog->mHal.state.wrapT;
594 
595     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
596         return 0.f;
597     }
598 
599     if (lod <= 0.0f) {
600         if (sampleMag == RS_SAMPLER_NEAREST) {
601             return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
602         }
603         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
604     }
605 
606     if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
607         uint32_t maxLOD = type->mHal.state.lodCount - 1;
608         lod = min(lod, (float)maxLOD);
609         uint32_t nearestLOD = (uint32_t)round(lod);
610         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, nearestLOD);
611     }
612 
613     if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
614         uint32_t lod0 = (uint32_t)floor(lod);
615         uint32_t lod1 = (uint32_t)ceil(lod);
616         uint32_t maxLOD = type->mHal.state.lodCount - 1;
617         lod0 = min(lod0, maxLOD);
618         lod1 = min(lod1, maxLOD);
619         float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod0);
620         float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod1);
621         float frac = lod - (float)lod0;
622         return sample0 * (1.0f - frac) + sample1 * frac;
623     }
624 
625     return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
626 }
627 
628 extern float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv)629         rsSample(rs_allocation a, rs_sampler s, float2 uv) {
630 
631     const Allocation_t *alloc = (const Allocation_t *)a.p;
632     const Sampler_t *prog = (Sampler_t *)s.p;
633     const Type_t *type = (Type_t *)alloc->mHal.state.type;
634     const Element_t *elem = type->mHal.state.element;
635     rs_data_kind dk = elem->mHal.state.dataKind;
636     rs_data_type dt = elem->mHal.state.dataType;
637     rs_sampler_value wrapS = prog->mHal.state.wrapS;
638     rs_sampler_value wrapT = prog->mHal.state.wrapT;
639 
640     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
641         return 0.f;
642     }
643 
644     if (prog->mHal.state.magFilter == RS_SAMPLER_NEAREST) {
645         return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
646     }
647     return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
648 }
649