1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20 
21 namespace android {
22 namespace renderscript {
23 
24 
25 class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
26 public:
27     void populateScript(Script *) override;
28     void invokeFreeChildren() override;
29 
30     void setGlobalObj(uint32_t slot, ObjectBase *data) override;
31 
32     ~RsdCpuScriptIntrinsicResize() override;
33     RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
34 
35     void preLaunch(uint32_t slot, const Allocation ** ains,
36                    uint32_t inLen, Allocation * aout, const void * usr,
37                    uint32_t usrLen, const RsScriptCall *sc) override;
38 
39     float scaleX;
40     float scaleY;
41 
42 protected:
43     ObjectBaseRef<const Allocation> mAlloc;
44     ObjectBaseRef<const Element> mElement;
45 
46     static void kernelU1(const RsExpandKernelDriverInfo *info,
47                          uint32_t xstart, uint32_t xend,
48                          uint32_t outstep);
49     static void kernelU2(const RsExpandKernelDriverInfo *info,
50                          uint32_t xstart, uint32_t xend,
51                          uint32_t outstep);
52     static void kernelU4(const RsExpandKernelDriverInfo *info,
53                          uint32_t xstart, uint32_t xend,
54                          uint32_t outstep);
55     static void kernelF1(const RsExpandKernelDriverInfo *info,
56                          uint32_t xstart, uint32_t xend,
57                          uint32_t outstep);
58     static void kernelF2(const RsExpandKernelDriverInfo *info,
59                          uint32_t xstart, uint32_t xend,
60                          uint32_t outstep);
61     static void kernelF4(const RsExpandKernelDriverInfo *info,
62                          uint32_t xstart, uint32_t xend,
63                          uint32_t outstep);
64 };
65 
setGlobalObj(uint32_t slot,ObjectBase * data)66 void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
67     rsAssert(slot == 0);
68     mAlloc.set(static_cast<Allocation *>(data));
69 }
70 
/**
 * Evaluates the Catmull-Rom cubic through four equally spaced samples.
 *
 * One template replaces the former float, float2 and float4 overloads, whose
 * bodies were identical; the expression itself is unchanged, so results are
 * the same for every existing call site.
 *
 * @param p0,p1,p2,p3  Consecutive samples (scalar float or float vector; all
 *                     four must have the same type).
 * @param x            Interpolation parameter: 0 yields p1 and 1 yields p2.
 * @return The interpolated value, of the same type as the samples.
 */
template <typename T>
static T cubicInterpolate(T p0, T p1, T p2, T p3, float x) {
    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
            + x * (3.f * (p1 - p2) + p3 - p0)));
}
85 
/**
 * Produces one bicubically filtered uchar4 pixel.
 *
 * yp0..yp3 are the four source rows to combine, xf is the horizontal sample
 * position in source pixels and width is the row length in pixels. yf is used
 * directly as the interpolation parameter between the rows (callers in this
 * file pass the fractional part of the source row position).
 * The result is rounded by adding 0.5 and clamped to [0, 255].
 */
static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
                         float xf, float yf, int width) {
    // First tap is the column one to the left of the pixel containing xf.
    const int firstTap = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // Taps 0 and 1 are clamped against the left edge only, taps 2 and 3
    // against the right edge only.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto filterRow = [&](const uchar4 *row) -> float4 {
        return cubicInterpolate(convert_float4(row[c0]),
                                convert_float4(row[c1]),
                                convert_float4(row[c2]),
                                convert_float4(row[c3]), fracX);
    };

    const float4 r0 = filterRow(yp0);
    const float4 r1 = filterRow(yp1);
    const float4 r2 = filterRow(yp2);
    const float4 r3 = filterRow(yp3);

    // Vertical pass, then round-to-nearest and saturate to the 8-bit range.
    float4 blended = cubicInterpolate(r0, r1, r2, r3, yf);
    blended = clamp(blended + 0.5f, 0.f, 255.f);
    return convert_uchar4(blended);
}
120 
/**
 * Produces one bicubically filtered uchar2 pixel.
 *
 * yp0..yp3 are the four source rows to combine, xf is the horizontal sample
 * position in source pixels and width is the row length in pixels. yf is used
 * directly as the interpolation parameter between the rows (callers in this
 * file pass the fractional part of the source row position).
 * The result is rounded by adding 0.5 and clamped to [0, 255].
 */
static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
                         float xf, float yf, int width) {
    // First tap is the column one to the left of the pixel containing xf.
    const int firstTap = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // Taps 0 and 1 are clamped against the left edge only, taps 2 and 3
    // against the right edge only.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto filterRow = [&](const uchar2 *row) -> float2 {
        return cubicInterpolate(convert_float2(row[c0]),
                                convert_float2(row[c1]),
                                convert_float2(row[c2]),
                                convert_float2(row[c3]), fracX);
    };

    const float2 r0 = filterRow(yp0);
    const float2 r1 = filterRow(yp1);
    const float2 r2 = filterRow(yp2);
    const float2 r3 = filterRow(yp3);

    // Vertical pass, then round-to-nearest and saturate to the 8-bit range.
    float2 blended = cubicInterpolate(r0, r1, r2, r3, yf);
    blended = clamp(blended + 0.5f, 0.f, 255.f);
    return convert_uchar2(blended);
}
155 
/**
 * Produces one bicubically filtered single-channel 8-bit pixel.
 *
 * yp0..yp3 are the four source rows to combine, xf is the horizontal sample
 * position in source pixels and width is the row length in pixels. yf is used
 * directly as the interpolation parameter between the rows (callers in this
 * file pass the fractional part of the source row position).
 * The result is rounded by adding 0.5 and clamped to [0, 255].
 */
static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
                        float xf, float yf, int width) {
    // First tap is the column one to the left of the pixel containing xf.
    const int firstTap = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // Taps 0 and 1 are clamped against the left edge only, taps 2 and 3
    // against the right edge only.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto filterRow = [&](const uchar *row) -> float {
        return cubicInterpolate((float)row[c0], (float)row[c1],
                                (float)row[c2], (float)row[c3], fracX);
    };

    const float r0 = filterRow(yp0);
    const float r1 = filterRow(yp1);
    const float r2 = filterRow(yp2);
    const float r3 = filterRow(yp3);

    // Vertical pass, then round-to-nearest and saturate to the 8-bit range.
    float blended = cubicInterpolate(r0, r1, r2, r3, yf);
    blended = clamp(blended + 0.5f, 0.f, 255.f);
    return (uchar)blended;
}
179 
// Externally defined helper with C linkage. In this file it is called with
// the 16.16 fixed-point horizontal step, and its result is forwarded
// unchanged as the osc_ctl argument of the rsdIntrinsicResizeB*_K routines.
// NOTE(review): presumably implemented in architecture-specific assembly;
// confirm against the build files.
extern "C" uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc);

// Externally defined row-resize routine for uchar4 pixels (C linkage).
// As called from kernelU4:
//   dst     - first output pixel to write
//   count   - number of output pixels to produce
//   xf      - low 16 bits of the fixed-point starting source position
//   xinc    - fixed-point source step per output pixel
//   srcn..src2 - the four source rows, already advanced to the first tap column
//   xclip   - number of tap columns clipped away at the left edge
//   avail   - srcWidth - xoff + xclip as computed by the caller
//             (NOTE(review): presumably the number of readable source
//             columns; confirm against the routine's implementation)
//   osc_ctl - value returned by rsdIntrinsicResize_oscctl_K(xinc)
//   yr      - four vertical coefficients produced by mkYCoeff()
extern "C" void rsdIntrinsicResizeB4_K(
            uchar4 *dst,
            size_t count,
            uint32_t xf,
            uint32_t xinc,
            uchar4 const *srcn,
            uchar4 const *src0,
            uchar4 const *src1,
            uchar4 const *src2,
            size_t xclip,
            size_t avail,
            uint64_t osc_ctl,
            int32_t const *yr);

// uchar2 counterpart of rsdIntrinsicResizeB4_K; called from kernelU2 with
// the same argument roles.
extern "C" void rsdIntrinsicResizeB2_K(
            uchar2 *dst,
            size_t count,
            uint32_t xf,
            uint32_t xinc,
            uchar2 const *srcn,
            uchar2 const *src0,
            uchar2 const *src1,
            uchar2 const *src2,
            size_t xclip,
            size_t avail,
            uint64_t osc_ctl,
            int32_t const *yr);

// Single-channel (uchar) counterpart of rsdIntrinsicResizeB4_K; called from
// kernelU1 with the same argument roles.
extern "C" void rsdIntrinsicResizeB1_K(
            uchar *dst,
            size_t count,
            uint32_t xf,
            uint32_t xinc,
            uchar const *srcn,
            uchar const *src0,
            uchar const *src1,
            uchar const *src2,
            size_t xclip,
            size_t avail,
            uint64_t osc_ctl,
            int32_t const *yr);
223 
224 #if defined(ARCH_ARM_USE_INTRINSICS)
/**
 * Builds the four vertical filter coefficients passed to the
 * rsdIntrinsicResizeB*_K routines.
 *
 * yf, yf^2 and yf^3 are converted to fixed point (scale 0x10000) and combined
 * into yr[0..3]. yr[1] and yr[2] match the weights cubicInterpolate() applies
 * to its two inner samples; yr[0] and yr[3] are the sign-flipped weights of
 * the two outer samples.
 *
 * @param yr  Output array of four coefficients.
 * @param yf  Interpolation parameter (callers pass the fractional row position).
 */
static void mkYCoeff(int32_t *yr, float yf) {
    const int32_t lin = rint(yf * 0x10000);
    const int32_t sqr = rint(yf * yf * 0x10000);
    const int32_t cub = rint(yf * yf * yf * 0x10000);

    // Each sum is halved with an arithmetic shift, as in the cubic's 0.5 factor.
    yr[0] = (lin + cub - 2 * sqr) >> 1;
    yr[1] = (0x20000 + 3 * cub - 5 * sqr) >> 1;
    yr[2] = (lin + 4 * sqr - 3 * cub) >> 1;
    yr[3] = (sqr - cub) >> 1;
}
235 #endif
236 
/**
 * Produces one bicubically filtered float4 pixel.
 *
 * yp0..yp3 are the four source rows to combine, xf is the horizontal sample
 * position in source pixels and width is the row length in pixels. yf is used
 * directly as the interpolation parameter between the rows (callers in this
 * file pass the fractional part of the source row position).
 * The result is returned without any clamping.
 */
static float4 OneBiCubic(const float4 *yp0, const float4 *yp1, const float4 *yp2, const float4 *yp3,
                         float xf, float yf, int width) {
    // First tap is the column one to the left of the pixel containing xf.
    const int firstTap = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // Taps 0 and 1 are clamped against the left edge only, taps 2 and 3
    // against the right edge only.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto filterRow = [&](const float4 *row) -> float4 {
        return cubicInterpolate(row[c0], row[c1],
                                row[c2], row[c3], fracX);
    };

    const float4 r0 = filterRow(yp0);
    const float4 r1 = filterRow(yp1);
    const float4 r2 = filterRow(yp2);
    const float4 r3 = filterRow(yp3);

    // Vertical pass.
    float4 blended = cubicInterpolate(r0, r1, r2, r3, yf);
    return blended;
}
259 
/**
 * Produces one bicubically filtered float2 pixel.
 *
 * yp0..yp3 are the four source rows to combine, xf is the horizontal sample
 * position in source pixels and width is the row length in pixels. yf is used
 * directly as the interpolation parameter between the rows (callers in this
 * file pass the fractional part of the source row position).
 * The result is returned without any clamping.
 */
static float2 OneBiCubic(const float2 *yp0, const float2 *yp1, const float2 *yp2, const float2 *yp3,
                         float xf, float yf, int width) {
    // First tap is the column one to the left of the pixel containing xf.
    const int firstTap = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // Taps 0 and 1 are clamped against the left edge only, taps 2 and 3
    // against the right edge only.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto filterRow = [&](const float2 *row) -> float2 {
        return cubicInterpolate(row[c0], row[c1],
                                row[c2], row[c3], fracX);
    };

    const float2 r0 = filterRow(yp0);
    const float2 r1 = filterRow(yp1);
    const float2 r2 = filterRow(yp2);
    const float2 r3 = filterRow(yp3);

    // Vertical pass.
    float2 blended = cubicInterpolate(r0, r1, r2, r3, yf);
    return blended;
}
282 
OneBiCubic(const float * yp0,const float * yp1,const float * yp2,const float * yp3,float xf,float yf,int width)283 static float OneBiCubic(const float *yp0, const float *yp1, const float *yp2, const float *yp3,
284                         float xf, float yf, int width) {
285     int startx = (int) floor(xf - 1);
286     xf = xf - floor(xf);
287     int maxx = width - 1;
288     int xs0 = rsMax(0, startx + 0);
289     int xs1 = rsMax(0, startx + 1);
290     int xs2 = rsMin(maxx, startx + 2);
291     int xs3 = rsMin(maxx, startx + 3);
292 
293     float p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
294                                  yp0[xs2], yp0[xs3], xf);
295     float p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
296                                  yp1[xs2], yp1[xs3], xf);
297     float p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
298                                  yp2[xs2], yp2[xs3], xf);
299     float p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
300                                  yp3[xs2], yp3[xs3], xf);
301 
302     float p  = cubicInterpolate(p0, p1, p2, p3, yf);
303     return p;
304 }
305 
kernelU4(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)306 void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
307                                                 uint32_t xstart, uint32_t xend,
308                                                 uint32_t outstep) {
309     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
310 
311     if (!cp->mAlloc.get()) {
312         ALOGE("Resize executed without input, skipping");
313         return;
314     }
315     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
316     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
317     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
318     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
319 
320     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
321     int starty = (int) floor(yf - 1);
322     yf = yf - floor(yf);
323     int maxy = srcHeight - 1;
324     int ys0 = rsMax(0, starty + 0);
325     int ys1 = rsMax(0, starty + 1);
326     int ys2 = rsMin(maxy, starty + 2);
327     int ys3 = rsMin(maxy, starty + 3);
328 
329     const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
330     const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
331     const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
332     const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
333 
334     uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart;
335     uint32_t x1 = xstart;
336     uint32_t x2 = xend;
337 
338 #if defined(ARCH_ARM_USE_INTRINSICS)
339     if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
340         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
341         long xf16 = rint(xf * 0x10000);
342         uint32_t xinc16 = rint(cp->scaleX * 0x10000);
343 
344         int xoff = (xf16 >> 16) - 1;
345         int xclip = rsMax(0, xoff) - xoff;
346         int len = x2 - x1;
347 
348         int32_t yr[4];
349         uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
350         mkYCoeff(yr, yf);
351 
352         xoff += xclip;
353 
354         rsdIntrinsicResizeB4_K(
355                 out, len,
356                 xf16 & 0xffff, xinc16,
357                 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
358                 xclip, srcWidth - xoff + xclip,
359                 osc_ctl, yr);
360         out += len;
361         x1 += len;
362     }
363 #endif
364 
365     while(x1 < x2) {
366         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
367         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
368         out++;
369         x1++;
370     }
371 }
372 
kernelU2(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)373 void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
374                                                 uint32_t xstart, uint32_t xend,
375                                                 uint32_t outstep) {
376     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
377 
378     if (!cp->mAlloc.get()) {
379         ALOGE("Resize executed without input, skipping");
380         return;
381     }
382     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
383     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
384     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
385     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
386 
387     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
388     int starty = (int) floor(yf - 1);
389     yf = yf - floor(yf);
390     int maxy = srcHeight - 1;
391     int ys0 = rsMax(0, starty + 0);
392     int ys1 = rsMax(0, starty + 1);
393     int ys2 = rsMin(maxy, starty + 2);
394     int ys3 = rsMin(maxy, starty + 3);
395 
396     const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
397     const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
398     const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
399     const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
400 
401     uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart;
402     uint32_t x1 = xstart;
403     uint32_t x2 = xend;
404 
405 #if defined(ARCH_ARM_USE_INTRINSICS)
406     if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
407         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
408         long xf16 = rint(xf * 0x10000);
409         uint32_t xinc16 = rint(cp->scaleX * 0x10000);
410 
411         int xoff = (xf16 >> 16) - 1;
412         int xclip = rsMax(0, xoff) - xoff;
413         int len = x2 - x1;
414 
415         int32_t yr[4];
416         uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
417         mkYCoeff(yr, yf);
418 
419         xoff += xclip;
420 
421         rsdIntrinsicResizeB2_K(
422                 out, len,
423                 xf16 & 0xffff, xinc16,
424                 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
425                 xclip, srcWidth - xoff + xclip,
426                 osc_ctl, yr);
427         out += len;
428         x1 += len;
429     }
430 #endif
431 
432     while(x1 < x2) {
433         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
434         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
435         out++;
436         x1++;
437     }
438 }
439 
kernelU1(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)440 void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
441                                                 uint32_t xstart, uint32_t xend,
442                                                 uint32_t outstep) {
443     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
444 
445     if (!cp->mAlloc.get()) {
446         ALOGE("Resize executed without input, skipping");
447         return;
448     }
449     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
450     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
451     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
452     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
453 
454     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
455     int starty = (int) floor(yf - 1);
456     yf = yf - floor(yf);
457     int maxy = srcHeight - 1;
458     int ys0 = rsMax(0, starty + 0);
459     int ys1 = rsMax(0, starty + 1);
460     int ys2 = rsMin(maxy, starty + 2);
461     int ys3 = rsMin(maxy, starty + 3);
462 
463     const uchar *yp0 = pin + stride * ys0;
464     const uchar *yp1 = pin + stride * ys1;
465     const uchar *yp2 = pin + stride * ys2;
466     const uchar *yp3 = pin + stride * ys3;
467 
468     uchar *out = ((uchar *)info->outPtr[0]) + xstart;
469     uint32_t x1 = xstart;
470     uint32_t x2 = xend;
471 
472 #if defined(ARCH_ARM_USE_INTRINSICS)
473     if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
474         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
475         long xf16 = rint(xf * 0x10000);
476         uint32_t xinc16 = rint(cp->scaleX * 0x10000);
477 
478         int xoff = (xf16 >> 16) - 1;
479         int xclip = rsMax(0, xoff) - xoff;
480         int len = x2 - x1;
481 
482         int32_t yr[4];
483         uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
484         mkYCoeff(yr, yf);
485 
486         xoff += xclip;
487 
488         rsdIntrinsicResizeB1_K(
489                 out, len,
490                 xf16 & 0xffff, xinc16,
491                 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
492                 xclip, srcWidth - xoff + xclip,
493                 osc_ctl, yr);
494         out += len;
495         x1 += len;
496     }
497 #endif
498 
499     while(x1 < x2) {
500         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
501         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
502         out++;
503         x1++;
504     }
505 }
506 
kernelF4(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)507 void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
508                                                 uint32_t xstart, uint32_t xend,
509                                                 uint32_t outstep) {
510     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
511 
512     if (!cp->mAlloc.get()) {
513         ALOGE("Resize executed without input, skipping");
514         return;
515     }
516     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
517     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
518     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
519     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
520 
521     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
522     int starty = (int) floor(yf - 1);
523     yf = yf - floor(yf);
524     int maxy = srcHeight - 1;
525     int ys0 = rsMax(0, starty + 0);
526     int ys1 = rsMax(0, starty + 1);
527     int ys2 = rsMin(maxy, starty + 2);
528     int ys3 = rsMin(maxy, starty + 3);
529 
530     const float4 *yp0 = (const float4 *)(pin + stride * ys0);
531     const float4 *yp1 = (const float4 *)(pin + stride * ys1);
532     const float4 *yp2 = (const float4 *)(pin + stride * ys2);
533     const float4 *yp3 = (const float4 *)(pin + stride * ys3);
534 
535     float4 *out = ((float4 *)info->outPtr[0]) + xstart;
536     uint32_t x1 = xstart;
537     uint32_t x2 = xend;
538 
539     while(x1 < x2) {
540         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
541         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
542         out++;
543         x1++;
544     }
545 }
546 
kernelF2(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)547 void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
548                                                 uint32_t xstart, uint32_t xend,
549                                                 uint32_t outstep) {
550     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
551 
552     if (!cp->mAlloc.get()) {
553         ALOGE("Resize executed without input, skipping");
554         return;
555     }
556     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
557     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
558     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
559     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
560 
561     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
562     int starty = (int) floor(yf - 1);
563     yf = yf - floor(yf);
564     int maxy = srcHeight - 1;
565     int ys0 = rsMax(0, starty + 0);
566     int ys1 = rsMax(0, starty + 1);
567     int ys2 = rsMin(maxy, starty + 2);
568     int ys3 = rsMin(maxy, starty + 3);
569 
570     const float2 *yp0 = (const float2 *)(pin + stride * ys0);
571     const float2 *yp1 = (const float2 *)(pin + stride * ys1);
572     const float2 *yp2 = (const float2 *)(pin + stride * ys2);
573     const float2 *yp3 = (const float2 *)(pin + stride * ys3);
574 
575     float2 *out = ((float2 *)info->outPtr[0]) + xstart;
576     uint32_t x1 = xstart;
577     uint32_t x2 = xend;
578 
579     while(x1 < x2) {
580         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
581         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
582         out++;
583         x1++;
584     }
585 }
586 
kernelF1(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)587 void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
588                                                 uint32_t xstart, uint32_t xend,
589                                                 uint32_t outstep) {
590     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
591 
592     if (!cp->mAlloc.get()) {
593         ALOGE("Resize executed without input, skipping");
594         return;
595     }
596     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
597     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
598     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
599     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
600 
601     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
602     int starty = (int) floor(yf - 1);
603     yf = yf - floor(yf);
604     int maxy = srcHeight - 1;
605     int ys0 = rsMax(0, starty + 0);
606     int ys1 = rsMax(0, starty + 1);
607     int ys2 = rsMin(maxy, starty + 2);
608     int ys3 = rsMin(maxy, starty + 3);
609 
610     const float *yp0 = (const float *)(pin + stride * ys0);
611     const float *yp1 = (const float *)(pin + stride * ys1);
612     const float *yp2 = (const float *)(pin + stride * ys2);
613     const float *yp3 = (const float *)(pin + stride * ys3);
614 
615     float *out = ((float *)info->outPtr[0]) + xstart;
616     uint32_t x1 = xstart;
617     uint32_t x2 = xend;
618 
619     while(x1 < x2) {
620         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
621         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
622         out++;
623         x1++;
624     }
625 }
626 
RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)627 RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
628             RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
629             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
630 
631 }
632 
~RsdCpuScriptIntrinsicResize()633 RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
634 }
635 
preLaunch(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)636 void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
637                                             const Allocation ** ains,
638                                             uint32_t inLen, Allocation * aout,
639                                             const void * usr, uint32_t usrLen,
640                                             const RsScriptCall *sc)
641 {
642     if (!mAlloc.get()) {
643         ALOGE("Resize executed without input, skipping");
644         return;
645     }
646     const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
647     const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
648 
649     //check the data type to determine F or U.
650     if (mAlloc->getType()->getElement()->getType() == RS_TYPE_UNSIGNED_8) {
651         switch(mAlloc->getType()->getElement()->getVectorSize()) {
652         case 1:
653             mRootPtr = &kernelU1;
654             break;
655         case 2:
656             mRootPtr = &kernelU2;
657             break;
658         case 3:
659         case 4:
660             mRootPtr = &kernelU4;
661             break;
662         }
663     } else {
664         switch(mAlloc->getType()->getElement()->getVectorSize()) {
665         case 1:
666             mRootPtr = &kernelF1;
667             break;
668         case 2:
669             mRootPtr = &kernelF2;
670             break;
671         case 3:
672         case 4:
673             mRootPtr = &kernelF4;
674             break;
675         }
676     }
677 
678     scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
679     scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
680 
681 }
682 
populateScript(Script * s)683 void RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
684     s->mHal.info.exportedVariableCount = 1;
685 }
686 
invokeFreeChildren()687 void RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
688     mAlloc.clear();
689 }
690 
rsdIntrinsic_Resize(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)691 RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
692 
693     return new RsdCpuScriptIntrinsicResize(ctx, s, e);
694 }
695 
696 } // namespace renderscript
697 } // namespace android
698