1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20 
21 using namespace android;
22 using namespace android::renderscript;
23 
24 namespace android {
25 namespace renderscript {
26 
27 
28 class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
29 public:
30     virtual void populateScript(Script *);
31     virtual void invokeFreeChildren();
32 
33     virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
34 
35     virtual ~RsdCpuScriptIntrinsicResize();
36     RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
37 
38     virtual void preLaunch(uint32_t slot, const Allocation * ain,
39                            Allocation * aout, const void * usr,
40                            uint32_t usrLen, const RsScriptCall *sc);
41 
42     float scaleX;
43     float scaleY;
44 
45 protected:
46     ObjectBaseRef<const Allocation> mAlloc;
47     ObjectBaseRef<const Element> mElement;
48 
49     static void kernelU1(const RsForEachStubParamStruct *p,
50                          uint32_t xstart, uint32_t xend,
51                          uint32_t instep, uint32_t outstep);
52     static void kernelU2(const RsForEachStubParamStruct *p,
53                          uint32_t xstart, uint32_t xend,
54                          uint32_t instep, uint32_t outstep);
55     static void kernelU4(const RsForEachStubParamStruct *p,
56                          uint32_t xstart, uint32_t xend,
57                          uint32_t instep, uint32_t outstep);
58 };
59 
60 }
61 }
62 
63 
setGlobalObj(uint32_t slot,ObjectBase * data)64 void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
65     rsAssert(slot == 0);
66     mAlloc.set(static_cast<Allocation *>(data));
67 }
68 
cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3,float x)69 static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
70     return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
71             + x * (3.f * (p1 - p2) + p3 - p0)));
72 }
73 
cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3,float x)74 static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
75     return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
76             + x * (3.f * (p1 - p2) + p3 - p0)));
77 }
78 
// Cubic interpolation through four consecutive samples (scalar variant).
//
// x is the position between p1 and p2: the result is p1 at x == 0 and p2 at
// x == 1. The polynomial is evaluated innermost term first.
static float cubicInterpolate(float p0, float p1, float p2, float p3, float x) {
    const float cubicTerm = 3.f * (p1 - p2) + p3 - p0;
    const float quadTerm = 2.f * p0 - 5.f * p1 + 4.f * p2 - p3 + x * cubicTerm;
    const float linearTerm = p2 - p0 + x * quadTerm;
    return p1 + 0.5f * x * linearTerm;
}
83 
// Produces one bicubically filtered uchar4 pixel.
//
// yp0..yp3 point at the four source rows to sample. xf is the horizontal
// sample position in source pixels; yf is the weight used to blend the four
// rows. width is the source row length, used to clamp the column indices.
//
// NOTE(review): the two left columns are clamped only against 0 and the two
// right columns only against width - 1, so this presumably relies on the
// caller keeping xf within the source row -- confirm.
static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
                         float xf, float yf, int width) {
    const int firstCol = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    const int col0 = rsMax(0, firstCol + 0);
    const int col1 = rsMax(0, firstCol + 1);
    const int col2 = rsMin(lastCol, firstCol + 2);
    const int col3 = rsMin(lastCol, firstCol + 3);

    // Filter each of the four rows horizontally.
    const uchar4 *rows[4] = { yp0, yp1, yp2, yp3 };
    float4 rowValue[4];
    for (int r = 0; r < 4; r++) {
        const uchar4 *row = rows[r];
        rowValue[r] = cubicInterpolate(convert_float4(row[col0]),
                                       convert_float4(row[col1]),
                                       convert_float4(row[col2]),
                                       convert_float4(row[col3]), fracX);
    }

    // Blend the row results vertically, then add 0.5 and clamp to 0..255
    // before the conversion back to uchar4.
    float4 blended = cubicInterpolate(rowValue[0], rowValue[1], rowValue[2], rowValue[3], yf);
    blended = clamp(blended + 0.5f, 0.f, 255.f);
    return convert_uchar4(blended);
}
118 
// Produces one bicubically filtered uchar2 pixel.
//
// yp0..yp3 point at the four source rows to sample. xf is the horizontal
// sample position in source pixels; yf is the weight used to blend the four
// rows. width is the source row length, used to clamp the column indices.
//
// NOTE(review): the two left columns are clamped only against 0 and the two
// right columns only against width - 1, so this presumably relies on the
// caller keeping xf within the source row -- confirm.
static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
                         float xf, float yf, int width) {
    const int firstCol = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    const int col0 = rsMax(0, firstCol + 0);
    const int col1 = rsMax(0, firstCol + 1);
    const int col2 = rsMin(lastCol, firstCol + 2);
    const int col3 = rsMin(lastCol, firstCol + 3);

    // Filter each of the four rows horizontally.
    const uchar2 *rows[4] = { yp0, yp1, yp2, yp3 };
    float2 rowValue[4];
    for (int r = 0; r < 4; r++) {
        const uchar2 *row = rows[r];
        rowValue[r] = cubicInterpolate(convert_float2(row[col0]),
                                       convert_float2(row[col1]),
                                       convert_float2(row[col2]),
                                       convert_float2(row[col3]), fracX);
    }

    // Blend the row results vertically, then add 0.5 and clamp to 0..255
    // before the conversion back to uchar2.
    float2 blended = cubicInterpolate(rowValue[0], rowValue[1], rowValue[2], rowValue[3], yf);
    blended = clamp(blended + 0.5f, 0.f, 255.f);
    return convert_uchar2(blended);
}
153 
// Produces one bicubically filtered single-channel (uchar) pixel.
//
// yp0..yp3 point at the four source rows to sample. xf is the horizontal
// sample position in source pixels; yf is the weight used to blend the four
// rows. width is the source row length, used to clamp the column indices.
//
// NOTE(review): the two left columns are clamped only against 0 and the two
// right columns only against width - 1, so this presumably relies on the
// caller keeping xf within the source row -- confirm.
static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
                        float xf, float yf, int width) {
    const int firstCol = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    const int col0 = rsMax(0, firstCol + 0);
    const int col1 = rsMax(0, firstCol + 1);
    const int col2 = rsMin(lastCol, firstCol + 2);
    const int col3 = rsMin(lastCol, firstCol + 3);

    // Filter each of the four rows horizontally.
    const uchar *rows[4] = { yp0, yp1, yp2, yp3 };
    float rowValue[4];
    for (int r = 0; r < 4; r++) {
        const uchar *row = rows[r];
        rowValue[r] = cubicInterpolate((float)row[col0], (float)row[col1],
                                       (float)row[col2], (float)row[col3], fracX);
    }

    // Blend the row results vertically, then add 0.5 and clamp to 0..255
    // before the conversion back to uchar.
    float blended = cubicInterpolate(rowValue[0], rowValue[1], rowValue[2], rowValue[3], yf);
    blended = clamp(blended + 0.5f, 0.f, 255.f);
    return (uchar)blended;
}
177 
kernelU4(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)178 void RsdCpuScriptIntrinsicResize::kernelU4(const RsForEachStubParamStruct *p,
179                                                 uint32_t xstart, uint32_t xend,
180                                                 uint32_t instep, uint32_t outstep) {
181     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
182 
183     if (!cp->mAlloc.get()) {
184         ALOGE("Resize executed without input, skipping");
185         return;
186     }
187     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
188     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
189     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
190     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
191 
192     float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
193     int starty = (int) floor(yf - 1);
194     yf = yf - floor(yf);
195     int maxy = srcHeight - 1;
196     int ys0 = rsMax(0, starty + 0);
197     int ys1 = rsMax(0, starty + 1);
198     int ys2 = rsMin(maxy, starty + 2);
199     int ys3 = rsMin(maxy, starty + 3);
200 
201     const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
202     const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
203     const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
204     const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
205 
206     uchar4 *out = ((uchar4 *)p->out) + xstart;
207     uint32_t x1 = xstart;
208     uint32_t x2 = xend;
209 
210     while(x1 < x2) {
211         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
212         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
213         out++;
214         x1++;
215     }
216 }
217 
kernelU2(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)218 void RsdCpuScriptIntrinsicResize::kernelU2(const RsForEachStubParamStruct *p,
219                                                 uint32_t xstart, uint32_t xend,
220                                                 uint32_t instep, uint32_t outstep) {
221     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
222 
223     if (!cp->mAlloc.get()) {
224         ALOGE("Resize executed without input, skipping");
225         return;
226     }
227     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
228     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
229     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
230     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
231 
232     float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
233     int starty = (int) floor(yf - 1);
234     yf = yf - floor(yf);
235     int maxy = srcHeight - 1;
236     int ys0 = rsMax(0, starty + 0);
237     int ys1 = rsMax(0, starty + 1);
238     int ys2 = rsMin(maxy, starty + 2);
239     int ys3 = rsMin(maxy, starty + 3);
240 
241     const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
242     const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
243     const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
244     const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
245 
246     uchar2 *out = ((uchar2 *)p->out) + xstart;
247     uint32_t x1 = xstart;
248     uint32_t x2 = xend;
249 
250     while(x1 < x2) {
251         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
252         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
253         out++;
254         x1++;
255     }
256 }
257 
kernelU1(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)258 void RsdCpuScriptIntrinsicResize::kernelU1(const RsForEachStubParamStruct *p,
259                                                 uint32_t xstart, uint32_t xend,
260                                                 uint32_t instep, uint32_t outstep) {
261     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
262 
263     if (!cp->mAlloc.get()) {
264         ALOGE("Resize executed without input, skipping");
265         return;
266     }
267     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
268     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
269     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
270     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
271 
272     float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
273     int starty = (int) floor(yf - 1);
274     yf = yf - floor(yf);
275     int maxy = srcHeight - 1;
276     int ys0 = rsMax(0, starty + 0);
277     int ys1 = rsMax(0, starty + 1);
278     int ys2 = rsMin(maxy, starty + 2);
279     int ys3 = rsMin(maxy, starty + 3);
280 
281     const uchar *yp0 = pin + stride * ys0;
282     const uchar *yp1 = pin + stride * ys1;
283     const uchar *yp2 = pin + stride * ys2;
284     const uchar *yp3 = pin + stride * ys3;
285 
286     uchar *out = ((uchar *)p->out) + xstart;
287     uint32_t x1 = xstart;
288     uint32_t x2 = xend;
289 
290     while(x1 < x2) {
291         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
292         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
293         out++;
294         x1++;
295     }
296 }
297 
RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)298 RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
299             RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
300             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
301 
302 }
303 
~RsdCpuScriptIntrinsicResize()304 RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
305 }
306 
preLaunch(uint32_t slot,const Allocation * ain,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)307 void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, const Allocation * ain,
308                                             Allocation * aout, const void * usr,
309                                             uint32_t usrLen, const RsScriptCall *sc)
310 {
311     if (!mAlloc.get()) {
312         ALOGE("Resize executed without input, skipping");
313         return;
314     }
315     const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
316     const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
317     const size_t stride = mAlloc->mHal.drvState.lod[0].stride;
318 
319     switch(mAlloc->getType()->getElement()->getVectorSize()) {
320     case 1:
321         mRootPtr = &kernelU1;
322         break;
323     case 2:
324         mRootPtr = &kernelU2;
325         break;
326     case 3:
327     case 4:
328         mRootPtr = &kernelU4;
329         break;
330     }
331 
332     scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
333     scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
334 
335 }
336 
populateScript(Script * s)337 void RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
338     s->mHal.info.exportedVariableCount = 1;
339 }
340 
invokeFreeChildren()341 void RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
342     mAlloc.clear();
343 }
344 
345 
rsdIntrinsic_Resize(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)346 RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
347 
348     return new RsdCpuScriptIntrinsicResize(ctx, s, e);
349 }
350 
351 
352