1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20
21 using namespace android;
22 using namespace android::renderscript;
23
24 namespace android {
25 namespace renderscript {
26
27
28 class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
29 public:
30 virtual void populateScript(Script *);
31 virtual void invokeFreeChildren();
32
33 virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
34
35 virtual ~RsdCpuScriptIntrinsicResize();
36 RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
37
38 virtual void preLaunch(uint32_t slot, const Allocation * ain,
39 Allocation * aout, const void * usr,
40 uint32_t usrLen, const RsScriptCall *sc);
41
42 float scaleX;
43 float scaleY;
44
45 protected:
46 ObjectBaseRef<const Allocation> mAlloc;
47 ObjectBaseRef<const Element> mElement;
48
49 static void kernelU1(const RsForEachStubParamStruct *p,
50 uint32_t xstart, uint32_t xend,
51 uint32_t instep, uint32_t outstep);
52 static void kernelU2(const RsForEachStubParamStruct *p,
53 uint32_t xstart, uint32_t xend,
54 uint32_t instep, uint32_t outstep);
55 static void kernelU4(const RsForEachStubParamStruct *p,
56 uint32_t xstart, uint32_t xend,
57 uint32_t instep, uint32_t outstep);
58 };
59
60 }
61 }
62
63
setGlobalObj(uint32_t slot,ObjectBase * data)64 void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
65 rsAssert(slot == 0);
66 mAlloc.set(static_cast<Allocation *>(data));
67 }
68
cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3,float x)69 static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
70 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
71 + x * (3.f * (p1 - p2) + p3 - p0)));
72 }
73
cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3,float x)74 static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
75 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
76 + x * (3.f * (p1 - p2) + p3 - p0)));
77 }
78
/**
 * Cubic (Catmull-Rom form) interpolation across four consecutive scalar
 * samples.
 *
 * x is the position between p1 and p2: x == 0 evaluates to p1 and x == 1
 * evaluates to p2.
 */
static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
    // Polynomial terms, kept in the same evaluation order as the closed form.
    const float linearTerm = p2 - p0;
    const float squareTerm = 2.f * p0 - 5.f * p1 + 4.f * p2 - p3;
    const float cubeTerm = 3.f * (p1 - p2) + p3 - p0;

    return p1 + 0.5f * x * (linearTerm + x * (squareTerm + x * cubeTerm));
}
83
/**
 * Produces one bicubically filtered 4-component pixel.
 *
 * yp0..yp3 point at the four source rows taking part in the filter, xf is the
 * horizontal source coordinate, yf the vertical fraction between yp1 and yp2,
 * and width the number of pixels in a source row. Column indices are clamped
 * so that taps falling outside the row reuse the edge pixels.
 */
static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
                         float xf, float yf, int width) {
    const int firstCol = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped against 0, the two right taps against the
    // last column.
    const int col[4] = {
        rsMax(0, firstCol + 0),
        rsMax(0, firstCol + 1),
        rsMin(lastCol, firstCol + 2),
        rsMin(lastCol, firstCol + 3)
    };

    // Horizontal pass: one interpolated value per contributing row.
    const uchar4 *rows[4] = { yp0, yp1, yp2, yp3 };
    float4 horiz[4];
    for (int r = 0; r < 4; r++) {
        const uchar4 *row = rows[r];
        horiz[r] = cubicInterpolate(convert_float4(row[col[0]]),
                                    convert_float4(row[col[1]]),
                                    convert_float4(row[col[2]]),
                                    convert_float4(row[col[3]]), fracX);
    }

    // Vertical pass, then round (+0.5) and clamp to the 8-bit range.
    float4 result = cubicInterpolate(horiz[0], horiz[1], horiz[2], horiz[3], yf);
    result = clamp(result + 0.5f, 0.f, 255.f);
    return convert_uchar4(result);
}
118
/**
 * Produces one bicubically filtered 2-component pixel.
 *
 * yp0..yp3 point at the four source rows taking part in the filter, xf is the
 * horizontal source coordinate, yf the vertical fraction between yp1 and yp2,
 * and width the number of pixels in a source row. Column indices are clamped
 * so that taps falling outside the row reuse the edge pixels.
 */
static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
                         float xf, float yf, int width) {
    const int firstCol = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped against 0, the two right taps against the
    // last column.
    const int col[4] = {
        rsMax(0, firstCol + 0),
        rsMax(0, firstCol + 1),
        rsMin(lastCol, firstCol + 2),
        rsMin(lastCol, firstCol + 3)
    };

    // Horizontal pass: one interpolated value per contributing row.
    const uchar2 *rows[4] = { yp0, yp1, yp2, yp3 };
    float2 horiz[4];
    for (int r = 0; r < 4; r++) {
        const uchar2 *row = rows[r];
        horiz[r] = cubicInterpolate(convert_float2(row[col[0]]),
                                    convert_float2(row[col[1]]),
                                    convert_float2(row[col[2]]),
                                    convert_float2(row[col[3]]), fracX);
    }

    // Vertical pass, then round (+0.5) and clamp to the 8-bit range.
    float2 result = cubicInterpolate(horiz[0], horiz[1], horiz[2], horiz[3], yf);
    result = clamp(result + 0.5f, 0.f, 255.f);
    return convert_uchar2(result);
}
153
/**
 * Produces one bicubically filtered single-component pixel.
 *
 * yp0..yp3 point at the four source rows taking part in the filter, xf is the
 * horizontal source coordinate, yf the vertical fraction between yp1 and yp2,
 * and width the number of pixels in a source row. Column indices are clamped
 * so that taps falling outside the row reuse the edge pixels.
 */
static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
                        float xf, float yf, int width) {
    const int firstCol = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped against 0, the two right taps against the
    // last column.
    const int col[4] = {
        rsMax(0, firstCol + 0),
        rsMax(0, firstCol + 1),
        rsMin(lastCol, firstCol + 2),
        rsMin(lastCol, firstCol + 3)
    };

    // Horizontal pass: one interpolated value per contributing row.
    const uchar *rows[4] = { yp0, yp1, yp2, yp3 };
    float horiz[4];
    for (int r = 0; r < 4; r++) {
        const uchar *row = rows[r];
        horiz[r] = cubicInterpolate((float)row[col[0]], (float)row[col[1]],
                                    (float)row[col[2]], (float)row[col[3]], fracX);
    }

    // Vertical pass, then round (+0.5) and clamp to the 8-bit range.
    float result = cubicInterpolate(horiz[0], horiz[1], horiz[2], horiz[3], yf);
    result = clamp(result + 0.5f, 0.f, 255.f);
    return (uchar)result;
}
177
kernelU4(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)178 void RsdCpuScriptIntrinsicResize::kernelU4(const RsForEachStubParamStruct *p,
179 uint32_t xstart, uint32_t xend,
180 uint32_t instep, uint32_t outstep) {
181 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
182
183 if (!cp->mAlloc.get()) {
184 ALOGE("Resize executed without input, skipping");
185 return;
186 }
187 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
188 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
189 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
190 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
191
192 float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
193 int starty = (int) floor(yf - 1);
194 yf = yf - floor(yf);
195 int maxy = srcHeight - 1;
196 int ys0 = rsMax(0, starty + 0);
197 int ys1 = rsMax(0, starty + 1);
198 int ys2 = rsMin(maxy, starty + 2);
199 int ys3 = rsMin(maxy, starty + 3);
200
201 const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
202 const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
203 const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
204 const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
205
206 uchar4 *out = ((uchar4 *)p->out) + xstart;
207 uint32_t x1 = xstart;
208 uint32_t x2 = xend;
209
210 while(x1 < x2) {
211 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
212 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
213 out++;
214 x1++;
215 }
216 }
217
kernelU2(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)218 void RsdCpuScriptIntrinsicResize::kernelU2(const RsForEachStubParamStruct *p,
219 uint32_t xstart, uint32_t xend,
220 uint32_t instep, uint32_t outstep) {
221 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
222
223 if (!cp->mAlloc.get()) {
224 ALOGE("Resize executed without input, skipping");
225 return;
226 }
227 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
228 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
229 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
230 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
231
232 float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
233 int starty = (int) floor(yf - 1);
234 yf = yf - floor(yf);
235 int maxy = srcHeight - 1;
236 int ys0 = rsMax(0, starty + 0);
237 int ys1 = rsMax(0, starty + 1);
238 int ys2 = rsMin(maxy, starty + 2);
239 int ys3 = rsMin(maxy, starty + 3);
240
241 const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
242 const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
243 const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
244 const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
245
246 uchar2 *out = ((uchar2 *)p->out) + xstart;
247 uint32_t x1 = xstart;
248 uint32_t x2 = xend;
249
250 while(x1 < x2) {
251 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
252 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
253 out++;
254 x1++;
255 }
256 }
257
kernelU1(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)258 void RsdCpuScriptIntrinsicResize::kernelU1(const RsForEachStubParamStruct *p,
259 uint32_t xstart, uint32_t xend,
260 uint32_t instep, uint32_t outstep) {
261 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
262
263 if (!cp->mAlloc.get()) {
264 ALOGE("Resize executed without input, skipping");
265 return;
266 }
267 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
268 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
269 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
270 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
271
272 float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
273 int starty = (int) floor(yf - 1);
274 yf = yf - floor(yf);
275 int maxy = srcHeight - 1;
276 int ys0 = rsMax(0, starty + 0);
277 int ys1 = rsMax(0, starty + 1);
278 int ys2 = rsMin(maxy, starty + 2);
279 int ys3 = rsMin(maxy, starty + 3);
280
281 const uchar *yp0 = pin + stride * ys0;
282 const uchar *yp1 = pin + stride * ys1;
283 const uchar *yp2 = pin + stride * ys2;
284 const uchar *yp3 = pin + stride * ys3;
285
286 uchar *out = ((uchar *)p->out) + xstart;
287 uint32_t x1 = xstart;
288 uint32_t x2 = xend;
289
290 while(x1 < x2) {
291 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
292 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
293 out++;
294 x1++;
295 }
296 }
297
RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)298 RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
299 RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
300 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
301
302 }
303
~RsdCpuScriptIntrinsicResize()304 RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
305 }
306
preLaunch(uint32_t slot,const Allocation * ain,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)307 void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, const Allocation * ain,
308 Allocation * aout, const void * usr,
309 uint32_t usrLen, const RsScriptCall *sc)
310 {
311 if (!mAlloc.get()) {
312 ALOGE("Resize executed without input, skipping");
313 return;
314 }
315 const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
316 const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
317 const size_t stride = mAlloc->mHal.drvState.lod[0].stride;
318
319 switch(mAlloc->getType()->getElement()->getVectorSize()) {
320 case 1:
321 mRootPtr = &kernelU1;
322 break;
323 case 2:
324 mRootPtr = &kernelU2;
325 break;
326 case 3:
327 case 4:
328 mRootPtr = &kernelU4;
329 break;
330 }
331
332 scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
333 scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
334
335 }
336
populateScript(Script * s)337 void RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
338 s->mHal.info.exportedVariableCount = 1;
339 }
340
invokeFreeChildren()341 void RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
342 mAlloc.clear();
343 }
344
345
rsdIntrinsic_Resize(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)346 RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
347
348 return new RsdCpuScriptIntrinsicResize(ctx, s, e);
349 }
350
351
352