1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20
21 using namespace android;
22 using namespace android::renderscript;
23
24 namespace android {
25 namespace renderscript {
26
27
28 class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
29 public:
30 void populateScript(Script *) override;
31 void invokeFreeChildren() override;
32
33 void setGlobalObj(uint32_t slot, ObjectBase *data) override;
34
35 ~RsdCpuScriptIntrinsicResize() override;
36 RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
37
38 void preLaunch(uint32_t slot, const Allocation ** ains,
39 uint32_t inLen, Allocation * aout, const void * usr,
40 uint32_t usrLen, const RsScriptCall *sc) override;
41
42 float scaleX;
43 float scaleY;
44
45 protected:
46 ObjectBaseRef<const Allocation> mAlloc;
47 ObjectBaseRef<const Element> mElement;
48
49 static void kernelU1(const RsExpandKernelDriverInfo *info,
50 uint32_t xstart, uint32_t xend,
51 uint32_t outstep);
52 static void kernelU2(const RsExpandKernelDriverInfo *info,
53 uint32_t xstart, uint32_t xend,
54 uint32_t outstep);
55 static void kernelU4(const RsExpandKernelDriverInfo *info,
56 uint32_t xstart, uint32_t xend,
57 uint32_t outstep);
58 static void kernelF1(const RsExpandKernelDriverInfo *info,
59 uint32_t xstart, uint32_t xend,
60 uint32_t outstep);
61 static void kernelF2(const RsExpandKernelDriverInfo *info,
62 uint32_t xstart, uint32_t xend,
63 uint32_t outstep);
64 static void kernelF4(const RsExpandKernelDriverInfo *info,
65 uint32_t xstart, uint32_t xend,
66 uint32_t outstep);
67 };
68
69 }
70 }
71
72
setGlobalObj(uint32_t slot,ObjectBase * data)73 void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
74 rsAssert(slot == 0);
75 mAlloc.set(static_cast<Allocation *>(data));
76 }
77
cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3,float x)78 static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
79 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
80 + x * (3.f * (p1 - p2) + p3 - p0)));
81 }
82
cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3,float x)83 static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
84 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
85 + x * (3.f * (p1 - p2) + p3 - p0)));
86 }
87
// Catmull-Rom cubic through four equally spaced samples (scalar version).
// x is the position between p1 (x == 0) and p2 (x == 1).
static float cubicInterpolate(float p0, float p1, float p2, float p3, float x) {
    // Polynomial coefficients (before the common 0.5 factor), evaluated
    // below in Horner form.
    const float linear = p2 - p0;
    const float quadratic = 2.f * p0 - 5.f * p1 + 4.f * p2 - p3;
    const float cubic = 3.f * (p1 - p2) + p3 - p0;

    return p1 + 0.5f * x * (linear + x * (quadratic + x * cubic));
}
92
// Computes one bicubic output pixel from four source rows of uchar4 pixels.
//
// yp0..yp3 point at the start of the four (already row-clamped) source rows,
// xf is the sample position along the row in source pixels, yf is the
// fractional position between rows yp1 and yp2, and width is the number of
// pixels in a row.  The result is rounded (+0.5) and clamped to [0, 255].
static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
                         float xf, float yf, int width) {
    // Leftmost of the four columns, taken from the unmodified xf.
    const int firstTap = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped at column 0, the two right taps at the
    // last column.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto sampleRow = [&](const uchar4 *row) -> float4 {
        return cubicInterpolate(convert_float4(row[c0]),
                                convert_float4(row[c1]),
                                convert_float4(row[c2]),
                                convert_float4(row[c3]), fracX);
    };

    const float4 h0 = sampleRow(yp0);
    const float4 h1 = sampleRow(yp1);
    const float4 h2 = sampleRow(yp2);
    const float4 h3 = sampleRow(yp3);

    // Vertical pass, then round and saturate to the 8-bit range.
    const float4 blended = cubicInterpolate(h0, h1, h2, h3, yf);
    return convert_uchar4(clamp(blended + 0.5f, 0.f, 255.f));
}
127
// Computes one bicubic output pixel from four source rows of uchar2 pixels.
//
// yp0..yp3 point at the start of the four (already row-clamped) source rows,
// xf is the sample position along the row in source pixels, yf is the
// fractional position between rows yp1 and yp2, and width is the number of
// pixels in a row.  The result is rounded (+0.5) and clamped to [0, 255].
static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
                         float xf, float yf, int width) {
    // Leftmost of the four columns, taken from the unmodified xf.
    const int firstTap = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped at column 0, the two right taps at the
    // last column.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto sampleRow = [&](const uchar2 *row) -> float2 {
        return cubicInterpolate(convert_float2(row[c0]),
                                convert_float2(row[c1]),
                                convert_float2(row[c2]),
                                convert_float2(row[c3]), fracX);
    };

    const float2 h0 = sampleRow(yp0);
    const float2 h1 = sampleRow(yp1);
    const float2 h2 = sampleRow(yp2);
    const float2 h3 = sampleRow(yp3);

    // Vertical pass, then round and saturate to the 8-bit range.
    const float2 blended = cubicInterpolate(h0, h1, h2, h3, yf);
    return convert_uchar2(clamp(blended + 0.5f, 0.f, 255.f));
}
162
// Computes one bicubic output sample from four source rows of single-channel
// 8-bit pixels.
//
// yp0..yp3 point at the start of the four (already row-clamped) source rows,
// xf is the sample position along the row in source pixels, yf is the
// fractional position between rows yp1 and yp2, and width is the number of
// pixels in a row.  The result is rounded (+0.5) and clamped to [0, 255].
static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
                        float xf, float yf, int width) {
    // Leftmost of the four columns, taken from the unmodified xf.
    const int firstTap = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped at column 0, the two right taps at the
    // last column.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto sampleRow = [&](const uchar *row) -> float {
        return cubicInterpolate((float)row[c0], (float)row[c1],
                                (float)row[c2], (float)row[c3], fracX);
    };

    const float h0 = sampleRow(yp0);
    const float h1 = sampleRow(yp1);
    const float h2 = sampleRow(yp2);
    const float h3 = sampleRow(yp3);

    // Vertical pass, then round and saturate to the 8-bit range.
    const float blended = cubicInterpolate(h0, h1, h2, h3, yf);
    const float rounded = clamp(blended + 0.5f, 0.f, 255.f);
    return (uchar)rounded;
}
186
187 extern "C" uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc);
188
189 extern "C" void rsdIntrinsicResizeB4_K(
190 uchar4 *dst,
191 size_t count,
192 uint32_t xf,
193 uint32_t xinc,
194 uchar4 const *srcn,
195 uchar4 const *src0,
196 uchar4 const *src1,
197 uchar4 const *src2,
198 size_t xclip,
199 size_t avail,
200 uint64_t osc_ctl,
201 int32_t const *yr);
202
203 extern "C" void rsdIntrinsicResizeB2_K(
204 uchar2 *dst,
205 size_t count,
206 uint32_t xf,
207 uint32_t xinc,
208 uchar2 const *srcn,
209 uchar2 const *src0,
210 uchar2 const *src1,
211 uchar2 const *src2,
212 size_t xclip,
213 size_t avail,
214 uint64_t osc_ctl,
215 int32_t const *yr);
216
217 extern "C" void rsdIntrinsicResizeB1_K(
218 uchar *dst,
219 size_t count,
220 uint32_t xf,
221 uint32_t xinc,
222 uchar const *srcn,
223 uchar const *src0,
224 uchar const *src1,
225 uchar const *src2,
226 size_t xclip,
227 size_t avail,
228 uint64_t osc_ctl,
229 int32_t const *yr);
230
#if defined(ARCH_ARM_USE_INTRINSICS)
// Fills yr[0..3] with the vertical cubic weights for fractional offset yf,
// as integers scaled by 0x10000 (16 fractional bits).
//
// yr[1] and yr[2] are the weights of the two inner rows.  yr[0] and yr[3] are
// stored with the opposite sign of the outer-row weights of the cubic used by
// cubicInterpolate().  NOTE(review): presumably the assembly kernel subtracts
// those two terms - confirm against its source.
static void mkYCoeff(int32_t *yr, float yf) {
    const float ySquared = yf * yf;
    const float yCubed = ySquared * yf;

    // Powers of yf converted to fixed point, rounded to nearest.
    const int32_t t1 = rint(yf * 0x10000);
    const int32_t t2 = rint(ySquared * 0x10000);
    const int32_t t3 = rint(yCubed * 0x10000);

    // The trailing >> 1 applies the 0.5 factor of the cubic.
    yr[0] = -(2 * t2 - t3 - t1) >> 1;
    yr[1] = (3 * t3 - 5 * t2 + 0x20000) >> 1;
    yr[2] = (-3 * t3 + 4 * t2 + t1) >> 1;
    yr[3] = -(t3 - t2) >> 1;
}
#endif
243
// Computes one bicubic output pixel from four source rows of float4 pixels.
//
// yp0..yp3 point at the start of the four (already row-clamped) source rows,
// xf is the sample position along the row in source pixels, yf is the
// fractional position between rows yp1 and yp2, and width is the number of
// pixels in a row.  Unlike the 8-bit variants, the result is neither rounded
// nor clamped.
static float4 OneBiCubic(const float4 *yp0, const float4 *yp1, const float4 *yp2, const float4 *yp3,
                         float xf, float yf, int width) {
    // Leftmost of the four columns, taken from the unmodified xf.
    const int firstTap = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped at column 0, the two right taps at the
    // last column.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto sampleRow = [&](const float4 *row) -> float4 {
        return cubicInterpolate(row[c0], row[c1], row[c2], row[c3], fracX);
    };

    const float4 h0 = sampleRow(yp0);
    const float4 h1 = sampleRow(yp1);
    const float4 h2 = sampleRow(yp2);
    const float4 h3 = sampleRow(yp3);

    // Vertical pass.
    return cubicInterpolate(h0, h1, h2, h3, yf);
}
266
// Computes one bicubic output pixel from four source rows of float2 pixels.
//
// yp0..yp3 point at the start of the four (already row-clamped) source rows,
// xf is the sample position along the row in source pixels, yf is the
// fractional position between rows yp1 and yp2, and width is the number of
// pixels in a row.  Unlike the 8-bit variants, the result is neither rounded
// nor clamped.
static float2 OneBiCubic(const float2 *yp0, const float2 *yp1, const float2 *yp2, const float2 *yp3,
                         float xf, float yf, int width) {
    // Leftmost of the four columns, taken from the unmodified xf.
    const int firstTap = (int) floor(xf - 1);
    const float fracX = xf - floor(xf);
    const int lastCol = width - 1;

    // The two left taps are clamped at column 0, the two right taps at the
    // last column.
    const int c0 = rsMax(0, firstTap + 0);
    const int c1 = rsMax(0, firstTap + 1);
    const int c2 = rsMin(lastCol, firstTap + 2);
    const int c3 = rsMin(lastCol, firstTap + 3);

    // Horizontal pass over a single row.
    auto sampleRow = [&](const float2 *row) -> float2 {
        return cubicInterpolate(row[c0], row[c1], row[c2], row[c3], fracX);
    };

    const float2 h0 = sampleRow(yp0);
    const float2 h1 = sampleRow(yp1);
    const float2 h2 = sampleRow(yp2);
    const float2 h3 = sampleRow(yp3);

    // Vertical pass.
    return cubicInterpolate(h0, h1, h2, h3, yf);
}
289
OneBiCubic(const float * yp0,const float * yp1,const float * yp2,const float * yp3,float xf,float yf,int width)290 static float OneBiCubic(const float *yp0, const float *yp1, const float *yp2, const float *yp3,
291 float xf, float yf, int width) {
292 int startx = (int) floor(xf - 1);
293 xf = xf - floor(xf);
294 int maxx = width - 1;
295 int xs0 = rsMax(0, startx + 0);
296 int xs1 = rsMax(0, startx + 1);
297 int xs2 = rsMin(maxx, startx + 2);
298 int xs3 = rsMin(maxx, startx + 3);
299
300 float p0 = cubicInterpolate(yp0[xs0], yp0[xs1],
301 yp0[xs2], yp0[xs3], xf);
302 float p1 = cubicInterpolate(yp1[xs0], yp1[xs1],
303 yp1[xs2], yp1[xs3], xf);
304 float p2 = cubicInterpolate(yp2[xs0], yp2[xs1],
305 yp2[xs2], yp2[xs3], xf);
306 float p3 = cubicInterpolate(yp3[xs0], yp3[xs1],
307 yp3[xs2], yp3[xs3], xf);
308
309 float p = cubicInterpolate(p0, p1, p2, p3, yf);
310 return p;
311 }
312
kernelU4(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)313 void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
314 uint32_t xstart, uint32_t xend,
315 uint32_t outstep) {
316 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
317
318 if (!cp->mAlloc.get()) {
319 ALOGE("Resize executed without input, skipping");
320 return;
321 }
322 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
323 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
324 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
325 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
326
327 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
328 int starty = (int) floor(yf - 1);
329 yf = yf - floor(yf);
330 int maxy = srcHeight - 1;
331 int ys0 = rsMax(0, starty + 0);
332 int ys1 = rsMax(0, starty + 1);
333 int ys2 = rsMin(maxy, starty + 2);
334 int ys3 = rsMin(maxy, starty + 3);
335
336 const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
337 const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
338 const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
339 const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
340
341 uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart;
342 uint32_t x1 = xstart;
343 uint32_t x2 = xend;
344
345 #if defined(ARCH_ARM_USE_INTRINSICS)
346 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
347 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
348 long xf16 = rint(xf * 0x10000);
349 uint32_t xinc16 = rint(cp->scaleX * 0x10000);
350
351 int xoff = (xf16 >> 16) - 1;
352 int xclip = rsMax(0, xoff) - xoff;
353 int len = x2 - x1;
354
355 int32_t yr[4];
356 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
357 mkYCoeff(yr, yf);
358
359 xoff += xclip;
360
361 rsdIntrinsicResizeB4_K(
362 out, len,
363 xf16 & 0xffff, xinc16,
364 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
365 xclip, srcWidth - xoff + xclip,
366 osc_ctl, yr);
367 out += len;
368 x1 += len;
369 }
370 #endif
371
372 while(x1 < x2) {
373 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
374 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
375 out++;
376 x1++;
377 }
378 }
379
kernelU2(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)380 void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
381 uint32_t xstart, uint32_t xend,
382 uint32_t outstep) {
383 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
384
385 if (!cp->mAlloc.get()) {
386 ALOGE("Resize executed without input, skipping");
387 return;
388 }
389 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
390 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
391 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
392 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
393
394 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
395 int starty = (int) floor(yf - 1);
396 yf = yf - floor(yf);
397 int maxy = srcHeight - 1;
398 int ys0 = rsMax(0, starty + 0);
399 int ys1 = rsMax(0, starty + 1);
400 int ys2 = rsMin(maxy, starty + 2);
401 int ys3 = rsMin(maxy, starty + 3);
402
403 const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
404 const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
405 const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
406 const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
407
408 uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart;
409 uint32_t x1 = xstart;
410 uint32_t x2 = xend;
411
412 #if defined(ARCH_ARM_USE_INTRINSICS)
413 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
414 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
415 long xf16 = rint(xf * 0x10000);
416 uint32_t xinc16 = rint(cp->scaleX * 0x10000);
417
418 int xoff = (xf16 >> 16) - 1;
419 int xclip = rsMax(0, xoff) - xoff;
420 int len = x2 - x1;
421
422 int32_t yr[4];
423 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
424 mkYCoeff(yr, yf);
425
426 xoff += xclip;
427
428 rsdIntrinsicResizeB2_K(
429 out, len,
430 xf16 & 0xffff, xinc16,
431 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
432 xclip, srcWidth - xoff + xclip,
433 osc_ctl, yr);
434 out += len;
435 x1 += len;
436 }
437 #endif
438
439 while(x1 < x2) {
440 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
441 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
442 out++;
443 x1++;
444 }
445 }
446
kernelU1(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)447 void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
448 uint32_t xstart, uint32_t xend,
449 uint32_t outstep) {
450 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
451
452 if (!cp->mAlloc.get()) {
453 ALOGE("Resize executed without input, skipping");
454 return;
455 }
456 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
457 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
458 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
459 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
460
461 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
462 int starty = (int) floor(yf - 1);
463 yf = yf - floor(yf);
464 int maxy = srcHeight - 1;
465 int ys0 = rsMax(0, starty + 0);
466 int ys1 = rsMax(0, starty + 1);
467 int ys2 = rsMin(maxy, starty + 2);
468 int ys3 = rsMin(maxy, starty + 3);
469
470 const uchar *yp0 = pin + stride * ys0;
471 const uchar *yp1 = pin + stride * ys1;
472 const uchar *yp2 = pin + stride * ys2;
473 const uchar *yp3 = pin + stride * ys3;
474
475 uchar *out = ((uchar *)info->outPtr[0]) + xstart;
476 uint32_t x1 = xstart;
477 uint32_t x2 = xend;
478
479 #if defined(ARCH_ARM_USE_INTRINSICS)
480 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
481 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
482 long xf16 = rint(xf * 0x10000);
483 uint32_t xinc16 = rint(cp->scaleX * 0x10000);
484
485 int xoff = (xf16 >> 16) - 1;
486 int xclip = rsMax(0, xoff) - xoff;
487 int len = x2 - x1;
488
489 int32_t yr[4];
490 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
491 mkYCoeff(yr, yf);
492
493 xoff += xclip;
494
495 rsdIntrinsicResizeB1_K(
496 out, len,
497 xf16 & 0xffff, xinc16,
498 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
499 xclip, srcWidth - xoff + xclip,
500 osc_ctl, yr);
501 out += len;
502 x1 += len;
503 }
504 #endif
505
506 while(x1 < x2) {
507 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
508 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
509 out++;
510 x1++;
511 }
512 }
513
kernelF4(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)514 void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
515 uint32_t xstart, uint32_t xend,
516 uint32_t outstep) {
517 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
518
519 if (!cp->mAlloc.get()) {
520 ALOGE("Resize executed without input, skipping");
521 return;
522 }
523 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
524 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
525 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
526 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
527
528 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
529 int starty = (int) floor(yf - 1);
530 yf = yf - floor(yf);
531 int maxy = srcHeight - 1;
532 int ys0 = rsMax(0, starty + 0);
533 int ys1 = rsMax(0, starty + 1);
534 int ys2 = rsMin(maxy, starty + 2);
535 int ys3 = rsMin(maxy, starty + 3);
536
537 const float4 *yp0 = (const float4 *)(pin + stride * ys0);
538 const float4 *yp1 = (const float4 *)(pin + stride * ys1);
539 const float4 *yp2 = (const float4 *)(pin + stride * ys2);
540 const float4 *yp3 = (const float4 *)(pin + stride * ys3);
541
542 float4 *out = ((float4 *)info->outPtr[0]) + xstart;
543 uint32_t x1 = xstart;
544 uint32_t x2 = xend;
545
546 while(x1 < x2) {
547 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
548 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
549 out++;
550 x1++;
551 }
552 }
553
kernelF2(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)554 void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
555 uint32_t xstart, uint32_t xend,
556 uint32_t outstep) {
557 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
558
559 if (!cp->mAlloc.get()) {
560 ALOGE("Resize executed without input, skipping");
561 return;
562 }
563 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
564 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
565 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
566 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
567
568 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
569 int starty = (int) floor(yf - 1);
570 yf = yf - floor(yf);
571 int maxy = srcHeight - 1;
572 int ys0 = rsMax(0, starty + 0);
573 int ys1 = rsMax(0, starty + 1);
574 int ys2 = rsMin(maxy, starty + 2);
575 int ys3 = rsMin(maxy, starty + 3);
576
577 const float2 *yp0 = (const float2 *)(pin + stride * ys0);
578 const float2 *yp1 = (const float2 *)(pin + stride * ys1);
579 const float2 *yp2 = (const float2 *)(pin + stride * ys2);
580 const float2 *yp3 = (const float2 *)(pin + stride * ys3);
581
582 float2 *out = ((float2 *)info->outPtr[0]) + xstart;
583 uint32_t x1 = xstart;
584 uint32_t x2 = xend;
585
586 while(x1 < x2) {
587 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
588 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
589 out++;
590 x1++;
591 }
592 }
593
kernelF1(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)594 void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
595 uint32_t xstart, uint32_t xend,
596 uint32_t outstep) {
597 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
598
599 if (!cp->mAlloc.get()) {
600 ALOGE("Resize executed without input, skipping");
601 return;
602 }
603 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
604 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
605 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
606 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
607
608 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
609 int starty = (int) floor(yf - 1);
610 yf = yf - floor(yf);
611 int maxy = srcHeight - 1;
612 int ys0 = rsMax(0, starty + 0);
613 int ys1 = rsMax(0, starty + 1);
614 int ys2 = rsMin(maxy, starty + 2);
615 int ys3 = rsMin(maxy, starty + 3);
616
617 const float *yp0 = (const float *)(pin + stride * ys0);
618 const float *yp1 = (const float *)(pin + stride * ys1);
619 const float *yp2 = (const float *)(pin + stride * ys2);
620 const float *yp3 = (const float *)(pin + stride * ys3);
621
622 float *out = ((float *)info->outPtr[0]) + xstart;
623 uint32_t x1 = xstart;
624 uint32_t x2 = xend;
625
626 while(x1 < x2) {
627 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
628 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
629 out++;
630 x1++;
631 }
632 }
633
RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)634 RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
635 RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
636 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
637
638 }
639
~RsdCpuScriptIntrinsicResize()640 RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
641 }
642
preLaunch(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)643 void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
644 const Allocation ** ains,
645 uint32_t inLen, Allocation * aout,
646 const void * usr, uint32_t usrLen,
647 const RsScriptCall *sc)
648 {
649 if (!mAlloc.get()) {
650 ALOGE("Resize executed without input, skipping");
651 return;
652 }
653 const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
654 const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
655 const size_t stride = mAlloc->mHal.drvState.lod[0].stride;
656
657 //check the data type to determine F or U.
658 if (mAlloc->getType()->getElement()->getType() == RS_TYPE_UNSIGNED_8) {
659 switch(mAlloc->getType()->getElement()->getVectorSize()) {
660 case 1:
661 mRootPtr = &kernelU1;
662 break;
663 case 2:
664 mRootPtr = &kernelU2;
665 break;
666 case 3:
667 case 4:
668 mRootPtr = &kernelU4;
669 break;
670 }
671 } else {
672 switch(mAlloc->getType()->getElement()->getVectorSize()) {
673 case 1:
674 mRootPtr = &kernelF1;
675 break;
676 case 2:
677 mRootPtr = &kernelF2;
678 break;
679 case 3:
680 case 4:
681 mRootPtr = &kernelF4;
682 break;
683 }
684 }
685
686 scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
687 scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
688
689 }
690
populateScript(Script * s)691 void RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
692 s->mHal.info.exportedVariableCount = 1;
693 }
694
invokeFreeChildren()695 void RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
696 mAlloc.clear();
697 }
698
699
rsdIntrinsic_Resize(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)700 RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
701
702 return new RsdCpuScriptIntrinsicResize(ctx, s, e);
703 }
704