1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20 
21 using namespace android;
22 using namespace android::renderscript;
23 
24 namespace android {
25 namespace renderscript {
26 
27 
28 class RsdCpuScriptIntrinsicConvolve3x3 : public RsdCpuScriptIntrinsic {
29 public:
30     virtual void populateScript(Script *);
31     virtual void invokeFreeChildren();
32 
33     virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength);
34     virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
35 
36     virtual ~RsdCpuScriptIntrinsicConvolve3x3();
37     RsdCpuScriptIntrinsicConvolve3x3(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
38 
39 protected:
40     float mFp[16];
41     short mIp[16];
42     ObjectBaseRef<const Allocation> mAlloc;
43     ObjectBaseRef<const Element> mElement;
44 
45     static void kernelU1(const RsForEachStubParamStruct *p,
46                          uint32_t xstart, uint32_t xend,
47                          uint32_t instep, uint32_t outstep);
48     static void kernelU2(const RsForEachStubParamStruct *p,
49                          uint32_t xstart, uint32_t xend,
50                          uint32_t instep, uint32_t outstep);
51     static void kernelU4(const RsForEachStubParamStruct *p,
52                          uint32_t xstart, uint32_t xend,
53                          uint32_t instep, uint32_t outstep);
54     static void kernelF1(const RsForEachStubParamStruct *p,
55                          uint32_t xstart, uint32_t xend,
56                          uint32_t instep, uint32_t outstep);
57     static void kernelF2(const RsForEachStubParamStruct *p,
58                          uint32_t xstart, uint32_t xend,
59                          uint32_t instep, uint32_t outstep);
60     static void kernelF4(const RsForEachStubParamStruct *p,
61                          uint32_t xstart, uint32_t xend,
62                          uint32_t instep, uint32_t outstep);
63 };
64 
65 }
66 }
67 
68 
setGlobalObj(uint32_t slot,ObjectBase * data)69 void RsdCpuScriptIntrinsicConvolve3x3::setGlobalObj(uint32_t slot, ObjectBase *data) {
70     rsAssert(slot == 1);
71     mAlloc.set(static_cast<Allocation *>(data));
72 }
73 
setGlobalVar(uint32_t slot,const void * data,size_t dataLength)74 void RsdCpuScriptIntrinsicConvolve3x3::setGlobalVar(uint32_t slot, const void *data,
75                                                     size_t dataLength) {
76     rsAssert(slot == 0);
77     memcpy (&mFp, data, dataLength);
78     for(int ct=0; ct < 9; ct++) {
79         if (mFp[ct] >= 0) {
80             mIp[ct] = (short)(mFp[ct] * 256.f + 0.5f);
81         } else {
82             mIp[ct] = (short)(mFp[ct] * 256.f - 0.5f);
83         }
84     }
85 }
86 
// Accelerated 3x3 convolution routine, defined outside this file and only
// called when gArchUseSIMD is set. As used by kernelU4:
//   dst        - first output pixel to write
//   y0, y1, y2 - the three source rows, each starting one pixel to the left
//                of the first output pixel
//   coef       - fixed-point coefficients (mIp)
//   count      - iteration count; the caller advances two pixels per count
extern "C" void rsdIntrinsicConvolve3x3_K(void *dst,
                                          const void *y0, const void *y1, const void *y2,
                                          const short *coef, uint32_t count);
89 
90 
ConvolveOneU4(const RsForEachStubParamStruct * p,uint32_t x,uchar4 * out,const uchar4 * py0,const uchar4 * py1,const uchar4 * py2,const float * coeff)91 static void ConvolveOneU4(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out,
92                           const uchar4 *py0, const uchar4 *py1, const uchar4 *py2,
93                           const float* coeff) {
94 
95     uint32_t x1 = rsMax((int32_t)x-1, 0);
96     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
97 
98     float4 px = convert_float4(py0[x1]) * coeff[0] +
99                 convert_float4(py0[x]) * coeff[1] +
100                 convert_float4(py0[x2]) * coeff[2] +
101                 convert_float4(py1[x1]) * coeff[3] +
102                 convert_float4(py1[x]) * coeff[4] +
103                 convert_float4(py1[x2]) * coeff[5] +
104                 convert_float4(py2[x1]) * coeff[6] +
105                 convert_float4(py2[x]) * coeff[7] +
106                 convert_float4(py2[x2]) * coeff[8];
107 
108     px = clamp(px + 0.5f, 0.f, 255.f);
109     uchar4 o = {(uchar)px.x, (uchar)px.y, (uchar)px.z, (uchar)px.w};
110     *out = o;
111 }
112 
ConvolveOneU2(const RsForEachStubParamStruct * p,uint32_t x,uchar2 * out,const uchar2 * py0,const uchar2 * py1,const uchar2 * py2,const float * coeff)113 static void ConvolveOneU2(const RsForEachStubParamStruct *p, uint32_t x, uchar2 *out,
114                           const uchar2 *py0, const uchar2 *py1, const uchar2 *py2,
115                           const float* coeff) {
116 
117     uint32_t x1 = rsMax((int32_t)x-1, 0);
118     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
119 
120     float2 px = convert_float2(py0[x1]) * coeff[0] +
121                 convert_float2(py0[x]) * coeff[1] +
122                 convert_float2(py0[x2]) * coeff[2] +
123                 convert_float2(py1[x1]) * coeff[3] +
124                 convert_float2(py1[x]) * coeff[4] +
125                 convert_float2(py1[x2]) * coeff[5] +
126                 convert_float2(py2[x1]) * coeff[6] +
127                 convert_float2(py2[x]) * coeff[7] +
128                 convert_float2(py2[x2]) * coeff[8];
129 
130     px = clamp(px + 0.5f, 0.f, 255.f);
131     *out = convert_uchar2(px);
132 }
133 
ConvolveOneU1(const RsForEachStubParamStruct * p,uint32_t x,uchar * out,const uchar * py0,const uchar * py1,const uchar * py2,const float * coeff)134 static void ConvolveOneU1(const RsForEachStubParamStruct *p, uint32_t x, uchar *out,
135                           const uchar *py0, const uchar *py1, const uchar *py2,
136                           const float* coeff) {
137 
138     uint32_t x1 = rsMax((int32_t)x-1, 0);
139     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
140 
141     float px = ((float)py0[x1]) * coeff[0] +
142                ((float)py0[x]) * coeff[1] +
143                ((float)py0[x2]) * coeff[2] +
144                ((float)py1[x1]) * coeff[3] +
145                ((float)py1[x]) * coeff[4] +
146                ((float)py1[x2]) * coeff[5] +
147                ((float)py2[x1]) * coeff[6] +
148                ((float)py2[x]) * coeff[7] +
149                ((float)py2[x2]) * coeff[8];
150     *out = clamp(px + 0.5f, 0.f, 255.f);
151 }
152 
ConvolveOneF4(const RsForEachStubParamStruct * p,uint32_t x,float4 * out,const float4 * py0,const float4 * py1,const float4 * py2,const float * coeff)153 static void ConvolveOneF4(const RsForEachStubParamStruct *p, uint32_t x, float4 *out,
154                           const float4 *py0, const float4 *py1, const float4 *py2,
155                           const float* coeff) {
156 
157     uint32_t x1 = rsMax((int32_t)x-1, 0);
158     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
159     *out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
160            (py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
161            (py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
162 }
163 
ConvolveOneF2(const RsForEachStubParamStruct * p,uint32_t x,float2 * out,const float2 * py0,const float2 * py1,const float2 * py2,const float * coeff)164 static void ConvolveOneF2(const RsForEachStubParamStruct *p, uint32_t x, float2 *out,
165                           const float2 *py0, const float2 *py1, const float2 *py2,
166                           const float* coeff) {
167 
168     uint32_t x1 = rsMax((int32_t)x-1, 0);
169     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
170     *out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
171            (py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
172            (py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
173 }
174 
ConvolveOneF1(const RsForEachStubParamStruct * p,uint32_t x,float * out,const float * py0,const float * py1,const float * py2,const float * coeff)175 static void ConvolveOneF1(const RsForEachStubParamStruct *p, uint32_t x, float *out,
176                           const float *py0, const float *py1, const float *py2,
177                           const float* coeff) {
178 
179     uint32_t x1 = rsMax((int32_t)x-1, 0);
180     uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
181     *out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
182            (py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
183            (py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
184 }
185 
kernelU4(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)186 void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsForEachStubParamStruct *p,
187                                                 uint32_t xstart, uint32_t xend,
188                                                 uint32_t instep, uint32_t outstep) {
189     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
190 
191     if (!cp->mAlloc.get()) {
192         ALOGE("Convolve3x3 executed without input, skipping");
193         return;
194     }
195     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
196     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
197 
198     uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
199     uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
200     const uchar4 *py0 = (const uchar4 *)(pin + stride * y2);
201     const uchar4 *py1 = (const uchar4 *)(pin + stride * p->y);
202     const uchar4 *py2 = (const uchar4 *)(pin + stride * y1);
203 
204     uchar4 *out = (uchar4 *)p->out;
205     uint32_t x1 = xstart;
206     uint32_t x2 = xend;
207     if(x1 == 0) {
208         ConvolveOneU4(p, 0, out, py0, py1, py2, cp->mFp);
209         x1 ++;
210         out++;
211     }
212 
213     if(x2 > x1) {
214 #if defined(ARCH_ARM_USE_INTRINSICS) || defined(ARCH_X86_HAVE_SSSE3)
215         if (gArchUseSIMD) {
216             int32_t len = (x2 - x1 - 1) >> 1;
217             if(len > 0) {
218                 rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
219                 x1 += len << 1;
220                 out += len << 1;
221             }
222         }
223 #endif
224 
225         while(x1 != x2) {
226             ConvolveOneU4(p, x1, out, py0, py1, py2, cp->mFp);
227             out++;
228             x1++;
229         }
230     }
231 }
232 
kernelU2(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)233 void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsForEachStubParamStruct *p,
234                                                 uint32_t xstart, uint32_t xend,
235                                                 uint32_t instep, uint32_t outstep) {
236     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
237 
238     if (!cp->mAlloc.get()) {
239         ALOGE("Convolve3x3 executed without input, skipping");
240         return;
241     }
242     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
243     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
244 
245     uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
246     uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
247     const uchar2 *py0 = (const uchar2 *)(pin + stride * y2);
248     const uchar2 *py1 = (const uchar2 *)(pin + stride * p->y);
249     const uchar2 *py2 = (const uchar2 *)(pin + stride * y1);
250 
251     uchar2 *out = (uchar2 *)p->out;
252     uint32_t x1 = xstart;
253     uint32_t x2 = xend;
254     if(x1 == 0) {
255         ConvolveOneU2(p, 0, out, py0, py1, py2, cp->mFp);
256         x1 ++;
257         out++;
258     }
259 
260     if(x2 > x1) {
261 #if 0//defined(ARCH_ARM_HAVE_NEON)
262         int32_t len = (x2 - x1 - 1) >> 1;
263         if(len > 0) {
264             rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
265             x1 += len << 1;
266             out += len << 1;
267         }
268 #endif
269 
270         while(x1 != x2) {
271             ConvolveOneU2(p, x1, out, py0, py1, py2, cp->mFp);
272             out++;
273             x1++;
274         }
275     }
276 }
277 
kernelU1(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)278 void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsForEachStubParamStruct *p,
279                                                 uint32_t xstart, uint32_t xend,
280                                                 uint32_t instep, uint32_t outstep) {
281     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
282 
283     if (!cp->mAlloc.get()) {
284         ALOGE("Convolve3x3 executed without input, skipping");
285         return;
286     }
287     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
288     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
289 
290     uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
291     uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
292     const uchar *py0 = (const uchar *)(pin + stride * y2);
293     const uchar *py1 = (const uchar *)(pin + stride * p->y);
294     const uchar *py2 = (const uchar *)(pin + stride * y1);
295 
296     uchar *out = (uchar *)p->out;
297     uint32_t x1 = xstart;
298     uint32_t x2 = xend;
299     if(x1 == 0) {
300         ConvolveOneU1(p, 0, out, py0, py1, py2, cp->mFp);
301         x1 ++;
302         out++;
303     }
304 
305     if(x2 > x1) {
306 #if 0//defined(ARCH_ARM_HAVE_NEON)
307         int32_t len = (x2 - x1 - 1) >> 1;
308         if(len > 0) {
309             rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
310             x1 += len << 1;
311             out += len << 1;
312         }
313 #endif
314 
315         while(x1 != x2) {
316             ConvolveOneU1(p, x1, out, py0, py1, py2, cp->mFp);
317             out++;
318             x1++;
319         }
320     }
321 }
322 
kernelF4(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)323 void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsForEachStubParamStruct *p,
324                                                 uint32_t xstart, uint32_t xend,
325                                                 uint32_t instep, uint32_t outstep) {
326     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
327 
328     if (!cp->mAlloc.get()) {
329         ALOGE("Convolve3x3 executed without input, skipping");
330         return;
331     }
332     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
333     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
334 
335     uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
336     uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
337     const float4 *py0 = (const float4 *)(pin + stride * y2);
338     const float4 *py1 = (const float4 *)(pin + stride * p->y);
339     const float4 *py2 = (const float4 *)(pin + stride * y1);
340 
341     float4 *out = (float4 *)p->out;
342     uint32_t x1 = xstart;
343     uint32_t x2 = xend;
344     if(x1 == 0) {
345         ConvolveOneF4(p, 0, out, py0, py1, py2, cp->mFp);
346         x1 ++;
347         out++;
348     }
349 
350     if(x2 > x1) {
351 #if 0//defined(ARCH_ARM_HAVE_NEON)
352         int32_t len = (x2 - x1 - 1) >> 1;
353         if(len > 0) {
354             rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
355             x1 += len << 1;
356             out += len << 1;
357         }
358 #endif
359 
360         while(x1 != x2) {
361             ConvolveOneF4(p, x1, out, py0, py1, py2, cp->mFp);
362             out++;
363             x1++;
364         }
365     }
366 }
367 
kernelF2(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)368 void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsForEachStubParamStruct *p,
369                                                 uint32_t xstart, uint32_t xend,
370                                                 uint32_t instep, uint32_t outstep) {
371     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
372 
373     if (!cp->mAlloc.get()) {
374         ALOGE("Convolve3x3 executed without input, skipping");
375         return;
376     }
377     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
378     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
379 
380     uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
381     uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
382     const float2 *py0 = (const float2 *)(pin + stride * y2);
383     const float2 *py1 = (const float2 *)(pin + stride * p->y);
384     const float2 *py2 = (const float2 *)(pin + stride * y1);
385 
386     float2 *out = (float2 *)p->out;
387     uint32_t x1 = xstart;
388     uint32_t x2 = xend;
389     if(x1 == 0) {
390         ConvolveOneF2(p, 0, out, py0, py1, py2, cp->mFp);
391         x1 ++;
392         out++;
393     }
394 
395     if(x2 > x1) {
396 #if 0//defined(ARCH_ARM_HAVE_NEON)
397         int32_t len = (x2 - x1 - 1) >> 1;
398         if(len > 0) {
399             rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
400             x1 += len << 1;
401             out += len << 1;
402         }
403 #endif
404 
405         while(x1 != x2) {
406             ConvolveOneF2(p, x1, out, py0, py1, py2, cp->mFp);
407             out++;
408             x1++;
409         }
410     }
411 }
kernelF1(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)412 void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsForEachStubParamStruct *p,
413                                                 uint32_t xstart, uint32_t xend,
414                                                 uint32_t instep, uint32_t outstep) {
415     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
416 
417     if (!cp->mAlloc.get()) {
418         ALOGE("Convolve3x3 executed without input, skipping");
419         return;
420     }
421     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
422     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
423 
424     uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
425     uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
426     const float *py0 = (const float *)(pin + stride * y2);
427     const float *py1 = (const float *)(pin + stride * p->y);
428     const float *py2 = (const float *)(pin + stride * y1);
429 
430     float *out = (float *)p->out;
431     uint32_t x1 = xstart;
432     uint32_t x2 = xend;
433     if(x1 == 0) {
434         ConvolveOneF1(p, 0, out, py0, py1, py2, cp->mFp);
435         x1 ++;
436         out++;
437     }
438 
439     if(x2 > x1) {
440 #if 0//defined(ARCH_ARM_HAVE_NEON)
441         int32_t len = (x2 - x1 - 1) >> 1;
442         if(len > 0) {
443             rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
444             x1 += len << 1;
445             out += len << 1;
446         }
447 #endif
448 
449         while(x1 != x2) {
450             ConvolveOneF1(p, x1, out, py0, py1, py2, cp->mFp);
451             out++;
452             x1++;
453         }
454     }
455 }
456 
RsdCpuScriptIntrinsicConvolve3x3(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)457 RsdCpuScriptIntrinsicConvolve3x3::RsdCpuScriptIntrinsicConvolve3x3(
458             RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
459             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3) {
460 
461     if (e->getType() == RS_TYPE_FLOAT_32) {
462         switch(e->getVectorSize()) {
463         case 1:
464             mRootPtr = &kernelF1;
465             break;
466         case 2:
467             mRootPtr = &kernelF2;
468             break;
469         case 3:
470         case 4:
471             mRootPtr = &kernelF4;
472             break;
473         }
474     } else {
475         switch(e->getVectorSize()) {
476         case 1:
477             mRootPtr = &kernelU1;
478             break;
479         case 2:
480             mRootPtr = &kernelU2;
481             break;
482         case 3:
483         case 4:
484             mRootPtr = &kernelU4;
485             break;
486         }
487     }
488     for(int ct=0; ct < 9; ct++) {
489         mFp[ct] = 1.f / 9.f;
490         mIp[ct] = (short)(mFp[ct] * 256.f + 0.5f);
491     }
492 }
493 
~RsdCpuScriptIntrinsicConvolve3x3()494 RsdCpuScriptIntrinsicConvolve3x3::~RsdCpuScriptIntrinsicConvolve3x3() {
495 }
496 
populateScript(Script * s)497 void RsdCpuScriptIntrinsicConvolve3x3::populateScript(Script *s) {
498     s->mHal.info.exportedVariableCount = 2;
499 }
500 
invokeFreeChildren()501 void RsdCpuScriptIntrinsicConvolve3x3::invokeFreeChildren() {
502     mAlloc.clear();
503 }
504 
505 
rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)506 RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
507 
508     return new RsdCpuScriptIntrinsicConvolve3x3(ctx, s, e);
509 }
510 
511 
512