1 /*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "rsCpuIntrinsic.h"
18 #include "rsCpuIntrinsicInlines.h"
19
20 using namespace android;
21 using namespace android::renderscript;
22
23 namespace android {
24 namespace renderscript {
25
26
27 class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic {
28 public:
29 virtual void populateScript(Script *);
30 virtual void invokeFreeChildren();
31
32 virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength);
33 virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
34
35 virtual ~RsdCpuScriptIntrinsicHistogram();
36 RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
37
38 protected:
39 void preLaunch(uint32_t slot, const Allocation * ain,
40 Allocation * aout, const void * usr,
41 uint32_t usrLen, const RsScriptCall *sc);
42 void postLaunch(uint32_t slot, const Allocation * ain,
43 Allocation * aout, const void * usr,
44 uint32_t usrLen, const RsScriptCall *sc);
45
46
47 float mDot[4];
48 int mDotI[4];
49 int *mSums;
50 ObjectBaseRef<Allocation> mAllocOut;
51
52 static void kernelP1U4(const RsForEachStubParamStruct *p,
53 uint32_t xstart, uint32_t xend,
54 uint32_t instep, uint32_t outstep);
55 static void kernelP1U3(const RsForEachStubParamStruct *p,
56 uint32_t xstart, uint32_t xend,
57 uint32_t instep, uint32_t outstep);
58 static void kernelP1U2(const RsForEachStubParamStruct *p,
59 uint32_t xstart, uint32_t xend,
60 uint32_t instep, uint32_t outstep);
61 static void kernelP1U1(const RsForEachStubParamStruct *p,
62 uint32_t xstart, uint32_t xend,
63 uint32_t instep, uint32_t outstep);
64
65 static void kernelP1L4(const RsForEachStubParamStruct *p,
66 uint32_t xstart, uint32_t xend,
67 uint32_t instep, uint32_t outstep);
68 static void kernelP1L3(const RsForEachStubParamStruct *p,
69 uint32_t xstart, uint32_t xend,
70 uint32_t instep, uint32_t outstep);
71 static void kernelP1L2(const RsForEachStubParamStruct *p,
72 uint32_t xstart, uint32_t xend,
73 uint32_t instep, uint32_t outstep);
74 static void kernelP1L1(const RsForEachStubParamStruct *p,
75 uint32_t xstart, uint32_t xend,
76 uint32_t instep, uint32_t outstep);
77
78 };
79
80 }
81 }
82
setGlobalObj(uint32_t slot,ObjectBase * data)83 void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) {
84 rsAssert(slot == 1);
85 mAllocOut.set(static_cast<Allocation *>(data));
86 }
87
setGlobalVar(uint32_t slot,const void * data,size_t dataLength)88 void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
89 rsAssert(slot == 0);
90 rsAssert(dataLength == 16);
91 memcpy(mDot, data, 16);
92 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
93 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
94 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
95 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
96 }
97
98
99
preLaunch(uint32_t slot,const Allocation * ain,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)100 void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * ain,
101 Allocation * aout, const void * usr,
102 uint32_t usrLen, const RsScriptCall *sc) {
103
104 const uint32_t threads = mCtx->getThreadCount();
105 uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
106
107 switch (slot) {
108 case 0:
109 switch(vSize) {
110 case 1:
111 mRootPtr = &kernelP1U1;
112 break;
113 case 2:
114 mRootPtr = &kernelP1U2;
115 break;
116 case 3:
117 mRootPtr = &kernelP1U3;
118 vSize = 4;
119 break;
120 case 4:
121 mRootPtr = &kernelP1U4;
122 break;
123 }
124 break;
125 case 1:
126 switch(ain->getType()->getElement()->getVectorSize()) {
127 case 1:
128 mRootPtr = &kernelP1L1;
129 break;
130 case 2:
131 mRootPtr = &kernelP1L2;
132 break;
133 case 3:
134 mRootPtr = &kernelP1L3;
135 break;
136 case 4:
137 mRootPtr = &kernelP1L4;
138 break;
139 }
140 break;
141 }
142 memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
143 }
144
postLaunch(uint32_t slot,const Allocation * ain,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)145 void RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain,
146 Allocation * aout, const void * usr,
147 uint32_t usrLen, const RsScriptCall *sc) {
148
149 unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
150 uint32_t threads = mCtx->getThreadCount();
151 uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
152
153 if (vSize == 3) vSize = 4;
154
155 for (uint32_t ct=0; ct < (256 * vSize); ct++) {
156 o[ct] = mSums[ct];
157 for (uint32_t t=1; t < threads; t++) {
158 o[ct] += mSums[ct + (256 * vSize * t)];
159 }
160 }
161 }
162
kernelP1U4(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)163 void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsForEachStubParamStruct *p,
164 uint32_t xstart, uint32_t xend,
165 uint32_t instep, uint32_t outstep) {
166
167 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
168 uchar *in = (uchar *)p->in;
169 int * sums = &cp->mSums[256 * 4 * p->lid];
170
171 for (uint32_t x = xstart; x < xend; x++) {
172 sums[(in[0] << 2) ] ++;
173 sums[(in[1] << 2) + 1] ++;
174 sums[(in[2] << 2) + 2] ++;
175 sums[(in[3] << 2) + 3] ++;
176 in += instep;
177 }
178 }
179
kernelP1U3(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)180 void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsForEachStubParamStruct *p,
181 uint32_t xstart, uint32_t xend,
182 uint32_t instep, uint32_t outstep) {
183
184 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
185 uchar *in = (uchar *)p->in;
186 int * sums = &cp->mSums[256 * 4 * p->lid];
187
188 for (uint32_t x = xstart; x < xend; x++) {
189 sums[(in[0] << 2) ] ++;
190 sums[(in[1] << 2) + 1] ++;
191 sums[(in[2] << 2) + 2] ++;
192 in += instep;
193 }
194 }
195
kernelP1U2(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)196 void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsForEachStubParamStruct *p,
197 uint32_t xstart, uint32_t xend,
198 uint32_t instep, uint32_t outstep) {
199
200 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
201 uchar *in = (uchar *)p->in;
202 int * sums = &cp->mSums[256 * 2 * p->lid];
203
204 for (uint32_t x = xstart; x < xend; x++) {
205 sums[(in[0] << 1) ] ++;
206 sums[(in[1] << 1) + 1] ++;
207 in += instep;
208 }
209 }
210
kernelP1L4(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)211 void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsForEachStubParamStruct *p,
212 uint32_t xstart, uint32_t xend,
213 uint32_t instep, uint32_t outstep) {
214
215 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
216 uchar *in = (uchar *)p->in;
217 int * sums = &cp->mSums[256 * p->lid];
218
219 for (uint32_t x = xstart; x < xend; x++) {
220 int t = (cp->mDotI[0] * in[0]) +
221 (cp->mDotI[1] * in[1]) +
222 (cp->mDotI[2] * in[2]) +
223 (cp->mDotI[3] * in[3]);
224 sums[(t + 0x7f) >> 8] ++;
225 in += instep;
226 }
227 }
228
kernelP1L3(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)229 void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsForEachStubParamStruct *p,
230 uint32_t xstart, uint32_t xend,
231 uint32_t instep, uint32_t outstep) {
232
233 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
234 uchar *in = (uchar *)p->in;
235 int * sums = &cp->mSums[256 * p->lid];
236
237 for (uint32_t x = xstart; x < xend; x++) {
238 int t = (cp->mDotI[0] * in[0]) +
239 (cp->mDotI[1] * in[1]) +
240 (cp->mDotI[2] * in[2]);
241 sums[(t + 0x7f) >> 8] ++;
242 in += instep;
243 }
244 }
245
kernelP1L2(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)246 void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsForEachStubParamStruct *p,
247 uint32_t xstart, uint32_t xend,
248 uint32_t instep, uint32_t outstep) {
249
250 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
251 uchar *in = (uchar *)p->in;
252 int * sums = &cp->mSums[256 * p->lid];
253
254 for (uint32_t x = xstart; x < xend; x++) {
255 int t = (cp->mDotI[0] * in[0]) +
256 (cp->mDotI[1] * in[1]);
257 sums[(t + 0x7f) >> 8] ++;
258 in += instep;
259 }
260 }
261
kernelP1L1(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)262 void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsForEachStubParamStruct *p,
263 uint32_t xstart, uint32_t xend,
264 uint32_t instep, uint32_t outstep) {
265
266 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
267 uchar *in = (uchar *)p->in;
268 int * sums = &cp->mSums[256 * p->lid];
269
270 for (uint32_t x = xstart; x < xend; x++) {
271 int t = (cp->mDotI[0] * in[0]);
272 sums[(t + 0x7f) >> 8] ++;
273 in += instep;
274 }
275 }
276
kernelP1U1(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)277 void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsForEachStubParamStruct *p,
278 uint32_t xstart, uint32_t xend,
279 uint32_t instep, uint32_t outstep) {
280
281 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
282 uchar *in = (uchar *)p->in;
283 int * sums = &cp->mSums[256 * p->lid];
284
285 for (uint32_t x = xstart; x < xend; x++) {
286 sums[in[0]] ++;
287 in += instep;
288 }
289 }
290
291
RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)292 RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx,
293 const Script *s, const Element *e)
294 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) {
295
296 mRootPtr = NULL;
297 mSums = new int[256 * 4 * mCtx->getThreadCount()];
298 mDot[0] = 0.299f;
299 mDot[1] = 0.587f;
300 mDot[2] = 0.114f;
301 mDot[3] = 0;
302 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
303 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
304 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
305 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
306 }
307
~RsdCpuScriptIntrinsicHistogram()308 RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() {
309 if (mSums) {
310 delete []mSums;
311 }
312 }
313
populateScript(Script * s)314 void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) {
315 s->mHal.info.exportedVariableCount = 2;
316 }
317
invokeFreeChildren()318 void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() {
319 }
320
321
rsdIntrinsic_Histogram(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)322 RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
323
324 return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
325 }
326
327
328