1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsScriptGroup.h"
20 #include "rsCpuScriptGroup.h"
21
22 using namespace android;
23 using namespace android::renderscript;
24
CpuScriptGroupImpl(RsdCpuReferenceImpl * ctx,const ScriptGroupBase * sg)25 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
26 mCtx = ctx;
27 mSG = (ScriptGroup*)sg;
28 }
29
~CpuScriptGroupImpl()30 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
31
32 }
33
init()34 bool CpuScriptGroupImpl::init() {
35 return true;
36 }
37
setInput(const ScriptKernelID * kid,Allocation * a)38 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
39 }
40
setOutput(const ScriptKernelID * kid,Allocation * a)41 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
42 }
43
44
45 typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo,
46 uint32_t xstart, uint32_t xend,
47 uint32_t outstep);
48
scriptGroupRoot(const RsExpandKernelDriverInfo * kinfo,uint32_t xstart,uint32_t xend,uint32_t outstep)49 void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo,
50 uint32_t xstart, uint32_t xend,
51 uint32_t outstep) {
52
53
54 const ScriptList *sl = (const ScriptList *)kinfo->usr;
55 RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
56
57 const uint32_t oldInStride = mkinfo->inStride[0];
58
59 for (size_t ct = 0; ct < sl->count; ct++) {
60 ScriptGroupRootFunc_t func;
61 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
62 mkinfo->usr = sl->usrPtrs[ct];
63
64 if (sl->ins[ct]) {
65 rsAssert(kinfo->inLen == 1);
66
67 mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
68
69 mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
70
71 if (sl->inExts[ct]) {
72 mkinfo->inPtr[0] =
73 (mkinfo->inPtr[0] +
74 sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y);
75
76 } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
77 mkinfo->inPtr[0] =
78 (mkinfo->inPtr[0] +
79 sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid);
80 }
81
82 } else {
83 rsAssert(kinfo->inLen == 0);
84
85 mkinfo->inPtr[0] = nullptr;
86 mkinfo->inStride[0] = 0;
87 }
88
89 uint32_t ostep;
90 if (sl->outs[ct]) {
91 rsAssert(kinfo->outLen == 1);
92
93 mkinfo->outPtr[0] =
94 (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
95
96 ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
97
98 if (sl->outExts[ct]) {
99 mkinfo->outPtr[0] =
100 mkinfo->outPtr[0] +
101 sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y;
102
103 } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
104 mkinfo->outPtr[0] =
105 mkinfo->outPtr[0] +
106 sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid;
107 }
108 } else {
109 rsAssert(kinfo->outLen == 0);
110
111 mkinfo->outPtr[0] = nullptr;
112 ostep = 0;
113 }
114
115 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
116 func(kinfo, xstart, xend, ostep);
117 }
118 //ALOGE("script group root");
119
120 mkinfo->inStride[0] = oldInStride;
121 mkinfo->usr = sl;
122 }
123
124
125
execute()126 void CpuScriptGroupImpl::execute() {
127 Vector<Allocation *> ins;
128 Vector<bool> inExts;
129 Vector<Allocation *> outs;
130 Vector<bool> outExts;
131 Vector<const ScriptKernelID *> kernels;
132 bool fieldDep = false;
133
134 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
135 ScriptGroup::Node *n = mSG->mNodes[ct];
136 Script *s = n->mKernels[0]->mScript;
137 if (s->hasObjectSlots()) {
138 // Disable the ScriptGroup optimization if we have global RS
139 // objects that might interfere between kernels.
140 fieldDep = true;
141 }
142
143 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
144
145 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
146 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
147 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
148 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
149 }
150 }
151
152 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
153 const ScriptKernelID *k = n->mKernels[ct2];
154 Allocation *ain = nullptr;
155 Allocation *aout = nullptr;
156 bool inExt = false;
157 bool outExt = false;
158
159 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
160 if (n->mInputs[ct3]->mDstKernel.get() == k) {
161 ain = n->mInputs[ct3]->mAlloc.get();
162 break;
163 }
164 }
165 if (ain == nullptr) {
166 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
167 if (mSG->mInputs[ct3]->mKernel == k) {
168 ain = mSG->mInputs[ct3]->mAlloc.get();
169 inExt = true;
170 break;
171 }
172 }
173 }
174
175 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
176 if (n->mOutputs[ct3]->mSource.get() == k) {
177 aout = n->mOutputs[ct3]->mAlloc.get();
178 if(n->mOutputs[ct3]->mDstField.get() != nullptr) {
179 fieldDep = true;
180 }
181 break;
182 }
183 }
184 if (aout == nullptr) {
185 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
186 if (mSG->mOutputs[ct3]->mKernel == k) {
187 aout = mSG->mOutputs[ct3]->mAlloc.get();
188 outExt = true;
189 break;
190 }
191 }
192 }
193
194 rsAssert((k->mHasKernelOutput == (aout != nullptr)) &&
195 (k->mHasKernelInput == (ain != nullptr)));
196
197 ins.add(ain);
198 inExts.add(inExt);
199 outs.add(aout);
200 outExts.add(outExt);
201 kernels.add(k);
202 }
203
204 }
205
206 MTLaunchStruct mtls;
207
208 if (fieldDep) {
209 for (size_t ct=0; ct < ins.size(); ct++) {
210 Script *s = kernels[ct]->mScript;
211 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
212 uint32_t slot = kernels[ct]->mSlot;
213
214 uint32_t inLen;
215 const Allocation **ains;
216
217 if (ins[ct] == nullptr) {
218 inLen = 0;
219 ains = nullptr;
220
221 } else {
222 inLen = 1;
223 ains = const_cast<const Allocation**>(&ins[ct]);
224 }
225
226 bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls);
227
228 si->forEachKernelSetup(slot, &mtls);
229 si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
230 mtls.fep.usrLen, nullptr);
231
232 if (launchOK) {
233 mCtx->launchThreads(ains, inLen, outs[ct], nullptr, &mtls);
234 }
235
236 si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr);
237 }
238 } else {
239 ScriptList sl;
240 sl.ins = ins.array();
241 sl.outs = outs.array();
242 sl.kernels = kernels.array();
243 sl.count = kernels.size();
244
245 uint32_t inLen;
246 const Allocation **ains;
247
248 if (ins[0] == nullptr) {
249 inLen = 0;
250 ains = nullptr;
251
252 } else {
253 inLen = 1;
254 ains = const_cast<const Allocation**>(&ins[0]);
255 }
256
257 Vector<const void *> usrPtrs;
258 Vector<const void *> fnPtrs;
259 Vector<uint32_t> sigs;
260 for (size_t ct=0; ct < kernels.size(); ct++) {
261 Script *s = kernels[ct]->mScript;
262 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
263
264 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
265 fnPtrs.add((void *)mtls.kernel);
266 usrPtrs.add(mtls.fep.usr);
267 sigs.add(mtls.fep.usrLen);
268 si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
269 mtls.fep.usr, mtls.fep.usrLen, nullptr);
270 }
271 sl.sigs = sigs.array();
272 sl.usrPtrs = usrPtrs.array();
273 sl.fnPtrs = fnPtrs.array();
274 sl.inExts = inExts.array();
275 sl.outExts = outExts.array();
276
277 Script *s = kernels[0]->mScript;
278 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
279
280 if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) {
281
282 mtls.script = nullptr;
283 mtls.kernel = (void (*)())&scriptGroupRoot;
284 mtls.fep.usr = &sl;
285
286 mCtx->launchThreads(ains, inLen, outs[0], nullptr, &mtls);
287 }
288
289 for (size_t ct=0; ct < kernels.size(); ct++) {
290 Script *s = kernels[ct]->mScript;
291 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
292 si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0,
293 nullptr);
294 }
295 }
296 }
297