1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsScriptGroup.h"
20 #include "rsCpuScriptGroup.h"
21 
22 using namespace android;
23 using namespace android::renderscript;
24 
CpuScriptGroupImpl(RsdCpuReferenceImpl * ctx,const ScriptGroupBase * sg)25 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
26     mCtx = ctx;
27     mSG = (ScriptGroup*)sg;
28 }
29 
~CpuScriptGroupImpl()30 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
31 
32 }
33 
init()34 bool CpuScriptGroupImpl::init() {
35     return true;
36 }
37 
setInput(const ScriptKernelID * kid,Allocation * a)38 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
39 }
40 
setOutput(const ScriptKernelID * kid,Allocation * a)41 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
42 }
43 
44 
45 typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo,
46                                       uint32_t xstart, uint32_t xend,
47                                       uint32_t outstep);
48 
scriptGroupRoot(const RsExpandKernelDriverInfo * kinfo,uint32_t xstart,uint32_t xend,uint32_t outstep)49 void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo,
50                                          uint32_t xstart, uint32_t xend,
51                                          uint32_t outstep) {
52 
53 
54     const ScriptList *sl             = (const ScriptList *)kinfo->usr;
55     RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
56 
57     const uint32_t oldInStride = mkinfo->inStride[0];
58 
59     for (size_t ct = 0; ct < sl->count; ct++) {
60         ScriptGroupRootFunc_t func;
61         func          = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
62         mkinfo->usr   = sl->usrPtrs[ct];
63 
64         if (sl->ins[ct]) {
65             rsAssert(kinfo->inLen == 1);
66 
67             mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
68 
69             mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
70 
71             if (sl->inExts[ct]) {
72                 mkinfo->inPtr[0] =
73                   (mkinfo->inPtr[0] +
74                    sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y);
75 
76             } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
77                 mkinfo->inPtr[0] =
78                   (mkinfo->inPtr[0] +
79                    sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid);
80             }
81 
82         } else {
83             rsAssert(kinfo->inLen == 0);
84 
85             mkinfo->inPtr[0]     = nullptr;
86             mkinfo->inStride[0]  = 0;
87         }
88 
89         uint32_t ostep;
90         if (sl->outs[ct]) {
91             rsAssert(kinfo->outLen == 1);
92 
93             mkinfo->outPtr[0] =
94               (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
95 
96             ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
97 
98             if (sl->outExts[ct]) {
99                 mkinfo->outPtr[0] =
100                   mkinfo->outPtr[0] +
101                   sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y;
102 
103             } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
104                 mkinfo->outPtr[0] =
105                   mkinfo->outPtr[0] +
106                   sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid;
107             }
108         } else {
109             rsAssert(kinfo->outLen == 0);
110 
111             mkinfo->outPtr[0] = nullptr;
112             ostep             = 0;
113         }
114 
115         //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
116         func(kinfo, xstart, xend, ostep);
117     }
118     //ALOGE("script group root");
119 
120     mkinfo->inStride[0] = oldInStride;
121     mkinfo->usr         = sl;
122 }
123 
124 
125 
execute()126 void CpuScriptGroupImpl::execute() {
127     Vector<Allocation *> ins;
128     Vector<bool> inExts;
129     Vector<Allocation *> outs;
130     Vector<bool> outExts;
131     Vector<const ScriptKernelID *> kernels;
132     bool fieldDep = false;
133 
134     for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
135         ScriptGroup::Node *n = mSG->mNodes[ct];
136         Script *s = n->mKernels[0]->mScript;
137         if (s->hasObjectSlots()) {
138             // Disable the ScriptGroup optimization if we have global RS
139             // objects that might interfere between kernels.
140             fieldDep = true;
141         }
142 
143         //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
144 
145         for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
146             if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
147                 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
148                 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
149             }
150         }
151 
152         for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
153             const ScriptKernelID *k = n->mKernels[ct2];
154             Allocation *ain = nullptr;
155             Allocation *aout = nullptr;
156             bool inExt = false;
157             bool outExt = false;
158 
159             for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
160                 if (n->mInputs[ct3]->mDstKernel.get() == k) {
161                     ain = n->mInputs[ct3]->mAlloc.get();
162                     break;
163                 }
164             }
165             if (ain == nullptr) {
166                 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
167                     if (mSG->mInputs[ct3]->mKernel == k) {
168                         ain = mSG->mInputs[ct3]->mAlloc.get();
169                         inExt = true;
170                         break;
171                     }
172                 }
173             }
174 
175             for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
176                 if (n->mOutputs[ct3]->mSource.get() == k) {
177                     aout = n->mOutputs[ct3]->mAlloc.get();
178                     if(n->mOutputs[ct3]->mDstField.get() != nullptr) {
179                         fieldDep = true;
180                     }
181                     break;
182                 }
183             }
184             if (aout == nullptr) {
185                 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
186                     if (mSG->mOutputs[ct3]->mKernel == k) {
187                         aout = mSG->mOutputs[ct3]->mAlloc.get();
188                         outExt = true;
189                         break;
190                     }
191                 }
192             }
193 
194             rsAssert((k->mHasKernelOutput == (aout != nullptr)) &&
195                      (k->mHasKernelInput == (ain != nullptr)));
196 
197             ins.add(ain);
198             inExts.add(inExt);
199             outs.add(aout);
200             outExts.add(outExt);
201             kernels.add(k);
202         }
203 
204     }
205 
206     MTLaunchStruct mtls;
207 
208     if (fieldDep) {
209         for (size_t ct=0; ct < ins.size(); ct++) {
210             Script *s = kernels[ct]->mScript;
211             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
212             uint32_t slot = kernels[ct]->mSlot;
213 
214             uint32_t inLen;
215             const Allocation **ains;
216 
217             if (ins[ct] == nullptr) {
218                 inLen = 0;
219                 ains  = nullptr;
220 
221             } else {
222                 inLen = 1;
223                 ains  = const_cast<const Allocation**>(&ins[ct]);
224             }
225 
226             bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls);
227 
228             si->forEachKernelSetup(slot, &mtls);
229             si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
230                           mtls.fep.usrLen, nullptr);
231 
232             if (launchOK) {
233                 mCtx->launchThreads(ains, inLen, outs[ct], nullptr, &mtls);
234             }
235 
236             si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr);
237         }
238     } else {
239         ScriptList sl;
240         sl.ins = ins.array();
241         sl.outs = outs.array();
242         sl.kernels = kernels.array();
243         sl.count = kernels.size();
244 
245         uint32_t inLen;
246         const Allocation **ains;
247 
248         if (ins[0] == nullptr) {
249             inLen = 0;
250             ains  = nullptr;
251 
252         } else {
253             inLen = 1;
254             ains  = const_cast<const Allocation**>(&ins[0]);
255         }
256 
257         Vector<const void *> usrPtrs;
258         Vector<const void *> fnPtrs;
259         Vector<uint32_t> sigs;
260         for (size_t ct=0; ct < kernels.size(); ct++) {
261             Script *s = kernels[ct]->mScript;
262             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
263 
264             si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
265             fnPtrs.add((void *)mtls.kernel);
266             usrPtrs.add(mtls.fep.usr);
267             sigs.add(mtls.fep.usrLen);
268             si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
269                           mtls.fep.usr, mtls.fep.usrLen, nullptr);
270         }
271         sl.sigs = sigs.array();
272         sl.usrPtrs = usrPtrs.array();
273         sl.fnPtrs = fnPtrs.array();
274         sl.inExts = inExts.array();
275         sl.outExts = outExts.array();
276 
277         Script *s = kernels[0]->mScript;
278         RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
279 
280         if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) {
281 
282             mtls.script = nullptr;
283             mtls.kernel = (void (*)())&scriptGroupRoot;
284             mtls.fep.usr = &sl;
285 
286             mCtx->launchThreads(ains, inLen, outs[0], nullptr, &mtls);
287         }
288 
289         for (size_t ct=0; ct < kernels.size(); ct++) {
290             Script *s = kernels[ct]->mScript;
291             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
292             si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0,
293                            nullptr);
294         }
295     }
296 }
297