1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsScriptGroup.h"
20 #include "rsCpuScriptGroup.h"
21 
22 #include <vector>
23 
24 namespace android {
25 namespace renderscript {
26 
CpuScriptGroupImpl(RsdCpuReferenceImpl * ctx,const ScriptGroupBase * sg)27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
28     mCtx = ctx;
29     mSG = (ScriptGroup*)sg;
30 }
31 
~CpuScriptGroupImpl()32 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
33 
34 }
35 
init()36 bool CpuScriptGroupImpl::init() {
37     return true;
38 }
39 
setInput(const ScriptKernelID * kid,Allocation * a)40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
41 }
42 
setOutput(const ScriptKernelID * kid,Allocation * a)43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
44 }
45 
46 
47 typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo,
48                                       uint32_t xstart, uint32_t xend,
49                                       uint32_t outstep);
50 
scriptGroupRoot(const RsExpandKernelDriverInfo * kinfo,uint32_t xstart,uint32_t xend,uint32_t outstep)51 void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo,
52                                          uint32_t xstart, uint32_t xend,
53                                          uint32_t outstep) {
54 
55 
56     const ScriptList *sl             = (const ScriptList *)kinfo->usr;
57     RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
58 
59     const uint32_t oldInStride = mkinfo->inStride[0];
60 
61     for (size_t ct = 0; ct < sl->count; ct++) {
62         ScriptGroupRootFunc_t func;
63         func          = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
64         mkinfo->usr   = sl->usrPtrs[ct];
65 
66         if (sl->ins[ct]) {
67             rsAssert(kinfo->inLen == 1);
68 
69             mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
70 
71             mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
72 
73             if (sl->inExts[ct]) {
74                 mkinfo->inPtr[0] =
75                   (mkinfo->inPtr[0] +
76                    sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y);
77 
78             } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
79                 mkinfo->inPtr[0] =
80                   (mkinfo->inPtr[0] +
81                    sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid);
82             }
83 
84         } else {
85             rsAssert(kinfo->inLen == 0);
86 
87             mkinfo->inPtr[0]     = nullptr;
88             mkinfo->inStride[0]  = 0;
89         }
90 
91         uint32_t ostep;
92         if (sl->outs[ct]) {
93             mkinfo->outPtr[0] =
94               (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
95 
96             ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
97 
98             if (sl->outExts[ct]) {
99                 mkinfo->outPtr[0] =
100                   mkinfo->outPtr[0] +
101                   sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y;
102 
103             } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
104                 mkinfo->outPtr[0] =
105                   mkinfo->outPtr[0] +
106                   sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid;
107             }
108         } else {
109             mkinfo->outPtr[0] = nullptr;
110             ostep             = 0;
111         }
112 
113         //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
114         func(kinfo, xstart, xend, ostep);
115     }
116     //ALOGE("script group root");
117 
118     mkinfo->inStride[0] = oldInStride;
119     mkinfo->usr         = sl;
120 }
121 
122 
123 
execute()124 void CpuScriptGroupImpl::execute() {
125     std::vector<Allocation *> ins;
126     std::vector<uint8_t> inExts;
127     std::vector<Allocation *> outs;
128     std::vector<uint8_t> outExts;
129     std::vector<const ScriptKernelID *> kernels;
130     bool fieldDep = false;
131 
132     for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
133         ScriptGroup::Node *n = mSG->mNodes[ct];
134         Script *s = n->mKernels[0]->mScript;
135         if (s->hasObjectSlots()) {
136             // Disable the ScriptGroup optimization if we have global RS
137             // objects that might interfere between kernels.
138             fieldDep = true;
139         }
140 
141         //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
142 
143         for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
144             if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
145                 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
146                 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
147             }
148         }
149 
150         for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
151             const ScriptKernelID *k = n->mKernels[ct2];
152             Allocation *ain = nullptr;
153             Allocation *aout = nullptr;
154             bool inExt = false;
155             bool outExt = false;
156 
157             for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
158                 if (n->mInputs[ct3]->mDstKernel.get() == k) {
159                     ain = n->mInputs[ct3]->mAlloc.get();
160                     break;
161                 }
162             }
163             if (ain == nullptr) {
164                 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
165                     if (mSG->mInputs[ct3]->mKernel == k) {
166                         ain = mSG->mInputs[ct3]->mAlloc.get();
167                         inExt = true;
168                         break;
169                     }
170                 }
171             }
172 
173             for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
174                 if (n->mOutputs[ct3]->mSource.get() == k) {
175                     aout = n->mOutputs[ct3]->mAlloc.get();
176                     if(n->mOutputs[ct3]->mDstField.get() != nullptr) {
177                         fieldDep = true;
178                     }
179                     break;
180                 }
181             }
182             if (aout == nullptr) {
183                 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
184                     if (mSG->mOutputs[ct3]->mKernel == k) {
185                         aout = mSG->mOutputs[ct3]->mAlloc.get();
186                         outExt = true;
187                         break;
188                     }
189                 }
190             }
191 
192             rsAssert((k->mHasKernelOutput == (aout != nullptr)) &&
193                      (k->mHasKernelInput == (ain != nullptr)));
194 
195             ins.push_back(ain);
196             inExts.push_back(inExt);
197             outs.push_back(aout);
198             outExts.push_back(outExt);
199             kernels.push_back(k);
200         }
201 
202     }
203 
204     MTLaunchStructForEach mtls;
205 
206     if (fieldDep) {
207         for (size_t ct=0; ct < ins.size(); ct++) {
208             Script *s = kernels[ct]->mScript;
209             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
210             uint32_t slot = kernels[ct]->mSlot;
211 
212             uint32_t inLen;
213             const Allocation **ains;
214 
215             if (ins[ct] == nullptr) {
216                 inLen = 0;
217                 ains  = nullptr;
218 
219             } else {
220                 inLen = 1;
221                 ains  = const_cast<const Allocation**>(&ins[ct]);
222             }
223 
224             bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls);
225 
226             si->forEachKernelSetup(slot, &mtls);
227             si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
228                           mtls.fep.usrLen, nullptr);
229 
230             if (launchOK) {
231                 mCtx->launchForEach(ains, inLen, outs[ct], nullptr, &mtls);
232             }
233 
234             si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr);
235         }
236     } else {
237         ScriptList sl;
238         sl.ins = ins.data();
239         sl.outs = outs.data();
240         sl.kernels = kernels.data();
241         sl.count = kernels.size();
242 
243         uint32_t inLen;
244         const Allocation **ains;
245 
246         if (ins[0] == nullptr) {
247             inLen = 0;
248             ains  = nullptr;
249 
250         } else {
251             inLen = 1;
252             ains  = const_cast<const Allocation**>(&ins[0]);
253         }
254 
255         std::vector<const void *> usrPtrs;
256         std::vector<const void *> fnPtrs;
257         std::vector<uint32_t> sigs;
258         for (size_t ct=0; ct < kernels.size(); ct++) {
259             Script *s = kernels[ct]->mScript;
260             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
261 
262             si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
263             fnPtrs.push_back((void *)mtls.kernel);
264             usrPtrs.push_back(mtls.fep.usr);
265             sigs.push_back(mtls.fep.usrLen);
266             si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
267                           mtls.fep.usr, mtls.fep.usrLen, nullptr);
268         }
269         sl.sigs = sigs.data();
270         sl.usrPtrs = usrPtrs.data();
271         sl.fnPtrs = fnPtrs.data();
272         sl.inExts = inExts.data();
273         sl.outExts = outExts.data();
274 
275         Script *s = kernels[0]->mScript;
276         RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
277 
278         if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) {
279 
280             mtls.script = nullptr;
281             mtls.kernel = &scriptGroupRoot;
282             mtls.fep.usr = &sl;
283 
284             mCtx->launchForEach(ains, inLen, outs[0], nullptr, &mtls);
285         }
286 
287         for (size_t ct=0; ct < kernels.size(); ct++) {
288             Script *s = kernels[ct]->mScript;
289             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
290             si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0,
291                            nullptr);
292         }
293     }
294 }
295 
296 } // namespace renderscript
297 } // namespace android
298