1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsScriptGroup.h"
20 #include "rsCpuScriptGroup.h"
21 //#include "rsdBcc.h"
22 //#include "rsdAllocation.h"
23 
24 using namespace android;
25 using namespace android::renderscript;
26 
CpuScriptGroupImpl(RsdCpuReferenceImpl * ctx,const ScriptGroup * sg)27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
28     mCtx = ctx;
29     mSG = sg;
30 }
31 
~CpuScriptGroupImpl()32 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
33 
34 }
35 
init()36 bool CpuScriptGroupImpl::init() {
37     return true;
38 }
39 
setInput(const ScriptKernelID * kid,Allocation * a)40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
41 }
42 
setOutput(const ScriptKernelID * kid,Allocation * a)43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
44 }
45 
46 
47 typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
48                                       uint32_t xstart, uint32_t xend,
49                                       uint32_t instep, uint32_t outstep);
50 
scriptGroupRoot(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)51 void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
52                                          uint32_t xstart, uint32_t xend,
53                                          uint32_t instep, uint32_t outstep) {
54 
55 
56     const ScriptList *sl = (const ScriptList *)p->usr;
57     RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
58     const void *oldUsr = p->usr;
59 
60     for(size_t ct=0; ct < sl->count; ct++) {
61         ScriptGroupRootFunc_t func;
62         func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
63         mp->usr = sl->usrPtrs[ct];
64 
65         mp->ptrIn = NULL;
66         mp->in = NULL;
67         mp->ptrOut = NULL;
68         mp->out = NULL;
69 
70         uint32_t istep = 0;
71         uint32_t ostep = 0;
72 
73         if (sl->ins[ct]) {
74             mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
75             istep = sl->ins[ct]->mHal.state.elementSizeBytes;
76             mp->in = mp->ptrIn;
77             if (sl->inExts[ct]) {
78                 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
79             } else {
80                 if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
81                     mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
82                 }
83             }
84         }
85 
86         if (sl->outs[ct]) {
87             mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
88             mp->out = mp->ptrOut;
89             ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
90             if (sl->outExts[ct]) {
91                 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
92             } else {
93                 if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
94                     mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
95                 }
96             }
97         }
98 
99         //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
100         func(p, xstart, xend, istep, ostep);
101     }
102     //ALOGE("script group root");
103 
104     //ConvolveParams *cp = (ConvolveParams *)p->usr;
105 
106     mp->usr = oldUsr;
107 }
108 
109 
110 
execute()111 void CpuScriptGroupImpl::execute() {
112     Vector<Allocation *> ins;
113     Vector<bool> inExts;
114     Vector<Allocation *> outs;
115     Vector<bool> outExts;
116     Vector<const ScriptKernelID *> kernels;
117     bool fieldDep = false;
118 
119     for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
120         ScriptGroup::Node *n = mSG->mNodes[ct];
121         Script *s = n->mKernels[0]->mScript;
122         if (s->hasObjectSlots()) {
123             // Disable the ScriptGroup optimization if we have global RS
124             // objects that might interfere between kernels.
125             fieldDep = true;
126         }
127 
128         //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
129 
130         for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
131             if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
132                 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
133                 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
134             }
135         }
136 
137         for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
138             const ScriptKernelID *k = n->mKernels[ct2];
139             Allocation *ain = NULL;
140             Allocation *aout = NULL;
141             bool inExt = false;
142             bool outExt = false;
143 
144             for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
145                 if (n->mInputs[ct3]->mDstKernel.get() == k) {
146                     ain = n->mInputs[ct3]->mAlloc.get();
147                     break;
148                 }
149             }
150             if (ain == NULL) {
151                 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
152                     if (mSG->mInputs[ct3]->mKernel == k) {
153                         ain = mSG->mInputs[ct3]->mAlloc.get();
154                         inExt = true;
155                         break;
156                     }
157                 }
158             }
159 
160             for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
161                 if (n->mOutputs[ct3]->mSource.get() == k) {
162                     aout = n->mOutputs[ct3]->mAlloc.get();
163                     if(n->mOutputs[ct3]->mDstField.get() != NULL) {
164                         fieldDep = true;
165                     }
166                     break;
167                 }
168             }
169             if (aout == NULL) {
170                 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
171                     if (mSG->mOutputs[ct3]->mKernel == k) {
172                         aout = mSG->mOutputs[ct3]->mAlloc.get();
173                         outExt = true;
174                         break;
175                     }
176                 }
177             }
178 
179             rsAssert((k->mHasKernelOutput == (aout != NULL)) &&
180                      (k->mHasKernelInput == (ain != NULL)));
181 
182             ins.add(ain);
183             inExts.add(inExt);
184             outs.add(aout);
185             outExts.add(outExt);
186             kernels.add(k);
187         }
188 
189     }
190 
191     MTLaunchStruct mtls;
192 
193     if(fieldDep) {
194         for (size_t ct=0; ct < ins.size(); ct++) {
195             Script *s = kernels[ct]->mScript;
196             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
197             uint32_t slot = kernels[ct]->mSlot;
198 
199             si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
200             si->forEachKernelSetup(slot, &mtls);
201             si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
202             mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
203             si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
204         }
205     } else {
206         ScriptList sl;
207         sl.ins = ins.array();
208         sl.outs = outs.array();
209         sl.kernels = kernels.array();
210         sl.count = kernels.size();
211 
212         Vector<const void *> usrPtrs;
213         Vector<const void *> fnPtrs;
214         Vector<uint32_t> sigs;
215         for (size_t ct=0; ct < kernels.size(); ct++) {
216             Script *s = kernels[ct]->mScript;
217             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
218 
219             si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
220             fnPtrs.add((void *)mtls.kernel);
221             usrPtrs.add(mtls.fep.usr);
222             sigs.add(mtls.fep.usrLen);
223             si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
224         }
225         sl.sigs = sigs.array();
226         sl.usrPtrs = usrPtrs.array();
227         sl.fnPtrs = fnPtrs.array();
228         sl.inExts = inExts.array();
229         sl.outExts = outExts.array();
230 
231         Script *s = kernels[0]->mScript;
232         RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
233         si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
234         mtls.script = NULL;
235         mtls.kernel = (void (*)())&scriptGroupRoot;
236         mtls.fep.usr = &sl;
237         mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
238 
239         for (size_t ct=0; ct < kernels.size(); ct++) {
240             Script *s = kernels[ct]->mScript;
241             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
242             si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
243         }
244     }
245 }
246 
247 
248