/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef RSD_CPU_CORE_H
18 #define RSD_CPU_CORE_H
19 
20 #include "rsd_cpu.h"
21 #include "rsSignal.h"
22 #include "rsContext.h"
23 #include "rsCppUtils.h"
24 #include "rsElement.h"
25 #include "rsScriptC.h"
26 #include "rsCpuCoreRuntime.h"
27 
28 #include <string>
29 
30 namespace android {
31 namespace renderscript {
32 
// True when the CPU we are running on supports SIMD instructions.
// Declared here only; the definition lives in another translation unit.
extern bool gArchUseSIMD;
35 
36 // Function types found in RenderScript code
37 typedef void (*ReduceAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum);
38 typedef void (*ReduceCombinerFunc_t)(uint8_t *accum, const uint8_t *other);
39 typedef void (*ReduceInitializerFunc_t)(uint8_t *accum);
40 typedef void (*ReduceOutConverterFunc_t)(uint8_t *out, const uint8_t *accum);
41 typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride);
42 typedef void (*InvokeFunc_t)(void *params);
43 typedef void (*InitOrDtorFunc_t)(void);
44 typedef int  (*RootFunc_t)(void);
45 
46 struct ReduceDescription {
47     ReduceAccumulatorFunc_t  accumFunc;  // expanded accumulator function
48     ReduceInitializerFunc_t  initFunc;   // user initializer function
49     ReduceCombinerFunc_t     combFunc;   // user combiner function
50     ReduceOutConverterFunc_t outFunc;    // user outconverter function
51     size_t                   accumSize;  // accumulator datum size, in bytes
52 };
53 
// Internal driver callback used to execute a kernel.
//   usr - opaque pointer handed back to the callback unchanged
//   idx - uint32_t index supplied by the caller
//         (NOTE(review): presumably the worker/thread index -- confirm)
using WorkerCallback_t = void (*)(void *usr, uint32_t idx);
56 
// Forward declarations: the structs below only hold pointers to these types.
class RsdCpuScriptImpl;
class RsdCpuReferenceImpl;
59 
60 struct ScriptTLSStruct {
61     android::renderscript::Context * mContext;
62     const android::renderscript::Script * mScript;
63     RsdCpuScriptImpl *mImpl;
64 };
65 
66 // MTLaunchStruct passes information about a multithreaded kernel launch.
67 struct MTLaunchStructCommon {
68     RsdCpuReferenceImpl *rs;
69     RsdCpuScriptImpl *script;
70 
71     uint32_t mSliceSize;
72     volatile int mSliceNum;
73     bool isThreadable;
74 
75     // Boundary information about the launch
76     RsLaunchDimensions start;
77     RsLaunchDimensions end;
78     // Points to MTLaunchStructForEach::fep::dim or
79     // MTLaunchStructReduce::redp::dim.
80     RsLaunchDimensions *dimPtr;
81 };
82 
83 struct MTLaunchStructForEach : public MTLaunchStructCommon {
84     // Driver info structure
85     RsExpandKernelDriverInfo fep;
86 
87     ForEachFunc_t kernel;
88     const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
89     Allocation *aout[RS_KERNEL_INPUT_LIMIT];
90 };
91 
92 struct MTLaunchStructReduce : public MTLaunchStructCommon {
93     // Driver info structure
94     RsExpandKernelDriverInfo redp;
95 
96     const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
97 
98     ReduceAccumulatorFunc_t accumFunc;
99     ReduceInitializerFunc_t initFunc;
100     ReduceCombinerFunc_t combFunc;
101     ReduceOutConverterFunc_t outFunc;
102 
103     size_t accumSize;  // accumulator datum size in bytes
104 
105     size_t accumStride;  // stride between accumulators in accumAlloc (below)
106 
107     // These fields are used for managing accumulator data items in a
108     // multithreaded execution.
109     //
110     // Let the number of threads be N.
111     // Let Outc be true iff there is an outconverter.
112     //
113     // accumAlloc is a pointer to a single allocation of (N - !Outc)
114     // accumulators.  (If there is no outconverter, then the output
115     // allocation acts as an accumulator.)  It is created at kernel
116     // launch time.  Within that allocation, the distance between the
117     // start of adjacent accumulators is accumStride bytes -- this
118     // might be the same as accumSize, or it might be larger, if we
119     // are attempting to avoid false sharing.
120     //
121     // accumCount is an atomic counter of how many accumulators have
122     // been grabbed by threads.  It is initialized to zero at kernel
123     // launch time.  See accumPtr for further description.
124     //
125     // accumPtr is pointer to an array of N pointers to accumulators.
126     // The array is created at kernel launch time, and each element is
127     // initialized to nullptr.  When a particular thread goes to work,
128     // that thread obtains its accumulator from its entry in this
129     // array.  If the entry is nullptr, that thread needs to obtain an
130     // accumulator, and initialize its entry in the array accordingly.
131     // It does so via atomic access (fetch-and-add) to accumCount.
132     // - If Outc, then the fetched value is used as an index into
133     //   accumAlloc.
134     // - If !Outc, then
135     //   - If the fetched value is zero, then this thread gets the
136     //     output allocation for its accumulator.
137     //   - If the fetched value is nonzero, then (fetched value - 1)
138     //     is used as an index into accumAlloc.
139     uint8_t *accumAlloc;
140     uint8_t **accumPtr;
141     uint32_t accumCount;
142 
143     // Logging control
144     uint32_t logReduce;
145 };
146 
147 class RsdCpuReferenceImpl : public RsdCpuReference {
148 public:
149     ~RsdCpuReferenceImpl() override;
150     explicit RsdCpuReferenceImpl(Context *);
151 
152     void lockMutex();
153     void unlockMutex();
154 
155     bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t);
156     void setPriority(int32_t priority) override;
157     virtual void launchThreads(WorkerCallback_t cbk, void *data);
158     static void * helperThreadProc(void *vrsc);
159     RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc);
160 
getContext()161     Context * getContext() {return mRSC;}
getThreadCount()162     uint32_t getThreadCount() const {
163         return mWorkers.mCount + 1;
164     }
165 
166     // Launch foreach kernel
167     void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout,
168                        const RsScriptCall *sc, MTLaunchStructForEach *mtls);
169 
170     // Launch a general reduce kernel
171     void launchReduce(const Allocation ** ains, uint32_t inLen, Allocation *aout,
172                       MTLaunchStructReduce *mtls);
173 
174     CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir,
175                              uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override;
176     CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override;
177     void* createScriptGroup(const ScriptGroupBase *sg) override;
178 
179     const RsdCpuReference::CpuSymbol *symLookup(const char *);
180 
lookupScript(const Script * s)181     RsdCpuReference::CpuScript *lookupScript(const Script *s) {
182         return mScriptLookupFn(mRSC, s);
183     }
184 
setSelectRTCallback(RSSelectRTCallback pSelectRTCallback)185     void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) {
186         mSelectRTCallback = pSelectRTCallback;
187     }
getSelectRTCallback()188     RSSelectRTCallback getSelectRTCallback() {
189         return mSelectRTCallback;
190     }
191 
setBccPluginName(const char * name)192     virtual void setBccPluginName(const char *name) {
193         mBccPluginName.assign(name);
194     }
getBccPluginName()195     virtual const char *getBccPluginName() const {
196         return mBccPluginName.c_str();
197     }
getInKernel()198     bool getInKernel() override { return mInKernel; }
199 
200     // Set to true if we should embed global variable information in the code.
setEmbedGlobalInfo(bool v)201     void setEmbedGlobalInfo(bool v) override {
202         mEmbedGlobalInfo = v;
203     }
204 
205     // Returns true if we should embed global variable information in the code.
getEmbedGlobalInfo()206     bool getEmbedGlobalInfo() const override {
207         return mEmbedGlobalInfo;
208     }
209 
210     // Set to true if we should skip constant (immutable) global variables when
211     // potentially embedding information about globals.
setEmbedGlobalInfoSkipConstant(bool v)212     void setEmbedGlobalInfoSkipConstant(bool v) override {
213         mEmbedGlobalInfoSkipConstant = v;
214     }
215 
216     // Returns true if we should skip constant (immutable) global variables when
217     // potentially embedding information about globals.
getEmbedGlobalInfoSkipConstant()218     bool getEmbedGlobalInfoSkipConstant() const override {
219         return mEmbedGlobalInfoSkipConstant;
220     }
221 
222 protected:
223     Context *mRSC;
224     uint32_t version_major;
225     uint32_t version_minor;
226     //bool mHasGraphics;
227     bool mInKernel;  // Is a parallel kernel execution underway?
228 
229     struct Workers {
230         volatile int mRunningCount;
231         volatile int mLaunchCount;
232         uint32_t mCount;
233         pthread_t *mThreadId;
234         pid_t *mNativeThreadId;
235         Signal mCompleteSignal;
236         Signal *mLaunchSignals;
237         WorkerCallback_t mLaunchCallback;
238         void *mLaunchData;
239     };
240     Workers mWorkers;
241     bool mExit;
242     sym_lookup_t mSymLookupFn;
243     script_lookup_t mScriptLookupFn;
244 
245     ScriptTLSStruct mTlsStruct;
246 
247     RSSelectRTCallback mSelectRTCallback;
248     std::string mBccPluginName;
249 
250     // Specifies whether we should embed global variable information in the
251     // code via special RS variables that can be examined later by the driver.
252     // Defaults to true.
253     bool mEmbedGlobalInfo;
254 
255     // Specifies whether we should skip constant (immutable) global variables
256     // when potentially embedding information about globals.
257     // Defaults to true.
258     bool mEmbedGlobalInfoSkipConstant;
259 
260     long mPageSize;
261 
262     // Launch a general reduce kernel
263     void launchReduceSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout,
264                             MTLaunchStructReduce *mtls);
265     void launchReduceParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout,
266                               MTLaunchStructReduce *mtls);
267 };
268 
269 
270 } // namespace renderscript
271 } // namespace android
272 
273 #endif
274