1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef RSD_CPU_CORE_H 18 #define RSD_CPU_CORE_H 19 20 #include "rsd_cpu.h" 21 #include "rsSignal.h" 22 #include "rsContext.h" 23 #include "rsCppUtils.h" 24 #include "rsElement.h" 25 #include "rsScriptC.h" 26 #include "rsCpuCoreRuntime.h" 27 28 namespace android { 29 namespace renderscript { 30 31 // Whether the CPU we're running on supports SIMD instructions 32 extern bool gArchUseSIMD; 33 34 // Function types found in RenderScript code 35 typedef void (*ReduceAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum); 36 typedef void (*ReduceCombinerFunc_t)(uint8_t *accum, const uint8_t *other); 37 typedef void (*ReduceInitializerFunc_t)(uint8_t *accum); 38 typedef void (*ReduceOutConverterFunc_t)(uint8_t *out, const uint8_t *accum); 39 typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride); 40 typedef void (*InvokeFunc_t)(void *params); 41 typedef void (*InitOrDtorFunc_t)(void); 42 typedef int (*RootFunc_t)(void); 43 44 struct ReduceDescription { 45 ReduceAccumulatorFunc_t accumFunc; // expanded accumulator function 46 ReduceInitializerFunc_t initFunc; // user initializer function 47 ReduceCombinerFunc_t combFunc; // user combiner function 48 ReduceOutConverterFunc_t outFunc; // user outconverter function 49 size_t accumSize; // accumulator datum size, in bytes 50 }; 51 52 // Internal driver callback used to execute a kernel 53 typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); 54 55 class RsdCpuScriptImpl; 56 class RsdCpuReferenceImpl; 57 58 struct ScriptTLSStruct { 59 android::renderscript::Context * mContext; 60 const android::renderscript::Script * mScript; 61 RsdCpuScriptImpl *mImpl; 62 }; 63 64 // MTLaunchStruct passes information about a multithreaded kernel launch. 65 struct MTLaunchStructCommon { 66 RsdCpuReferenceImpl *rs; 67 RsdCpuScriptImpl *script; 68 69 uint32_t mSliceSize; 70 volatile int mSliceNum; 71 bool isThreadable; 72 73 // Boundary information about the launch 74 RsLaunchDimensions start; 75 RsLaunchDimensions end; 76 // Points to MTLaunchStructForEach::fep::dim or 77 // MTLaunchStructReduce::redp::dim. 78 RsLaunchDimensions *dimPtr; 79 }; 80 81 struct MTLaunchStructForEach : public MTLaunchStructCommon { 82 // Driver info structure 83 RsExpandKernelDriverInfo fep; 84 85 ForEachFunc_t kernel; 86 const Allocation *ains[RS_KERNEL_INPUT_LIMIT]; 87 Allocation *aout[RS_KERNEL_INPUT_LIMIT]; 88 }; 89 90 struct MTLaunchStructReduce : public MTLaunchStructCommon { 91 // Driver info structure 92 RsExpandKernelDriverInfo redp; 93 94 const Allocation *ains[RS_KERNEL_INPUT_LIMIT]; 95 96 ReduceAccumulatorFunc_t accumFunc; 97 ReduceInitializerFunc_t initFunc; 98 ReduceCombinerFunc_t combFunc; 99 ReduceOutConverterFunc_t outFunc; 100 101 size_t accumSize; // accumulator datum size in bytes 102 103 size_t accumStride; // stride between accumulators in accumAlloc (below) 104 105 // These fields are used for managing accumulator data items in a 106 // multithreaded execution. 107 // 108 // Let the number of threads be N. 109 // Let Outc be true iff there is an outconverter. 110 // 111 // accumAlloc is a pointer to a single allocation of (N - !Outc) 112 // accumulators. (If there is no outconverter, then the output 113 // allocation acts as an accumulator.) It is created at kernel 114 // launch time. Within that allocation, the distance between the 115 // start of adjacent accumulators is accumStride bytes -- this 116 // might be the same as accumSize, or it might be larger, if we 117 // are attempting to avoid false sharing. 118 // 119 // accumCount is an atomic counter of how many accumulators have 120 // been grabbed by threads. It is initialized to zero at kernel 121 // launch time. See accumPtr for further description. 122 // 123 // accumPtr is pointer to an array of N pointers to accumulators. 124 // The array is created at kernel launch time, and each element is 125 // initialized to nullptr. When a particular thread goes to work, 126 // that thread obtains its accumulator from its entry in this 127 // array. If the entry is nullptr, that thread needs to obtain an 128 // accumulator, and initialize its entry in the array accordingly. 129 // It does so via atomic access (fetch-and-add) to accumCount. 130 // - If Outc, then the fetched value is used as an index into 131 // accumAlloc. 132 // - If !Outc, then 133 // - If the fetched value is zero, then this thread gets the 134 // output allocation for its accumulator. 135 // - If the fetched value is nonzero, then (fetched value - 1) 136 // is used as an index into accumAlloc. 137 uint8_t *accumAlloc; 138 uint8_t **accumPtr; 139 uint32_t accumCount; 140 141 // Logging control 142 uint32_t logReduce; 143 }; 144 145 class RsdCpuReferenceImpl : public RsdCpuReference { 146 public: 147 ~RsdCpuReferenceImpl() override; 148 RsdCpuReferenceImpl(Context *); 149 150 void lockMutex(); 151 void unlockMutex(); 152 153 bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t); 154 void setPriority(int32_t priority) override; 155 virtual void launchThreads(WorkerCallback_t cbk, void *data); 156 static void * helperThreadProc(void *vrsc); 157 RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc); 158 getContext()159 Context * getContext() {return mRSC;} getThreadCount()160 uint32_t getThreadCount() const { 161 return mWorkers.mCount + 1; 162 } 163 164 // Launch foreach kernel 165 void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout, 166 const RsScriptCall *sc, MTLaunchStructForEach *mtls); 167 168 // Launch a general reduce kernel 169 void launchReduce(const Allocation ** ains, uint32_t inLen, Allocation *aout, 170 MTLaunchStructReduce *mtls); 171 172 CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir, 173 uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override; 174 CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override; 175 void* createScriptGroup(const ScriptGroupBase *sg) override; 176 177 const RsdCpuReference::CpuSymbol *symLookup(const char *); 178 lookupScript(const Script * s)179 RsdCpuReference::CpuScript *lookupScript(const Script *s) { 180 return mScriptLookupFn(mRSC, s); 181 } 182 setSelectRTCallback(RSSelectRTCallback pSelectRTCallback)183 void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) { 184 mSelectRTCallback = pSelectRTCallback; 185 } getSelectRTCallback()186 RSSelectRTCallback getSelectRTCallback() { 187 return mSelectRTCallback; 188 } 189 setBccPluginName(const char * name)190 virtual void setBccPluginName(const char *name) { 191 mBccPluginName.setTo(name); 192 } getBccPluginName()193 virtual const char *getBccPluginName() const { 194 return mBccPluginName.string(); 195 } getInKernel()196 bool getInKernel() override { return mInKernel; } 197 198 // Set to true if we should embed global variable information in the code. setEmbedGlobalInfo(bool v)199 void setEmbedGlobalInfo(bool v) override { 200 mEmbedGlobalInfo = v; 201 } 202 203 // Returns true if we should embed global variable information in the code. getEmbedGlobalInfo()204 bool getEmbedGlobalInfo() const override { 205 return mEmbedGlobalInfo; 206 } 207 208 // Set to true if we should skip constant (immutable) global variables when 209 // potentially embedding information about globals. setEmbedGlobalInfoSkipConstant(bool v)210 void setEmbedGlobalInfoSkipConstant(bool v) override { 211 mEmbedGlobalInfoSkipConstant = v; 212 } 213 214 // Returns true if we should skip constant (immutable) global variables when 215 // potentially embedding information about globals. getEmbedGlobalInfoSkipConstant()216 bool getEmbedGlobalInfoSkipConstant() const override { 217 return mEmbedGlobalInfoSkipConstant; 218 } 219 220 protected: 221 Context *mRSC; 222 uint32_t version_major; 223 uint32_t version_minor; 224 //bool mHasGraphics; 225 bool mInKernel; // Is a parallel kernel execution underway? 226 227 struct Workers { 228 volatile int mRunningCount; 229 volatile int mLaunchCount; 230 uint32_t mCount; 231 pthread_t *mThreadId; 232 pid_t *mNativeThreadId; 233 Signal mCompleteSignal; 234 Signal *mLaunchSignals; 235 WorkerCallback_t mLaunchCallback; 236 void *mLaunchData; 237 }; 238 Workers mWorkers; 239 bool mExit; 240 sym_lookup_t mSymLookupFn; 241 script_lookup_t mScriptLookupFn; 242 243 ScriptTLSStruct mTlsStruct; 244 245 RSSelectRTCallback mSelectRTCallback; 246 String8 mBccPluginName; 247 248 // Specifies whether we should embed global variable information in the 249 // code via special RS variables that can be examined later by the driver. 250 // Defaults to true. 251 bool mEmbedGlobalInfo; 252 253 // Specifies whether we should skip constant (immutable) global variables 254 // when potentially embedding information about globals. 255 // Defaults to true. 256 bool mEmbedGlobalInfoSkipConstant; 257 258 long mPageSize; 259 260 // Launch a general reduce kernel 261 void launchReduceSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout, 262 MTLaunchStructReduce *mtls); 263 void launchReduceParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout, 264 MTLaunchStructReduce *mtls); 265 }; 266 267 268 } 269 } 270 271 #endif 272