1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsCpuScriptGroup.h"
20 #include "rsCpuScriptGroup2.h"
21
22 #include <malloc.h>
23 #include "rsContext.h"
24
25 #include <sys/types.h>
26 #include <sys/resource.h>
27 #include <sched.h>
28 #include <sys/syscall.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
34 #include <cutils/properties.h>
35 #include "utils/StopWatch.h"
36 #endif
37
38 #ifdef RS_SERVER
39 // Android exposes gettid(), standard Linux does not
gettid()40 static pid_t gettid() {
41 return syscall(SYS_gettid);
42 }
43 #endif
44
45 using namespace android;
46 using namespace android::renderscript;
47
48 typedef void (*outer_foreach_t)(
49 const RsExpandKernelDriverInfo *,
50 uint32_t x1, uint32_t x2, uint32_t outstep);
51
52
53 static pthread_key_t gThreadTLSKey = 0;
54 static uint32_t gThreadTLSKeyCount = 0;
55 static pthread_mutex_t gInitMutex = PTHREAD_MUTEX_INITIALIZER;
56
57 bool android::renderscript::gArchUseSIMD = false;
58
~RsdCpuReference()59 RsdCpuReference::~RsdCpuReference() {
60 }
61
create(Context * rsc,uint32_t version_major,uint32_t version_minor,sym_lookup_t lfn,script_lookup_t slfn,bcc::RSLinkRuntimeCallback pLinkRuntimeCallback,RSSelectRTCallback pSelectRTCallback,const char * pBccPluginName)62 RsdCpuReference * RsdCpuReference::create(Context *rsc, uint32_t version_major,
63 uint32_t version_minor, sym_lookup_t lfn, script_lookup_t slfn
64 , bcc::RSLinkRuntimeCallback pLinkRuntimeCallback,
65 RSSelectRTCallback pSelectRTCallback,
66 const char *pBccPluginName
67 ) {
68
69 RsdCpuReferenceImpl *cpu = new RsdCpuReferenceImpl(rsc);
70 if (!cpu) {
71 return nullptr;
72 }
73 if (!cpu->init(version_major, version_minor, lfn, slfn)) {
74 delete cpu;
75 return nullptr;
76 }
77
78 cpu->setLinkRuntimeCallback(pLinkRuntimeCallback);
79 cpu->setSelectRTCallback(pSelectRTCallback);
80 if (pBccPluginName) {
81 cpu->setBccPluginName(pBccPluginName);
82 }
83
84 return cpu;
85 }
86
87
getTlsContext()88 Context * RsdCpuReference::getTlsContext() {
89 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
90 return tls->mContext;
91 }
92
getTlsScript()93 const Script * RsdCpuReference::getTlsScript() {
94 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
95 return tls->mScript;
96 }
97
getThreadTLSKey()98 pthread_key_t RsdCpuReference::getThreadTLSKey(){ return gThreadTLSKey; }
99
100 ////////////////////////////////////////////////////////////
101 ///
102
RsdCpuReferenceImpl(Context * rsc)103 RsdCpuReferenceImpl::RsdCpuReferenceImpl(Context *rsc) {
104 mRSC = rsc;
105
106 version_major = 0;
107 version_minor = 0;
108 mInForEach = false;
109 memset(&mWorkers, 0, sizeof(mWorkers));
110 memset(&mTlsStruct, 0, sizeof(mTlsStruct));
111 mExit = false;
112 mLinkRuntimeCallback = nullptr;
113 mSelectRTCallback = nullptr;
114 mSetupCompilerCallback = nullptr;
115 mEmbedGlobalInfo = true;
116 mEmbedGlobalInfoSkipConstant = true;
117 }
118
119
helperThreadProc(void * vrsc)120 void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) {
121 RsdCpuReferenceImpl *dc = (RsdCpuReferenceImpl *)vrsc;
122
123 uint32_t idx = __sync_fetch_and_add(&dc->mWorkers.mLaunchCount, 1);
124
125 //ALOGV("RS helperThread starting %p idx=%i", dc, idx);
126
127 dc->mWorkers.mLaunchSignals[idx].init();
128 dc->mWorkers.mNativeThreadId[idx] = gettid();
129
130 memset(&dc->mTlsStruct, 0, sizeof(dc->mTlsStruct));
131 int status = pthread_setspecific(gThreadTLSKey, &dc->mTlsStruct);
132 if (status) {
133 ALOGE("pthread_setspecific %i", status);
134 }
135
136 #if 0
137 typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
138 cpu_set_t cpuset;
139 memset(&cpuset, 0, sizeof(cpuset));
140 cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
141 int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
142 sizeof(cpuset), &cpuset);
143 ALOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
144 #endif
145
146 while (!dc->mExit) {
147 dc->mWorkers.mLaunchSignals[idx].wait();
148 if (dc->mWorkers.mLaunchCallback) {
149 // idx +1 is used because the calling thread is always worker 0.
150 dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx+1);
151 }
152 __sync_fetch_and_sub(&dc->mWorkers.mRunningCount, 1);
153 dc->mWorkers.mCompleteSignal.set();
154 }
155
156 //ALOGV("RS helperThread exited %p idx=%i", dc, idx);
157 return nullptr;
158 }
159
launchThreads(WorkerCallback_t cbk,void * data)160 void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) {
161 mWorkers.mLaunchData = data;
162 mWorkers.mLaunchCallback = cbk;
163
164 // fast path for very small launches
165 MTLaunchStruct *mtls = (MTLaunchStruct *)data;
166 if (mtls && mtls->fep.dim.y <= 1 && mtls->end.x <= mtls->start.x + mtls->mSliceSize) {
167 if (mWorkers.mLaunchCallback) {
168 mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
169 }
170 return;
171 }
172
173 mWorkers.mRunningCount = mWorkers.mCount;
174 __sync_synchronize();
175
176 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
177 mWorkers.mLaunchSignals[ct].set();
178 }
179
180 // We use the calling thread as one of the workers so we can start without
181 // the delay of the thread wakeup.
182 if (mWorkers.mLaunchCallback) {
183 mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
184 }
185
186 while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
187 mWorkers.mCompleteSignal.wait();
188 }
189 }
190
191
// Acquires gInitMutex, the file-wide mutex held while the shared TLS key and
// its use count are created, counted and deleted.
void RsdCpuReferenceImpl::lockMutex() {
    pthread_mutex_lock(&gInitMutex);
}
195
// Releases gInitMutex; the counterpart of lockMutex().
void RsdCpuReferenceImpl::unlockMutex() {
    pthread_mutex_unlock(&gInitMutex);
}
199
200 // Determine if the CPU we're running on supports SIMD instructions.
GetCpuInfo()201 static void GetCpuInfo() {
202 // Read the CPU flags from /proc/cpuinfo.
203 FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
204
205 if (!cpuinfo) {
206 return;
207 }
208
209 char cpuinfostr[4096];
210 // fgets() ends with newline or EOF, need to check the whole
211 // "cpuinfo" file to make sure we can use SIMD or not.
212 while (fgets(cpuinfostr, sizeof(cpuinfostr), cpuinfo)) {
213 #if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_ARM_USE_INTRINSICS)
214 gArchUseSIMD = strstr(cpuinfostr, " neon") || strstr(cpuinfostr, " asimd");
215 #elif defined(ARCH_X86_HAVE_SSSE3)
216 gArchUseSIMD = strstr(cpuinfostr, " ssse3");
217 #endif
218 if (gArchUseSIMD) {
219 break;
220 }
221 }
222 fclose(cpuinfo);
223 }
224
init(uint32_t version_major,uint32_t version_minor,sym_lookup_t lfn,script_lookup_t slfn)225 bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor,
226 sym_lookup_t lfn, script_lookup_t slfn) {
227
228 mSymLookupFn = lfn;
229 mScriptLookupFn = slfn;
230
231 lockMutex();
232 if (!gThreadTLSKeyCount) {
233 int status = pthread_key_create(&gThreadTLSKey, nullptr);
234 if (status) {
235 ALOGE("Failed to init thread tls key.");
236 unlockMutex();
237 return false;
238 }
239 }
240 gThreadTLSKeyCount++;
241 unlockMutex();
242
243 mTlsStruct.mContext = mRSC;
244 mTlsStruct.mScript = nullptr;
245 int status = pthread_setspecific(gThreadTLSKey, &mTlsStruct);
246 if (status) {
247 ALOGE("pthread_setspecific %i", status);
248 }
249
250 GetCpuInfo();
251
252 int cpu = sysconf(_SC_NPROCESSORS_CONF);
253 if(mRSC->props.mDebugMaxThreads) {
254 cpu = mRSC->props.mDebugMaxThreads;
255 }
256 if (cpu < 2) {
257 mWorkers.mCount = 0;
258 return true;
259 }
260
261 // Subtract one from the cpu count because we also use the command thread as a worker.
262 mWorkers.mCount = (uint32_t)(cpu - 1);
263
264 ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount + 1);
265
266 mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t));
267 mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t));
268 mWorkers.mLaunchSignals = new Signal[mWorkers.mCount];
269 mWorkers.mLaunchCallback = nullptr;
270
271 mWorkers.mCompleteSignal.init();
272
273 mWorkers.mRunningCount = mWorkers.mCount;
274 mWorkers.mLaunchCount = 0;
275 __sync_synchronize();
276
277 pthread_attr_t threadAttr;
278 status = pthread_attr_init(&threadAttr);
279 if (status) {
280 ALOGE("Failed to init thread attribute.");
281 return false;
282 }
283
284 for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
285 status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this);
286 if (status) {
287 mWorkers.mCount = ct;
288 ALOGE("Created fewer than expected number of RS threads.");
289 break;
290 }
291 }
292 while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
293 usleep(100);
294 }
295
296 pthread_attr_destroy(&threadAttr);
297 return true;
298 }
299
300
setPriority(int32_t priority)301 void RsdCpuReferenceImpl::setPriority(int32_t priority) {
302 for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
303 setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], priority);
304 }
305 }
306
~RsdCpuReferenceImpl()307 RsdCpuReferenceImpl::~RsdCpuReferenceImpl() {
308 mExit = true;
309 mWorkers.mLaunchData = nullptr;
310 mWorkers.mLaunchCallback = nullptr;
311 mWorkers.mRunningCount = mWorkers.mCount;
312 __sync_synchronize();
313 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
314 mWorkers.mLaunchSignals[ct].set();
315 }
316 void *res;
317 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
318 pthread_join(mWorkers.mThreadId[ct], &res);
319 }
320 rsAssert(__sync_fetch_and_or(&mWorkers.mRunningCount, 0) == 0);
321 free(mWorkers.mThreadId);
322 free(mWorkers.mNativeThreadId);
323 delete[] mWorkers.mLaunchSignals;
324
325 // Global structure cleanup.
326 lockMutex();
327 --gThreadTLSKeyCount;
328 if (!gThreadTLSKeyCount) {
329 pthread_key_delete(gThreadTLSKey);
330 }
331 unlockMutex();
332
333 }
334
FepPtrSetup(const MTLaunchStruct * mtls,RsExpandKernelDriverInfo * fep,uint32_t x,uint32_t y,uint32_t z=0,uint32_t lod=0,RsAllocationCubemapFace face=RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,uint32_t a1=0,uint32_t a2=0,uint32_t a3=0,uint32_t a4=0)335 static inline void FepPtrSetup(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo *fep,
336 uint32_t x, uint32_t y,
337 uint32_t z = 0, uint32_t lod = 0,
338 RsAllocationCubemapFace face = RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,
339 uint32_t a1 = 0, uint32_t a2 = 0, uint32_t a3 = 0, uint32_t a4 = 0) {
340
341 for (uint32_t i = 0; i < fep->inLen; i++) {
342 fep->inPtr[i] = (const uint8_t *)mtls->ains[i]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4);
343 }
344
345 if (mtls->aout[0] != nullptr) {
346 fep->outPtr[0] = (uint8_t *)mtls->aout[0]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4);
347 }
348 }
349
// Peels one coordinate off a combined slice number.
//
// The dimension covers [start, end). *p receives start plus val modulo the
// dimension's extent, and the quotient is returned so the caller can continue
// with the next dimension. An empty dimension (start >= end) stores start in
// *p and returns val unchanged.
static uint32_t sliceInt(uint32_t *p, uint32_t val, uint32_t start, uint32_t end) {
    if (end <= start) {
        *p = start;
        return val;
    }

    const uint32_t extent = end - start;
    *p = start + (val % extent);
    return val / extent;
}
362
SelectOuterSlice(const MTLaunchStruct * mtls,RsExpandKernelDriverInfo * fep,uint32_t sliceNum)363 static bool SelectOuterSlice(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo* fep, uint32_t sliceNum) {
364
365 uint32_t r = sliceNum;
366 r = sliceInt(&fep->current.z, r, mtls->start.z, mtls->end.z);
367 r = sliceInt(&fep->current.lod, r, mtls->start.lod, mtls->end.lod);
368 r = sliceInt(&fep->current.face, r, mtls->start.face, mtls->end.face);
369 r = sliceInt(&fep->current.array[0], r, mtls->start.array[0], mtls->end.array[0]);
370 r = sliceInt(&fep->current.array[1], r, mtls->start.array[1], mtls->end.array[1]);
371 r = sliceInt(&fep->current.array[2], r, mtls->start.array[2], mtls->end.array[2]);
372 r = sliceInt(&fep->current.array[3], r, mtls->start.array[3], mtls->end.array[3]);
373 return r == 0;
374 }
375
376
walk_general(void * usr,uint32_t idx)377 static void walk_general(void *usr, uint32_t idx) {
378 MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
379 RsExpandKernelDriverInfo fep = mtls->fep;
380 fep.lid = idx;
381 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
382
383
384 while(1) {
385 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
386
387 if (!SelectOuterSlice(mtls, &fep, slice)) {
388 return;
389 }
390
391 for (fep.current.y = mtls->start.y; fep.current.y < mtls->end.y;
392 fep.current.y++) {
393
394 FepPtrSetup(mtls, &fep, mtls->start.x,
395 fep.current.y, fep.current.z, fep.current.lod,
396 (RsAllocationCubemapFace)fep.current.face,
397 fep.current.array[0], fep.current.array[1],
398 fep.current.array[2], fep.current.array[3]);
399
400 fn(&fep, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
401 }
402 }
403
404 }
405
walk_2d(void * usr,uint32_t idx)406 static void walk_2d(void *usr, uint32_t idx) {
407 MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
408 RsExpandKernelDriverInfo fep = mtls->fep;
409 fep.lid = idx;
410 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
411
412 while (1) {
413 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
414 uint32_t yStart = mtls->start.y + slice * mtls->mSliceSize;
415 uint32_t yEnd = yStart + mtls->mSliceSize;
416
417 yEnd = rsMin(yEnd, mtls->end.y);
418
419 if (yEnd <= yStart) {
420 return;
421 }
422
423 for (fep.current.y = yStart; fep.current.y < yEnd; fep.current.y++) {
424 FepPtrSetup(mtls, &fep, mtls->start.x, fep.current.y);
425
426 fn(&fep, mtls->start.x, mtls->end.x, fep.outStride[0]);
427 }
428 }
429 }
430
walk_1d(void * usr,uint32_t idx)431 static void walk_1d(void *usr, uint32_t idx) {
432 MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
433 RsExpandKernelDriverInfo fep = mtls->fep;
434 fep.lid = idx;
435 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
436
437 while (1) {
438 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
439 uint32_t xStart = mtls->start.x + slice * mtls->mSliceSize;
440 uint32_t xEnd = xStart + mtls->mSliceSize;
441
442 xEnd = rsMin(xEnd, mtls->end.x);
443
444 if (xEnd <= xStart) {
445 return;
446 }
447
448 FepPtrSetup(mtls, &fep, xStart, 0);
449
450 fn(&fep, xStart, xEnd, fep.outStride[0]);
451 }
452 }
453
launchThreads(const Allocation ** ains,uint32_t inLen,Allocation * aout,const RsScriptCall * sc,MTLaunchStruct * mtls)454 void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains,
455 uint32_t inLen,
456 Allocation* aout,
457 const RsScriptCall* sc,
458 MTLaunchStruct* mtls) {
459
460 //android::StopWatch kernel_time("kernel time");
461
462 bool outerDims = (mtls->start.z != mtls->end.z) ||
463 (mtls->start.face != mtls->end.face) ||
464 (mtls->start.lod != mtls->end.lod) ||
465 (mtls->start.array[0] != mtls->end.array[0]) ||
466 (mtls->start.array[1] != mtls->end.array[1]) ||
467 (mtls->start.array[2] != mtls->end.array[2]) ||
468 (mtls->start.array[3] != mtls->end.array[3]);
469
470 if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) {
471 const size_t targetByteChunk = 16 * 1024;
472 mInForEach = true;
473
474 if (outerDims) {
475 // No fancy logic for chunk size
476 mtls->mSliceSize = 1;
477 launchThreads(walk_general, mtls);
478 } else if (mtls->fep.dim.y > 1) {
479 uint32_t s1 = mtls->fep.dim.y / ((mWorkers.mCount + 1) * 4);
480 uint32_t s2 = 0;
481
482 // This chooses our slice size to rate limit atomic ops to
483 // one per 16k bytes of reads/writes.
484 if ((mtls->aout[0] != nullptr) && mtls->aout[0]->mHal.drvState.lod[0].stride) {
485 s2 = targetByteChunk / mtls->aout[0]->mHal.drvState.lod[0].stride;
486 } else if (mtls->ains[0]) {
487 s2 = targetByteChunk / mtls->ains[0]->mHal.drvState.lod[0].stride;
488 } else {
489 // Launch option only case
490 // Use s1 based only on the dimensions
491 s2 = s1;
492 }
493 mtls->mSliceSize = rsMin(s1, s2);
494
495 if(mtls->mSliceSize < 1) {
496 mtls->mSliceSize = 1;
497 }
498
499 launchThreads(walk_2d, mtls);
500 } else {
501 uint32_t s1 = mtls->fep.dim.x / ((mWorkers.mCount + 1) * 4);
502 uint32_t s2 = 0;
503
504 // This chooses our slice size to rate limit atomic ops to
505 // one per 16k bytes of reads/writes.
506 if ((mtls->aout[0] != nullptr) && mtls->aout[0]->getType()->getElementSizeBytes()) {
507 s2 = targetByteChunk / mtls->aout[0]->getType()->getElementSizeBytes();
508 } else if (mtls->ains[0]) {
509 s2 = targetByteChunk / mtls->ains[0]->getType()->getElementSizeBytes();
510 } else {
511 // Launch option only case
512 // Use s1 based only on the dimensions
513 s2 = s1;
514 }
515 mtls->mSliceSize = rsMin(s1, s2);
516
517 if (mtls->mSliceSize < 1) {
518 mtls->mSliceSize = 1;
519 }
520
521 launchThreads(walk_1d, mtls);
522 }
523 mInForEach = false;
524
525 } else {
526 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
527 uint32_t slice = 0;
528
529
530 while(SelectOuterSlice(mtls, &mtls->fep, slice++)) {
531 for (mtls->fep.current.y = mtls->start.y;
532 mtls->fep.current.y < mtls->end.y;
533 mtls->fep.current.y++) {
534
535 FepPtrSetup(mtls, &mtls->fep, mtls->start.x,
536 mtls->fep.current.y, mtls->fep.current.z, mtls->fep.current.lod,
537 (RsAllocationCubemapFace) mtls->fep.current.face,
538 mtls->fep.current.array[0], mtls->fep.current.array[1],
539 mtls->fep.current.array[2], mtls->fep.current.array[3]);
540
541 fn(&mtls->fep, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
542 }
543 }
544 }
545 }
546
setTLS(RsdCpuScriptImpl * sc)547 RsdCpuScriptImpl * RsdCpuReferenceImpl::setTLS(RsdCpuScriptImpl *sc) {
548 //ALOGE("setTls %p", sc);
549 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
550 rsAssert(tls);
551 RsdCpuScriptImpl *old = tls->mImpl;
552 tls->mImpl = sc;
553 tls->mContext = mRSC;
554 if (sc) {
555 tls->mScript = sc->getScript();
556 } else {
557 tls->mScript = nullptr;
558 }
559 return old;
560 }
561
// Resolves |name| by forwarding to the symbol lookup function supplied to
// init(), passing this instance's context. The result is returned unchanged.
const RsdCpuReference::CpuSymbol * RsdCpuReferenceImpl::symLookup(const char *name) {
    return mSymLookupFn(mRSC, name);
}
565
566
createScript(const ScriptC * s,char const * resName,char const * cacheDir,uint8_t const * bitcode,size_t bitcodeSize,uint32_t flags)567 RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createScript(const ScriptC *s,
568 char const *resName, char const *cacheDir,
569 uint8_t const *bitcode, size_t bitcodeSize,
570 uint32_t flags) {
571
572 RsdCpuScriptImpl *i = new RsdCpuScriptImpl(this, s);
573 if (!i->init(resName, cacheDir, bitcode, bitcodeSize, flags
574 , getBccPluginName()
575 )) {
576 delete i;
577 return nullptr;
578 }
579 return i;
580 }
581
582 extern RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
583 const Script *s, const Element *e);
584 extern RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx,
585 const Script *s, const Element *e);
586 extern RsdCpuScriptImpl * rsdIntrinsic_ColorMatrix(RsdCpuReferenceImpl *ctx,
587 const Script *s, const Element *e);
588 extern RsdCpuScriptImpl * rsdIntrinsic_LUT(RsdCpuReferenceImpl *ctx,
589 const Script *s, const Element *e);
590 extern RsdCpuScriptImpl * rsdIntrinsic_Convolve5x5(RsdCpuReferenceImpl *ctx,
591 const Script *s, const Element *e);
592 extern RsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx,
593 const Script *s, const Element *e);
594 extern RsdCpuScriptImpl * rsdIntrinsic_YuvToRGB(RsdCpuReferenceImpl *ctx,
595 const Script *s, const Element *e);
596 extern RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
597 const Script *s, const Element *e);
598 extern RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx,
599 const Script *s, const Element *e);
600 extern RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx,
601 const Script *s, const Element *e);
602 extern RsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx,
603 const Script *s, const Element *e);
604
createIntrinsic(const Script * s,RsScriptIntrinsicID iid,Element * e)605 RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script *s,
606 RsScriptIntrinsicID iid, Element *e) {
607
608 RsdCpuScriptImpl *i = nullptr;
609 switch (iid) {
610 case RS_SCRIPT_INTRINSIC_ID_3DLUT:
611 i = rsdIntrinsic_3DLUT(this, s, e);
612 break;
613 case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3:
614 i = rsdIntrinsic_Convolve3x3(this, s, e);
615 break;
616 case RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX:
617 i = rsdIntrinsic_ColorMatrix(this, s, e);
618 break;
619 case RS_SCRIPT_INTRINSIC_ID_LUT:
620 i = rsdIntrinsic_LUT(this, s, e);
621 break;
622 case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5:
623 i = rsdIntrinsic_Convolve5x5(this, s, e);
624 break;
625 case RS_SCRIPT_INTRINSIC_ID_BLUR:
626 i = rsdIntrinsic_Blur(this, s, e);
627 break;
628 case RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB:
629 i = rsdIntrinsic_YuvToRGB(this, s, e);
630 break;
631 case RS_SCRIPT_INTRINSIC_ID_BLEND:
632 i = rsdIntrinsic_Blend(this, s, e);
633 break;
634 case RS_SCRIPT_INTRINSIC_ID_HISTOGRAM:
635 i = rsdIntrinsic_Histogram(this, s, e);
636 break;
637 case RS_SCRIPT_INTRINSIC_ID_RESIZE:
638 i = rsdIntrinsic_Resize(this, s, e);
639 break;
640 case RS_SCRIPT_INTRINSIC_ID_BLAS:
641 i = rsdIntrinsic_BLAS(this, s, e);
642 break;
643
644 default:
645 rsAssert(0);
646 }
647
648 return i;
649 }
650
createScriptGroup(const ScriptGroupBase * sg)651 void* RsdCpuReferenceImpl::createScriptGroup(const ScriptGroupBase *sg) {
652 switch (sg->getApiVersion()) {
653 case ScriptGroupBase::SG_V1: {
654 CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg);
655 if (!sgi->init()) {
656 delete sgi;
657 return nullptr;
658 }
659 return sgi;
660 }
661 case ScriptGroupBase::SG_V2: {
662 return new CpuScriptGroup2Impl(this, sg);
663 }
664 }
665 return nullptr;
666 }
667