1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsCpuScriptGroup.h"
20 #include "rsCpuScriptGroup2.h"
21 
22 #include <malloc.h>
23 #include "rsContext.h"
24 
25 #include <sys/types.h>
26 #include <sys/resource.h>
27 #include <sched.h>
28 #include <sys/syscall.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <unistd.h>
32 
33 #if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
34 #include <cutils/properties.h>
35 #include "utils/StopWatch.h"
36 #endif
37 
#ifdef RS_SERVER
/**
 * Returns the kernel thread id of the calling thread.
 *
 * Android exposes gettid(); standard Linux does not, so server builds issue
 * the raw SYS_gettid system call instead.
 */
static pid_t gettid() {
    const long rawTid = syscall(SYS_gettid);
    return static_cast<pid_t>(rawTid);
}
#endif
44 
// Bring the android and android::renderscript namespaces into file scope so
// the driver types used below can be named without qualification.
using namespace android;
using namespace android::renderscript;
47 
48 typedef void (*outer_foreach_t)(
49     const RsExpandKernelDriverInfo *,
50     uint32_t x1, uint32_t x2, uint32_t outstep);
51 
52 
// pthread TLS key under which threads store a pointer to a ScriptTLSStruct.
// Created by the first successful init() and deleted when the count below
// drops back to zero in the destructor.
static pthread_key_t gThreadTLSKey = 0;
// Number of RsdCpuReferenceImpl instances currently holding the TLS key;
// incremented in init() and decremented in the destructor.
static uint32_t gThreadTLSKeyCount = 0;
// Mutex taken by lockMutex()/unlockMutex(); guards the key and its count.
static pthread_mutex_t gInitMutex = PTHREAD_MUTEX_INITIALIZER;

// Set by GetCpuInfo() when /proc/cpuinfo advertises the SIMD feature this
// build was compiled to look for; stays false otherwise.
bool android::renderscript::gArchUseSIMD = false;
58 
~RsdCpuReference()59 RsdCpuReference::~RsdCpuReference() {
60 }
61 
create(Context * rsc,uint32_t version_major,uint32_t version_minor,sym_lookup_t lfn,script_lookup_t slfn,bcc::RSLinkRuntimeCallback pLinkRuntimeCallback,RSSelectRTCallback pSelectRTCallback,const char * pBccPluginName)62 RsdCpuReference * RsdCpuReference::create(Context *rsc, uint32_t version_major,
63         uint32_t version_minor, sym_lookup_t lfn, script_lookup_t slfn
64         , bcc::RSLinkRuntimeCallback pLinkRuntimeCallback,
65         RSSelectRTCallback pSelectRTCallback,
66         const char *pBccPluginName
67         ) {
68 
69     RsdCpuReferenceImpl *cpu = new RsdCpuReferenceImpl(rsc);
70     if (!cpu) {
71         return nullptr;
72     }
73     if (!cpu->init(version_major, version_minor, lfn, slfn)) {
74         delete cpu;
75         return nullptr;
76     }
77 
78     cpu->setLinkRuntimeCallback(pLinkRuntimeCallback);
79     cpu->setSelectRTCallback(pSelectRTCallback);
80     if (pBccPluginName) {
81         cpu->setBccPluginName(pBccPluginName);
82     }
83 
84     return cpu;
85 }
86 
87 
getTlsContext()88 Context * RsdCpuReference::getTlsContext() {
89     ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
90     return tls->mContext;
91 }
92 
getTlsScript()93 const Script * RsdCpuReference::getTlsScript() {
94     ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
95     return tls->mScript;
96 }
97 
getThreadTLSKey()98 pthread_key_t RsdCpuReference::getThreadTLSKey(){ return gThreadTLSKey; }
99 
100 ////////////////////////////////////////////////////////////
101 ///
102 
RsdCpuReferenceImpl(Context * rsc)103 RsdCpuReferenceImpl::RsdCpuReferenceImpl(Context *rsc) {
104     mRSC = rsc;
105 
106     version_major = 0;
107     version_minor = 0;
108     mInForEach = false;
109     memset(&mWorkers, 0, sizeof(mWorkers));
110     memset(&mTlsStruct, 0, sizeof(mTlsStruct));
111     mExit = false;
112     mLinkRuntimeCallback = nullptr;
113     mSelectRTCallback = nullptr;
114     mSetupCompilerCallback = nullptr;
115     mEmbedGlobalInfo = true;
116     mEmbedGlobalInfoSkipConstant = true;
117 }
118 
119 
/**
 * Entry routine of every worker thread started by init().
 *
 * vrsc is the owning RsdCpuReferenceImpl (init() passes `this` to
 * pthread_create). The thread claims a worker index, registers its launch
 * signal and native thread id, binds the instance's ScriptTLSStruct to its
 * TLS slot and then services launches until mExit is observed. Always
 * returns nullptr.
 */
void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) {
    RsdCpuReferenceImpl *dc = (RsdCpuReferenceImpl *)vrsc;

    // Atomically claim the next free worker slot; idx is this thread's index
    // into the per-worker arrays.
    uint32_t idx = __sync_fetch_and_add(&dc->mWorkers.mLaunchCount, 1);

    //ALOGV("RS helperThread starting %p idx=%i", dc, idx);

    dc->mWorkers.mLaunchSignals[idx].init();
    // Recorded so setPriority() can address this thread later.
    dc->mWorkers.mNativeThreadId[idx] = gettid();

    // NOTE(review): mTlsStruct is a single member of the shared instance, so
    // every worker zeroes the same struct that init() populated on the
    // creating thread - confirm this is intended.
    memset(&dc->mTlsStruct, 0, sizeof(dc->mTlsStruct));
    int status = pthread_setspecific(gThreadTLSKey, &dc->mTlsStruct);
    if (status) {
        ALOGE("pthread_setspecific %i", status);
    }

    // Disabled code: apparently an experiment that pins each worker to one
    // CPU through a raw syscall. It refers to `rsc`, which is not declared in
    // this function, so it would not compile if re-enabled as is.
#if 0
    typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
    cpu_set_t cpuset;
    memset(&cpuset, 0, sizeof(cpuset));
    cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
    int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
              sizeof(cpuset), &cpuset);
    ALOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
#endif

    // Service loop: wait for this worker's launch signal, run the published
    // callback (if any), then report completion by decrementing the running
    // count and setting the completion signal. The destructor sets mExit and
    // clears the callback before signalling, so the final pass runs no work.
    while (!dc->mExit) {
        dc->mWorkers.mLaunchSignals[idx].wait();
        if (dc->mWorkers.mLaunchCallback) {
           // idx +1 is used because the calling thread is always worker 0.
           dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx+1);
        }
        __sync_fetch_and_sub(&dc->mWorkers.mRunningCount, 1);
        dc->mWorkers.mCompleteSignal.set();
    }

    //ALOGV("RS helperThread exited %p idx=%i", dc, idx);
    return nullptr;
}
159 
launchThreads(WorkerCallback_t cbk,void * data)160 void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) {
161     mWorkers.mLaunchData = data;
162     mWorkers.mLaunchCallback = cbk;
163 
164     // fast path for very small launches
165     MTLaunchStruct *mtls = (MTLaunchStruct *)data;
166     if (mtls && mtls->fep.dim.y <= 1 && mtls->end.x <= mtls->start.x + mtls->mSliceSize) {
167         if (mWorkers.mLaunchCallback) {
168             mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
169         }
170         return;
171     }
172 
173     mWorkers.mRunningCount = mWorkers.mCount;
174     __sync_synchronize();
175 
176     for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
177         mWorkers.mLaunchSignals[ct].set();
178     }
179 
180     // We use the calling thread as one of the workers so we can start without
181     // the delay of the thread wakeup.
182     if (mWorkers.mLaunchCallback) {
183         mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
184     }
185 
186     while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
187         mWorkers.mCompleteSignal.wait();
188     }
189 }
190 
191 
lockMutex()192 void RsdCpuReferenceImpl::lockMutex() {
193     pthread_mutex_lock(&gInitMutex);
194 }
195 
unlockMutex()196 void RsdCpuReferenceImpl::unlockMutex() {
197     pthread_mutex_unlock(&gInitMutex);
198 }
199 
200 // Determine if the CPU we're running on supports SIMD instructions.
GetCpuInfo()201 static void GetCpuInfo() {
202     // Read the CPU flags from /proc/cpuinfo.
203     FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
204 
205     if (!cpuinfo) {
206         return;
207     }
208 
209     char cpuinfostr[4096];
210     // fgets() ends with newline or EOF, need to check the whole
211     // "cpuinfo" file to make sure we can use SIMD or not.
212     while (fgets(cpuinfostr, sizeof(cpuinfostr), cpuinfo)) {
213 #if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_ARM_USE_INTRINSICS)
214         gArchUseSIMD = strstr(cpuinfostr, " neon") || strstr(cpuinfostr, " asimd");
215 #elif defined(ARCH_X86_HAVE_SSSE3)
216         gArchUseSIMD = strstr(cpuinfostr, " ssse3");
217 #endif
218         if (gArchUseSIMD) {
219             break;
220         }
221     }
222     fclose(cpuinfo);
223 }
224 
init(uint32_t version_major,uint32_t version_minor,sym_lookup_t lfn,script_lookup_t slfn)225 bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor,
226                                sym_lookup_t lfn, script_lookup_t slfn) {
227 
228     mSymLookupFn = lfn;
229     mScriptLookupFn = slfn;
230 
231     lockMutex();
232     if (!gThreadTLSKeyCount) {
233         int status = pthread_key_create(&gThreadTLSKey, nullptr);
234         if (status) {
235             ALOGE("Failed to init thread tls key.");
236             unlockMutex();
237             return false;
238         }
239     }
240     gThreadTLSKeyCount++;
241     unlockMutex();
242 
243     mTlsStruct.mContext = mRSC;
244     mTlsStruct.mScript = nullptr;
245     int status = pthread_setspecific(gThreadTLSKey, &mTlsStruct);
246     if (status) {
247         ALOGE("pthread_setspecific %i", status);
248     }
249 
250     GetCpuInfo();
251 
252     int cpu = sysconf(_SC_NPROCESSORS_CONF);
253     if(mRSC->props.mDebugMaxThreads) {
254         cpu = mRSC->props.mDebugMaxThreads;
255     }
256     if (cpu < 2) {
257         mWorkers.mCount = 0;
258         return true;
259     }
260 
261     // Subtract one from the cpu count because we also use the command thread as a worker.
262     mWorkers.mCount = (uint32_t)(cpu - 1);
263 
264     ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount + 1);
265 
266     mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t));
267     mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t));
268     mWorkers.mLaunchSignals = new Signal[mWorkers.mCount];
269     mWorkers.mLaunchCallback = nullptr;
270 
271     mWorkers.mCompleteSignal.init();
272 
273     mWorkers.mRunningCount = mWorkers.mCount;
274     mWorkers.mLaunchCount = 0;
275     __sync_synchronize();
276 
277     pthread_attr_t threadAttr;
278     status = pthread_attr_init(&threadAttr);
279     if (status) {
280         ALOGE("Failed to init thread attribute.");
281         return false;
282     }
283 
284     for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
285         status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this);
286         if (status) {
287             mWorkers.mCount = ct;
288             ALOGE("Created fewer than expected number of RS threads.");
289             break;
290         }
291     }
292     while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
293         usleep(100);
294     }
295 
296     pthread_attr_destroy(&threadAttr);
297     return true;
298 }
299 
300 
setPriority(int32_t priority)301 void RsdCpuReferenceImpl::setPriority(int32_t priority) {
302     for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
303         setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], priority);
304     }
305 }
306 
~RsdCpuReferenceImpl()307 RsdCpuReferenceImpl::~RsdCpuReferenceImpl() {
308     mExit = true;
309     mWorkers.mLaunchData = nullptr;
310     mWorkers.mLaunchCallback = nullptr;
311     mWorkers.mRunningCount = mWorkers.mCount;
312     __sync_synchronize();
313     for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
314         mWorkers.mLaunchSignals[ct].set();
315     }
316     void *res;
317     for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
318         pthread_join(mWorkers.mThreadId[ct], &res);
319     }
320     rsAssert(__sync_fetch_and_or(&mWorkers.mRunningCount, 0) == 0);
321     free(mWorkers.mThreadId);
322     free(mWorkers.mNativeThreadId);
323     delete[] mWorkers.mLaunchSignals;
324 
325     // Global structure cleanup.
326     lockMutex();
327     --gThreadTLSKeyCount;
328     if (!gThreadTLSKeyCount) {
329         pthread_key_delete(gThreadTLSKey);
330     }
331     unlockMutex();
332 
333 }
334 
FepPtrSetup(const MTLaunchStruct * mtls,RsExpandKernelDriverInfo * fep,uint32_t x,uint32_t y,uint32_t z=0,uint32_t lod=0,RsAllocationCubemapFace face=RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,uint32_t a1=0,uint32_t a2=0,uint32_t a3=0,uint32_t a4=0)335 static inline void FepPtrSetup(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo *fep,
336                                uint32_t x, uint32_t y,
337                                uint32_t z = 0, uint32_t lod = 0,
338                                RsAllocationCubemapFace face = RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,
339                                uint32_t a1 = 0, uint32_t a2 = 0, uint32_t a3 = 0, uint32_t a4 = 0) {
340 
341     for (uint32_t i = 0; i < fep->inLen; i++) {
342         fep->inPtr[i] = (const uint8_t *)mtls->ains[i]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4);
343     }
344 
345     if (mtls->aout[0] != nullptr) {
346         fep->outPtr[0] = (uint8_t *)mtls->aout[0]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4);
347     }
348 }
349 
// Peels one dimension off a flat slice number.
//
// For a non-empty range [start, end) the coordinate start + (val % extent)
// is written to *p and val / extent is returned for the next dimension.
// For an empty or inverted range the dimension is pinned to start and val is
// passed through unchanged.
static uint32_t sliceInt(uint32_t *p, uint32_t val, uint32_t start, uint32_t end) {
    if (end > start) {
        const uint32_t extent = end - start;
        *p = start + (val % extent);
        return val / extent;
    }

    *p = start;
    return val;
}
362 
SelectOuterSlice(const MTLaunchStruct * mtls,RsExpandKernelDriverInfo * fep,uint32_t sliceNum)363 static bool SelectOuterSlice(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo* fep, uint32_t sliceNum) {
364 
365     uint32_t r = sliceNum;
366     r = sliceInt(&fep->current.z, r, mtls->start.z, mtls->end.z);
367     r = sliceInt(&fep->current.lod, r, mtls->start.lod, mtls->end.lod);
368     r = sliceInt(&fep->current.face, r, mtls->start.face, mtls->end.face);
369     r = sliceInt(&fep->current.array[0], r, mtls->start.array[0], mtls->end.array[0]);
370     r = sliceInt(&fep->current.array[1], r, mtls->start.array[1], mtls->end.array[1]);
371     r = sliceInt(&fep->current.array[2], r, mtls->start.array[2], mtls->end.array[2]);
372     r = sliceInt(&fep->current.array[3], r, mtls->start.array[3], mtls->end.array[3]);
373     return r == 0;
374 }
375 
376 
walk_general(void * usr,uint32_t idx)377 static void walk_general(void *usr, uint32_t idx) {
378     MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
379     RsExpandKernelDriverInfo fep = mtls->fep;
380     fep.lid = idx;
381     outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
382 
383 
384     while(1) {
385         uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
386 
387         if (!SelectOuterSlice(mtls, &fep, slice)) {
388             return;
389         }
390 
391         for (fep.current.y = mtls->start.y; fep.current.y < mtls->end.y;
392              fep.current.y++) {
393 
394             FepPtrSetup(mtls, &fep, mtls->start.x,
395                         fep.current.y, fep.current.z, fep.current.lod,
396                         (RsAllocationCubemapFace)fep.current.face,
397                         fep.current.array[0], fep.current.array[1],
398                         fep.current.array[2], fep.current.array[3]);
399 
400             fn(&fep, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
401         }
402     }
403 
404 }
405 
walk_2d(void * usr,uint32_t idx)406 static void walk_2d(void *usr, uint32_t idx) {
407     MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
408     RsExpandKernelDriverInfo fep = mtls->fep;
409     fep.lid = idx;
410     outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
411 
412     while (1) {
413         uint32_t slice  = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
414         uint32_t yStart = mtls->start.y + slice * mtls->mSliceSize;
415         uint32_t yEnd   = yStart + mtls->mSliceSize;
416 
417         yEnd = rsMin(yEnd, mtls->end.y);
418 
419         if (yEnd <= yStart) {
420             return;
421         }
422 
423         for (fep.current.y = yStart; fep.current.y < yEnd; fep.current.y++) {
424             FepPtrSetup(mtls, &fep, mtls->start.x, fep.current.y);
425 
426             fn(&fep, mtls->start.x, mtls->end.x, fep.outStride[0]);
427         }
428     }
429 }
430 
walk_1d(void * usr,uint32_t idx)431 static void walk_1d(void *usr, uint32_t idx) {
432     MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
433     RsExpandKernelDriverInfo fep = mtls->fep;
434     fep.lid = idx;
435     outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
436 
437     while (1) {
438         uint32_t slice  = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
439         uint32_t xStart = mtls->start.x + slice * mtls->mSliceSize;
440         uint32_t xEnd   = xStart + mtls->mSliceSize;
441 
442         xEnd = rsMin(xEnd, mtls->end.x);
443 
444         if (xEnd <= xStart) {
445             return;
446         }
447 
448         FepPtrSetup(mtls, &fep, xStart, 0);
449 
450         fn(&fep, xStart, xEnd, fep.outStride[0]);
451     }
452 }
453 
launchThreads(const Allocation ** ains,uint32_t inLen,Allocation * aout,const RsScriptCall * sc,MTLaunchStruct * mtls)454 void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains,
455                                         uint32_t inLen,
456                                         Allocation* aout,
457                                         const RsScriptCall* sc,
458                                         MTLaunchStruct* mtls) {
459 
460     //android::StopWatch kernel_time("kernel time");
461 
462     bool outerDims = (mtls->start.z != mtls->end.z) ||
463                      (mtls->start.face != mtls->end.face) ||
464                      (mtls->start.lod != mtls->end.lod) ||
465                      (mtls->start.array[0] != mtls->end.array[0]) ||
466                      (mtls->start.array[1] != mtls->end.array[1]) ||
467                      (mtls->start.array[2] != mtls->end.array[2]) ||
468                      (mtls->start.array[3] != mtls->end.array[3]);
469 
470     if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) {
471         const size_t targetByteChunk = 16 * 1024;
472         mInForEach = true;
473 
474         if (outerDims) {
475             // No fancy logic for chunk size
476             mtls->mSliceSize = 1;
477             launchThreads(walk_general, mtls);
478         } else if (mtls->fep.dim.y > 1) {
479             uint32_t s1 = mtls->fep.dim.y / ((mWorkers.mCount + 1) * 4);
480             uint32_t s2 = 0;
481 
482             // This chooses our slice size to rate limit atomic ops to
483             // one per 16k bytes of reads/writes.
484             if ((mtls->aout[0] != nullptr) && mtls->aout[0]->mHal.drvState.lod[0].stride) {
485                 s2 = targetByteChunk / mtls->aout[0]->mHal.drvState.lod[0].stride;
486             } else if (mtls->ains[0]) {
487                 s2 = targetByteChunk / mtls->ains[0]->mHal.drvState.lod[0].stride;
488             } else {
489                 // Launch option only case
490                 // Use s1 based only on the dimensions
491                 s2 = s1;
492             }
493             mtls->mSliceSize = rsMin(s1, s2);
494 
495             if(mtls->mSliceSize < 1) {
496                 mtls->mSliceSize = 1;
497             }
498 
499             launchThreads(walk_2d, mtls);
500         } else {
501             uint32_t s1 = mtls->fep.dim.x / ((mWorkers.mCount + 1) * 4);
502             uint32_t s2 = 0;
503 
504             // This chooses our slice size to rate limit atomic ops to
505             // one per 16k bytes of reads/writes.
506             if ((mtls->aout[0] != nullptr) && mtls->aout[0]->getType()->getElementSizeBytes()) {
507                 s2 = targetByteChunk / mtls->aout[0]->getType()->getElementSizeBytes();
508             } else if (mtls->ains[0]) {
509                 s2 = targetByteChunk / mtls->ains[0]->getType()->getElementSizeBytes();
510             } else {
511                 // Launch option only case
512                 // Use s1 based only on the dimensions
513                 s2 = s1;
514             }
515             mtls->mSliceSize = rsMin(s1, s2);
516 
517             if (mtls->mSliceSize < 1) {
518                 mtls->mSliceSize = 1;
519             }
520 
521             launchThreads(walk_1d, mtls);
522         }
523         mInForEach = false;
524 
525     } else {
526         outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
527         uint32_t slice = 0;
528 
529 
530         while(SelectOuterSlice(mtls, &mtls->fep, slice++)) {
531             for (mtls->fep.current.y = mtls->start.y;
532                  mtls->fep.current.y < mtls->end.y;
533                  mtls->fep.current.y++) {
534 
535                 FepPtrSetup(mtls, &mtls->fep, mtls->start.x,
536                             mtls->fep.current.y, mtls->fep.current.z, mtls->fep.current.lod,
537                             (RsAllocationCubemapFace) mtls->fep.current.face,
538                             mtls->fep.current.array[0], mtls->fep.current.array[1],
539                             mtls->fep.current.array[2], mtls->fep.current.array[3]);
540 
541                 fn(&mtls->fep, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
542             }
543         }
544     }
545 }
546 
setTLS(RsdCpuScriptImpl * sc)547 RsdCpuScriptImpl * RsdCpuReferenceImpl::setTLS(RsdCpuScriptImpl *sc) {
548     //ALOGE("setTls %p", sc);
549     ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
550     rsAssert(tls);
551     RsdCpuScriptImpl *old = tls->mImpl;
552     tls->mImpl = sc;
553     tls->mContext = mRSC;
554     if (sc) {
555         tls->mScript = sc->getScript();
556     } else {
557         tls->mScript = nullptr;
558     }
559     return old;
560 }
561 
symLookup(const char * name)562 const RsdCpuReference::CpuSymbol * RsdCpuReferenceImpl::symLookup(const char *name) {
563     return mSymLookupFn(mRSC, name);
564 }
565 
566 
createScript(const ScriptC * s,char const * resName,char const * cacheDir,uint8_t const * bitcode,size_t bitcodeSize,uint32_t flags)567 RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createScript(const ScriptC *s,
568                                     char const *resName, char const *cacheDir,
569                                     uint8_t const *bitcode, size_t bitcodeSize,
570                                     uint32_t flags) {
571 
572     RsdCpuScriptImpl *i = new RsdCpuScriptImpl(this, s);
573     if (!i->init(resName, cacheDir, bitcode, bitcodeSize, flags
574         , getBccPluginName()
575         )) {
576         delete i;
577         return nullptr;
578     }
579     return i;
580 }
581 
// Factory functions for the built-in intrinsics, defined outside this file.
// createIntrinsic() below dispatches to them by intrinsic id; each takes the
// owning driver, the script object and an element, and returns the script
// implementation.
extern RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
                                             const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx,
                                                   const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_ColorMatrix(RsdCpuReferenceImpl *ctx,
                                                   const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_LUT(RsdCpuReferenceImpl *ctx,
                                           const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Convolve5x5(RsdCpuReferenceImpl *ctx,
                                                   const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx,
                                            const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_YuvToRGB(RsdCpuReferenceImpl *ctx,
                                                const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
                                             const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx,
                                                 const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx,
                                              const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx,
                                              const Script *s, const Element *e);
604 
createIntrinsic(const Script * s,RsScriptIntrinsicID iid,Element * e)605 RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script *s,
606                                     RsScriptIntrinsicID iid, Element *e) {
607 
608     RsdCpuScriptImpl *i = nullptr;
609     switch (iid) {
610     case RS_SCRIPT_INTRINSIC_ID_3DLUT:
611         i = rsdIntrinsic_3DLUT(this, s, e);
612         break;
613     case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3:
614         i = rsdIntrinsic_Convolve3x3(this, s, e);
615         break;
616     case RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX:
617         i = rsdIntrinsic_ColorMatrix(this, s, e);
618         break;
619     case RS_SCRIPT_INTRINSIC_ID_LUT:
620         i = rsdIntrinsic_LUT(this, s, e);
621         break;
622     case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5:
623         i = rsdIntrinsic_Convolve5x5(this, s, e);
624         break;
625     case RS_SCRIPT_INTRINSIC_ID_BLUR:
626         i = rsdIntrinsic_Blur(this, s, e);
627         break;
628     case RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB:
629         i = rsdIntrinsic_YuvToRGB(this, s, e);
630         break;
631     case RS_SCRIPT_INTRINSIC_ID_BLEND:
632         i = rsdIntrinsic_Blend(this, s, e);
633         break;
634     case RS_SCRIPT_INTRINSIC_ID_HISTOGRAM:
635         i = rsdIntrinsic_Histogram(this, s, e);
636         break;
637     case RS_SCRIPT_INTRINSIC_ID_RESIZE:
638         i = rsdIntrinsic_Resize(this, s, e);
639         break;
640     case RS_SCRIPT_INTRINSIC_ID_BLAS:
641         i = rsdIntrinsic_BLAS(this, s, e);
642         break;
643 
644     default:
645         rsAssert(0);
646     }
647 
648     return i;
649 }
650 
createScriptGroup(const ScriptGroupBase * sg)651 void* RsdCpuReferenceImpl::createScriptGroup(const ScriptGroupBase *sg) {
652   switch (sg->getApiVersion()) {
653     case ScriptGroupBase::SG_V1: {
654       CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg);
655       if (!sgi->init()) {
656         delete sgi;
657         return nullptr;
658       }
659       return sgi;
660     }
661     case ScriptGroupBase::SG_V2: {
662       return new CpuScriptGroup2Impl(this, sg);
663     }
664   }
665   return nullptr;
666 }
667