1 /*
2  * Copyright (C) 2011-2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsCpuExecutable.h"
20 
21 #ifdef RS_COMPATIBILITY_LIB
22     #include <stdio.h>
23     #include <sys/stat.h>
24     #include <unistd.h>
25 #else
26     #include "rsCppUtils.h"
27 
28     #include <bcc/BCCContext.h>
29     #include <bcc/Config/Config.h>
30     #include <bcc/Renderscript/RSCompilerDriver.h>
31     #include <bcinfo/MetadataExtractor.h>
32     #include <cutils/properties.h>
33 
34     #include <zlib.h>
35     #include <sys/file.h>
36     #include <sys/types.h>
37     #include <unistd.h>
38 
39     #include <string>
40     #include <vector>
41 #endif
42 
43 #include <set>
44 #include <string>
45 #include <dlfcn.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <iostream>
49 #include <sstream>
50 
51 namespace {
52 
// Compile-time debugging switch: when set to true, storeRSInfoFromSO() asks
// the loaded ScriptExecutable to dump its global-variable information.
constexpr bool kDebugGlobalVariables = false;
54 
55 #ifndef RS_COMPATIBILITY_LIB
56 
is_force_recompile()57 static bool is_force_recompile() {
58 #ifdef RS_SERVER
59   return false;
60 #else
61   char buf[PROPERTY_VALUE_MAX];
62 
63   // Re-compile if floating point precision has been overridden.
64   property_get("debug.rs.precision", buf, "");
65   if (buf[0] != '\0') {
66     return true;
67   }
68 
69   // Re-compile if debug.rs.forcerecompile is set.
70   property_get("debug.rs.forcerecompile", buf, "0");
71   if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
72     return true;
73   } else {
74     return false;
75   }
76 #endif  // RS_SERVER
77 }
78 
setCompileArguments(std::vector<const char * > * args,const std::string & bcFileName,const char * cacheDir,const char * resName,const char * core_lib,bool useRSDebugContext,const char * bccPluginName,bool emitGlobalInfo,bool emitGlobalInfoSkipConstant)79 static void setCompileArguments(std::vector<const char*>* args,
80                                 const std::string& bcFileName,
81                                 const char* cacheDir, const char* resName,
82                                 const char* core_lib, bool useRSDebugContext,
83                                 const char* bccPluginName, bool emitGlobalInfo,
84                                 bool emitGlobalInfoSkipConstant) {
85     rsAssert(cacheDir && resName && core_lib);
86     args->push_back(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH);
87     args->push_back("-unroll-runtime");
88     args->push_back("-scalarize-load-store");
89     if (emitGlobalInfo) {
90         args->push_back("-rs-global-info");
91         if (emitGlobalInfoSkipConstant) {
92             args->push_back("-rs-global-info-skip-constant");
93         }
94     }
95     args->push_back("-o");
96     args->push_back(resName);
97     args->push_back("-output_path");
98     args->push_back(cacheDir);
99     args->push_back("-bclib");
100     args->push_back(core_lib);
101     args->push_back("-mtriple");
102     args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
103 
104     // Enable workaround for A53 codegen by default.
105 #if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
106     args->push_back("-aarch64-fix-cortex-a53-835769");
107 #endif
108 
109     // Execute the bcc compiler.
110     if (useRSDebugContext) {
111         args->push_back("-rs-debug-ctx");
112     } else {
113         // Only load additional libraries for compiles that don't use
114         // the debug context.
115         if (bccPluginName && strlen(bccPluginName) > 0) {
116             args->push_back("-load");
117             args->push_back(bccPluginName);
118         }
119     }
120 
121     args->push_back("-fPIC");
122     args->push_back("-embedRSInfo");
123 
124     args->push_back(bcFileName.c_str());
125     args->push_back(nullptr);
126 }
127 
compileBitcode(const std::string & bcFileName,const char * bitcode,size_t bitcodeSize,std::vector<const char * > & compileArguments)128 static bool compileBitcode(const std::string &bcFileName,
129                            const char *bitcode,
130                            size_t bitcodeSize,
131                            std::vector<const char *> &compileArguments) {
132     rsAssert(bitcode && bitcodeSize);
133 
134     FILE *bcfile = fopen(bcFileName.c_str(), "w");
135     if (!bcfile) {
136         ALOGE("Could not write to %s", bcFileName.c_str());
137         return false;
138     }
139     size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
140     fclose(bcfile);
141     if (nwritten != bitcodeSize) {
142         ALOGE("Could not write %zu bytes to %s", bitcodeSize,
143               bcFileName.c_str());
144         return false;
145     }
146 
147     return android::renderscript::rsuExecuteCommand(
148                    android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH,
149                    compileArguments.size()-1, compileArguments.data());
150 }
151 
152 // The checksum is unnecessary under a few conditions, since the primary
153 // use-case for it is debugging. If we are loading something from the
154 // system partition (read-only), we know that it was precompiled as part of
155 // application ahead of time (and thus the checksum is completely
156 // unnecessary). The checksum is also unnecessary on release (non-debug)
157 // builds, as the only way to get a shared object is to have compiled the
158 // script once already. On a release build, there is no way to adjust the
159 // other libraries/dependencies, and so the only reason to recompile would
160 // be for a source APK change or an OTA. In either case, the APK would be
161 // reinstalled, which would already clear the code_cache/ directory.
isChecksumNeeded(const char * cacheDir)162 bool isChecksumNeeded(const char *cacheDir) {
163     if ((::strcmp(SYSLIBPATH, cacheDir) == 0) ||
164         (::strcmp(SYSLIBPATH_VENDOR, cacheDir) == 0))
165         return false;
166     char buf[PROPERTY_VALUE_MAX];
167     property_get("ro.debuggable", buf, "");
168     return (buf[0] == '1');
169 }
170 
addFileToChecksum(const char * fileName,uint32_t & checksum)171 bool addFileToChecksum(const char *fileName, uint32_t &checksum) {
172     int FD = open(fileName, O_RDONLY);
173     if (FD == -1) {
174         ALOGE("Cannot open file \'%s\' to compute checksum", fileName);
175         return false;
176     }
177 
178     char buf[256];
179     while (true) {
180         ssize_t nread = read(FD, buf, sizeof(buf));
181         if (nread < 0) { // bail out on failed read
182             ALOGE("Error while computing checksum for file \'%s\'", fileName);
183             return false;
184         }
185 
186         checksum = adler32(checksum, (const unsigned char *) buf, nread);
187         if (static_cast<size_t>(nread) < sizeof(buf)) // EOF
188             break;
189     }
190 
191     if (close(FD) != 0) {
192         ALOGE("Cannot close file \'%s\' after computing checksum", fileName);
193         return false;
194     }
195     return true;
196 }
197 
198 #endif  // !defined(RS_COMPATIBILITY_LIB)
199 }  // namespace
200 
201 namespace android {
202 namespace renderscript {
203 
204 #ifndef RS_COMPATIBILITY_LIB
205 
constructBuildChecksum(uint8_t const * bitcode,size_t bitcodeSize,const char * commandLine,const char ** bccFiles,size_t numFiles)206 uint32_t constructBuildChecksum(uint8_t const *bitcode, size_t bitcodeSize,
207                                 const char *commandLine,
208                                 const char** bccFiles, size_t numFiles) {
209     uint32_t checksum = adler32(0L, Z_NULL, 0);
210 
211     // include checksum of bitcode
212     if (bitcode != nullptr && bitcodeSize > 0) {
213         checksum = adler32(checksum, bitcode, bitcodeSize);
214     }
215 
216     // include checksum of command line arguments
217     checksum = adler32(checksum, (const unsigned char *) commandLine,
218                        strlen(commandLine));
219 
220     // include checksum of bccFiles
221     for (size_t i = 0; i < numFiles; i++) {
222         const char* bccFile = bccFiles[i];
223         if (bccFile[0] != 0 && !addFileToChecksum(bccFile, checksum)) {
224             // return empty checksum instead of something partial/corrupt
225             return 0;
226         }
227     }
228 
229     return checksum;
230 }
231 
232 #endif  // !RS_COMPATIBILITY_LIB
233 
RsdCpuScriptImpl(RsdCpuReferenceImpl * ctx,const Script * s)234 RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
235     mCtx = ctx;
236     mScript = s;
237 
238     mScriptSO = nullptr;
239 
240 #ifndef RS_COMPATIBILITY_LIB
241     mCompilerDriver = nullptr;
242 #endif
243 
244 
245     mRoot = nullptr;
246     mRootExpand = nullptr;
247     mInit = nullptr;
248     mFreeChildren = nullptr;
249     mScriptExec = nullptr;
250 
251     mBoundAllocs = nullptr;
252     mIntrinsicData = nullptr;
253     mIsThreadable = true;
254 
255     mBuildChecksum = 0;
256     mChecksumNeeded = false;
257 }
258 
storeRSInfoFromSO()259 bool RsdCpuScriptImpl::storeRSInfoFromSO() {
260     // The shared object may have an invalid build checksum.
261     // Validate and fail early.
262     mScriptExec = ScriptExecutable::createFromSharedObject(
263             mCtx->getContext(), mScriptSO,
264             mChecksumNeeded ? mBuildChecksum : 0);
265 
266     if (mScriptExec == nullptr) {
267         return false;
268     }
269 
270     mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
271     if (mRoot) {
272         //ALOGE("Found root(): %p", mRoot);
273     }
274     mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
275     if (mRootExpand) {
276         //ALOGE("Found root.expand(): %p", mRootExpand);
277     }
278     mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
279     if (mInit) {
280         //ALOGE("Found init(): %p", mInit);
281     }
282     mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
283     if (mFreeChildren) {
284         //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
285     }
286 
287     size_t varCount = mScriptExec->getExportedVariableCount();
288     if (varCount > 0) {
289         mBoundAllocs = new Allocation *[varCount];
290         memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
291     }
292 
293     mIsThreadable = mScriptExec->getThreadable();
294     //ALOGE("Script isThreadable? %d", mIsThreadable);
295 
296     if (kDebugGlobalVariables) {
297         mScriptExec->dumpGlobalInfo();
298     }
299 
300     return true;
301 }
302 
init(char const * resName,char const * cacheDir,uint8_t const * bitcode,size_t bitcodeSize,uint32_t flags,char const * bccPluginName)303 bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
304                             uint8_t const *bitcode, size_t bitcodeSize,
305                             uint32_t flags, char const *bccPluginName) {
306     //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir,
307     // bitcode, bitcodeSize, flags, lookupFunc);
308     //ALOGE("rsdScriptInit %p %p", rsc, script);
309 
310     mCtx->lockMutex();
311 #ifndef RS_COMPATIBILITY_LIB
312     bool useRSDebugContext = false;
313 
314     mCompilerDriver = nullptr;
315 
316     mCompilerDriver = new bcc::RSCompilerDriver();
317     if (mCompilerDriver == nullptr) {
318         ALOGE("bcc: FAILS to create compiler driver (out of memory)");
319         mCtx->unlockMutex();
320         return false;
321     }
322 
323     // Run any compiler setup functions we have been provided with.
324     RSSetupCompilerCallback setupCompilerCallback =
325             mCtx->getSetupCompilerCallback();
326     if (setupCompilerCallback != nullptr) {
327         setupCompilerCallback(mCompilerDriver);
328     }
329 
330     bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
331     if (!bitcodeMetadata.extract()) {
332         ALOGE("Could not extract metadata from bitcode");
333         mCtx->unlockMutex();
334         return false;
335     }
336 
337     const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
338 
339     if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
340         mCompilerDriver->setDebugContext(true);
341         useRSDebugContext = true;
342     }
343 
344     std::string bcFileName(cacheDir);
345     bcFileName.append("/");
346     bcFileName.append(resName);
347     bcFileName.append(".bc");
348 
349     std::vector<const char*> compileArguments;
350     bool emitGlobalInfo = mCtx->getEmbedGlobalInfo();
351     bool emitGlobalInfoSkipConstant = mCtx->getEmbedGlobalInfoSkipConstant();
352     setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
353                         useRSDebugContext, bccPluginName, emitGlobalInfo,
354                         emitGlobalInfoSkipConstant);
355 
356     mChecksumNeeded = isChecksumNeeded(cacheDir);
357     if (mChecksumNeeded) {
358         std::vector<const char *> bccFiles = { BCC_EXE_PATH,
359                                                core_lib,
360                                              };
361 
362         // The last argument of compileArguments is a nullptr, so remove 1 from
363         // the size.
364         std::unique_ptr<const char> compileCommandLine(
365             rsuJoinStrings(compileArguments.size()-1, compileArguments.data()));
366 
367         mBuildChecksum = constructBuildChecksum(bitcode, bitcodeSize,
368                                                 compileCommandLine.get(),
369                                                 bccFiles.data(), bccFiles.size());
370 
371         if (mBuildChecksum == 0) {
372             // cannot compute checksum but verification is enabled
373             mCtx->unlockMutex();
374             return false;
375         }
376     }
377     else {
378         // add a dummy/constant as a checksum if verification is disabled
379         mBuildChecksum = 0xabadcafe;
380     }
381 
382     // Append build checksum to commandline
383     // Handle the terminal nullptr in compileArguments
384     compileArguments.pop_back();
385     compileArguments.push_back("-build-checksum");
386     std::stringstream ss;
387     ss << std::hex << mBuildChecksum;
388     compileArguments.push_back(ss.str().c_str());
389     compileArguments.push_back(nullptr);
390 
391     if (!is_force_recompile() && !useRSDebugContext) {
392         mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
393 
394         // Read RS info from the shared object to detect checksum mismatch
395         if (mScriptSO != nullptr && !storeRSInfoFromSO()) {
396             dlclose(mScriptSO);
397             mScriptSO = nullptr;
398         }
399     }
400 
401     // If we can't, it's either not there or out of date.  We compile the bit code and try loading
402     // again.
403     if (mScriptSO == nullptr) {
404         if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
405                             compileArguments))
406         {
407             ALOGE("bcc: FAILS to compile '%s'", resName);
408             mCtx->unlockMutex();
409             return false;
410         }
411 
412         if (!SharedLibraryUtils::createSharedLibrary(mCtx->getContext()->getDriverName(),
413                                                      cacheDir, resName)) {
414             ALOGE("Linker: Failed to link object file '%s'", resName);
415             mCtx->unlockMutex();
416             return false;
417         }
418 
419         mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
420         if (mScriptSO == nullptr) {
421             ALOGE("Unable to load '%s'", resName);
422             mCtx->unlockMutex();
423             return false;
424         }
425 
426         // Read RS symbol information from the .so.
427         if (!storeRSInfoFromSO()) {
428             goto error;
429         }
430     }
431 
432     mBitcodeFilePath.setTo(bcFileName.c_str());
433 
434 #else  // RS_COMPATIBILITY_LIB is defined
435     const char *nativeLibDir = mCtx->getContext()->getNativeLibDir();
436     mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nativeLibDir);
437 
438     if (!mScriptSO) {
439         goto error;
440     }
441 
442     if (!storeRSInfoFromSO()) {
443         goto error;
444     }
445 #endif
446     mCtx->unlockMutex();
447     return true;
448 
449 error:
450 
451     mCtx->unlockMutex();
452     if (mScriptSO) {
453         dlclose(mScriptSO);
454         mScriptSO = nullptr;
455     }
456     return false;
457 }
458 
459 #ifndef RS_COMPATIBILITY_LIB
460 
findCoreLib(const bcinfo::MetadataExtractor & ME,const char * bitcode,size_t bitcodeSize)461 const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
462                                           size_t bitcodeSize) {
463     const char* defaultLib = SYSLIBPATH"/libclcore.bc";
464 
465     // If we're debugging, use the debug library.
466     if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
467         return SYSLIBPATH"/libclcore_debug.bc";
468     }
469 
470     // If a callback has been registered to specify a library, use that.
471     RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
472     if (selectRTCallback != nullptr) {
473         return selectRTCallback((const char*)bitcode, bitcodeSize);
474     }
475 
476     // Check for a platform specific library
477 #if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
478     enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
479     if (prec == bcinfo::RS_FP_Relaxed) {
480         // NEON-capable ARMv7a devices can use an accelerated math library
481         // for all reduced precision scripts.
482         // ARMv8 does not use NEON, as ASIMD can be used with all precision
483         // levels.
484         return SYSLIBPATH"/libclcore_neon.bc";
485     } else {
486         return defaultLib;
487     }
488 #elif defined(__i386__) || defined(__x86_64__)
489     // x86 devices will use an optimized library.
490     return SYSLIBPATH"/libclcore_x86.bc";
491 #else
492     return defaultLib;
493 #endif
494 }
495 
496 #endif
497 
populateScript(Script * script)498 void RsdCpuScriptImpl::populateScript(Script *script) {
499     // Copy info over to runtime
500     script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
501     script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
502     script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
503     script->mHal.info.exportedPragmaKeyList = mScriptExec->getPragmaKeys();
504     script->mHal.info.exportedPragmaValueList = mScriptExec->getPragmaValues();
505 
506     // Bug, need to stash in metadata
507     if (mRootExpand) {
508         script->mHal.info.root = mRootExpand;
509     } else {
510         script->mHal.info.root = mRoot;
511     }
512 }
513 
514 
forEachMtlsSetup(const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc,MTLaunchStruct * mtls)515 bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
516                                         uint32_t inLen,
517                                         Allocation * aout,
518                                         const void * usr, uint32_t usrLen,
519                                         const RsScriptCall *sc,
520                                         MTLaunchStruct *mtls) {
521 
522     memset(mtls, 0, sizeof(MTLaunchStruct));
523 
524     for (int index = inLen; --index >= 0;) {
525         const Allocation* ain = ains[index];
526 
527         // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
528         if (ain != nullptr &&
529             (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
530 
531             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
532                                          "rsForEach called with null in allocations");
533             return false;
534         }
535     }
536 
537     if (aout &&
538         (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
539 
540         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
541                                      "rsForEach called with null out allocations");
542         return false;
543     }
544 
545     if (inLen > 0) {
546         const Allocation *ain0   = ains[0];
547         const Type       *inType = ain0->getType();
548 
549         mtls->fep.dim.x = inType->getDimX();
550         mtls->fep.dim.y = inType->getDimY();
551         mtls->fep.dim.z = inType->getDimZ();
552 
553         for (int Index = inLen; --Index >= 1;) {
554             if (!ain0->hasSameDims(ains[Index])) {
555                 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
556                   "Failed to launch kernel; dimensions of input and output"
557                   "allocations do not match.");
558 
559                 return false;
560             }
561         }
562 
563     } else if (aout != nullptr) {
564         const Type *outType = aout->getType();
565 
566         mtls->fep.dim.x = outType->getDimX();
567         mtls->fep.dim.y = outType->getDimY();
568         mtls->fep.dim.z = outType->getDimZ();
569 
570     } else if (sc != nullptr) {
571         mtls->fep.dim.x = sc->xEnd;
572         mtls->fep.dim.y = sc->yEnd;
573         mtls->fep.dim.z = 0;
574     } else {
575         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
576                                      "rsForEach called with null allocations");
577         return false;
578     }
579 
580     if (inLen > 0 && aout != nullptr) {
581         if (!ains[0]->hasSameDims(aout)) {
582             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
583               "Failed to launch kernel; dimensions of input and output allocations do not match.");
584 
585             return false;
586         }
587     }
588 
589     if (!sc || (sc->xEnd == 0)) {
590         mtls->end.x = mtls->fep.dim.x;
591     } else {
592         mtls->start.x = rsMin(mtls->fep.dim.x, sc->xStart);
593         mtls->end.x = rsMin(mtls->fep.dim.x, sc->xEnd);
594         if (mtls->start.x >= mtls->end.x) {
595             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
596               "Failed to launch kernel; Invalid xStart or xEnd.");
597             return false;
598         }
599     }
600 
601     if (!sc || (sc->yEnd == 0)) {
602         mtls->end.y = mtls->fep.dim.y;
603     } else {
604         mtls->start.y = rsMin(mtls->fep.dim.y, sc->yStart);
605         mtls->end.y = rsMin(mtls->fep.dim.y, sc->yEnd);
606         if (mtls->start.y >= mtls->end.y) {
607             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
608               "Failed to launch kernel; Invalid yStart or yEnd.");
609             return false;
610         }
611     }
612 
613     if (!sc || (sc->zEnd == 0)) {
614         mtls->end.z = mtls->fep.dim.z;
615     } else {
616         mtls->start.z = rsMin(mtls->fep.dim.z, sc->zStart);
617         mtls->end.z = rsMin(mtls->fep.dim.z, sc->zEnd);
618         if (mtls->start.z >= mtls->end.z) {
619             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
620               "Failed to launch kernel; Invalid zStart or zEnd.");
621             return false;
622         }
623     }
624 
625     if (!sc || (sc->arrayEnd == 0)) {
626         mtls->end.array[0] = mtls->fep.dim.array[0];
627     } else {
628         mtls->start.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayStart);
629         mtls->end.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayEnd);
630         if (mtls->start.array[0] >= mtls->end.array[0]) {
631             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
632               "Failed to launch kernel; Invalid arrayStart or arrayEnd.");
633             return false;
634         }
635     }
636 
637     if (!sc || (sc->array2End == 0)) {
638         mtls->end.array[1] = mtls->fep.dim.array[1];
639     } else {
640         mtls->start.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2Start);
641         mtls->end.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2End);
642         if (mtls->start.array[1] >= mtls->end.array[1]) {
643             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
644               "Failed to launch kernel; Invalid array2Start or array2End.");
645             return false;
646         }
647     }
648 
649     if (!sc || (sc->array3End == 0)) {
650         mtls->end.array[2] = mtls->fep.dim.array[2];
651     } else {
652         mtls->start.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3Start);
653         mtls->end.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3End);
654         if (mtls->start.array[2] >= mtls->end.array[2]) {
655             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
656               "Failed to launch kernel; Invalid array3Start or array3End.");
657             return false;
658         }
659     }
660 
661     if (!sc || (sc->array4End == 0)) {
662         mtls->end.array[3] = mtls->fep.dim.array[3];
663     } else {
664         mtls->start.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4Start);
665         mtls->end.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4End);
666         if (mtls->start.array[3] >= mtls->end.array[3]) {
667             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
668               "Failed to launch kernel; Invalid array4Start or array4End.");
669             return false;
670         }
671     }
672 
673 
674     // The X & Y walkers always want 0-1 min even if dim is not present
675     mtls->end.x    = rsMax((uint32_t)1, mtls->end.x);
676     mtls->end.y    = rsMax((uint32_t)1, mtls->end.y);
677 
678     mtls->rsc        = mCtx;
679     if (ains) {
680         memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
681     }
682     mtls->aout[0]    = aout;
683     mtls->fep.usr    = usr;
684     mtls->fep.usrLen = usrLen;
685     mtls->mSliceSize = 1;
686     mtls->mSliceNum  = 0;
687 
688     mtls->isThreadable  = mIsThreadable;
689 
690     if (inLen > 0) {
691         mtls->fep.inLen = inLen;
692         for (int index = inLen; --index >= 0;) {
693             mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
694             mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
695         }
696     }
697 
698     if (aout != nullptr) {
699         mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
700         mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
701     }
702 
703     // All validation passed, ok to launch threads
704     return true;
705 }
706 
707 
invokeForEach(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)708 void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
709                                      const Allocation ** ains,
710                                      uint32_t inLen,
711                                      Allocation * aout,
712                                      const void * usr,
713                                      uint32_t usrLen,
714                                      const RsScriptCall *sc) {
715 
716     MTLaunchStruct mtls;
717 
718     if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) {
719         forEachKernelSetup(slot, &mtls);
720 
721         RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
722         mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
723         mCtx->setTLS(oldTLS);
724     }
725 }
726 
forEachKernelSetup(uint32_t slot,MTLaunchStruct * mtls)727 void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
728     mtls->script = this;
729     mtls->fep.slot = slot;
730     mtls->kernel = mScriptExec->getForEachFunction(slot);
731     rsAssert(mtls->kernel != nullptr);
732     mtls->sig = mScriptExec->getForEachSignature(slot);
733 }
734 
invokeRoot()735 int RsdCpuScriptImpl::invokeRoot() {
736     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
737     int ret = mRoot();
738     mCtx->setTLS(oldTLS);
739     return ret;
740 }
741 
invokeInit()742 void RsdCpuScriptImpl::invokeInit() {
743     if (mInit) {
744         mInit();
745     }
746 }
747 
invokeFreeChildren()748 void RsdCpuScriptImpl::invokeFreeChildren() {
749     if (mFreeChildren) {
750         mFreeChildren();
751     }
752 }
753 
invokeFunction(uint32_t slot,const void * params,size_t paramLength)754 void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
755                                       size_t paramLength) {
756     //ALOGE("invoke %i %p %zu", slot, params, paramLength);
757     void * ap = nullptr;
758 
759 #if defined(__x86_64__)
760     // The invoked function could have input parameter of vector type for example float4 which
761     // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
762     // So try to align void* params before passing them into RS exported function.
763 
764     if ((uint8_t)(uint64_t)params & 0x0F) {
765         if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
766             memcpy(ap, params, paramLength);
767         } else {
768             ALOGE("x86_64: invokeFunction memalign error, still use params which"
769                   " is not 16 bytes aligned.");
770         }
771     }
772 #endif
773 
774     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
775     reinterpret_cast<void (*)(const void *, uint32_t)>(
776         mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength);
777 
778     mCtx->setTLS(oldTLS);
779 }
780 
setGlobalVar(uint32_t slot,const void * data,size_t dataLength)781 void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
782     //rsAssert(!script->mFieldIsObject[slot]);
783     //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
784 
785     //if (mIntrinsicID) {
786         //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
787         //return;
788     //}
789 
790     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
791     if (!destPtr) {
792         //ALOGV("Calling setVar on slot = %i which is null", slot);
793         return;
794     }
795 
796     memcpy(destPtr, data, dataLength);
797 }
798 
getGlobalVar(uint32_t slot,void * data,size_t dataLength)799 void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
800     //rsAssert(!script->mFieldIsObject[slot]);
801     //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
802 
803     int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
804     if (!srcPtr) {
805         //ALOGV("Calling setVar on slot = %i which is null", slot);
806         return;
807     }
808     memcpy(data, srcPtr, dataLength);
809 }
810 
811 
setGlobalVarWithElemDims(uint32_t slot,const void * data,size_t dataLength,const Element * elem,const uint32_t * dims,size_t dimLength)812 void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
813                                                 const Element *elem,
814                                                 const uint32_t *dims, size_t dimLength) {
815     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
816     if (!destPtr) {
817         //ALOGV("Calling setVar on slot = %i which is null", slot);
818         return;
819     }
820 
821     // We want to look at dimension in terms of integer components,
822     // but dimLength is given in terms of bytes.
823     dimLength /= sizeof(int);
824 
825     // Only a single dimension is currently supported.
826     rsAssert(dimLength == 1);
827     if (dimLength == 1) {
828         // First do the increment loop.
829         size_t stride = elem->getSizeBytes();
830         const char *cVal = reinterpret_cast<const char *>(data);
831         for (uint32_t i = 0; i < dims[0]; i++) {
832             elem->incRefs(cVal);
833             cVal += stride;
834         }
835 
836         // Decrement loop comes after (to prevent race conditions).
837         char *oldVal = reinterpret_cast<char *>(destPtr);
838         for (uint32_t i = 0; i < dims[0]; i++) {
839             elem->decRefs(oldVal);
840             oldVal += stride;
841         }
842     }
843 
844     memcpy(destPtr, data, dataLength);
845 }
846 
setGlobalBind(uint32_t slot,Allocation * data)847 void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
848 
849     //rsAssert(!script->mFieldIsObject[slot]);
850     //ALOGE("setGlobalBind %i %p", slot, data);
851 
852     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
853     if (!destPtr) {
854         //ALOGV("Calling setVar on slot = %i which is null", slot);
855         return;
856     }
857 
858     void *ptr = nullptr;
859     mBoundAllocs[slot] = data;
860     if (data) {
861         ptr = data->mHal.drvState.lod[0].mallocPtr;
862     }
863     memcpy(destPtr, &ptr, sizeof(void *));
864 }
865 
setGlobalObj(uint32_t slot,ObjectBase * data)866 void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
867 
868     //rsAssert(script->mFieldIsObject[slot]);
869     //ALOGE("setGlobalObj %i %p", slot, data);
870 
871     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
872     if (!destPtr) {
873         //ALOGV("Calling setVar on slot = %i which is null", slot);
874         return;
875     }
876 
877     rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
878 }
879 
getFieldName(uint32_t slot) const880 const char* RsdCpuScriptImpl::getFieldName(uint32_t slot) const {
881     return mScriptExec->getFieldName(slot);
882 }
883 
~RsdCpuScriptImpl()884 RsdCpuScriptImpl::~RsdCpuScriptImpl() {
885 #ifndef RS_COMPATIBILITY_LIB
886     delete mCompilerDriver;
887 #endif
888 
889     delete mScriptExec;
890 
891     delete[] mBoundAllocs;
892     if (mScriptSO) {
893         dlclose(mScriptSO);
894     }
895 }
896 
getAllocationForPointer(const void * ptr) const897 Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
898     if (!ptr) {
899         return nullptr;
900     }
901 
902     for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
903         Allocation *a = mBoundAllocs[ct];
904         if (!a) continue;
905         if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
906             return a;
907         }
908     }
909     ALOGE("rsGetAllocation, failed to find %p", ptr);
910     return nullptr;
911 }
912 
getGlobalEntries() const913 int RsdCpuScriptImpl::getGlobalEntries() const {
914     return mScriptExec->getGlobalEntries();
915 }
916 
getGlobalName(int i) const917 const char * RsdCpuScriptImpl::getGlobalName(int i) const {
918     return mScriptExec->getGlobalName(i);
919 }
920 
getGlobalAddress(int i) const921 const void * RsdCpuScriptImpl::getGlobalAddress(int i) const {
922     return mScriptExec->getGlobalAddress(i);
923 }
924 
getGlobalSize(int i) const925 size_t RsdCpuScriptImpl::getGlobalSize(int i) const {
926     return mScriptExec->getGlobalSize(i);
927 }
928 
getGlobalProperties(int i) const929 uint32_t RsdCpuScriptImpl::getGlobalProperties(int i) const {
930     return mScriptExec->getGlobalProperties(i);
931 }
932 
preLaunch(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)933 void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
934                                  uint32_t inLen, Allocation * aout,
935                                  const void * usr, uint32_t usrLen,
936                                  const RsScriptCall *sc) {}
937 
postLaunch(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)938 void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
939                                   uint32_t inLen, Allocation * aout,
940                                   const void * usr, uint32_t usrLen,
941                                   const RsScriptCall *sc) {}
942 
943 
944 }
945 }
946