1 /*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsCpuExecutable.h"
20
21 #ifdef RS_COMPATIBILITY_LIB
22 #include <stdio.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #else
26 #include "rsCppUtils.h"
27
28 #include <bcc/BCCContext.h>
29 #include <bcc/Config/Config.h>
30 #include <bcc/Renderscript/RSCompilerDriver.h>
31 #include <bcinfo/MetadataExtractor.h>
32 #include <cutils/properties.h>
33
34 #include <zlib.h>
35 #include <sys/file.h>
36 #include <sys/types.h>
37 #include <unistd.h>
38
39 #include <string>
40 #include <vector>
41 #endif
42
43 #include <set>
44 #include <string>
45 #include <dlfcn.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <iostream>
49 #include <sstream>
50
51 namespace {
52
// Compile-time switch: when true, storeRSInfoFromSO() dumps the script's
// global-variable info after loading.
constexpr bool kDebugGlobalVariables = false;
54
55 #ifndef RS_COMPATIBILITY_LIB
56
is_force_recompile()57 static bool is_force_recompile() {
58 #ifdef RS_SERVER
59 return false;
60 #else
61 char buf[PROPERTY_VALUE_MAX];
62
63 // Re-compile if floating point precision has been overridden.
64 property_get("debug.rs.precision", buf, "");
65 if (buf[0] != '\0') {
66 return true;
67 }
68
69 // Re-compile if debug.rs.forcerecompile is set.
70 property_get("debug.rs.forcerecompile", buf, "0");
71 if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
72 return true;
73 } else {
74 return false;
75 }
76 #endif // RS_SERVER
77 }
78
setCompileArguments(std::vector<const char * > * args,const std::string & bcFileName,const char * cacheDir,const char * resName,const char * core_lib,bool useRSDebugContext,const char * bccPluginName,bool emitGlobalInfo,bool emitGlobalInfoSkipConstant)79 static void setCompileArguments(std::vector<const char*>* args,
80 const std::string& bcFileName,
81 const char* cacheDir, const char* resName,
82 const char* core_lib, bool useRSDebugContext,
83 const char* bccPluginName, bool emitGlobalInfo,
84 bool emitGlobalInfoSkipConstant) {
85 rsAssert(cacheDir && resName && core_lib);
86 args->push_back(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH);
87 args->push_back("-unroll-runtime");
88 args->push_back("-scalarize-load-store");
89 if (emitGlobalInfo) {
90 args->push_back("-rs-global-info");
91 if (emitGlobalInfoSkipConstant) {
92 args->push_back("-rs-global-info-skip-constant");
93 }
94 }
95 args->push_back("-o");
96 args->push_back(resName);
97 args->push_back("-output_path");
98 args->push_back(cacheDir);
99 args->push_back("-bclib");
100 args->push_back(core_lib);
101 args->push_back("-mtriple");
102 args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
103
104 // Enable workaround for A53 codegen by default.
105 #if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
106 args->push_back("-aarch64-fix-cortex-a53-835769");
107 #endif
108
109 // Execute the bcc compiler.
110 if (useRSDebugContext) {
111 args->push_back("-rs-debug-ctx");
112 } else {
113 // Only load additional libraries for compiles that don't use
114 // the debug context.
115 if (bccPluginName && strlen(bccPluginName) > 0) {
116 args->push_back("-load");
117 args->push_back(bccPluginName);
118 }
119 }
120
121 args->push_back("-fPIC");
122 args->push_back("-embedRSInfo");
123
124 args->push_back(bcFileName.c_str());
125 args->push_back(nullptr);
126 }
127
compileBitcode(const std::string & bcFileName,const char * bitcode,size_t bitcodeSize,std::vector<const char * > & compileArguments)128 static bool compileBitcode(const std::string &bcFileName,
129 const char *bitcode,
130 size_t bitcodeSize,
131 std::vector<const char *> &compileArguments) {
132 rsAssert(bitcode && bitcodeSize);
133
134 FILE *bcfile = fopen(bcFileName.c_str(), "w");
135 if (!bcfile) {
136 ALOGE("Could not write to %s", bcFileName.c_str());
137 return false;
138 }
139 size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
140 fclose(bcfile);
141 if (nwritten != bitcodeSize) {
142 ALOGE("Could not write %zu bytes to %s", bitcodeSize,
143 bcFileName.c_str());
144 return false;
145 }
146
147 return android::renderscript::rsuExecuteCommand(
148 android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH,
149 compileArguments.size()-1, compileArguments.data());
150 }
151
152 // The checksum is unnecessary under a few conditions, since the primary
153 // use-case for it is debugging. If we are loading something from the
154 // system partition (read-only), we know that it was precompiled as part of
155 // application ahead of time (and thus the checksum is completely
156 // unnecessary). The checksum is also unnecessary on release (non-debug)
157 // builds, as the only way to get a shared object is to have compiled the
158 // script once already. On a release build, there is no way to adjust the
159 // other libraries/dependencies, and so the only reason to recompile would
160 // be for a source APK change or an OTA. In either case, the APK would be
161 // reinstalled, which would already clear the code_cache/ directory.
isChecksumNeeded(const char * cacheDir)162 bool isChecksumNeeded(const char *cacheDir) {
163 if ((::strcmp(SYSLIBPATH, cacheDir) == 0) ||
164 (::strcmp(SYSLIBPATH_VENDOR, cacheDir) == 0))
165 return false;
166 char buf[PROPERTY_VALUE_MAX];
167 property_get("ro.debuggable", buf, "");
168 return (buf[0] == '1');
169 }
170
addFileToChecksum(const char * fileName,uint32_t & checksum)171 bool addFileToChecksum(const char *fileName, uint32_t &checksum) {
172 int FD = open(fileName, O_RDONLY);
173 if (FD == -1) {
174 ALOGE("Cannot open file \'%s\' to compute checksum", fileName);
175 return false;
176 }
177
178 char buf[256];
179 while (true) {
180 ssize_t nread = read(FD, buf, sizeof(buf));
181 if (nread < 0) { // bail out on failed read
182 ALOGE("Error while computing checksum for file \'%s\'", fileName);
183 return false;
184 }
185
186 checksum = adler32(checksum, (const unsigned char *) buf, nread);
187 if (static_cast<size_t>(nread) < sizeof(buf)) // EOF
188 break;
189 }
190
191 if (close(FD) != 0) {
192 ALOGE("Cannot close file \'%s\' after computing checksum", fileName);
193 return false;
194 }
195 return true;
196 }
197
198 #endif // !defined(RS_COMPATIBILITY_LIB)
199 } // namespace
200
201 namespace android {
202 namespace renderscript {
203
204 #ifndef RS_COMPATIBILITY_LIB
205
constructBuildChecksum(uint8_t const * bitcode,size_t bitcodeSize,const char * commandLine,const char ** bccFiles,size_t numFiles)206 uint32_t constructBuildChecksum(uint8_t const *bitcode, size_t bitcodeSize,
207 const char *commandLine,
208 const char** bccFiles, size_t numFiles) {
209 uint32_t checksum = adler32(0L, Z_NULL, 0);
210
211 // include checksum of bitcode
212 if (bitcode != nullptr && bitcodeSize > 0) {
213 checksum = adler32(checksum, bitcode, bitcodeSize);
214 }
215
216 // include checksum of command line arguments
217 checksum = adler32(checksum, (const unsigned char *) commandLine,
218 strlen(commandLine));
219
220 // include checksum of bccFiles
221 for (size_t i = 0; i < numFiles; i++) {
222 const char* bccFile = bccFiles[i];
223 if (bccFile[0] != 0 && !addFileToChecksum(bccFile, checksum)) {
224 // return empty checksum instead of something partial/corrupt
225 return 0;
226 }
227 }
228
229 return checksum;
230 }
231
232 #endif // !RS_COMPATIBILITY_LIB
233
RsdCpuScriptImpl(RsdCpuReferenceImpl * ctx,const Script * s)234 RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
235 mCtx = ctx;
236 mScript = s;
237
238 mScriptSO = nullptr;
239
240 #ifndef RS_COMPATIBILITY_LIB
241 mCompilerDriver = nullptr;
242 #endif
243
244
245 mRoot = nullptr;
246 mRootExpand = nullptr;
247 mInit = nullptr;
248 mFreeChildren = nullptr;
249 mScriptExec = nullptr;
250
251 mBoundAllocs = nullptr;
252 mIntrinsicData = nullptr;
253 mIsThreadable = true;
254
255 mBuildChecksum = 0;
256 mChecksumNeeded = false;
257 }
258
storeRSInfoFromSO()259 bool RsdCpuScriptImpl::storeRSInfoFromSO() {
260 // The shared object may have an invalid build checksum.
261 // Validate and fail early.
262 mScriptExec = ScriptExecutable::createFromSharedObject(
263 mCtx->getContext(), mScriptSO,
264 mChecksumNeeded ? mBuildChecksum : 0);
265
266 if (mScriptExec == nullptr) {
267 return false;
268 }
269
270 mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
271 if (mRoot) {
272 //ALOGE("Found root(): %p", mRoot);
273 }
274 mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
275 if (mRootExpand) {
276 //ALOGE("Found root.expand(): %p", mRootExpand);
277 }
278 mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
279 if (mInit) {
280 //ALOGE("Found init(): %p", mInit);
281 }
282 mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
283 if (mFreeChildren) {
284 //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
285 }
286
287 size_t varCount = mScriptExec->getExportedVariableCount();
288 if (varCount > 0) {
289 mBoundAllocs = new Allocation *[varCount];
290 memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
291 }
292
293 mIsThreadable = mScriptExec->getThreadable();
294 //ALOGE("Script isThreadable? %d", mIsThreadable);
295
296 if (kDebugGlobalVariables) {
297 mScriptExec->dumpGlobalInfo();
298 }
299
300 return true;
301 }
302
init(char const * resName,char const * cacheDir,uint8_t const * bitcode,size_t bitcodeSize,uint32_t flags,char const * bccPluginName)303 bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
304 uint8_t const *bitcode, size_t bitcodeSize,
305 uint32_t flags, char const *bccPluginName) {
306 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir,
307 // bitcode, bitcodeSize, flags, lookupFunc);
308 //ALOGE("rsdScriptInit %p %p", rsc, script);
309
310 mCtx->lockMutex();
311 #ifndef RS_COMPATIBILITY_LIB
312 bool useRSDebugContext = false;
313
314 mCompilerDriver = nullptr;
315
316 mCompilerDriver = new bcc::RSCompilerDriver();
317 if (mCompilerDriver == nullptr) {
318 ALOGE("bcc: FAILS to create compiler driver (out of memory)");
319 mCtx->unlockMutex();
320 return false;
321 }
322
323 // Run any compiler setup functions we have been provided with.
324 RSSetupCompilerCallback setupCompilerCallback =
325 mCtx->getSetupCompilerCallback();
326 if (setupCompilerCallback != nullptr) {
327 setupCompilerCallback(mCompilerDriver);
328 }
329
330 bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
331 if (!bitcodeMetadata.extract()) {
332 ALOGE("Could not extract metadata from bitcode");
333 mCtx->unlockMutex();
334 return false;
335 }
336
337 const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
338
339 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
340 mCompilerDriver->setDebugContext(true);
341 useRSDebugContext = true;
342 }
343
344 std::string bcFileName(cacheDir);
345 bcFileName.append("/");
346 bcFileName.append(resName);
347 bcFileName.append(".bc");
348
349 std::vector<const char*> compileArguments;
350 bool emitGlobalInfo = mCtx->getEmbedGlobalInfo();
351 bool emitGlobalInfoSkipConstant = mCtx->getEmbedGlobalInfoSkipConstant();
352 setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
353 useRSDebugContext, bccPluginName, emitGlobalInfo,
354 emitGlobalInfoSkipConstant);
355
356 mChecksumNeeded = isChecksumNeeded(cacheDir);
357 if (mChecksumNeeded) {
358 std::vector<const char *> bccFiles = { BCC_EXE_PATH,
359 core_lib,
360 };
361
362 // The last argument of compileArguments is a nullptr, so remove 1 from
363 // the size.
364 std::unique_ptr<const char> compileCommandLine(
365 rsuJoinStrings(compileArguments.size()-1, compileArguments.data()));
366
367 mBuildChecksum = constructBuildChecksum(bitcode, bitcodeSize,
368 compileCommandLine.get(),
369 bccFiles.data(), bccFiles.size());
370
371 if (mBuildChecksum == 0) {
372 // cannot compute checksum but verification is enabled
373 mCtx->unlockMutex();
374 return false;
375 }
376 }
377 else {
378 // add a dummy/constant as a checksum if verification is disabled
379 mBuildChecksum = 0xabadcafe;
380 }
381
382 // Append build checksum to commandline
383 // Handle the terminal nullptr in compileArguments
384 compileArguments.pop_back();
385 compileArguments.push_back("-build-checksum");
386 std::stringstream ss;
387 ss << std::hex << mBuildChecksum;
388 compileArguments.push_back(ss.str().c_str());
389 compileArguments.push_back(nullptr);
390
391 if (!is_force_recompile() && !useRSDebugContext) {
392 mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
393
394 // Read RS info from the shared object to detect checksum mismatch
395 if (mScriptSO != nullptr && !storeRSInfoFromSO()) {
396 dlclose(mScriptSO);
397 mScriptSO = nullptr;
398 }
399 }
400
401 // If we can't, it's either not there or out of date. We compile the bit code and try loading
402 // again.
403 if (mScriptSO == nullptr) {
404 if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
405 compileArguments))
406 {
407 ALOGE("bcc: FAILS to compile '%s'", resName);
408 mCtx->unlockMutex();
409 return false;
410 }
411
412 if (!SharedLibraryUtils::createSharedLibrary(mCtx->getContext()->getDriverName(),
413 cacheDir, resName)) {
414 ALOGE("Linker: Failed to link object file '%s'", resName);
415 mCtx->unlockMutex();
416 return false;
417 }
418
419 mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
420 if (mScriptSO == nullptr) {
421 ALOGE("Unable to load '%s'", resName);
422 mCtx->unlockMutex();
423 return false;
424 }
425
426 // Read RS symbol information from the .so.
427 if (!storeRSInfoFromSO()) {
428 goto error;
429 }
430 }
431
432 mBitcodeFilePath.setTo(bcFileName.c_str());
433
434 #else // RS_COMPATIBILITY_LIB is defined
435 const char *nativeLibDir = mCtx->getContext()->getNativeLibDir();
436 mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nativeLibDir);
437
438 if (!mScriptSO) {
439 goto error;
440 }
441
442 if (!storeRSInfoFromSO()) {
443 goto error;
444 }
445 #endif
446 mCtx->unlockMutex();
447 return true;
448
449 error:
450
451 mCtx->unlockMutex();
452 if (mScriptSO) {
453 dlclose(mScriptSO);
454 mScriptSO = nullptr;
455 }
456 return false;
457 }
458
459 #ifndef RS_COMPATIBILITY_LIB
460
findCoreLib(const bcinfo::MetadataExtractor & ME,const char * bitcode,size_t bitcodeSize)461 const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
462 size_t bitcodeSize) {
463 const char* defaultLib = SYSLIBPATH"/libclcore.bc";
464
465 // If we're debugging, use the debug library.
466 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
467 return SYSLIBPATH"/libclcore_debug.bc";
468 }
469
470 // If a callback has been registered to specify a library, use that.
471 RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
472 if (selectRTCallback != nullptr) {
473 return selectRTCallback((const char*)bitcode, bitcodeSize);
474 }
475
476 // Check for a platform specific library
477 #if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
478 enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
479 if (prec == bcinfo::RS_FP_Relaxed) {
480 // NEON-capable ARMv7a devices can use an accelerated math library
481 // for all reduced precision scripts.
482 // ARMv8 does not use NEON, as ASIMD can be used with all precision
483 // levels.
484 return SYSLIBPATH"/libclcore_neon.bc";
485 } else {
486 return defaultLib;
487 }
488 #elif defined(__i386__) || defined(__x86_64__)
489 // x86 devices will use an optimized library.
490 return SYSLIBPATH"/libclcore_x86.bc";
491 #else
492 return defaultLib;
493 #endif
494 }
495
496 #endif
497
populateScript(Script * script)498 void RsdCpuScriptImpl::populateScript(Script *script) {
499 // Copy info over to runtime
500 script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
501 script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
502 script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
503 script->mHal.info.exportedPragmaKeyList = mScriptExec->getPragmaKeys();
504 script->mHal.info.exportedPragmaValueList = mScriptExec->getPragmaValues();
505
506 // Bug, need to stash in metadata
507 if (mRootExpand) {
508 script->mHal.info.root = mRootExpand;
509 } else {
510 script->mHal.info.root = mRoot;
511 }
512 }
513
514
forEachMtlsSetup(const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc,MTLaunchStruct * mtls)515 bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
516 uint32_t inLen,
517 Allocation * aout,
518 const void * usr, uint32_t usrLen,
519 const RsScriptCall *sc,
520 MTLaunchStruct *mtls) {
521
522 memset(mtls, 0, sizeof(MTLaunchStruct));
523
524 for (int index = inLen; --index >= 0;) {
525 const Allocation* ain = ains[index];
526
527 // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
528 if (ain != nullptr &&
529 (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
530
531 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
532 "rsForEach called with null in allocations");
533 return false;
534 }
535 }
536
537 if (aout &&
538 (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
539
540 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
541 "rsForEach called with null out allocations");
542 return false;
543 }
544
545 if (inLen > 0) {
546 const Allocation *ain0 = ains[0];
547 const Type *inType = ain0->getType();
548
549 mtls->fep.dim.x = inType->getDimX();
550 mtls->fep.dim.y = inType->getDimY();
551 mtls->fep.dim.z = inType->getDimZ();
552
553 for (int Index = inLen; --Index >= 1;) {
554 if (!ain0->hasSameDims(ains[Index])) {
555 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
556 "Failed to launch kernel; dimensions of input and output"
557 "allocations do not match.");
558
559 return false;
560 }
561 }
562
563 } else if (aout != nullptr) {
564 const Type *outType = aout->getType();
565
566 mtls->fep.dim.x = outType->getDimX();
567 mtls->fep.dim.y = outType->getDimY();
568 mtls->fep.dim.z = outType->getDimZ();
569
570 } else if (sc != nullptr) {
571 mtls->fep.dim.x = sc->xEnd;
572 mtls->fep.dim.y = sc->yEnd;
573 mtls->fep.dim.z = 0;
574 } else {
575 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
576 "rsForEach called with null allocations");
577 return false;
578 }
579
580 if (inLen > 0 && aout != nullptr) {
581 if (!ains[0]->hasSameDims(aout)) {
582 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
583 "Failed to launch kernel; dimensions of input and output allocations do not match.");
584
585 return false;
586 }
587 }
588
589 if (!sc || (sc->xEnd == 0)) {
590 mtls->end.x = mtls->fep.dim.x;
591 } else {
592 mtls->start.x = rsMin(mtls->fep.dim.x, sc->xStart);
593 mtls->end.x = rsMin(mtls->fep.dim.x, sc->xEnd);
594 if (mtls->start.x >= mtls->end.x) {
595 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
596 "Failed to launch kernel; Invalid xStart or xEnd.");
597 return false;
598 }
599 }
600
601 if (!sc || (sc->yEnd == 0)) {
602 mtls->end.y = mtls->fep.dim.y;
603 } else {
604 mtls->start.y = rsMin(mtls->fep.dim.y, sc->yStart);
605 mtls->end.y = rsMin(mtls->fep.dim.y, sc->yEnd);
606 if (mtls->start.y >= mtls->end.y) {
607 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
608 "Failed to launch kernel; Invalid yStart or yEnd.");
609 return false;
610 }
611 }
612
613 if (!sc || (sc->zEnd == 0)) {
614 mtls->end.z = mtls->fep.dim.z;
615 } else {
616 mtls->start.z = rsMin(mtls->fep.dim.z, sc->zStart);
617 mtls->end.z = rsMin(mtls->fep.dim.z, sc->zEnd);
618 if (mtls->start.z >= mtls->end.z) {
619 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
620 "Failed to launch kernel; Invalid zStart or zEnd.");
621 return false;
622 }
623 }
624
625 if (!sc || (sc->arrayEnd == 0)) {
626 mtls->end.array[0] = mtls->fep.dim.array[0];
627 } else {
628 mtls->start.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayStart);
629 mtls->end.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayEnd);
630 if (mtls->start.array[0] >= mtls->end.array[0]) {
631 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
632 "Failed to launch kernel; Invalid arrayStart or arrayEnd.");
633 return false;
634 }
635 }
636
637 if (!sc || (sc->array2End == 0)) {
638 mtls->end.array[1] = mtls->fep.dim.array[1];
639 } else {
640 mtls->start.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2Start);
641 mtls->end.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2End);
642 if (mtls->start.array[1] >= mtls->end.array[1]) {
643 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
644 "Failed to launch kernel; Invalid array2Start or array2End.");
645 return false;
646 }
647 }
648
649 if (!sc || (sc->array3End == 0)) {
650 mtls->end.array[2] = mtls->fep.dim.array[2];
651 } else {
652 mtls->start.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3Start);
653 mtls->end.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3End);
654 if (mtls->start.array[2] >= mtls->end.array[2]) {
655 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
656 "Failed to launch kernel; Invalid array3Start or array3End.");
657 return false;
658 }
659 }
660
661 if (!sc || (sc->array4End == 0)) {
662 mtls->end.array[3] = mtls->fep.dim.array[3];
663 } else {
664 mtls->start.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4Start);
665 mtls->end.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4End);
666 if (mtls->start.array[3] >= mtls->end.array[3]) {
667 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
668 "Failed to launch kernel; Invalid array4Start or array4End.");
669 return false;
670 }
671 }
672
673
674 // The X & Y walkers always want 0-1 min even if dim is not present
675 mtls->end.x = rsMax((uint32_t)1, mtls->end.x);
676 mtls->end.y = rsMax((uint32_t)1, mtls->end.y);
677
678 mtls->rsc = mCtx;
679 if (ains) {
680 memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
681 }
682 mtls->aout[0] = aout;
683 mtls->fep.usr = usr;
684 mtls->fep.usrLen = usrLen;
685 mtls->mSliceSize = 1;
686 mtls->mSliceNum = 0;
687
688 mtls->isThreadable = mIsThreadable;
689
690 if (inLen > 0) {
691 mtls->fep.inLen = inLen;
692 for (int index = inLen; --index >= 0;) {
693 mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
694 mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
695 }
696 }
697
698 if (aout != nullptr) {
699 mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
700 mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
701 }
702
703 // All validation passed, ok to launch threads
704 return true;
705 }
706
707
invokeForEach(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)708 void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
709 const Allocation ** ains,
710 uint32_t inLen,
711 Allocation * aout,
712 const void * usr,
713 uint32_t usrLen,
714 const RsScriptCall *sc) {
715
716 MTLaunchStruct mtls;
717
718 if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) {
719 forEachKernelSetup(slot, &mtls);
720
721 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
722 mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
723 mCtx->setTLS(oldTLS);
724 }
725 }
726
forEachKernelSetup(uint32_t slot,MTLaunchStruct * mtls)727 void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
728 mtls->script = this;
729 mtls->fep.slot = slot;
730 mtls->kernel = mScriptExec->getForEachFunction(slot);
731 rsAssert(mtls->kernel != nullptr);
732 mtls->sig = mScriptExec->getForEachSignature(slot);
733 }
734
invokeRoot()735 int RsdCpuScriptImpl::invokeRoot() {
736 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
737 int ret = mRoot();
738 mCtx->setTLS(oldTLS);
739 return ret;
740 }
741
invokeInit()742 void RsdCpuScriptImpl::invokeInit() {
743 if (mInit) {
744 mInit();
745 }
746 }
747
invokeFreeChildren()748 void RsdCpuScriptImpl::invokeFreeChildren() {
749 if (mFreeChildren) {
750 mFreeChildren();
751 }
752 }
753
invokeFunction(uint32_t slot,const void * params,size_t paramLength)754 void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
755 size_t paramLength) {
756 //ALOGE("invoke %i %p %zu", slot, params, paramLength);
757 void * ap = nullptr;
758
759 #if defined(__x86_64__)
760 // The invoked function could have input parameter of vector type for example float4 which
761 // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
762 // So try to align void* params before passing them into RS exported function.
763
764 if ((uint8_t)(uint64_t)params & 0x0F) {
765 if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
766 memcpy(ap, params, paramLength);
767 } else {
768 ALOGE("x86_64: invokeFunction memalign error, still use params which"
769 " is not 16 bytes aligned.");
770 }
771 }
772 #endif
773
774 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
775 reinterpret_cast<void (*)(const void *, uint32_t)>(
776 mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength);
777
778 mCtx->setTLS(oldTLS);
779 }
780
setGlobalVar(uint32_t slot,const void * data,size_t dataLength)781 void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
782 //rsAssert(!script->mFieldIsObject[slot]);
783 //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
784
785 //if (mIntrinsicID) {
786 //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
787 //return;
788 //}
789
790 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
791 if (!destPtr) {
792 //ALOGV("Calling setVar on slot = %i which is null", slot);
793 return;
794 }
795
796 memcpy(destPtr, data, dataLength);
797 }
798
getGlobalVar(uint32_t slot,void * data,size_t dataLength)799 void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
800 //rsAssert(!script->mFieldIsObject[slot]);
801 //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
802
803 int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
804 if (!srcPtr) {
805 //ALOGV("Calling setVar on slot = %i which is null", slot);
806 return;
807 }
808 memcpy(data, srcPtr, dataLength);
809 }
810
811
setGlobalVarWithElemDims(uint32_t slot,const void * data,size_t dataLength,const Element * elem,const uint32_t * dims,size_t dimLength)812 void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
813 const Element *elem,
814 const uint32_t *dims, size_t dimLength) {
815 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
816 if (!destPtr) {
817 //ALOGV("Calling setVar on slot = %i which is null", slot);
818 return;
819 }
820
821 // We want to look at dimension in terms of integer components,
822 // but dimLength is given in terms of bytes.
823 dimLength /= sizeof(int);
824
825 // Only a single dimension is currently supported.
826 rsAssert(dimLength == 1);
827 if (dimLength == 1) {
828 // First do the increment loop.
829 size_t stride = elem->getSizeBytes();
830 const char *cVal = reinterpret_cast<const char *>(data);
831 for (uint32_t i = 0; i < dims[0]; i++) {
832 elem->incRefs(cVal);
833 cVal += stride;
834 }
835
836 // Decrement loop comes after (to prevent race conditions).
837 char *oldVal = reinterpret_cast<char *>(destPtr);
838 for (uint32_t i = 0; i < dims[0]; i++) {
839 elem->decRefs(oldVal);
840 oldVal += stride;
841 }
842 }
843
844 memcpy(destPtr, data, dataLength);
845 }
846
setGlobalBind(uint32_t slot,Allocation * data)847 void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
848
849 //rsAssert(!script->mFieldIsObject[slot]);
850 //ALOGE("setGlobalBind %i %p", slot, data);
851
852 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
853 if (!destPtr) {
854 //ALOGV("Calling setVar on slot = %i which is null", slot);
855 return;
856 }
857
858 void *ptr = nullptr;
859 mBoundAllocs[slot] = data;
860 if (data) {
861 ptr = data->mHal.drvState.lod[0].mallocPtr;
862 }
863 memcpy(destPtr, &ptr, sizeof(void *));
864 }
865
setGlobalObj(uint32_t slot,ObjectBase * data)866 void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
867
868 //rsAssert(script->mFieldIsObject[slot]);
869 //ALOGE("setGlobalObj %i %p", slot, data);
870
871 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
872 if (!destPtr) {
873 //ALOGV("Calling setVar on slot = %i which is null", slot);
874 return;
875 }
876
877 rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
878 }
879
getFieldName(uint32_t slot) const880 const char* RsdCpuScriptImpl::getFieldName(uint32_t slot) const {
881 return mScriptExec->getFieldName(slot);
882 }
883
~RsdCpuScriptImpl()884 RsdCpuScriptImpl::~RsdCpuScriptImpl() {
885 #ifndef RS_COMPATIBILITY_LIB
886 delete mCompilerDriver;
887 #endif
888
889 delete mScriptExec;
890
891 delete[] mBoundAllocs;
892 if (mScriptSO) {
893 dlclose(mScriptSO);
894 }
895 }
896
getAllocationForPointer(const void * ptr) const897 Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
898 if (!ptr) {
899 return nullptr;
900 }
901
902 for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
903 Allocation *a = mBoundAllocs[ct];
904 if (!a) continue;
905 if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
906 return a;
907 }
908 }
909 ALOGE("rsGetAllocation, failed to find %p", ptr);
910 return nullptr;
911 }
912
getGlobalEntries() const913 int RsdCpuScriptImpl::getGlobalEntries() const {
914 return mScriptExec->getGlobalEntries();
915 }
916
getGlobalName(int i) const917 const char * RsdCpuScriptImpl::getGlobalName(int i) const {
918 return mScriptExec->getGlobalName(i);
919 }
920
getGlobalAddress(int i) const921 const void * RsdCpuScriptImpl::getGlobalAddress(int i) const {
922 return mScriptExec->getGlobalAddress(i);
923 }
924
getGlobalSize(int i) const925 size_t RsdCpuScriptImpl::getGlobalSize(int i) const {
926 return mScriptExec->getGlobalSize(i);
927 }
928
getGlobalProperties(int i) const929 uint32_t RsdCpuScriptImpl::getGlobalProperties(int i) const {
930 return mScriptExec->getGlobalProperties(i);
931 }
932
preLaunch(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)933 void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
934 uint32_t inLen, Allocation * aout,
935 const void * usr, uint32_t usrLen,
936 const RsScriptCall *sc) {}
937
postLaunch(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)938 void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
939 uint32_t inLen, Allocation * aout,
940 const void * usr, uint32_t usrLen,
941 const RsScriptCall *sc) {}
942
943
944 }
945 }
946