1 #include "rsCpuScriptGroup2.h"
2 
3 #include <dlfcn.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <unistd.h>
7 
8 #include <set>
9 #include <sstream>
10 #include <string>
11 #include <vector>
12 
13 #ifndef RS_COMPATIBILITY_LIB
14 #include "bcc/Config/Config.h"
15 #endif
16 
17 #include "cpu_ref/rsCpuCore.h"
18 #include "rsClosure.h"
19 #include "rsContext.h"
20 #include "rsCpuCore.h"
21 #include "rsCpuExecutable.h"
22 #include "rsCpuScript.h"
23 #include "rsScript.h"
24 #include "rsScriptGroup2.h"
25 #include "rsScriptIntrinsic.h"
26 
27 using std::string;
28 using std::vector;
29 
30 namespace android {
31 namespace renderscript {
32 
33 namespace {
34 
35 const size_t DefaultKernelArgCount = 2;
36 
groupRoot(const RsExpandKernelDriverInfo * kinfo,uint32_t xstart,uint32_t xend,uint32_t outstep)37 void groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
38                uint32_t xend, uint32_t outstep) {
39     const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
40     RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
41 
42     const size_t oldInLen = mutable_kinfo->inLen;
43 
44     decltype(mutable_kinfo->inStride) oldInStride;
45     memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
46 
47     for (CPUClosure* cpuClosure : closures) {
48         const Closure* closure = cpuClosure->mClosure;
49 
50         // There had better be enough space in mutable_kinfo
51         rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
52 
53         for (size_t i = 0; i < closure->mNumArg; i++) {
54             const void* arg = closure->mArgs[i];
55             const Allocation* a = (const Allocation*)arg;
56             const uint32_t eStride = a->mHal.state.elementSizeBytes;
57             const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
58                     eStride * xstart;
59             if (kinfo->dim.y > 1) {
60                 ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
61             }
62             mutable_kinfo->inPtr[i] = ptr;
63             mutable_kinfo->inStride[i] = eStride;
64         }
65         mutable_kinfo->inLen = closure->mNumArg;
66 
67         const Allocation* out = closure->mReturnValue;
68         const uint32_t ostep = out->mHal.state.elementSizeBytes;
69         const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
70                 ostep * xstart;
71         if (kinfo->dim.y > 1) {
72             ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
73         }
74 
75         rsAssert(kinfo->outLen <= 1);
76         mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
77 
78         cpuClosure->mFunc(kinfo, xstart, xend, ostep);
79     }
80 
81     mutable_kinfo->inLen = oldInLen;
82     memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
83 }
84 
85 }  // namespace
86 
Batch(CpuScriptGroup2Impl * group,const char * name)87 Batch::Batch(CpuScriptGroup2Impl* group, const char* name) :
88     mGroup(group), mFunc(nullptr) {
89     mName = strndup(name, strlen(name));
90 }
91 
~Batch()92 Batch::~Batch() {
93     for (CPUClosure* c : mClosures) {
94         delete c;
95     }
96     free(mName);
97 }
98 
conflict(CPUClosure * cpuClosure) const99 bool Batch::conflict(CPUClosure* cpuClosure) const {
100     if (mClosures.empty()) {
101         return false;
102     }
103 
104     const Closure* closure = cpuClosure->mClosure;
105 
106     if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
107         // An invoke should be in a batch by itself, so it conflicts with any other
108         // closure.
109         return true;
110     }
111 
112     const auto& globalDeps = closure->mGlobalDeps;
113     const auto& argDeps = closure->mArgDeps;
114 
115     for (CPUClosure* c : mClosures) {
116         const Closure* batched = c->mClosure;
117         if (globalDeps.find(batched) != globalDeps.end()) {
118             return true;
119         }
120         const auto& it = argDeps.find(batched);
121         if (it != argDeps.end()) {
122             const auto& args = (*it).second;
123             for (const auto &p1 : *args) {
124                 if (p1.second.get() != nullptr) {
125                     return true;
126                 }
127             }
128         }
129     }
130 
131     // The compiler fusion pass in bcc expects that kernels chained up through
132     // (1st) input and output.
133 
134     const Closure* lastBatched = mClosures.back()->mClosure;
135     const auto& it = argDeps.find(lastBatched);
136 
137     if (it == argDeps.end()) {
138         return true;
139     }
140 
141     const auto& args = (*it).second;
142     for (const auto &p1 : *args) {
143         if (p1.first == 0 && p1.second.get() == nullptr) {
144             // The new closure depends on the last batched closure's return
145             // value (fieldId being nullptr) for its first argument (argument 0)
146             return false;
147         }
148     }
149 
150     return true;
151 }
152 
CpuScriptGroup2Impl(RsdCpuReferenceImpl * cpuRefImpl,const ScriptGroupBase * sg)153 CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
154                                          const ScriptGroupBase *sg) :
155     mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
156     mExecutable(nullptr), mScriptObj(nullptr) {
157     rsAssert(!mGroup->mClosures.empty());
158 
159     mCpuRefImpl->lockMutex();
160     Batch* batch = new Batch(this, "Batch0");
161     int i = 0;
162     for (Closure* closure: mGroup->mClosures) {
163         CPUClosure* cc;
164         const IDBase* funcID = closure->mFunctionID.get();
165         RsdCpuScriptImpl* si =
166                 (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
167         if (closure->mIsKernel) {
168             MTLaunchStruct mtls;
169             si->forEachKernelSetup(funcID->mSlot, &mtls);
170             cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
171         } else {
172             cc = new CPUClosure(closure, si);
173         }
174 
175         if (batch->conflict(cc)) {
176             mBatches.push_back(batch);
177             std::stringstream ss;
178             ss << "Batch" << ++i;
179             batch = new Batch(this, ss.str().c_str());
180         }
181 
182         batch->mClosures.push_back(cc);
183     }
184 
185     rsAssert(!batch->mClosures.empty());
186     mBatches.push_back(batch);
187 
188 #ifndef RS_COMPATIBILITY_LIB
189     compile(mGroup->mCacheDir);
190     if (mScriptObj != nullptr && mExecutable != nullptr) {
191         for (Batch* batch : mBatches) {
192             batch->resolveFuncPtr(mScriptObj);
193         }
194     }
195 #endif  // RS_COMPATIBILITY_LIB
196     mCpuRefImpl->unlockMutex();
197 }
198 
resolveFuncPtr(void * sharedObj)199 void Batch::resolveFuncPtr(void* sharedObj) {
200     std::string funcName(mName);
201     if (mClosures.front()->mClosure->mIsKernel) {
202         funcName.append(".expand");
203     }
204     mFunc = dlsym(sharedObj, funcName.c_str());
205     rsAssert (mFunc != nullptr);
206 }
207 
~CpuScriptGroup2Impl()208 CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
209     for (Batch* batch : mBatches) {
210         delete batch;
211     }
212     delete mExecutable;
213     // TODO: move this dlclose into ~ScriptExecutable().
214     if (mScriptObj != nullptr) {
215         dlclose(mScriptObj);
216     }
217 }
218 
219 namespace {
220 
221 #ifndef RS_COMPATIBILITY_LIB
222 
getCoreLibPath(Context * context,string * coreLibRelaxedPath)223 string getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
224     *coreLibRelaxedPath = "";
225 
226     // If we're debugging, use the debug library.
227     if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
228         return SYSLIBPATH"/libclcore_debug.bc";
229     }
230 
231     // Check for a platform specific library
232 
233 #if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
234     // NEON-capable ARMv7a devices can use an accelerated math library
235     // for all reduced precision scripts.
236     // ARMv8 does not use NEON, as ASIMD can be used with all precision
237     // levels.
238     *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
239 #endif
240 
241 #if defined(__i386__) || defined(__x86_64__)
242     // x86 devices will use an optimized library.
243     return SYSLIBPATH"/libclcore_x86.bc";
244 #else
245     return SYSLIBPATH"/libclcore.bc";
246 #endif
247 }
248 
setupCompileArguments(const vector<const char * > & inputs,const vector<string> & kernelBatches,const vector<string> & invokeBatches,const char * outputDir,const char * outputFileName,const char * coreLibPath,const char * coreLibRelaxedPath,const bool emitGlobalInfo,const bool emitGlobalInfoSkipConstant,vector<const char * > * args)249 void setupCompileArguments(
250         const vector<const char*>& inputs, const vector<string>& kernelBatches,
251         const vector<string>& invokeBatches,
252         const char* outputDir, const char* outputFileName,
253         const char* coreLibPath, const char* coreLibRelaxedPath,
254         const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant,
255         vector<const char*>* args) {
256     args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
257     args->push_back("-fPIC");
258     args->push_back("-embedRSInfo");
259     if (emitGlobalInfo) {
260         args->push_back("-rs-global-info");
261         if (emitGlobalInfoSkipConstant) {
262             args->push_back("-rs-global-info-skip-constant");
263         }
264     }
265     args->push_back("-mtriple");
266     args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
267     args->push_back("-bclib");
268     args->push_back(coreLibPath);
269     args->push_back("-bclib_relaxed");
270     args->push_back(coreLibRelaxedPath);
271     for (const char* input : inputs) {
272         args->push_back(input);
273     }
274     for (const string& batch : kernelBatches) {
275         args->push_back("-merge");
276         args->push_back(batch.c_str());
277     }
278     for (const string& batch : invokeBatches) {
279         args->push_back("-invoke");
280         args->push_back(batch.c_str());
281     }
282     args->push_back("-output_path");
283     args->push_back(outputDir);
284 
285     // The output filename has to be the last, in case we need to pop it out and
286     // replace with a different name.
287     args->push_back("-o");
288     args->push_back(outputFileName);
289 }
290 
generateSourceSlot(RsdCpuReferenceImpl * ctxt,const Closure & closure,const std::vector<const char * > & inputs,std::stringstream & ss)291 void generateSourceSlot(RsdCpuReferenceImpl* ctxt,
292                         const Closure& closure,
293                         const std::vector<const char*>& inputs,
294                         std::stringstream& ss) {
295     const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
296     const Script* script = funcID->mScript;
297 
298     rsAssert (!script->isIntrinsic());
299 
300     const RsdCpuScriptImpl *cpuScript =
301             (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
302     const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
303 
304     const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
305             inputs.begin();
306 
307     ss << index << "," << funcID->mSlot << ".";
308 }
309 
#endif  // RS_COMPATIBILITY_LIB
311 
312 }  // anonymous namespace
313 
compile(const char * cacheDir)314 void CpuScriptGroup2Impl::compile(const char* cacheDir) {
315 #ifndef RS_COMPATIBILITY_LIB
316     if (mGroup->mClosures.size() < 2) {
317         return;
318     }
319 
320     auto comparator = [](const char* str1, const char* str2) -> bool {
321         return strcmp(str1, str2) < 0;
322     };
323     std::set<const char*, decltype(comparator)> inputSet(comparator);
324 
325     for (Closure* closure : mGroup->mClosures) {
326         const Script* script = closure->mFunctionID.get()->mScript;
327 
328         // If any script is an intrinsic, give up trying fusing the kernels.
329         if (script->isIntrinsic()) {
330             return;
331         }
332 
333         const RsdCpuScriptImpl *cpuScript =
334             (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script);
335 
336         const char* bitcodeFilename = cpuScript->getBitcodeFilePath();
337         inputSet.insert(bitcodeFilename);
338     }
339 
340     std::vector<const char*> inputs(inputSet.begin(), inputSet.end());
341 
342     std::vector<string> kernelBatches;
343     std::vector<string> invokeBatches;
344 
345     int i = 0;
346     for (const auto& batch : mBatches) {
347         rsAssert(batch->size() > 0);
348 
349         std::stringstream ss;
350         ss << batch->mName << ":";
351 
352         if (!batch->mClosures.front()->mClosure->mIsKernel) {
353             rsAssert(batch->size() == 1);
354             generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss);
355             invokeBatches.push_back(ss.str());
356         } else {
357             for (const auto& cpuClosure : batch->mClosures) {
358                 generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss);
359             }
360             kernelBatches.push_back(ss.str());
361         }
362     }
363 
364     rsAssert(cacheDir != nullptr);
365     string objFilePath(cacheDir);
366     objFilePath.append("/");
367     objFilePath.append(mGroup->mName);
368     objFilePath.append(".o");
369 
370     const char* resName = mGroup->mName;
371     string coreLibRelaxedPath;
372     const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
373                                                &coreLibRelaxedPath);
374 
375     vector<const char*> arguments;
376     bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo();
377     bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant();
378     setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
379                           resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
380                           emitGlobalInfo, emitGlobalInfoSkipConstant,
381                           &arguments);
382 
383     std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
384                                                        arguments.data()));
385 
386     inputs.push_back(coreLibPath.c_str());
387     inputs.push_back(coreLibRelaxedPath.c_str());
388 
389     uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(),
390                                                inputs.data(), inputs.size());
391 
392     if (checksum == 0) {
393         return;
394     }
395 
396     std::stringstream ss;
397     ss << std::hex << checksum;
398     const char* checksumStr = ss.str().c_str();
399 
400     //===--------------------------------------------------------------------===//
401     // Try to load a shared lib from code cache matching filename and checksum
402     //===--------------------------------------------------------------------===//
403 
404     bool alreadyLoaded = false;
405     std::string cloneName;
406 
407     mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nullptr,
408                                                        &alreadyLoaded);
409     if (mScriptObj != nullptr) {
410         // A shared library named resName is found in code cache directory
411         // cacheDir, and loaded with the handle stored in mScriptObj.
412 
413         mExecutable = ScriptExecutable::createFromSharedObject(
414             getCpuRefImpl()->getContext(), mScriptObj, checksum);
415 
416         if (mExecutable != nullptr) {
417             // The loaded shared library in mScriptObj has a matching checksum.
418             // An executable object has been created.
419             return;
420         }
421 
422         ALOGV("Failed to create an executable object from so file due to "
423               "mismatching checksum");
424 
425         if (alreadyLoaded) {
426             // The shared object found in code cache has already been loaded.
427             // A different file name is needed for the new shared library, to
428             // avoid corrupting the currently loaded instance.
429 
430             cloneName.append(resName);
431             cloneName.append("#");
432             cloneName.append(SharedLibraryUtils::getRandomString(6).string());
433 
434             // The last element in arguments is the output filename.
435             arguments.pop_back();
436             arguments.push_back(cloneName.c_str());
437         }
438 
439         dlclose(mScriptObj);
440         mScriptObj = nullptr;
441     }
442 
443     //===--------------------------------------------------------------------===//
444     // Fuse the input kernels and generate native code in an object file
445     //===--------------------------------------------------------------------===//
446 
447     arguments.push_back("-build-checksum");
448     arguments.push_back(checksumStr);
449     arguments.push_back(nullptr);
450 
451     bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
452                                       arguments.size()-1,
453                                       arguments.data());
454     if (!compiled) {
455         return;
456     }
457 
458     //===--------------------------------------------------------------------===//
459     // Create and load the shared lib
460     //===--------------------------------------------------------------------===//
461 
462     if (!SharedLibraryUtils::createSharedLibrary(
463             getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) {
464         ALOGE("Failed to link object file '%s'", resName);
465         unlink(objFilePath.c_str());
466         return;
467     }
468 
469     unlink(objFilePath.c_str());
470 
471     mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
472     if (mScriptObj == nullptr) {
473         ALOGE("Unable to load '%s'", resName);
474         return;
475     }
476 
477     if (alreadyLoaded) {
478         // Delete the temporary, random-named file that we created to avoid
479         // interfering with an already loaded shared library.
480         string cloneFilePath(cacheDir);
481         cloneFilePath.append("/");
482         cloneFilePath.append(cloneName.c_str());
483         cloneFilePath.append(".so");
484         unlink(cloneFilePath.c_str());
485     }
486 
487     mExecutable = ScriptExecutable::createFromSharedObject(
488         getCpuRefImpl()->getContext(),
489         mScriptObj);
490 
491 #endif  // RS_COMPATIBILITY_LIB
492 }
493 
execute()494 void CpuScriptGroup2Impl::execute() {
495     for (auto batch : mBatches) {
496         batch->setGlobalsForBatch();
497         batch->run();
498     }
499 }
500 
setGlobalsForBatch()501 void Batch::setGlobalsForBatch() {
502     for (CPUClosure* cpuClosure : mClosures) {
503         const Closure* closure = cpuClosure->mClosure;
504         const IDBase* funcID = closure->mFunctionID.get();
505         Script* s = funcID->mScript;;
506         for (const auto& p : closure->mGlobals) {
507             const void* value = p.second.first;
508             int size = p.second.second;
509             if (value == nullptr && size == 0) {
510                 // This indicates the current closure depends on another closure for a
511                 // global in their shared module (script). In this case we don't need to
512                 // copy the value. For example, an invoke intializes a global variable
513                 // which a kernel later reads.
514                 continue;
515             }
516             rsAssert(p.first != nullptr);
517             Script* script = p.first->mScript;
518             RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl();
519             const RsdCpuScriptImpl *cpuScript =
520                     (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
521             int slot = p.first->mSlot;
522             ScriptExecutable* exec = mGroup->getExecutable();
523             if (exec != nullptr) {
524                 const char* varName = cpuScript->getFieldName(slot);
525                 void* addr = exec->getFieldAddress(varName);
526                 if (size < 0) {
527                     rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
528                                  (rs_object_base*)addr, (ObjectBase*)value);
529                 } else {
530                     memcpy(addr, (const void*)&value, size);
531                 }
532             } else {
533                 // We use -1 size to indicate an ObjectBase rather than a primitive type
534                 if (size < 0) {
535                     s->setVarObj(slot, (ObjectBase*)value);
536                 } else {
537                     s->setVar(slot, (const void*)&value, size);
538                 }
539             }
540         }
541     }
542 }
543 
run()544 void Batch::run() {
545     if (!mClosures.front()->mClosure->mIsKernel) {
546         rsAssert(mClosures.size() == 1);
547 
548         // This batch contains a single closure for an invoke function
549         CPUClosure* cc = mClosures.front();
550         const Closure* c = cc->mClosure;
551 
552         if (mFunc != nullptr) {
553             // TODO: Need align pointers for x86_64.
554             // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
555             ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
556         } else {
557             const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
558             rsAssert(invokeID != nullptr);
559             cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
560         }
561 
562         return;
563     }
564 
565     if (mFunc != nullptr) {
566         MTLaunchStruct mtls;
567         const CPUClosure* firstCpuClosure = mClosures.front();
568         const CPUClosure* lastCpuClosure = mClosures.back();
569 
570         firstCpuClosure->mSi->forEachMtlsSetup(
571                 (const Allocation**)firstCpuClosure->mClosure->mArgs,
572                 firstCpuClosure->mClosure->mNumArg,
573                 lastCpuClosure->mClosure->mReturnValue,
574                 nullptr, 0, nullptr, &mtls);
575 
576         mtls.script = nullptr;
577         mtls.fep.usr = nullptr;
578         mtls.kernel = (ForEachFunc_t)mFunc;
579 
580         mGroup->getCpuRefImpl()->launchThreads(
581                 (const Allocation**)firstCpuClosure->mClosure->mArgs,
582                 firstCpuClosure->mClosure->mNumArg,
583                 lastCpuClosure->mClosure->mReturnValue,
584                 nullptr, &mtls);
585 
586         return;
587     }
588 
589     for (CPUClosure* cpuClosure : mClosures) {
590         const Closure* closure = cpuClosure->mClosure;
591         const ScriptKernelID* kernelID =
592                 (const ScriptKernelID*)closure->mFunctionID.get();
593         cpuClosure->mSi->preLaunch(kernelID->mSlot,
594                                    (const Allocation**)closure->mArgs,
595                                    closure->mNumArg, closure->mReturnValue,
596                                    nullptr, 0, nullptr);
597     }
598 
599     const CPUClosure* cpuClosure = mClosures.front();
600     const Closure* closure = cpuClosure->mClosure;
601     MTLaunchStruct mtls;
602 
603     if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
604                                           closure->mNumArg,
605                                           closure->mReturnValue,
606                                           nullptr, 0, nullptr, &mtls)) {
607 
608         mtls.script = nullptr;
609         mtls.kernel = (void (*)())&groupRoot;
610         mtls.fep.usr = &mClosures;
611 
612         mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
613     }
614 
615     for (CPUClosure* cpuClosure : mClosures) {
616         const Closure* closure = cpuClosure->mClosure;
617         const ScriptKernelID* kernelID =
618                 (const ScriptKernelID*)closure->mFunctionID.get();
619         cpuClosure->mSi->postLaunch(kernelID->mSlot,
620                                     (const Allocation**)closure->mArgs,
621                                     closure->mNumArg, closure->mReturnValue,
622                                     nullptr, 0, nullptr);
623     }
624 }
625 
626 }  // namespace renderscript
627 }  // namespace android
628