1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "rsovScript.h"
18 
19 #include "bcinfo/MetadataExtractor.h"
20 #include "rsContext.h"
21 #include "rsDefines.h"
22 #include "rsType.h"
23 #include "rsUtils.h"
24 #include "rsovAllocation.h"
25 #include "rsovContext.h"
26 #include "rsovCore.h"
27 #include "spirit/instructions.h"
28 #include "spirit/module.h"
29 
30 #include <fstream>
31 #include <functional>
32 
33 namespace android {
34 namespace renderscript {
35 namespace rsov {
36 
37 namespace {
// Host-side view of one entry of the global allocation metadata buffer.
// Layout of this struct has to be the same as the struct in generated SPIR-V;
// on the host it is four consecutive 32-bit words.
// TODO: generate this file from some spec that is shared with the compiler
struct rsovTypeInfo {
  uint32_t element_size;  // TODO: not implemented
  uint32_t x_size;        // extent along X
  uint32_t y_size;        // extent along Y
  uint32_t z_size;        // extent along Z
};

// Fail the build, rather than silently corrupting the marshalled data, if the
// struct ever picks up padding or extra fields.
static_assert(sizeof(rsovTypeInfo) == 4 * sizeof(uint32_t),
              "rsovTypeInfo must stay four packed 32-bit words");
46 
// Path of the compiler executable that turns bitcode into SPIR-V. Both the
// characters and the pointer itself are const so the path cannot be re-pointed
// at run time.
const char *const COMPILER_EXE_PATH = "/system/bin/bcc_rsov";
48 
setCompilerArgs(const char * bcFileName,const char * cacheDir)49 std::vector<const char *> setCompilerArgs(const char *bcFileName,
50                                           const char *cacheDir) {
51   rsAssert(bcFileName && cacheDir);
52 
53   std::vector<const char *> args;
54 
55   args.push_back(COMPILER_EXE_PATH);
56   args.push_back(bcFileName);
57 
58   args.push_back(nullptr);
59   return args;
60 }
61 
// Writes `size` bytes starting at `bytes` to `filename` in binary mode.
// Failures to open or write are not reported to the caller.
void writeBytes(const char *filename, const char *bytes, size_t size) {
  std::ofstream out;
  out.open(filename, std::ios::binary);
  out.write(bytes, size);
  out.close();
}
67 
// Reads the whole of `filename` as a sequence of 32-bit words.
//
// Returns an empty vector when the file cannot be opened, is empty, has a size
// that is not a multiple of four bytes, or cannot be read completely. Without
// these checks a failed open reports a length of -1, which turns into an
// enormous vector size, and a ragged length makes read() write past the end
// of the word buffer.
std::vector<uint32_t> readWords(const char *filename) {
  std::ifstream ifs(filename, std::ios::binary);
  if (!ifs) {
    return std::vector<uint32_t>();
  }

  ifs.seekg(0, ifs.end);
  const std::streamoff length = ifs.tellg();
  ifs.seekg(0, ifs.beg);

  // tellg() yields -1 on failure; anything that is not a whole number of
  // words cannot be a valid SPIR-V binary.
  if (length <= 0 ||
      (static_cast<size_t>(length) % sizeof(uint32_t)) != 0) {
    return std::vector<uint32_t>();
  }

  std::vector<uint32_t> spvWords(static_cast<size_t>(length) /
                                 sizeof(uint32_t));

  ifs.read(reinterpret_cast<char *>(spvWords.data()), length);
  if (ifs.gcount() != length) {
    // Short read: do not hand back partially filled data.
    return std::vector<uint32_t>();
  }

  return spvWords;
}
85 
compileBitcode(const char * resName,const char * cacheDir,const char * bitcode,size_t bitcodeSize)86 std::vector<uint32_t> compileBitcode(const char *resName, const char *cacheDir,
87                                      const char *bitcode, size_t bitcodeSize) {
88   rsAssert(bitcode && bitcodeSize);
89 
90   // TODO: Cache the generated code
91 
92   std::string bcFileName(cacheDir);
93   bcFileName.append("/");
94   bcFileName.append(resName);
95   bcFileName.append(".bc");
96 
97   writeBytes(bcFileName.c_str(), bitcode, bitcodeSize);
98 
99   auto args = setCompilerArgs(bcFileName.c_str(), cacheDir);
100 
101   if (!rsuExecuteCommand(COMPILER_EXE_PATH, args.size() - 1, args.data())) {
102     ALOGE("compiler command line failed");
103     return std::vector<uint32_t>();
104   }
105 
106   ALOGV("compiler command line succeeded");
107 
108   std::string spvFileName(cacheDir);
109   spvFileName.append("/");
110   spvFileName.append(resName);
111   spvFileName.append(".spv");
112 
113   return readWords(spvFileName.c_str());
114 }
115 
116 }  // anonymous namespace
117 
isScriptCpuBacked(const Script * s)118 bool RSoVScript::isScriptCpuBacked(const Script *s) {
119   return s->mHal.info.mVersionMinor == CPU_SCRIPT_MAGIC_NUMBER;
120 }
121 
initScriptOnCpu(Script * s,RsdCpuReference::CpuScript * cs)122 void RSoVScript::initScriptOnCpu(Script *s, RsdCpuReference::CpuScript *cs) {
123   s->mHal.drv = cs;
124   s->mHal.info.mVersionMajor = 0;  // Unused. Don't care.
125   s->mHal.info.mVersionMinor = CPU_SCRIPT_MAGIC_NUMBER;
126 }
127 
initScriptOnRSoV(Script * s,RSoVScript * rsovScript)128 void RSoVScript::initScriptOnRSoV(Script *s, RSoVScript *rsovScript) {
129   s->mHal.drv = rsovScript;
130   s->mHal.info.mVersionMajor = 0;  // Unused. Don't care.
131   s->mHal.info.mVersionMinor = 0;
132 }
133 
RSoVScript(RSoVContext * context,std::vector<uint32_t> && spvWords,bcinfo::MetadataExtractor * ME,std::map<std::string,int> * GA2ID)134 RSoVScript::RSoVScript(RSoVContext *context, std::vector<uint32_t> &&spvWords,
135                        bcinfo::MetadataExtractor *ME,
136                        std::map<std::string, int> *GA2ID)
137     : mRSoV(context),
138       mDevice(context->getDevice()),
139       mSPIRVWords(std::move(spvWords)),
140       mME(ME),
141       mGlobalAllocationMetadata(nullptr),
142       mGAMapping(GA2ID) {}
143 
~RSoVScript()144 RSoVScript::~RSoVScript() {
145   delete mCpuScript;
146   delete mME;
147 }
148 
populateScript(Script *)149 void RSoVScript::populateScript(Script *) {
150   // TODO: implement this
151 }
152 
invokeFunction(uint32_t slot,const void * params,size_t paramLength)153 void RSoVScript::invokeFunction(uint32_t slot, const void *params,
154                                 size_t paramLength) {
155   getCpuScript()->invokeFunction(slot, params, paramLength);
156 }
157 
invokeRoot()158 int RSoVScript::invokeRoot() { return getCpuScript()->invokeRoot(); }
159 
invokeForEach(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)160 void RSoVScript::invokeForEach(uint32_t slot, const Allocation **ains,
161                                uint32_t inLen, Allocation *aout,
162                                const void *usr, uint32_t usrLen,
163                                const RsScriptCall *sc) {
164   // TODO: Handle kernel without input Allocation
165   rsAssert(ains);
166   std::vector<RSoVAllocation *> inputAllocations(inLen);
167   for (uint32_t i = 0; i < inLen; ++i) {
168     inputAllocations[i] = static_cast<RSoVAllocation *>(ains[i]->mHal.drv);
169   }
170   RSoVAllocation *outputAllocation =
171       static_cast<RSoVAllocation *>(aout->mHal.drv);
172   runForEach(slot, inLen, inputAllocations, outputAllocation);
173 }
174 
invokeReduce(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const RsScriptCall * sc)175 void RSoVScript::invokeReduce(uint32_t slot, const Allocation **ains,
176                               uint32_t inLen, Allocation *aout,
177                               const RsScriptCall *sc) {
178   getCpuScript()->invokeReduce(slot, ains, inLen, aout, sc);
179 }
180 
invokeInit()181 void RSoVScript::invokeInit() {
182   // TODO: implement this
183 }
184 
invokeFreeChildren()185 void RSoVScript::invokeFreeChildren() {
186   // TODO: implement this
187 }
188 
setGlobalVar(uint32_t slot,const void * data,size_t dataLength)189 void RSoVScript::setGlobalVar(uint32_t slot, const void *data,
190                               size_t dataLength) {
191   // TODO: implement this
192   ALOGV("%s missing.", __FUNCTION__);
193 }
194 
getGlobalVar(uint32_t slot,void * data,size_t dataLength)195 void RSoVScript::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
196   // TODO: implement this
197   ALOGV("%s missing.", __FUNCTION__);
198 }
199 
setGlobalVarWithElemDims(uint32_t slot,const void * data,size_t dataLength,const Element * e,const uint32_t * dims,size_t dimLength)200 void RSoVScript::setGlobalVarWithElemDims(uint32_t slot, const void *data,
201                                           size_t dataLength, const Element *e,
202                                           const uint32_t *dims,
203                                           size_t dimLength) {
204   // TODO: implement this
205 }
206 
setGlobalBind(uint32_t slot,Allocation * data)207 void RSoVScript::setGlobalBind(uint32_t slot, Allocation *data) {
208   ALOGV("%s succeeded.", __FUNCTION__);
209   // TODO: implement this
210 }
211 
setGlobalObj(uint32_t slot,ObjectBase * obj)212 void RSoVScript::setGlobalObj(uint32_t slot, ObjectBase *obj) {
213   mCpuScript->setGlobalObj(slot, obj);
214   ALOGV("%s succeeded.", __FUNCTION__);
215 }
216 
getAllocationForPointer(const void * ptr) const217 Allocation *RSoVScript::getAllocationForPointer(const void *ptr) const {
218   // TODO: implement this
219   return nullptr;
220 }
221 
getGlobalEntries() const222 int RSoVScript::getGlobalEntries() const {
223   // TODO: implement this
224   return 0;
225 }
226 
getGlobalName(int i) const227 const char *RSoVScript::getGlobalName(int i) const {
228   // TODO: implement this
229   return nullptr;
230 }
231 
getGlobalAddress(int i) const232 const void *RSoVScript::getGlobalAddress(int i) const {
233   // TODO: implement this
234   return nullptr;
235 }
236 
getGlobalSize(int i) const237 size_t RSoVScript::getGlobalSize(int i) const {
238   // TODO: implement this
239   return 0;
240 }
241 
getGlobalProperties(int i) const242 uint32_t RSoVScript::getGlobalProperties(int i) const {
243   // TODO: implement this
244   return 0;
245 }
246 
InitDescriptorAndPipelineLayouts(uint32_t inLen)247 void RSoVScript::InitDescriptorAndPipelineLayouts(uint32_t inLen) {
248   // TODO: global variables
249   // TODO: kernels with zero output allocations
250   std::vector<VkDescriptorSetLayoutBinding> layout_bindings{
251       {
252           // for the global allocation metadata
253           .binding = 0,
254           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
255           .descriptorCount = 1,
256           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
257           .pImmutableSamplers = nullptr,
258       },
259       {
260           // for the output allocation
261           .binding = 1,
262           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
263           .descriptorCount = 1,
264           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
265           .pImmutableSamplers = nullptr,
266       },
267   };
268 
269   // initialize descriptors for input allocations
270   for (uint32_t i = 0; i < inLen; ++i) {
271     layout_bindings.push_back({
272         .binding = i + 2,  // input allocations start from bining #2
273         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
274         .descriptorCount = 1,
275         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
276         .pImmutableSamplers = nullptr,
277     });
278   }
279 
280   VkDescriptorSetLayoutCreateInfo descriptor_layout = {
281       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
282       .pNext = nullptr,
283       .flags = 0,
284       .bindingCount = inLen + 2,
285       .pBindings = layout_bindings.data(),
286   };
287 
288   VkResult res;
289 
290   mDescLayout.resize(NUM_DESCRIPTOR_SETS);
291   res = vkCreateDescriptorSetLayout(mDevice, &descriptor_layout, NULL,
292                                     mDescLayout.data());
293   if (res != VK_SUCCESS) {
294     __android_log_print(ANDROID_LOG_ERROR, "ComputeTest",
295                         "vkCreateDescriptorSetLayout() returns %d", res);
296   }
297   rsAssert(res == VK_SUCCESS);
298 
299   /* Now use the descriptor layout to create a pipeline layout */
300   VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {
301       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
302       .pNext = nullptr,
303       .pushConstantRangeCount = 0,
304       .pPushConstantRanges = nullptr,
305       .setLayoutCount = NUM_DESCRIPTOR_SETS,
306       .pSetLayouts = mDescLayout.data(),
307   };
308 
309   res = vkCreatePipelineLayout(mDevice, &pPipelineLayoutCreateInfo, NULL,
310                                &mPipelineLayout);
311   rsAssert(res == VK_SUCCESS);
312 
313   ALOGV("%s succeeded.", __FUNCTION__);
314 }
315 
InitShader(uint32_t slot)316 void RSoVScript::InitShader(uint32_t slot) {
317   VkResult res;
318 
319   mShaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
320   mShaderStage.pNext = nullptr;
321   mShaderStage.pSpecializationInfo = nullptr;
322   mShaderStage.flags = 0;
323   mShaderStage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
324   const char **RSKernelNames = mME->getExportForEachNameList();
325   size_t RSKernelNum = mME->getExportForEachSignatureCount();
326   rsAssert(slot < RSKernelNum);
327   rsAssert(RSKernelNames);
328   rsAssert(RSKernelNames[slot]);
329   ALOGV("slot = %d kernel name = %s", slot, RSKernelNames[slot]);
330   std::string entryName("entry_");
331   entryName.append(RSKernelNames[slot]);
332   mShaderStage.pName = strndup(entryName.c_str(), entryName.size());
333 
334   VkShaderModuleCreateInfo moduleCreateInfo = {
335       .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
336       .pNext = nullptr,
337       .flags = 0,
338       .codeSize = mSPIRVWords.size() * sizeof(unsigned int),
339       .pCode = mSPIRVWords.data(),
340   };
341   res = vkCreateShaderModule(mDevice, &moduleCreateInfo, NULL,
342                              &mShaderStage.module);
343   rsAssert(res == VK_SUCCESS);
344 
345   ALOGV("%s succeeded.", __FUNCTION__);
346 }
347 
InitDescriptorPool(uint32_t inLen)348 void RSoVScript::InitDescriptorPool(uint32_t inLen) {
349   /* DEPENDS on InitDescriptorAndPipelineLayouts() */
350 
351   VkResult res;
352   VkDescriptorPoolSize type_count[] = {
353       {
354           .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
355           .descriptorCount = 2 + inLen,
356       },
357   };
358 
359   VkDescriptorPoolCreateInfo descriptor_pool = {
360       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
361       .pNext = nullptr,
362       .maxSets = 1,
363       .poolSizeCount = NELEM(type_count),
364       .pPoolSizes = type_count,
365   };
366 
367   res = vkCreateDescriptorPool(mDevice, &descriptor_pool, NULL, &mDescPool);
368   rsAssert(res == VK_SUCCESS);
369 
370   ALOGV("%s succeeded.", __FUNCTION__);
371 }
372 
373 // Iterate through a list of global allocations that are used inside the module
374 // and marshal their type information to a dedicated Vulkan Buffer
MarshalTypeInfo(void)375 void RSoVScript::MarshalTypeInfo(void) {
376   // Marshal global allocation metadata to the device
377   auto *cs = getCpuScript();
378   int nr_globals = mGAMapping->size();
379   if (mGlobalAllocationMetadata == nullptr) {
380     mGlobalAllocationMetadata.reset(
381         new RSoVBuffer(mRSoV, sizeof(struct rsovTypeInfo) * nr_globals));
382   }
383   struct rsovTypeInfo *mappedMetadata =
384       (struct rsovTypeInfo *)mGlobalAllocationMetadata->getHostPtr();
385   for (int i = 0; i < nr_globals; ++i) {
386     if (getGlobalRsType(cs->getGlobalProperties(i)) ==
387         RsDataType::RS_TYPE_ALLOCATION) {
388       ALOGV("global variable %d is an allocation!", i);
389       const void *host_buf;
390       cs->getGlobalVar(i, (void *)&host_buf, sizeof(host_buf));
391       if (!host_buf) continue;
392       const android::renderscript::Allocation *GA =
393           static_cast<const android::renderscript::Allocation *>(host_buf);
394       const android::renderscript::Type *T = GA->getType();
395       rsAssert(T);
396 
397       auto global_it = mGAMapping->find(cs->getGlobalName(i));
398       rsAssert(global_it != (*mGAMapping).end());
399       int id = global_it->second;
400       ALOGV("global allocation %s is mapped to ID %d", cs->getGlobalName(i),
401             id);
402       // TODO: marshal other properties
403       mappedMetadata[id].x_size = T->getDimX();
404       mappedMetadata[id].y_size = T->getDimY();
405       mappedMetadata[id].z_size = T->getDimZ();
406     }
407   }
408 }
409 
InitDescriptorSet(const std::vector<RSoVAllocation * > & inputAllocations,RSoVAllocation * outputAllocation)410 void RSoVScript::InitDescriptorSet(
411     const std::vector<RSoVAllocation *> &inputAllocations,
412     RSoVAllocation *outputAllocation) {
413   VkResult res;
414 
415   VkDescriptorSetAllocateInfo alloc_info = {
416       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
417       .pNext = NULL,
418       .descriptorPool = mDescPool,
419       .descriptorSetCount = NUM_DESCRIPTOR_SETS,
420       .pSetLayouts = mDescLayout.data(),
421   };
422 
423   mDescSet.resize(NUM_DESCRIPTOR_SETS);
424   res = vkAllocateDescriptorSets(mDevice, &alloc_info, mDescSet.data());
425   ALOGD("vkAllocateDescriptorSets() result = %d", res);
426   rsAssert(res == VK_SUCCESS);
427 
428   // TODO: support for set up the binding(s) of global variables
429   uint32_t nBindings = inputAllocations.size() + 1;  // input + output.
430   std::vector<VkWriteDescriptorSet> writes{
431       // Metadata for global allocations
432       {
433           .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
434           .dstSet = mDescSet[0],
435           .dstBinding = 0,
436           .dstArrayElement = 0,
437           .descriptorCount = 1,
438           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
439           .pBufferInfo = mGlobalAllocationMetadata->getBufferInfo(),
440       },
441 
442       {
443           .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
444           .dstSet = mDescSet[0],
445           .dstBinding = 1,
446           .dstArrayElement = 0,
447           .descriptorCount = 1,
448           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
449           .pBufferInfo = outputAllocation->getBuffer()->getBufferInfo(),
450       },
451   };
452   for (uint32_t i = 0; i < inputAllocations.size(); ++i) {
453     writes.push_back({
454         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
455         .dstSet = mDescSet[0],
456         .dstBinding = 2 + i,  // input allocations start from binding #2
457         .dstArrayElement = 0,
458         .descriptorCount = 1,
459         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
460         .pBufferInfo = inputAllocations[i]->getBuffer()->getBufferInfo(),
461     });
462   }
463 
464   vkUpdateDescriptorSets(mDevice, writes.size(), writes.data(), 0, NULL);
465 
466   ALOGV("%s succeeded.", __FUNCTION__);
467 }
468 
InitPipeline()469 void RSoVScript::InitPipeline() {
470   // DEPENDS on mShaderStage, i.e., InitShader()
471 
472   VkResult res;
473 
474   VkComputePipelineCreateInfo pipeline_info = {
475       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
476       .pNext = nullptr,
477       .layout = mPipelineLayout,
478       .basePipelineHandle = VK_NULL_HANDLE,
479       .basePipelineIndex = 0,
480       .flags = 0,
481       .stage = mShaderStage,
482   };
483   res = vkCreateComputePipelines(mDevice, VK_NULL_HANDLE, 1, &pipeline_info,
484                                  NULL, &mComputePipeline);
485   rsAssert(res == VK_SUCCESS);
486 
487   ALOGV("%s succeeded.", __FUNCTION__);
488 }
489 
runForEach(uint32_t slot,uint32_t inLen,const std::vector<RSoVAllocation * > & inputAllocations,RSoVAllocation * outputAllocation)490 void RSoVScript::runForEach(
491     uint32_t slot, uint32_t inLen,
492     const std::vector<RSoVAllocation *> &inputAllocations,
493     RSoVAllocation *outputAllocation) {
494   VkResult res;
495 
496   InitDescriptorAndPipelineLayouts(inLen);
497   InitShader(slot);
498   InitDescriptorPool(inLen);
499   MarshalTypeInfo();
500   InitDescriptorSet(inputAllocations, outputAllocation);
501   // InitPipelineCache();
502   InitPipeline();
503 
504   VkCommandBuffer cmd;
505 
506   VkCommandBufferAllocateInfo cmd_info = {
507       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
508       .pNext = nullptr,
509       .commandPool = mRSoV->getCmdPool(),
510       .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
511       .commandBufferCount = 1,
512   };
513 
514   res = vkAllocateCommandBuffers(mDevice, &cmd_info, &cmd);
515   rsAssert(res == VK_SUCCESS);
516 
517   VkCommandBufferBeginInfo cmd_buf_info = {
518       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
519       .pNext = nullptr,
520       .flags = 0,
521       .pInheritanceInfo = nullptr,
522   };
523 
524   res = vkBeginCommandBuffer(cmd, &cmd_buf_info);
525   rsAssert(res == VK_SUCCESS);
526 
527   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mComputePipeline);
528 
529   vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mPipelineLayout,
530                           0, mDescSet.size(), mDescSet.data(), 0, nullptr);
531   // Assuming all input allocations are of the same dimensionality
532   const uint32_t width = inputAllocations[0]->getWidth();
533   const uint32_t height = rsMax(inputAllocations[0]->getHeight(), 1U);
534   const uint32_t depth = rsMax(inputAllocations[0]->getDepth(), 1U);
535   vkCmdDispatch(cmd, width, height, depth);
536 
537   res = vkEndCommandBuffer(cmd);
538   assert(res == VK_SUCCESS);
539 
540   VkSubmitInfo submit_info = {
541       .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
542       .commandBufferCount = 1,
543       .pCommandBuffers = &cmd,
544   };
545 
546   VkFence fence;
547 
548   VkFenceCreateInfo fenceInfo = {
549       .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
550       .pNext = nullptr,
551       .flags = 0,
552   };
553 
554   vkCreateFence(mDevice, &fenceInfo, NULL, &fence);
555 
556   vkQueueSubmit(mRSoV->getQueue(), 1, &submit_info, fence);
557 
558   // Make sure command buffer is finished
559   do {
560     res = vkWaitForFences(mDevice, 1, &fence, VK_TRUE, 100000);
561   } while (res == VK_TIMEOUT);
562 
563   rsAssert(res == VK_SUCCESS);
564 
565   vkDestroyFence(mDevice, fence, NULL);
566 
567   // TODO: shall we reuse command buffers?
568   VkCommandBuffer cmd_bufs[] = {cmd};
569   vkFreeCommandBuffers(mDevice, mRSoV->getCmdPool(), 1, cmd_bufs);
570 
571   vkDestroyPipeline(mDevice, mComputePipeline, nullptr);
572   for (int i = 0; i < NUM_DESCRIPTOR_SETS; i++)
573     vkDestroyDescriptorSetLayout(mDevice, mDescLayout[i], nullptr);
574   vkDestroyPipelineLayout(mDevice, mPipelineLayout, nullptr);
575   vkFreeDescriptorSets(mDevice, mDescPool, NUM_DESCRIPTOR_SETS,
576                        mDescSet.data());
577   vkDestroyDescriptorPool(mDevice, mDescPool, nullptr);
578   free((void *)mShaderStage.pName);
579   vkDestroyShaderModule(mDevice, mShaderStage.module, nullptr);
580 
581   ALOGV("%s succeeded.", __FUNCTION__);
582 }
583 
584 }  // namespace rsov
585 }  // namespace renderscript
586 }  // namespace android
587 
588 using android::renderscript::Allocation;
589 using android::renderscript::Context;
590 using android::renderscript::Element;
591 using android::renderscript::ObjectBase;
592 using android::renderscript::RsdCpuReference;
593 using android::renderscript::Script;
594 using android::renderscript::ScriptC;
595 using android::renderscript::rs_script;
596 using android::renderscript::rsov::RSoVContext;
597 using android::renderscript::rsov::RSoVScript;
598 using android::renderscript::rsov::compileBitcode;
599 
600 namespace {
// A class to parse global allocation metadata; essentially a subset of JSON.
// The input would look like {"__RSoV_GA": {"g":42}}.
// The result is stored in a reference to a map<string, int>.
//
// Input that does not have this shape is rejected instead of being read out
// of bounds: parse() returns false and leaves the map untouched.
class ParseMD {
 public:
  // s   - the metadata string to parse (copied).
  // map - receives one name -> ID entry per parsed pair; must outlive this
  //       object.
  ParseMD(std::string s, std::map<std::string, int> &map)
      : mString(s), mMapping(map) {}

  // Parses the string given at construction. Returns true and adds every
  // "name":id pair to the map when the whole string is well formed (an empty
  // inner object is fine and adds nothing). Returns false, adding nothing,
  // otherwise.
  bool parse(void) {
    // Remove the outermost two pairs of braces.
    std::string body;
    if (!removeBraces(mString, &body) || !removeBraces(body, &body)) {
      return false;
    }

    // Now we are supposed to have a comma-separated list that looks like:
    // "foo":42, "bar":56
    // Collect everything first so a malformed later entry cannot leave the
    // map half updated.
    std::vector<std::pair<std::string, int> > entries;
    for (const std::string &item : split<','>(body)) {
      const std::vector<std::string> pair = split<':'>(item);
      if (pair.size() != 2) return false;

      std::string ga_name;
      if (!removeQuotes(pair[0], &ga_name)) return false;

      // strtol() skips leading blanks like atoi() does, but also reports
      // whether any digits were consumed at all.
      char *end = nullptr;
      const long id = strtol(pair[1].c_str(), &end, 10);
      if (end == pair[1].c_str()) return false;

      entries.push_back(std::make_pair(ga_name, static_cast<int>(id)));
    }

    for (const auto &entry : entries) {
      mMapping[entry.first] = entry.second;
    }
    return true;
  }

 private:
  // Stores in *out the text strictly between the first L and the last R in s.
  // Returns false, leaving *out unchanged, when either delimiter is missing or
  // the last R does not come after the first L. `out` may refer to `s`.
  template <char L, char R>
  static bool removeMatching(const std::string &s, std::string *out) {
    const std::string::size_type left = s.find(L);
    if (left == std::string::npos) return false;
    const std::string::size_type right = s.rfind(R);
    if (right == std::string::npos || right <= left) return false;
    *out = s.substr(left + 1, right - left - 1);
    return true;
  }

  static bool removeBraces(const std::string &s, std::string *out) {
    return removeMatching<'{', '}'>(s, out);
  }

  static bool removeQuotes(const std::string &s, std::string *out) {
    return removeMatching<'"', '"'>(s, out);
  }

  // Splits s at every occurrence of D and returns the pieces in order.
  // Adjacent delimiters yield empty pieces; a trailing delimiter does not
  // produce a final empty piece; an empty string yields no pieces.
  template <char D>
  static std::vector<std::string> split(const std::string &s) {
    std::vector<std::string> tokens;
    std::string::size_type begin = 0;
    while (begin < s.size()) {
      std::string::size_type end = s.find(D, begin);
      if (end == std::string::npos) end = s.size();
      tokens.push_back(s.substr(begin, end - begin));
      begin = end + 1;
    }
    return tokens;
  }

  std::string mString;
  std::map<std::string, int> &mMapping;
};
666 
667 }  // namespace
668 
669 class ExtractRSoVMD : public android::spirit::DoNothingVisitor {
670  public:
ExtractRSoVMD()671   ExtractRSoVMD() : mGAMapping(new std::map<std::string, int>) {}
672 
visit(android::spirit::StringInst * s)673   void visit(android::spirit::StringInst *s) {
674     ALOGV("ExtractRSoVMD: string = %s", s->mOperand1.c_str());
675     ParseMD p(s->mOperand1, *mGAMapping);
676     p.parse();
677   }
678 
takeMapping(void)679   std::map<std::string, int> *takeMapping(void) { return mGAMapping.release(); }
680 
681  private:
682   std::unique_ptr<std::map<std::string, int> > mGAMapping;
683 };
684 
rsovScriptInit(const Context * rsc,ScriptC * script,char const * resName,char const * cacheDir,uint8_t const * bitcode,size_t bitcodeSize,uint32_t flags)685 bool rsovScriptInit(const Context *rsc, ScriptC *script, char const *resName,
686                     char const *cacheDir, uint8_t const *bitcode,
687                     size_t bitcodeSize, uint32_t flags) {
688   RSoVHal *hal = static_cast<RSoVHal *>(rsc->mHal.drv);
689 
690   std::unique_ptr<RsdCpuReference::CpuScript> cs(hal->mCpuRef->createScript(
691       script, resName, cacheDir, bitcode, bitcodeSize, flags));
692   if (cs == nullptr) {
693     ALOGE("Failed creating a CPU script %p for %s (%p)", cs.get(), resName,
694           script);
695     return false;
696   }
697   cs->populateScript(script);
698 
699   std::unique_ptr<bcinfo::MetadataExtractor> bitcodeMetadata(
700       new bcinfo::MetadataExtractor((const char *)bitcode, bitcodeSize));
701   if (!bitcodeMetadata || !bitcodeMetadata->extract()) {
702     ALOGE("Could not extract metadata from bitcode from %s", resName);
703     return false;
704   }
705 
706   auto spvWords =
707       compileBitcode(resName, cacheDir, (const char *)bitcode, bitcodeSize);
708   if (!spvWords.empty()) {
709     // Extract compiler metadata on allocation->binding mapping
710     android::spirit::Module *module =
711         android::spirit::Deserialize<android::spirit::Module>(spvWords);
712     rsAssert(module);
713     ExtractRSoVMD ga_md;
714     module->accept(&ga_md);
715     RSoVScript *rsovScript =
716         new RSoVScript(hal->mRSoV, std::move(spvWords),
717                        bitcodeMetadata.release(), ga_md.takeMapping());
718     if (rsovScript) {
719       rsovScript->setCpuScript(cs.release());
720       RSoVScript::initScriptOnRSoV(script, rsovScript);
721       return true;
722     }
723   }
724 
725   ALOGD("Failed creating an RSoV script for %s", resName);
726   // Fall back to CPU driver instead
727   RSoVScript::initScriptOnCpu(script, cs.release());
728 
729   return true;
730 }
731 
rsovInitIntrinsic(const Context * rsc,Script * s,RsScriptIntrinsicID iid,Element * e)732 bool rsovInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid,
733                        Element *e) {
734   RSoVHal *dc = (RSoVHal *)rsc->mHal.drv;
735   RsdCpuReference::CpuScript *cs = dc->mCpuRef->createIntrinsic(s, iid, e);
736   if (cs == nullptr) {
737     return false;
738   }
739   s->mHal.drv = cs;
740   cs->populateScript(s);
741   return true;
742 }
743 
rsovScriptInvokeForEach(const Context * rsc,Script * s,uint32_t slot,const Allocation * ain,Allocation * aout,const void * usr,size_t usrLen,const RsScriptCall * sc)744 void rsovScriptInvokeForEach(const Context *rsc, Script *s, uint32_t slot,
745                              const Allocation *ain, Allocation *aout,
746                              const void *usr, size_t usrLen,
747                              const RsScriptCall *sc) {
748   if (ain == nullptr) {
749     rsovScriptInvokeForEachMulti(rsc, s, slot, nullptr, 0, aout, usr, usrLen,
750                                  sc);
751   } else {
752     const Allocation *ains[1] = {ain};
753 
754     rsovScriptInvokeForEachMulti(rsc, s, slot, ains, 1, aout, usr, usrLen, sc);
755   }
756 }
757 
rsovScriptInvokeForEachMulti(const Context * rsc,Script * s,uint32_t slot,const Allocation ** ains,size_t inLen,Allocation * aout,const void * usr,size_t usrLen,const RsScriptCall * sc)758 void rsovScriptInvokeForEachMulti(const Context *rsc, Script *s, uint32_t slot,
759                                   const Allocation **ains, size_t inLen,
760                                   Allocation *aout, const void *usr,
761                                   size_t usrLen, const RsScriptCall *sc) {
762   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
763   cs->invokeForEach(slot, ains, inLen, aout, usr, usrLen, sc);
764 }
765 
rsovScriptInvokeRoot(const Context * dc,Script * s)766 int rsovScriptInvokeRoot(const Context *dc, Script *s) {
767   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
768   return cs->invokeRoot();
769 }
770 
rsovScriptInvokeInit(const Context * dc,Script * s)771 void rsovScriptInvokeInit(const Context *dc, Script *s) {
772   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
773   cs->invokeInit();
774 }
775 
rsovScriptInvokeFreeChildren(const Context * dc,Script * s)776 void rsovScriptInvokeFreeChildren(const Context *dc, Script *s) {
777   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
778   cs->invokeFreeChildren();
779 }
780 
rsovScriptInvokeFunction(const Context * dc,Script * s,uint32_t slot,const void * params,size_t paramLength)781 void rsovScriptInvokeFunction(const Context *dc, Script *s, uint32_t slot,
782                               const void *params, size_t paramLength) {
783   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
784   cs->invokeFunction(slot, params, paramLength);
785 }
786 
rsovScriptInvokeReduce(const Context * dc,Script * s,uint32_t slot,const Allocation ** ains,size_t inLen,Allocation * aout,const RsScriptCall * sc)787 void rsovScriptInvokeReduce(const Context *dc, Script *s, uint32_t slot,
788                             const Allocation **ains, size_t inLen,
789                             Allocation *aout, const RsScriptCall *sc) {
790   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
791   cs->invokeReduce(slot, ains, inLen, aout, sc);
792 }
793 
rsovScriptSetGlobalVar(const Context * dc,const Script * s,uint32_t slot,void * data,size_t dataLength)794 void rsovScriptSetGlobalVar(const Context *dc, const Script *s, uint32_t slot,
795                             void *data, size_t dataLength) {
796   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
797   cs->setGlobalVar(slot, data, dataLength);
798 }
799 
rsovScriptGetGlobalVar(const Context * dc,const Script * s,uint32_t slot,void * data,size_t dataLength)800 void rsovScriptGetGlobalVar(const Context *dc, const Script *s, uint32_t slot,
801                             void *data, size_t dataLength) {
802   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
803   cs->getGlobalVar(slot, data, dataLength);
804 }
805 
rsovScriptSetGlobalVarWithElemDims(const Context * dc,const Script * s,uint32_t slot,void * data,size_t dataLength,const android::renderscript::Element * elem,const uint32_t * dims,size_t dimLength)806 void rsovScriptSetGlobalVarWithElemDims(
807     const Context *dc, const Script *s, uint32_t slot, void *data,
808     size_t dataLength, const android::renderscript::Element *elem,
809     const uint32_t *dims, size_t dimLength) {
810   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
811   cs->setGlobalVarWithElemDims(slot, data, dataLength, elem, dims, dimLength);
812 }
813 
rsovScriptSetGlobalBind(const Context * dc,const Script * s,uint32_t slot,Allocation * data)814 void rsovScriptSetGlobalBind(const Context *dc, const Script *s, uint32_t slot,
815                              Allocation *data) {
816   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
817   cs->setGlobalBind(slot, data);
818 }
819 
rsovScriptSetGlobalObj(const Context * dc,const Script * s,uint32_t slot,ObjectBase * data)820 void rsovScriptSetGlobalObj(const Context *dc, const Script *s, uint32_t slot,
821                             ObjectBase *data) {
822   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
823   cs->setGlobalObj(slot, data);
824 }
825 
rsovScriptDestroy(const Context * dc,Script * s)826 void rsovScriptDestroy(const Context *dc, Script *s) {
827   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
828   delete cs;
829   s->mHal.drv = nullptr;
830 }
831 
rsovScriptGetAllocationForPointer(const android::renderscript::Context * dc,const android::renderscript::Script * sc,const void * ptr)832 Allocation *rsovScriptGetAllocationForPointer(
833     const android::renderscript::Context *dc,
834     const android::renderscript::Script *sc, const void *ptr) {
835   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)sc->mHal.drv;
836   return cs->getAllocationForPointer(ptr);
837 }
838 
rsovScriptUpdateCachedObject(const Context * rsc,const Script * script,rs_script * obj)839 void rsovScriptUpdateCachedObject(const Context *rsc, const Script *script,
840                                   rs_script *obj) {
841   obj->p = script;
842 #ifdef __LP64__
843   obj->unused1 = nullptr;
844   obj->unused2 = nullptr;
845   obj->unused3 = nullptr;
846 #endif
847 }
848