1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "rsovScript.h"
18 
19 #include "bcinfo/MetadataExtractor.h"
20 #include "module.h"
21 #include "rsContext.h"
22 #include "rsDefines.h"
23 #include "rsType.h"
24 #include "rsUtils.h"
25 #include "rsovAllocation.h"
26 #include "rsovContext.h"
27 #include "rsovCore.h"
28 #include "spirit/file_utils.h"
29 #include "spirit/instructions.h"
30 #include "spirit/module.h"
31 
32 #include <fstream>
33 #include <functional>
34 #include <iostream>
35 #include <sstream>
36 #include <string>
37 
38 extern "C" {
39 char*  __GPUBlock = nullptr;
40 }
41 
42 namespace android {
43 namespace renderscript {
44 namespace rsov {
45 
46 namespace {
// Per-allocation type metadata that MarshalTypeInfo() writes into the global
// allocation metadata buffer, one entry per global allocation ID.
// Layout of this struct has to be the same as the struct in generated SPIR-V
// TODO: generate this file from some spec that is shared with the compiler
struct rsovTypeInfo {
  uint32_t element_size;  // TODO: not implemented
  uint32_t x_size;        // Set from Type::getDimX() of the allocation
  uint32_t y_size;        // Set from Type::getDimY() of the allocation
  uint32_t z_size;        // Set from Type::getDimZ() of the allocation
};
55 
// Path of the rs2spirv compiler executable; used both as the command to run
// and as the first entry of its argument vector.
const char *COMPILER_EXE_PATH = "/system/bin/rs2spirv";
57 
setCompilerArgs(const char * bcFileName,const char * cacheDir)58 std::vector<const char *> setCompilerArgs(const char *bcFileName,
59                                           const char *cacheDir) {
60   rsAssert(bcFileName && cacheDir);
61 
62   std::vector<const char *> args;
63 
64   args.push_back(COMPILER_EXE_PATH);
65   args.push_back(bcFileName);
66 
67   args.push_back(nullptr);
68   return args;
69 }
70 
// Writes `size` bytes starting at `bytes` to `filename` in binary mode,
// replacing any previous content. Stream errors are not reported.
void writeBytes(const char *filename, const char *bytes, size_t size) {
  std::ofstream out;
  out.open(filename, std::ios::binary);
  out.write(bytes, size);
  out.close();
}
76 
readWords(const char * filename)77 std::vector<uint32_t> readWords(const char *filename) {
78   std::ifstream ifs(filename, std::ios::binary);
79 
80   ifs.seekg(0, ifs.end);
81   int length = ifs.tellg();
82   ifs.seekg(0, ifs.beg);
83 
84   rsAssert(((length & 3) == 0) && "File size expected to be multiples of 4");
85 
86   std::vector<uint32_t> spvWords(length / sizeof(uint32_t));
87 
88   ifs.read((char *)(spvWords.data()), length);
89 
90   ifs.close();
91 
92   return spvWords;
93 }
94 
compileBitcode(const char * resName,const char * cacheDir,const char * bitcode,size_t bitcodeSize,std::vector<uint8_t> & modifiedBitcode)95 std::vector<uint32_t> compileBitcode(const char *resName, const char *cacheDir,
96                                      const char *bitcode, size_t bitcodeSize,
97                                      std::vector<uint8_t> &modifiedBitcode) {
98   rsAssert(bitcode && bitcodeSize);
99 
100   // TODO: Cache the generated code
101 
102   std::string bcFileName(cacheDir);
103   bcFileName.append("/");
104   bcFileName.append(resName);
105   bcFileName.append(".bc");
106 
107   writeBytes(bcFileName.c_str(), bitcode, bitcodeSize);
108 
109   auto args = setCompilerArgs(bcFileName.c_str(), cacheDir);
110 
111   if (!rsuExecuteCommand(COMPILER_EXE_PATH, args.size() - 1, args.data())) {
112     ALOGE("compiler command line failed");
113     return std::vector<uint32_t>();
114   }
115 
116   ALOGV("compiler command line succeeded");
117 
118   std::string spvFileName(cacheDir);
119   spvFileName.append("/");
120   spvFileName.append(resName);
121   spvFileName.append(".spv");
122 
123   std::string modifiedBCFileName(cacheDir);
124   modifiedBCFileName.append("/").append(resName).append("_modified.bc");
125 
126   args.pop_back();
127   args.push_back("-bc");
128   args.push_back(modifiedBCFileName.c_str());
129   args.push_back(nullptr);
130 
131   if (!rsuExecuteCommand(COMPILER_EXE_PATH, args.size() - 1, args.data())) {
132     ALOGE("compiler command line to create modified bitcode failed");
133     return std::vector<uint32_t>();
134   }
135 
136   modifiedBitcode = android::spirit::readFile<uint8_t>(modifiedBCFileName);
137 
138   return readWords(spvFileName.c_str());
139 }
140 
// Splits `str` at each `delimiter` and appends every token, parsed as a
// decimal offset, to `*offsets`.
//
// Tokens are parsed as unsigned values so that the whole uint32_t range is
// accepted (a signed int parse rejects offsets above INT_MAX). A token that
// is not a number still makes the std::sto* parse throw, as before.
void splitOffsets(const std::string &str, char delimiter,
                  std::vector<uint32_t> *offsets) {
  std::stringstream ss(str);
  std::string tok;

  while (std::getline(ss, tok, delimiter)) {
    const uint32_t offset = static_cast<uint32_t>(std::stoul(tok));
    offsets->push_back(offset);
  }
}
151 
152 }  // anonymous namespace
153 
isScriptCpuBacked(const Script * s)154 bool RSoVScript::isScriptCpuBacked(const Script *s) {
155   return s->mHal.info.mVersionMinor == CPU_SCRIPT_MAGIC_NUMBER;
156 }
157 
initScriptOnCpu(Script * s,RsdCpuReference::CpuScript * cs)158 void RSoVScript::initScriptOnCpu(Script *s, RsdCpuReference::CpuScript *cs) {
159   s->mHal.drv = cs;
160   s->mHal.info.mVersionMajor = 0;  // Unused. Don't care.
161   s->mHal.info.mVersionMinor = CPU_SCRIPT_MAGIC_NUMBER;
162 }
163 
initScriptOnRSoV(Script * s,RSoVScript * rsovScript)164 void RSoVScript::initScriptOnRSoV(Script *s, RSoVScript *rsovScript) {
165   s->mHal.drv = rsovScript;
166   s->mHal.info.mVersionMajor = 0;  // Unused. Don't care.
167   s->mHal.info.mVersionMinor = 0;
168 }
169 
170 using android::spirit::Module;
171 using android::spirit::Deserialize;
172 
RSoVScript(RSoVContext * context,std::vector<uint32_t> && spvWords,bcinfo::MetadataExtractor * ME,std::map<std::string,int> * GA2ID)173 RSoVScript::RSoVScript(RSoVContext *context, std::vector<uint32_t> &&spvWords,
174                        bcinfo::MetadataExtractor *ME,
175                        std::map<std::string, int> *GA2ID)
176     : mRSoV(context),
177       mDevice(context->getDevice()),
178       mSPIRVWords(std::move(spvWords)),
179       mME(ME),
180       mGlobalAllocationMetadata(nullptr),
181       mGAMapping(GA2ID) {
182   std::unique_ptr<Module> module(Deserialize<Module>(mSPIRVWords));
183 
184   const std::string &strGlobalSize =
185       module->findStringOfPrefix(".rsov.GlobalSize:");
186   if (strGlobalSize.empty()) {
187     mGlobals.reset(new RSoVBuffer(context, 4));
188     return;
189   }
190   const size_t colonPosSize = strGlobalSize.find(':');
191   const std::string &strVal = strGlobalSize.substr(colonPosSize + 1);
192   const uint64_t globalSize = static_cast<uint64_t>(std::stol(strVal));
193   if (globalSize > 0) {
194     mGlobals.reset(new RSoVBuffer(context, globalSize));
195     __GPUBlock = mGlobals->getHostPtr();
196     const std::string &offsetStr =
197       module->findStringOfPrefix(".rsov.ExportedVars:");
198     const size_t colonPos = offsetStr.find(':');
199     splitOffsets(offsetStr.substr(colonPos + 1), ';', &mExportedVarOffsets);
200   }
201 }
202 
// Deletes the CPU script and the bitcode metadata extractor held by this
// object through raw pointers.
RSoVScript::~RSoVScript() {
  delete mCpuScript;
  delete mME;
}
207 
// Intentionally a no-op: nothing is populated on the given script here.
void RSoVScript::populateScript(Script *) {
}
210 
invokeFunction(uint32_t slot,const void * params,size_t paramLength)211 void RSoVScript::invokeFunction(uint32_t slot, const void *params,
212                                 size_t paramLength) {
213   getCpuScript()->invokeFunction(slot, params, paramLength);
214 }
215 
invokeRoot()216 int RSoVScript::invokeRoot() { return getCpuScript()->invokeRoot(); }
217 
invokeForEach(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)218 void RSoVScript::invokeForEach(uint32_t slot, const Allocation **ains,
219                                uint32_t inLen, Allocation *aout,
220                                const void *usr, uint32_t usrLen,
221                                const RsScriptCall *sc) {
222   // TODO: Handle kernel without input Allocation
223   rsAssert(ains);
224   std::vector<RSoVAllocation *> inputAllocations(inLen);
225   for (uint32_t i = 0; i < inLen; ++i) {
226     inputAllocations[i] = static_cast<RSoVAllocation *>(ains[i]->mHal.drv);
227   }
228   RSoVAllocation *outputAllocation =
229       static_cast<RSoVAllocation *>(aout->mHal.drv);
230   runForEach(slot, inLen, inputAllocations, outputAllocation);
231 }
232 
invokeReduce(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const RsScriptCall * sc)233 void RSoVScript::invokeReduce(uint32_t slot, const Allocation **ains,
234                               uint32_t inLen, Allocation *aout,
235                               const RsScriptCall *sc) {
236   getCpuScript()->invokeReduce(slot, ains, inLen, aout, sc);
237 }
238 
invokeInit()239 void RSoVScript::invokeInit() {
240   getCpuScript()->invokeInit();
241 }
242 
// Not implemented yet; currently does nothing.
void RSoVScript::invokeFreeChildren() {
  // TODO: implement this
}
246 
setGlobalVar(uint32_t slot,const void * data,size_t dataLength)247 void RSoVScript::setGlobalVar(uint32_t slot, const void *data,
248                               size_t dataLength) {
249   char *basePtr = mGlobals->getHostPtr();
250   rsAssert(basePtr != nullptr);
251   const uint32_t offset = GetExportedVarOffset(slot);
252   memcpy(basePtr + offset, data, dataLength);
253 }
254 
getGlobalVar(uint32_t slot,void * data,size_t dataLength)255 void RSoVScript::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
256   const char *basePtr = mGlobals->getHostPtr();
257   rsAssert(basePtr != nullptr);
258   const uint32_t offset = GetExportedVarOffset(slot);
259   memcpy(data, basePtr + offset, dataLength);
260 }
261 
setGlobalVarWithElemDims(uint32_t slot,const void * data,size_t dataLength,const Element * elem,const uint32_t * dims,size_t dimLength)262 void RSoVScript::setGlobalVarWithElemDims(uint32_t slot, const void *data,
263                                           size_t dataLength, const Element *elem,
264                                           const uint32_t *dims,
265                                           size_t dimLength) {
266   char *basePtr = mGlobals->getHostPtr();
267   rsAssert(basePtr != nullptr);
268   const uint32_t offset = GetExportedVarOffset(slot);
269   char *destPtr = basePtr + offset;
270 
271   // We want to look at dimension in terms of integer components,
272   // but dimLength is given in terms of bytes.
273   dimLength /= sizeof(int);
274 
275   // Only a single dimension is currently supported.
276   rsAssert(dimLength == 1);
277   if (dimLength != 1) {
278     return;
279   }
280 
281   // First do the increment loop.
282   size_t stride = elem->getSizeBytes();
283   const char *cVal = reinterpret_cast<const char *>(data);
284   for (uint32_t i = 0; i < dims[0]; i++) {
285     elem->incRefs(cVal);
286     cVal += stride;
287   }
288 
289   // Decrement loop comes after (to prevent race conditions).
290   char *oldVal = destPtr;
291   for (uint32_t i = 0; i < dims[0]; i++) {
292     elem->decRefs(oldVal);
293     oldVal += stride;
294   }
295 
296   memcpy(destPtr, data, dataLength);
297 }
298 
// Not implemented yet: only emits a verbose log line; neither `slot` nor
// `data` is used.
void RSoVScript::setGlobalBind(uint32_t slot, Allocation *data) {
  ALOGV("%s succeeded.", __FUNCTION__);
  // TODO: implement this
}
303 
// Forwards the object binding for `slot` to the CPU script and logs the call.
// NOTE(review): this uses mCpuScript directly whereas the invoke* methods go
// through getCpuScript() — confirm the two are interchangeable here.
void RSoVScript::setGlobalObj(uint32_t slot, ObjectBase *obj) {
  mCpuScript->setGlobalObj(slot, obj);
  ALOGV("%s succeeded.", __FUNCTION__);
}
308 
// Not implemented yet; always returns nullptr regardless of `ptr`.
Allocation *RSoVScript::getAllocationForPointer(const void *ptr) const {
  // TODO: implement this
  return nullptr;
}
313 
// Not implemented yet; always reports zero global entries.
int RSoVScript::getGlobalEntries() const {
  // TODO: implement this
  return 0;
}
318 
// Not implemented yet; always returns nullptr regardless of `i`.
const char *RSoVScript::getGlobalName(int i) const {
  // TODO: implement this
  return nullptr;
}
323 
// Not implemented yet; always returns nullptr regardless of `i`.
const void *RSoVScript::getGlobalAddress(int i) const {
  // TODO: implement this
  return nullptr;
}
328 
// Not implemented yet; always returns 0 regardless of `i`.
size_t RSoVScript::getGlobalSize(int i) const {
  // TODO: implement this
  return 0;
}
333 
// Not implemented yet; always returns 0 regardless of `i`.
uint32_t RSoVScript::getGlobalProperties(int i) const {
  // TODO: implement this
  return 0;
}
338 
InitDescriptorAndPipelineLayouts(uint32_t inLen)339 void RSoVScript::InitDescriptorAndPipelineLayouts(uint32_t inLen) {
340   // TODO: kernels with zero output allocations
341   std::vector<VkDescriptorSetLayoutBinding> bindings(
342       inLen + 3, {
343                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
344                      .descriptorCount = 1,
345                      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
346                  });
347   for (uint32_t i = 0; i < inLen + 3; i++) {
348     bindings[i].binding = i;
349   }
350 
351   VkDescriptorSetLayoutCreateInfo descriptor_layout = {
352       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
353       .pNext = nullptr,
354       .flags = 0,
355       .bindingCount = inLen + 3,
356       .pBindings = bindings.data(),
357   };
358 
359   VkResult res;
360 
361   mDescLayout.resize(NUM_DESCRIPTOR_SETS);
362   res = vkCreateDescriptorSetLayout(mDevice, &descriptor_layout, NULL,
363                                     mDescLayout.data());
364   rsAssert(res == VK_SUCCESS);
365 
366   /* Now use the descriptor layout to create a pipeline layout */
367   VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {
368       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
369       .pNext = nullptr,
370       .pushConstantRangeCount = 0,
371       .pPushConstantRanges = nullptr,
372       .setLayoutCount = NUM_DESCRIPTOR_SETS,
373       .pSetLayouts = mDescLayout.data(),
374   };
375 
376   res = vkCreatePipelineLayout(mDevice, &pPipelineLayoutCreateInfo, NULL,
377                                &mPipelineLayout);
378   rsAssert(res == VK_SUCCESS);
379 }
380 
InitShader(uint32_t slot)381 void RSoVScript::InitShader(uint32_t slot) {
382   VkResult res;
383 
384   mShaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
385   mShaderStage.pNext = nullptr;
386   mShaderStage.pSpecializationInfo = nullptr;
387   mShaderStage.flags = 0;
388   mShaderStage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
389 
390   const char **RSKernelNames = mME->getExportForEachNameList();
391   size_t RSKernelNum = mME->getExportForEachSignatureCount();
392   rsAssert(slot < RSKernelNum);
393   rsAssert(RSKernelNames);
394   rsAssert(RSKernelNames[slot]);
395   // ALOGV("slot = %d kernel name = %s", slot, RSKernelNames[slot]);
396   std::string entryName("entry_");
397   entryName.append(RSKernelNames[slot]);
398 
399   mShaderStage.pName = strndup(entryName.c_str(), entryName.size());
400 
401   VkShaderModuleCreateInfo moduleCreateInfo = {
402       .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
403       .pNext = nullptr,
404       .flags = 0,
405       .codeSize = mSPIRVWords.size() * sizeof(unsigned int),
406       .pCode = mSPIRVWords.data(),
407   };
408   res = vkCreateShaderModule(mDevice, &moduleCreateInfo, NULL,
409                              &mShaderStage.module);
410   rsAssert(res == VK_SUCCESS);
411 }
412 
InitDescriptorPool(uint32_t inLen)413 void RSoVScript::InitDescriptorPool(uint32_t inLen) {
414   VkResult res;
415   // 1 global buffer, 1 global allocation metadata buffer, 1 output allocation,
416   // and inLen input allocations
417   VkDescriptorPoolSize type_count[] = {{
418       .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = inLen + 3,
419   }};
420 
421   VkDescriptorPoolCreateInfo descriptor_pool = {
422       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
423       .pNext = nullptr,
424       .maxSets = 1,
425       .poolSizeCount = NELEM(type_count),
426       .pPoolSizes = type_count,
427   };
428 
429   res = vkCreateDescriptorPool(mDevice, &descriptor_pool, NULL, &mDescPool);
430   rsAssert(res == VK_SUCCESS);
431 }
432 
433 // Iterate through a list of global allocations that are used inside the module
434 // and marshal their type information to a dedicated Vulkan Buffer
MarshalTypeInfo(void)435 void RSoVScript::MarshalTypeInfo(void) {
436   // Marshal global allocation metadata to the device
437   auto *cs = getCpuScript();
438   int nr_globals = mGAMapping->size();
439   if (mGlobalAllocationMetadata == nullptr) {
440     mGlobalAllocationMetadata.reset(
441         new RSoVBuffer(mRSoV, sizeof(struct rsovTypeInfo) * nr_globals));
442   }
443   struct rsovTypeInfo *mappedMetadata =
444       (struct rsovTypeInfo *)mGlobalAllocationMetadata->getHostPtr();
445   for (int i = 0; i < nr_globals; ++i) {
446     if (getGlobalRsType(cs->getGlobalProperties(i)) ==
447         RsDataType::RS_TYPE_ALLOCATION) {
448       ALOGV("global variable %d is an allocation!", i);
449       const void *host_buf;
450       cs->getGlobalVar(i, (void *)&host_buf, sizeof(host_buf));
451       if (!host_buf) continue;
452       const android::renderscript::Allocation *GA =
453           static_cast<const android::renderscript::Allocation *>(host_buf);
454       const android::renderscript::Type *T = GA->getType();
455       rsAssert(T);
456 
457       auto global_it = mGAMapping->find(cs->getGlobalName(i));
458       rsAssert(global_it != (*mGAMapping).end());
459       int id = global_it->second;
460       ALOGV("global allocation %s is mapped to ID %d", cs->getGlobalName(i),
461             id);
462       // TODO: marshal other properties
463       mappedMetadata[id].x_size = T->getDimX();
464       mappedMetadata[id].y_size = T->getDimY();
465       mappedMetadata[id].z_size = T->getDimZ();
466     }
467   }
468 }
469 
InitDescriptorSet(const std::vector<RSoVAllocation * > & inputAllocations,RSoVAllocation * outputAllocation)470 void RSoVScript::InitDescriptorSet(
471     const std::vector<RSoVAllocation *> &inputAllocations,
472     RSoVAllocation *outputAllocation) {
473   VkResult res;
474 
475   VkDescriptorSetAllocateInfo alloc_info = {
476       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
477       .pNext = NULL,
478       .descriptorPool = mDescPool,
479       .descriptorSetCount = NUM_DESCRIPTOR_SETS,
480       .pSetLayouts = mDescLayout.data(),
481   };
482 
483   mDescSet.resize(NUM_DESCRIPTOR_SETS);
484   res = vkAllocateDescriptorSets(mDevice, &alloc_info, mDescSet.data());
485   rsAssert(res == VK_SUCCESS);
486 
487   std::vector<VkWriteDescriptorSet> writes{
488       // Global variables
489       {
490           .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
491           .dstSet = mDescSet[0],
492           .dstBinding = 0,
493           .dstArrayElement = 0,
494           .descriptorCount = 1,
495           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
496           .pBufferInfo = mGlobals->getBufferInfo(),
497       },
498       // Metadata for global Allocations
499       {
500           .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
501           .dstSet = mDescSet[0],
502           .dstBinding = 1,
503           .dstArrayElement = 0,
504           .descriptorCount = 1,
505           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
506           .pBufferInfo = mGlobalAllocationMetadata->getBufferInfo(),
507       },
508       // Output Allocation
509       {
510           .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
511           .dstSet = mDescSet[0],
512           .dstBinding = 2,
513           .dstArrayElement = 0,
514           .descriptorCount = 1,
515           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
516           .pBufferInfo = outputAllocation->getBuffer()->getBufferInfo(),
517       },
518   };
519 
520   // Input Allocations
521   for (uint32_t i = 0; i < inputAllocations.size(); ++i) {
522     writes.push_back({
523         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
524         .dstSet = mDescSet[0],
525         .dstBinding = 3 + i,  // input allocations start from binding #3
526         .dstArrayElement = 0,
527         .descriptorCount = 1,
528         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
529         .pBufferInfo = inputAllocations[i]->getBuffer()->getBufferInfo(),
530     });
531   }
532 
533   vkUpdateDescriptorSets(mDevice, writes.size(), writes.data(), 0, NULL);
534 }
535 
InitPipeline()536 void RSoVScript::InitPipeline() {
537   // DEPENDS on mShaderStage, i.e., InitShader()
538 
539   VkResult res;
540 
541   VkComputePipelineCreateInfo pipeline_info = {
542       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
543       .pNext = nullptr,
544       .layout = mPipelineLayout,
545       .basePipelineHandle = VK_NULL_HANDLE,
546       .basePipelineIndex = 0,
547       .flags = 0,
548       .stage = mShaderStage,
549   };
550   res = vkCreateComputePipelines(mDevice, VK_NULL_HANDLE, 1, &pipeline_info,
551                                  NULL, &mComputePipeline);
552   rsAssert(res == VK_SUCCESS);
553 }
554 
runForEach(uint32_t slot,uint32_t inLen,const std::vector<RSoVAllocation * > & inputAllocations,RSoVAllocation * outputAllocation)555 void RSoVScript::runForEach(
556     uint32_t slot, uint32_t inLen,
557     const std::vector<RSoVAllocation *> &inputAllocations,
558     RSoVAllocation *outputAllocation) {
559   VkResult res;
560 
561   InitShader(slot);
562   InitDescriptorPool(inLen);
563   InitDescriptorAndPipelineLayouts(inLen);
564   MarshalTypeInfo();
565   InitDescriptorSet(inputAllocations, outputAllocation);
566   // InitPipelineCache();
567   InitPipeline();
568 
569   VkCommandBuffer cmd;
570 
571   VkCommandBufferAllocateInfo cmd_info = {
572       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
573       .pNext = nullptr,
574       .commandPool = mRSoV->getCmdPool(),
575       .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
576       .commandBufferCount = 1,
577   };
578 
579   res = vkAllocateCommandBuffers(mDevice, &cmd_info, &cmd);
580   rsAssert(res == VK_SUCCESS);
581 
582   VkCommandBufferBeginInfo cmd_buf_info = {
583       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
584       .pNext = nullptr,
585       .flags = 0,
586       .pInheritanceInfo = nullptr,
587   };
588 
589   res = vkBeginCommandBuffer(cmd, &cmd_buf_info);
590   rsAssert(res == VK_SUCCESS);
591 
592   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mComputePipeline);
593 
594   vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mPipelineLayout,
595                           0, mDescSet.size(), mDescSet.data(), 0, nullptr);
596   // Assuming all input allocations are of the same dimensionality
597   const uint32_t width = inputAllocations[0]->getWidth();
598   const uint32_t height = rsMax(inputAllocations[0]->getHeight(), 1U);
599   const uint32_t depth = rsMax(inputAllocations[0]->getDepth(), 1U);
600   vkCmdDispatch(cmd, width, height, depth);
601 
602   res = vkEndCommandBuffer(cmd);
603   assert(res == VK_SUCCESS);
604 
605   VkSubmitInfo submit_info = {
606       .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
607       .commandBufferCount = 1,
608       .pCommandBuffers = &cmd,
609   };
610 
611   VkFence fence;
612 
613   VkFenceCreateInfo fenceInfo = {
614       .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
615       .pNext = nullptr,
616       .flags = 0,
617   };
618 
619   vkCreateFence(mDevice, &fenceInfo, NULL, &fence);
620 
621   vkQueueSubmit(mRSoV->getQueue(), 1, &submit_info, fence);
622 
623   // Make sure command buffer is finished
624   do {
625     res = vkWaitForFences(mDevice, 1, &fence, VK_TRUE, 100000);
626   } while (res == VK_TIMEOUT);
627 
628   rsAssert(res == VK_SUCCESS);
629 
630   vkDestroyFence(mDevice, fence, NULL);
631 
632   // TODO: shall we reuse command buffers?
633   VkCommandBuffer cmd_bufs[] = {cmd};
634   vkFreeCommandBuffers(mDevice, mRSoV->getCmdPool(), 1, cmd_bufs);
635 
636   vkDestroyPipeline(mDevice, mComputePipeline, nullptr);
637   for (int i = 0; i < NUM_DESCRIPTOR_SETS; i++)
638     vkDestroyDescriptorSetLayout(mDevice, mDescLayout[i], nullptr);
639   vkDestroyPipelineLayout(mDevice, mPipelineLayout, nullptr);
640   vkFreeDescriptorSets(mDevice, mDescPool, NUM_DESCRIPTOR_SETS,
641                        mDescSet.data());
642   vkDestroyDescriptorPool(mDevice, mDescPool, nullptr);
643   free((void *)mShaderStage.pName);
644   vkDestroyShaderModule(mDevice, mShaderStage.module, nullptr);
645 }
646 
647 }  // namespace rsov
648 }  // namespace renderscript
649 }  // namespace android
650 
651 using android::renderscript::Allocation;
652 using android::renderscript::Context;
653 using android::renderscript::Element;
654 using android::renderscript::ObjectBase;
655 using android::renderscript::RsdCpuReference;
656 using android::renderscript::Script;
657 using android::renderscript::ScriptC;
658 using android::renderscript::rs_script;
659 using android::renderscript::rsov::RSoVContext;
660 using android::renderscript::rsov::RSoVScript;
661 using android::renderscript::rsov::compileBitcode;
662 
663 namespace {
664 // A class to parse global allocation metadata; essentially a subset of JSON
665 // it would look like {"__RSoV_GA": {"g":42}}
666 // The result is stored in a refence to a map<string, int>
667 class ParseMD {
668  public:
ParseMD(std::string s,std::map<std::string,int> & map)669   ParseMD(std::string s, std::map<std::string, int> &map)
670       : mString(s), mMapping(map) {}
671 
parse(void)672   bool parse(void) {
673     // remove outermose two pairs of braces
674     mString = removeBraces(mString);
675     if (mString.empty()) {
676       return false;
677     }
678 
679     mString = removeBraces(mString);
680     if (mString.empty()) {
681       return false;
682     }
683 
684     // Now we are supposed to have a comma-separated list that looks like:
685     // "foo":42, "bar":56
686     split<','>(mString, [&](auto s) {
687       split<':'>(s, nullptr, [&](auto pair) {
688         rsAssert(pair.size() == 2);
689         std::string ga_name = removeQuotes(pair[0]);
690         int id = atoi(pair[1].c_str());
691         ALOGV("ParseMD: global allocation %s has ID %d", ga_name.c_str(), id);
692         mMapping[ga_name] = id;
693       });
694     });
695     return true;
696   }
697 
698  private:
699   template <char L, char R>
removeMatching(const std::string & s)700   static std::string removeMatching(const std::string &s) {
701     auto leftCBrace = s.find(L);
702     if (leftCBrace == std::string::npos) {
703       return "";
704     }
705     leftCBrace++;
706     return s.substr(leftCBrace, s.rfind(R) - leftCBrace);
707   }
708 
removeBraces(const std::string & s)709   static std::string removeBraces(const std::string &s) {
710     return removeMatching<'{', '}'>(s);
711   }
712 
removeQuotes(const std::string & s)713   static std::string removeQuotes(const std::string &s) {
714     return removeMatching<'"', '"'>(s);
715   }
716 
717   // Splitting a string, and call "each" and/or "all" with individal elements
718   // and a vector of all tokenized elements
719   template <char D>
split(const std::string & s,std::function<void (const std::string &)> each,std::function<void (const std::vector<const std::string> &)> all=nullptr)720   static void split(const std::string &s,
721                     std::function<void(const std::string &)> each,
722                     std::function<void(const std::vector<const std::string> &)>
723                         all = nullptr) {
724     std::vector<const std::string> result;
725     for (std::string::size_type pos = 0; pos < s.size(); pos++) {
726       std::string::size_type begin = pos;
727 
728       while (pos <= s.size() && s[pos] != D) pos++;
729       std::string found = s.substr(begin, pos - begin);
730       if (each) each(found);
731       if (all) result.push_back(found);
732     }
733     if (all) all(result);
734   }
735 
736   std::string mString;
737   std::map<std::string, int> &mMapping;
738 };
739 
740 }  // namespace
741 
742 class ExtractRSoVMD : public android::spirit::DoNothingVisitor {
743  public:
ExtractRSoVMD()744   ExtractRSoVMD() : mGAMapping(new std::map<std::string, int>) {}
745 
visit(android::spirit::StringInst * s)746   void visit(android::spirit::StringInst *s) {
747     ALOGV("ExtractRSoVMD: string = %s", s->mOperand1.c_str());
748     std::map<std::string, int> mapping;
749     ParseMD p(s->mOperand1, mapping);
750     if (p.parse()) {
751       *mGAMapping = std::move(mapping);
752     }
753   }
754 
takeMapping(void)755   std::map<std::string, int> *takeMapping(void) { return mGAMapping.release(); }
756 
757  private:
758   std::unique_ptr<std::map<std::string, int> > mGAMapping;
759 };
760 
rsovScriptInit(const Context * rsc,ScriptC * script,char const * resName,char const * cacheDir,uint8_t const * bitcode,size_t bitcodeSize,uint32_t flags)761 bool rsovScriptInit(const Context *rsc, ScriptC *script, char const *resName,
762                     char const *cacheDir, uint8_t const *bitcode,
763                     size_t bitcodeSize, uint32_t flags) {
764   RSoVHal *hal = static_cast<RSoVHal *>(rsc->mHal.drv);
765 
766   std::unique_ptr<bcinfo::MetadataExtractor> bitcodeMetadata(
767       new bcinfo::MetadataExtractor((const char *)bitcode, bitcodeSize));
768   if (!bitcodeMetadata || !bitcodeMetadata->extract()) {
769     ALOGE("Could not extract metadata from bitcode from %s", resName);
770     return false;
771   }
772 
773   std::vector<uint8_t> modifiedBitcode;
774   auto spvWords =
775     compileBitcode(resName, cacheDir, (const char *)bitcode, bitcodeSize, modifiedBitcode);
776   if (!spvWords.empty() && !modifiedBitcode.empty()) {
777     // Extract compiler metadata on allocation->binding mapping
778     android::spirit::Module *module =
779         android::spirit::Deserialize<android::spirit::Module>(spvWords);
780     rsAssert(module);
781     ExtractRSoVMD ga_md;
782     module->accept(&ga_md);
783 
784     RSoVScript *rsovScript =
785         new RSoVScript(hal->mRSoV, std::move(spvWords),
786                        bitcodeMetadata.release(), ga_md.takeMapping());
787     if (rsovScript) {
788       std::string modifiedResName(resName);
789       modifiedResName.append("_modified");
790       RsdCpuReference::CpuScript *cs = hal->mCpuRef->createScript(
791           script, modifiedResName.c_str(), cacheDir, modifiedBitcode.data(),
792           modifiedBitcode.size(), flags);
793       if (cs != nullptr) {
794         cs->populateScript(script);
795         rsovScript->setCpuScript(cs);
796         RSoVScript::initScriptOnRSoV(script, rsovScript);
797         return true;
798       }
799     }
800   }
801 
802   ALOGD("Failed creating an RSoV script for %s", resName);
803   // Fall back to CPU driver instead
804 
805   std::unique_ptr<RsdCpuReference::CpuScript> cs(hal->mCpuRef->createScript(
806       script, resName, cacheDir, bitcode, bitcodeSize, flags));
807   if (cs == nullptr) {
808     ALOGE("Failed creating a CPU script %p for %s (%p)", cs.get(), resName,
809           script);
810     return false;
811   }
812   cs->populateScript(script);
813 
814   RSoVScript::initScriptOnCpu(script, cs.release());
815 
816   return true;
817 }
818 
rsovInitIntrinsic(const Context * rsc,Script * s,RsScriptIntrinsicID iid,Element * e)819 bool rsovInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid,
820                        Element *e) {
821   RSoVHal *dc = (RSoVHal *)rsc->mHal.drv;
822   RsdCpuReference::CpuScript *cs = dc->mCpuRef->createIntrinsic(s, iid, e);
823   if (cs == nullptr) {
824     return false;
825   }
826   s->mHal.drv = cs;
827   cs->populateScript(s);
828   return true;
829 }
830 
rsovScriptInvokeForEach(const Context * rsc,Script * s,uint32_t slot,const Allocation * ain,Allocation * aout,const void * usr,size_t usrLen,const RsScriptCall * sc)831 void rsovScriptInvokeForEach(const Context *rsc, Script *s, uint32_t slot,
832                              const Allocation *ain, Allocation *aout,
833                              const void *usr, size_t usrLen,
834                              const RsScriptCall *sc) {
835   if (ain == nullptr) {
836     rsovScriptInvokeForEachMulti(rsc, s, slot, nullptr, 0, aout, usr, usrLen,
837                                  sc);
838   } else {
839     const Allocation *ains[1] = {ain};
840 
841     rsovScriptInvokeForEachMulti(rsc, s, slot, ains, 1, aout, usr, usrLen, sc);
842   }
843 }
844 
rsovScriptInvokeForEachMulti(const Context * rsc,Script * s,uint32_t slot,const Allocation ** ains,size_t inLen,Allocation * aout,const void * usr,size_t usrLen,const RsScriptCall * sc)845 void rsovScriptInvokeForEachMulti(const Context *rsc, Script *s, uint32_t slot,
846                                   const Allocation **ains, size_t inLen,
847                                   Allocation *aout, const void *usr,
848                                   size_t usrLen, const RsScriptCall *sc) {
849   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
850   cs->invokeForEach(slot, ains, inLen, aout, usr, usrLen, sc);
851 }
852 
rsovScriptInvokeRoot(const Context * dc,Script * s)853 int rsovScriptInvokeRoot(const Context *dc, Script *s) {
854   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
855   return cs->invokeRoot();
856 }
857 
rsovScriptInvokeInit(const Context * dc,Script * s)858 void rsovScriptInvokeInit(const Context *dc, Script *s) {
859   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
860   cs->invokeInit();
861 }
862 
rsovScriptInvokeFreeChildren(const Context * dc,Script * s)863 void rsovScriptInvokeFreeChildren(const Context *dc, Script *s) {
864   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
865   cs->invokeFreeChildren();
866 }
867 
rsovScriptInvokeFunction(const Context * dc,Script * s,uint32_t slot,const void * params,size_t paramLength)868 void rsovScriptInvokeFunction(const Context *dc, Script *s, uint32_t slot,
869                               const void *params, size_t paramLength) {
870   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
871   cs->invokeFunction(slot, params, paramLength);
872 }
873 
rsovScriptInvokeReduce(const Context * dc,Script * s,uint32_t slot,const Allocation ** ains,size_t inLen,Allocation * aout,const RsScriptCall * sc)874 void rsovScriptInvokeReduce(const Context *dc, Script *s, uint32_t slot,
875                             const Allocation **ains, size_t inLen,
876                             Allocation *aout, const RsScriptCall *sc) {
877   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
878   cs->invokeReduce(slot, ains, inLen, aout, sc);
879 }
880 
rsovScriptSetGlobalVar(const Context * dc,const Script * s,uint32_t slot,void * data,size_t dataLength)881 void rsovScriptSetGlobalVar(const Context *dc, const Script *s, uint32_t slot,
882                             void *data, size_t dataLength) {
883   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
884   cs->setGlobalVar(slot, data, dataLength);
885 }
886 
rsovScriptGetGlobalVar(const Context * dc,const Script * s,uint32_t slot,void * data,size_t dataLength)887 void rsovScriptGetGlobalVar(const Context *dc, const Script *s, uint32_t slot,
888                             void *data, size_t dataLength) {
889   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
890   cs->getGlobalVar(slot, data, dataLength);
891 }
892 
rsovScriptSetGlobalVarWithElemDims(const Context * dc,const Script * s,uint32_t slot,void * data,size_t dataLength,const android::renderscript::Element * elem,const uint32_t * dims,size_t dimLength)893 void rsovScriptSetGlobalVarWithElemDims(
894     const Context *dc, const Script *s, uint32_t slot, void *data,
895     size_t dataLength, const android::renderscript::Element *elem,
896     const uint32_t *dims, size_t dimLength) {
897   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
898   cs->setGlobalVarWithElemDims(slot, data, dataLength, elem, dims, dimLength);
899 }
900 
rsovScriptSetGlobalBind(const Context * dc,const Script * s,uint32_t slot,Allocation * data)901 void rsovScriptSetGlobalBind(const Context *dc, const Script *s, uint32_t slot,
902                              Allocation *data) {
903   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
904   cs->setGlobalBind(slot, data);
905 }
906 
rsovScriptSetGlobalObj(const Context * dc,const Script * s,uint32_t slot,ObjectBase * data)907 void rsovScriptSetGlobalObj(const Context *dc, const Script *s, uint32_t slot,
908                             ObjectBase *data) {
909   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
910   cs->setGlobalObj(slot, data);
911 }
912 
rsovScriptDestroy(const Context * dc,Script * s)913 void rsovScriptDestroy(const Context *dc, Script *s) {
914   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
915   delete cs;
916   s->mHal.drv = nullptr;
917 }
918 
rsovScriptGetAllocationForPointer(const android::renderscript::Context * dc,const android::renderscript::Script * sc,const void * ptr)919 Allocation *rsovScriptGetAllocationForPointer(
920     const android::renderscript::Context *dc,
921     const android::renderscript::Script *sc, const void *ptr) {
922   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)sc->mHal.drv;
923   return cs->getAllocationForPointer(ptr);
924 }
925 
rsovScriptUpdateCachedObject(const Context * rsc,const Script * script,rs_script * obj)926 void rsovScriptUpdateCachedObject(const Context *rsc, const Script *script,
927                                   rs_script *obj) {
928   obj->p = script;
929 #ifdef __LP64__
930   obj->unused1 = nullptr;
931   obj->unused2 = nullptr;
932   obj->unused3 = nullptr;
933 #endif
934 }
935