1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "rsovScript.h"
18
19 #include "bcinfo/MetadataExtractor.h"
20 #include "module.h"
21 #include "rsContext.h"
22 #include "rsDefines.h"
23 #include "rsType.h"
24 #include "rsUtils.h"
25 #include "rsovAllocation.h"
26 #include "rsovContext.h"
27 #include "rsovCore.h"
28 #include "spirit/file_utils.h"
29 #include "spirit/instructions.h"
30 #include "spirit/module.h"
31
32 #include <fstream>
33 #include <functional>
34 #include <iostream>
35 #include <sstream>
36 #include <string>
37
// Host pointer to the script's global-variable buffer. It is assigned in the
// RSoVScript constructor from mGlobals->getHostPtr() when the module declares
// a non-zero global size. Declared with C linkage so the symbol name is not
// mangled (presumably so code outside this file can resolve it by name —
// TODO confirm).
extern "C" {
char* __GPUBlock = nullptr;
}
41
42 namespace android {
43 namespace renderscript {
44 namespace rsov {
45
46 namespace {
// Per-allocation type metadata written by RSoVScript::MarshalTypeInfo() into
// the metadata buffer bound to the shader.
// Layout of this struct has to be the same as the struct in generated SPIR-V
// TODO: generate this file from some spec that is shared with the compiler
struct rsovTypeInfo {
  uint32_t element_size;  // TODO: not implemented
  uint32_t x_size;        // Type::getDimX() of the allocation
  uint32_t y_size;        // Type::getDimY() of the allocation
  uint32_t z_size;        // Type::getDimZ() of the allocation
};

// Catch accidental layout changes (added fields, padding) at compile time,
// since the layout is shared with the generated SPIR-V.
static_assert(sizeof(rsovTypeInfo) == 4 * sizeof(uint32_t),
              "rsovTypeInfo must be exactly four packed 32-bit words");
55
// Path of the compiler executable launched by compileBitcode(). Both the
// characters and the pointer itself are constant so the path cannot be
// re-pointed at run time.
const char *const COMPILER_EXE_PATH = "/system/bin/rs2spirv";
57
setCompilerArgs(const char * bcFileName,const char * cacheDir)58 std::vector<const char *> setCompilerArgs(const char *bcFileName,
59 const char *cacheDir) {
60 rsAssert(bcFileName && cacheDir);
61
62 std::vector<const char *> args;
63
64 args.push_back(COMPILER_EXE_PATH);
65 args.push_back(bcFileName);
66
67 args.push_back(nullptr);
68 return args;
69 }
70
// Writes `size` bytes starting at `bytes` to `filename` in binary mode,
// replacing any previous content. Errors are not reported.
void writeBytes(const char *filename, const char *bytes, size_t size) {
  // The temporary stream is flushed and closed at the end of the statement.
  std::ofstream(filename, std::ios::binary)
      .write(bytes, static_cast<std::streamsize>(size));
}
76
// Reads a whole binary file as a sequence of 32-bit words.
//
// Returns the words in file order, or an empty vector when the file cannot be
// opened, is empty, cannot be read completely, or has a size that is not a
// multiple of 4 bytes. Rejecting such files up front avoids sizing the vector
// from a negative tellg() result and avoids reading more bytes than the
// vector holds.
std::vector<uint32_t> readWords(const char *filename) {
  // Open positioned at the end so tellg() yields the file size.
  std::ifstream ifs(filename, std::ios::binary | std::ios::ate);
  if (!ifs.is_open()) {
    return {};
  }

  const std::streamoff length = ifs.tellg();
  const std::streamoff wordSize = static_cast<std::streamoff>(sizeof(uint32_t));
  if (length <= 0 || (length % wordSize) != 0) {
    // tellg() failed, the file is empty, or the size is not word aligned.
    return {};
  }

  std::vector<uint32_t> spvWords(static_cast<size_t>(length / wordSize));

  ifs.seekg(0, std::ios::beg);
  if (!ifs.read(reinterpret_cast<char *>(spvWords.data()),
                static_cast<std::streamsize>(length))) {
    return {};
  }

  return spvWords;
}
94
compileBitcode(const char * resName,const char * cacheDir,const char * bitcode,size_t bitcodeSize,std::vector<uint8_t> & modifiedBitcode)95 std::vector<uint32_t> compileBitcode(const char *resName, const char *cacheDir,
96 const char *bitcode, size_t bitcodeSize,
97 std::vector<uint8_t> &modifiedBitcode) {
98 rsAssert(bitcode && bitcodeSize);
99
100 // TODO: Cache the generated code
101
102 std::string bcFileName(cacheDir);
103 bcFileName.append("/");
104 bcFileName.append(resName);
105 bcFileName.append(".bc");
106
107 writeBytes(bcFileName.c_str(), bitcode, bitcodeSize);
108
109 auto args = setCompilerArgs(bcFileName.c_str(), cacheDir);
110
111 if (!rsuExecuteCommand(COMPILER_EXE_PATH, args.size() - 1, args.data())) {
112 ALOGE("compiler command line failed");
113 return std::vector<uint32_t>();
114 }
115
116 ALOGV("compiler command line succeeded");
117
118 std::string spvFileName(cacheDir);
119 spvFileName.append("/");
120 spvFileName.append(resName);
121 spvFileName.append(".spv");
122
123 std::string modifiedBCFileName(cacheDir);
124 modifiedBCFileName.append("/").append(resName).append("_modified.bc");
125
126 args.pop_back();
127 args.push_back("-bc");
128 args.push_back(modifiedBCFileName.c_str());
129 args.push_back(nullptr);
130
131 if (!rsuExecuteCommand(COMPILER_EXE_PATH, args.size() - 1, args.data())) {
132 ALOGE("compiler command line to create modified bitcode failed");
133 return std::vector<uint32_t>();
134 }
135
136 modifiedBitcode = android::spirit::readFile<uint8_t>(modifiedBCFileName);
137
138 return readWords(spvFileName.c_str());
139 }
140
// Splits `str` on `delimiter`, converts every field with std::stoi and
// appends the values (cast to uint32_t) to *offsets. Existing elements of
// *offsets are kept. An empty `str` appends nothing.
void splitOffsets(const std::string &str, char delimiter,
                  std::vector<uint32_t> *offsets) {
  std::istringstream fields(str);

  for (std::string field; std::getline(fields, field, delimiter);) {
    offsets->push_back(static_cast<uint32_t>(std::stoi(field)));
  }
}
151
152 } // anonymous namespace
153
isScriptCpuBacked(const Script * s)154 bool RSoVScript::isScriptCpuBacked(const Script *s) {
155 return s->mHal.info.mVersionMinor == CPU_SCRIPT_MAGIC_NUMBER;
156 }
157
initScriptOnCpu(Script * s,RsdCpuReference::CpuScript * cs)158 void RSoVScript::initScriptOnCpu(Script *s, RsdCpuReference::CpuScript *cs) {
159 s->mHal.drv = cs;
160 s->mHal.info.mVersionMajor = 0; // Unused. Don't care.
161 s->mHal.info.mVersionMinor = CPU_SCRIPT_MAGIC_NUMBER;
162 }
163
initScriptOnRSoV(Script * s,RSoVScript * rsovScript)164 void RSoVScript::initScriptOnRSoV(Script *s, RSoVScript *rsovScript) {
165 s->mHal.drv = rsovScript;
166 s->mHal.info.mVersionMajor = 0; // Unused. Don't care.
167 s->mHal.info.mVersionMinor = 0;
168 }
169
170 using android::spirit::Module;
171 using android::spirit::Deserialize;
172
RSoVScript(RSoVContext * context,std::vector<uint32_t> && spvWords,bcinfo::MetadataExtractor * ME,std::map<std::string,int> * GA2ID)173 RSoVScript::RSoVScript(RSoVContext *context, std::vector<uint32_t> &&spvWords,
174 bcinfo::MetadataExtractor *ME,
175 std::map<std::string, int> *GA2ID)
176 : mRSoV(context),
177 mDevice(context->getDevice()),
178 mSPIRVWords(std::move(spvWords)),
179 mME(ME),
180 mGlobalAllocationMetadata(nullptr),
181 mGAMapping(GA2ID) {
182 std::unique_ptr<Module> module(Deserialize<Module>(mSPIRVWords));
183
184 const std::string &strGlobalSize =
185 module->findStringOfPrefix(".rsov.GlobalSize:");
186 if (strGlobalSize.empty()) {
187 mGlobals.reset(new RSoVBuffer(context, 4));
188 return;
189 }
190 const size_t colonPosSize = strGlobalSize.find(':');
191 const std::string &strVal = strGlobalSize.substr(colonPosSize + 1);
192 const uint64_t globalSize = static_cast<uint64_t>(std::stol(strVal));
193 if (globalSize > 0) {
194 mGlobals.reset(new RSoVBuffer(context, globalSize));
195 __GPUBlock = mGlobals->getHostPtr();
196 const std::string &offsetStr =
197 module->findStringOfPrefix(".rsov.ExportedVars:");
198 const size_t colonPos = offsetStr.find(':');
199 splitOffsets(offsetStr.substr(colonPos + 1), ';', &mExportedVarOffsets);
200 }
201 }
202
// Deletes the objects this class frees explicitly: the CPU script
// (mCpuScript) and the bitcode metadata extractor (mME) that was handed to
// the constructor.
RSoVScript::~RSoVScript() {
  delete mCpuScript;
  delete mME;
}
207
// Intentionally empty: nothing is copied into the Script object here.
void RSoVScript::populateScript(Script *) {
}
210
invokeFunction(uint32_t slot,const void * params,size_t paramLength)211 void RSoVScript::invokeFunction(uint32_t slot, const void *params,
212 size_t paramLength) {
213 getCpuScript()->invokeFunction(slot, params, paramLength);
214 }
215
invokeRoot()216 int RSoVScript::invokeRoot() { return getCpuScript()->invokeRoot(); }
217
invokeForEach(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)218 void RSoVScript::invokeForEach(uint32_t slot, const Allocation **ains,
219 uint32_t inLen, Allocation *aout,
220 const void *usr, uint32_t usrLen,
221 const RsScriptCall *sc) {
222 // TODO: Handle kernel without input Allocation
223 rsAssert(ains);
224 std::vector<RSoVAllocation *> inputAllocations(inLen);
225 for (uint32_t i = 0; i < inLen; ++i) {
226 inputAllocations[i] = static_cast<RSoVAllocation *>(ains[i]->mHal.drv);
227 }
228 RSoVAllocation *outputAllocation =
229 static_cast<RSoVAllocation *>(aout->mHal.drv);
230 runForEach(slot, inLen, inputAllocations, outputAllocation);
231 }
232
invokeReduce(uint32_t slot,const Allocation ** ains,uint32_t inLen,Allocation * aout,const RsScriptCall * sc)233 void RSoVScript::invokeReduce(uint32_t slot, const Allocation **ains,
234 uint32_t inLen, Allocation *aout,
235 const RsScriptCall *sc) {
236 getCpuScript()->invokeReduce(slot, ains, inLen, aout, sc);
237 }
238
invokeInit()239 void RSoVScript::invokeInit() {
240 getCpuScript()->invokeInit();
241 }
242
// Not implemented yet; the call currently does nothing.
void RSoVScript::invokeFreeChildren() {
  // TODO: implement this
}
246
setGlobalVar(uint32_t slot,const void * data,size_t dataLength)247 void RSoVScript::setGlobalVar(uint32_t slot, const void *data,
248 size_t dataLength) {
249 char *basePtr = mGlobals->getHostPtr();
250 rsAssert(basePtr != nullptr);
251 const uint32_t offset = GetExportedVarOffset(slot);
252 memcpy(basePtr + offset, data, dataLength);
253 }
254
getGlobalVar(uint32_t slot,void * data,size_t dataLength)255 void RSoVScript::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
256 const char *basePtr = mGlobals->getHostPtr();
257 rsAssert(basePtr != nullptr);
258 const uint32_t offset = GetExportedVarOffset(slot);
259 memcpy(data, basePtr + offset, dataLength);
260 }
261
setGlobalVarWithElemDims(uint32_t slot,const void * data,size_t dataLength,const Element * elem,const uint32_t * dims,size_t dimLength)262 void RSoVScript::setGlobalVarWithElemDims(uint32_t slot, const void *data,
263 size_t dataLength, const Element *elem,
264 const uint32_t *dims,
265 size_t dimLength) {
266 char *basePtr = mGlobals->getHostPtr();
267 rsAssert(basePtr != nullptr);
268 const uint32_t offset = GetExportedVarOffset(slot);
269 char *destPtr = basePtr + offset;
270
271 // We want to look at dimension in terms of integer components,
272 // but dimLength is given in terms of bytes.
273 dimLength /= sizeof(int);
274
275 // Only a single dimension is currently supported.
276 rsAssert(dimLength == 1);
277 if (dimLength != 1) {
278 return;
279 }
280
281 // First do the increment loop.
282 size_t stride = elem->getSizeBytes();
283 const char *cVal = reinterpret_cast<const char *>(data);
284 for (uint32_t i = 0; i < dims[0]; i++) {
285 elem->incRefs(cVal);
286 cVal += stride;
287 }
288
289 // Decrement loop comes after (to prevent race conditions).
290 char *oldVal = destPtr;
291 for (uint32_t i = 0; i < dims[0]; i++) {
292 elem->decRefs(oldVal);
293 oldVal += stride;
294 }
295
296 memcpy(destPtr, data, dataLength);
297 }
298
// Not implemented yet: `slot` and `data` are ignored and nothing is bound.
// NOTE(review): the verbose log below reports success even though no work is
// done.
void RSoVScript::setGlobalBind(uint32_t slot, Allocation *data) {
  ALOGV("%s succeeded.", __FUNCTION__);
  // TODO: implement this
}
303
setGlobalObj(uint32_t slot,ObjectBase * obj)304 void RSoVScript::setGlobalObj(uint32_t slot, ObjectBase *obj) {
305 mCpuScript->setGlobalObj(slot, obj);
306 ALOGV("%s succeeded.", __FUNCTION__);
307 }
308
// Not implemented yet: no lookup is performed and nullptr is returned for
// every `ptr`.
Allocation *RSoVScript::getAllocationForPointer(const void *ptr) const {
  // TODO: implement this
  return nullptr;
}
313
// Not implemented yet: always reports 0 global entries.
int RSoVScript::getGlobalEntries() const {
  // TODO: implement this
  return 0;
}
318
// Not implemented yet: returns nullptr for every index `i`.
const char *RSoVScript::getGlobalName(int i) const {
  // TODO: implement this
  return nullptr;
}
323
// Not implemented yet: returns nullptr for every index `i`.
const void *RSoVScript::getGlobalAddress(int i) const {
  // TODO: implement this
  return nullptr;
}
328
// Not implemented yet: returns 0 for every index `i`.
size_t RSoVScript::getGlobalSize(int i) const {
  // TODO: implement this
  return 0;
}
333
// Not implemented yet: returns 0 for every index `i`.
uint32_t RSoVScript::getGlobalProperties(int i) const {
  // TODO: implement this
  return 0;
}
338
InitDescriptorAndPipelineLayouts(uint32_t inLen)339 void RSoVScript::InitDescriptorAndPipelineLayouts(uint32_t inLen) {
340 // TODO: kernels with zero output allocations
341 std::vector<VkDescriptorSetLayoutBinding> bindings(
342 inLen + 3, {
343 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
344 .descriptorCount = 1,
345 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
346 });
347 for (uint32_t i = 0; i < inLen + 3; i++) {
348 bindings[i].binding = i;
349 }
350
351 VkDescriptorSetLayoutCreateInfo descriptor_layout = {
352 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
353 .pNext = nullptr,
354 .flags = 0,
355 .bindingCount = inLen + 3,
356 .pBindings = bindings.data(),
357 };
358
359 VkResult res;
360
361 mDescLayout.resize(NUM_DESCRIPTOR_SETS);
362 res = vkCreateDescriptorSetLayout(mDevice, &descriptor_layout, NULL,
363 mDescLayout.data());
364 rsAssert(res == VK_SUCCESS);
365
366 /* Now use the descriptor layout to create a pipeline layout */
367 VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {
368 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
369 .pNext = nullptr,
370 .pushConstantRangeCount = 0,
371 .pPushConstantRanges = nullptr,
372 .setLayoutCount = NUM_DESCRIPTOR_SETS,
373 .pSetLayouts = mDescLayout.data(),
374 };
375
376 res = vkCreatePipelineLayout(mDevice, &pPipelineLayoutCreateInfo, NULL,
377 &mPipelineLayout);
378 rsAssert(res == VK_SUCCESS);
379 }
380
InitShader(uint32_t slot)381 void RSoVScript::InitShader(uint32_t slot) {
382 VkResult res;
383
384 mShaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
385 mShaderStage.pNext = nullptr;
386 mShaderStage.pSpecializationInfo = nullptr;
387 mShaderStage.flags = 0;
388 mShaderStage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
389
390 const char **RSKernelNames = mME->getExportForEachNameList();
391 size_t RSKernelNum = mME->getExportForEachSignatureCount();
392 rsAssert(slot < RSKernelNum);
393 rsAssert(RSKernelNames);
394 rsAssert(RSKernelNames[slot]);
395 // ALOGV("slot = %d kernel name = %s", slot, RSKernelNames[slot]);
396 std::string entryName("entry_");
397 entryName.append(RSKernelNames[slot]);
398
399 mShaderStage.pName = strndup(entryName.c_str(), entryName.size());
400
401 VkShaderModuleCreateInfo moduleCreateInfo = {
402 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
403 .pNext = nullptr,
404 .flags = 0,
405 .codeSize = mSPIRVWords.size() * sizeof(unsigned int),
406 .pCode = mSPIRVWords.data(),
407 };
408 res = vkCreateShaderModule(mDevice, &moduleCreateInfo, NULL,
409 &mShaderStage.module);
410 rsAssert(res == VK_SUCCESS);
411 }
412
InitDescriptorPool(uint32_t inLen)413 void RSoVScript::InitDescriptorPool(uint32_t inLen) {
414 VkResult res;
415 // 1 global buffer, 1 global allocation metadata buffer, 1 output allocation,
416 // and inLen input allocations
417 VkDescriptorPoolSize type_count[] = {{
418 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = inLen + 3,
419 }};
420
421 VkDescriptorPoolCreateInfo descriptor_pool = {
422 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
423 .pNext = nullptr,
424 .maxSets = 1,
425 .poolSizeCount = NELEM(type_count),
426 .pPoolSizes = type_count,
427 };
428
429 res = vkCreateDescriptorPool(mDevice, &descriptor_pool, NULL, &mDescPool);
430 rsAssert(res == VK_SUCCESS);
431 }
432
433 // Iterate through a list of global allocations that are used inside the module
434 // and marshal their type information to a dedicated Vulkan Buffer
MarshalTypeInfo(void)435 void RSoVScript::MarshalTypeInfo(void) {
436 // Marshal global allocation metadata to the device
437 auto *cs = getCpuScript();
438 int nr_globals = mGAMapping->size();
439 if (mGlobalAllocationMetadata == nullptr) {
440 mGlobalAllocationMetadata.reset(
441 new RSoVBuffer(mRSoV, sizeof(struct rsovTypeInfo) * nr_globals));
442 }
443 struct rsovTypeInfo *mappedMetadata =
444 (struct rsovTypeInfo *)mGlobalAllocationMetadata->getHostPtr();
445 for (int i = 0; i < nr_globals; ++i) {
446 if (getGlobalRsType(cs->getGlobalProperties(i)) ==
447 RsDataType::RS_TYPE_ALLOCATION) {
448 ALOGV("global variable %d is an allocation!", i);
449 const void *host_buf;
450 cs->getGlobalVar(i, (void *)&host_buf, sizeof(host_buf));
451 if (!host_buf) continue;
452 const android::renderscript::Allocation *GA =
453 static_cast<const android::renderscript::Allocation *>(host_buf);
454 const android::renderscript::Type *T = GA->getType();
455 rsAssert(T);
456
457 auto global_it = mGAMapping->find(cs->getGlobalName(i));
458 rsAssert(global_it != (*mGAMapping).end());
459 int id = global_it->second;
460 ALOGV("global allocation %s is mapped to ID %d", cs->getGlobalName(i),
461 id);
462 // TODO: marshal other properties
463 mappedMetadata[id].x_size = T->getDimX();
464 mappedMetadata[id].y_size = T->getDimY();
465 mappedMetadata[id].z_size = T->getDimZ();
466 }
467 }
468 }
469
InitDescriptorSet(const std::vector<RSoVAllocation * > & inputAllocations,RSoVAllocation * outputAllocation)470 void RSoVScript::InitDescriptorSet(
471 const std::vector<RSoVAllocation *> &inputAllocations,
472 RSoVAllocation *outputAllocation) {
473 VkResult res;
474
475 VkDescriptorSetAllocateInfo alloc_info = {
476 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
477 .pNext = NULL,
478 .descriptorPool = mDescPool,
479 .descriptorSetCount = NUM_DESCRIPTOR_SETS,
480 .pSetLayouts = mDescLayout.data(),
481 };
482
483 mDescSet.resize(NUM_DESCRIPTOR_SETS);
484 res = vkAllocateDescriptorSets(mDevice, &alloc_info, mDescSet.data());
485 rsAssert(res == VK_SUCCESS);
486
487 std::vector<VkWriteDescriptorSet> writes{
488 // Global variables
489 {
490 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
491 .dstSet = mDescSet[0],
492 .dstBinding = 0,
493 .dstArrayElement = 0,
494 .descriptorCount = 1,
495 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
496 .pBufferInfo = mGlobals->getBufferInfo(),
497 },
498 // Metadata for global Allocations
499 {
500 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
501 .dstSet = mDescSet[0],
502 .dstBinding = 1,
503 .dstArrayElement = 0,
504 .descriptorCount = 1,
505 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
506 .pBufferInfo = mGlobalAllocationMetadata->getBufferInfo(),
507 },
508 // Output Allocation
509 {
510 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
511 .dstSet = mDescSet[0],
512 .dstBinding = 2,
513 .dstArrayElement = 0,
514 .descriptorCount = 1,
515 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
516 .pBufferInfo = outputAllocation->getBuffer()->getBufferInfo(),
517 },
518 };
519
520 // Input Allocations
521 for (uint32_t i = 0; i < inputAllocations.size(); ++i) {
522 writes.push_back({
523 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
524 .dstSet = mDescSet[0],
525 .dstBinding = 3 + i, // input allocations start from binding #3
526 .dstArrayElement = 0,
527 .descriptorCount = 1,
528 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
529 .pBufferInfo = inputAllocations[i]->getBuffer()->getBufferInfo(),
530 });
531 }
532
533 vkUpdateDescriptorSets(mDevice, writes.size(), writes.data(), 0, NULL);
534 }
535
InitPipeline()536 void RSoVScript::InitPipeline() {
537 // DEPENDS on mShaderStage, i.e., InitShader()
538
539 VkResult res;
540
541 VkComputePipelineCreateInfo pipeline_info = {
542 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
543 .pNext = nullptr,
544 .layout = mPipelineLayout,
545 .basePipelineHandle = VK_NULL_HANDLE,
546 .basePipelineIndex = 0,
547 .flags = 0,
548 .stage = mShaderStage,
549 };
550 res = vkCreateComputePipelines(mDevice, VK_NULL_HANDLE, 1, &pipeline_info,
551 NULL, &mComputePipeline);
552 rsAssert(res == VK_SUCCESS);
553 }
554
runForEach(uint32_t slot,uint32_t inLen,const std::vector<RSoVAllocation * > & inputAllocations,RSoVAllocation * outputAllocation)555 void RSoVScript::runForEach(
556 uint32_t slot, uint32_t inLen,
557 const std::vector<RSoVAllocation *> &inputAllocations,
558 RSoVAllocation *outputAllocation) {
559 VkResult res;
560
561 InitShader(slot);
562 InitDescriptorPool(inLen);
563 InitDescriptorAndPipelineLayouts(inLen);
564 MarshalTypeInfo();
565 InitDescriptorSet(inputAllocations, outputAllocation);
566 // InitPipelineCache();
567 InitPipeline();
568
569 VkCommandBuffer cmd;
570
571 VkCommandBufferAllocateInfo cmd_info = {
572 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
573 .pNext = nullptr,
574 .commandPool = mRSoV->getCmdPool(),
575 .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
576 .commandBufferCount = 1,
577 };
578
579 res = vkAllocateCommandBuffers(mDevice, &cmd_info, &cmd);
580 rsAssert(res == VK_SUCCESS);
581
582 VkCommandBufferBeginInfo cmd_buf_info = {
583 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
584 .pNext = nullptr,
585 .flags = 0,
586 .pInheritanceInfo = nullptr,
587 };
588
589 res = vkBeginCommandBuffer(cmd, &cmd_buf_info);
590 rsAssert(res == VK_SUCCESS);
591
592 vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mComputePipeline);
593
594 vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mPipelineLayout,
595 0, mDescSet.size(), mDescSet.data(), 0, nullptr);
596 // Assuming all input allocations are of the same dimensionality
597 const uint32_t width = inputAllocations[0]->getWidth();
598 const uint32_t height = rsMax(inputAllocations[0]->getHeight(), 1U);
599 const uint32_t depth = rsMax(inputAllocations[0]->getDepth(), 1U);
600 vkCmdDispatch(cmd, width, height, depth);
601
602 res = vkEndCommandBuffer(cmd);
603 assert(res == VK_SUCCESS);
604
605 VkSubmitInfo submit_info = {
606 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
607 .commandBufferCount = 1,
608 .pCommandBuffers = &cmd,
609 };
610
611 VkFence fence;
612
613 VkFenceCreateInfo fenceInfo = {
614 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
615 .pNext = nullptr,
616 .flags = 0,
617 };
618
619 vkCreateFence(mDevice, &fenceInfo, NULL, &fence);
620
621 vkQueueSubmit(mRSoV->getQueue(), 1, &submit_info, fence);
622
623 // Make sure command buffer is finished
624 do {
625 res = vkWaitForFences(mDevice, 1, &fence, VK_TRUE, 100000);
626 } while (res == VK_TIMEOUT);
627
628 rsAssert(res == VK_SUCCESS);
629
630 vkDestroyFence(mDevice, fence, NULL);
631
632 // TODO: shall we reuse command buffers?
633 VkCommandBuffer cmd_bufs[] = {cmd};
634 vkFreeCommandBuffers(mDevice, mRSoV->getCmdPool(), 1, cmd_bufs);
635
636 vkDestroyPipeline(mDevice, mComputePipeline, nullptr);
637 for (int i = 0; i < NUM_DESCRIPTOR_SETS; i++)
638 vkDestroyDescriptorSetLayout(mDevice, mDescLayout[i], nullptr);
639 vkDestroyPipelineLayout(mDevice, mPipelineLayout, nullptr);
640 vkFreeDescriptorSets(mDevice, mDescPool, NUM_DESCRIPTOR_SETS,
641 mDescSet.data());
642 vkDestroyDescriptorPool(mDevice, mDescPool, nullptr);
643 free((void *)mShaderStage.pName);
644 vkDestroyShaderModule(mDevice, mShaderStage.module, nullptr);
645 }
646
647 } // namespace rsov
648 } // namespace renderscript
649 } // namespace android
650
651 using android::renderscript::Allocation;
652 using android::renderscript::Context;
653 using android::renderscript::Element;
654 using android::renderscript::ObjectBase;
655 using android::renderscript::RsdCpuReference;
656 using android::renderscript::Script;
657 using android::renderscript::ScriptC;
658 using android::renderscript::rs_script;
659 using android::renderscript::rsov::RSoVContext;
660 using android::renderscript::rsov::RSoVScript;
661 using android::renderscript::rsov::compileBitcode;
662
663 namespace {
664 // A class to parse global allocation metadata; essentially a subset of JSON
665 // it would look like {"__RSoV_GA": {"g":42}}
666 // The result is stored in a refence to a map<string, int>
667 class ParseMD {
668 public:
ParseMD(std::string s,std::map<std::string,int> & map)669 ParseMD(std::string s, std::map<std::string, int> &map)
670 : mString(s), mMapping(map) {}
671
parse(void)672 bool parse(void) {
673 // remove outermose two pairs of braces
674 mString = removeBraces(mString);
675 if (mString.empty()) {
676 return false;
677 }
678
679 mString = removeBraces(mString);
680 if (mString.empty()) {
681 return false;
682 }
683
684 // Now we are supposed to have a comma-separated list that looks like:
685 // "foo":42, "bar":56
686 split<','>(mString, [&](auto s) {
687 split<':'>(s, nullptr, [&](auto pair) {
688 rsAssert(pair.size() == 2);
689 std::string ga_name = removeQuotes(pair[0]);
690 int id = atoi(pair[1].c_str());
691 ALOGV("ParseMD: global allocation %s has ID %d", ga_name.c_str(), id);
692 mMapping[ga_name] = id;
693 });
694 });
695 return true;
696 }
697
698 private:
699 template <char L, char R>
removeMatching(const std::string & s)700 static std::string removeMatching(const std::string &s) {
701 auto leftCBrace = s.find(L);
702 if (leftCBrace == std::string::npos) {
703 return "";
704 }
705 leftCBrace++;
706 return s.substr(leftCBrace, s.rfind(R) - leftCBrace);
707 }
708
removeBraces(const std::string & s)709 static std::string removeBraces(const std::string &s) {
710 return removeMatching<'{', '}'>(s);
711 }
712
removeQuotes(const std::string & s)713 static std::string removeQuotes(const std::string &s) {
714 return removeMatching<'"', '"'>(s);
715 }
716
717 // Splitting a string, and call "each" and/or "all" with individal elements
718 // and a vector of all tokenized elements
719 template <char D>
split(const std::string & s,std::function<void (const std::string &)> each,std::function<void (const std::vector<const std::string> &)> all=nullptr)720 static void split(const std::string &s,
721 std::function<void(const std::string &)> each,
722 std::function<void(const std::vector<const std::string> &)>
723 all = nullptr) {
724 std::vector<const std::string> result;
725 for (std::string::size_type pos = 0; pos < s.size(); pos++) {
726 std::string::size_type begin = pos;
727
728 while (pos <= s.size() && s[pos] != D) pos++;
729 std::string found = s.substr(begin, pos - begin);
730 if (each) each(found);
731 if (all) result.push_back(found);
732 }
733 if (all) all(result);
734 }
735
736 std::string mString;
737 std::map<std::string, int> &mMapping;
738 };
739
740 } // namespace
741
742 class ExtractRSoVMD : public android::spirit::DoNothingVisitor {
743 public:
ExtractRSoVMD()744 ExtractRSoVMD() : mGAMapping(new std::map<std::string, int>) {}
745
visit(android::spirit::StringInst * s)746 void visit(android::spirit::StringInst *s) {
747 ALOGV("ExtractRSoVMD: string = %s", s->mOperand1.c_str());
748 std::map<std::string, int> mapping;
749 ParseMD p(s->mOperand1, mapping);
750 if (p.parse()) {
751 *mGAMapping = std::move(mapping);
752 }
753 }
754
takeMapping(void)755 std::map<std::string, int> *takeMapping(void) { return mGAMapping.release(); }
756
757 private:
758 std::unique_ptr<std::map<std::string, int> > mGAMapping;
759 };
760
rsovScriptInit(const Context * rsc,ScriptC * script,char const * resName,char const * cacheDir,uint8_t const * bitcode,size_t bitcodeSize,uint32_t flags)761 bool rsovScriptInit(const Context *rsc, ScriptC *script, char const *resName,
762 char const *cacheDir, uint8_t const *bitcode,
763 size_t bitcodeSize, uint32_t flags) {
764 RSoVHal *hal = static_cast<RSoVHal *>(rsc->mHal.drv);
765
766 std::unique_ptr<bcinfo::MetadataExtractor> bitcodeMetadata(
767 new bcinfo::MetadataExtractor((const char *)bitcode, bitcodeSize));
768 if (!bitcodeMetadata || !bitcodeMetadata->extract()) {
769 ALOGE("Could not extract metadata from bitcode from %s", resName);
770 return false;
771 }
772
773 std::vector<uint8_t> modifiedBitcode;
774 auto spvWords =
775 compileBitcode(resName, cacheDir, (const char *)bitcode, bitcodeSize, modifiedBitcode);
776 if (!spvWords.empty() && !modifiedBitcode.empty()) {
777 // Extract compiler metadata on allocation->binding mapping
778 android::spirit::Module *module =
779 android::spirit::Deserialize<android::spirit::Module>(spvWords);
780 rsAssert(module);
781 ExtractRSoVMD ga_md;
782 module->accept(&ga_md);
783
784 RSoVScript *rsovScript =
785 new RSoVScript(hal->mRSoV, std::move(spvWords),
786 bitcodeMetadata.release(), ga_md.takeMapping());
787 if (rsovScript) {
788 std::string modifiedResName(resName);
789 modifiedResName.append("_modified");
790 RsdCpuReference::CpuScript *cs = hal->mCpuRef->createScript(
791 script, modifiedResName.c_str(), cacheDir, modifiedBitcode.data(),
792 modifiedBitcode.size(), flags);
793 if (cs != nullptr) {
794 cs->populateScript(script);
795 rsovScript->setCpuScript(cs);
796 RSoVScript::initScriptOnRSoV(script, rsovScript);
797 return true;
798 }
799 }
800 }
801
802 ALOGD("Failed creating an RSoV script for %s", resName);
803 // Fall back to CPU driver instead
804
805 std::unique_ptr<RsdCpuReference::CpuScript> cs(hal->mCpuRef->createScript(
806 script, resName, cacheDir, bitcode, bitcodeSize, flags));
807 if (cs == nullptr) {
808 ALOGE("Failed creating a CPU script %p for %s (%p)", cs.get(), resName,
809 script);
810 return false;
811 }
812 cs->populateScript(script);
813
814 RSoVScript::initScriptOnCpu(script, cs.release());
815
816 return true;
817 }
818
rsovInitIntrinsic(const Context * rsc,Script * s,RsScriptIntrinsicID iid,Element * e)819 bool rsovInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid,
820 Element *e) {
821 RSoVHal *dc = (RSoVHal *)rsc->mHal.drv;
822 RsdCpuReference::CpuScript *cs = dc->mCpuRef->createIntrinsic(s, iid, e);
823 if (cs == nullptr) {
824 return false;
825 }
826 s->mHal.drv = cs;
827 cs->populateScript(s);
828 return true;
829 }
830
rsovScriptInvokeForEach(const Context * rsc,Script * s,uint32_t slot,const Allocation * ain,Allocation * aout,const void * usr,size_t usrLen,const RsScriptCall * sc)831 void rsovScriptInvokeForEach(const Context *rsc, Script *s, uint32_t slot,
832 const Allocation *ain, Allocation *aout,
833 const void *usr, size_t usrLen,
834 const RsScriptCall *sc) {
835 if (ain == nullptr) {
836 rsovScriptInvokeForEachMulti(rsc, s, slot, nullptr, 0, aout, usr, usrLen,
837 sc);
838 } else {
839 const Allocation *ains[1] = {ain};
840
841 rsovScriptInvokeForEachMulti(rsc, s, slot, ains, 1, aout, usr, usrLen, sc);
842 }
843 }
844
rsovScriptInvokeForEachMulti(const Context * rsc,Script * s,uint32_t slot,const Allocation ** ains,size_t inLen,Allocation * aout,const void * usr,size_t usrLen,const RsScriptCall * sc)845 void rsovScriptInvokeForEachMulti(const Context *rsc, Script *s, uint32_t slot,
846 const Allocation **ains, size_t inLen,
847 Allocation *aout, const void *usr,
848 size_t usrLen, const RsScriptCall *sc) {
849 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
850 cs->invokeForEach(slot, ains, inLen, aout, usr, usrLen, sc);
851 }
852
rsovScriptInvokeRoot(const Context * dc,Script * s)853 int rsovScriptInvokeRoot(const Context *dc, Script *s) {
854 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
855 return cs->invokeRoot();
856 }
857
rsovScriptInvokeInit(const Context * dc,Script * s)858 void rsovScriptInvokeInit(const Context *dc, Script *s) {
859 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
860 cs->invokeInit();
861 }
862
rsovScriptInvokeFreeChildren(const Context * dc,Script * s)863 void rsovScriptInvokeFreeChildren(const Context *dc, Script *s) {
864 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
865 cs->invokeFreeChildren();
866 }
867
rsovScriptInvokeFunction(const Context * dc,Script * s,uint32_t slot,const void * params,size_t paramLength)868 void rsovScriptInvokeFunction(const Context *dc, Script *s, uint32_t slot,
869 const void *params, size_t paramLength) {
870 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
871 cs->invokeFunction(slot, params, paramLength);
872 }
873
rsovScriptInvokeReduce(const Context * dc,Script * s,uint32_t slot,const Allocation ** ains,size_t inLen,Allocation * aout,const RsScriptCall * sc)874 void rsovScriptInvokeReduce(const Context *dc, Script *s, uint32_t slot,
875 const Allocation **ains, size_t inLen,
876 Allocation *aout, const RsScriptCall *sc) {
877 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
878 cs->invokeReduce(slot, ains, inLen, aout, sc);
879 }
880
rsovScriptSetGlobalVar(const Context * dc,const Script * s,uint32_t slot,void * data,size_t dataLength)881 void rsovScriptSetGlobalVar(const Context *dc, const Script *s, uint32_t slot,
882 void *data, size_t dataLength) {
883 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
884 cs->setGlobalVar(slot, data, dataLength);
885 }
886
rsovScriptGetGlobalVar(const Context * dc,const Script * s,uint32_t slot,void * data,size_t dataLength)887 void rsovScriptGetGlobalVar(const Context *dc, const Script *s, uint32_t slot,
888 void *data, size_t dataLength) {
889 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
890 cs->getGlobalVar(slot, data, dataLength);
891 }
892
rsovScriptSetGlobalVarWithElemDims(const Context * dc,const Script * s,uint32_t slot,void * data,size_t dataLength,const android::renderscript::Element * elem,const uint32_t * dims,size_t dimLength)893 void rsovScriptSetGlobalVarWithElemDims(
894 const Context *dc, const Script *s, uint32_t slot, void *data,
895 size_t dataLength, const android::renderscript::Element *elem,
896 const uint32_t *dims, size_t dimLength) {
897 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
898 cs->setGlobalVarWithElemDims(slot, data, dataLength, elem, dims, dimLength);
899 }
900
rsovScriptSetGlobalBind(const Context * dc,const Script * s,uint32_t slot,Allocation * data)901 void rsovScriptSetGlobalBind(const Context *dc, const Script *s, uint32_t slot,
902 Allocation *data) {
903 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
904 cs->setGlobalBind(slot, data);
905 }
906
rsovScriptSetGlobalObj(const Context * dc,const Script * s,uint32_t slot,ObjectBase * data)907 void rsovScriptSetGlobalObj(const Context *dc, const Script *s, uint32_t slot,
908 ObjectBase *data) {
909 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
910 cs->setGlobalObj(slot, data);
911 }
912
rsovScriptDestroy(const Context * dc,Script * s)913 void rsovScriptDestroy(const Context *dc, Script *s) {
914 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
915 delete cs;
916 s->mHal.drv = nullptr;
917 }
918
rsovScriptGetAllocationForPointer(const android::renderscript::Context * dc,const android::renderscript::Script * sc,const void * ptr)919 Allocation *rsovScriptGetAllocationForPointer(
920 const android::renderscript::Context *dc,
921 const android::renderscript::Script *sc, const void *ptr) {
922 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)sc->mHal.drv;
923 return cs->getAllocationForPointer(ptr);
924 }
925
// Refreshes the cached script object `obj` so that it refers to `script`.
// On LP64 builds the three unused fields of the object are cleared as well.
void rsovScriptUpdateCachedObject(const Context *rsc, const Script *script,
                                  rs_script *obj) {
  obj->p = script;
#ifdef __LP64__
  obj->unused1 = nullptr;
  obj->unused2 = nullptr;
  obj->unused3 = nullptr;
#endif
}
935