/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#undef NDEBUG

#include "CompilationBuilder.h"
#include "Manager.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"
#include "SampleDriver.h"
#include "TestNeuralNetworksWrapper.h"
#include "Utils.h"
#include "ValidateHal.h"

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <random>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include <unistd.h>

#include <android-base/logging.h>
#include <android/sharedmem.h>
#include <gtest/gtest.h>

// Uncomment the following line to generate some debugging output that
// may be useful when analyzing failures:
//
// #define VERBOSE VERBOSE

// We randomly generate tests (model + input data) at runtime, and verify
// that we get the same results whether we do partitioned compilation/execution
// or non-partitioned compilation/execution.  We perform a test as follows:
//
// (1) Randomly generate a model (graph and weights), randomly generate input
//     data, randomly assign inputs and outputs to CPU memory or to shared
//     memory.
//
//     Randomly leave dimensions unset for intermediate operands.
//
// (2) Randomly generate drivers based on the sample driver, each of which
//     executes models on the CPU.  They differ according to which operations
//     they support.
//
// (3) Compile and execute without partitioning, saving off the results.
//
// (4) Compile and execute with partitioning.
//
// (5) Verify that the saved results from (3) match the results from (4).
//
// For simplicity, all data (model inputs, model outputs, weights,
// temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
// dimensions are fixed throughout a particular test case (and
// randomly determined).  This prevents us from having to find a
// mechanism to "resize" data (e.g., if ADD#a operates on data of size
// 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
// and ADD#b become inputs of ADD#c, do we need to insert one or more
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
// from size 2x2 to size 3x3 in order to match ADD#b?).  In the few
// cases where an operand cannot be of this type, it is a constant
// (e.g., activation functions and RNN bias).
//
// Each operation we generate has a signature (described in more
// detail later).  The randomly generated drivers decide which
// operations they can execute by checking operation signatures.  Once
// we have built the model and know the set of signatures, we randomly
// assign each signature to a driver.  No signature is supported by
// multiple drivers -- we're not testing the logic that the
// partitioning algorithm uses to select the best driver for an
// operation.
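//
// For example, if a generated model contains only the signatures (ADD, NONE),
// (ADD, RELU), and (MUL, NONE), each of the three is randomly assigned to
// exactly one generated driver -- so the two ADD variants may land on
// different drivers, forcing a partition boundary between operations that
// share an operation type but not an activation function.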

namespace android {

using CompilationBuilder = nn::CompilationBuilder;
using Device = nn::Device;
using DeviceManager = nn::DeviceManager;
using ExecutionPlan = nn::ExecutionPlan;
using HalVersion = nn::HalVersion;
using HidlModel = hardware::neuralnetworks::V1_2::Model;
using HidlToken =
        ::android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
using MemoryBuilder = nn::Memory;
using ModelBuilder = nn::ModelBuilder;
using Result = nn::test_wrapper::Result;
using SampleDriver = nn::sample_driver::SampleDriver;
using WrapperCompilation = nn::test_wrapper::Compilation;
using WrapperExecution = nn::test_wrapper::Execution;
using WrapperMemory = nn::test_wrapper::Memory;
using WrapperModel = nn::test_wrapper::Model;
using WrapperOperandType = nn::test_wrapper::OperandType;
using WrapperType = nn::test_wrapper::Type;

namespace {

/// Configure test size //////////////////////////////////////////////////////////

// Maximum number of operations in a generated model.  We may exceed this
// in order to connect otherwise disjoint subgraphs.
static const unsigned kMaxNumOperations = 100;

// We build models to process 2-D square tensors up to this size in each dimension;
// note that the API promotes by-value weights larger than 128 bytes to by-reference,
// so we want to ensure that we can pick both sizes that exceed and sizes that do
// not exceed this threshold.
static const unsigned kMaxProblemSize = 8;
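// (A problem-sized constant occupies 4 * n * n bytes, so with kMaxProblemSize = 8
// the candidate sizes range from 4 bytes at n = 1 up to 256 bytes at n = 8,
// straddling the 128-byte by-value limit.)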

// First seed for pseudorandom test generation.
static const unsigned kFirstSeed = 0;

// Number of test cases.
static const unsigned kNumTestCases = 225;

// Force all graph weights into a single pool (as we recommend to users)
// or allow them to be distributed across multiple pools (more stress
// on the partitioning algorithm and the rest of the runtime)?
// Forcing all graph weights into a single pool may be necessary to
// prevent large graphs from running up against http://b/70302693
// "NNAPI overuses (?) fds".
static const bool kAllWeightsInOnePool = false;

//////////////////////////////////////////////////////////////////////////////////

// The signature of an operation consists of the operation type (e.g.,
// ADD) and the activation function (use -1 in the case of an
// operation type for which the activation function is inapplicable).
typedef std::pair<ANeuralNetworksOperationType, int> Signature;
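// For example, an ADD operation fused with RELU has the signature
// {ANEURALNETWORKS_ADD, ANEURALNETWORKS_FUSED_RELU}, while TANH, which takes
// no activation function input, has the signature {ANEURALNETWORKS_TANH, -1}.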

// This class adds some simple utilities on top of WrapperModel.  For example,
// it provides access to certain features from ModelBuilder that are not exposed
// by the base class (such as inputCount() and operation index).
class TestModel : public WrapperModel {
public:

    uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
                          const std::vector<uint32_t>& outputs) {
        const uint32_t operationIndex = operationCount();
        mOperations.push_back(outputs);
        WrapperModel::addOperation(type, inputs, outputs);
        return operationIndex;
    }

    uint32_t operationCount() const {
        return mOperations.size();
    }

    uint32_t inputCount() const {
        return builder()->inputCount();
    }
    uint32_t outputCount() const {
        return builder()->outputCount();
    }

    const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const {
        CHECK(index < mOperations.size());
        return mOperations[index];
    }

    // All values are immediately copied into the model (we need to do
    // this ourselves in cases where the underlying NNAPI does not).
    void setOperandValue(uint32_t index, const std::vector<float>& value) {
        const size_t length = value.size() * sizeof(float);

        if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
            WrapperModel::setOperandValue(index, value.data(), length);
        } else {
            mOperandValues.push_back(value);
            WrapperModel::setOperandValue(index, mOperandValues.back().data(), length);
        }
    }
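    // (For reference: ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES is
    // 128 bytes, so a float vector of more than 32 elements takes the
    // by-reference path above and must be kept alive in mOperandValues.)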

    void setOperandValue(uint32_t index, const std::vector<int32_t>& value) {
        const size_t length = value.size() * sizeof(int32_t);

        CHECK(length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        WrapperModel::setOperandValue(index, value.data(), length);
    }

    void setOperandValue(uint32_t index, int32_t value) {
        CHECK(sizeof(value) <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        WrapperModel::setOperandValue(index, &value, sizeof(value));
    }

private:

    const ModelBuilder* builder() const {
        return reinterpret_cast<const ModelBuilder*>(getHandle());
    }

    // Representation of operations: vector index is operation number,
    // vector value is operation's output operands.
    std::vector<std::vector<uint32_t>> mOperations;

    // Large operand values -- not immediately copied into the
    // WrapperModel, so remembered here instead.
    std::vector<std::vector<float>> mOperandValues;
};

// This class adds some simple utilities on top of WrapperCompilation in order
// to provide access to certain features from CompilationBuilder that are not
// exposed by the base class.
class TestCompilation : public WrapperCompilation {
public:
    TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {}

    TestCompilation(const WrapperModel* model, std::vector<std::shared_ptr<Device>> devices) {
        ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
        CompilationBuilder* c = nullptr;
        int result = m->createCompilation(&c, devices);
        EXPECT_EQ(result, 0);
        mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
    }

    using WrapperCompilation::finish;

    Result setPartitioning(uint32_t partitioning) {
        return static_cast<Result>(builder()->setPartitioning(partitioning));
    }

    const ExecutionPlan& getExecutionPlan() const {
        return builder()->forTest_getExecutionPlan();
    }

private:
    const CompilationBuilder* builder() const {
        return reinterpret_cast<const CompilationBuilder*>(getHandle());
    }
    CompilationBuilder* builder() {
        return reinterpret_cast<CompilationBuilder*>(getHandle());
    }
};

// This class is used to manage a collection of memory regions,
// disjoint windows onto a set of Memory instances, each of which is
// associated with a single shared memory region.  Each region and
// Memory instance is assigned a number.  The usage pattern is as
// follows:
// - Call addMemory() and addRegion() as many times as needed to
//   declare (but not define) Memory instances and declare region
//   instances.
// - Call layout() to define the Memory instances.
// - Call getRegion() as many times as needed to get the details
//   of memory regions (such as address, or Memory/offset/length).
//   The Memory instances created by layout() are owned by the
//   TestMemories instance, and are destroyed when the TestMemories
//   instance is destroyed.
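//
// Illustrative sketch of the pattern (not taken from any test; the sizes and
// names here are arbitrary):
//
//     TestMemories memories;
//     const unsigned memoryIndex = memories.addMemory();
//     const unsigned regionIndex =
//             memories.addRegion(memoryIndex, 16 * sizeof(float));
//     memories.layout();
//     float* data = static_cast<float*>(memories.getRegion(regionIndex));
//     data[0] = 1.0f;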
class TestMemories {
public:
    TestMemories() = default;
    ~TestMemories();

    TestMemories(const TestMemories&) = delete;
    TestMemories& operator=(const TestMemories&) = delete;

    unsigned addMemory() {
        CHECK(!mLayoutDone);
        mMemorySizes.push_back(0);
        return memoryCount() - 1;
    }
    unsigned memoryCount() const {
        return mMemorySizes.size();
    }

    unsigned addRegion(unsigned memoryIndex, uint32_t length) {
        CHECK(!mLayoutDone);
        CHECK(memoryIndex < memoryCount());
        uint32_t& memorySize = mMemorySizes[memoryIndex];
        auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length);
        mRegions.push_back(desc);
        memorySize += length;
        return regionCount() - 1;
    }
    unsigned regionCount() const {
        return mRegions.size();
    }

    void layout();

    void* getRegion(unsigned regionIndex,
                    const WrapperMemory** pMemory, uint32_t* pOffset, uint32_t* pLength) {
        CHECK(mLayoutDone);
        CHECK(regionIndex < regionCount());
        const auto& regionDescriptor = mRegions[regionIndex];
        const WrapperMemory* memory = &mMemorys[std::get<0>(regionDescriptor)];
        uint32_t offset = std::get<1>(regionDescriptor);
        uint32_t length = std::get<2>(regionDescriptor);

        uint8_t* buffer;
        if (reinterpret_cast<MemoryBuilder*>(memory->get())->getPointer(&buffer) !=
            ANEURALNETWORKS_NO_ERROR) {
            CHECK(0);
        }

        if (pMemory) *pMemory = memory;
        if (pOffset) *pOffset = offset;
        if (pLength) *pLength = length;

        return buffer + offset;
    }

    void* getRegion(unsigned regionIndex) {
        return getRegion(regionIndex, nullptr, nullptr, nullptr);
    }

private:
    // Index is the memory index; value is the size of the memory
    // (aggregate size of all regions in the memory).
    std::vector<uint32_t> mMemorySizes;

    // Index is the memory index.
    std::vector<WrapperMemory> mMemorys;
    std::vector<int> mFDs;

    // Index is the region index; tuple represents memory index,
    // region offset within memory, region length.
    std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions;

    // For sanity checking.
    bool mLayoutDone = false;
};

void TestMemories::layout() {
    CHECK(!mLayoutDone);
    for (uint32_t memorySize : mMemorySizes) {
        const int fd = ASharedMemory_create(nullptr, memorySize);
        CHECK(fd >= 0);
        mMemorys.emplace_back(memorySize, PROT_READ | PROT_WRITE, fd, 0);
        mFDs.push_back(fd);
    }
    mLayoutDone = true;
}

TestMemories::~TestMemories() {
    for (int fd : mFDs) {
        close(fd);
    }
}

class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
public:
    RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}

    static Signature getSignature(const HidlModel& model, const Operation& operation);

protected:
    static V1_0::IDevice* makeTestDriver(HalVersion version, const char* name,
                                         std::set<Signature> signatures);

    static HalVersion getMinHalVersion(ANeuralNetworksOperationType type);

    static std::string to_string(HalVersion version);

    bool randBool() { return randUInt(2) == 1; }

    double randFrac() {  // [0.0, 1.0)
        return mRandNumUnitDist(mRandNumEng);
    }

    unsigned randUInt(unsigned limit) {  // [0, limit)
        return unsigned(randFrac() * limit);
    }

    // Represents an operation in which every input and output operand
    // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except:
    // - One input operand may be an activation function.
    // - Any number of input operands may be "special" in some other way
    //   (and in this implementation, not produced by any other operation).
    // We require that:
    // - There be at least one input operand that is neither an
    //   activation function nor "special".
    struct OperationPattern {
        HalVersion mMinHalVersion;
        int mOperationType;
        unsigned mNumInputs;
        unsigned mNumOutputs;
        int mActivationFunctionInputIndex;  // <0 if none

        // Returns operand index, or <0 if input is normal (must not
        // be called for an activation function operand).  Function
        // should have the following prototype:
        //
        //     int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex);
        //
        int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned);
    };

    static const OperationPattern kOperationPatterns[];

    // See OperationPattern::mMakeSpecialInput.  This function is used to
    // manufacture an RNN input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) {
        if (inputIndex != 3) {
            return -1;
        }

        // input operand 3 is bias, a 1-D tensor
        const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, {problemSize});
        const uint32_t operandIndex = model->addOperand(&biasType);
        std::vector<float> biasValue(problemSize);
        std::generate(biasValue.begin(), biasValue.end(),
                      [this]{ return randFrac(); });
        model->setOperandValue(operandIndex, biasValue);
        return int(operandIndex);
    }

    // See OperationPattern::mMakeSpecialInput.  This function is used to
    // manufacture a TRANSPOSE input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeTransposeSpecialInput(unsigned /* problemSize */, TestModel* model,
                                  unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is perm, a 1-D tensor
        const WrapperOperandType permType(WrapperType::TENSOR_INT32, {2});
        const uint32_t operandIndex = model->addOperand(&permType);
        std::vector<int32_t> permValue = {1, 0};
        model->setOperandValue(operandIndex, permValue);
        return int(operandIndex);
    }

#ifdef VERBOSE
    class ModelStats {
    public:
        ModelStats(const ModelBuilder* model) :
                mBuilder(model) { }
        ModelStats(const WrapperModel* model) :
                mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) { }
        friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) {
            const uint32_t operandCount = stats.mBuilder->operandCount();
            const uint32_t inputCount = stats.mBuilder->inputCount();
            const uint32_t outputCount = stats.mBuilder->outputCount();
            out << "operationCount = " << stats.mBuilder->operationCount()
                << ", operandCount = " << operandCount
                << ", inputCount = " << inputCount
                << " (" << (double(inputCount) / operandCount) << ")"
                << ", outputCount = " << outputCount
                << " (" << (double(outputCount) / operandCount) << ")";
            return out;
        }
    private:
        const ModelBuilder* mBuilder;
    };

    template <typename T_iterator>
    static void dump(T_iterator I, T_iterator E) {
        std::cout << "{";
        for (; I != E; I++) {
            std::cout << " " << *I;
        }
        std::cout << " }" << std::endl;
    }
#endif

    std::mt19937 mRandNumEng;

private:
    std::uniform_real_distribution<double> mRandNumUnitDist;
};

const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = {
        {HalVersion::V1_0, ANEURALNETWORKS_ADD, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_MUL, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_RNN, 6, 2, 5,
         &RandomPartitioningTest::makeRnnSpecialInput},
        {HalVersion::V1_0, ANEURALNETWORKS_TANH, 1, 1, -1, nullptr},

        {HalVersion::V1_1, ANEURALNETWORKS_SUB, 3, 1, 2, nullptr},
        {HalVersion::V1_1, ANEURALNETWORKS_TRANSPOSE, 2, 1, -1,
         &RandomPartitioningTest::makeTransposeSpecialInput},

        {HalVersion::V1_2, ANEURALNETWORKS_MAXIMUM, 2, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_NEG, 1, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_SIN, 1, 1, -1, nullptr},
};
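// How to read an entry, using ADD as an example: minimum HAL version V1_0,
// operation type ANEURALNETWORKS_ADD, 3 inputs, 1 output, activation function
// at input index 2, and no special-input factory (nullptr).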

HalVersion RandomPartitioningTest::getMinHalVersion(ANeuralNetworksOperationType type) {
    static const auto kOperationToVersion = [] {
        std::map<ANeuralNetworksOperationType, HalVersion> result;
        for (const auto& pattern : kOperationPatterns) {
            result[pattern.mOperationType] = pattern.mMinHalVersion;
        }
        return result;
    }();

    return kOperationToVersion.at(type);
}
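// For example, per the kOperationPatterns table above,
// getMinHalVersion(ANEURALNETWORKS_SUB) returns HalVersion::V1_1.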

Signature RandomPartitioningTest::getSignature(const HidlModel& model, const Operation& operation) {
    static const auto kOperationToActivation = [] {
        std::map<ANeuralNetworksOperationType, int> result;
        for (const auto& pattern : kOperationPatterns) {
            result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex;
        }
        return result;
    }();

    const ANeuralNetworksOperationType operationType =
            static_cast<ANeuralNetworksOperationType>(operation.type);
    const int activationFunctionInputIndex = kOperationToActivation.at(operationType);
    if (activationFunctionInputIndex < 0) {
        return Signature(operationType, -1);
    }

    const Operand& operand = model.operands[operation.inputs[activationFunctionInputIndex]];
    CHECK(operand.lifetime == OperandLifeTime::CONSTANT_COPY);
    CHECK(operand.type == OperandType::INT32);
    int32_t value;
    memcpy(&value,
           &model.operandValues[operand.location.offset],
           operand.location.length);
    return Signature(operationType, value);
}

std::string RandomPartitioningTest::to_string(HalVersion version) {
    switch (version) {
        case HalVersion::V1_0:
            return "V1_0";
        case HalVersion::V1_1:
            return "V1_1";
        case HalVersion::V1_2:
            return "V1_2";
        default:
            return "V_UNKNOWN";
    }
}

class TestDriver : public SampleDriver {
public:
    // Behaves like SampleDriver, except that it only supports
    // operations with the specified signatures.
    TestDriver(const char* name, std::set<Signature> signatures) :
            SampleDriver(name), mSignatures(std::move(signatures)) { }

    Return<void> getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb) override {
        android::nn::initVLogMask();
        const PerformanceInfo kPerf = {.execTime = 0.75f, .powerUsage = 0.75f};
        Capabilities capabilities = {
                .relaxedFloat32toFloat16PerformanceScalar = kPerf,
                .relaxedFloat32toFloat16PerformanceTensor = kPerf,
                .operandPerformance = nn::nonExtensionOperandPerformance(kPerf)};
        _hidl_cb(ErrorStatus::NONE, capabilities);
        return Void();
    }

    Return<void> getSupportedOperations_1_2(const HidlModel& model,
                                            getSupportedOperations_cb cb) override {
        if (nn::validateModel(model)) {
            const size_t count = model.operations.size();
            std::vector<bool> supported(count);
            for (size_t i = 0; i < count; i++) {
                supported[i] = (mSignatures.count(RandomPartitioningTest::getSignature(
                                        model, model.operations[i])) != 0);
            }
            cb(ErrorStatus::NONE, supported);
        } else {
            std::vector<bool> supported;
            cb(ErrorStatus::INVALID_ARGUMENT, supported);
        }
        return Void();
    }

    Return<ErrorStatus> prepareModel_1_2(const HidlModel& model, ExecutionPreference preference,
                                         const hidl_vec<hidl_handle>& modelCache,
                                         const hidl_vec<hidl_handle>& dataCache,
                                         const HidlToken& token,
                                         const sp<IPreparedModelCallback>& callback) override {
        // NOTE: We verify that all operations in the model are supported.
        ErrorStatus outStatus = ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_2(
                model,
                [&outStatus](ErrorStatus inStatus, const hidl_vec<bool>& supportedOperations) {
                    if (inStatus == ErrorStatus::NONE) {
                        if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                        [](bool v){ return v; })) {
                            outStatus = ErrorStatus::NONE;
                        }
                    }
                });
        if (ret.isOk() && (outStatus == ErrorStatus::NONE)) {
            return SampleDriver::prepareModel_1_2(model, preference, modelCache, dataCache, token,
                                                  callback);
        } else {
            callback->notify_1_2(ErrorStatus::INVALID_ARGUMENT, nullptr);
            return ErrorStatus::INVALID_ARGUMENT;
        }
    }

private:
    const std::set<Signature> mSignatures;
};

// Like TestDriver, but implementing 1.1
class TestDriverV1_1 : public V1_1::IDevice {
public:
    TestDriverV1_1(const char* name, std::set<Signature> signatures)
        : mDriverV1_2(new TestDriver(name, std::move(signatures))) {}
    Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
        return mDriverV1_2->getCapabilities_1_1(_hidl_cb);
    }
    Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
                                            getSupportedOperations_1_1_cb _hidl_cb) override {
        return mDriverV1_2->getSupportedOperations_1_1(model, _hidl_cb);
    }
    Return<ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, ExecutionPreference preference,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mDriverV1_2->prepareModel_1_1(model, preference, actualCallback);
    }
    Return<DeviceStatus> getStatus() override { return mDriverV1_2->getStatus(); }
    Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mDriverV1_2->getCapabilities(_hidl_cb);
    }
    Return<void> getSupportedOperations(const V1_0::Model& model,
                                        getSupportedOperations_cb _hidl_cb) override {
        return mDriverV1_2->getSupportedOperations(model, _hidl_cb);
    }
    Return<ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mDriverV1_2->prepareModel(model, actualCallback);
    }

private:
    const sp<V1_2::IDevice> mDriverV1_2;
};

// Like TestDriver, but implementing 1.0
class TestDriverV1_0 : public V1_0::IDevice {
public:
    TestDriverV1_0(const char* name, std::set<Signature> signatures)
        : mDriverV1_2(new TestDriver(name, std::move(signatures))) {}
    Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mDriverV1_2->getCapabilities(_hidl_cb);
    }
    Return<void> getSupportedOperations(const V1_0::Model& model,
                                        getSupportedOperations_cb _hidl_cb) override {
        return mDriverV1_2->getSupportedOperations(model, _hidl_cb);
    }
    Return<ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mDriverV1_2->prepareModel(model, actualCallback);
    }
    Return<DeviceStatus> getStatus() override { return mDriverV1_2->getStatus(); }

private:
    const sp<V1_2::IDevice> mDriverV1_2;
};

V1_0::IDevice* RandomPartitioningTest::makeTestDriver(HalVersion version, const char* name,
                                                      std::set<Signature> signatures) {
    switch (version) {
        case HalVersion::V1_0:
            return new TestDriverV1_0(name, std::move(signatures));
        case HalVersion::V1_1:
            return new TestDriverV1_1(name, std::move(signatures));
        case HalVersion::V1_2:
            return new TestDriver(name, std::move(signatures));
        default:
            ADD_FAILURE() << "Unexpected HalVersion " << static_cast<int32_t>(version);
            return nullptr;
    }
}

INSTANTIATE_TEST_CASE_P(Seed, RandomPartitioningTest,
                        ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases));

TEST_P(RandomPartitioningTest, Test) {
    LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam();

#ifdef VERBOSE
    std::cout << std::setprecision(2) << std::fixed << std::setw(4);
#endif

    const unsigned problemSize = 1 + randUInt(kMaxProblemSize);
    const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, {problemSize, problemSize});
    const WrapperOperandType unknownDimensionsType(WrapperType::TENSOR_FLOAT32, {0, 0});

    static const WrapperOperandType activationFunctionType(WrapperType::INT32, {});

    const unsigned numOperations = 2 + randUInt(kMaxNumOperations - 1);
    const bool allowDeadOperations = (randFrac() < 0.2);
    const bool allowUnknownDimensions = (randFrac() < 0.25);

    // TODO: The current algorithm builds the graph in a forward
    // direction (i.e., later-generated operations consume outputs
    // from earlier-generated operations).  In order to get more
    // variation in graph topology, perhaps we should also create an
    // algorithm to build the graph in a backward direction (i.e.,
    // later-generated operations produce outputs to be consumed by
    // earlier-generated operations).
    [[maybe_unused]] const bool buildForward = randBool();

    // TODO: Add a form of forced connectivity that operates by
    // joining disjoint subgraphs rather than by forcing a root.
    const bool forceCommonRoot = (randFrac() < 0.75);

    TestModel model;
    std::vector<uint32_t> modelInputs;
    std::vector<uint32_t> modelOutputs;

    // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32.
    TestMemories weights;

    // Keep track of all normal (i.e., not activation function and not
    // "special") operands that are values (from setOperandValue*()).
    // .first: operand index
    // .second: if the operand is already defined (via setOperandValue*()) then ~0U;
    //          otherwise, the operand has yet to be defined, and this is the corresponding
    //          region index in "weights"
    std::vector<std::pair<uint32_t, unsigned>> valueOperands;

    // An operand is "dead" if it is not consumed by another operation
    // and is not a model output.  Key is operand index; value is
    // operation index.
    std::map<uint32_t, uint32_t> deadOperands;

    // An operation is "dead" if all of its outputs are dead.
    std::set<uint32_t> deadOperations;

    // Collect the signatures of operations in this model.
    std::set<Signature> signatures;

    // For reporting purposes, keep track of the number of root
    // operations (those that do not consume results produced by other
    // operations).
    unsigned rootOperationCount = 0;

    // Track whether we added operands with unknown dimensions.  In this case,
    // partitioned compilation will fail if such an operand is read in a
    // different partition than it is written.
    bool hasUnknownDimensions = false;

    // Generate operations.
    for (unsigned i = 0; i < numOperations; i++) {
        const unsigned operationPatternIndex = randUInt(std::size(kOperationPatterns));
        const auto& operationPattern = kOperationPatterns[operationPatternIndex];

        // INPUTS //////////////////////////////////////////////////////////////////////////////

        std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U);

        // First, process activation function and special inputs, and
        // keep track of which inputs remain.
        std::vector<uint32_t> normalOperationInputIndexes;
        int32_t activationFunction = -1;
        for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs;
             operationInputIndex++) {
            if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) {
                const uint32_t operandIndex = model.addOperand(&activationFunctionType);
                activationFunction = randUInt(4);
                if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) {
                    // workaround for http://b/69011131
                    activationFunction = ANEURALNETWORKS_FUSED_NONE;
                }
                model.setOperandValue(operandIndex, activationFunction);
                operationInputs[operationInputIndex] = operandIndex;
                continue;
            }
            if (operationPattern.mMakeSpecialInput != nullptr) {
                const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))(
                        problemSize, &model, operationInputIndex);
                if (operandIndex >= 0) {
                    operationInputs[operationInputIndex] = operandIndex;
                    continue;
                }
            }
            normalOperationInputIndexes.push_back(operationInputIndex);
        }
        CHECK(!normalOperationInputIndexes.empty());
        signatures.insert(Signature(operationPattern.mOperationType, activationFunction));

        // A (normal) operation input can be one of:
        // - a new or existing model input
        // - an output of an existing operation
        // - an OperandValue
        // - an OperandValueFromMemory
        // Some guidelines:
        // - We generally don't want all of an operation's inputs to be values (constants).
        const unsigned normalOperationInputCount = normalOperationInputIndexes.size();
        // How many of this operation's inputs are constants?
        unsigned normalOperationInputConstantCount = 0;
        // How many of this operation's inputs are model inputs?
        unsigned normalOperationInputModelInputCount = 0;
        // We begin by deciding what kind of input each (normal) operation input will be; we
        // don't actually pick input operand indexes at this time, because we might override
        // this decision later.
        enum InputKind { IK_MODEL_INPUT, IK_OPERATION_OUTPUT, IK_VALUE };
        std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount);
        std::generate(normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
                      [this, &model,
                       numOperations,
                       normalOperationInputCount,
                       &normalOperationInputConstantCount,
                       &normalOperationInputModelInputCount]() -> InputKind {
                          // Constant?  Becomes less likely the more
                          // constants we already have as inputs to
                          // this operation.
                          if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) /
                                                  normalOperationInputCount)) {
                              normalOperationInputConstantCount++;
                              return IK_VALUE;
                          }

                          // Model input?  Becomes less likely the
                          // more model inputs we already have as
                          // inputs to this operation, and the further
                          // along we are in generating this model
                          // (i.e., the more operations we have
                          // generated).
                          if ((model.operationCount() == 0) ||
                              (randFrac() < 0.5 *
                                            (1 - double(normalOperationInputModelInputCount) /
                                             normalOperationInputCount) *
                                            std::min(0.3, (1 - double(model.operationCount()) /
                                                           numOperations)))) {
                              normalOperationInputModelInputCount++;
                              return IK_MODEL_INPUT;
                          }

                          // Else output of an existing operation.
                          return IK_OPERATION_OUTPUT;
                      });
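        // Worked example of the IK_VALUE probability above: with
        // normalOperationInputCount == 3 and one constant already chosen,
        // the next input is a constant with probability 0.3 * (1 - 1/3) = 0.2;
        // if all three inputs were already constants, it would fall to 0.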

        // Now force common root or model input, if necessary.  (A
        // model must have at least one input.)
        auto force =
                [this, &normalOperationInputKinds, normalOperationInputCount](InputKind forceKind){
                    if (std::none_of(normalOperationInputKinds.begin(),
                                     normalOperationInputKinds.end(),
                                     [forceKind](InputKind kind){ return kind == forceKind; })) {
                        normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind;
                    }
                };
        if (forceCommonRoot && (model.operationCount() != 0)) {
            force(IK_OPERATION_OUTPUT);
        }
        if (modelInputs.empty()) {
            CHECK(model.operationCount() == 0);
            force(IK_MODEL_INPUT);
        }

        // Finally create the normal inputs.
        bool isRootOperation = true;
        for (unsigned i = 0; i < normalOperationInputCount; i++) {
            uint32_t operandIndex = ~0U;
            switch (normalOperationInputKinds[i]) {
                case IK_MODEL_INPUT: {
                    if (!modelInputs.empty() && (randFrac() < 0.5)) {
                        operandIndex = modelInputs[randUInt(modelInputs.size())];
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        modelInputs.push_back(operandIndex);
                    }
                    break;
                }
                case IK_OPERATION_OUTPUT: {
                    decltype(deadOperands.begin()) deadOperandI;
                    if (!deadOperands.empty() && (randFrac() < 0.5)) {
                        deadOperandI = deadOperands.begin();
                        std::advance(deadOperandI, randUInt(deadOperands.size()));
                        operandIndex = deadOperandI->first;
                    } else {
                        const uint32_t existingOperationIndex = randUInt(model.operationCount());
                        const auto& existingOperationOutputs =
                                model.getOperationOutputs(existingOperationIndex);
                        operandIndex =
                                existingOperationOutputs[randUInt(existingOperationOutputs.size())];
                        deadOperandI = deadOperands.find(operandIndex);
                        CHECK(deadOperandI == deadOperands.end() ||
                              deadOperandI->second == existingOperationIndex);
                    }
                    if (deadOperandI != deadOperands.end()) {
                        const uint32_t correspondingOperation = deadOperandI->second;
                        deadOperands.erase(deadOperandI);

                        auto deadOperationI = deadOperations.find(correspondingOperation);
                        if (deadOperationI != deadOperations.end()) {
                            deadOperations.erase(deadOperationI);
                        }
                    }
                    isRootOperation = false;
                    break;
                }
                case IK_VALUE: {
                    if (!valueOperands.empty() && (randFrac() < 0.25)) {
                        operandIndex = valueOperands[randUInt(valueOperands.size())].first;
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        if (randFrac() < 0.5) {
                            std::vector<float> value(problemSize * problemSize);
                            std::generate(value.begin(), value.end(), [this]{ return randFrac(); });
                            model.setOperandValue(operandIndex, value);
                            valueOperands.push_back(std::make_pair(operandIndex, ~0U));
                        } else {
                            unsigned memoryIndex = ~0U;
                            if ((weights.memoryCount() != 0) &&
                                (kAllWeightsInOnePool || (randFrac() < 0.5))) {
                                memoryIndex = randUInt(weights.memoryCount());
                            } else {
                                memoryIndex = weights.addMemory();
                            }
                            const size_t length = problemSize * problemSize * sizeof(float);
                            const unsigned regionIndex = weights.addRegion(memoryIndex, length);
                            valueOperands.push_back(std::make_pair(operandIndex, regionIndex));
                        }
                    }
                    break;
                }
                default:
                    FAIL();
            }
            operationInputs[normalOperationInputIndexes[i]] = operandIndex;
        }
        if (isRootOperation) {
            rootOperationCount++;
        }

        // OUTPUTS /////////////////////////////////////////////////////////////////////////////

        std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
        std::generate(operationOutputs.begin(), operationOutputs.end(),
                      [&model, &problemType, &unknownDimensionsType, &hasUnknownDimensions,
                       allowUnknownDimensions, this]{
                          // 3% unknowns causes ~35% of partitionings to fail
                          // (determined by commenting out the fallback code,
                          // running tests and noting the number of failures).
                          if (allowUnknownDimensions && randFrac() < 0.03) {
                              hasUnknownDimensions = true;
                              return model.addOperand(&unknownDimensionsType);
                          } else {
                              return model.addOperand(&problemType);
                          }
                      });

        // OPERATION ///////////////////////////////////////////////////////////////////////////

        const uint32_t operationIndex =
                model.addOperation(operationPattern.mOperationType,
                                   operationInputs, operationOutputs);
        deadOperations.insert(operationIndex);
        std::for_each(operationOutputs.begin(), operationOutputs.end(),
                      [&deadOperands, operationIndex](uint32_t operandIndex) {
                          deadOperands.insert(std::make_pair(operandIndex, operationIndex));
                      });
    }

    // Now finalize the weights.
    weights.layout();
    for (const auto& valueOperand : valueOperands) {
        const uint32_t operandIndex = valueOperand.first;
        const unsigned regionIndex = valueOperand.second;

        if (regionIndex == ~0U) {
            continue;
        }

        const WrapperMemory* memory;
        uint32_t offset, length;
        float* region =
                static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length));
        CHECK(length == problemSize * problemSize * sizeof(float));
        std::generate(region, region + problemSize * problemSize, [this]{ return randFrac(); });
        model.setOperandValueFromMemory(operandIndex, memory, offset, length);
    }

    // Now select model outputs.
    for (uint32_t operationIdx = 0, operationCount = model.operationCount();
         operationIdx < operationCount; operationIdx++) {
        const auto& outputs = model.getOperationOutputs(operationIdx);
        for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount;
             outputIdx++) {
            bool modelOutput = false;
            const uint32_t operandIndex = outputs[outputIdx];
            const auto deadOperandI = deadOperands.find(operandIndex);
            if (deadOperandI != deadOperands.end()) {
                // This is not consumed within the model, so unless we
                // make it an output of the model, it's dead.  The
                // further along we are in generating this model
                // (i.e., the more operations we have generated), the
                // more likely we are to classify this operation
                // output as a model output.
                const double probabilityOfModelOutput =
                        0.50 *
                        [](double x){ return x * x; }(double(operationIdx + 1) / operationCount);
                modelOutput = (randFrac() < probabilityOfModelOutput);
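                // For example: the unconsumed output of the last operation
                // becomes a model output with probability 0.50 * 1.0^2 = 0.50,
                // while the first of 10 operations gets 0.50 * 0.1^2 = 0.005.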
            } else {
                // This is consumed within the model, so we'll rarely
                // make it an output of the model.
                modelOutput = (randFrac() < 0.05);
            }
            if (!modelOutput) {
                continue;
            }
            modelOutputs.push_back(operandIndex);
            if (deadOperandI != deadOperands.end()) {
                deadOperands.erase(deadOperandI);
                const auto deadOperationI = deadOperations.find(operationIdx);
                if (deadOperationI != deadOperations.end()) {
                    deadOperations.erase(deadOperationI);
                }
            }
        }
    }
    if (!allowDeadOperations) {
        // For each dead operation, pick a random output to become a model output.
        for (uint32_t deadOperationIndex : deadOperations) {
            const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex);
            const uint32_t deadOperandIndex =
                    deadOperationOutputs[randUInt(deadOperationOutputs.size())];
            modelOutputs.push_back(deadOperandIndex);
        }
    }
    // A model must have at least one output.
    if (modelOutputs.empty()) {
        const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount()));
        modelOutputs.push_back(outputs[randUInt(outputs.size())]);
    }

    model.identifyInputsAndOutputs(modelInputs, modelOutputs);
#ifdef VERBOSE
    {
        std::cout << "Original model: " << ModelStats(&model) << std::endl;
        std::cout << "rootOperationCount = " << rootOperationCount
                  << ", deadOperations = ";
        if (allowDeadOperations) {
            std::cout << deadOperations.size();
        } else {
            std::cout << "forbidden (converted " << deadOperations.size() << ")";
        }
        std::cout << std::endl;
    }
#endif
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // Non-partitioned compilation.
    TestCompilation c(&model);
    ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
    ASSERT_EQ(c.finish(), Result::NO_ERROR);

    // Create some drivers for partitioned compilation.
    CHECK(!signatures.empty());
    std::vector<std::set<Signature>> signaturesForDriver(signatures.size());
    // First assign each signature to a random driver (a driver is
    // just represented as an entry in the signaturesForDriver
    // vector).
    for (Signature signature : signatures) {
        signaturesForDriver[randUInt(signatures.size())].insert(signature);
    }
    // Now remove each entry that has no signatures.
    auto firstExtra =
            std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(),
                           [](const std::set<Signature>& sigSet) { return sigSet.empty(); });
    if (firstExtra != signaturesForDriver.end()) {
        signaturesForDriver.erase(firstExtra, signaturesForDriver.end());
    }
    // Now actually create the drivers.
    std::vector<std::shared_ptr<Device>> devices;
    for (unsigned i = 0; i < signaturesForDriver.size(); i++) {
        const auto& signaturesForThisDriver = signaturesForDriver[i];
        // The minimum HAL version for this driver is the highest minimum HAL
        // version of any operation supported by this driver.
        const HalVersion minHalVersion = getMinHalVersion(
                std::max_element(signaturesForThisDriver.begin(), signaturesForThisDriver.end(),
                                 [](const Signature& a, const Signature& b) {
                                     return getMinHalVersion(a.first) < getMinHalVersion(b.first);
                                 })
                        ->first);
        const HalVersion actualHalVersion =
                static_cast<HalVersion>(static_cast<int32_t>(minHalVersion) +
                                        randUInt(static_cast<int32_t>(HalVersion::LATEST) -
                                                 static_cast<int32_t>(minHalVersion) + 1));
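        // For example: if this driver's minimum is V1_1 and HalVersion::LATEST
        // is V1_2 (the newest version in this file), actualHalVersion is drawn
        // uniformly from {V1_1, V1_2}.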
        const std::string name =
                "TestDriver(" + std::to_string(i) + "){" + to_string(actualHalVersion) + "}";
#ifdef VERBOSE
        std::cout << "Creating " + name + " for collection of signatures that requires HAL " +
                     to_string(minHalVersion)
                  << std::endl;
#endif
        auto device = DeviceManager::forTest_makeDriverDevice(
                name, makeTestDriver(actualHalVersion, name.c_str(), signaturesForThisDriver));
        devices.push_back(device);
    }
    // CPU fallback device
    devices.push_back(DeviceManager::getCpuDevice());

    // Partitioned compilation.
    // For test cases without unknown intermediate operand sizes, we require the
    // partitioning to succeed without CPU fallback.  With unknown sizes, we
    // retry with a fallback if the non-fallback partitioning fails, and require
    // the fallback to succeed.
    TestCompilation cNoFallback(&model, devices);
    TestCompilation cWithFallback(&model, devices);
    TestCompilation* c2 = nullptr;
    ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    auto compilationResult = cNoFallback.finish();
    if (hasUnknownDimensions && compilationResult == Result::OP_FAILED &&
        cNoFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize()) {
        ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
                  Result::NO_ERROR);
        ASSERT_EQ(cWithFallback.finish(), Result::NO_ERROR);
        ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
                  DeviceManager::getCpuDevice());
        c2 = &cWithFallback;
    } else {
        ASSERT_EQ(compilationResult, Result::NO_ERROR);
        c2 = &cNoFallback;
    }

#ifdef VERBOSE
    {
        std::cout << "signatures = " << signatures.size()
                  << ", devices = " << devices.size() << std::endl;
        const ExecutionPlan& plan = c2->getExecutionPlan();
        switch (plan.forTest_getKind()) {
            case ExecutionPlan::Kind::SIMPLE:
                std::cout << "plan: simple" << std::endl;
                break;
            case ExecutionPlan::Kind::COMPOUND: {
                const auto& steps = plan.forTest_compoundGetSteps();
                std::set<const Device*> devicesInPlan;
                for (const auto& step : steps) {
                    devicesInPlan.insert(step->getDevice().get());
                }
                std::cout << "plan: compound, " << steps.size() << " steps over "
                          << devicesInPlan.size() << " devices" << std::endl;
                for (unsigned i = 0; i < steps.size(); i++) {
                    std::cout << "Step " << i << ": " << ModelStats(steps[i]->getSubModel())
                              << ", device = " << steps[i]->getDevice()->getName() << std::endl;
                }
                break;
            }
            default:
                std::cout << "Unexpected plan kind: "
                          << static_cast<unsigned>(plan.forTest_getKind());
                break;
        }
    }
#endif

    // For execution:
    // - create master inputs (one long vector) and master output value
    //   - master inputs will be copied to actual inputs before each
    //     of the two executions
    //   - master output will be used to fill actual outputs before each
    //     of the two executions
    // - create actual inputs and outputs
    // - first execution (non-partitioned)
    //   - initialize inputs and (to avoid unrelated oddities) outputs
    //   - execute
    //   - copy outputs to a save area (one long vector)
    // - second execution (partitioned)
    //   - (to avoid unrelated oddities) initialize inputs and outputs
    //   - execute
    //   - compare outputs to save area

    // If the runtime and drivers are working properly, execution
    // should not change the inputs.  Nonetheless, we reinitialize the
    // inputs for each execution, so as to avoid unrelated problems
    // appearing to be problems related to unpartitioned execution
    // versus partitioned execution.  Similarly, execution behavior
    // should not be dependent on the outputs; but we'll initialize the
    // outputs anyway.
    std::vector<float> masterInputs(problemSize * problemSize * model.inputCount());
    std::generate(masterInputs.begin(), masterInputs.end(), [this]{ return randFrac(); });
#ifdef VERBOSE
    {
        std::cout << "flat inputs = ";
        dump(masterInputs.begin(), masterInputs.end());
    }
#endif
    const float masterOutput = randFrac();

    // Create the memory for the actual inputs and outputs.
    struct InputOutputDescriptor {
        enum Kind { INPUT, OUTPUT };
        Kind mKind;

        // The input or output either resides in a local buffer
        // (mVector, in which case mMemoryRegion is ignored); or in a
        // shared memory region within a TestMemories instance
        // (mMemoryRegion, in which case mVector is ignored).
        enum Location { VECTOR, REGION };
        Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }

        std::vector<float> mVector;
        unsigned mMemoryRegion;
    };
    std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
    for (unsigned i = 0; i < ioDescriptors.size(); i++) {
        ioDescriptors[i].mKind = (i < model.inputCount()
                                  ? InputOutputDescriptor::INPUT
                                  : InputOutputDescriptor::OUTPUT);
    }
    // We randomly interleave inputs and outputs in creation
    // order, because when we create memory regions in a
    // TestMemories instance, the order in which regions are
    // created within a single Memory is the order they'll be laid
    // out in that memory; and when we have inputs and outputs
    // within the same Memory, we want the possibility that
    // they'll be interleaved.
    std::shuffle(ioDescriptors.begin(), ioDescriptors.end(), mRandNumEng);
    TestMemories ioMemories;
    for (auto& desc : ioDescriptors) {
        if (randFrac() < 0.5) {
            desc.mVector.resize(problemSize * problemSize);
        } else {
            // TODO: common this with the way we create IK_VALUE inputs?
            unsigned memoryIndex = ~0U;
            if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) {
                memoryIndex = randUInt(ioMemories.memoryCount());
            } else {
                memoryIndex = ioMemories.addMemory();
            }
            const size_t length = problemSize * problemSize * sizeof(float);
            desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length);
        }
    }
    ioMemories.layout();

    // Function to set up actual inputs and outputs (initializing them
    // and telling the WrapperExecution about them).
    auto prepareForExecution =
            [&model, &ioDescriptors, &ioMemories,
             &masterInputs, &masterOutput, problemSize, &problemType](WrapperExecution* e) {
                uint32_t inputIndex = 0, outputIndex = 0;
                for (auto& desc : ioDescriptors) {
                    if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                        if (desc.mKind == InputOutputDescriptor::INPUT) {
                            const size_t inputOffset = inputIndex * problemSize * problemSize;
                            std::copy(masterInputs.begin() + inputOffset,
                                      masterInputs.begin() + inputOffset +
                                              problemSize * problemSize,
                                      desc.mVector.begin());
                            e->setInput(inputIndex++, desc.mVector.data(),
                                        desc.mVector.size() * sizeof(float));
                        } else {
                            std::fill(desc.mVector.begin(),
                                      desc.mVector.begin() + problemSize * problemSize,
                                      masterOutput);
                            e->setOutput(outputIndex++, desc.mVector.data(),
                                         desc.mVector.size() * sizeof(float),
                                         &problemType.operandType);
                        }
                    } else {
                        const WrapperMemory* memory;
                        uint32_t offset, length;
                        float* region =
                                static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion,
                                                                         &memory, &offset,
                                                                         &length));
                        CHECK(length == problemSize * problemSize * sizeof(float));
                        if (desc.mKind == InputOutputDescriptor::INPUT) {
                            const size_t inputOffset = inputIndex * problemSize * problemSize;
                            std::copy(masterInputs.begin() + inputOffset,
                                      masterInputs.begin() + inputOffset +
                                              problemSize * problemSize,
                                      region);
                            e->setInputFromMemory(inputIndex++, memory, offset, length);
                        } else {
                            std::fill(region,
                                      region + problemSize * problemSize,
                                      masterOutput);
                            e->setOutputFromMemory(outputIndex++, memory, offset, length,
                                                   &problemType.operandType);
                        }
                    }
                }
                CHECK(inputIndex == model.inputCount());
                CHECK(outputIndex == model.outputCount());
            };

    // Non-partitioned execution.
    WrapperExecution e(&c);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e));
    ASSERT_EQ(e.compute(), Result::NO_ERROR);

    // Copy the outputs of the non-partitioned execution to a save area.
    std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount());
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                std::copy(desc.mVector.begin(),
                          desc.mVector.end(),
                          nonPartitionedOutputs.begin() + outputOffset);
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
                std::copy(region,
                          region + problemSize * problemSize,
                          nonPartitionedOutputs.begin() + outputOffset);
            }
#ifdef VERBOSE
            {
                std::cout << "nonpartitioned output[" << outputIndex << "] = ";
                dump(nonPartitionedOutputs.begin() + outputOffset,
                     nonPartitionedOutputs.begin() + outputOffset + problemSize * problemSize);
            }
#endif
            outputIndex++;
        }
    }

    // Partitioned execution.
    WrapperExecution e2(c2);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
    ASSERT_EQ(e2.compute(), Result::NO_ERROR);

    // Compare the outputs of the partitioned execution to the save
    // area containing the outputs of the non-partitioned execution.
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            SCOPED_TRACE(outputIndex);
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
#ifdef VERBOSE
                std::cout << " partitioned output[" << outputIndex << "] = ";
                dump(desc.mVector.begin(), desc.mVector.end());
#endif
                ASSERT_TRUE(std::equal(desc.mVector.begin(),
                                       desc.mVector.end(),
                                       nonPartitionedOutputs.begin() + outputOffset));
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
#ifdef VERBOSE
                std::cout << "part output[" << outputIndex << "] = ";
                dump(region, region + problemSize * problemSize);
#endif
                ASSERT_TRUE(std::equal(region,
                                       region + problemSize * problemSize,
                                       nonPartitionedOutputs.begin() + outputOffset));
            }
            outputIndex++;
        }
    }
}

}  // namespace
}  // namespace android