1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #undef NDEBUG
18 
19 #include "CompilationBuilder.h"
20 #include "Manager.h"
21 #include "ModelBuilder.h"
22 #include "NeuralNetworks.h"
23 #include "SampleDriver.h"
24 #include "TestNeuralNetworksWrapper.h"
25 #include "Utils.h"
26 #include "ValidateHal.h"
27 
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <iterator>
#include <map>
#include <random>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
37 
38 #include <unistd.h>
39 
40 #include <android-base/logging.h>
41 #include <android/sharedmem.h>
42 #include <gtest/gtest.h>
43 
44 // Uncomment the following line to generate some debugging output that
45 // may be useful when analyzing failures:
46 //
47 // #define VERBOSE VERBOSE
48 
49 // We randomly generate tests (model + input data) at runtime, and verify
50 // that we get the same results whether we do partitioned compilation/execution
51 // or non partitioned compilation/execution.  We perform a test as follows:
52 //
53 // (1) Randomly generate a model (graph and weights), randomly generate input
54 //     data, randomly assign inputs and outputs to CPU memory or to shared
55 //     memory.
56 //
57 //     Randomly leaves dimensions unset for intermediate operands.
58 //
59 // (2) Randomly generate drivers based on the sample driver, each of which
60 //     executes models on the CPU.  They differ according to which operations
61 //     they support.
62 //
63 // (3) Compile and execute without partitioning, saving off the results.
64 //
65 // (4) Compile and execute with partitioning.
66 //
67 // (5) Verify that the saved results from (3) match the results from (4).
68 //
69 // For simplicity, all data (model inputs, model outputs, weights,
70 // temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
71 // dimensions are fixed throughout a particular test case (and
72 // randomly determined).  This prevents us from having to find a
73 // mechanism to "resize" data (e.g., if ADD#a operates on data of size
74 // 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
75 // and ADD#b become inputs of ADD#c, do we need to insert one or more
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
77 // from size 2x2 to size 3x3 in order to match ADD#b).  In the few
78 // cases where an operand cannot be of this type, it is a constant
79 // (e.g., activation functions and RNN bias).
80 //
81 // Each operation we generate has a signature (described in more
82 // detail later).  The randomly generated drivers decide which
83 // operations they can execute by checking operation signatures.  Once
84 // we have built the model and know the set of signatures, we randomly
85 // assign each signature to a driver.  No signature is supported by
86 // multiple drivers -- we're not testing the logic that the
87 // partitioning algorithm uses to select the best driver for an
88 // operation.
89 
90 namespace android {
91 
92 using CompilationBuilder = nn::CompilationBuilder;
93 using Device = nn::Device;
94 using DeviceManager = nn::DeviceManager;
95 using ExecutionPlan = nn::ExecutionPlan;
96 using HalVersion = nn::HalVersion;
97 using HidlModel = hardware::neuralnetworks::V1_2::Model;
98 using HidlToken =
99         ::android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
100 using MemoryBuilder = nn::Memory;
101 using ModelBuilder = nn::ModelBuilder;
102 using Result = nn::test_wrapper::Result;
103 using SampleDriver = nn::sample_driver::SampleDriver;
104 using WrapperCompilation = nn::test_wrapper::Compilation;
105 using WrapperExecution = nn::test_wrapper::Execution;
106 using WrapperMemory = nn::test_wrapper::Memory;
107 using WrapperModel = nn::test_wrapper::Model;
108 using WrapperOperandType = nn::test_wrapper::OperandType;
109 using WrapperType = nn::test_wrapper::Type;
110 
111 namespace {
112 
113 /// Configure test size //////////////////////////////////////////////////////////
114 
115 // We may exceed this in order to connect otherwise disjoint subgraphs.
116 static const unsigned kMaxNumOperations = 100;
117 
118 // We build models to process 2-D square tensors up to this size in each dimension;
119 // note that the API promotes by-value weights larger than 128 to by-reference,
120 // so we want to ensure that we can pick both types that exceed and types that do
121 // not exceed this size.
122 static const unsigned kMaxProblemSize = 8;
123 
124 // First seed for pseudorandom test generation.
125 static const unsigned kFirstSeed = 0;
126 
127 // Number of test cases.
128 static const unsigned kNumTestCases = 225;
129 
130 // Force all graph weights into a single pool (as we recommend to users)
131 // or allow them to be distributed across multiple pools (more stress
132 // on the partitioning algorithm and the rest of the runtime)?
133 // Forcing all graph weights into a single pool may be necessary to
134 // prevent large graphs from running up against http://b/70302693
135 // "NNAPI overuses (?) fds".
136 static const bool kAllWeightsInOnePool = false;
137 
138 //////////////////////////////////////////////////////////////////////////////////
139 
140 // The signature of an operation consists of the operation type (e.g.,
141 // ADD) and the activation function (use -1 in the case of an
142 // operation type for which the activation function is inapplicable).
143 typedef std::pair<ANeuralNetworksOperationType, int> Signature;
144 
145 // This class adds some simple utilities on top of WrapperModel.  For example,
146 // it provides access to certain features from ModelBuilder that are not exposed
147 // by the base class (such as inputCount() and operation index).
148 class TestModel : public WrapperModel {
149 public:
150 
addOperation(ANeuralNetworksOperationType type,const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)151     uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
152                           const std::vector<uint32_t>& outputs) {
153         const uint32_t operationIndex = operationCount();
154         mOperations.push_back(outputs);
155         WrapperModel::addOperation(type, inputs, outputs);
156         return operationIndex;
157     }
158 
operationCount() const159     uint32_t operationCount() const {
160         return mOperations.size();
161     }
162 
inputCount() const163     uint32_t inputCount() const {
164         return builder()->inputCount();
165     }
outputCount() const166     uint32_t outputCount() const {
167         return builder()->outputCount();
168     }
169 
getOperationOutputs(uint32_t index) const170     const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const {
171         CHECK(index < mOperations.size());
172         return mOperations[index];
173     }
174 
175     // All values are immediately copied into the model (we need to do
176     // this ourselves in cases where the underlying NNAPI does not).
setOperandValue(uint32_t index,const std::vector<float> & value)177     void setOperandValue(uint32_t index, const std::vector<float>& value) {
178         const size_t length = value.size() * sizeof(float);
179 
180         if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
181             WrapperModel::setOperandValue(index, value.data(), length);
182         } else {
183             mOperandValues.push_back(value);
184             WrapperModel::setOperandValue(index, mOperandValues.back().data(), length);
185         }
186     }
187 
setOperandValue(uint32_t index,const std::vector<int32_t> & value)188     void setOperandValue(uint32_t index, const std::vector<int32_t>& value) {
189         const size_t length = value.size() * sizeof(int32_t);
190 
191         CHECK(length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
192         WrapperModel::setOperandValue(index, value.data(), length);
193     }
194 
setOperandValue(uint32_t index,int32_t value)195     void setOperandValue(uint32_t index, int32_t value) {
196         CHECK(sizeof(value) <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
197         WrapperModel::setOperandValue(index, &value, sizeof(value));
198     }
199 
200 private:
201 
builder() const202     const ModelBuilder* builder() const {
203         return reinterpret_cast<const ModelBuilder*>(getHandle());
204     }
205 
206     // Representation of operations: vector index is operation number,
207     // vector value is operation's output operands.
208     std::vector<std::vector<uint32_t>> mOperations;
209 
210     // Large operand values -- not immediately copied into the
211     // WrapperModel, so remembered here instead.
212     std::vector<std::vector<float>> mOperandValues;
213 };
214 
215 // This class adds some simple utilities on top of WrapperCompilation in order
216 // to provide access to certain features from CompilationBuilder that are not
217 // exposed by the base class.
218 class TestCompilation : public WrapperCompilation {
219 public:
TestCompilation(const WrapperModel * model)220     TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {}
221 
TestCompilation(const WrapperModel * model,std::vector<std::shared_ptr<Device>> devices)222     TestCompilation(const WrapperModel* model, std::vector<std::shared_ptr<Device>> devices) {
223         ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
224         CompilationBuilder* c = nullptr;
225         int result = m->createCompilation(&c, devices);
226         EXPECT_EQ(result, 0);
227         mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
228     }
229 
230     using WrapperCompilation::finish;
231 
setPartitioning(uint32_t partitioning)232     Result setPartitioning(uint32_t partitioning) {
233         return static_cast<Result>(builder()->setPartitioning(partitioning));
234     }
235 
getExecutionPlan() const236     const ExecutionPlan& getExecutionPlan() const {
237         return builder()->forTest_getExecutionPlan();
238     }
239 
240 private:
builder() const241     const CompilationBuilder* builder() const {
242         return reinterpret_cast<const CompilationBuilder*>(getHandle());
243     }
builder()244     CompilationBuilder* builder() {
245         return reinterpret_cast<CompilationBuilder*>(getHandle());
246     }
247 };
248 
249 // This class is used to manage a collection of memory regions,
250 // disjoint windows onto a set of Memory instances, each of which is
251 // associated with a single shared memory region.  Each region and
252 // Memory instance is assigned a number.  The usage pattern is as
253 // follows:
254 // - Call addMemory() and addRegion() as many times as needed to
255 //   declare (but not define) Memory instances and declare region
256 //   instances.
257 // - Call layout() to define the Memory instances.
258 // - Call getRegion() as many times as needed to get the details
259 //   of memory regions (such as address, or Memory/offset/length).
260 // The Memory instances created by layout() are owned by the
261 // TestMemories instance, and are destroyed when the TestMemories
262 // instance is destroyed.
263 class TestMemories {
264 public:
265     TestMemories() = default;
266     ~TestMemories();
267 
268     TestMemories(const TestMemories&) = delete;
269     TestMemories& operator=(const TestMemories&) = delete;
270 
addMemory()271     unsigned addMemory() {
272         CHECK(!mLayoutDone);
273         mMemorySizes.push_back(0);
274         return memoryCount() - 1;
275     }
memoryCount() const276     unsigned memoryCount() const {
277         return mMemorySizes.size();
278     }
279 
addRegion(unsigned memoryIndex,uint32_t length)280     unsigned addRegion(unsigned memoryIndex, uint32_t length) {
281         CHECK(!mLayoutDone);
282         CHECK(memoryIndex < memoryCount());
283         uint32_t& memorySize = mMemorySizes[memoryIndex];
284         auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length);
285         mRegions.push_back(desc);
286         memorySize += length;
287         return regionCount() - 1;
288     }
regionCount() const289     unsigned regionCount() const {
290         return mRegions.size();
291     }
292 
293     void layout();
294 
getRegion(unsigned regionIndex,const WrapperMemory ** pMemory,uint32_t * pOffset,uint32_t * pLength)295     void* getRegion(unsigned regionIndex,
296                     const WrapperMemory** pMemory, uint32_t* pOffset, uint32_t* pLength) {
297         CHECK(mLayoutDone);
298         CHECK(regionIndex < regionCount());
299         const auto& regionDescriptor = mRegions[regionIndex];
300         const WrapperMemory* memory = &mMemorys[std::get<0>(regionDescriptor)];
301         uint32_t offset = std::get<1>(regionDescriptor);
302         uint32_t length = std::get<2>(regionDescriptor);
303 
304         uint8_t* buffer;
305         if (reinterpret_cast<MemoryBuilder*>(memory->get())->getPointer(&buffer) !=
306             ANEURALNETWORKS_NO_ERROR) {
307             CHECK(0);
308         }
309 
310         if (pMemory) *pMemory = memory;
311         if (pOffset) *pOffset = offset;
312         if (pLength) *pLength = length;
313 
314         return buffer + offset;
315     }
316 
getRegion(unsigned regionIndex)317     void* getRegion(unsigned regionIndex) {
318         return getRegion(regionIndex, nullptr, nullptr, nullptr);
319     }
320 
321 private:
322     // Index is the memory index; value is the size of the memory
323     // (aggregate size of all regions in the memory).
324     std::vector<uint32_t> mMemorySizes;
325 
326     // Index is the memory index.
327     std::vector<WrapperMemory> mMemorys;
328     std::vector<int> mFDs;
329 
330     // Index is the region index; tuple represents memory index,
331     // region offset within memory, region length.
332     std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions;
333 
334     // For sanity checking.
335     bool mLayoutDone = false;
336 };
337 
layout()338 void TestMemories::layout() {
339     CHECK(!mLayoutDone);
340     for (uint32_t memorySize : mMemorySizes) {
341         const int fd = ASharedMemory_create(nullptr, memorySize);
342         CHECK(fd >= 0);
343         mMemorys.emplace_back(memorySize, PROT_READ | PROT_WRITE, fd, 0);
344         mFDs.push_back(fd);
345     }
346     mLayoutDone = true;
347 }
348 
~TestMemories()349 TestMemories::~TestMemories() {
350     for (int fd : mFDs) {
351         close(fd);
352     }
353 }
354 
// Test fixture parameterized by a PRNG seed: each test case deterministically
// derives a model, a set of drivers, and input data from its seed.
class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
public:
    RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}

    // Computes the (operation type, activation function) Signature of
    // |operation| within |model|; used by the test drivers to decide
    // which operations they support.
    static Signature getSignature(const HidlModel& model, const Operation& operation);

protected:
    // Creates a driver at the given HAL version that supports exactly the
    // operations whose signatures appear in |signatures|.
    static V1_0::IDevice* makeTestDriver(HalVersion version, const char* name,
                                         std::set<Signature> signatures);

    // Minimum HAL version that can execute |type|, per kOperationPatterns.
    static HalVersion getMinHalVersion(ANeuralNetworksOperationType type);

    static std::string to_string(HalVersion version);

    bool randBool() { return randUInt(2) == 1; }

    double randFrac() {  // [0.0, 1.0)
        return mRandNumUnitDist(mRandNumEng);
    }

    unsigned randUInt(unsigned limit) {  // [0, limit)
        return unsigned(randFrac() * limit);
    }

    // Represents an operation in which every input and output operand
    // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except:
    // - One input operand may be an activation function.
    // - Any number of input operands may be "special" in some other way
    //   (and in this implementation, not produced by any other operation).
    // We require that:
    // - There be at least one input operand that is neither an
    //    activation function nor "special".
    struct OperationPattern {
        HalVersion mMinHalVersion;
        int mOperationType;
        unsigned mNumInputs;
        unsigned mNumOutputs;
        int mActivationFunctionInputIndex;  // <0 if none

        // Returns operand index, or <0 if input is normal (must not
        // be called for an activation function operand).  Function
        // should have the following prototype:
        //
        //     int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex);
        //
        int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned);
    };

    static const OperationPattern kOperationPatterns[];

    // See OperationPattern::mMakeSpecialInput.  This function is used to
    // manufacture an RNN input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) {
        if (inputIndex != 3) {
            return -1;
        }

        // input operand 3 is bias, a 1-D tensor filled with random values
        const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, { problemSize });
        const uint32_t operandIndex = model->addOperand(&biasType);
        std::vector<float> biasValue(problemSize);
        std::generate(biasValue.begin(), biasValue.end(),
                      [this]{ return randFrac(); });
        model->setOperandValue(operandIndex, biasValue);
        return int(operandIndex);
    }

    // See OperationPattern::mMakeSpecialInput.  This function is used to
    // manufacture a TRANSPOSE input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeTransposeSpecialInput(unsigned /* problemSize */, TestModel* model,
                                  unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is perm, a 1-D tensor; {1, 0} swaps the two axes
        const WrapperOperandType permType(WrapperType::TENSOR_INT32, {2});
        const uint32_t operandIndex = model->addOperand(&permType);
        std::vector<int32_t> permValue = {1, 0};
        model->setOperandValue(operandIndex, permValue);
        return int(operandIndex);
    }

#ifdef VERBOSE
    // Debug-only helper that prints summary statistics about a model.
    class ModelStats {
    public:
        ModelStats(const ModelBuilder* model) :
                mBuilder(model) { }
        ModelStats(const WrapperModel* model) :
                mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) { }
        friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) {
            const uint32_t operandCount = stats.mBuilder->operandCount();
            const uint32_t inputCount = stats.mBuilder->inputCount();
            const uint32_t outputCount = stats.mBuilder->outputCount();
            out << "operationCount = " << stats.mBuilder->operationCount()
                << ", operandCount = " << operandCount
                << ", inputCount = " << inputCount
                << " (" << (double(inputCount) / operandCount) << ")"
                << ", outputCount = " << outputCount
                << " (" << (double(outputCount) / operandCount) << ")";
            return out;
        }
    private:
        const ModelBuilder* mBuilder;
    };

    // Debug-only helper that prints the elements of a range.
    template <typename T_iterator>
    static void dump(T_iterator I, T_iterator E) {
        std::cout << "{";
        for (; I != E; I++) {
            std::cout << " " << *I;
        }
        std::cout << " }" << std::endl;
    }
#endif

    // Protected so that test bodies can draw additional random numbers.
    std::mt19937 mRandNumEng;

private:
    std::uniform_real_distribution<double> mRandNumUnitDist;
};
478 
// Catalog of the operation shapes the generator can emit, grouped by the
// minimum HAL version that supports them.  Field order matches
// OperationPattern: {min HAL version, operation type, #inputs, #outputs,
// activation-function input index (<0 if none), special-input factory}.
const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = {
        {HalVersion::V1_0, ANEURALNETWORKS_ADD, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_MUL, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_RNN, 6, 2, 5,
         &RandomPartitioningTest::makeRnnSpecialInput},
        {HalVersion::V1_0, ANEURALNETWORKS_TANH, 1, 1, -1, nullptr},

        {HalVersion::V1_1, ANEURALNETWORKS_SUB, 3, 1, 2, nullptr},
        {HalVersion::V1_1, ANEURALNETWORKS_TRANSPOSE, 2, 1, -1,
         &RandomPartitioningTest::makeTransposeSpecialInput},

        {HalVersion::V1_2, ANEURALNETWORKS_MAXIMUM, 2, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_NEG, 1, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_SIN, 1, 1, -1, nullptr},
};
495 
getMinHalVersion(ANeuralNetworksOperationType type)496 HalVersion RandomPartitioningTest::getMinHalVersion(ANeuralNetworksOperationType type) {
497     static const auto kOperationToVersion = [] {
498         std::map<ANeuralNetworksOperationType, HalVersion> result;
499         for (const auto& pattern : kOperationPatterns) {
500             result[pattern.mOperationType] = pattern.mMinHalVersion;
501         }
502         return result;
503     }();
504 
505     return kOperationToVersion.at(type);
506 }
507 
// Derives the Signature of |operation|: its operation type paired with the
// value of its activation-function input operand, or -1 when that operation
// type has no activation input (per kOperationPatterns).
Signature RandomPartitioningTest::getSignature(const HidlModel& model, const Operation& operation) {
    // Built once, on first use: operation type -> activation input index.
    static const auto kOperationToActivation = [] {
        std::map<ANeuralNetworksOperationType, int> result;
        for (const auto& pattern : kOperationPatterns) {
            result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex;
        }
        return result;
    }();

    const ANeuralNetworksOperationType operationType =
            static_cast<ANeuralNetworksOperationType>(operation.type);
    const int activationFunctionInputIndex = kOperationToActivation.at(operationType);
    if (activationFunctionInputIndex < 0) {
        return Signature(operationType, -1);
    }

    // The generator always emits the activation function as a small INT32
    // constant copied into the model, so its value can be read straight out
    // of the model's operandValues pool.
    const Operand& operand = model.operands[operation.inputs[activationFunctionInputIndex]];
    CHECK(operand.lifetime == OperandLifeTime::CONSTANT_COPY);
    CHECK(operand.type == OperandType::INT32);
    int32_t value;
    memcpy(&value,
           &model.operandValues[operand.location.offset],
           operand.location.length);
    return Signature(operationType, value);
}
533 
to_string(HalVersion version)534 std::string RandomPartitioningTest::to_string(HalVersion version) {
535     switch (version) {
536         case HalVersion::V1_0:
537             return "V1_0";
538         case HalVersion::V1_1:
539             return "V1_1";
540         case HalVersion::V1_2:
541             return "V1_2";
542         default:
543             return "V_UNKNOWN";
544     }
545 };
546 
// A V1_2 driver that behaves like SampleDriver (CPU execution), except that
// it restricts its supported operations to a fixed set of signatures.
class TestDriver : public SampleDriver {
public:
    // Behaves like SampleDriver, except that it only supports
    // operations with the specified signatures.
    TestDriver(const char* name, std::set<Signature> signatures) :
            SampleDriver(name), mSignatures(std::move(signatures)) { }

    Return<void> getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb) override {
        android::nn::initVLogMask();
        // Report uniform, middling performance for everything so that the
        // partitioner's choice is driven purely by operation support.
        const PerformanceInfo kPerf = {.execTime = 0.75f, .powerUsage = 0.75f};
        Capabilities capabilities = {
                .relaxedFloat32toFloat16PerformanceScalar = kPerf,
                .relaxedFloat32toFloat16PerformanceTensor = kPerf,
                .operandPerformance = nn::nonExtensionOperandPerformance(kPerf)};
        _hidl_cb(ErrorStatus::NONE, capabilities);
        return Void();
    }

    // Reports an operation as supported iff its signature is in mSignatures.
    Return<void> getSupportedOperations_1_2(const HidlModel& model,
                                            getSupportedOperations_cb cb) override {
        if (nn::validateModel(model)) {
            const size_t count = model.operations.size();
            std::vector<bool> supported(count);
            for (size_t i = 0; i < count; i++) {
                supported[i] =
                    (mSignatures.count(
                        RandomPartitioningTest::getSignature(
                            model,
                            model.operations[i])) != 0);
            }
            cb(ErrorStatus::NONE, supported);
        } else {
            std::vector<bool> supported;
            cb(ErrorStatus::INVALID_ARGUMENT, supported);
        }
        return Void();
    }

    Return<ErrorStatus> prepareModel_1_2(const HidlModel& model, ExecutionPreference preference,
                                         const hidl_vec<hidl_handle>& modelCache,
                                         const hidl_vec<hidl_handle>& dataCache,
                                         const HidlToken& token,
                                         const sp<IPreparedModelCallback>& callback) override {
        // NOTE: We verify that all operations in the model are supported.
        ErrorStatus outStatus = ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_2(
            model,
            [&outStatus](ErrorStatus inStatus, const hidl_vec<bool>& supportedOperations) {
                if (inStatus == ErrorStatus::NONE) {
                    if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                    [](bool v){ return v; })) {
                        outStatus = ErrorStatus::NONE;
                    }
                }
            });
        if (ret.isOk() && (outStatus == ErrorStatus::NONE)) {
            return SampleDriver::prepareModel_1_2(model, preference, modelCache, dataCache, token,
                                                  callback);
        } else {
            // Either the transport failed or some operation is unsupported;
            // reject the model through both the callback and the return value.
            callback->notify_1_2(ErrorStatus::INVALID_ARGUMENT, nullptr);
            return ErrorStatus::INVALID_ARGUMENT;
        }
    }

private:
    const std::set<Signature> mSignatures;
};
614 
615 // Like TestDriver, but implementing 1.1
// Adapter exposing only the V1_1 device interface; every call is forwarded
// to an underlying TestDriver (which implements V1_2).
class TestDriverV1_1 : public V1_1::IDevice {
   public:
    TestDriverV1_1(const char* name, std::set<Signature> signatures)
        : mDriverV1_2(new TestDriver(name, std::move(signatures))) {}
    Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
        return mDriverV1_2->getCapabilities_1_1(_hidl_cb);
    }
    Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
                                            getSupportedOperations_1_1_cb _hidl_cb) override {
        return mDriverV1_2->getSupportedOperations_1_1(model, _hidl_cb);
    }
    Return<ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, ExecutionPreference preference,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mDriverV1_2->prepareModel_1_1(model, preference, actualCallback);
    }
    Return<DeviceStatus> getStatus() override { return mDriverV1_2->getStatus(); }
    Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mDriverV1_2->getCapabilities(_hidl_cb);
    }
    Return<void> getSupportedOperations(const V1_0::Model& model,
                                        getSupportedOperations_cb _hidl_cb) override {
        return mDriverV1_2->getSupportedOperations(model, _hidl_cb);
    }
    Return<ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mDriverV1_2->prepareModel(model, actualCallback);
    }

   private:
    // The real implementation; this class is purely an interface adapter.
    const sp<V1_2::IDevice> mDriverV1_2;
};
649 
650 // Like TestDriver, but implementing 1.0
// Adapter exposing only the V1_0 device interface; every call is forwarded
// to an underlying TestDriver (which implements V1_2).
class TestDriverV1_0 : public V1_0::IDevice {
   public:
    TestDriverV1_0(const char* name, std::set<Signature> signatures)
        : mDriverV1_2(new TestDriver(name, std::move(signatures))) {}
    Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mDriverV1_2->getCapabilities(_hidl_cb);
    }
    Return<void> getSupportedOperations(const V1_0::Model& model,
                                        getSupportedOperations_cb _hidl_cb) override {
        return mDriverV1_2->getSupportedOperations(model, _hidl_cb);
    }
    Return<ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mDriverV1_2->prepareModel(model, actualCallback);
    }
    Return<DeviceStatus> getStatus() override { return mDriverV1_2->getStatus(); }

   private:
    // The real implementation; this class is purely an interface adapter.
    const sp<V1_2::IDevice> mDriverV1_2;
};
672 
makeTestDriver(HalVersion version,const char * name,std::set<Signature> signatures)673 V1_0::IDevice* RandomPartitioningTest::makeTestDriver(HalVersion version, const char* name,
674                                                       std::set<Signature> signatures) {
675     switch (version) {
676         case HalVersion::V1_0:
677             return new TestDriverV1_0(name, std::move(signatures));
678         case HalVersion::V1_1:
679             return new TestDriverV1_1(name, std::move(signatures));
680         case HalVersion::V1_2:
681             return new TestDriver(name, std::move(signatures));
682         default:
683             ADD_FAILURE() << "Unexpected HalVersion " << static_cast<int32_t>(version);
684             return nullptr;
685     }
686 }
687 
// Instantiate the parameterized test once for each seed in
// [kFirstSeed, kFirstSeed + kNumTestCases); each seed drives one randomly
// generated model + partitioning scenario in TEST_P(RandomPartitioningTest, Test).
INSTANTIATE_TEST_CASE_P(Seed, RandomPartitioningTest,
                        ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases));
690 
TEST_P(RandomPartitioningTest,Test)691 TEST_P(RandomPartitioningTest, Test) {
692     LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam();
693 
694 #ifdef VERBOSE
695     std::cout << std::setprecision(2) << std::fixed << std::setw(4);
696 #endif
697 
698     const unsigned problemSize = 1+randUInt(kMaxProblemSize);
699     const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, { problemSize, problemSize });
700     const WrapperOperandType unknownDimensionsType(WrapperType::TENSOR_FLOAT32, { 0, 0 });
701 
702     static const WrapperOperandType activationFunctionType(WrapperType::INT32, { });
703 
704     const unsigned numOperations = 2+randUInt(kMaxNumOperations-1);
705     const bool allowDeadOperations = (randFrac() < 0.2);
706     const bool allowUnknownDimensions = (randFrac() < 0.25);
707 
708     // TODO: The current algorithm builds the graph in a forward
709     // direction (i.e., later-generated operations consume outputs
710     // from earlier-generated operations).  In order to get more
711     // variation in graph topology, perhaps we should also create an
712     // algorithm to build the graph in a backward direction (i.e.,
713     // later-generated operations produce outputs to be consumed by
714     // earlier-generated operations).
715     [[maybe_unused]] const bool buildForward = randBool();
716 
717     // TODO: Add a form of forced connectivity that operates by
718     // joining disjoint subgraphs rather than by forcing a root.
719     const bool forceCommonRoot = (randFrac() < 0.75);
720 
721     TestModel model;
722     std::vector<uint32_t> modelInputs;
723     std::vector<uint32_t> modelOutputs;
724 
725     // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32.
726     TestMemories weights;
727 
728     // Keep track of all normal (i.e., not activation function and not
729     // "special") operands that are values (from setOperandValue*()).
730     // .first: operand index
731     // .second: if the operand is already defined (via setOperandValue*()) then ~0U;
732     //          otherwise, the operand has yet to be defined, and this is the corresponding
733     //          region index in "weights"
734     std::vector<std::pair<uint32_t, unsigned>> valueOperands;
735 
736     // An operand is "dead" if it is not consumed by another operation
737     // and is not a model output.  Key is operand index; value is
738     // operation index.
739     std::map<uint32_t, uint32_t> deadOperands;
740 
741     // An operation is "dead" if all of its outputs are dead.
742     std::set<uint32_t> deadOperations;
743 
744     // Collect the signatures of operations in this model.
745     std::set<Signature> signatures;
746 
747     // For reporting purposes, keep track of the number of root
748     // operations (those that do not consume results produced by other
749     // operations).
750     unsigned rootOperationCount = 0;
751 
752     // Track if we added operands with unknown dimensions. In this case,
753     // partitioned compilation will fail if such an operand is read in a
754     // different partition than it is written.
755     bool hasUnknownDimensions = false;
756 
757     // Generate operations.
758     for (unsigned i = 0; i < numOperations; i++) {
759         const unsigned operationPatternIndex = randUInt(std::size(kOperationPatterns));
760         const auto& operationPattern = kOperationPatterns[operationPatternIndex];
761 
762         // INPUTS //////////////////////////////////////////////////////////////////////////////////
763 
764         std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U);
765 
766         // First, process activation function and special inputs, and
767         // keep track of which inputs remain.
768         std::vector<uint32_t> normalOperationInputIndexes;
769         int32_t activationFunction = -1;
770         for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs;
771              operationInputIndex++) {
772             if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) {
773                 const uint32_t operandIndex = model.addOperand(&activationFunctionType);
774                 activationFunction = randUInt(4);
775                 if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) {
776                     // workaround for http://b/69011131
777                     activationFunction = ANEURALNETWORKS_FUSED_NONE;
778                 }
779                 model.setOperandValue(operandIndex, activationFunction);
780                 operationInputs[operationInputIndex] = operandIndex;
781                 continue;
782             }
783             if (operationPattern.mMakeSpecialInput != nullptr) {
784                 const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))(
785                     problemSize, &model, operationInputIndex);
786                 if (operandIndex >= 0) {
787                     operationInputs[operationInputIndex] = operandIndex;
788                     continue;
789                 }
790             }
791             normalOperationInputIndexes.push_back(operationInputIndex);
792         }
793         CHECK(!normalOperationInputIndexes.empty());
794         signatures.insert(Signature(operationPattern.mOperationType, activationFunction));
795 
796         // A (normal) operation input can be one of:
797         // - a new or existing model input
798         // - an output of an existing operation
799         // - an OperandValue
800         // - an OperandValueFromMemory
801         // Some guidelines:
802         // - We generally don't want all of an operation's inputs to be values (constants)
803         const unsigned normalOperationInputCount = normalOperationInputIndexes.size();
804         //     How many of this operation's inputs are constants?
805         unsigned normalOperationInputConstantCount = 0;
806         //     How many of this operation's inputs are model inputs?
807         unsigned normalOperationInputModelInputCount = 0;
808         // We begin by deciding what kind of input each (normal) operation will be; we don't
809         // actually pick input operand indexes at this time, because we might override this
810         // decision later.
811         enum InputKind { IK_MODEL_INPUT, IK_OPERATION_OUTPUT, IK_VALUE };
812         std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount);
813         std::generate(normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
814                       [this, &model,
815                        numOperations,
816                        normalOperationInputCount,
817                        &normalOperationInputConstantCount,
818                        &normalOperationInputModelInputCount]() -> InputKind {
819                           // Constant?  Becomes less likely the more
820                           // constants we already have as inputs to
821                           // this operation.
822                           if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) /
823                                                    normalOperationInputCount)) {
824                               normalOperationInputConstantCount++;
825                               return IK_VALUE;
826                           }
827 
828                           // Model input?  Becomes less likely the
829                           // more model inputs we already have as
830                           // inputs to this operation, and the further
831                           // along we are in generating this model
832                           // (i.e., the more operations we have
833                           // generated).
834                           if ((model.operationCount() == 0) ||
835                               (randFrac() < 0.5 *
836                                (1 - double(normalOperationInputModelInputCount) /
837                                 normalOperationInputCount) *
838                                std::min(0.3, (1 - double(model.operationCount()) /
839                                               numOperations)))) {
840                               normalOperationInputModelInputCount++;
841                               return IK_MODEL_INPUT;
842                           }
843 
844                           // Else output of an existing operation.
845                           return IK_OPERATION_OUTPUT;
846                       });
847 
848         // Now force common root or model input, if necessary.  (A
849         // model must have at least one input.)
850         auto force =
851                 [this, &normalOperationInputKinds, normalOperationInputCount](InputKind forceKind){
852             if (std::none_of(normalOperationInputKinds.begin(),
853                              normalOperationInputKinds.end(),
854                              [forceKind](InputKind kind){ return kind == forceKind; })) {
855                 normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind;
856             }
857         };
858         if (forceCommonRoot && (model.operationCount() != 0)) {
859             force(IK_OPERATION_OUTPUT);
860         }
861         if (modelInputs.empty()) {
862             CHECK(model.operationCount() == 0);
863             force(IK_MODEL_INPUT);
864         }
865 
866         // Finally create the normal inputs.
867         bool isRootOperation = true;
868         for (unsigned i = 0; i < normalOperationInputCount; i++) {
869             uint32_t operandIndex = ~0U;
870             switch (normalOperationInputKinds[i]) {
871                 case IK_MODEL_INPUT: {
872                     if (!modelInputs.empty() && (randFrac() < 0.5)) {
873                         operandIndex = modelInputs[randUInt(modelInputs.size())];
874                     } else {
875                         operandIndex = model.addOperand(&problemType);
876                         modelInputs.push_back(operandIndex);
877                     }
878                     break;
879                 }
880                 case IK_OPERATION_OUTPUT: {
881                     decltype(deadOperands.begin()) deadOperandI;
882                     if (!deadOperands.empty() && (randFrac() < 0.5)) {
883                         deadOperandI = deadOperands.begin();
884                         std::advance(deadOperandI, randUInt(deadOperands.size()));
885                         operandIndex = deadOperandI->first;
886                     } else {
887                         const uint32_t existingOperationIndex = randUInt(model.operationCount());
888                         const auto& existingOperationOutputs =
889                                 model.getOperationOutputs(existingOperationIndex);
890                         operandIndex =
891                             existingOperationOutputs[randUInt(existingOperationOutputs.size())];
892                         deadOperandI = deadOperands.find(operandIndex);
893                         CHECK(deadOperandI == deadOperands.end() ||
894                               deadOperandI->second == existingOperationIndex);
895                     }
896                     if (deadOperandI != deadOperands.end()) {
897                         const uint32_t correspondingOperation = deadOperandI->second;
898                         deadOperands.erase(deadOperandI);
899 
900                         auto deadOperationI = deadOperations.find(correspondingOperation);
901                         if (deadOperationI != deadOperations.end()) {
902                             deadOperations.erase(deadOperationI);
903                         }
904                     }
905                     isRootOperation = false;
906                     break;
907                 }
908                 case IK_VALUE: {
909                     if (!valueOperands.empty() && (randFrac() < 0.25)) {
910                         operandIndex = valueOperands[randUInt(valueOperands.size())].first;
911                     } else {
912                         operandIndex = model.addOperand(&problemType);
913                         if (randFrac() < 0.5) {
914                             std::vector<float> value(problemSize * problemSize);
915                             std::generate(value.begin(), value.end(), [this]{ return randFrac(); });
916                             model.setOperandValue(operandIndex, value);
917                             valueOperands.push_back(std::make_pair(operandIndex, ~0U));
918                         } else {
919                             unsigned memoryIndex = ~0U;
920                             if ((weights.memoryCount() != 0) &&
921                                 (kAllWeightsInOnePool || (randFrac() < 0.5))) {
922                                 memoryIndex = randUInt(weights.memoryCount());
923                             } else {
924                                 memoryIndex = weights.addMemory();
925                             }
926                             const size_t length = problemSize * problemSize * sizeof(float);
927                             const unsigned regionIndex = weights.addRegion(memoryIndex, length);
928                             valueOperands.push_back(std::make_pair(operandIndex, regionIndex));
929                         }
930                     }
931                     break;
932                 }
933                 default:
934                     FAIL();
935             }
936             operationInputs[normalOperationInputIndexes[i]] = operandIndex;
937         }
938         if (isRootOperation) {
939             rootOperationCount++;
940         }
941 
942         // OUTPUTS /////////////////////////////////////////////////////////////////////////////////
943 
944         std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
945         std::generate(operationOutputs.begin(), operationOutputs.end(),
946                       [&model, &problemType, &unknownDimensionsType, &hasUnknownDimensions,
947                        allowUnknownDimensions, this]{
948                           // 3% unknowns causes ~35% of partitionings to fail
949                           // (determined by commenting out the fallback code,
950                           // running tests and noting number of failures).
951                           if (allowUnknownDimensions && randFrac() < 0.03) {
952                               hasUnknownDimensions = true;
953                               return model.addOperand(&unknownDimensionsType);
954                           } else {
955                               return model.addOperand(&problemType);
956                           }
957                       });
958 
959         // OPERATION ///////////////////////////////////////////////////////////////////////////////
960 
961         const uint32_t operationIndex =
962                 model.addOperation(operationPattern.mOperationType,
963                                    operationInputs, operationOutputs);
964         deadOperations.insert(operationIndex);
965         std::for_each(operationOutputs.begin(), operationOutputs.end(),
966                       [&deadOperands, operationIndex](uint32_t operandIndex) {
967                           deadOperands.insert(std::make_pair(operandIndex, operationIndex));
968                       });
969     }
970 
971     // Now finalize the weights.
972     weights.layout();
973     for (const auto& valueOperand : valueOperands) {
974         const uint32_t operandIndex = valueOperand.first;
975         const unsigned regionIndex = valueOperand.second;
976 
977         if (regionIndex == ~0U) {
978             continue;
979         }
980 
981         const WrapperMemory* memory;
982         uint32_t offset, length;
983         float* region =
984                 static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length));
985         CHECK(length == problemSize * problemSize * sizeof(float));
986         std::generate(region, region + problemSize * problemSize, [this]{ return randFrac(); });
987         model.setOperandValueFromMemory(operandIndex, memory, offset, length);
988     }
989 
990     // Now select model outputs.
991     for (uint32_t operationIdx = 0, operationCount = model.operationCount();
992          operationIdx < operationCount; operationIdx++) {
993         const auto& outputs = model.getOperationOutputs(operationIdx);
994         for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount;
995              outputIdx++) {
996             bool modelOutput = false;
997             const uint32_t operandIndex = outputs[outputIdx];
998             const auto deadOperandI = deadOperands.find(operandIndex);
999             if (deadOperandI != deadOperands.end()) {
1000                 // This is not consumed within the model, so unless we
1001                 // make it an output of the model, it's dead.  The
1002                 // further along we are in generating this model
1003                 // (i.e., the more operations we have generated), the
1004                 // more likely we are to classify this operation
1005                 // output as a model output.
1006                 const double probabilityOfModelOutput =
1007                         0.50 * [](double x){ return x*x; }((operationIdx + 1) / operationCount);
1008                 modelOutput = (randFrac() < probabilityOfModelOutput);
1009             } else {
1010                 // This is consumed within the model, so we'll rarely
1011                 // make it an output of the model.
1012                 modelOutput = (randFrac() < 0.05);
1013             }
1014             if (!modelOutput) {
1015                 continue;
1016             }
1017             modelOutputs.push_back(operandIndex);
1018             if (deadOperandI != deadOperands.end()) {
1019                 deadOperands.erase(deadOperandI);
1020                 const auto deadOperationI = deadOperations.find(operationIdx);
1021                 if (deadOperationI != deadOperations.end()) {
1022                     deadOperations.erase(deadOperationI);
1023                 }
1024             }
1025         }
1026     }
1027     if (!allowDeadOperations) {
1028         // For each dead operation, pick a random output to become a model output.
1029         for (uint32_t deadOperationIndex : deadOperations) {
1030             const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex);
1031             const uint32_t deadOperandIndex =
1032                     deadOperationOutputs[randUInt(deadOperationOutputs.size())];
1033             modelOutputs.push_back(deadOperandIndex);
1034         }
1035     }
1036     // A model must have at least one output.
1037     if (modelOutputs.empty()) {
1038         const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount()));
1039         modelOutputs.push_back(outputs[randUInt(outputs.size())]);
1040     }
1041 
1042     model.identifyInputsAndOutputs(modelInputs, modelOutputs);
1043 #ifdef VERBOSE
1044     {
1045         std::cout << "Original model: " << ModelStats(&model) << std::endl;
1046         std::cout << "rootOperationCount = " << rootOperationCount
1047                   << ", deadOperations = ";
1048         if (allowDeadOperations) {
1049             std::cout << deadOperations.size();
1050         } else {
1051             std::cout << "forbidden (converted " << deadOperations.size() << ")";
1052         }
1053         std::cout << std::endl;
1054     }
1055 #endif
1056     ASSERT_EQ(model.finish(), Result::NO_ERROR);
1057 
1058     // Non-partitioned compilation.
1059     TestCompilation c(&model);
1060     ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
1061     ASSERT_EQ(c.finish(), Result::NO_ERROR);
1062 
1063     // Create some drivers for partitioned compilation.
1064     CHECK(!signatures.empty());
1065     std::vector<std::set<Signature>> signaturesForDriver(signatures.size());
1066     //     First assign each signature to a random driver (a driver is
1067     //     just represented as an entry in the signaturesForDriver
1068     //     vector).
1069     for (Signature signature : signatures) {
1070         signaturesForDriver[randUInt(signatures.size())].insert(signature);
1071     }
1072     //     Now remove each entry that has no signatures.
1073     auto firstExtra =
1074         std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(),
1075                        [](const std::set<Signature>& sigSet) { return sigSet.empty(); });
1076     if (firstExtra != signaturesForDriver.end()) {
1077         signaturesForDriver.erase(firstExtra, signaturesForDriver.end());
1078     }
1079     //     Now actually create the drivers.
1080     std::vector<std::shared_ptr<Device>> devices;
1081     for (unsigned i = 0; i < signaturesForDriver.size(); i++) {
1082         const auto& signaturesForThisDriver = signaturesForDriver[i];
1083         // Minimum HAL version for this driver is highest minimum HAL version of
1084         // any operation supported by this driver.
1085         const HalVersion minHalVersion = getMinHalVersion(
1086                 std::max_element(signaturesForThisDriver.begin(), signaturesForThisDriver.end(),
1087                                  [](const Signature& a, const Signature& b) {
1088                                      return getMinHalVersion(a.first) < getMinHalVersion(b.first);
1089                                  })
1090                         ->first);
1091         const HalVersion actualHalVersion =
1092                 static_cast<HalVersion>(static_cast<int32_t>(minHalVersion) +
1093                                         randUInt(static_cast<int32_t>(HalVersion::LATEST) -
1094                                                  static_cast<int32_t>(minHalVersion) + 1));
1095         const std::string name =
1096                 "TestDriver(" + std::to_string(i) + "){" + to_string(actualHalVersion) + "}";
1097 #ifdef VERBOSE
1098         std::cout << "Creating " + name + " for collection of signatures that requires HAL " +
1099                              to_string(minHalVersion)
1100                   << std::endl;
1101 #endif
1102         auto device = DeviceManager::forTest_makeDriverDevice(
1103                 name, makeTestDriver(actualHalVersion, name.c_str(), signaturesForThisDriver));
1104         devices.push_back(device);
1105     }
1106     // CPU fallback device
1107     devices.push_back(DeviceManager::getCpuDevice());
1108 
1109     // Partitioned compilation.
1110     // For test cases without unknown intermediate operand sizes we require the
1111     // partitioning to succeed without CPU fallback. With unknown sizes we
1112     // retry with a fallback if the non-fallback partitioning fails and require
1113     // the fallback to succeed.
1114     TestCompilation cNoFallback(&model, devices);
1115     TestCompilation cWithFallback(&model, devices);
1116     TestCompilation *c2 = nullptr;
1117     ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
1118               Result::NO_ERROR);
1119     auto compilationResult = cNoFallback.finish();
1120     if (hasUnknownDimensions && compilationResult == Result::OP_FAILED &&
1121         cNoFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize()) {
1122         ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
1123                   Result::NO_ERROR);
1124         ASSERT_EQ(cWithFallback.finish(), Result::NO_ERROR);
1125         ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
1126         ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
1127                   DeviceManager::getCpuDevice());
1128         c2 = &cWithFallback;
1129     } else {
1130         ASSERT_EQ(compilationResult, Result::NO_ERROR);
1131         c2 = &cNoFallback;
1132     }
1133 
1134 #ifdef VERBOSE
1135     {
1136         std::cout << "signatures = " << signatures.size()
1137                   << ", devices = " << devices.size() << std::endl;
1138         const ExecutionPlan& plan = c2->getExecutionPlan();
1139         switch (plan.forTest_getKind()) {
1140             case ExecutionPlan::Kind::SIMPLE:
1141                 std::cout << "plan: simple" << std::endl;
1142                 break;
1143             case ExecutionPlan::Kind::COMPOUND: {
1144                 const auto& steps = plan.forTest_compoundGetSteps();
1145                 std::set<const Device*> devicesInPlan;
1146                 for (const auto& step : steps) {
1147                     devicesInPlan.insert(step->getDevice().get());
1148                 }
1149                 std::cout << "plan: compound, " << steps.size() << " steps over "
1150                           << devicesInPlan.size() << " devices" << std::endl;
1151                 for (unsigned i = 0; i < steps.size(); i++) {
1152                     std::cout << "Step " << i << ": " << ModelStats(steps[i]->getSubModel())
1153                               << ", device = " << steps[i]->getDevice()->getName() << std::endl;
1154                 }
1155                 break;
1156             }
1157             default:
1158                 std::cout << "Unexpected plan kind: "
1159                     << static_cast<unsigned>(plan.forTest_getKind());
1160                 break;
1161         }
1162     }
1163 #endif
1164 
1165     // For execution:
1166     // - create master inputs (one long vector) and master output value
1167     //   - master inputs will be copied to actual inputs before each
1168     //     of the two executions
1169     //   - master output will be used to fill actual outputs before each
1170     //     of the two executions
1171     // - create actual inputs and outputs
1172     // - first execution (non-partitioned)
1173     //   - initialize inputs and (to avoid unrelated oddities) outputs
1174     //   - execute
1175     //   - copy outputs to a save area (one long vector)
1176     // - second execution (partitioned)
1177     //   - (to avoid unrelated oddities) initialize inputs and outputs
1178     //   - execute
1179     //   - compare outputs to save area
1180 
1181     // If the runtime and drivers are working properly, execution
1182     // should not change the inputs.  Nonetheless, we reinitialize the
1183     // inputs for each execution, so as to avoid unrelated problems
1184     // appearing to be problems related to unpartitioned execution
1185     // versus partitioned execution.  Similarly, execution behavior
1186     // should not be dependent on the outputs; but we'll initialize the
1187     // outputs anyway.
1188     std::vector<float> masterInputs(problemSize * problemSize * model.inputCount());
1189     std::generate(masterInputs.begin(), masterInputs.end(), [this]{ return randFrac(); });
1190 #ifdef VERBOSE
1191     {
1192         std::cout << "flat inputs = ";
1193         dump(masterInputs.begin(), masterInputs.end());
1194     }
1195 #endif
1196     const float masterOutput = randFrac();
1197 
1198     // Create the memory for the actual inputs and outputs.
1199     struct InputOutputDescriptor {
1200         enum Kind { INPUT, OUTPUT };
1201         Kind mKind;
1202 
1203         // The input or output either resides in a local buffer
1204         // (mVector, in which case mMemoryRegion is ignored); or in a
1205         // shared memory region within a TestMemories instance
1206         // (mMemoryRegion, in which case mVector is ignored).
1207         enum Location { VECTOR, REGION };
1208         Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }
1209 
1210         std::vector<float> mVector;
1211         unsigned mMemoryRegion;
1212     };
1213     std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
1214     for (unsigned i = 0; i < ioDescriptors.size(); i++) {
1215         ioDescriptors[i].mKind = (i < model.inputCount()
1216                                   ? InputOutputDescriptor::INPUT
1217                                   : InputOutputDescriptor::OUTPUT);
1218     }
1219     //     We randomly interleave inputs and outputs in creation
    //     order, because when we create memory regions in a
1221     //     TestMemories instance, the order in which regions are
1222     //     created within a single Memory is the order they'll be laid
1223     //     out in that memory; and when we have inputs and outputs
1224     //     within the same Memory, we want the possibility that
1225     //     they'll be interleaved.
1226     std::shuffle(ioDescriptors.begin(), ioDescriptors.end(), mRandNumEng);
1227     TestMemories ioMemories;
1228     for (auto &desc : ioDescriptors) {
1229         if (randFrac() < 0.5) {
1230             desc.mVector.resize(problemSize * problemSize);
1231         } else {
1232             // TODO: common this with the way we create IK_VALUE inputs?
1233             unsigned memoryIndex = ~0U;
1234             if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) {
1235                 memoryIndex = randUInt(ioMemories.memoryCount());
1236             } else {
1237                 memoryIndex = ioMemories.addMemory();
1238             }
1239             const size_t length = problemSize * problemSize * sizeof(float);
1240             desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length);
1241         }
1242     }
1243     ioMemories.layout();
1244 
1245     // Function to set up actual inputs and outputs (initializing them
1246     // and telling the WrapperExecution about them).
1247     auto prepareForExecution =
1248             [&model, &ioDescriptors, &ioMemories,
1249              &masterInputs, &masterOutput, problemSize, &problemType](WrapperExecution *e) {
1250         uint32_t inputIndex = 0, outputIndex = 0;
1251         for (auto &desc : ioDescriptors) {
1252             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
1253                 if (desc.mKind == InputOutputDescriptor::INPUT) {
1254                     const size_t inputOffset = inputIndex * problemSize * problemSize;
1255                     std::copy(masterInputs.begin() + inputOffset,
1256                               masterInputs.begin() + inputOffset + problemSize * problemSize,
1257                               desc.mVector.begin());
1258                     e->setInput(inputIndex++, desc.mVector.data(),
1259                                 desc.mVector.size() * sizeof(float));
1260                 } else {
1261                     std::fill(desc.mVector.begin(),
1262                               desc.mVector.begin() + problemSize * problemSize,
1263                               masterOutput);
1264                     e->setOutput(outputIndex++, desc.mVector.data(),
1265                                  desc.mVector.size() * sizeof(float),
1266                                  &problemType.operandType);
1267                 }
1268             } else {
1269                 const WrapperMemory* memory;
1270                 uint32_t offset, length;
1271                 float* region =
1272                         static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion,
1273                                                                  &memory, &offset, &length));
1274                 CHECK(length == problemSize * problemSize * sizeof(float));
1275                 if (desc.mKind == InputOutputDescriptor::INPUT) {
1276                     const size_t inputOffset = inputIndex * problemSize * problemSize;
1277                     std::copy(masterInputs.begin() + inputOffset,
1278                               masterInputs.begin() + inputOffset + problemSize * problemSize,
1279                               region);
1280                     e->setInputFromMemory(inputIndex++, memory, offset, length);
1281                 } else {
1282                     std::fill(region,
1283                               region + problemSize * problemSize,
1284                               masterOutput);
1285                     e->setOutputFromMemory(outputIndex++, memory, offset, length,
1286                                            &problemType.operandType);
1287                 }
1288             }
1289         };
1290         CHECK(inputIndex == model.inputCount());
1291         CHECK(outputIndex == model.outputCount());
1292     };
1293 
    // Non-partitioned execution.
    WrapperExecution e(&c);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e));
    ASSERT_EQ(e.compute(), Result::NO_ERROR);

    // Copy the outputs of the non-partitioned execution to a save area.
    // This preserves them so they can later be compared against the
    // outputs of the partitioned execution, which reuses the same
    // descriptors (and hence would otherwise overwrite them).
    std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount());
    {
        // outputIndex counts only OUTPUT descriptors; inputs are skipped.
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            // Each output occupies a problemSize^2 slice of the save area.
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                std::copy(desc.mVector.begin(),
                          desc.mVector.end(),
                          nonPartitionedOutputs.begin() + outputOffset);
            } else {
                // Shared-memory output: read back from the laid-out region.
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
                std::copy(region,
                          region + problemSize * problemSize,
                          nonPartitionedOutputs.begin() + outputOffset);
            }
#ifdef VERBOSE
            {
                std::cout << "nonpartitioned output[" << outputIndex << "] = ";
                dump(nonPartitionedOutputs.begin() + outputOffset,
                     nonPartitionedOutputs.begin() + outputOffset + problemSize * problemSize);
            }
#endif
            outputIndex++;
        }
    }
1328 
    // Partitioned execution.
    WrapperExecution e2(c2);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
    ASSERT_EQ(e2.compute(), Result::NO_ERROR);

    // Compare the outputs of the partitioned execution to the save
    // area containing the outputs of the non-partitioned execution.
    // The two must match exactly for the test to pass.
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            // Label any assertion failure below with the output index.
            SCOPED_TRACE(outputIndex);
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
#ifdef VERBOSE
                std::cout << "   partitioned output[" << outputIndex << "] = ";
                dump(desc.mVector.begin(), desc.mVector.end());
#endif
                // Exact (bitwise float) equality is expected since both
                // executions run the same computation on the CPU.
                ASSERT_TRUE(std::equal(desc.mVector.begin(),
                                       desc.mVector.end(),
                                       nonPartitionedOutputs.begin() + outputOffset));
            } else {
                // Shared-memory output: compare the region contents.
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
#ifdef VERBOSE
                std::cout << "part output[" << outputIndex << "] = ";
                dump(region, region + problemSize * problemSize);
#endif
                ASSERT_TRUE(std::equal(region,
                                       region + problemSize * problemSize,
                                       nonPartitionedOutputs.begin() + outputOffset));
            }
            outputIndex++;
        }
    }
1365 }
1366 
1367 }  // namespace
1368 }  // namespace android
1369