1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <android-base/logging.h>
18 #include <gtest/gtest.h>
19 #include <unistd.h>
20 
21 #include <algorithm>
22 #include <cassert>
23 #include <cstdio>
24 #include <iterator>
25 #include <map>
26 #include <memory>
27 #include <random>
28 #include <set>
29 #include <string>
30 #include <tuple>
31 #include <utility>
32 #include <vector>
33 
34 #include "CompilationBuilder.h"
35 #include "HalInterfaces.h"
36 #include "Manager.h"
37 #include "ModelBuilder.h"
38 #include "NeuralNetworks.h"
39 #include "SampleDriver.h"
40 #include "TestNeuralNetworksWrapper.h"
41 #include "Utils.h"
42 #include "ValidateHal.h"
43 
44 // Uncomment the following line to generate some debugging output that
45 // may be useful when analyzing failures:
46 //
47 // #define VERBOSE VERBOSE
48 
49 // We randomly generate tests (model + input data) at runtime, and verify
50 // that we get the same results whether we do partitioned compilation/execution
51 // or non partitioned compilation/execution.  We perform a test as follows:
52 //
53 // (1) Randomly generate a model (graph and weights), randomly generate input
54 //     data, randomly assign inputs and outputs to CPU memory or to shared
55 //     memory.
56 //
57 //     Randomly leaves dimensions unset for intermediate operands.
58 //
59 // (2) Randomly generate drivers based on the sample driver, each of which
60 //     executes models on the CPU.  They differ according to which operations
61 //     they support.
62 //
63 // (3) Compile and execute without partitioning, saving off the results.
64 //
65 // (4) Compile and execute with partitioning.
66 //
67 // (5) Verify that the saved results from (3) match the results from (4).
68 //
69 // For simplicity, all data (model inputs, model outputs, weights,
70 // temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
71 // dimensions are fixed throughout a particular test case (and
72 // randomly determined).  This prevents us from having to find a
73 // mechanism to "resize" data (e.g., if ADD#a operates on data of size
74 // 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
75 // and ADD#b become inputs of ADD#c, do we need to insert one or more
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
77 // from size 2x2 to size 3x3 in order to match ADD#b).  In the few
78 // cases where an operand cannot be of this type, it is a constant
79 // (e.g., activation functions and RNN bias).
80 //
81 // Each operation we generate has a signature (described in more
82 // detail later).  The randomly generated drivers decide which
83 // operations they can execute by checking operation signatures.  Once
84 // we have built the model and know the set of signatures, we randomly
85 // assign each signature to a driver.  No signature is supported by
86 // multiple drivers -- we're not testing the logic that the
87 // partitioning algorithm uses to select the best driver for an
88 // operation.
89 
90 namespace android {
91 
92 using namespace nn::hal;
93 using CompilationBuilder = nn::CompilationBuilder;
94 using Device = nn::Device;
95 using DeviceManager = nn::DeviceManager;
96 using ExecutionPlan = nn::ExecutionPlan;
97 using HalVersion = nn::HalVersion;
98 using HidlModel = V1_3::Model;
99 using ModelBuilder = nn::ModelBuilder;
100 using Result = nn::test_wrapper::Result;
101 using SampleDriver = nn::sample_driver::SampleDriver;
102 using WrapperCompilation = nn::test_wrapper::Compilation;
103 using WrapperExecution = nn::test_wrapper::Execution;
104 using WrapperMemory = nn::test_wrapper::Memory;
105 using WrapperModel = nn::test_wrapper::Model;
106 using WrapperOperandType = nn::test_wrapper::OperandType;
107 using WrapperType = nn::test_wrapper::Type;
108 
109 namespace {
110 
111 /// Configure test size //////////////////////////////////////////////////////////
112 
// Target ceiling on the number of operations in a generated model.  The
// generator is allowed to go past it when that is needed to connect
// otherwise disjoint subgraphs.
constexpr unsigned kMaxNumOperations = 100;

// Upper bound on each dimension of the 2-D square tensors the models process.
// The API promotes by-value weights larger than 128 to by-reference, so the
// bound is chosen to let us pick both types that exceed that size and types
// that do not.
constexpr unsigned kMaxProblemSize = 8;

// Seed of the first pseudorandomly generated test case.
constexpr unsigned kFirstSeed = 0;

// How many test cases to generate.
constexpr unsigned kNumTestCases = 225;

// Selects how graph weights are placed:
// - true:  all weights share a single pool (as we recommend to users).  This
//          may be necessary to keep large graphs from running up against
//          http://b/70302693 "NNAPI overuses (?) fds".
// - false: weights may be spread over multiple pools, which puts more stress
//          on the partitioning algorithm and the rest of the runtime.
constexpr bool kAllWeightsInOnePool = false;
135 
136 //////////////////////////////////////////////////////////////////////////////////
137 
138 // The signature of an operation consists of the operation type (e.g.,
139 // ADD) and the activation function (use -1 in the case of an
140 // operation type for which the activation function is inapplicable).
141 typedef std::pair<ANeuralNetworksOperationType, int> Signature;
142 
143 // This class adds some simple utilities on top of WrapperModel.  For example,
144 // it provides access to certain features from ModelBuilder that are not exposed
145 // by the base class (such as inputCount() and operation index).
146 class TestModel : public WrapperModel {
147    public:
addOperation(ANeuralNetworksOperationType type,const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)148     uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
149                           const std::vector<uint32_t>& outputs) {
150         const uint32_t operationIndex = operationCount();
151         mOperations.push_back(outputs);
152         WrapperModel::addOperation(type, inputs, outputs);
153         return operationIndex;
154     }
155 
operationCount() const156     uint32_t operationCount() const { return mOperations.size(); }
157 
inputCount() const158     uint32_t inputCount() const { return builder()->inputCount(); }
outputCount() const159     uint32_t outputCount() const { return builder()->outputCount(); }
160 
getOperationOutputs(uint32_t index) const161     const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const {
162         CHECK(index < mOperations.size());
163         return mOperations[index];
164     }
165 
166     // All values are immediately copied into the model (we need to do
167     // this ourselves in cases where the underlying NNAPI does not).
setOperandValue(uint32_t index,const std::vector<float> & value)168     void setOperandValue(uint32_t index, const std::vector<float>& value) {
169         const size_t length = value.size() * sizeof(float);
170 
171         if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
172             WrapperModel::setOperandValue(index, value.data(), length);
173         } else {
174             mOperandValues.push_back(value);
175             WrapperModel::setOperandValue(index, mOperandValues.back().data(), length);
176         }
177     }
178 
setOperandValue(uint32_t index,const std::vector<int32_t> & value)179     void setOperandValue(uint32_t index, const std::vector<int32_t>& value) {
180         const size_t length = value.size() * sizeof(int32_t);
181 
182         CHECK(length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
183         WrapperModel::setOperandValue(index, value.data(), length);
184     }
185 
setOperandValue(uint32_t index,int32_t value)186     void setOperandValue(uint32_t index, int32_t value) {
187         CHECK(sizeof(value) <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
188         WrapperModel::setOperandValue(index, &value, sizeof(value));
189     }
190 
191    private:
builder() const192     const ModelBuilder* builder() const {
193         return reinterpret_cast<const ModelBuilder*>(getHandle());
194     }
195 
196     // Representation of operations: vector index is operation number,
197     // vector value is operation's output operands.
198     std::vector<std::vector<uint32_t>> mOperations;
199 
200     // Large operand values -- not immediately copied into the
201     // WrapperModel, so remembered here instead.
202     std::vector<std::vector<float>> mOperandValues;
203 };
204 
205 // This class adds some simple utilities on top of WrapperCompilation in order
206 // to provide access to certain features from CompilationBuilder that are not
207 // exposed by the base class.
208 class TestCompilation : public WrapperCompilation {
209    public:
TestCompilation(const WrapperModel * model)210     TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {}
211 
TestCompilation(const WrapperModel * model,std::vector<std::shared_ptr<Device>> devices)212     TestCompilation(const WrapperModel* model, std::vector<std::shared_ptr<Device>> devices) {
213         ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
214         CompilationBuilder* c = nullptr;
215         int result = m->createCompilation(&c, devices);
216         EXPECT_EQ(result, 0);
217         mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
218     }
219 
220     using WrapperCompilation::finish;
221 
setPartitioning(uint32_t partitioning)222     Result setPartitioning(uint32_t partitioning) {
223         return static_cast<Result>(builder()->setPartitioning(partitioning));
224     }
225 
getExecutionPlan() const226     const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }
227 
228    private:
builder() const229     const CompilationBuilder* builder() const {
230         return reinterpret_cast<const CompilationBuilder*>(getHandle());
231     }
builder()232     CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }
233 };
234 
235 // This class is used to manage a collection of memory regions,
236 // disjoint windows onto a set of Memory instances, each of which is
237 // associated with a single shared memory region.  Each region and
238 // Memory instance is assigned a number.  The usage pattern is as
239 // follows:
240 // - Call addMemory() and addRegion() as many times as needed to
241 //   declare (but not define) Memory instances and declare region
242 //   instances.
243 // - Call layout() to define the Memory instances.
244 // - Call getRegion() as many times as needed to get the details
245 //   of memory regions (such as address, or Memory/offset/length).
246 // The Memory instances created by layout() are owned by the
247 // TestMemories instance, and are destroyed when the TestMemories
248 // instance is destroyed.
249 class TestMemories {
250    public:
251     TestMemories() = default;
252 
253     TestMemories(const TestMemories&) = delete;
254     TestMemories& operator=(const TestMemories&) = delete;
255 
addMemory()256     unsigned addMemory() {
257         CHECK(!mLayoutDone);
258         mMemorySizes.push_back(0);
259         return memoryCount() - 1;
260     }
memoryCount() const261     unsigned memoryCount() const { return mMemorySizes.size(); }
262 
addRegion(unsigned memoryIndex,uint32_t length)263     unsigned addRegion(unsigned memoryIndex, uint32_t length) {
264         CHECK(!mLayoutDone);
265         CHECK(memoryIndex < memoryCount());
266         uint32_t& memorySize = mMemorySizes[memoryIndex];
267         auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length);
268         mRegions.push_back(desc);
269         memorySize += length;
270         return regionCount() - 1;
271     }
regionCount() const272     unsigned regionCount() const { return mRegions.size(); }
273 
274     void layout();
275 
getRegion(unsigned regionIndex,const WrapperMemory ** pMemory,uint32_t * pOffset,uint32_t * pLength)276     void* getRegion(unsigned regionIndex, const WrapperMemory** pMemory, uint32_t* pOffset,
277                     uint32_t* pLength) {
278         CHECK(mLayoutDone);
279         CHECK(regionIndex < regionCount());
280         const auto& regionDescriptor = mRegions[regionIndex];
281         const WrapperMemory* memory = &mMemories[std::get<0>(regionDescriptor)];
282         uint32_t offset = std::get<1>(regionDescriptor);
283         uint32_t length = std::get<2>(regionDescriptor);
284 
285         uint8_t* buffer = reinterpret_cast<nn::MemoryAshmem*>(memory->get())->getPointer();
286         CHECK(buffer != nullptr);
287 
288         if (pMemory) *pMemory = memory;
289         if (pOffset) *pOffset = offset;
290         if (pLength) *pLength = length;
291 
292         return buffer + offset;
293     }
294 
getRegion(unsigned regionIndex)295     void* getRegion(unsigned regionIndex) {
296         return getRegion(regionIndex, nullptr, nullptr, nullptr);
297     }
298 
299    private:
300     // Index is the memory index; value is the size of the memory
301     // (aggregate size of all regions in the memory).
302     std::vector<uint32_t> mMemorySizes;
303 
304     // Index is the memory index.
305     std::vector<WrapperMemory> mMemories;
306 
307     // Index is the region index; tuple represents memory index,
308     // region offset within memory, region length.
309     std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions;
310 
311     // For sanity checking.
312     bool mLayoutDone = false;
313 };
314 
layout()315 void TestMemories::layout() {
316     CHECK(!mLayoutDone);
317     for (uint32_t memorySize : mMemorySizes) {
318         auto [n, ashmem] = nn::MemoryAshmem::create(memorySize);
319         CHECK_EQ(n, ANEURALNETWORKS_NO_ERROR);
320         CHECK(ashmem != nullptr);
321 
322         ANeuralNetworksMemory* memory = reinterpret_cast<ANeuralNetworksMemory*>(ashmem.release());
323         mMemories.emplace_back(memory);
324     }
325     mLayoutDone = true;
326 }
327 
328 class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
329    public:
RandomPartitioningTest()330     RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}
331 
332     static Signature getSignature(const HidlModel& model, const Operation& operation);
333 
334    protected:
335     static V1_0::IDevice* makeTestDriver(HalVersion version, const char* name,
336                                          std::set<Signature> signatures);
337 
338     static HalVersion getMinHalVersion(ANeuralNetworksOperationType type);
339 
340     static std::string to_string(HalVersion version);
341 
randBool()342     bool randBool() { return randUInt(2) == 1; }
343 
randFrac()344     double randFrac() {  // [0.0, 1.0)
345         return mRandNumUnitDist(mRandNumEng);
346     }
347 
randUInt(unsigned limit)348     unsigned randUInt(unsigned limit) {  // [0, limit)
349         return unsigned(randFrac() * limit);
350     }
351 
352     // Represents an operation in which every input and output operand
353     // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except:
354     // - One input operand may be an activation function.
355     // - Any number of input operands may be "special" in some other way
356     //   (and in this implementation, not produced by any other operation).
357     // We require that:
358     // - There be at least one input operand that is neither an
359     //    activation function nor "special".
360     struct OperationPattern {
361         HalVersion mMinHalVersion;
362         int mOperationType;
363         unsigned mNumInputs;
364         unsigned mNumOutputs;
365         int mActivationFunctionInputIndex;  // <0 if none
366 
367         // Returns operand index, or <0 if input is normal (must not
368         // be called for an activation function operand).  Function
369         // should have the following prototype:
370         //
371         //     int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex);
372         //
373         int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned);
374     };
375 
376     static const OperationPattern kOperationPatterns[];
377 
378     // See OperationPattern::mMakeSpecialInput.  This function is used to
379     // manufacture an ELU input operand that doesn't fit the general operand
380     // pattern known to the graph generator infrastructure.
makeEluSpecialInput(unsigned problemSize,TestModel * model,unsigned inputIndex)381     int makeEluSpecialInput([[maybe_unused]] unsigned problemSize, TestModel* model,
382                             unsigned inputIndex) {
383         if (inputIndex != 1) {
384             return -1;
385         }
386 
387         // input operand 1 is alpha, a scalar
388         const WrapperOperandType alphaType(WrapperType::FLOAT32, {});
389         return int(model->addConstantOperand(&alphaType, 1.0f));
390     }
391 
392     // See OperationPattern::mMakeSpecialInput.  This function is used to
393     // manufacture an RNN input operand that doesn't fit the general operand
394     // pattern known to the graph generator infrastructure.
makeRnnSpecialInput(unsigned problemSize,TestModel * model,unsigned inputIndex)395     int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) {
396         if (inputIndex != 3) {
397             return -1;
398         }
399 
400         // input operand 3 is bias, a 1-D tensor
401         const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, {problemSize});
402         const uint32_t operandIndex = model->addOperand(&biasType);
403         std::vector<float> biasValue(problemSize);
404         std::generate(biasValue.begin(), biasValue.end(), [this] { return randFrac(); });
405         model->setOperandValue(operandIndex, biasValue);
406         return int(operandIndex);
407     }
408 
409     // See OperationPattern::mMakeSpecialInput.  This function is used to
410     // manufacture a TRANSPOSE input operand that doesn't fit the general operand
411     // pattern known to the graph generator infrastructure.
makeTransposeSpecialInput(unsigned,TestModel * model,unsigned inputIndex)412     int makeTransposeSpecialInput(unsigned /* problemSize */, TestModel* model,
413                                   unsigned inputIndex) {
414         if (inputIndex != 1) {
415             return -1;
416         }
417 
418         // input operand 1 is perm, a 1-D tensor
419         const WrapperOperandType permType(WrapperType::TENSOR_INT32, {2});
420         const uint32_t operandIndex = model->addOperand(&permType);
421         std::vector<int32_t> permValue = {1, 0};
422         model->setOperandValue(operandIndex, permValue);
423         return int(operandIndex);
424     }
425 
426 #ifdef VERBOSE
427     class ModelStats {
428        public:
ModelStats(const ModelBuilder * model)429         ModelStats(const ModelBuilder* model) : mBuilder(model) {}
ModelStats(const WrapperModel * model)430         ModelStats(const WrapperModel* model)
431             : mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) {}
operator <<(std::ostream & out,const ModelStats & stats)432         friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) {
433             const uint32_t operandCount = stats.mBuilder->operandCount();
434             const uint32_t inputCount = stats.mBuilder->inputCount();
435             const uint32_t outputCount = stats.mBuilder->outputCount();
436             out << "operationCount = " << stats.mBuilder->operationCount()
437                 << ", operandCount = " << operandCount << ", inputCount = " << inputCount << " ("
438                 << (double(inputCount) / operandCount) << ")"
439                 << ", outputCount = " << outputCount << " (" << (double(outputCount) / operandCount)
440                 << ")";
441             return out;
442         }
443 
444        private:
445         const ModelBuilder* mBuilder;
446     };
447 
448     template <typename T_iterator>
dump(T_iterator I,T_iterator E)449     static void dump(T_iterator I, T_iterator E) {
450         std::cout << "{";
451         for (; I != E; I++) {
452             std::cout << " " << *I;
453         }
454         std::cout << " }" << std::endl;
455     }
456 #endif
457 
458     std::mt19937 mRandNumEng;
459 
460    private:
461     std::uniform_real_distribution<double> mRandNumUnitDist;
462 };
463 
// The operation patterns known to this test.  Each entry lists, in
// OperationPattern member order:
//   minimum HAL version, operation type, number of inputs, number of outputs,
//   index of the activation function input (-1 if none), and the member
//   function that manufactures "special" inputs (nullptr if none).
// Entries are grouped by minimum HAL version.
const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = {
        {HalVersion::V1_0, ANEURALNETWORKS_ADD, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_MUL, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_RNN, 6, 2, 5,
         &RandomPartitioningTest::makeRnnSpecialInput},
        {HalVersion::V1_0, ANEURALNETWORKS_TANH, 1, 1, -1, nullptr},

        {HalVersion::V1_1, ANEURALNETWORKS_SUB, 3, 1, 2, nullptr},
        {HalVersion::V1_1, ANEURALNETWORKS_TRANSPOSE, 2, 1, -1,
         &RandomPartitioningTest::makeTransposeSpecialInput},

        {HalVersion::V1_2, ANEURALNETWORKS_MAXIMUM, 2, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_NEG, 1, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_SIN, 1, 1, -1, nullptr},

        {HalVersion::V1_3, ANEURALNETWORKS_ELU, 2, 1, -1,
         &RandomPartitioningTest::makeEluSpecialInput},
        {HalVersion::V1_3, ANEURALNETWORKS_HARD_SWISH, 1, 1, -1, nullptr},
};
484 
getMinHalVersion(ANeuralNetworksOperationType type)485 HalVersion RandomPartitioningTest::getMinHalVersion(ANeuralNetworksOperationType type) {
486     static const auto kOperationToVersion = [] {
487         std::map<ANeuralNetworksOperationType, HalVersion> result;
488         for (const auto& pattern : kOperationPatterns) {
489             result[pattern.mOperationType] = pattern.mMinHalVersion;
490         }
491         return result;
492     }();
493 
494     return kOperationToVersion.at(type);
495 }
496 
getSignature(const HidlModel & model,const Operation & operation)497 Signature RandomPartitioningTest::getSignature(const HidlModel& model, const Operation& operation) {
498     static const auto kOperationToActivation = [] {
499         std::map<ANeuralNetworksOperationType, int> result;
500         for (const auto& pattern : kOperationPatterns) {
501             result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex;
502         }
503         return result;
504     }();
505 
506     const ANeuralNetworksOperationType operationType =
507             static_cast<ANeuralNetworksOperationType>(operation.type);
508     const int activationFunctionInputIndex = kOperationToActivation.at(operationType);
509     if (activationFunctionInputIndex < 0) {
510         return Signature(operationType, -1);
511     }
512 
513     const Operand& operand = model.main.operands[operation.inputs[activationFunctionInputIndex]];
514     CHECK(operand.lifetime == OperandLifeTime::CONSTANT_COPY);
515     CHECK(operand.type == OperandType::INT32);
516     int32_t value;
517     memcpy(&value, &model.operandValues[operand.location.offset], operand.location.length);
518     return Signature(operationType, value);
519 }
520 
to_string(HalVersion version)521 std::string RandomPartitioningTest::to_string(HalVersion version) {
522     switch (version) {
523         case HalVersion::V1_0:
524             return "V1_0";
525         case HalVersion::V1_1:
526             return "V1_1";
527         case HalVersion::V1_2:
528             return "V1_2";
529         case HalVersion::V1_3:
530             return "V1_3";
531         default:
532             return "V_UNKNOWN";
533     }
534 };
535 
536 class TestDriver : public SampleDriver {
537    public:
538     // Behaves like SampleDriver, except that it only supports
539     // operations with the specified signatures.
TestDriver(const char * name,std::set<Signature> signatures)540     TestDriver(const char* name, std::set<Signature> signatures)
541         : SampleDriver(name), mSignatures(std::move(signatures)) {}
542 
getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb)543     Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
544         android::nn::initVLogMask();
545         const PerformanceInfo kPerf = {.execTime = 0.75f, .powerUsage = 0.75f};
546         Capabilities capabilities = {
547                 .relaxedFloat32toFloat16PerformanceScalar = kPerf,
548                 .relaxedFloat32toFloat16PerformanceTensor = kPerf,
549                 .operandPerformance = nn::nonExtensionOperandPerformance<HalVersion::V1_3>(kPerf),
550                 .ifPerformance = kPerf,
551                 .whilePerformance = kPerf};
552         _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
553         return Void();
554     }
555 
getSupportedOperations_1_3(const HidlModel & model,getSupportedOperations_1_3_cb cb)556     Return<void> getSupportedOperations_1_3(const HidlModel& model,
557                                             getSupportedOperations_1_3_cb cb) override {
558         if (nn::validateModel(model)) {
559             const size_t count = model.main.operations.size();
560             std::vector<bool> supported(count);
561             for (size_t i = 0; i < count; i++) {
562                 supported[i] = (mSignatures.count(RandomPartitioningTest::getSignature(
563                                         model, model.main.operations[i])) != 0);
564             }
565             cb(V1_3::ErrorStatus::NONE, supported);
566         } else {
567             cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
568         }
569         return Void();
570     }
571 
prepareModel_1_3(const HidlModel & model,ExecutionPreference preference,Priority priority,const OptionalTimePoint & deadline,const hidl_vec<hidl_handle> & modelCache,const hidl_vec<hidl_handle> & dataCache,const CacheToken & token,const sp<V1_3::IPreparedModelCallback> & callback)572     Return<V1_3::ErrorStatus> prepareModel_1_3(
573             const HidlModel& model, ExecutionPreference preference, Priority priority,
574             const OptionalTimePoint& deadline, const hidl_vec<hidl_handle>& modelCache,
575             const hidl_vec<hidl_handle>& dataCache, const CacheToken& token,
576             const sp<V1_3::IPreparedModelCallback>& callback) override {
577         // NOTE: We verify that all operations in the model are supported.
578         V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT;
579         auto ret = getSupportedOperations_1_3(
580                 model, [&outStatus](V1_3::ErrorStatus inStatus,
581                                     const hidl_vec<bool>& supportedOperations) {
582                     if (inStatus == V1_3::ErrorStatus::NONE) {
583                         if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
584                                         [](bool v) { return v; })) {
585                             outStatus = V1_3::ErrorStatus::NONE;
586                         }
587                     }
588                 });
589         if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) {
590             return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache,
591                                                   dataCache, token, callback);
592         } else {
593             callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
594             return V1_3::ErrorStatus::INVALID_ARGUMENT;
595         }
596     }
597 
598    private:
599     const std::set<Signature> mSignatures;
600 };
601 
602 // Like TestDriver, but implementing 1.2
603 class TestDriverV1_2 : public V1_2::IDevice {
604    public:
TestDriverV1_2(const char * name,std::set<Signature> signatures)605     TestDriverV1_2(const char* name, std::set<Signature> signatures)
606         : mLatestDriver(new TestDriver(name, std::move(signatures))) {}
getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb)607     Return<void> getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb) override {
608         return mLatestDriver->getCapabilities_1_2(_hidl_cb);
609     }
getSupportedOperations_1_2(const V1_2::Model & model,getSupportedOperations_1_2_cb _hidl_cb)610     Return<void> getSupportedOperations_1_2(const V1_2::Model& model,
611                                             getSupportedOperations_1_2_cb _hidl_cb) override {
612         return mLatestDriver->getSupportedOperations_1_2(model, _hidl_cb);
613     }
prepareModel_1_2(const V1_2::Model & model,ExecutionPreference preference,const hidl_vec<hidl_handle> & modelCache,const hidl_vec<hidl_handle> & dataCache,const CacheToken & token,const sp<V1_2::IPreparedModelCallback> & actualCallback)614     Return<V1_0::ErrorStatus> prepareModel_1_2(
615             const V1_2::Model& model, ExecutionPreference preference,
616             const hidl_vec<hidl_handle>& modelCache, const hidl_vec<hidl_handle>& dataCache,
617             const CacheToken& token,
618             const sp<V1_2::IPreparedModelCallback>& actualCallback) override {
619         return mLatestDriver->prepareModel_1_2(model, preference, modelCache, dataCache, token,
620                                                actualCallback);
621     }
getVersionString(getVersionString_cb _hidl_cb)622     Return<void> getVersionString(getVersionString_cb _hidl_cb) override {
623         return mLatestDriver->getVersionString(_hidl_cb);
624     }
getType(getType_cb _hidl_cb)625     Return<void> getType(getType_cb _hidl_cb) override { return mLatestDriver->getType(_hidl_cb); }
getSupportedExtensions(getSupportedExtensions_cb _hidl_cb)626     Return<void> getSupportedExtensions(getSupportedExtensions_cb _hidl_cb) {
627         return mLatestDriver->getSupportedExtensions(_hidl_cb);
628     }
getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb)629     Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb) {
630         return mLatestDriver->getNumberOfCacheFilesNeeded(_hidl_cb);
631     }
prepareModelFromCache(const hidl_vec<hidl_handle> & modelCache,const hidl_vec<hidl_handle> & dataCache,const CacheToken & token,const sp<V1_2::IPreparedModelCallback> & callback)632     Return<V1_0::ErrorStatus> prepareModelFromCache(
633             const hidl_vec<hidl_handle>& modelCache, const hidl_vec<hidl_handle>& dataCache,
634             const CacheToken& token, const sp<V1_2::IPreparedModelCallback>& callback) {
635         return mLatestDriver->prepareModelFromCache(modelCache, dataCache, token, callback);
636     }
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)637     Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
638         return mLatestDriver->getCapabilities_1_1(_hidl_cb);
639     }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)640     Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
641                                             getSupportedOperations_1_1_cb _hidl_cb) override {
642         return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
643     }
prepareModel_1_1(const V1_1::Model & model,ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)644     Return<V1_0::ErrorStatus> prepareModel_1_1(
645             const V1_1::Model& model, ExecutionPreference preference,
646             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
647         return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
648     }
getStatus()649     Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)650     Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
651         return mLatestDriver->getCapabilities(_hidl_cb);
652     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)653     Return<void> getSupportedOperations(const V1_0::Model& model,
654                                         getSupportedOperations_cb _hidl_cb) override {
655         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
656     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)657     Return<V1_0::ErrorStatus> prepareModel(
658             const V1_0::Model& model,
659             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
660         return mLatestDriver->prepareModel(model, actualCallback);
661     }
662 
663    private:
664     const sp<V1_3::IDevice> mLatestDriver;
665 };
666 
667 // Like TestDriver, but implementing 1.1
668 class TestDriverV1_1 : public V1_1::IDevice {
669    public:
TestDriverV1_1(const char * name,std::set<Signature> signatures)670     TestDriverV1_1(const char* name, std::set<Signature> signatures)
671         : mLatestDriver(new TestDriver(name, std::move(signatures))) {}
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)672     Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
673         return mLatestDriver->getCapabilities_1_1(_hidl_cb);
674     }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)675     Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
676                                             getSupportedOperations_1_1_cb _hidl_cb) override {
677         return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
678     }
prepareModel_1_1(const V1_1::Model & model,ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)679     Return<V1_0::ErrorStatus> prepareModel_1_1(
680             const V1_1::Model& model, ExecutionPreference preference,
681             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
682         return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
683     }
getStatus()684     Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)685     Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
686         return mLatestDriver->getCapabilities(_hidl_cb);
687     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)688     Return<void> getSupportedOperations(const V1_0::Model& model,
689                                         getSupportedOperations_cb _hidl_cb) override {
690         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
691     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)692     Return<V1_0::ErrorStatus> prepareModel(
693             const V1_0::Model& model,
694             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
695         return mLatestDriver->prepareModel(model, actualCallback);
696     }
697 
698    private:
699     const sp<V1_3::IDevice> mLatestDriver;
700 };
701 
702 // Like TestDriver, but implementing 1.0
703 class TestDriverV1_0 : public V1_0::IDevice {
704    public:
TestDriverV1_0(const char * name,std::set<Signature> signatures)705     TestDriverV1_0(const char* name, std::set<Signature> signatures)
706         : mLatestDriver(new TestDriver(name, std::move(signatures))) {}
getCapabilities(getCapabilities_cb _hidl_cb)707     Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
708         return mLatestDriver->getCapabilities(_hidl_cb);
709     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)710     Return<void> getSupportedOperations(const V1_0::Model& model,
711                                         getSupportedOperations_cb _hidl_cb) override {
712         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
713     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)714     Return<V1_0::ErrorStatus> prepareModel(
715             const V1_0::Model& model,
716             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
717         return mLatestDriver->prepareModel(model, actualCallback);
718     }
getStatus()719     Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
720 
721    private:
722     const sp<V1_3::IDevice> mLatestDriver;
723 };
724 
makeTestDriver(HalVersion version,const char * name,std::set<Signature> signatures)725 V1_0::IDevice* RandomPartitioningTest::makeTestDriver(HalVersion version, const char* name,
726                                                       std::set<Signature> signatures) {
727     switch (version) {
728         case HalVersion::V1_0:
729             return new TestDriverV1_0(name, std::move(signatures));
730         case HalVersion::V1_1:
731             return new TestDriverV1_1(name, std::move(signatures));
732         case HalVersion::V1_2:
733             return new TestDriverV1_2(name, std::move(signatures));
734         case HalVersion::V1_3:
735             return new TestDriver(name, std::move(signatures));
736         default:
737             ADD_FAILURE() << "Unexpected HalVersion " << static_cast<int32_t>(version);
738             return nullptr;
739     }
740 }
741 
742 INSTANTIATE_TEST_CASE_P(Seed, RandomPartitioningTest,
743                         ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases));
744 
TEST_P(RandomPartitioningTest,Test)745 TEST_P(RandomPartitioningTest, Test) {
746     LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam();
747 
748 #ifdef VERBOSE
749     std::cout << std::setprecision(2) << std::fixed << std::setw(4);
750 #endif
751 
752     const unsigned problemSize = 1 + randUInt(kMaxProblemSize);
753     const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, {problemSize, problemSize});
754     const WrapperOperandType unknownDimensionsType(WrapperType::TENSOR_FLOAT32, {0, 0});
755 
756     static const WrapperOperandType activationFunctionType(WrapperType::INT32, {});
757 
758     const unsigned numOperations = 2 + randUInt(kMaxNumOperations - 1);
759     const bool allowDeadOperations = (randFrac() < 0.2);
760     const bool allowUnknownDimensions = (randFrac() < 0.25);
761 
762     // TODO: The current algorithm builds the graph in a forward
763     // direction (i.e., later-generated operations consume outputs
764     // from earlier-generated operations).  In order to get more
765     // variation in graph topology, perhaps we should also create an
766     // algorithm to build the graph in a backward direction (i.e.,
767     // later-generated operations produce outputs to be consumed by
768     // earlier-generated operations).
769     [[maybe_unused]] const bool buildForward = randBool();
770 
771     // TODO: Add a form of forced connectivity that operates by
772     // joining disjoint subgraphs rather than by forcing a root.
773     const bool forceCommonRoot = (randFrac() < 0.75);
774 
775     TestModel model;
776     std::vector<uint32_t> modelInputs;
777     std::vector<uint32_t> modelOutputs;
778 
779     // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32.
780     TestMemories weights;
781 
782     // Keep track of all normal (i.e., not activation function and not
783     // "special") operands that are values (from setOperandValue*()).
784     // .first: operand index
785     // .second: if the operand is already defined (via setOperandValue*()) then ~0U;
786     //          otherwise, the operand has yet to be defined, and this is the corresponding
787     //          region index in "weights"
788     std::vector<std::pair<uint32_t, unsigned>> valueOperands;
789 
790     // An operand is "dead" if it is not consumed by another operation
791     // and is not a model output.  Key is operand index; value is
792     // operation index.
793     std::map<uint32_t, uint32_t> deadOperands;
794 
795     // An operation is "dead" if all of its outputs are dead.
796     std::set<uint32_t> deadOperations;
797 
798     // Collect the signatures of operations in this model.
799     std::set<Signature> signatures;
800 
801     // For reporting purposes, keep track of the number of root
802     // operations (those that do not consume results produced by other
803     // operations).
804     unsigned rootOperationCount = 0;
805 
806     // Track if we added operands with unknown dimensions. In this case,
807     // partitioned compilation will fail if such an operand is read in a
808     // different partition than it is written.
809     bool hasUnknownDimensions = false;
810 
811     // Generate operations.
812     for (unsigned i = 0; i < numOperations; i++) {
813         const unsigned operationPatternIndex = randUInt(std::size(kOperationPatterns));
814         const auto& operationPattern = kOperationPatterns[operationPatternIndex];
815 
816         // INPUTS //////////////////////////////////////////////////////////////////////////////////
817 
818         std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U);
819 
820         // First, process activation function and special inputs, and
821         // keep track of which inputs remain.
822         std::vector<uint32_t> normalOperationInputIndexes;
823         int32_t activationFunction = -1;
824         for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs;
825              operationInputIndex++) {
826             if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) {
827                 const uint32_t operandIndex = model.addOperand(&activationFunctionType);
828                 activationFunction = randUInt(4);
829                 if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) {
830                     // workaround for http://b/69011131
831                     activationFunction = ANEURALNETWORKS_FUSED_NONE;
832                 }
833                 model.setOperandValue(operandIndex, activationFunction);
834                 operationInputs[operationInputIndex] = operandIndex;
835                 continue;
836             }
837             if (operationPattern.mMakeSpecialInput != nullptr) {
838                 const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))(
839                         problemSize, &model, operationInputIndex);
840                 if (operandIndex >= 0) {
841                     operationInputs[operationInputIndex] = operandIndex;
842                     continue;
843                 }
844             }
845             normalOperationInputIndexes.push_back(operationInputIndex);
846         }
847         CHECK(!normalOperationInputIndexes.empty());
848         signatures.insert(Signature(operationPattern.mOperationType, activationFunction));
849 
850         // A (normal) operation input can be one of:
851         // - a new or existing model input
852         // - an output of an existing operation
853         // - an OperandValue
854         // - an OperandValueFromMemory
855         // Some guidelines:
856         // - We generally don't want all of an operation's inputs to be values (constants)
857         const unsigned normalOperationInputCount = normalOperationInputIndexes.size();
858         //     How many of this operation's inputs are constants?
859         unsigned normalOperationInputConstantCount = 0;
860         //     How many of this operation's inputs are model inputs?
861         unsigned normalOperationInputModelInputCount = 0;
862         // We begin by deciding what kind of input each (normal) operation will be; we don't
863         // actually pick input operand indexes at this time, because we might override this
864         // decision later.
865         enum InputKind { IK_SUBGRAPH_INPUT, IK_OPERATION_OUTPUT, IK_VALUE };
866         std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount);
867         std::generate(
868                 normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
869                 [this, &model, numOperations, normalOperationInputCount,
870                  &normalOperationInputConstantCount,
871                  &normalOperationInputModelInputCount]() -> InputKind {
872                     // Constant?  Becomes less likely the more
873                     // constants we already have as inputs to
874                     // this operation.
875                     if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) /
876                                                         normalOperationInputCount)) {
877                         normalOperationInputConstantCount++;
878                         return IK_VALUE;
879                     }
880 
881                     // Model input?  Becomes less likely the
882                     // more model inputs we already have as
883                     // inputs to this operation, and the further
884                     // along we are in generating this model
885                     // (i.e., the more operations we have
886                     // generated).
887                     if ((model.operationCount() == 0) ||
888                         (randFrac() < 0.5 *
889                                               (1 - double(normalOperationInputModelInputCount) /
890                                                            normalOperationInputCount) *
891                                               std::min(0.3, (1 - double(model.operationCount()) /
892                                                                          numOperations)))) {
893                         normalOperationInputModelInputCount++;
894                         return IK_SUBGRAPH_INPUT;
895                     }
896 
897                     // Else output of an existing operation.
898                     return IK_OPERATION_OUTPUT;
899                 });
900 
901         // Now force common root or model input, if necessary.  (A
902         // model must have at least one input.)
903         auto force = [this, &normalOperationInputKinds,
904                       normalOperationInputCount](InputKind forceKind) {
905             if (std::none_of(normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
906                              [forceKind](InputKind kind) { return kind == forceKind; })) {
907                 normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind;
908             }
909         };
910         if (forceCommonRoot && (model.operationCount() != 0)) {
911             force(IK_OPERATION_OUTPUT);
912         }
913         if (modelInputs.empty()) {
914             CHECK(model.operationCount() == 0);
915             force(IK_SUBGRAPH_INPUT);
916         }
917 
918         // Finally create the normal inputs.
919         bool isRootOperation = true;
920         for (unsigned i = 0; i < normalOperationInputCount; i++) {
921             uint32_t operandIndex = ~0U;
922             switch (normalOperationInputKinds[i]) {
923                 case IK_SUBGRAPH_INPUT: {
924                     if (!modelInputs.empty() && (randFrac() < 0.5)) {
925                         operandIndex = modelInputs[randUInt(modelInputs.size())];
926                     } else {
927                         operandIndex = model.addOperand(&problemType);
928                         modelInputs.push_back(operandIndex);
929                     }
930                     break;
931                 }
932                 case IK_OPERATION_OUTPUT: {
933                     decltype(deadOperands.begin()) deadOperandI;
934                     if (!deadOperands.empty() && (randFrac() < 0.5)) {
935                         deadOperandI = deadOperands.begin();
936                         std::advance(deadOperandI, randUInt(deadOperands.size()));
937                         operandIndex = deadOperandI->first;
938                     } else {
939                         const uint32_t existingOperationIndex = randUInt(model.operationCount());
940                         const auto& existingOperationOutputs =
941                                 model.getOperationOutputs(existingOperationIndex);
942                         operandIndex =
943                                 existingOperationOutputs[randUInt(existingOperationOutputs.size())];
944                         deadOperandI = deadOperands.find(operandIndex);
945                         CHECK(deadOperandI == deadOperands.end() ||
946                               deadOperandI->second == existingOperationIndex);
947                     }
948                     if (deadOperandI != deadOperands.end()) {
949                         const uint32_t correspondingOperation = deadOperandI->second;
950                         deadOperands.erase(deadOperandI);
951 
952                         auto deadOperationI = deadOperations.find(correspondingOperation);
953                         if (deadOperationI != deadOperations.end()) {
954                             deadOperations.erase(deadOperationI);
955                         }
956                     }
957                     isRootOperation = false;
958                     break;
959                 }
960                 case IK_VALUE: {
961                     if (!valueOperands.empty() && (randFrac() < 0.25)) {
962                         operandIndex = valueOperands[randUInt(valueOperands.size())].first;
963                     } else {
964                         operandIndex = model.addOperand(&problemType);
965                         if (randFrac() < 0.5) {
966                             std::vector<float> value(problemSize * problemSize);
967                             std::generate(value.begin(), value.end(),
968                                           [this] { return randFrac(); });
969                             model.setOperandValue(operandIndex, value);
970                             valueOperands.push_back(std::make_pair(operandIndex, ~0U));
971                         } else {
972                             unsigned memoryIndex = ~0U;
973                             if ((weights.memoryCount() != 0) &&
974                                 (kAllWeightsInOnePool || (randFrac() < 0.5))) {
975                                 memoryIndex = randUInt(weights.memoryCount());
976                             } else {
977                                 memoryIndex = weights.addMemory();
978                             }
979                             const size_t length = problemSize * problemSize * sizeof(float);
980                             const unsigned regionIndex = weights.addRegion(memoryIndex, length);
981                             valueOperands.push_back(std::make_pair(operandIndex, regionIndex));
982                         }
983                     }
984                     break;
985                 }
986                 default:
987                     FAIL();
988             }
989             operationInputs[normalOperationInputIndexes[i]] = operandIndex;
990         }
991         if (isRootOperation) {
992             rootOperationCount++;
993         }
994 
995         // OUTPUTS /////////////////////////////////////////////////////////////////////////////////
996 
997         std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
998         std::generate(operationOutputs.begin(), operationOutputs.end(),
999                       [&model, &problemType, &unknownDimensionsType, &hasUnknownDimensions,
1000                        allowUnknownDimensions, this] {
1001                           // 3% unknowns causes ~35% of partitionings to fail
1002                           // (determined by commenting out the fallback code,
1003                           // running tests and noting number of failures).
1004                           if (allowUnknownDimensions && randFrac() < 0.03) {
1005                               hasUnknownDimensions = true;
1006                               return model.addOperand(&unknownDimensionsType);
1007                           } else {
1008                               return model.addOperand(&problemType);
1009                           }
1010                       });
1011 
1012         // OPERATION ///////////////////////////////////////////////////////////////////////////////
1013 
1014         const uint32_t operationIndex = model.addOperation(operationPattern.mOperationType,
1015                                                            operationInputs, operationOutputs);
1016         deadOperations.insert(operationIndex);
1017         std::for_each(operationOutputs.begin(), operationOutputs.end(),
1018                       [&deadOperands, operationIndex](uint32_t operandIndex) {
1019                           deadOperands.insert(std::make_pair(operandIndex, operationIndex));
1020                       });
1021     }
1022 
1023     // Now finalize the weights.
1024     weights.layout();
1025     for (const auto& valueOperand : valueOperands) {
1026         const uint32_t operandIndex = valueOperand.first;
1027         const unsigned regionIndex = valueOperand.second;
1028 
1029         if (regionIndex == ~0U) {
1030             continue;
1031         }
1032 
1033         const WrapperMemory* memory;
1034         uint32_t offset, length;
1035         float* region =
1036                 static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length));
1037         CHECK(length == problemSize * problemSize * sizeof(float));
1038         std::generate(region, region + problemSize * problemSize, [this] { return randFrac(); });
1039         model.setOperandValueFromMemory(operandIndex, memory, offset, length);
1040     }
1041 
1042     // Now select model outputs.
1043     for (uint32_t operationIdx = 0, operationCount = model.operationCount();
1044          operationIdx < operationCount; operationIdx++) {
1045         const auto& outputs = model.getOperationOutputs(operationIdx);
1046         for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount;
1047              outputIdx++) {
1048             bool modelOutput = false;
1049             const uint32_t operandIndex = outputs[outputIdx];
1050             const auto deadOperandI = deadOperands.find(operandIndex);
1051             if (deadOperandI != deadOperands.end()) {
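                // This is not consumed within the model, so unless we
                // make it an output of the model, it's dead.  The
                // intent is that the further along we are in the model
                // (i.e., the larger operationIdx is), the more likely
                // we are to classify this operation output as a model
                // output.
                //
                // NOTE(review): operationIdx and operationCount are both
                // uint32_t, so the quotient below is evaluated with
                // integer division before being converted to double.  It
                // is therefore 0 for every operation except the last one
                // (where it is 1), which makes the probability 0 or 0.50
                // rather than a gradual increase.  A floating-point
                // division is probably what was intended.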
1058                 const double probabilityOfModelOutput =
1059                         0.50 * [](double x) { return x * x; }((operationIdx + 1) / operationCount);
1060                 modelOutput = (randFrac() < probabilityOfModelOutput);
1061             } else {
1062                 // This is consumed within the model, so we'll rarely
1063                 // make it an output of the model.
1064                 modelOutput = (randFrac() < 0.05);
1065             }
1066             if (!modelOutput) {
1067                 continue;
1068             }
1069             modelOutputs.push_back(operandIndex);
1070             if (deadOperandI != deadOperands.end()) {
1071                 deadOperands.erase(deadOperandI);
1072                 const auto deadOperationI = deadOperations.find(operationIdx);
1073                 if (deadOperationI != deadOperations.end()) {
1074                     deadOperations.erase(deadOperationI);
1075                 }
1076             }
1077         }
1078     }
1079     if (!allowDeadOperations) {
1080         // For each dead operation, pick a random output to become a model output.
1081         for (uint32_t deadOperationIndex : deadOperations) {
1082             const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex);
1083             const uint32_t deadOperandIndex =
1084                     deadOperationOutputs[randUInt(deadOperationOutputs.size())];
1085             modelOutputs.push_back(deadOperandIndex);
1086         }
1087     }
1088     // A model must have at least one output.
1089     if (modelOutputs.empty()) {
1090         const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount()));
1091         modelOutputs.push_back(outputs[randUInt(outputs.size())]);
1092     }
1093 
1094     model.identifyInputsAndOutputs(modelInputs, modelOutputs);
1095 #ifdef VERBOSE
1096     {
1097         std::cout << "Original model: " << ModelStats(&model) << std::endl;
1098         std::cout << "rootOperationCount = " << rootOperationCount << ", deadOperations = ";
1099         if (allowDeadOperations) {
1100             std::cout << deadOperations.size();
1101         } else {
1102             std::cout << "forbidden (converted " << deadOperations.size() << ")";
1103         }
1104         std::cout << std::endl;
1105     }
1106 #endif
1107     ASSERT_EQ(model.finish(), Result::NO_ERROR);
1108 
1109     // Non-partitioned compilation.
1110     TestCompilation c(&model);
1111     ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
1112     ASSERT_EQ(c.finish(), Result::NO_ERROR);
1113 
1114     // Create some drivers for partitioned compilation.
1115     CHECK(!signatures.empty());
1116     std::vector<std::set<Signature>> signaturesForDriver(signatures.size());
1117     //     First assign each signature to a random driver (a driver is
1118     //     just represented as an entry in the signaturesForDriver
1119     //     vector).
1120     for (Signature signature : signatures) {
1121         signaturesForDriver[randUInt(signatures.size())].insert(signature);
1122     }
1123     //     Now remove each entry that has no signatures.
1124     auto firstExtra =
1125             std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(),
1126                            [](const std::set<Signature>& sigSet) { return sigSet.empty(); });
1127     if (firstExtra != signaturesForDriver.end()) {
1128         signaturesForDriver.erase(firstExtra, signaturesForDriver.end());
1129     }
1130     //     Now actually create the drivers.
1131     std::vector<std::shared_ptr<Device>> devices;
1132     for (unsigned i = 0; i < signaturesForDriver.size(); i++) {
1133         const auto& signaturesForThisDriver = signaturesForDriver[i];
1134         // Minimum HAL version for this driver is highest minimum HAL version of
1135         // any operation supported by this driver.
1136         const HalVersion minHalVersion = getMinHalVersion(
1137                 std::max_element(signaturesForThisDriver.begin(), signaturesForThisDriver.end(),
1138                                  [](const Signature& a, const Signature& b) {
1139                                      return getMinHalVersion(a.first) < getMinHalVersion(b.first);
1140                                  })
1141                         ->first);
1142         const HalVersion actualHalVersion =
1143                 static_cast<HalVersion>(static_cast<int32_t>(minHalVersion) +
1144                                         randUInt(static_cast<int32_t>(HalVersion::LATEST) -
1145                                                  static_cast<int32_t>(minHalVersion) + 1));
1146         const std::string name =
1147                 "TestDriver(" + std::to_string(i) + "){" + to_string(actualHalVersion) + "}";
1148 #ifdef VERBOSE
1149         std::cout << "Creating " + name + " for collection of signatures that requires HAL " +
1150                              to_string(minHalVersion)
1151                   << std::endl;
1152 #endif
1153         auto device = DeviceManager::forTest_makeDriverDevice(
1154                 name, makeTestDriver(actualHalVersion, name.c_str(), signaturesForThisDriver));
1155         devices.push_back(device);
1156     }
1157     // CPU fallback device
1158     devices.push_back(DeviceManager::getCpuDevice());
1159 
1160     // Partitioned compilation.
1161     // For test cases without unknown intermediate operand sizes we require the
1162     // partitioning to succeed without CPU fallback. With unknown sizes we
1163     // retry with a fallback if the non-fallback partitioning fails and require
1164     // the fallback to succeed.
1165     TestCompilation cNoFallback(&model, devices);
1166     TestCompilation cWithFallback(&model, devices);
1167     TestCompilation* c2 = nullptr;
1168     ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
1169               Result::NO_ERROR);
1170     auto compilationResult = cNoFallback.finish();
1171     if (hasUnknownDimensions && compilationResult == Result::OP_FAILED &&
1172         cNoFallback.getExecutionPlan().forTest_hasStepModelOutputsOfUnknownSize()) {
1173         ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
1174                   Result::NO_ERROR);
1175         ASSERT_EQ(cWithFallback.finish(), Result::NO_ERROR);
1176         ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
1177         ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
1178                   DeviceManager::getCpuDevice());
1179         c2 = &cWithFallback;
1180     } else {
1181         ASSERT_EQ(compilationResult, Result::NO_ERROR);
1182         c2 = &cNoFallback;
1183     }
1184 
1185 #ifdef VERBOSE
1186     {
1187         std::cout << "signatures = " << signatures.size() << ", devices = " << devices.size()
1188                   << std::endl;
1189         // TODO: When dumping steps, include non-ExecutionSteps.
1190         const ExecutionPlan& plan = c2->getExecutionPlan();
1191         switch (plan.forTest_getKind()) {
1192             case ExecutionPlan::Kind::SIMPLE:
1193                 std::cout << "plan: simple" << std::endl;
1194                 break;
1195             case ExecutionPlan::Kind::COMPOUND: {
1196                 const auto& steps = plan.forTest_compoundGetSteps();
1197                 std::set<const Device*> devicesInPlan;
1198                 for (const auto& step : steps) {
1199                     if (const auto* executionStep = step->tryExecutionStep()) {
1200                         devicesInPlan.insert(executionStep->getDevice().get());
1201                     }
1202                 }
1203                 std::cout << "plan: compound, " << steps.size() << " steps over "
1204                           << devicesInPlan.size() << " devices" << std::endl;
1205                 for (unsigned i = 0; i < steps.size(); i++) {
1206                     if (const auto executionStep = steps[i]->tryExecutionStep()) {
1207                         std::cout << "Step " << i << ": "
1208                                   << ModelStats(executionStep->getStepModel())
1209                                   << ", device = " << executionStep->getDevice()->getName()
1210                                   << std::endl;
1211                     }
1212                 }
1213                 break;
1214             }
1215             default:
1216                 std::cout << "Unexpected plan kind: "
1217                           << static_cast<unsigned>(plan.forTest_getKind());
1218                 break;
1219         }
1220     }
1221 #endif
1222 
1223     // For execution:
1224     // - create master inputs (one long vector) and master output value
1225     //   - master inputs will be copied to actual inputs before each
1226     //     of the two executions
1227     //   - master output will be used to fill actual outputs before each
1228     //     of the two executions
1229     // - create actual inputs and outputs
1230     // - first execution (non-partitioned)
1231     //   - initialize inputs and (to avoid unrelated oddities) outputs
1232     //   - execute
1233     //   - copy outputs to a save area (one long vector)
1234     // - second execution (partitioned)
1235     //   - (to avoid unrelated oddities) initialize inputs and outputs
1236     //   - execute
1237     //   - compare outputs to save area
1238 
1239     // If the runtime and drivers are working properly, execution
1240     // should not change the inputs.  Nonetheless, we reinitialize the
1241     // inputs for each execution, so as to avoid unrelated problems
1242     // appearing to be problems related to unpartitioned execution
1243     // versus partitioned execution.  Similarly, execution behavior
1244     // should not be dependent on the outputs; but we'll initialize the
1245     // outputs anyway.
1246     std::vector<float> masterInputs(problemSize * problemSize * model.inputCount());
1247     std::generate(masterInputs.begin(), masterInputs.end(), [this] { return randFrac(); });
1248 #ifdef VERBOSE
1249     {
1250         std::cout << "flat inputs = ";
1251         dump(masterInputs.begin(), masterInputs.end());
1252     }
1253 #endif
1254     const float masterOutput = randFrac();
1255 
1256     // Create the memory for the actual inputs and outputs.
1257     struct InputOutputDescriptor {
1258         enum Kind { INPUT, OUTPUT };
1259         Kind mKind;
1260 
1261         // The input or output either resides in a local buffer
1262         // (mVector, in which case mMemoryRegion is ignored); or in a
1263         // shared memory region within a TestMemories instance
1264         // (mMemoryRegion, in which case mVector is ignored).
1265         enum Location { VECTOR, REGION };
1266         Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }
1267 
1268         std::vector<float> mVector;
1269         unsigned mMemoryRegion;
1270     };
1271     std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
1272     for (unsigned i = 0; i < ioDescriptors.size(); i++) {
1273         ioDescriptors[i].mKind = (i < model.inputCount() ? InputOutputDescriptor::INPUT
1274                                                          : InputOutputDescriptor::OUTPUT);
1275     }
1276     //     We randomly interleave inputs and outputs in creation
    //     order, because when we create memory regions in a
1278     //     TestMemories instance, the order in which regions are
1279     //     created within a single Memory is the order they'll be laid
1280     //     out in that memory; and when we have inputs and outputs
1281     //     within the same Memory, we want the possibility that
1282     //     they'll be interleaved.
1283     std::shuffle(ioDescriptors.begin(), ioDescriptors.end(), mRandNumEng);
1284     TestMemories ioMemories;
1285     for (auto& desc : ioDescriptors) {
1286         if (randFrac() < 0.5) {
1287             desc.mVector.resize(problemSize * problemSize);
1288         } else {
1289             // TODO: common this with the way we create IK_VALUE inputs?
1290             unsigned memoryIndex = ~0U;
1291             if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) {
1292                 memoryIndex = randUInt(ioMemories.memoryCount());
1293             } else {
1294                 memoryIndex = ioMemories.addMemory();
1295             }
1296             const size_t length = problemSize * problemSize * sizeof(float);
1297             desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length);
1298         }
1299     }
1300     ioMemories.layout();
1301 
1302     // Function to set up actual inputs and outputs (initializing them
1303     // and telling the WrapperExecution about them).
1304     auto prepareForExecution = [&model, &ioDescriptors, &ioMemories, &masterInputs, &masterOutput,
1305                                 problemSize, &problemType](WrapperExecution* e) {
1306         uint32_t inputIndex = 0, outputIndex = 0;
1307         for (auto& desc : ioDescriptors) {
1308             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
1309                 if (desc.mKind == InputOutputDescriptor::INPUT) {
1310                     const size_t inputOffset = inputIndex * problemSize * problemSize;
1311                     std::copy(masterInputs.begin() + inputOffset,
1312                               masterInputs.begin() + inputOffset + problemSize * problemSize,
1313                               desc.mVector.begin());
1314                     e->setInput(inputIndex++, desc.mVector.data(),
1315                                 desc.mVector.size() * sizeof(float));
1316                 } else {
1317                     std::fill(desc.mVector.begin(),
1318                               desc.mVector.begin() + problemSize * problemSize, masterOutput);
1319                     e->setOutput(outputIndex++, desc.mVector.data(),
1320                                  desc.mVector.size() * sizeof(float), &problemType.operandType);
1321                 }
1322             } else {
1323                 const WrapperMemory* memory;
1324                 uint32_t offset, length;
1325                 float* region = static_cast<float*>(
1326                         ioMemories.getRegion(desc.mMemoryRegion, &memory, &offset, &length));
1327                 CHECK(length == problemSize * problemSize * sizeof(float));
1328                 if (desc.mKind == InputOutputDescriptor::INPUT) {
1329                     const size_t inputOffset = inputIndex * problemSize * problemSize;
1330                     std::copy(masterInputs.begin() + inputOffset,
1331                               masterInputs.begin() + inputOffset + problemSize * problemSize,
1332                               region);
1333                     e->setInputFromMemory(inputIndex++, memory, offset, length);
1334                 } else {
1335                     std::fill(region, region + problemSize * problemSize, masterOutput);
1336                     e->setOutputFromMemory(outputIndex++, memory, offset, length,
1337                                            &problemType.operandType);
1338                 }
1339             }
1340         };
1341         CHECK(inputIndex == model.inputCount());
1342         CHECK(outputIndex == model.outputCount());
1343     };
1344 
1345     // Non-partitioned execution.
1346     WrapperExecution e(&c);
1347     ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e));
1348     ASSERT_EQ(e.compute(), Result::NO_ERROR);
1349 
1350     // Copy the outputs of the non-partitioned execution to a save area.
1351     std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount());
1352     {
1353         uint32_t outputIndex = 0;
1354         for (const auto& desc : ioDescriptors) {
1355             if (desc.mKind != InputOutputDescriptor::OUTPUT) {
1356                 continue;
1357             }
1358             const size_t outputOffset = outputIndex * problemSize * problemSize;
1359             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
1360                 std::copy(desc.mVector.begin(), desc.mVector.end(),
1361                           nonPartitionedOutputs.begin() + outputOffset);
1362             } else {
1363                 float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
1364                 std::copy(region, region + problemSize * problemSize,
1365                           nonPartitionedOutputs.begin() + outputOffset);
1366             }
1367 #ifdef VERBOSE
1368             {
1369                 std::cout << "nonpartitioned output[" << outputIndex << "] = ";
1370                 dump(nonPartitionedOutputs.begin() + outputOffset,
1371                      nonPartitionedOutputs.begin() + outputOffset + problemSize * problemSize);
1372             }
1373 #endif
1374             outputIndex++;
1375         }
1376     }
1377 
1378     // Partitioned execution.
1379     WrapperExecution e2(c2);
1380     ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
1381     ASSERT_EQ(e2.compute(), Result::NO_ERROR);
1382 
1383     // Compare the outputs of the partitioned execution to the save
    // area containing the outputs of the non-partitioned execution.
1385     {
1386         uint32_t outputIndex = 0;
1387         for (const auto& desc : ioDescriptors) {
1388             if (desc.mKind != InputOutputDescriptor::OUTPUT) {
1389                 continue;
1390             }
1391             SCOPED_TRACE(outputIndex);
1392             const size_t outputOffset = outputIndex * problemSize * problemSize;
1393             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
1394 #ifdef VERBOSE
1395                 std::cout << "   partitioned output[" << outputIndex << "] = ";
1396                 dump(desc.mVector.begin(), desc.mVector.end());
1397 #endif
1398                 ASSERT_TRUE(std::equal(desc.mVector.begin(), desc.mVector.end(),
1399                                        nonPartitionedOutputs.begin() + outputOffset));
1400             } else {
1401                 float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
1402 #ifdef VERBOSE
1403                 std::cout << "part output[" << outputIndex << "] = ";
1404                 dump(region, region + problemSize * problemSize);
1405 #endif
1406                 ASSERT_TRUE(std::equal(region, region + problemSize * problemSize,
1407                                        nonPartitionedOutputs.begin() + outputOffset));
1408             }
1409             outputIndex++;
1410         }
1411     }
1412 }
1413 
1414 }  // namespace
1415 }  // namespace android
1416