1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <android-base/logging.h>
18 #include <gtest/gtest.h>
19 #include <unistd.h>
20
21 #include <algorithm>
22 #include <cassert>
23 #include <cstdio>
24 #include <iterator>
25 #include <map>
26 #include <memory>
27 #include <random>
28 #include <set>
29 #include <string>
30 #include <tuple>
31 #include <utility>
32 #include <vector>
33
34 #include "CompilationBuilder.h"
35 #include "HalInterfaces.h"
36 #include "Manager.h"
37 #include "ModelBuilder.h"
38 #include "NeuralNetworks.h"
39 #include "SampleDriver.h"
40 #include "TestNeuralNetworksWrapper.h"
41 #include "Utils.h"
42 #include "ValidateHal.h"
43
44 // Uncomment the following line to generate some debugging output that
45 // may be useful when analyzing failures:
46 //
47 // #define VERBOSE VERBOSE
48
49 // We randomly generate tests (model + input data) at runtime, and verify
50 // that we get the same results whether we do partitioned compilation/execution
51 // or non partitioned compilation/execution. We perform a test as follows:
52 //
53 // (1) Randomly generate a model (graph and weights), randomly generate input
54 // data, randomly assign inputs and outputs to CPU memory or to shared
55 // memory.
56 //
57 // Randomly leaves dimensions unset for intermediate operands.
58 //
59 // (2) Randomly generate drivers based on the sample driver, each of which
60 // executes models on the CPU. They differ according to which operations
61 // they support.
62 //
63 // (3) Compile and execute without partitioning, saving off the results.
64 //
65 // (4) Compile and execute with partitioning.
66 //
67 // (5) Verify that the saved results from (3) match the results from (4).
68 //
69 // For simplicity, all data (model inputs, model outputs, weights,
70 // temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
71 // dimensions are fixed throughout a particular test case (and
72 // randomly determined). This prevents us from having to find a
73 // mechanism to "resize" data (e.g., if ADD#a operates on data of size
74 // 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
75 // and ADD#b become inputs of ADD#c, do we need to insert one or more
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
77 // from size 2x2 to size 3x3 in order to match ADD#b). In the few
78 // cases where an operand cannot be of this type, it is a constant
79 // (e.g., activation functions and RNN bias).
80 //
81 // Each operation we generate has a signature (described in more
82 // detail later). The randomly generated drivers decide which
83 // operations they can execute by checking operation signatures. Once
84 // we have built the model and know the set of signatures, we randomly
85 // assign each signature to a driver. No signature is supported by
86 // multiple drivers -- we're not testing the logic that the
87 // partitioning algorithm uses to select the best driver for an
88 // operation.
89
90 namespace android {
91
92 using namespace nn::hal;
93 using CompilationBuilder = nn::CompilationBuilder;
94 using Device = nn::Device;
95 using DeviceManager = nn::DeviceManager;
96 using ExecutionPlan = nn::ExecutionPlan;
97 using HalVersion = nn::HalVersion;
98 using HidlModel = V1_3::Model;
99 using ModelBuilder = nn::ModelBuilder;
100 using Result = nn::test_wrapper::Result;
101 using SampleDriver = nn::sample_driver::SampleDriver;
102 using WrapperCompilation = nn::test_wrapper::Compilation;
103 using WrapperExecution = nn::test_wrapper::Execution;
104 using WrapperMemory = nn::test_wrapper::Memory;
105 using WrapperModel = nn::test_wrapper::Model;
106 using WrapperOperandType = nn::test_wrapper::OperandType;
107 using WrapperType = nn::test_wrapper::Type;
108
109 namespace {
110
111 /// Configure test size //////////////////////////////////////////////////////////
112
// We may exceed this in order to connect otherwise disjoint subgraphs.
static constexpr unsigned kMaxNumOperations = 100;

// We build models to process 2-D square tensors up to this size in each dimension;
// note that the API promotes by-value weights larger than 128 to by-reference,
// so we want to ensure that we can pick both types that exceed and types that do
// not exceed this size.
static constexpr unsigned kMaxProblemSize = 8;

// First seed for pseudorandom test generation.
static constexpr unsigned kFirstSeed = 0;

// Number of test cases.
static constexpr unsigned kNumTestCases = 225;

// Force all graph weights into a single pool (as we recommend to users)
// or allow them to be distributed across multiple pools (more stress
// on the partitioning algorithm and the rest of the runtime)?
// Forcing all graph weights into a single pool may be necessary to
// prevent large graphs from running up against http://b/70302693
// "NNAPI overuses (?) fds".
static constexpr bool kAllWeightsInOnePool = false;
135
136 //////////////////////////////////////////////////////////////////////////////////
137
138 // The signature of an operation consists of the operation type (e.g.,
139 // ADD) and the activation function (use -1 in the case of an
140 // operation type for which the activation function is inapplicable).
141 typedef std::pair<ANeuralNetworksOperationType, int> Signature;
142
143 // This class adds some simple utilities on top of WrapperModel. For example,
144 // it provides access to certain features from ModelBuilder that are not exposed
145 // by the base class (such as inputCount() and operation index).
146 class TestModel : public WrapperModel {
147 public:
addOperation(ANeuralNetworksOperationType type,const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)148 uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
149 const std::vector<uint32_t>& outputs) {
150 const uint32_t operationIndex = operationCount();
151 mOperations.push_back(outputs);
152 WrapperModel::addOperation(type, inputs, outputs);
153 return operationIndex;
154 }
155
operationCount() const156 uint32_t operationCount() const { return mOperations.size(); }
157
inputCount() const158 uint32_t inputCount() const { return builder()->inputCount(); }
outputCount() const159 uint32_t outputCount() const { return builder()->outputCount(); }
160
getOperationOutputs(uint32_t index) const161 const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const {
162 CHECK(index < mOperations.size());
163 return mOperations[index];
164 }
165
166 // All values are immediately copied into the model (we need to do
167 // this ourselves in cases where the underlying NNAPI does not).
setOperandValue(uint32_t index,const std::vector<float> & value)168 void setOperandValue(uint32_t index, const std::vector<float>& value) {
169 const size_t length = value.size() * sizeof(float);
170
171 if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
172 WrapperModel::setOperandValue(index, value.data(), length);
173 } else {
174 mOperandValues.push_back(value);
175 WrapperModel::setOperandValue(index, mOperandValues.back().data(), length);
176 }
177 }
178
setOperandValue(uint32_t index,const std::vector<int32_t> & value)179 void setOperandValue(uint32_t index, const std::vector<int32_t>& value) {
180 const size_t length = value.size() * sizeof(int32_t);
181
182 CHECK(length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
183 WrapperModel::setOperandValue(index, value.data(), length);
184 }
185
setOperandValue(uint32_t index,int32_t value)186 void setOperandValue(uint32_t index, int32_t value) {
187 CHECK(sizeof(value) <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
188 WrapperModel::setOperandValue(index, &value, sizeof(value));
189 }
190
191 private:
builder() const192 const ModelBuilder* builder() const {
193 return reinterpret_cast<const ModelBuilder*>(getHandle());
194 }
195
196 // Representation of operations: vector index is operation number,
197 // vector value is operation's output operands.
198 std::vector<std::vector<uint32_t>> mOperations;
199
200 // Large operand values -- not immediately copied into the
201 // WrapperModel, so remembered here instead.
202 std::vector<std::vector<float>> mOperandValues;
203 };
204
205 // This class adds some simple utilities on top of WrapperCompilation in order
206 // to provide access to certain features from CompilationBuilder that are not
207 // exposed by the base class.
208 class TestCompilation : public WrapperCompilation {
209 public:
TestCompilation(const WrapperModel * model)210 TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {}
211
TestCompilation(const WrapperModel * model,std::vector<std::shared_ptr<Device>> devices)212 TestCompilation(const WrapperModel* model, std::vector<std::shared_ptr<Device>> devices) {
213 ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
214 CompilationBuilder* c = nullptr;
215 int result = m->createCompilation(&c, devices);
216 EXPECT_EQ(result, 0);
217 mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
218 }
219
220 using WrapperCompilation::finish;
221
setPartitioning(uint32_t partitioning)222 Result setPartitioning(uint32_t partitioning) {
223 return static_cast<Result>(builder()->setPartitioning(partitioning));
224 }
225
getExecutionPlan() const226 const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }
227
228 private:
builder() const229 const CompilationBuilder* builder() const {
230 return reinterpret_cast<const CompilationBuilder*>(getHandle());
231 }
builder()232 CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }
233 };
234
235 // This class is used to manage a collection of memory regions,
236 // disjoint windows onto a set of Memory instances, each of which is
237 // associated with a single shared memory region. Each region and
238 // Memory instance is assigned a number. The usage pattern is as
239 // follows:
240 // - Call addMemory() and addRegion() as many times as needed to
241 // declare (but not define) Memory instances and declare region
242 // instances.
243 // - Call layout() to define the Memory instances.
244 // - Call getRegion() as many times as needed to get the details
245 // of memory regions (such as address, or Memory/offset/length).
246 // The Memory instances created by layout() are owned by the
247 // TestMemories instance, and are destroyed when the TestMemories
248 // instance is destroyed.
249 class TestMemories {
250 public:
251 TestMemories() = default;
252
253 TestMemories(const TestMemories&) = delete;
254 TestMemories& operator=(const TestMemories&) = delete;
255
addMemory()256 unsigned addMemory() {
257 CHECK(!mLayoutDone);
258 mMemorySizes.push_back(0);
259 return memoryCount() - 1;
260 }
memoryCount() const261 unsigned memoryCount() const { return mMemorySizes.size(); }
262
addRegion(unsigned memoryIndex,uint32_t length)263 unsigned addRegion(unsigned memoryIndex, uint32_t length) {
264 CHECK(!mLayoutDone);
265 CHECK(memoryIndex < memoryCount());
266 uint32_t& memorySize = mMemorySizes[memoryIndex];
267 auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length);
268 mRegions.push_back(desc);
269 memorySize += length;
270 return regionCount() - 1;
271 }
regionCount() const272 unsigned regionCount() const { return mRegions.size(); }
273
274 void layout();
275
getRegion(unsigned regionIndex,const WrapperMemory ** pMemory,uint32_t * pOffset,uint32_t * pLength)276 void* getRegion(unsigned regionIndex, const WrapperMemory** pMemory, uint32_t* pOffset,
277 uint32_t* pLength) {
278 CHECK(mLayoutDone);
279 CHECK(regionIndex < regionCount());
280 const auto& regionDescriptor = mRegions[regionIndex];
281 const WrapperMemory* memory = &mMemories[std::get<0>(regionDescriptor)];
282 uint32_t offset = std::get<1>(regionDescriptor);
283 uint32_t length = std::get<2>(regionDescriptor);
284
285 uint8_t* buffer = reinterpret_cast<nn::MemoryAshmem*>(memory->get())->getPointer();
286 CHECK(buffer != nullptr);
287
288 if (pMemory) *pMemory = memory;
289 if (pOffset) *pOffset = offset;
290 if (pLength) *pLength = length;
291
292 return buffer + offset;
293 }
294
getRegion(unsigned regionIndex)295 void* getRegion(unsigned regionIndex) {
296 return getRegion(regionIndex, nullptr, nullptr, nullptr);
297 }
298
299 private:
300 // Index is the memory index; value is the size of the memory
301 // (aggregate size of all regions in the memory).
302 std::vector<uint32_t> mMemorySizes;
303
304 // Index is the memory index.
305 std::vector<WrapperMemory> mMemories;
306
307 // Index is the region index; tuple represents memory index,
308 // region offset within memory, region length.
309 std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions;
310
311 // For sanity checking.
312 bool mLayoutDone = false;
313 };
314
layout()315 void TestMemories::layout() {
316 CHECK(!mLayoutDone);
317 for (uint32_t memorySize : mMemorySizes) {
318 auto [n, ashmem] = nn::MemoryAshmem::create(memorySize);
319 CHECK_EQ(n, ANEURALNETWORKS_NO_ERROR);
320 CHECK(ashmem != nullptr);
321
322 ANeuralNetworksMemory* memory = reinterpret_cast<ANeuralNetworksMemory*>(ashmem.release());
323 mMemories.emplace_back(memory);
324 }
325 mLayoutDone = true;
326 }
327
// Test fixture parameterized by a pseudorandom seed (GetParam()); each seed
// yields a distinct randomly generated model, driver set, and input data.
class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
   public:
    RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}

    // Returns the signature (operation type, activation function) of the
    // given operation within the given model; the second element is -1 when
    // the operation type has no activation function input.
    static Signature getSignature(const HidlModel& model, const Operation& operation);

   protected:
    // Creates a driver at the given HAL version that supports exactly the
    // operations with the given signatures.
    static V1_0::IDevice* makeTestDriver(HalVersion version, const char* name,
                                         std::set<Signature> signatures);

    // Returns the minimum HAL version at which the given operation type is
    // available, according to kOperationPatterns.
    static HalVersion getMinHalVersion(ANeuralNetworksOperationType type);

    // Returns a human-readable name for the given HAL version.
    static std::string to_string(HalVersion version);

    bool randBool() { return randUInt(2) == 1; }

    double randFrac() {  // [0.0, 1.0)
        return mRandNumUnitDist(mRandNumEng);
    }

    unsigned randUInt(unsigned limit) {  // [0, limit)
        return unsigned(randFrac() * limit);
    }

    // Represents an operation in which every input and output operand
    // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except:
    // - One input operand may be an activation function.
    // - Any number of input operands may be "special" in some other way
    //   (and in this implementation, not produced by any other operation).
    // We require that:
    // - There be at least one input operand that is neither an
    //   activation function nor "special".
    struct OperationPattern {
        HalVersion mMinHalVersion;
        int mOperationType;
        unsigned mNumInputs;
        unsigned mNumOutputs;
        int mActivationFunctionInputIndex;  // <0 if none

        // Returns operand index, or <0 if input is normal (must not
        // be called for an activation function operand).  Function
        // should have the following prototype:
        //
        //     int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex);
        //
        int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned);
    };

    // Table of operations available to the graph generator (defined below).
    static const OperationPattern kOperationPatterns[];

    // See OperationPattern::mMakeSpecialInput. This function is used to
    // manufacture an ELU input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeEluSpecialInput([[maybe_unused]] unsigned problemSize, TestModel* model,
                            unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is alpha, a scalar
        const WrapperOperandType alphaType(WrapperType::FLOAT32, {});
        return int(model->addConstantOperand(&alphaType, 1.0f));
    }

    // See OperationPattern::mMakeSpecialInput. This function is used to
    // manufacture an RNN input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) {
        if (inputIndex != 3) {
            return -1;
        }

        // input operand 3 is bias, a 1-D tensor filled with random values
        const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, {problemSize});
        const uint32_t operandIndex = model->addOperand(&biasType);
        std::vector<float> biasValue(problemSize);
        std::generate(biasValue.begin(), biasValue.end(), [this] { return randFrac(); });
        model->setOperandValue(operandIndex, biasValue);
        return int(operandIndex);
    }

    // See OperationPattern::mMakeSpecialInput. This function is used to
    // manufacture a TRANSPOSE input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeTransposeSpecialInput(unsigned /* problemSize */, TestModel* model,
                                  unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is perm, a 1-D tensor; {1, 0} transposes a 2-D tensor
        const WrapperOperandType permType(WrapperType::TENSOR_INT32, {2});
        const uint32_t operandIndex = model->addOperand(&permType);
        std::vector<int32_t> permValue = {1, 0};
        model->setOperandValue(operandIndex, permValue);
        return int(operandIndex);
    }

#ifdef VERBOSE
    // Summarizes a model's operation/operand/input/output counts for logging.
    class ModelStats {
       public:
        ModelStats(const ModelBuilder* model) : mBuilder(model) {}
        ModelStats(const WrapperModel* model)
            : mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) {}
        friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) {
            const uint32_t operandCount = stats.mBuilder->operandCount();
            const uint32_t inputCount = stats.mBuilder->inputCount();
            const uint32_t outputCount = stats.mBuilder->outputCount();
            out << "operationCount = " << stats.mBuilder->operationCount()
                << ", operandCount = " << operandCount << ", inputCount = " << inputCount << " ("
                << (double(inputCount) / operandCount) << ")"
                << ", outputCount = " << outputCount << " (" << (double(outputCount) / operandCount)
                << ")";
            return out;
        }

       private:
        const ModelBuilder* mBuilder;
    };

    // Prints the elements of an iterator range, for debugging.
    template <typename T_iterator>
    static void dump(T_iterator I, T_iterator E) {
        std::cout << "{";
        for (; I != E; I++) {
            std::cout << " " << *I;
        }
        std::cout << " }" << std::endl;
    }
#endif

    // Protected (not private) so that test bodies can draw random numbers
    // directly from the engine.
    std::mt19937 mRandNumEng;

   private:
    std::uniform_real_distribution<double> mRandNumUnitDist;
};
463
// The operations available to the graph generator, grouped by the HAL version
// that introduced them.  Per OperationPattern, the fields are: minimum HAL
// version, operation type, number of inputs, number of outputs, activation
// function input index (<0 if none), and the optional "special input" maker.
const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = {
        {HalVersion::V1_0, ANEURALNETWORKS_ADD, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_MUL, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_RNN, 6, 2, 5,
         &RandomPartitioningTest::makeRnnSpecialInput},
        {HalVersion::V1_0, ANEURALNETWORKS_TANH, 1, 1, -1, nullptr},

        {HalVersion::V1_1, ANEURALNETWORKS_SUB, 3, 1, 2, nullptr},
        {HalVersion::V1_1, ANEURALNETWORKS_TRANSPOSE, 2, 1, -1,
         &RandomPartitioningTest::makeTransposeSpecialInput},

        {HalVersion::V1_2, ANEURALNETWORKS_MAXIMUM, 2, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_NEG, 1, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_SIN, 1, 1, -1, nullptr},

        {HalVersion::V1_3, ANEURALNETWORKS_ELU, 2, 1, -1,
         &RandomPartitioningTest::makeEluSpecialInput},
        {HalVersion::V1_3, ANEURALNETWORKS_HARD_SWISH, 1, 1, -1, nullptr},
};
484
getMinHalVersion(ANeuralNetworksOperationType type)485 HalVersion RandomPartitioningTest::getMinHalVersion(ANeuralNetworksOperationType type) {
486 static const auto kOperationToVersion = [] {
487 std::map<ANeuralNetworksOperationType, HalVersion> result;
488 for (const auto& pattern : kOperationPatterns) {
489 result[pattern.mOperationType] = pattern.mMinHalVersion;
490 }
491 return result;
492 }();
493
494 return kOperationToVersion.at(type);
495 }
496
// Extracts the signature (operation type, activation function value) of the
// given operation.  The second element is -1 for operation types that have
// no activation function input.
Signature RandomPartitioningTest::getSignature(const HidlModel& model, const Operation& operation) {
    // Lazily-built map from operation type to the index of its activation
    // function input (<0 if the operation type has none), per
    // kOperationPatterns.
    static const auto kOperationToActivation = [] {
        std::map<ANeuralNetworksOperationType, int> result;
        for (const auto& pattern : kOperationPatterns) {
            result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex;
        }
        return result;
    }();

    const ANeuralNetworksOperationType operationType =
            static_cast<ANeuralNetworksOperationType>(operation.type);
    const int activationFunctionInputIndex = kOperationToActivation.at(operationType);
    if (activationFunctionInputIndex < 0) {
        return Signature(operationType, -1);
    }

    // The activation function operand is an INT32 CONSTANT_COPY, so its value
    // can be read directly out of the model's operandValues pool.
    const Operand& operand = model.main.operands[operation.inputs[activationFunctionInputIndex]];
    CHECK(operand.lifetime == OperandLifeTime::CONSTANT_COPY);
    CHECK(operand.type == OperandType::INT32);
    int32_t value;
    memcpy(&value, &model.operandValues[operand.location.offset], operand.location.length);
    return Signature(operationType, value);
}
520
to_string(HalVersion version)521 std::string RandomPartitioningTest::to_string(HalVersion version) {
522 switch (version) {
523 case HalVersion::V1_0:
524 return "V1_0";
525 case HalVersion::V1_1:
526 return "V1_1";
527 case HalVersion::V1_2:
528 return "V1_2";
529 case HalVersion::V1_3:
530 return "V1_3";
531 default:
532 return "V_UNKNOWN";
533 }
534 };
535
// Behaves like SampleDriver, except that it only supports
// operations with the specified signatures.
class TestDriver : public SampleDriver {
   public:
    TestDriver(const char* name, std::set<Signature> signatures)
        : SampleDriver(name), mSignatures(std::move(signatures)) {}

    Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
        android::nn::initVLogMask();
        // Report the same middling performance everywhere, so that device
        // selection is driven by operation support rather than by
        // performance differences.
        const PerformanceInfo kPerf = {.execTime = 0.75f, .powerUsage = 0.75f};
        Capabilities capabilities = {
                .relaxedFloat32toFloat16PerformanceScalar = kPerf,
                .relaxedFloat32toFloat16PerformanceTensor = kPerf,
                .operandPerformance = nn::nonExtensionOperandPerformance<HalVersion::V1_3>(kPerf),
                .ifPerformance = kPerf,
                .whilePerformance = kPerf};
        _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
        return Void();
    }

    // An operation is reported as supported if and only if its signature is
    // in mSignatures.
    Return<void> getSupportedOperations_1_3(const HidlModel& model,
                                            getSupportedOperations_1_3_cb cb) override {
        if (nn::validateModel(model)) {
            const size_t count = model.main.operations.size();
            std::vector<bool> supported(count);
            for (size_t i = 0; i < count; i++) {
                supported[i] = (mSignatures.count(RandomPartitioningTest::getSignature(
                                        model, model.main.operations[i])) != 0);
            }
            cb(V1_3::ErrorStatus::NONE, supported);
        } else {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
        }
        return Void();
    }

    Return<V1_3::ErrorStatus> prepareModel_1_3(
            const HidlModel& model, ExecutionPreference preference, Priority priority,
            const OptionalTimePoint& deadline, const hidl_vec<hidl_handle>& modelCache,
            const hidl_vec<hidl_handle>& dataCache, const CacheToken& token,
            const sp<V1_3::IPreparedModelCallback>& callback) override {
        // NOTE: We verify that all operations in the model are supported.
        V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_3(
                model, [&outStatus](V1_3::ErrorStatus inStatus,
                                    const hidl_vec<bool>& supportedOperations) {
                    if (inStatus == V1_3::ErrorStatus::NONE) {
                        if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                        [](bool v) { return v; })) {
                            outStatus = V1_3::ErrorStatus::NONE;
                        }
                    }
                });
        if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) {
            // Every operation is supported: delegate the actual preparation.
            return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache,
                                                  dataCache, token, callback);
        } else {
            callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

   private:
    // The set of operation signatures this driver claims to support.
    const std::set<Signature> mSignatures;
};
601
602 // Like TestDriver, but implementing 1.2
603 class TestDriverV1_2 : public V1_2::IDevice {
604 public:
TestDriverV1_2(const char * name,std::set<Signature> signatures)605 TestDriverV1_2(const char* name, std::set<Signature> signatures)
606 : mLatestDriver(new TestDriver(name, std::move(signatures))) {}
getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb)607 Return<void> getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb) override {
608 return mLatestDriver->getCapabilities_1_2(_hidl_cb);
609 }
getSupportedOperations_1_2(const V1_2::Model & model,getSupportedOperations_1_2_cb _hidl_cb)610 Return<void> getSupportedOperations_1_2(const V1_2::Model& model,
611 getSupportedOperations_1_2_cb _hidl_cb) override {
612 return mLatestDriver->getSupportedOperations_1_2(model, _hidl_cb);
613 }
prepareModel_1_2(const V1_2::Model & model,ExecutionPreference preference,const hidl_vec<hidl_handle> & modelCache,const hidl_vec<hidl_handle> & dataCache,const CacheToken & token,const sp<V1_2::IPreparedModelCallback> & actualCallback)614 Return<V1_0::ErrorStatus> prepareModel_1_2(
615 const V1_2::Model& model, ExecutionPreference preference,
616 const hidl_vec<hidl_handle>& modelCache, const hidl_vec<hidl_handle>& dataCache,
617 const CacheToken& token,
618 const sp<V1_2::IPreparedModelCallback>& actualCallback) override {
619 return mLatestDriver->prepareModel_1_2(model, preference, modelCache, dataCache, token,
620 actualCallback);
621 }
getVersionString(getVersionString_cb _hidl_cb)622 Return<void> getVersionString(getVersionString_cb _hidl_cb) override {
623 return mLatestDriver->getVersionString(_hidl_cb);
624 }
getType(getType_cb _hidl_cb)625 Return<void> getType(getType_cb _hidl_cb) override { return mLatestDriver->getType(_hidl_cb); }
getSupportedExtensions(getSupportedExtensions_cb _hidl_cb)626 Return<void> getSupportedExtensions(getSupportedExtensions_cb _hidl_cb) {
627 return mLatestDriver->getSupportedExtensions(_hidl_cb);
628 }
getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb)629 Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb) {
630 return mLatestDriver->getNumberOfCacheFilesNeeded(_hidl_cb);
631 }
prepareModelFromCache(const hidl_vec<hidl_handle> & modelCache,const hidl_vec<hidl_handle> & dataCache,const CacheToken & token,const sp<V1_2::IPreparedModelCallback> & callback)632 Return<V1_0::ErrorStatus> prepareModelFromCache(
633 const hidl_vec<hidl_handle>& modelCache, const hidl_vec<hidl_handle>& dataCache,
634 const CacheToken& token, const sp<V1_2::IPreparedModelCallback>& callback) {
635 return mLatestDriver->prepareModelFromCache(modelCache, dataCache, token, callback);
636 }
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)637 Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
638 return mLatestDriver->getCapabilities_1_1(_hidl_cb);
639 }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)640 Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
641 getSupportedOperations_1_1_cb _hidl_cb) override {
642 return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
643 }
prepareModel_1_1(const V1_1::Model & model,ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)644 Return<V1_0::ErrorStatus> prepareModel_1_1(
645 const V1_1::Model& model, ExecutionPreference preference,
646 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
647 return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
648 }
getStatus()649 Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)650 Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
651 return mLatestDriver->getCapabilities(_hidl_cb);
652 }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)653 Return<void> getSupportedOperations(const V1_0::Model& model,
654 getSupportedOperations_cb _hidl_cb) override {
655 return mLatestDriver->getSupportedOperations(model, _hidl_cb);
656 }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)657 Return<V1_0::ErrorStatus> prepareModel(
658 const V1_0::Model& model,
659 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
660 return mLatestDriver->prepareModel(model, actualCallback);
661 }
662
663 private:
664 const sp<V1_3::IDevice> mLatestDriver;
665 };
666
// Like TestDriver, but implementing 1.1.  Every call is forwarded to a
// wrapped TestDriver (which implements the latest HAL version).
class TestDriverV1_1 : public V1_1::IDevice {
   public:
    TestDriverV1_1(const char* name, std::set<Signature> signatures)
        : mLatestDriver(new TestDriver(name, std::move(signatures))) {}
    Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities_1_1(_hidl_cb);
    }
    Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
                                            getSupportedOperations_1_1_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
    }
    Return<V1_0::ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, ExecutionPreference preference,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
    }
    Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
    Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities(_hidl_cb);
    }
    Return<void> getSupportedOperations(const V1_0::Model& model,
                                        getSupportedOperations_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations(model, _hidl_cb);
    }
    Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel(model, actualCallback);
    }

   private:
    // The wrapped latest-version driver that actually does the work.
    const sp<V1_3::IDevice> mLatestDriver;
};
701
702 // Like TestDriver, but implementing 1.0
703 class TestDriverV1_0 : public V1_0::IDevice {
704 public:
TestDriverV1_0(const char * name,std::set<Signature> signatures)705 TestDriverV1_0(const char* name, std::set<Signature> signatures)
706 : mLatestDriver(new TestDriver(name, std::move(signatures))) {}
getCapabilities(getCapabilities_cb _hidl_cb)707 Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
708 return mLatestDriver->getCapabilities(_hidl_cb);
709 }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)710 Return<void> getSupportedOperations(const V1_0::Model& model,
711 getSupportedOperations_cb _hidl_cb) override {
712 return mLatestDriver->getSupportedOperations(model, _hidl_cb);
713 }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)714 Return<V1_0::ErrorStatus> prepareModel(
715 const V1_0::Model& model,
716 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
717 return mLatestDriver->prepareModel(model, actualCallback);
718 }
getStatus()719 Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
720
721 private:
722 const sp<V1_3::IDevice> mLatestDriver;
723 };
724
makeTestDriver(HalVersion version,const char * name,std::set<Signature> signatures)725 V1_0::IDevice* RandomPartitioningTest::makeTestDriver(HalVersion version, const char* name,
726 std::set<Signature> signatures) {
727 switch (version) {
728 case HalVersion::V1_0:
729 return new TestDriverV1_0(name, std::move(signatures));
730 case HalVersion::V1_1:
731 return new TestDriverV1_1(name, std::move(signatures));
732 case HalVersion::V1_2:
733 return new TestDriverV1_2(name, std::move(signatures));
734 case HalVersion::V1_3:
735 return new TestDriver(name, std::move(signatures));
736 default:
737 ADD_FAILURE() << "Unexpected HalVersion " << static_cast<int32_t>(version);
738 return nullptr;
739 }
740 }
741
// Run the test once for each seed in [kFirstSeed, kFirstSeed + kNumTestCases).
// (INSTANTIATE_TEST_CASE_P is the legacy googletest spelling of
// INSTANTIATE_TEST_SUITE_P, kept for compatibility with the in-tree gtest.)
INSTANTIATE_TEST_CASE_P(Seed, RandomPartitioningTest,
                        ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases));
744
// Generates a random model (driven by the seed from GetParam()), compiles and
// executes it both unpartitioned and partitioned across randomly-constructed
// drivers, and verifies that the two executions produce identical outputs.
TEST_P(RandomPartitioningTest, Test) {
    LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam();

#ifdef VERBOSE
    std::cout << std::setprecision(2) << std::fixed << std::setw(4);
#endif

    // All "normal" operands are problemSize x problemSize TENSOR_FLOAT32.
    const unsigned problemSize = 1 + randUInt(kMaxProblemSize);
    const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, {problemSize, problemSize});
    const WrapperOperandType unknownDimensionsType(WrapperType::TENSOR_FLOAT32, {0, 0});

    static const WrapperOperandType activationFunctionType(WrapperType::INT32, {});

    const unsigned numOperations = 2 + randUInt(kMaxNumOperations - 1);
    const bool allowDeadOperations = (randFrac() < 0.2);
    const bool allowUnknownDimensions = (randFrac() < 0.25);

    // TODO: The current algorithm builds the graph in a forward
    // direction (i.e., later-generated operations consume outputs
    // from earlier-generated operations).  In order to get more
    // variation in graph topology, perhaps we should also create an
    // algorithm to build the graph in a backward direction (i.e.,
    // later-generated operations produce outputs to be consumed by
    // earlier-generated operations).
    [[maybe_unused]] const bool buildForward = randBool();

    // TODO: Add a form of forced connectivity that operates by
    // joining disjoint subgraphs rather than by forcing a root.
    const bool forceCommonRoot = (randFrac() < 0.75);

    TestModel model;
    std::vector<uint32_t> modelInputs;
    std::vector<uint32_t> modelOutputs;

    // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32.
    TestMemories weights;

    // Keep track of all normal (i.e., not activation function and not
    // "special") operands that are values (from setOperandValue*()).
    // .first: operand index
    // .second: if the operand is already defined (via setOperandValue*()) then ~0U;
    //          otherwise, the operand has yet to be defined, and this is the corresponding
    //          region index in "weights"
    std::vector<std::pair<uint32_t, unsigned>> valueOperands;

    // An operand is "dead" if it is not consumed by another operation
    // and is not a model output.  Key is operand index; value is
    // operation index.
    std::map<uint32_t, uint32_t> deadOperands;

    // An operation is "dead" if all of its outputs are dead.
    std::set<uint32_t> deadOperations;

    // Collect the signatures of operations in this model.
    std::set<Signature> signatures;

    // For reporting purposes, keep track of the number of root
    // operations (those that do not consume results produced by other
    // operations).
    unsigned rootOperationCount = 0;

    // Track if we added operands with unknown dimensions. In this case,
    // partitioned compilation will fail if such an operand is read in a
    // different partition than it is written.
    bool hasUnknownDimensions = false;

    // Generate operations.
    for (unsigned i = 0; i < numOperations; i++) {
        const unsigned operationPatternIndex = randUInt(std::size(kOperationPatterns));
        const auto& operationPattern = kOperationPatterns[operationPatternIndex];

        // INPUTS //////////////////////////////////////////////////////////////////////////////////

        std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U);

        // First, process activation function and special inputs, and
        // keep track of which inputs remain.
        std::vector<uint32_t> normalOperationInputIndexes;
        int32_t activationFunction = -1;
        for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs;
             operationInputIndex++) {
            if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) {
                const uint32_t operandIndex = model.addOperand(&activationFunctionType);
                activationFunction = randUInt(4);
                if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) {
                    // workaround for http://b/69011131
                    activationFunction = ANEURALNETWORKS_FUSED_NONE;
                }
                model.setOperandValue(operandIndex, activationFunction);
                operationInputs[operationInputIndex] = operandIndex;
                continue;
            }
            if (operationPattern.mMakeSpecialInput != nullptr) {
                const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))(
                        problemSize, &model, operationInputIndex);
                if (operandIndex >= 0) {
                    operationInputs[operationInputIndex] = operandIndex;
                    continue;
                }
            }
            normalOperationInputIndexes.push_back(operationInputIndex);
        }
        CHECK(!normalOperationInputIndexes.empty());
        signatures.insert(Signature(operationPattern.mOperationType, activationFunction));

        // A (normal) operation input can be one of:
        // - a new or existing model input
        // - an output of an existing operation
        // - an OperandValue
        // - an OperandValueFromMemory
        // Some guidelines:
        // - We generally don't want all of an operation's inputs to be values (constants)
        const unsigned normalOperationInputCount = normalOperationInputIndexes.size();
        // How many of this operation's inputs are constants?
        unsigned normalOperationInputConstantCount = 0;
        // How many of this operation's inputs are model inputs?
        unsigned normalOperationInputModelInputCount = 0;
        // We begin by deciding what kind of input each (normal) operation will be; we don't
        // actually pick input operand indexes at this time, because we might override this
        // decision later.
        enum InputKind { IK_SUBGRAPH_INPUT, IK_OPERATION_OUTPUT, IK_VALUE };
        std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount);
        std::generate(
                normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
                [this, &model, numOperations, normalOperationInputCount,
                 &normalOperationInputConstantCount,
                 &normalOperationInputModelInputCount]() -> InputKind {
                    // Constant?  Becomes less likely the more
                    // constants we already have as inputs to
                    // this operation.
                    if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) /
                                                        normalOperationInputCount)) {
                        normalOperationInputConstantCount++;
                        return IK_VALUE;
                    }

                    // Model input?  Becomes less likely the
                    // more model inputs we already have as
                    // inputs to this operation, and the further
                    // along we are in generating this model
                    // (i.e., the more operations we have
                    // generated).
                    if ((model.operationCount() == 0) ||
                        (randFrac() < 0.5 *
                                              (1 - double(normalOperationInputModelInputCount) /
                                                           normalOperationInputCount) *
                                              std::min(0.3, (1 - double(model.operationCount()) /
                                                                         numOperations)))) {
                        normalOperationInputModelInputCount++;
                        return IK_SUBGRAPH_INPUT;
                    }

                    // Else output of an existing operation.
                    return IK_OPERATION_OUTPUT;
                });

        // Now force common root or model input, if necessary.  (A
        // model must have at least one input.)
        auto force = [this, &normalOperationInputKinds,
                      normalOperationInputCount](InputKind forceKind) {
            if (std::none_of(normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
                             [forceKind](InputKind kind) { return kind == forceKind; })) {
                normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind;
            }
        };
        if (forceCommonRoot && (model.operationCount() != 0)) {
            force(IK_OPERATION_OUTPUT);
        }
        if (modelInputs.empty()) {
            CHECK(model.operationCount() == 0);
            force(IK_SUBGRAPH_INPUT);
        }

        // Finally create the normal inputs.
        bool isRootOperation = true;
        for (unsigned i = 0; i < normalOperationInputCount; i++) {
            uint32_t operandIndex = ~0U;
            switch (normalOperationInputKinds[i]) {
                case IK_SUBGRAPH_INPUT: {
                    if (!modelInputs.empty() && (randFrac() < 0.5)) {
                        operandIndex = modelInputs[randUInt(modelInputs.size())];
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        modelInputs.push_back(operandIndex);
                    }
                    break;
                }
                case IK_OPERATION_OUTPUT: {
                    decltype(deadOperands.begin()) deadOperandI;
                    if (!deadOperands.empty() && (randFrac() < 0.5)) {
                        deadOperandI = deadOperands.begin();
                        std::advance(deadOperandI, randUInt(deadOperands.size()));
                        operandIndex = deadOperandI->first;
                    } else {
                        const uint32_t existingOperationIndex = randUInt(model.operationCount());
                        const auto& existingOperationOutputs =
                                model.getOperationOutputs(existingOperationIndex);
                        operandIndex =
                                existingOperationOutputs[randUInt(existingOperationOutputs.size())];
                        deadOperandI = deadOperands.find(operandIndex);
                        CHECK(deadOperandI == deadOperands.end() ||
                              deadOperandI->second == existingOperationIndex);
                    }
                    // Consuming this operand may bring it -- and hence the
                    // operation that produces it -- back to life.
                    if (deadOperandI != deadOperands.end()) {
                        const uint32_t correspondingOperation = deadOperandI->second;
                        deadOperands.erase(deadOperandI);

                        auto deadOperationI = deadOperations.find(correspondingOperation);
                        if (deadOperationI != deadOperations.end()) {
                            deadOperations.erase(deadOperationI);
                        }
                    }
                    isRootOperation = false;
                    break;
                }
                case IK_VALUE: {
                    if (!valueOperands.empty() && (randFrac() < 0.25)) {
                        operandIndex = valueOperands[randUInt(valueOperands.size())].first;
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        if (randFrac() < 0.5) {
                            std::vector<float> value(problemSize * problemSize);
                            std::generate(value.begin(), value.end(),
                                          [this] { return randFrac(); });
                            model.setOperandValue(operandIndex, value);
                            valueOperands.push_back(std::make_pair(operandIndex, ~0U));
                        } else {
                            unsigned memoryIndex = ~0U;
                            if ((weights.memoryCount() != 0) &&
                                (kAllWeightsInOnePool || (randFrac() < 0.5))) {
                                memoryIndex = randUInt(weights.memoryCount());
                            } else {
                                memoryIndex = weights.addMemory();
                            }
                            const size_t length = problemSize * problemSize * sizeof(float);
                            const unsigned regionIndex = weights.addRegion(memoryIndex, length);
                            valueOperands.push_back(std::make_pair(operandIndex, regionIndex));
                        }
                    }
                    break;
                }
                default:
                    FAIL();
            }
            operationInputs[normalOperationInputIndexes[i]] = operandIndex;
        }
        if (isRootOperation) {
            rootOperationCount++;
        }

        // OUTPUTS /////////////////////////////////////////////////////////////////////////////////

        std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
        std::generate(operationOutputs.begin(), operationOutputs.end(),
                      [&model, &problemType, &unknownDimensionsType, &hasUnknownDimensions,
                       allowUnknownDimensions, this] {
                          // 3% unknowns causes ~35% of partitionings to fail
                          // (determined by commenting out the fallback code,
                          // running tests and noting number of failures).
                          if (allowUnknownDimensions && randFrac() < 0.03) {
                              hasUnknownDimensions = true;
                              return model.addOperand(&unknownDimensionsType);
                          } else {
                              return model.addOperand(&problemType);
                          }
                      });

        // OPERATION ///////////////////////////////////////////////////////////////////////////////

        const uint32_t operationIndex = model.addOperation(operationPattern.mOperationType,
                                                           operationInputs, operationOutputs);
        // A newly-added operation is dead until something consumes one of its
        // outputs (or an output becomes a model output, below).
        deadOperations.insert(operationIndex);
        std::for_each(operationOutputs.begin(), operationOutputs.end(),
                      [&deadOperands, operationIndex](uint32_t operandIndex) {
                          deadOperands.insert(std::make_pair(operandIndex, operationIndex));
                      });
    }

    // Now finalize the weights.
    weights.layout();
    for (const auto& valueOperand : valueOperands) {
        const uint32_t operandIndex = valueOperand.first;
        const unsigned regionIndex = valueOperand.second;

        if (regionIndex == ~0U) {
            // Already defined via setOperandValue().
            continue;
        }

        const WrapperMemory* memory;
        uint32_t offset, length;
        float* region =
                static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length));
        CHECK(length == problemSize * problemSize * sizeof(float));
        std::generate(region, region + problemSize * problemSize, [this] { return randFrac(); });
        model.setOperandValueFromMemory(operandIndex, memory, offset, length);
    }

    // Now select model outputs.
    for (uint32_t operationIdx = 0, operationCount = model.operationCount();
         operationIdx < operationCount; operationIdx++) {
        const auto& outputs = model.getOperationOutputs(operationIdx);
        for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount;
             outputIdx++) {
            bool modelOutput = false;
            const uint32_t operandIndex = outputs[outputIdx];
            const auto deadOperandI = deadOperands.find(operandIndex);
            if (deadOperandI != deadOperands.end()) {
                // This is not consumed within the model, so unless we
                // make it an output of the model, it's dead.  The
                // further along we are in generating this model
                // (i.e., the more operations we have generated), the
                // more likely we are to classify this operation
                // output as a model output.
                //
                // Bug fix: the quotient must be computed in floating point.
                // With the previous uint32_t division, (operationIdx + 1) /
                // operationCount was 0 for every operation except the last,
                // making the probability 0 and contradicting the intent
                // described above.
                const double probabilityOfModelOutput =
                        0.50 *
                        [](double x) { return x * x; }(double(operationIdx + 1) / operationCount);
                modelOutput = (randFrac() < probabilityOfModelOutput);
            } else {
                // This is consumed within the model, so we'll rarely
                // make it an output of the model.
                modelOutput = (randFrac() < 0.05);
            }
            if (!modelOutput) {
                continue;
            }
            modelOutputs.push_back(operandIndex);
            if (deadOperandI != deadOperands.end()) {
                deadOperands.erase(deadOperandI);
                const auto deadOperationI = deadOperations.find(operationIdx);
                if (deadOperationI != deadOperations.end()) {
                    deadOperations.erase(deadOperationI);
                }
            }
        }
    }
    if (!allowDeadOperations) {
        // For each dead operation, pick a random output to become a model output.
        for (uint32_t deadOperationIndex : deadOperations) {
            const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex);
            const uint32_t deadOperandIndex =
                    deadOperationOutputs[randUInt(deadOperationOutputs.size())];
            modelOutputs.push_back(deadOperandIndex);
        }
    }
    // A model must have at least one output.
    if (modelOutputs.empty()) {
        const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount()));
        modelOutputs.push_back(outputs[randUInt(outputs.size())]);
    }

    model.identifyInputsAndOutputs(modelInputs, modelOutputs);
#ifdef VERBOSE
    {
        std::cout << "Original model: " << ModelStats(&model) << std::endl;
        std::cout << "rootOperationCount = " << rootOperationCount << ", deadOperations = ";
        if (allowDeadOperations) {
            std::cout << deadOperations.size();
        } else {
            std::cout << "forbidden (converted " << deadOperations.size() << ")";
        }
        std::cout << std::endl;
    }
#endif
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // Non-partitioned compilation.
    TestCompilation c(&model);
    ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
    ASSERT_EQ(c.finish(), Result::NO_ERROR);

    // Create some drivers for partitioned compilation.
    CHECK(!signatures.empty());
    std::vector<std::set<Signature>> signaturesForDriver(signatures.size());
    // First assign each signature to a random driver (a driver is
    // just represented as an entry in the signaturesForDriver
    // vector).
    for (Signature signature : signatures) {
        signaturesForDriver[randUInt(signatures.size())].insert(signature);
    }
    // Now remove each entry that has no signatures.
    auto firstExtra =
            std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(),
                           [](const std::set<Signature>& sigSet) { return sigSet.empty(); });
    if (firstExtra != signaturesForDriver.end()) {
        signaturesForDriver.erase(firstExtra, signaturesForDriver.end());
    }
    // Now actually create the drivers.
    std::vector<std::shared_ptr<Device>> devices;
    for (unsigned i = 0; i < signaturesForDriver.size(); i++) {
        const auto& signaturesForThisDriver = signaturesForDriver[i];
        // Minimum HAL version for this driver is highest minimum HAL version of
        // any operation supported by this driver.
        const HalVersion minHalVersion = getMinHalVersion(
                std::max_element(signaturesForThisDriver.begin(), signaturesForThisDriver.end(),
                                 [](const Signature& a, const Signature& b) {
                                     return getMinHalVersion(a.first) < getMinHalVersion(b.first);
                                 })
                        ->first);
        // Pick a random version in [minHalVersion, LATEST].
        const HalVersion actualHalVersion =
                static_cast<HalVersion>(static_cast<int32_t>(minHalVersion) +
                                        randUInt(static_cast<int32_t>(HalVersion::LATEST) -
                                                 static_cast<int32_t>(minHalVersion) + 1));
        const std::string name =
                "TestDriver(" + std::to_string(i) + "){" + to_string(actualHalVersion) + "}";
#ifdef VERBOSE
        std::cout << "Creating " + name + " for collection of signatures that requires HAL " +
                             to_string(minHalVersion)
                  << std::endl;
#endif
        auto device = DeviceManager::forTest_makeDriverDevice(
                name, makeTestDriver(actualHalVersion, name.c_str(), signaturesForThisDriver));
        devices.push_back(device);
    }
    // CPU fallback device
    devices.push_back(DeviceManager::getCpuDevice());

    // Partitioned compilation.
    // For test cases without unknown intermediate operand sizes we require the
    // partitioning to succeed without CPU fallback.  With unknown sizes we
    // retry with a fallback if the non-fallback partitioning fails and require
    // the fallback to succeed.
    TestCompilation cNoFallback(&model, devices);
    TestCompilation cWithFallback(&model, devices);
    TestCompilation* c2 = nullptr;
    ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    auto compilationResult = cNoFallback.finish();
    if (hasUnknownDimensions && compilationResult == Result::OP_FAILED &&
        cNoFallback.getExecutionPlan().forTest_hasStepModelOutputsOfUnknownSize()) {
        ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
                  Result::NO_ERROR);
        ASSERT_EQ(cWithFallback.finish(), Result::NO_ERROR);
        ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
                  DeviceManager::getCpuDevice());
        c2 = &cWithFallback;
    } else {
        ASSERT_EQ(compilationResult, Result::NO_ERROR);
        c2 = &cNoFallback;
    }

#ifdef VERBOSE
    {
        std::cout << "signatures = " << signatures.size() << ", devices = " << devices.size()
                  << std::endl;
        // TODO: When dumping steps, include non-ExecutionSteps.
        const ExecutionPlan& plan = c2->getExecutionPlan();
        switch (plan.forTest_getKind()) {
            case ExecutionPlan::Kind::SIMPLE:
                std::cout << "plan: simple" << std::endl;
                break;
            case ExecutionPlan::Kind::COMPOUND: {
                const auto& steps = plan.forTest_compoundGetSteps();
                std::set<const Device*> devicesInPlan;
                for (const auto& step : steps) {
                    if (const auto* executionStep = step->tryExecutionStep()) {
                        devicesInPlan.insert(executionStep->getDevice().get());
                    }
                }
                std::cout << "plan: compound, " << steps.size() << " steps over "
                          << devicesInPlan.size() << " devices" << std::endl;
                for (unsigned i = 0; i < steps.size(); i++) {
                    if (const auto executionStep = steps[i]->tryExecutionStep()) {
                        std::cout << "Step " << i << ": "
                                  << ModelStats(executionStep->getStepModel())
                                  << ", device = " << executionStep->getDevice()->getName()
                                  << std::endl;
                    }
                }
                break;
            }
            default:
                std::cout << "Unexpected plan kind: "
                          << static_cast<unsigned>(plan.forTest_getKind());
                break;
        }
    }
#endif

    // For execution:
    // - create master inputs (one long vector) and master output value
    //   - master inputs will be copied to actual inputs before each
    //     of the two executions
    //   - master output will be used to fill actual outputs before each
    //     of the two executions
    // - create actual inputs and outputs
    // - first execution (non-partitioned)
    //   - initialize inputs and (to avoid unrelated oddities) outputs
    //   - execute
    //   - copy outputs to a save area (one long vector)
    // - second execution (partitioned)
    //   - (to avoid unrelated oddities) initialize inputs and outputs
    //   - execute
    //   - compare outputs to save area

    // If the runtime and drivers are working properly, execution
    // should not change the inputs.  Nonetheless, we reinitialize the
    // inputs for each execution, so as to avoid unrelated problems
    // appearing to be problems related to unpartitioned execution
    // versus partitioned execution.  Similarly, execution behavior
    // should not be dependent on the outputs; but we'll initialize the
    // outputs anyway.
    std::vector<float> masterInputs(problemSize * problemSize * model.inputCount());
    std::generate(masterInputs.begin(), masterInputs.end(), [this] { return randFrac(); });
#ifdef VERBOSE
    {
        std::cout << "flat inputs = ";
        dump(masterInputs.begin(), masterInputs.end());
    }
#endif
    const float masterOutput = randFrac();

    // Create the memory for the actual inputs and outputs.
    struct InputOutputDescriptor {
        enum Kind { INPUT, OUTPUT };
        Kind mKind;

        // The input or output either resides in a local buffer
        // (mVector, in which case mMemoryRegion is ignored); or in a
        // shared memory region within a TestMemories instance
        // (mMemoryRegion, in which case mVector is ignored).
        enum Location { VECTOR, REGION };
        Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }

        std::vector<float> mVector;
        unsigned mMemoryRegion;
    };
    std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
    for (unsigned i = 0; i < ioDescriptors.size(); i++) {
        ioDescriptors[i].mKind = (i < model.inputCount() ? InputOutputDescriptor::INPUT
                                                         : InputOutputDescriptor::OUTPUT);
    }
    // We randomly interleave inputs and outputs in creation
    // order, because when we we create memory regions in a
    // TestMemories instance, the order in which regions are
    // created within a single Memory is the order they'll be laid
    // out in that memory; and when we have inputs and outputs
    // within the same Memory, we want the possibility that
    // they'll be interleaved.
    std::shuffle(ioDescriptors.begin(), ioDescriptors.end(), mRandNumEng);
    TestMemories ioMemories;
    for (auto& desc : ioDescriptors) {
        if (randFrac() < 0.5) {
            desc.mVector.resize(problemSize * problemSize);
        } else {
            // TODO: common this with the way we create IK_VALUE inputs?
            unsigned memoryIndex = ~0U;
            if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) {
                memoryIndex = randUInt(ioMemories.memoryCount());
            } else {
                memoryIndex = ioMemories.addMemory();
            }
            const size_t length = problemSize * problemSize * sizeof(float);
            desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length);
        }
    }
    ioMemories.layout();

    // Function to set up actual inputs and outputs (initializing them
    // and telling the WrapperExecution about them).
    auto prepareForExecution = [&model, &ioDescriptors, &ioMemories, &masterInputs, &masterOutput,
                                problemSize, &problemType](WrapperExecution* e) {
        uint32_t inputIndex = 0, outputIndex = 0;
        for (auto& desc : ioDescriptors) {
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                if (desc.mKind == InputOutputDescriptor::INPUT) {
                    const size_t inputOffset = inputIndex * problemSize * problemSize;
                    std::copy(masterInputs.begin() + inputOffset,
                              masterInputs.begin() + inputOffset + problemSize * problemSize,
                              desc.mVector.begin());
                    e->setInput(inputIndex++, desc.mVector.data(),
                                desc.mVector.size() * sizeof(float));
                } else {
                    std::fill(desc.mVector.begin(),
                              desc.mVector.begin() + problemSize * problemSize, masterOutput);
                    e->setOutput(outputIndex++, desc.mVector.data(),
                                 desc.mVector.size() * sizeof(float), &problemType.operandType);
                }
            } else {
                const WrapperMemory* memory;
                uint32_t offset, length;
                float* region = static_cast<float*>(
                        ioMemories.getRegion(desc.mMemoryRegion, &memory, &offset, &length));
                CHECK(length == problemSize * problemSize * sizeof(float));
                if (desc.mKind == InputOutputDescriptor::INPUT) {
                    const size_t inputOffset = inputIndex * problemSize * problemSize;
                    std::copy(masterInputs.begin() + inputOffset,
                              masterInputs.begin() + inputOffset + problemSize * problemSize,
                              region);
                    e->setInputFromMemory(inputIndex++, memory, offset, length);
                } else {
                    std::fill(region, region + problemSize * problemSize, masterOutput);
                    e->setOutputFromMemory(outputIndex++, memory, offset, length,
                                           &problemType.operandType);
                }
            }
        }
        CHECK(inputIndex == model.inputCount());
        CHECK(outputIndex == model.outputCount());
    };

    // Non-partitioned execution.
    WrapperExecution e(&c);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e));
    ASSERT_EQ(e.compute(), Result::NO_ERROR);

    // Copy the outputs of the non-partitioned execution to a save area.
    std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount());
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                std::copy(desc.mVector.begin(), desc.mVector.end(),
                          nonPartitionedOutputs.begin() + outputOffset);
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
                std::copy(region, region + problemSize * problemSize,
                          nonPartitionedOutputs.begin() + outputOffset);
            }
#ifdef VERBOSE
            {
                std::cout << "nonpartitioned output[" << outputIndex << "] = ";
                dump(nonPartitionedOutputs.begin() + outputOffset,
                     nonPartitionedOutputs.begin() + outputOffset + problemSize * problemSize);
            }
#endif
            outputIndex++;
        }
    }

    // Partitioned execution.
    WrapperExecution e2(c2);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
    ASSERT_EQ(e2.compute(), Result::NO_ERROR);

    // Compare the outputs of the partitioned execution to the save
    // area containing the outpus of the non-partitioned execution.
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            SCOPED_TRACE(outputIndex);
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
#ifdef VERBOSE
                std::cout << "  partitioned output[" << outputIndex << "] = ";
                dump(desc.mVector.begin(), desc.mVector.end());
#endif
                ASSERT_TRUE(std::equal(desc.mVector.begin(), desc.mVector.end(),
                                       nonPartitionedOutputs.begin() + outputOffset));
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
#ifdef VERBOSE
                std::cout << "part output[" << outputIndex << "] = ";
                dump(region, region + problemSize * problemSize);
#endif
                ASSERT_TRUE(std::equal(region, region + problemSize * problemSize,
                                       nonPartitionedOutputs.begin() + outputOffset));
            }
            outputIndex++;
        }
    }
}
1413
1414 } // namespace
1415 } // namespace android
1416