1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #undef NDEBUG
18
19 #include "Bridge.h"
20 #include "CompilationBuilder.h"
21 #include "Manager.h"
22 #include "ModelBuilder.h"
23 #include "NeuralNetworks.h"
24 #include "NeuralNetworksWrapper.h"
25 #include "SampleDriver.h"
26 #include "Utils.h"
27 #include "ValidateHal.h"
28
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <map>
#include <random>
#include <set>
#include <tuple>
#include <utility>
#include <vector>
37
38 #include <unistd.h>
39
40 #include <android-base/logging.h>
41 #include <android/sharedmem.h>
42 #include <gtest/gtest.h>
43
44 // Uncomment the following line to generate some debugging output that
45 // may be useful when analyzing failures:
46 //
47 // #define VERBOSE VERBOSE
48
49 // Uncomment the following line to generate graphs from models:
50 //
51 // #define GRAPH GRAPH
52
53 // We randomly generate tests (model + input data) at runtime, and verify
54 // that we get the same results whether we do partitioned compilation/execution
55 // or non partitioned compilation/execution. We perform a test as follows:
56 //
57 // (1) Randomly generate a model (graph and weights), randomly generate input
58 // data, randomly assign inputs and outputs to CPU memory or to shared
59 // memory.
60 //
61 // Randomly leaves dimensions unset for intermediate operands.
62 //
63 // (2) Randomly generate drivers based on the sample driver, each of which
64 // executes models on the CPU. They differ according to which operations
65 // they support.
66 //
67 // (3) Compile and execute without partitioning, saving off the results.
68 //
69 // (4) Compile and execute with partitioning.
70 //
71 // (5) Verify that the saved results from (3) match the results from (4).
72 //
73 // For simplicity, all data (model inputs, model outputs, weights,
74 // temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
75 // dimensions are fixed throughout a particular test case (and
76 // randomly determined). This prevents us from having to find a
77 // mechanism to "resize" data (e.g., if ADD#a operates on data of size
78 // 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
79 // and ADD#b become inputs of ADD#c, do we need to insert one or more
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
81 // from size 2x2 to size 3x3 in order to match ADD#b). In the few
82 // cases where an operand cannot be of this type, it is a constant
83 // (e.g., activation functions and RNN bias).
84 //
85 // Each operation we generate has a signature (described in more
86 // detail later). The randomly generated drivers decide which
87 // operations they can execute by checking operation signatures. Once
88 // we have built the model and know the set of signatures, we randomly
89 // assign each signature to a driver. No signature is supported by
90 // multiple drivers -- we're not testing the logic that the
91 // partitioning algorithm uses to select the best driver for an
92 // operation.
93
94 namespace android {
95
96 using CompilationBuilder = nn::CompilationBuilder;
97 using Device = nn::Device;
98 using DeviceManager = nn::DeviceManager;
99 using ExecutionPlan = nn::ExecutionPlan;
100 using HidlModel = hardware::neuralnetworks::V1_1::Model;
101 using MemoryBuilder = nn::Memory;
102 using ModelBuilder = nn::ModelBuilder;
103 using Result = nn::wrapper::Result;
104 using SampleDriver = nn::sample_driver::SampleDriver;
105 using WrapperCompilation = nn::wrapper::Compilation;
106 using WrapperExecution = nn::wrapper::Execution;
107 using WrapperMemory = nn::wrapper::Memory;
108 using WrapperModel = nn::wrapper::Model;
109 using WrapperOperandType = nn::wrapper::OperandType;
110 using WrapperType = nn::wrapper::Type;
111
112 namespace {
113
/// Configure test size //////////////////////////////////////////////////////////

// Upper bound on the number of operations in a generated model.
// We may exceed this in order to connect otherwise disjoint subgraphs.
static const unsigned kMaxNumOperations = 100;

// We build models to process 2-D square tensors up to this size in each dimension;
// note that the API promotes by-value weights larger than 128 to by-reference,
// so we want to ensure that we can pick both types that exceed and types that do
// not exceed this size.
static const unsigned kMaxProblemSize = 8;

// First seed for pseudorandom test generation.
static const unsigned kFirstSeed = 0;

// Number of test cases.  Each test case is instantiated with its own
// seed (kFirstSeed, kFirstSeed + 1, ...).
static const unsigned kNumTestCases = 225;

// Force all graph weights into a single pool (as we recommend to users)
// or allow them to be distributed across multiple pools (more stress
// on the partitioning algorithm and the rest of the runtime)?
// Forcing all graph weights into a single pool may be necessary to
// prevent large graphs from running up against http://b/70302693
// "NNAPI overuses (?) fds".
static const bool kAllWeightsInOnePool = false;

//////////////////////////////////////////////////////////////////////////////////

// The signature of an operation consists of the operation type (e.g.,
// ADD) and the activation function (use -1 in the case of an
// operation type for which the activation function is inapplicable).
typedef std::pair<ANeuralNetworksOperationType, int> Signature;
145
146 // This class adds some simple utilities on top of
147 // ::android::nn::wrapper::Model. For example, it provides access to
148 // certain features from ModelBuilder that are not exposed by the base
149 // class (such as inputCount() and operation index).
class TestModel : public WrapperModel {
public:

    // Adds an operation to the underlying model and remembers its
    // output operand indexes locally, so that getOperationOutputs()
    // can retrieve them later.  Returns the index of the new
    // operation.
    uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
                          const std::vector<uint32_t>& outputs) {
        const uint32_t operationIndex = operationCount();
        mOperations.push_back(outputs);
        WrapperModel::addOperation(type, inputs, outputs);
        return operationIndex;
    }

    // Number of operations added via addOperation().
    uint32_t operationCount() const {
        return mOperations.size();
    }

    // Number of model inputs, as recorded by the underlying ModelBuilder.
    uint32_t inputCount() const {
        return builder()->inputCount();
    }
    // Number of model outputs, as recorded by the underlying ModelBuilder.
    uint32_t outputCount() const {
        return builder()->outputCount();
    }

    // Returns the output operand indexes of the operation at "index"
    // (an index previously returned by addOperation()).
    const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const {
        assert(index < mOperations.size());
        return mOperations[index];
    }

    // All values are immediately copied into the model (we need to do
    // this ourselves in cases where the underlying NNAPI does not).
    void setOperandValue(uint32_t index, const std::vector<float>& value) {
        const size_t length = value.size() * sizeof(float);

        if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
            // Small enough for the NNAPI to copy the value itself.
            WrapperModel::setOperandValue(index, value.data(), length);
        } else {
            // Large values are kept by reference, so keep our own copy
            // alive for the lifetime of this TestModel.  (Growing
            // mOperandValues moves the inner vectors, but a moved
            // std::vector retains its heap buffer, so the pointer
            // passed to the NNAPI remains valid.)
            mOperandValues.push_back(value);
            WrapperModel::setOperandValue(index, mOperandValues.back().data(), length);
        }
    }

    // Sets an INT32 scalar operand value (always small enough to be
    // immediately copied by the NNAPI).
    void setOperandValue(uint32_t index, int32_t value) {
        assert(sizeof(value) <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        WrapperModel::setOperandValue(index, &value, sizeof(value));
    }

private:

    // The wrapper's opaque handle is really a ModelBuilder.
    const ModelBuilder* builder() const {
        return reinterpret_cast<const ModelBuilder*>(getHandle());
    }

    // Representation of operations: vector index is operation number,
    // vector value is operation's output operands.
    std::vector<std::vector<uint32_t>> mOperations;

    // Large operand values -- not immediately copied into the
    // WrapperModel, so remembered here instead.
    std::vector<std::vector<float>> mOperandValues;
};
209
210 // This class adds some simple utilities on top of
211 // ::android::nn::wrapper::Compilation in order to provide access to
212 // certain features from CompilationBuilder that are not exposed by
213 // the base class.
214 class TestCompilation : public WrapperCompilation {
215 public:
TestCompilation(const WrapperModel * model)216 TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {}
217
setPartitioning(uint32_t partitioning)218 Result setPartitioning(uint32_t partitioning) {
219 return static_cast<Result>(builder()->setPartitioning(partitioning));
220 }
221
222 using WrapperCompilation::finish;
finish(const std::vector<std::shared_ptr<Device>> & devices)223 Result finish(const std::vector<std::shared_ptr<Device>>& devices) {
224 return static_cast<Result>(builder()->finish(devices));
225 }
226
getExecutionPlan() const227 const ExecutionPlan& getExecutionPlan() const {
228 return builder()->forTest_getExecutionPlan();
229 }
230
231 private:
builder() const232 const CompilationBuilder* builder() const {
233 return reinterpret_cast<const CompilationBuilder*>(getHandle());
234 }
builder()235 CompilationBuilder* builder() {
236 return reinterpret_cast<CompilationBuilder*>(getHandle());
237 }
238 };
239
240 // This class is used to manage a collection of memory regions,
241 // disjoint windows onto a set of Memory instances, each of which is
242 // associated with a single shared memory region. Each region and
243 // Memory instance is assigned a number. The usage pattern is as
244 // follows:
245 // - Call addMemory() and addRegion() as many times as needed to
246 // declare (but not define) Memory instances and declare region
247 // instances.
248 // - Call layout() to define the Memory instances.
249 // - Call getRegion() as many times as needed to get the details
250 // of memory regions (such as address, or Memory/offset/length).
251 // The Memory instances created by layout() are owned by the
252 // TestMemories instance, and are destroyed when the TestMemories
253 // instance is destroyed.
254 class TestMemories {
255 public:
256 TestMemories() = default;
257 ~TestMemories();
258
259 TestMemories(const TestMemories&) = delete;
260 TestMemories& operator=(const TestMemories&) = delete;
261
addMemory()262 unsigned addMemory() {
263 assert(!mLayoutDone);
264 mMemorySizes.push_back(0);
265 return memoryCount() - 1;
266 }
memoryCount() const267 unsigned memoryCount() const {
268 return mMemorySizes.size();
269 }
270
addRegion(unsigned memoryIndex,uint32_t length)271 unsigned addRegion(unsigned memoryIndex, uint32_t length) {
272 assert(!mLayoutDone);
273 assert(memoryIndex < memoryCount());
274 uint32_t& memorySize = mMemorySizes[memoryIndex];
275 auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length);
276 mRegions.push_back(desc);
277 memorySize += length;
278 return regionCount() - 1;
279 }
regionCount() const280 unsigned regionCount() const {
281 return mRegions.size();
282 }
283
284 void layout();
285
getRegion(unsigned regionIndex,const WrapperMemory ** pMemory,uint32_t * pOffset,uint32_t * pLength)286 void* getRegion(unsigned regionIndex,
287 const WrapperMemory** pMemory, uint32_t* pOffset, uint32_t* pLength) {
288 assert(mLayoutDone);
289 assert(regionIndex < regionCount());
290 const auto& regionDescriptor = mRegions[regionIndex];
291 const WrapperMemory* memory = &mMemorys[std::get<0>(regionDescriptor)];
292 uint32_t offset = std::get<1>(regionDescriptor);
293 uint32_t length = std::get<2>(regionDescriptor);
294
295 uint8_t* buffer;
296 if (reinterpret_cast<MemoryBuilder*>(memory->get())->getPointer(&buffer) !=
297 ANEURALNETWORKS_NO_ERROR) {
298 assert(0);
299 }
300
301 if (pMemory) *pMemory = memory;
302 if (pOffset) *pOffset = offset;
303 if (pLength) *pLength = length;
304
305 return buffer + offset;
306 }
307
getRegion(unsigned regionIndex)308 void* getRegion(unsigned regionIndex) {
309 return getRegion(regionIndex, nullptr, nullptr, nullptr);
310 }
311
312 private:
313 // Index is the memory index; value is the size of the memory
314 // (aggregate size of all regions in the memory).
315 std::vector<uint32_t> mMemorySizes;
316
317 // Index is the memory index.
318 std::vector<WrapperMemory> mMemorys;
319 std::vector<int> mFDs;
320
321 // Index is the region index; tuple represents memory index,
322 // region offset within memory, region length.
323 std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions;
324
325 // For sanity checking.
326 bool mLayoutDone = false;
327 };
328
layout()329 void TestMemories::layout() {
330 assert(!mLayoutDone);
331 for (uint32_t memorySize : mMemorySizes) {
332 const int fd = ASharedMemory_create(nullptr, memorySize);
333 assert(fd >= 0);
334 mMemorys.emplace_back(memorySize, PROT_READ | PROT_WRITE, fd, 0);
335 mFDs.push_back(fd);
336 }
337 mLayoutDone = true;
338 }
339
~TestMemories()340 TestMemories::~TestMemories() {
341 for (int fd : mFDs) {
342 close(fd);
343 }
344 }
345
class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
public:
    // The test parameter is the PRNG seed, so each test case is
    // deterministic and reproducible from its seed alone.
    RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}

    // Computes the signature (operation type, activation function) of
    // "operation" within "model".  Used both when building a model
    // and by TestDriver to decide which operations it supports.
    static Signature getSignature(const HidlModel& model, const Operation& operation);

protected:
    // Dumps the model as a graph; a no-op unless GRAPH is defined.
    void graphDump(const WrapperModel& model);

    // Fair coin flip.
    bool randBool() {
        return randUInt(2) == 1;
    }

    double randFrac() {  // [0.0, 1.0)
        return mRandNumUnitDist(mRandNumEng);
    }

    // Uniform over [0, limit); relies on randFrac() < 1.0.
    unsigned randUInt(unsigned limit) {  // [0, limit)
        return unsigned(randFrac() * limit);
    }

    // Represents an operation in which every input and output operand
    // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except:
    // - One input operand may be an activation function.
    // - Any number of input operands may be "special" in some other way
    //   (and in this implementation, not produced by any other operation).
    // We require that:
    // - There be at least one input operand that is neither an
    //   activation function nor "special".
    struct OperationPattern {
        int mOperationType;
        unsigned mNumInputs;
        unsigned mNumOutputs;
        int mActivationFunctionInputIndex;  // <0 if none

        // Returns operand index, or <0 if input is normal (must not
        // be called for an activation function operand).  Function
        // should have the following prototype:
        //
        //     int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex);
        //
        int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned);
    };

    // Table of the operation patterns we generate; defined below.
    static const OperationPattern kOperationPatterns[];

    // Special-input maker for ANEURALNETWORKS_RNN: input operand 3 is
    // the bias, a 1-D tensor of length problemSize filled with random
    // values.  All other inputs are normal (returns -1).
    int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) {
        if (inputIndex != 3) {
            return -1;
        }

        // input operand 3 is bias, a 1-D tensor
        const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, { problemSize });
        const uint32_t operandIndex = model->addOperand(&biasType);
        std::vector<float> biasValue(problemSize);
        std::generate(biasValue.begin(), biasValue.end(),
                      [this]{ return randFrac(); });
        model->setOperandValue(operandIndex, biasValue);
        return int(operandIndex);
    }

#ifdef VERBOSE
    // Streams summary statistics (operation/operand/input/output
    // counts) of a model; only compiled in VERBOSE builds.
    class ModelStats {
    public:
        ModelStats(const ModelBuilder* model) :
                mBuilder(model) { }
        ModelStats(const WrapperModel* model) :
                mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) { }
        friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) {
            const uint32_t operandCount = stats.mBuilder->operandCount();
            const uint32_t inputCount = stats.mBuilder->inputCount();
            const uint32_t outputCount = stats.mBuilder->outputCount();
            out << "operationCount = " << stats.mBuilder->operationCount()
                << ", operandCount = " << operandCount
                << ", inputCount = " << inputCount
                << " (" << (double(inputCount) / operandCount) << ")"
                << ", outputCount = " << outputCount
                << " (" << (double(outputCount) / operandCount) << ")";
            return out;
        }
    private:
        const ModelBuilder* mBuilder;
    };
#endif

private:
    std::mt19937 mRandNumEng;
    std::uniform_real_distribution<double> mRandNumUnitDist;
};
435
// Table of operation patterns used to generate random graphs.
// Columns: operation type, number of inputs, number of outputs,
// activation-function input index (<0 if none), special-input maker
// (nullptr if none).
const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = {
    { ANEURALNETWORKS_ADD, 3, 1, 2, nullptr },
    { ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr },
    { ANEURALNETWORKS_MUL, 3, 1, 2, nullptr },
    { ANEURALNETWORKS_RNN, 6, 2, 5, &RandomPartitioningTest::makeRnnSpecialInput },
    { ANEURALNETWORKS_TANH, 1, 1, -1, nullptr },
};
443
getSignature(const HidlModel & model,const Operation & operation)444 Signature RandomPartitioningTest::getSignature(const HidlModel& model, const Operation& operation) {
445 static const std::map<ANeuralNetworksOperationType, int> kOperationToActivation = []() {
446 std::map<ANeuralNetworksOperationType, int> result;
447 for (const auto& pattern : kOperationPatterns) {
448 result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex;
449 }
450 return result;
451 }();
452
453 const ANeuralNetworksOperationType operationType =
454 static_cast<ANeuralNetworksOperationType>(operation.type);
455 const int activationFunctionInputIndex = kOperationToActivation.at(operationType);
456 if (activationFunctionInputIndex < 0) {
457 return Signature(operationType, -1);
458 }
459
460 const Operand& operand = model.operands[operation.inputs[activationFunctionInputIndex]];
461 assert(operand.lifetime == OperandLifeTime::CONSTANT_COPY);
462 assert(operand.type == OperandType::INT32);
463 int32_t value;
464 memcpy(&value,
465 &model.operandValues[operand.location.offset],
466 operand.location.length);
467 return Signature(operationType, value);
468 }
469
// Dumps "model" as a graph named after the current test seed; only
// does anything when GRAPH is defined, hence the [[maybe_unused]]
// parameter annotation.
void RandomPartitioningTest::graphDump([[maybe_unused]] const WrapperModel& model) {
#ifdef GRAPH
    const std::string name = "Test-" + std::to_string(GetParam());
    nn::bridge_tests::graphDump(name.c_str(),
                                reinterpret_cast<const ModelBuilder*>(model.getHandle()));
#endif
}
477
class TestDriver : public SampleDriver {
public:
    // Behaves like SampleDriver, except that it only supports
    // operations with the specified signatures.
    TestDriver(const char* name, std::set<Signature> signatures) :
            SampleDriver(name), mSignatures(std::move(signatures)) { }

    // Reports identical performance numbers for all data types, so
    // that partitioning decisions among TestDrivers are driven by
    // getSupportedOperations_1_1(), not by performance differences.
    Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
        android::nn::initVLogMask();
        Capabilities capabilities =
                {.float32Performance = {.execTime = 0.75f, .powerUsage = 0.75f},
                 .quantized8Performance = {.execTime = 0.75f, .powerUsage = 0.75f},
                 .relaxedFloat32toFloat16Performance = {.execTime = 0.75f, .powerUsage = 0.75f}};
        _hidl_cb(ErrorStatus::NONE, capabilities);
        return Void();
    }

    // An operation is supported iff its signature is one of those
    // given at construction time.
    Return<void> getSupportedOperations_1_1(const HidlModel& model,
                                            getSupportedOperations_cb cb) override {
        if (nn::validateModel(model)) {
            const size_t count = model.operations.size();
            std::vector<bool> supported(count);
            for (size_t i = 0; i < count; i++) {
                supported[i] =
                    (mSignatures.count(
                        RandomPartitioningTest::getSignature(
                            model,
                            model.operations[i])) != 0);
            }
            cb(ErrorStatus::NONE, supported);
        } else {
            std::vector<bool> supported;
            cb(ErrorStatus::INVALID_ARGUMENT, supported);
        }
        return Void();
    }

    // Prepares the model only if every operation in it is supported;
    // otherwise reports INVALID_ARGUMENT (notifying the callback
    // exactly once before returning, per the HIDL contract).
    Return<ErrorStatus> prepareModel_1_1(const HidlModel& model, ExecutionPreference preference,
                                         const sp<IPreparedModelCallback>& callback) override {
        // NOTE: We verify that all operations in the model are supported.
        ErrorStatus outStatus = ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_1(
            model,
            [&outStatus](ErrorStatus inStatus, const hidl_vec<bool>& supportedOperations) {
                if (inStatus == ErrorStatus::NONE) {
                    if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                    [](bool v){ return v; })) {
                        outStatus = ErrorStatus::NONE;
                    }
                }
            });
        if (ret.isOk() && (outStatus == ErrorStatus::NONE)) {
            return SampleDriver::prepareModel_1_1(model, preference, callback);
        } else {
            callback->notify(ErrorStatus::INVALID_ARGUMENT, nullptr);
            return ErrorStatus::INVALID_ARGUMENT;
        }
    }

private:
    const std::set<Signature> mSignatures;
};
540
// Instantiate the test once per seed in
// [kFirstSeed, kFirstSeed + kNumTestCases).
INSTANTIATE_TEST_CASE_P(Seed, RandomPartitioningTest,
                        ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases));
543
TEST_P(RandomPartitioningTest,Test)544 TEST_P(RandomPartitioningTest, Test) {
545 LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam();
546
547 #ifdef VERBOSE
548 std::cout << std::setprecision(2) << std::fixed << std::setw(4);
549 #endif
550
551 const unsigned problemSize = 1+randUInt(kMaxProblemSize);
552 const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, { problemSize, problemSize });
553 const WrapperOperandType unknownDimensionsType(WrapperType::TENSOR_FLOAT32, { 0, 0 });
554
555 static const WrapperOperandType activationFunctionType(WrapperType::INT32, { });
556
557 const unsigned numOperations = 2+randUInt(kMaxNumOperations-1);
558 const bool allowDeadOperations = (randFrac() < 0.2);
559 const bool allowUnknownDimensions = (randFrac() < 0.25);
560
561 // TODO: The current algorithm builds the graph in a forward
562 // direction (i.e., later-generated operations consume outputs
563 // from earlier-generated operations). In order to get more
564 // variation in graph topology, perhaps we should also create an
565 // algorithm to build the graph in a backward direction (i.e.,
566 // later-generated operations produce outputs to be consumed by
567 // earlier-generated operations).
568 [[maybe_unused]] const bool buildForward = randBool();
569
570 // TODO: Add a form of forced connectivity that operates by
571 // joining disjoint subgraphs rather than by forcing a root.
572 const bool forceCommonRoot = (randFrac() < 0.75);
573
574 TestModel model;
575 std::vector<uint32_t> modelInputs;
576 std::vector<uint32_t> modelOutputs;
577
578 // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32.
579 TestMemories weights;
580
581 // Keep track of all normal (i.e., not activation function and not
582 // "special") operands that are values (from setOperandValue*()).
583 // .first: operand index
584 // .second: if the operand is already defined (via setOperandValue*()) then ~0U;
585 // otherwise, the operand has yet to be defined, and this is the corresponding
586 // region index in "weights"
587 std::vector<std::pair<uint32_t, unsigned>> valueOperands;
588
589 // An operand is "dead" if it is not consumed by another operation
590 // and is not a model output. Key is operand index; value is
591 // operation index.
592 std::map<uint32_t, uint32_t> deadOperands;
593
594 // An operation is "dead" if all of its outputs are dead.
595 std::set<uint32_t> deadOperations;
596
597 // Collect the signatures of operations in this model.
598 std::set<Signature> signatures;
599
600 // For reporting purposes, keep track of the number of root
601 // operations (those that do not consume results produced by other
602 // operations).
603 unsigned rootOperationCount = 0;
604
605 // Track if we added operands with unknown dimensions. In this case,
606 // partitioned compilation will fail if such an operand is read in a
607 // different partition than it is written.
608 bool hasUnknownDimensions = false;
609
610 // Generate operations.
611 for (unsigned i = 0; i < numOperations; i++) {
612 const unsigned operationPatternIndex =
613 randUInt(sizeof(kOperationPatterns)/sizeof(kOperationPatterns[0]));
614 const auto& operationPattern = kOperationPatterns[operationPatternIndex];
615
616 // INPUTS //////////////////////////////////////////////////////////////////////////////////
617
618 std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U);
619
620 // First, process activation function and special inputs, and
621 // keep track of which inputs remain.
622 std::vector<uint32_t> normalOperationInputIndexes;
623 int32_t activationFunction = -1;
624 for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs;
625 operationInputIndex++) {
626 if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) {
627 const uint32_t operandIndex = model.addOperand(&activationFunctionType);
628 activationFunction = randUInt(4);
629 if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) {
630 // workaround for http://b/69011131
631 activationFunction = ANEURALNETWORKS_FUSED_NONE;
632 }
633 model.setOperandValue(operandIndex, activationFunction);
634 operationInputs[operationInputIndex] = operandIndex;
635 continue;
636 }
637 if (operationPattern.mMakeSpecialInput != nullptr) {
638 const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))(
639 problemSize, &model, operationInputIndex);
640 if (operandIndex >= 0) {
641 operationInputs[operationInputIndex] = operandIndex;
642 continue;
643 }
644 }
645 normalOperationInputIndexes.push_back(operationInputIndex);
646 }
647 assert(!normalOperationInputIndexes.empty());
648 signatures.insert(Signature(operationPattern.mOperationType, activationFunction));
649
650 // A (normal) operation input can be one of:
651 // - a new or existing model input
652 // - an output of an existing operation
653 // - an OperandValue
654 // - an OperandValueFromMemory
655 // Some guidelines:
656 // - We generally don't want all of an operation's inputs to be values (constants)
657 const unsigned normalOperationInputCount = normalOperationInputIndexes.size();
658 // How many of this operation's inputs are constants?
659 unsigned normalOperationInputConstantCount = 0;
660 // How many of this operation's inputs are model inputs?
661 unsigned normalOperationInputModelInputCount = 0;
662 // We begin by deciding what kind of input each (normal) operation will be; we don't
663 // actually pick input operand indexes at this time, because we might override this
664 // decision later.
665 enum InputKind { IK_MODEL_INPUT, IK_OPERATION_OUTPUT, IK_VALUE };
666 std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount);
667 std::generate(normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
668 [this, &model,
669 numOperations,
670 normalOperationInputCount,
671 &normalOperationInputConstantCount,
672 &normalOperationInputModelInputCount]() -> InputKind {
673 // Constant? Becomes less likely the more
674 // constants we already have as inputs to
675 // this operation.
676 if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) /
677 normalOperationInputCount)) {
678 normalOperationInputConstantCount++;
679 return IK_VALUE;
680 }
681
682 // Model input? Becomes less likely the
683 // more model inputs we already have as
684 // inputs to this operation, and the further
685 // along we are in generating this model
686 // (i.e., the more operations we have
687 // generated).
688 if ((model.operationCount() == 0) ||
689 (randFrac() < 0.5 *
690 (1 - double(normalOperationInputModelInputCount) /
691 normalOperationInputCount) *
692 std::min(0.3, (1 - double(model.operationCount()) /
693 numOperations)))) {
694 normalOperationInputModelInputCount++;
695 return IK_MODEL_INPUT;
696 }
697
698 // Else output of an existing operation.
699 return IK_OPERATION_OUTPUT;
700 });
701
702 // Now force common root or model input, if necessary. (A
703 // model must have at least one input.)
704 auto force =
705 [this, &normalOperationInputKinds, normalOperationInputCount](InputKind forceKind){
706 if (std::none_of(normalOperationInputKinds.begin(),
707 normalOperationInputKinds.end(),
708 [forceKind](InputKind kind){ return kind == forceKind; })) {
709 normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind;
710 }
711 };
712 if (forceCommonRoot && (model.operationCount() != 0)) {
713 force(IK_OPERATION_OUTPUT);
714 }
715 if (modelInputs.empty()) {
716 assert(model.operationCount() == 0);
717 force(IK_MODEL_INPUT);
718 }
719
720 // Finally create the normal inputs.
721 bool isRootOperation = true;
722 for (unsigned i = 0; i < normalOperationInputCount; i++) {
723 uint32_t operandIndex = ~0U;
724 switch (normalOperationInputKinds[i]) {
725 case IK_MODEL_INPUT: {
726 if (!modelInputs.empty() && (randFrac() < 0.5)) {
727 operandIndex = modelInputs[randUInt(modelInputs.size())];
728 } else {
729 operandIndex = model.addOperand(&problemType);
730 modelInputs.push_back(operandIndex);
731 }
732 break;
733 }
734 case IK_OPERATION_OUTPUT: {
735 decltype(deadOperands.begin()) deadOperandI;
736 if (!deadOperands.empty() && (randFrac() < 0.5)) {
737 deadOperandI = deadOperands.begin();
738 std::advance(deadOperandI, randUInt(deadOperands.size()));
739 operandIndex = deadOperandI->first;
740 } else {
741 const uint32_t existingOperationIndex = randUInt(model.operationCount());
742 const auto& existingOperationOutputs =
743 model.getOperationOutputs(existingOperationIndex);
744 operandIndex =
745 existingOperationOutputs[randUInt(existingOperationOutputs.size())];
746 deadOperandI = deadOperands.find(operandIndex);
747 assert(deadOperandI == deadOperands.end() ||
748 deadOperandI->second == existingOperationIndex);
749 }
750 if (deadOperandI != deadOperands.end()) {
751 const uint32_t correspondingOperation = deadOperandI->second;
752 deadOperands.erase(deadOperandI);
753
754 auto deadOperationI = deadOperations.find(correspondingOperation);
755 if (deadOperationI != deadOperations.end()) {
756 deadOperations.erase(deadOperationI);
757 }
758 }
759 isRootOperation = false;
760 break;
761 }
762 case IK_VALUE: {
763 if (!valueOperands.empty() && (randFrac() < 0.25)) {
764 operandIndex = valueOperands[randUInt(valueOperands.size())].first;
765 } else {
766 operandIndex = model.addOperand(&problemType);
767 if (randFrac() < 0.5) {
768 std::vector<float> value(problemSize * problemSize);
769 std::generate(value.begin(), value.end(), [this]{ return randFrac(); });
770 model.setOperandValue(operandIndex, value);
771 valueOperands.push_back(std::make_pair(operandIndex, ~0U));
772 } else {
773 unsigned memoryIndex = ~0U;
774 if ((weights.memoryCount() != 0) &&
775 (kAllWeightsInOnePool || (randFrac() < 0.5))) {
776 memoryIndex = randUInt(weights.memoryCount());
777 } else {
778 memoryIndex = weights.addMemory();
779 }
780 const size_t length = problemSize * problemSize * sizeof(float);
781 const unsigned regionIndex = weights.addRegion(memoryIndex, length);
782 valueOperands.push_back(std::make_pair(operandIndex, regionIndex));
783 }
784 }
785 break;
786 }
787 default:
788 FAIL();
789 }
790 operationInputs[normalOperationInputIndexes[i]] = operandIndex;
791 }
792 if (isRootOperation) {
793 rootOperationCount++;
794 }
795
796 // OUTPUTS /////////////////////////////////////////////////////////////////////////////////
797
798 std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
799 std::generate(operationOutputs.begin(), operationOutputs.end(),
800 [&model, &problemType, &unknownDimensionsType, &hasUnknownDimensions,
801 allowUnknownDimensions, this]{
802 // 3% unknowns causes ~35% of partitionings to fail
803 // (determined by commenting out the fallback code,
804 // running tests and noting number of failures).
805 if (allowUnknownDimensions && randFrac() < 0.03) {
806 hasUnknownDimensions = true;
807 return model.addOperand(&unknownDimensionsType);
808 } else {
809 return model.addOperand(&problemType);
810 }
811 });
812
813 // OPERATION ///////////////////////////////////////////////////////////////////////////////
814
815 const uint32_t operationIndex =
816 model.addOperation(operationPattern.mOperationType,
817 operationInputs, operationOutputs);
818 deadOperations.insert(operationIndex);
819 std::for_each(operationOutputs.begin(), operationOutputs.end(),
820 [&deadOperands, operationIndex](uint32_t operandIndex) {
821 deadOperands.insert(std::make_pair(operandIndex, operationIndex));
822 });
823 }
824
825 // Now finalize the weights.
826 weights.layout();
827 for (const auto& valueOperand : valueOperands) {
828 const uint32_t operandIndex = valueOperand.first;
829 const unsigned regionIndex = valueOperand.second;
830
831 if (regionIndex == ~0U) {
832 continue;
833 }
834
835 const WrapperMemory* memory;
836 uint32_t offset, length;
837 float* region =
838 static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length));
839 assert(length == problemSize * problemSize * sizeof(float));
840 std::generate(region, region + problemSize * problemSize, [this]{ return randFrac(); });
841 model.setOperandValueFromMemory(operandIndex, memory, offset, length);
842 }
843
844 // Now select model outputs.
845 for (uint32_t operationIdx = 0, operationCount = model.operationCount();
846 operationIdx < operationCount; operationIdx++) {
847 const auto& outputs = model.getOperationOutputs(operationIdx);
848 for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount;
849 outputIdx++) {
850 bool modelOutput = false;
851 const uint32_t operandIndex = outputs[outputIdx];
852 const auto deadOperandI = deadOperands.find(operandIndex);
853 if (deadOperandI != deadOperands.end()) {
// This is not consumed within the model, so unless we
// make it an output of the model, it's dead. The
// further along we are in generating this model
// (i.e., the more operations we have generated), the
// more likely we are to classify this operation
// output as a model output.
// NOTE(review): the expression below evaluates
// (operationIdx + 1) / operationCount in uint32_t
// arithmetic, which truncates to 0 for every operation
// except the last; if the ramp described above is the
// intent, the division should be done in double
// (e.g. double(operationIdx + 1) / operationCount) -- TODO confirm.
860 const double probabilityOfModelOutput =
861 0.50 * [](double x){ return x*x; }((operationIdx + 1) / operationCount);
862 modelOutput = (randFrac() < probabilityOfModelOutput);
863 } else {
864 // This is consumed within the model, so we'll rarely
865 // make it an output of the model.
866 modelOutput = (randFrac() < 0.05);
867 }
868 if (!modelOutput) {
869 continue;
870 }
871 modelOutputs.push_back(operandIndex);
872 if (deadOperandI != deadOperands.end()) {
873 deadOperands.erase(deadOperandI);
874 const auto deadOperationI = deadOperations.find(operationIdx);
875 if (deadOperationI != deadOperations.end()) {
876 deadOperations.erase(deadOperationI);
877 }
878 }
879 }
880 }
881 if (!allowDeadOperations) {
882 // For each dead operation, pick a random output to become a model output.
883 for (uint32_t deadOperationIndex : deadOperations) {
884 const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex);
885 const uint32_t deadOperandIndex =
886 deadOperationOutputs[randUInt(deadOperationOutputs.size())];
887 modelOutputs.push_back(deadOperandIndex);
888 }
889 }
890 // A model must have at least one output.
891 if (modelOutputs.empty()) {
892 const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount()));
893 modelOutputs.push_back(outputs[randUInt(outputs.size())]);
894 }
895
896 model.identifyInputsAndOutputs(modelInputs, modelOutputs);
897 #ifdef VERBOSE
898 {
899 std::cout << "Original model: " << ModelStats(&model) << std::endl;
900 std::cout << "rootOperationCount = " << rootOperationCount
901 << ", deadOperations = ";
902 if (allowDeadOperations) {
903 std::cout << deadOperations.size();
904 } else {
905 std::cout << "forbidden (converted " << deadOperations.size() << ")";
906 }
907 std::cout << std::endl;
908 }
909 #endif
910 ASSERT_EQ(model.finish(), Result::NO_ERROR);
911 graphDump(model);
912
913 // Non-partitioned compilation.
914 TestCompilation c(&model);
915 ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
916 ASSERT_EQ(c.finish(), Result::NO_ERROR);
917
918 // Create some drivers for partitioned compilation.
919 assert(!signatures.empty());
920 std::vector<std::set<Signature>> signaturesForDriver(signatures.size());
921 // First assign each signature to a random driver (a driver is
922 // just represented as an entry in the signaturesForDriver
923 // vector).
924 for (Signature signature : signatures) {
925 signaturesForDriver[randUInt(signatures.size())].insert(signature);
926 }
927 // Now remove each entry that has no signatures.
928 auto firstExtra =
929 std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(),
930 [](const std::set<Signature>& sigSet) { return sigSet.empty(); });
931 if (firstExtra != signaturesForDriver.end()) {
932 signaturesForDriver.erase(firstExtra, signaturesForDriver.end());
933 }
934 // Now actually create the drivers.
935 std::vector<std::shared_ptr<Device>> devices;
936 for (unsigned i = 0; i < signaturesForDriver.size(); i++) {
937 const std::string name = "TestDriver(" + std::to_string(i) + ")";
938 devices.push_back(std::make_shared<Device>(
939 name, new TestDriver(name.c_str(), signaturesForDriver[i])));
940 ASSERT_TRUE(devices.back()->initialize());
941 }
942
943 // Partitioned compilation.
944 // For test cases without unknown intermediate operand sizes we require the
945 // partitioning to succeed without CPU fallback. With unknown sizes we
946 // retry with a fallback if the non-fallback partitioning fails and require
947 // the fallback to succeed.
948 TestCompilation cNoFallback(&model);
949 TestCompilation cWithFallback(&model);
950 TestCompilation *c2 = nullptr;
951 ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
952 Result::NO_ERROR);
953 auto compilationResult = cNoFallback.finish(devices);
954 if (hasUnknownDimensions && compilationResult == Result::OP_FAILED &&
955 cNoFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize()) {
956 ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
957 Result::NO_ERROR);
958 ASSERT_EQ(cWithFallback.finish(devices), Result::NO_ERROR);
959 c2 = &cWithFallback;
960 } else {
961 ASSERT_EQ(compilationResult, Result::NO_ERROR);
962 c2 = &cNoFallback;
963 }
964
965 #ifdef VERBOSE
966 {
967 std::cout << "signatures = " << signatures.size()
968 << ", devices = " << devices.size() << std::endl;
969 const ExecutionPlan& plan = c2->getExecutionPlan();
970 switch (plan.forTest_getKind()) {
971 case ExecutionPlan::Kind::SIMPLE:
972 std::cout << "plan: simple" << std::endl;
973 break;
974 case ExecutionPlan::Kind::COMPOUND: {
975 const auto& steps = plan.forTest_compoundGetSteps();
976 std::set<const Device*> devicesInPlan;
977 for (const auto& step : steps) {
978 devicesInPlan.insert(step->getDevice().get());
979 }
980 std::cout << "plan: compound, " << steps.size() << " steps over "
981 << devicesInPlan.size() << " devices" << std::endl;
982 for (unsigned i = 0; i < steps.size(); i++) {
983 std::cout << "Step " << i << ": "
984 << ModelStats(steps[i]->getSubModel()) << std::endl;
985 }
986 break;
987 }
988 default:
989 std::cout << "Unexpected plan kind: "
990 << static_cast<unsigned>(plan.forTest_getKind());
991 break;
992 }
993 }
994 #endif
995
996 // For execution:
997 // - create master inputs (one long vector) and master output value
998 // - master inputs will be copied to actual inputs before each
999 // of the two executions
1000 // - master output will be used to fill actual outputs before each
1001 // of the two executions
1002 // - create actual inputs and outputs
1003 // - first execution (non-partitioned)
1004 // - initialize inputs and (to avoid unrelated oddities) outputs
1005 // - execute
1006 // - copy outputs to a save area (one long vector)
1007 // - second execution (partitioned)
1008 // - (to avoid unrelated oddities) initialize inputs and outputs
1009 // - execute
1010 // - compare outputs to save area
1011
1012 // If the runtime and drivers are working properly, execution
1013 // should not change the inputs. Nonetheless, we reinitialize the
1014 // inputs for each execution, so as to avoid unrelated problems
1015 // appearing to be problems related to unpartitioned execution
1016 // versus partitioned execution. Similarly, execution behavior
1017 // should not be dependent on the outputs; but we'll initialize the
1018 // outputs anyway.
1019 std::vector<float> masterInputs(problemSize * problemSize * model.inputCount());
1020 std::generate(masterInputs.begin(), masterInputs.end(), [this]{ return randFrac(); });
1021 const float masterOutput = randFrac();
1022
1023 // Create the memory for the actual inputs and outputs.
// Describes a single model input or output and where its backing
// storage lives during execution.
struct InputOutputDescriptor {
    enum Kind { INPUT, OUTPUT };
    Kind mKind;

    // The input or output either resides in a local buffer
    // (mVector, in which case mMemoryRegion is ignored); or in a
    // shared memory region within a TestMemories instance
    // (mMemoryRegion, in which case mVector is ignored).
    enum Location { VECTOR, REGION };
    Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }

    std::vector<float> mVector;
    // Region index within a TestMemories instance.  Initialized to
    // ~0U -- this file's conventional "no region assigned" sentinel
    // -- so an unset descriptor cannot silently alias region 0.
    unsigned mMemoryRegion = ~0U;
};
1038 std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
1039 for (unsigned i = 0; i < ioDescriptors.size(); i++) {
1040 ioDescriptors[i].mKind = (i < model.inputCount()
1041 ? InputOutputDescriptor::INPUT
1042 : InputOutputDescriptor::OUTPUT);
1043 }
// We randomly interleave inputs and outputs in creation
// order, because when we create memory regions in a
// TestMemories instance, the order in which regions are
// created within a single Memory is the order they'll be laid
// out in that memory; and when we have inputs and outputs
// within the same Memory, we want the possibility that
// they'll be interleaved.
// NOTE(review): std::random_shuffle below is deprecated in
// C++14 and removed in C++17; migrate to std::shuffle with a
// URBG when the build moves past C++14.
1051 std::random_shuffle(ioDescriptors.begin(), ioDescriptors.end(),
1052 [this](unsigned n) { return randUInt(n); });
1053 TestMemories ioMemories;
1054 for (auto &desc : ioDescriptors) {
1055 if (randFrac() < 0.5) {
1056 desc.mVector.resize(problemSize * problemSize);
1057 } else {
1058 // TODO: common this with the way we create IK_VALUE inputs?
1059 unsigned memoryIndex = ~0U;
1060 if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) {
1061 memoryIndex = randUInt(ioMemories.memoryCount());
1062 } else {
1063 memoryIndex = ioMemories.addMemory();
1064 }
1065 const size_t length = problemSize * problemSize * sizeof(float);
1066 desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length);
1067 }
1068 }
1069 ioMemories.layout();
1070
1071 // Function to set up actual inputs and outputs (initializing them
1072 // and telling the WrapperExecution about them).
auto prepareForExecution =
        [&model, &ioDescriptors, &ioMemories,
         &masterInputs, &masterOutput, problemSize, &problemType](WrapperExecution *e) {
            // Walk the (randomly interleaved) descriptors in order.
            // inputIndex/outputIndex track the model-level argument
            // positions; they must advance in the same order on every
            // call so both executions get identical bindings.
            uint32_t inputIndex = 0, outputIndex = 0;
            for (auto &desc : ioDescriptors) {
                if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                    if (desc.mKind == InputOutputDescriptor::INPUT) {
                        // Copy this input's slice of masterInputs into
                        // its local buffer, then register the buffer.
                        const size_t inputOffset = inputIndex * problemSize * problemSize;
                        std::copy(masterInputs.begin() + inputOffset,
                                  masterInputs.begin() + inputOffset + problemSize * problemSize,
                                  desc.mVector.begin());
                        e->setInput(inputIndex++, desc.mVector.data(),
                                    desc.mVector.size() * sizeof(float));
                    } else {
                        // Pre-fill the output buffer with the sentinel
                        // masterOutput value so stale data cannot mask
                        // an output the execution never writes.
                        std::fill(desc.mVector.begin(),
                                  desc.mVector.begin() + problemSize * problemSize,
                                  masterOutput);
                        e->setOutput(outputIndex++, desc.mVector.data(),
                                     desc.mVector.size() * sizeof(float),
                                     &problemType.operandType);
                    }
                } else {
                    // Shared-memory case: locate this descriptor's
                    // region and initialize it the same way as above.
                    const WrapperMemory* memory;
                    uint32_t offset, length;
                    float* region =
                            static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion,
                                                                     &memory, &offset, &length));
                    assert(length == problemSize * problemSize * sizeof(float));
                    if (desc.mKind == InputOutputDescriptor::INPUT) {
                        const size_t inputOffset = inputIndex * problemSize * problemSize;
                        std::copy(masterInputs.begin() + inputOffset,
                                  masterInputs.begin() + inputOffset + problemSize * problemSize,
                                  region);
                        e->setInputFromMemory(inputIndex++, memory, offset, length);
                    } else {
                        std::fill(region,
                                  region + problemSize * problemSize,
                                  masterOutput);
                        e->setOutputFromMemory(outputIndex++, memory, offset, length,
                                               &problemType.operandType);
                    }
                }
            };
            // Every model input and output must have been bound
            // exactly once by the loop above.
            assert(inputIndex == model.inputCount());
            assert(outputIndex == model.outputCount());
        };
1119
1120 // Non-partitioned execution.
1121 WrapperExecution e(&c);
1122 ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e));
1123 ASSERT_EQ(e.compute(), Result::NO_ERROR);
1124
1125 // Copy the outputs of the non-partitioned execution to a save area.
1126 std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount());
1127 {
1128 uint32_t outputIndex = 0;
1129 for (const auto& desc : ioDescriptors) {
1130 if (desc.mKind != InputOutputDescriptor::OUTPUT) {
1131 continue;
1132 }
1133 const size_t outputOffset = outputIndex * problemSize * problemSize;
1134 if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
1135 std::copy(desc.mVector.begin(),
1136 desc.mVector.end(),
1137 nonPartitionedOutputs.begin() + outputOffset);
1138 } else {
1139 float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
1140 std::copy(region,
1141 region + problemSize * problemSize,
1142 nonPartitionedOutputs.begin() + outputOffset);
1143 }
1144 #ifdef VERBOSE
1145 {
1146 std::cout << "output[" << outputIndex << "] = {";
1147 for (auto I = nonPartitionedOutputs.begin() + outputOffset,
1148 E = nonPartitionedOutputs.begin() +
1149 outputOffset + problemSize * problemSize;
1150 I != E; I++) {
1151 std::cout << " " << *I;
1152 }
1153 std::cout << " }" << std::endl;
1154 }
1155 #endif
1156 outputIndex++;
1157 }
1158 }
1159
1160 // Partitioned execution.
1161 WrapperExecution e2(c2);
1162 ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
1163 ASSERT_EQ(e2.compute(), Result::NO_ERROR);
1164
1165 // Compare the outputs of the partitioned execution to the save
// area containing the outputs of the non-partitioned execution.
1167 {
1168 uint32_t outputIndex = 0;
1169 for (const auto& desc : ioDescriptors) {
1170 if (desc.mKind != InputOutputDescriptor::OUTPUT) {
1171 continue;
1172 }
1173 SCOPED_TRACE(outputIndex);
1174 const size_t outputOffset = outputIndex * problemSize * problemSize;
1175 if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
1176 ASSERT_TRUE(std::equal(desc.mVector.begin(),
1177 desc.mVector.end(),
1178 nonPartitionedOutputs.begin() + outputOffset));
1179 } else {
1180 float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
1181 ASSERT_TRUE(std::equal(region,
1182 region + problemSize * problemSize,
1183 nonPartitionedOutputs.begin() + outputOffset));
1184 }
1185 outputIndex++;
1186 }
1187 }
1188 }
1189
1190 } // namespace
1191 } // namespace android
1192