1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #undef NDEBUG
18 
19 #include "Bridge.h"
20 #include "CompilationBuilder.h"
21 #include "Manager.h"
22 #include "ModelBuilder.h"
23 #include "NeuralNetworks.h"
24 #include "NeuralNetworksWrapper.h"
25 #include "SampleDriver.h"
26 #include "Utils.h"
27 #include "ValidateHal.h"
28 
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <map>
#include <random>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include <unistd.h>
39 
40 #include <android-base/logging.h>
41 #include <android/sharedmem.h>
42 #include <gtest/gtest.h>
43 
44 // Uncomment the following line to generate some debugging output that
45 // may be useful when analyzing failures:
46 //
47 // #define VERBOSE VERBOSE
48 
49 // Uncomment the following line to generate graphs from models:
50 //
51 // #define GRAPH GRAPH
52 
53 // We randomly generate tests (model + input data) at runtime, and verify
54 // that we get the same results whether we do partitioned compilation/execution
55 // or non partitioned compilation/execution.  We perform a test as follows:
56 //
57 // (1) Randomly generate a model (graph and weights), randomly generate input
58 //     data, randomly assign inputs and outputs to CPU memory or to shared
59 //     memory.
60 //
61 //     Randomly leaves dimensions unset for intermediate operands.
62 //
63 // (2) Randomly generate drivers based on the sample driver, each of which
64 //     executes models on the CPU.  They differ according to which operations
65 //     they support.
66 //
67 // (3) Compile and execute without partitioning, saving off the results.
68 //
69 // (4) Compile and execute with partitioning.
70 //
71 // (5) Verify that the saved results from (3) match the results from (4).
72 //
73 // For simplicity, all data (model inputs, model outputs, weights,
74 // temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
75 // dimensions are fixed throughout a particular test case (and
76 // randomly determined).  This prevents us from having to find a
77 // mechanism to "resize" data (e.g., if ADD#a operates on data of size
78 // 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
79 // and ADD#b become inputs of ADD#c, do we need to insert one or more
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
81 // from size 2x2 to size 3x3 in order to match ADD#b).  In the few
82 // cases where an operand cannot be of this type, it is a constant
83 // (e.g., activation functions and RNN bias).
84 //
85 // Each operation we generate has a signature (described in more
86 // detail later).  The randomly generated drivers decide which
87 // operations they can execute by checking operation signatures.  Once
88 // we have built the model and know the set of signatures, we randomly
89 // assign each signature to a driver.  No signature is supported by
90 // multiple drivers -- we're not testing the logic that the
91 // partitioning algorithm uses to select the best driver for an
92 // operation.
93 
94 namespace android {
95 
96 using CompilationBuilder = nn::CompilationBuilder;
97 using Device = nn::Device;
98 using DeviceManager = nn::DeviceManager;
99 using ExecutionPlan = nn::ExecutionPlan;
100 using HidlModel = hardware::neuralnetworks::V1_1::Model;
101 using MemoryBuilder = nn::Memory;
102 using ModelBuilder = nn::ModelBuilder;
103 using Result = nn::wrapper::Result;
104 using SampleDriver = nn::sample_driver::SampleDriver;
105 using WrapperCompilation = nn::wrapper::Compilation;
106 using WrapperExecution = nn::wrapper::Execution;
107 using WrapperMemory = nn::wrapper::Memory;
108 using WrapperModel = nn::wrapper::Model;
109 using WrapperOperandType = nn::wrapper::OperandType;
110 using WrapperType = nn::wrapper::Type;
111 
112 namespace {
113 
/// Configure test size //////////////////////////////////////////////////////////

// Upper bound on the number of operations per generated model.
// We may exceed this in order to connect otherwise disjoint subgraphs.
static constexpr unsigned kMaxNumOperations = 100;

// We build models to process 2-D square tensors up to this size in each dimension;
// note that the API promotes by-value weights larger than 128 to by-reference,
// so we want to ensure that we can pick both types that exceed and types that do
// not exceed this size.
static constexpr unsigned kMaxProblemSize = 8;

// First seed for pseudorandom test generation.
static constexpr unsigned kFirstSeed = 0;

// Number of test cases.
static constexpr unsigned kNumTestCases = 225;

// Force all graph weights into a single pool (as we recommend to users)
// or allow them to be distributed across multiple pools (more stress
// on the partitioning algorithm and the rest of the runtime)?
// Forcing all graph weights into a single pool may be necessary to
// prevent large graphs from running up against http://b/70302693
// "NNAPI overuses (?) fds".
static constexpr bool kAllWeightsInOnePool = false;

//////////////////////////////////////////////////////////////////////////////////
140 
141 // The signature of an operation consists of the operation type (e.g.,
142 // ADD) and the activation function (use -1 in the case of an
143 // operation type for which the activation function is inapplicable).
144 typedef std::pair<ANeuralNetworksOperationType, int> Signature;
145 
146 // This class adds some simple utilities on top of
147 // ::android::nn::wrapper::Model.  For example, it provides access to
148 // certain features from ModelBuilder that are not exposed by the base
149 // class (such as inputCount() and operation index).
150 class TestModel : public WrapperModel {
151 public:
152 
addOperation(ANeuralNetworksOperationType type,const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)153     uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
154                           const std::vector<uint32_t>& outputs) {
155         const uint32_t operationIndex = operationCount();
156         mOperations.push_back(outputs);
157         WrapperModel::addOperation(type, inputs, outputs);
158         return operationIndex;
159     }
160 
operationCount() const161     uint32_t operationCount() const {
162         return mOperations.size();
163     }
164 
inputCount() const165     uint32_t inputCount() const {
166         return builder()->inputCount();
167     }
outputCount() const168     uint32_t outputCount() const {
169         return builder()->outputCount();
170     }
171 
getOperationOutputs(uint32_t index) const172     const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const {
173         assert(index < mOperations.size());
174         return mOperations[index];
175     }
176 
177     // All values are immediately copied into the model (we need to do
178     // this ourselves in cases where the underlying NNAPI does not).
setOperandValue(uint32_t index,const std::vector<float> & value)179     void setOperandValue(uint32_t index, const std::vector<float>& value) {
180         const size_t length = value.size() * sizeof(float);
181 
182         if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
183             WrapperModel::setOperandValue(index, value.data(), length);
184         } else {
185             mOperandValues.push_back(value);
186             WrapperModel::setOperandValue(index, mOperandValues.back().data(), length);
187         }
188     }
189 
setOperandValue(uint32_t index,int32_t value)190     void setOperandValue(uint32_t index, int32_t value) {
191         assert(sizeof(value) <=  ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
192         WrapperModel::setOperandValue(index, &value, sizeof(value));
193     }
194 
195 private:
196 
builder() const197     const ModelBuilder* builder() const {
198         return reinterpret_cast<const ModelBuilder*>(getHandle());
199     }
200 
201     // Representation of operations: vector index is operation number,
202     // vector value is operation's output operands.
203     std::vector<std::vector<uint32_t>> mOperations;
204 
205     // Large operand values -- not immediately copied into the
206     // WrapperModel, so remembered here instead.
207     std::vector<std::vector<float>> mOperandValues;
208 };
209 
// This class adds some simple utilities on top of
// ::android::nn::wrapper::Compilation in order to provide access to
// certain features from CompilationBuilder that are not exposed by
// the base class.
class TestCompilation : public WrapperCompilation {
public:
    TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {}

    // Exposes CompilationBuilder's test-only partitioning control.
    Result setPartitioning(uint32_t partitioning) {
        return static_cast<Result>(builder()->setPartitioning(partitioning));
    }

    using WrapperCompilation::finish;
    // Finishes compilation against an explicit set of devices rather
    // than whatever the runtime would discover on its own.
    Result finish(const std::vector<std::shared_ptr<Device>>& devices) {
        return static_cast<Result>(builder()->finish(devices));
    }

    // Test-only view of the plan produced by partitioned compilation.
    const ExecutionPlan& getExecutionPlan() const {
        return builder()->forTest_getExecutionPlan();
    }

private:
    // getHandle() returns the underlying ANeuralNetworksCompilation,
    // which the runtime implements as a CompilationBuilder.
    const CompilationBuilder* builder() const {
        return reinterpret_cast<const CompilationBuilder*>(getHandle());
    }
    CompilationBuilder* builder() {
        return reinterpret_cast<CompilationBuilder*>(getHandle());
    }
};
239 
240 // This class is used to manage a collection of memory regions,
241 // disjoint windows onto a set of Memory instances, each of which is
242 // associated with a single shared memory region.  Each region and
243 // Memory instance is assigned a number.  The usage pattern is as
244 // follows:
245 // - Call addMemory() and addRegion() as many times as needed to
246 //   declare (but not define) Memory instances and declare region
247 //   instances.
248 // - Call layout() to define the Memory instances.
249 // - Call getRegion() as many times as needed to get the details
250 //   of memory regions (such as address, or Memory/offset/length).
251 // The Memory instances created by layout() are owned by the
252 // TestMemories instance, and are destroyed when the TestMemories
253 // instance is destroyed.
254 class TestMemories {
255 public:
256     TestMemories() = default;
257     ~TestMemories();
258 
259     TestMemories(const TestMemories&) = delete;
260     TestMemories& operator=(const TestMemories&) = delete;
261 
addMemory()262     unsigned addMemory() {
263         assert(!mLayoutDone);
264         mMemorySizes.push_back(0);
265         return memoryCount() - 1;
266     }
memoryCount() const267     unsigned memoryCount() const {
268         return mMemorySizes.size();
269     }
270 
addRegion(unsigned memoryIndex,uint32_t length)271     unsigned addRegion(unsigned memoryIndex, uint32_t length) {
272         assert(!mLayoutDone);
273         assert(memoryIndex < memoryCount());
274         uint32_t& memorySize = mMemorySizes[memoryIndex];
275         auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length);
276         mRegions.push_back(desc);
277         memorySize += length;
278         return regionCount() - 1;
279     }
regionCount() const280     unsigned regionCount() const {
281         return mRegions.size();
282     }
283 
284     void layout();
285 
getRegion(unsigned regionIndex,const WrapperMemory ** pMemory,uint32_t * pOffset,uint32_t * pLength)286     void* getRegion(unsigned regionIndex,
287                     const WrapperMemory** pMemory, uint32_t* pOffset, uint32_t* pLength) {
288         assert(mLayoutDone);
289         assert(regionIndex < regionCount());
290         const auto& regionDescriptor = mRegions[regionIndex];
291         const WrapperMemory* memory = &mMemorys[std::get<0>(regionDescriptor)];
292         uint32_t offset = std::get<1>(regionDescriptor);
293         uint32_t length = std::get<2>(regionDescriptor);
294 
295         uint8_t* buffer;
296         if (reinterpret_cast<MemoryBuilder*>(memory->get())->getPointer(&buffer) !=
297             ANEURALNETWORKS_NO_ERROR) {
298             assert(0);
299         }
300 
301         if (pMemory) *pMemory = memory;
302         if (pOffset) *pOffset = offset;
303         if (pLength) *pLength = length;
304 
305         return buffer + offset;
306     }
307 
getRegion(unsigned regionIndex)308     void* getRegion(unsigned regionIndex) {
309         return getRegion(regionIndex, nullptr, nullptr, nullptr);
310     }
311 
312 private:
313     // Index is the memory index; value is the size of the memory
314     // (aggregate size of all regions in the memory).
315     std::vector<uint32_t> mMemorySizes;
316 
317     // Index is the memory index.
318     std::vector<WrapperMemory> mMemorys;
319     std::vector<int> mFDs;
320 
321     // Index is the region index; tuple represents memory index,
322     // region offset within memory, region length.
323     std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions;
324 
325     // For sanity checking.
326     bool mLayoutDone = false;
327 };
328 
layout()329 void TestMemories::layout() {
330     assert(!mLayoutDone);
331     for (uint32_t memorySize : mMemorySizes) {
332         const int fd = ASharedMemory_create(nullptr, memorySize);
333         assert(fd >= 0);
334         mMemorys.emplace_back(memorySize, PROT_READ | PROT_WRITE, fd, 0);
335         mFDs.push_back(fd);
336     }
337     mLayoutDone = true;
338 }
339 
~TestMemories()340 TestMemories::~TestMemories() {
341     for (int fd : mFDs) {
342         close(fd);
343     }
344 }
345 
// Test fixture, parameterized over a PRNG seed so that each test case
// generates (and checks) a different pseudorandom model.
class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
public:
    RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}

    // Computes the (operation type, activation function) signature of
    // "operation" within "model" -- see the file comment above.
    static Signature getSignature(const HidlModel& model, const Operation& operation);

protected:
    // Dumps the model as a graph; a no-op unless GRAPH is defined.
    void graphDump(const WrapperModel& model);

    bool randBool() {
        return randUInt(2) == 1;
    }

    double randFrac() {  // [0.0, 1.0)
        return mRandNumUnitDist(mRandNumEng);
    }

    unsigned randUInt(unsigned limit) {  // [0, limit)
        return unsigned(randFrac() * limit);
    }

    // Represents an operation in which every input and output operand
    // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except:
    // - One input operand may be an activation function.
    // - Any number of input operands may be "special" in some other way
    //   (and in this implementation, not produced by any other operation).
    // We require that:
    // - There be at least one input operand that is neither an
    //    activation function nor "special".
    struct OperationPattern {
        int mOperationType;
        unsigned mNumInputs;
        unsigned mNumOutputs;
        int mActivationFunctionInputIndex;  // <0 if none

        // Returns operand index, or <0 if input is normal (must not
        // be called for an activation function operand).  Function
        // should have the following prototype:
        //
        //     int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex);
        //
        int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned);
    };

    // One entry per operation type the generator knows how to emit;
    // defined below the class.
    static const OperationPattern kOperationPatterns[];

    // Special-input maker for RNN (see OperationPattern::mMakeSpecialInput):
    // creates the constant bias operand; every other input is "normal".
    int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) {
        if (inputIndex != 3) {
            return -1;
        }

        // input operand 3 is bias, a 1-D tensor
        const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, { problemSize });
        const uint32_t operandIndex = model->addOperand(&biasType);
        std::vector<float> biasValue(problemSize);
        std::generate(biasValue.begin(), biasValue.end(),
                      [this]{ return randFrac(); });
        model->setOperandValue(operandIndex, biasValue);
        return int(operandIndex);
    }

#ifdef VERBOSE
    // Streamable summary of a model's operation/operand/input/output
    // counts, for debugging output.
    class ModelStats {
    public:
        ModelStats(const ModelBuilder* model) :
                mBuilder(model) { }
        ModelStats(const WrapperModel* model) :
                mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) { }
        friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) {
            const uint32_t operandCount = stats.mBuilder->operandCount();
            const uint32_t inputCount = stats.mBuilder->inputCount();
            const uint32_t outputCount = stats.mBuilder->outputCount();
            out << "operationCount = " << stats.mBuilder->operationCount()
                << ", operandCount = " << operandCount
                << ", inputCount = " << inputCount
                << " (" << (double(inputCount) / operandCount) << ")"
                << ", outputCount = " << outputCount
                << " (" << (double(outputCount) / operandCount) << ")";
            return out;
        }
    private:
        const ModelBuilder* mBuilder;
    };
#endif

private:
    // Pseudorandom state, seeded from the test parameter so each test
    // case is deterministic and reproducible.
    std::mt19937 mRandNumEng;
    std::uniform_real_distribution<double> mRandNumUnitDist;
};
435 
// One entry per generatable operation type.  Initializer order matches
// OperationPattern's fields: operation type, number of inputs, number
// of outputs, activation-function input index (<0 if none), and
// special-input maker (nullptr if none).
const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = {
    { ANEURALNETWORKS_ADD, 3, 1, 2, nullptr },
    { ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr },
    { ANEURALNETWORKS_MUL, 3, 1, 2, nullptr },
    { ANEURALNETWORKS_RNN, 6, 2, 5, &RandomPartitioningTest::makeRnnSpecialInput },
    { ANEURALNETWORKS_TANH, 1, 1, -1, nullptr },
};
443 
getSignature(const HidlModel & model,const Operation & operation)444 Signature RandomPartitioningTest::getSignature(const HidlModel& model, const Operation& operation) {
445     static const std::map<ANeuralNetworksOperationType, int> kOperationToActivation = []() {
446         std::map<ANeuralNetworksOperationType, int> result;
447         for (const auto& pattern : kOperationPatterns) {
448             result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex;
449         }
450         return result;
451     }();
452 
453     const ANeuralNetworksOperationType operationType =
454             static_cast<ANeuralNetworksOperationType>(operation.type);
455     const int activationFunctionInputIndex = kOperationToActivation.at(operationType);
456     if (activationFunctionInputIndex < 0) {
457         return Signature(operationType, -1);
458     }
459 
460     const Operand& operand = model.operands[operation.inputs[activationFunctionInputIndex]];
461     assert(operand.lifetime == OperandLifeTime::CONSTANT_COPY);
462     assert(operand.type == OperandType::INT32);
463     int32_t value;
464     memcpy(&value,
465            &model.operandValues[operand.location.offset],
466            operand.location.length);
467     return Signature(operationType, value);
468 }
469 
// Writes a graphical representation of "model", named after the current
// test seed.  Compiled out (a no-op) unless GRAPH is defined above.
void RandomPartitioningTest::graphDump([[maybe_unused]] const WrapperModel& model) {
#ifdef GRAPH
    const std::string name = "Test-" + std::to_string(GetParam());
    nn::bridge_tests::graphDump(name.c_str(),
                                reinterpret_cast<const ModelBuilder*>(model.getHandle()));
#endif
}
477 
478 class TestDriver : public SampleDriver {
479 public:
480     // Behaves like SampleDriver, except that it only supports
481     // operations with the specified signatures.
TestDriver(const char * name,std::set<Signature> signatures)482     TestDriver(const char* name, std::set<Signature> signatures) :
483             SampleDriver(name), mSignatures(std::move(signatures)) { }
484 
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)485     Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
486         android::nn::initVLogMask();
487         Capabilities capabilities =
488                 {.float32Performance = {.execTime = 0.75f, .powerUsage = 0.75f},
489                  .quantized8Performance = {.execTime = 0.75f, .powerUsage = 0.75f},
490                  .relaxedFloat32toFloat16Performance = {.execTime = 0.75f, .powerUsage = 0.75f}};
491         _hidl_cb(ErrorStatus::NONE, capabilities);
492         return Void();
493     }
494 
getSupportedOperations_1_1(const HidlModel & model,getSupportedOperations_cb cb)495     Return<void> getSupportedOperations_1_1(const HidlModel& model,
496                                             getSupportedOperations_cb cb) override {
497         if (nn::validateModel(model)) {
498             const size_t count = model.operations.size();
499             std::vector<bool> supported(count);
500             for (size_t i = 0; i < count; i++) {
501                 supported[i] =
502                     (mSignatures.count(
503                         RandomPartitioningTest::getSignature(
504                             model,
505                             model.operations[i])) != 0);
506             }
507             cb(ErrorStatus::NONE, supported);
508         } else {
509             std::vector<bool> supported;
510             cb(ErrorStatus::INVALID_ARGUMENT, supported);
511         }
512         return Void();
513     }
514 
prepareModel_1_1(const HidlModel & model,ExecutionPreference preference,const sp<IPreparedModelCallback> & callback)515     Return<ErrorStatus> prepareModel_1_1(const HidlModel& model, ExecutionPreference preference,
516                                          const sp<IPreparedModelCallback>& callback) override {
517         // NOTE: We verify that all operations in the model are supported.
518         ErrorStatus outStatus = ErrorStatus::INVALID_ARGUMENT;
519         auto ret = getSupportedOperations_1_1(
520             model,
521             [&outStatus](ErrorStatus inStatus, const hidl_vec<bool>& supportedOperations) {
522                 if (inStatus == ErrorStatus::NONE) {
523                     if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
524                                     [](bool v){ return v; })) {
525                         outStatus = ErrorStatus::NONE;
526                     }
527                 }
528             });
529         if (ret.isOk() && (outStatus == ErrorStatus::NONE)) {
530             return SampleDriver::prepareModel_1_1(model, preference, callback);
531         } else {
532             callback->notify(ErrorStatus::INVALID_ARGUMENT, nullptr);
533             return ErrorStatus::INVALID_ARGUMENT;
534         }
535     }
536 
537 private:
538     const std::set<Signature> mSignatures;
539 };
540 
// Run the test once per seed in [kFirstSeed, kFirstSeed + kNumTestCases).
INSTANTIATE_TEST_CASE_P(Seed, RandomPartitioningTest,
                        ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases));
543 
TEST_P(RandomPartitioningTest,Test)544 TEST_P(RandomPartitioningTest, Test) {
545     LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam();
546 
547 #ifdef VERBOSE
548     std::cout << std::setprecision(2) << std::fixed << std::setw(4);
549 #endif
550 
551     const unsigned problemSize = 1+randUInt(kMaxProblemSize);
552     const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, { problemSize, problemSize });
553     const WrapperOperandType unknownDimensionsType(WrapperType::TENSOR_FLOAT32, { 0, 0 });
554 
555     static const WrapperOperandType activationFunctionType(WrapperType::INT32, { });
556 
557     const unsigned numOperations = 2+randUInt(kMaxNumOperations-1);
558     const bool allowDeadOperations = (randFrac() < 0.2);
559     const bool allowUnknownDimensions = (randFrac() < 0.25);
560 
561     // TODO: The current algorithm builds the graph in a forward
562     // direction (i.e., later-generated operations consume outputs
563     // from earlier-generated operations).  In order to get more
564     // variation in graph topology, perhaps we should also create an
565     // algorithm to build the graph in a backward direction (i.e.,
566     // later-generated operations produce outputs to be consumed by
567     // earlier-generated operations).
568     [[maybe_unused]] const bool buildForward = randBool();
569 
570     // TODO: Add a form of forced connectivity that operates by
571     // joining disjoint subgraphs rather than by forcing a root.
572     const bool forceCommonRoot = (randFrac() < 0.75);
573 
574     TestModel model;
575     std::vector<uint32_t> modelInputs;
576     std::vector<uint32_t> modelOutputs;
577 
578     // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32.
579     TestMemories weights;
580 
581     // Keep track of all normal (i.e., not activation function and not
582     // "special") operands that are values (from setOperandValue*()).
583     // .first: operand index
584     // .second: if the operand is already defined (via setOperandValue*()) then ~0U;
585     //          otherwise, the operand has yet to be defined, and this is the corresponding
586     //          region index in "weights"
587     std::vector<std::pair<uint32_t, unsigned>> valueOperands;
588 
589     // An operand is "dead" if it is not consumed by another operation
590     // and is not a model output.  Key is operand index; value is
591     // operation index.
592     std::map<uint32_t, uint32_t> deadOperands;
593 
594     // An operation is "dead" if all of its outputs are dead.
595     std::set<uint32_t> deadOperations;
596 
597     // Collect the signatures of operations in this model.
598     std::set<Signature> signatures;
599 
600     // For reporting purposes, keep track of the number of root
601     // operations (those that do not consume results produced by other
602     // operations).
603     unsigned rootOperationCount = 0;
604 
605     // Track if we added operands with unknown dimensions. In this case,
606     // partitioned compilation will fail if such an operand is read in a
607     // different partition than it is written.
608     bool hasUnknownDimensions = false;
609 
610     // Generate operations.
611     for (unsigned i = 0; i < numOperations; i++) {
612         const unsigned operationPatternIndex =
613                 randUInt(sizeof(kOperationPatterns)/sizeof(kOperationPatterns[0]));
614         const auto& operationPattern = kOperationPatterns[operationPatternIndex];
615 
616         // INPUTS //////////////////////////////////////////////////////////////////////////////////
617 
618         std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U);
619 
620         // First, process activation function and special inputs, and
621         // keep track of which inputs remain.
622         std::vector<uint32_t> normalOperationInputIndexes;
623         int32_t activationFunction = -1;
624         for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs;
625              operationInputIndex++) {
626             if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) {
627                 const uint32_t operandIndex = model.addOperand(&activationFunctionType);
628                 activationFunction = randUInt(4);
629                 if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) {
630                     // workaround for http://b/69011131
631                     activationFunction = ANEURALNETWORKS_FUSED_NONE;
632                 }
633                 model.setOperandValue(operandIndex, activationFunction);
634                 operationInputs[operationInputIndex] = operandIndex;
635                 continue;
636             }
637             if (operationPattern.mMakeSpecialInput != nullptr) {
638                 const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))(
639                     problemSize, &model, operationInputIndex);
640                 if (operandIndex >= 0) {
641                     operationInputs[operationInputIndex] = operandIndex;
642                     continue;
643                 }
644             }
645             normalOperationInputIndexes.push_back(operationInputIndex);
646         }
647         assert(!normalOperationInputIndexes.empty());
648         signatures.insert(Signature(operationPattern.mOperationType, activationFunction));
649 
650         // A (normal) operation input can be one of:
651         // - a new or existing model input
652         // - an output of an existing operation
653         // - an OperandValue
654         // - an OperandValueFromMemory
655         // Some guidelines:
656         // - We generally don't want all of an operation's inputs to be values (constants)
        const unsigned normalOperationInputCount = normalOperationInputIndexes.size();
        //     How many of this operation's inputs are constants?
        unsigned normalOperationInputConstantCount = 0;
        //     How many of this operation's inputs are model inputs?
        unsigned normalOperationInputModelInputCount = 0;
        // We begin by deciding what kind of input each (normal) operation will be; we don't
        // actually pick input operand indexes at this time, because we might override this
        // decision later.
        enum InputKind { IK_MODEL_INPUT, IK_OPERATION_OUTPUT, IK_VALUE };
        std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount);
        // std::generate invokes the lambda once per input slot, in order; the two
        // counters captured by reference make later choices depend on earlier ones.
        std::generate(normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
                      [this, &model,
                       numOperations,
                       normalOperationInputCount,
                       &normalOperationInputConstantCount,
                       &normalOperationInputModelInputCount]() -> InputKind {
                          // Constant?  Becomes less likely the more
                          // constants we already have as inputs to
                          // this operation.
                          if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) /
                                                   normalOperationInputCount)) {
                              normalOperationInputConstantCount++;
                              return IK_VALUE;
                          }

                          // Model input?  Becomes less likely the
                          // more model inputs we already have as
                          // inputs to this operation, and the further
                          // along we are in generating this model
                          // (i.e., the more operations we have
                          // generated).
                          // Note: when the model has no operations yet, a
                          // non-constant input *must* be a model input, since
                          // there are no operation outputs to consume.
                          if ((model.operationCount() == 0) ||
                              (randFrac() < 0.5 *
                               (1 - double(normalOperationInputModelInputCount) /
                                normalOperationInputCount) *
                               std::min(0.3, (1 - double(model.operationCount()) /
                                              numOperations)))) {
                              normalOperationInputModelInputCount++;
                              return IK_MODEL_INPUT;
                          }

                          // Else output of an existing operation.
                          return IK_OPERATION_OUTPUT;
                      });
701 
702         // Now force common root or model input, if necessary.  (A
703         // model must have at least one input.)
704         auto force =
705                 [this, &normalOperationInputKinds, normalOperationInputCount](InputKind forceKind){
706             if (std::none_of(normalOperationInputKinds.begin(),
707                              normalOperationInputKinds.end(),
708                              [forceKind](InputKind kind){ return kind == forceKind; })) {
709                 normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind;
710             }
711         };
        // A "root" operation is one that consumes no operation output; forcing
        // an IK_OPERATION_OUTPUT input prevents this operation from being a
        // new root (only possible once at least one operation exists).
        if (forceCommonRoot && (model.operationCount() != 0)) {
            force(IK_OPERATION_OUTPUT);
        }
        // A model must have at least one input; if none exist yet, this must be
        // the very first operation (otherwise an earlier iteration would have
        // created one), so force one of its inputs to be a model input.
        if (modelInputs.empty()) {
            assert(model.operationCount() == 0);
            force(IK_MODEL_INPUT);
        }
719 
        // Finally create the normal inputs: for each slot, turn the previously
        // chosen InputKind into a concrete operand index.
        bool isRootOperation = true;
        for (unsigned i = 0; i < normalOperationInputCount; i++) {
            uint32_t operandIndex = ~0U;
            switch (normalOperationInputKinds[i]) {
                case IK_MODEL_INPUT: {
                    // 50/50: reuse an existing model input, or mint a new one.
                    if (!modelInputs.empty() && (randFrac() < 0.5)) {
                        operandIndex = modelInputs[randUInt(modelInputs.size())];
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        modelInputs.push_back(operandIndex);
                    }
                    break;
                }
                case IK_OPERATION_OUTPUT: {
                    // Prefer (50/50) consuming a "dead" operand -- an operation
                    // output that nothing consumes yet -- otherwise pick any
                    // output of a random existing operation.
                    decltype(deadOperands.begin()) deadOperandI;
                    if (!deadOperands.empty() && (randFrac() < 0.5)) {
                        deadOperandI = deadOperands.begin();
                        std::advance(deadOperandI, randUInt(deadOperands.size()));
                        operandIndex = deadOperandI->first;
                    } else {
                        const uint32_t existingOperationIndex = randUInt(model.operationCount());
                        const auto& existingOperationOutputs =
                                model.getOperationOutputs(existingOperationIndex);
                        operandIndex =
                            existingOperationOutputs[randUInt(existingOperationOutputs.size())];
                        // The randomly-picked output may itself still be dead;
                        // if so, its recorded producer must be the operation we
                        // just picked it from.
                        deadOperandI = deadOperands.find(operandIndex);
                        assert(deadOperandI == deadOperands.end() ||
                               deadOperandI->second == existingOperationIndex);
                    }
                    // Consuming a dead operand revives it, and may also revive
                    // its producing operation (an operation is dead only while
                    // none of its outputs are consumed or exported).
                    if (deadOperandI != deadOperands.end()) {
                        const uint32_t correspondingOperation = deadOperandI->second;
                        deadOperands.erase(deadOperandI);

                        auto deadOperationI = deadOperations.find(correspondingOperation);
                        if (deadOperationI != deadOperations.end()) {
                            deadOperations.erase(deadOperationI);
                        }
                    }
                    isRootOperation = false;
                    break;
                }
                case IK_VALUE: {
                    // 25%: reuse an existing constant operand.
                    if (!valueOperands.empty() && (randFrac() < 0.25)) {
                        operandIndex = valueOperands[randUInt(valueOperands.size())].first;
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        if (randFrac() < 0.5) {
                            // Constant held directly in the model (copied value);
                            // marked with region index ~0U so the finalization
                            // pass below skips it.
                            std::vector<float> value(problemSize * problemSize);
                            std::generate(value.begin(), value.end(), [this]{ return randFrac(); });
                            model.setOperandValue(operandIndex, value);
                            valueOperands.push_back(std::make_pair(operandIndex, ~0U));
                        } else {
                            // Constant held in shared memory: reserve a region now;
                            // the data is filled in after weights.layout() runs.
                            unsigned memoryIndex = ~0U;
                            if ((weights.memoryCount() != 0) &&
                                (kAllWeightsInOnePool || (randFrac() < 0.5))) {
                                memoryIndex = randUInt(weights.memoryCount());
                            } else {
                                memoryIndex = weights.addMemory();
                            }
                            const size_t length = problemSize * problemSize * sizeof(float);
                            const unsigned regionIndex = weights.addRegion(memoryIndex, length);
                            valueOperands.push_back(std::make_pair(operandIndex, regionIndex));
                        }
                    }
                    break;
                }
                default:
                    FAIL();
            }
            operationInputs[normalOperationInputIndexes[i]] = operandIndex;
        }
        if (isRootOperation) {
            rootOperationCount++;
        }
795 
        // OUTPUTS /////////////////////////////////////////////////////////////////////////////////

        // Each output is a fresh operand; occasionally give it unknown
        // dimensions to exercise the partitioner's unknown-size handling.
        std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
        std::generate(operationOutputs.begin(), operationOutputs.end(),
                      [&model, &problemType, &unknownDimensionsType, &hasUnknownDimensions,
                       allowUnknownDimensions, this]{
                          // 3% unknowns causes ~35% of partitionings to fail
                          // (determined by commenting out the fallback code,
                          // running tests and noting number of failures).
                          if (allowUnknownDimensions && randFrac() < 0.03) {
                              hasUnknownDimensions = true;
                              return model.addOperand(&unknownDimensionsType);
                          } else {
                              return model.addOperand(&problemType);
                          }
                      });

        // OPERATION ///////////////////////////////////////////////////////////////////////////////

        const uint32_t operationIndex =
                model.addOperation(operationPattern.mOperationType,
                                   operationInputs, operationOutputs);
        // A brand-new operation and all of its outputs start out "dead" --
        // nothing consumes them yet.  Later iterations or the model-output
        // selection pass may revive them.
        deadOperations.insert(operationIndex);
        std::for_each(operationOutputs.begin(), operationOutputs.end(),
                      [&deadOperands, operationIndex](uint32_t operandIndex) {
                          deadOperands.insert(std::make_pair(operandIndex, operationIndex));
                      });
    }
824 
    // Now finalize the weights: lay out all reserved regions in their shared
    // memories, fill each region with random data, and bind each memory-backed
    // constant operand to its region.
    weights.layout();
    for (const auto& valueOperand : valueOperands) {
        const uint32_t operandIndex = valueOperand.first;
        const unsigned regionIndex = valueOperand.second;

        // ~0U marks a constant whose value was set directly (copied into the
        // model), not placed in shared memory; nothing more to do for it.
        if (regionIndex == ~0U) {
            continue;
        }

        const WrapperMemory* memory;
        uint32_t offset, length;
        float* region =
                static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length));
        assert(length == problemSize * problemSize * sizeof(float));
        std::generate(region, region + problemSize * problemSize, [this]{ return randFrac(); });
        model.setOperandValueFromMemory(operandIndex, memory, offset, length);
    }
843 
    // Now select model outputs.
    for (uint32_t operationIdx = 0, operationCount = model.operationCount();
         operationIdx < operationCount; operationIdx++) {
        const auto& outputs = model.getOperationOutputs(operationIdx);
        for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount;
             outputIdx++) {
            bool modelOutput = false;
            const uint32_t operandIndex = outputs[outputIdx];
            const auto deadOperandI = deadOperands.find(operandIndex);
            if (deadOperandI != deadOperands.end()) {
                // This is not consumed within the model, so unless we
                // make it an output of the model, it's dead.  The
                // further along we are in generating this model
                // (i.e., the more operations we have generated), the
                // more likely we are to classify this operation
                // output as a model output.
                //
                // NOTE(review): operationIdx and operationCount are both
                // uint32_t, so (operationIdx + 1) / operationCount is
                // *integer* division -- 0 for every operation except the
                // last, where it is 1.  As written, probabilityOfModelOutput
                // is therefore 0.0 except for the final operation's outputs
                // (where it is 0.50), contradicting the comment above.  The
                // intent appears to be double(operationIdx + 1) /
                // operationCount -- confirm and fix.
                const double probabilityOfModelOutput =
                        0.50 * [](double x){ return x*x; }((operationIdx + 1) / operationCount);
                modelOutput = (randFrac() < probabilityOfModelOutput);
            } else {
                // This is consumed within the model, so we'll rarely
                // make it an output of the model.
                modelOutput = (randFrac() < 0.05);
            }
            if (!modelOutput) {
                continue;
            }
            modelOutputs.push_back(operandIndex);
            // Exporting a dead operand revives it, and possibly its producer.
            if (deadOperandI != deadOperands.end()) {
                deadOperands.erase(deadOperandI);
                const auto deadOperationI = deadOperations.find(operationIdx);
                if (deadOperationI != deadOperations.end()) {
                    deadOperations.erase(deadOperationI);
                }
            }
        }
    }
    if (!allowDeadOperations) {
        // For each dead operation, pick a random output to become a model output.
        for (uint32_t deadOperationIndex : deadOperations) {
            const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex);
            const uint32_t deadOperandIndex =
                    deadOperationOutputs[randUInt(deadOperationOutputs.size())];
            modelOutputs.push_back(deadOperandIndex);
        }
    }
    // A model must have at least one output.
    if (modelOutputs.empty()) {
        const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount()));
        modelOutputs.push_back(outputs[randUInt(outputs.size())]);
    }
895 
    model.identifyInputsAndOutputs(modelInputs, modelOutputs);
#ifdef VERBOSE
    {
        std::cout << "Original model: " << ModelStats(&model) << std::endl;
        std::cout << "rootOperationCount = " << rootOperationCount
                  << ", deadOperations = ";
        if (allowDeadOperations) {
            std::cout << deadOperations.size();
        } else {
            std::cout << "forbidden (converted " << deadOperations.size() << ")";
        }
        std::cout << std::endl;
    }
#endif
    ASSERT_EQ(model.finish(), Result::NO_ERROR);
    graphDump(model);

    // Non-partitioned compilation.  This is the reference against which the
    // partitioned execution's outputs are compared below.
    TestCompilation c(&model);
    ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
    ASSERT_EQ(c.finish(), Result::NO_ERROR);
917 
    // Create some drivers for partitioned compilation.  Each driver supports
    // a random nonempty subset of the operation signatures used by the model,
    // so the partitioner has real choices to make.
    assert(!signatures.empty());
    std::vector<std::set<Signature>> signaturesForDriver(signatures.size());
    //     First assign each signature to a random driver (a driver is
    //     just represented as an entry in the signaturesForDriver
    //     vector).
    for (Signature signature : signatures) {
        signaturesForDriver[randUInt(signatures.size())].insert(signature);
    }
    //     Now remove each entry that has no signatures.
    auto firstExtra =
        std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(),
                       [](const std::set<Signature>& sigSet) { return sigSet.empty(); });
    if (firstExtra != signaturesForDriver.end()) {
        signaturesForDriver.erase(firstExtra, signaturesForDriver.end());
    }
    //     Now actually create the drivers.
    std::vector<std::shared_ptr<Device>> devices;
    for (unsigned i = 0; i < signaturesForDriver.size(); i++) {
        const std::string name = "TestDriver(" + std::to_string(i) + ")";
        devices.push_back(std::make_shared<Device>(
            name, new TestDriver(name.c_str(), signaturesForDriver[i])));
        ASSERT_TRUE(devices.back()->initialize());
    }
942 
    // Partitioned compilation.
    // For test cases without unknown intermediate operand sizes we require the
    // partitioning to succeed without CPU fallback. With unknown sizes we
    // retry with a fallback if the non-fallback partitioning fails and require
    // the fallback to succeed.
    TestCompilation cNoFallback(&model);
    TestCompilation cWithFallback(&model);
    // c2 points at whichever compilation succeeded; it drives the second
    // (partitioned) execution below.
    TestCompilation *c2 = nullptr;
    ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    auto compilationResult = cNoFallback.finish(devices);
    // Fallback is acceptable only for the specific failure mode caused by
    // sub-model outputs of unknown size; any other failure is a test failure.
    if (hasUnknownDimensions && compilationResult == Result::OP_FAILED &&
        cNoFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize()) {
        ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
                  Result::NO_ERROR);
        ASSERT_EQ(cWithFallback.finish(devices), Result::NO_ERROR);
        c2 = &cWithFallback;
    } else {
        ASSERT_EQ(compilationResult, Result::NO_ERROR);
        c2 = &cNoFallback;
    }

#ifdef VERBOSE
    {
        std::cout << "signatures = " << signatures.size()
                  << ", devices = " << devices.size() << std::endl;
        const ExecutionPlan& plan = c2->getExecutionPlan();
        switch (plan.forTest_getKind()) {
            case ExecutionPlan::Kind::SIMPLE:
                std::cout << "plan: simple" << std::endl;
                break;
            case ExecutionPlan::Kind::COMPOUND: {
                const auto& steps = plan.forTest_compoundGetSteps();
                std::set<const Device*> devicesInPlan;
                for (const auto& step : steps) {
                    devicesInPlan.insert(step->getDevice().get());
                }
                std::cout << "plan: compound, " << steps.size() << " steps over "
                          << devicesInPlan.size() << " devices" << std::endl;
                for (unsigned i = 0; i < steps.size(); i++) {
                    std::cout << "Step " << i << ": "
                              << ModelStats(steps[i]->getSubModel()) << std::endl;
                }
                break;
            }
            default:
                std::cout << "Unexpected plan kind: "
                    << static_cast<unsigned>(plan.forTest_getKind());
                break;
        }
    }
#endif
995 
    // For execution:
    // - create master inputs (one long vector) and master output value
    //   - master inputs will be copied to actual inputs before each
    //     of the two executions
    //   - master output will be used to fill actual outputs before each
    //     of the two executions
    // - create actual inputs and outputs
    // - first execution (non-partitioned)
    //   - initialize inputs and (to avoid unrelated oddities) outputs
    //   - execute
    //   - copy outputs to a save area (one long vector)
    // - second execution (partitioned)
    //   - (to avoid unrelated oddities) initialize inputs and outputs
    //   - execute
    //   - compare outputs to save area

    // If the runtime and drivers are working properly, execution
    // should not change the inputs.  Nonetheless, we reinitialize the
    // inputs for each execution, so as to avoid unrelated problems
    // appearing to be problems related to unpartitioned execution
    // versus partitioned execution.  Similarly, execution behavior
    // should not be dependent on the outputs; but we'll initialize the
    // outputs anyway.
    std::vector<float> masterInputs(problemSize * problemSize * model.inputCount());
    std::generate(masterInputs.begin(), masterInputs.end(), [this]{ return randFrac(); });
    const float masterOutput = randFrac();

    // Create the memory for the actual inputs and outputs.
    struct InputOutputDescriptor {
        enum Kind { INPUT, OUTPUT };
        Kind mKind;

        // The input or output either resides in a local buffer
        // (mVector, in which case mMemoryRegion is ignored); or in a
        // shared memory region within a TestMemories instance
        // (mMemoryRegion, in which case mVector is ignored).
        enum Location { VECTOR, REGION };
        Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }

        std::vector<float> mVector;
        unsigned mMemoryRegion;
    };
    // Inputs first, then outputs, matching the index order that
    // prepareForExecution below relies on after the shuffle.
    std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
    for (unsigned i = 0; i < ioDescriptors.size(); i++) {
        ioDescriptors[i].mKind = (i < model.inputCount()
                                  ? InputOutputDescriptor::INPUT
                                  : InputOutputDescriptor::OUTPUT);
    }
    //     We randomly interleave inputs and outputs in creation
    //     order, because when we we create memory regions in a
    //     TestMemories instance, the order in which regions are
    //     created within a single Memory is the order they'll be laid
    //     out in that memory; and when we have inputs and outputs
    //     within the same Memory, we want the possibility that
    //     they'll be interleaved.
    // NOTE(review): std::random_shuffle was deprecated in C++14 and removed
    // in C++17; if this file moves to C++17, replace with a hand-rolled
    // Fisher-Yates using randUInt (std::shuffle is not a drop-in replacement
    // here because it takes a URBG rather than an index-generating functor).
    std::random_shuffle(ioDescriptors.begin(), ioDescriptors.end(),
                        [this](unsigned n) { return randUInt(n); });
    TestMemories ioMemories;
    // 50/50 per descriptor: local vector vs. a region in shared memory.
    for (auto &desc : ioDescriptors) {
        if (randFrac() < 0.5) {
            desc.mVector.resize(problemSize * problemSize);
        } else {
            // TODO: common this with the way we create IK_VALUE inputs?
            unsigned memoryIndex = ~0U;
            if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) {
                memoryIndex = randUInt(ioMemories.memoryCount());
            } else {
                memoryIndex = ioMemories.addMemory();
            }
            const size_t length = problemSize * problemSize * sizeof(float);
            desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length);
        }
    }
    ioMemories.layout();
1070 
    // Function to set up actual inputs and outputs (initializing them
    // and telling the WrapperExecution about them).  Inputs are filled from
    // masterInputs (by input index), outputs are filled with masterOutput;
    // this is done identically before each of the two executions so any
    // output difference can only come from the execution itself.
    auto prepareForExecution =
            [&model, &ioDescriptors, &ioMemories,
             &masterInputs, &masterOutput, problemSize, &problemType](WrapperExecution *e) {
        uint32_t inputIndex = 0, outputIndex = 0;
        for (auto &desc : ioDescriptors) {
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                if (desc.mKind == InputOutputDescriptor::INPUT) {
                    const size_t inputOffset = inputIndex * problemSize * problemSize;
                    std::copy(masterInputs.begin() + inputOffset,
                              masterInputs.begin() + inputOffset + problemSize * problemSize,
                              desc.mVector.begin());
                    e->setInput(inputIndex++, desc.mVector.data(),
                                desc.mVector.size() * sizeof(float));
                } else {
                    std::fill(desc.mVector.begin(),
                              desc.mVector.begin() + problemSize * problemSize,
                              masterOutput);
                    // The explicit operand type pins the output's dimensions,
                    // which matters when the operand was created with unknown
                    // dimensions.
                    e->setOutput(outputIndex++, desc.mVector.data(),
                                 desc.mVector.size() * sizeof(float),
                                 &problemType.operandType);
                }
            } else {
                const WrapperMemory* memory;
                uint32_t offset, length;
                float* region =
                        static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion,
                                                                 &memory, &offset, &length));
                assert(length == problemSize * problemSize * sizeof(float));
                if (desc.mKind == InputOutputDescriptor::INPUT) {
                    const size_t inputOffset = inputIndex * problemSize * problemSize;
                    std::copy(masterInputs.begin() + inputOffset,
                              masterInputs.begin() + inputOffset + problemSize * problemSize,
                              region);
                    e->setInputFromMemory(inputIndex++, memory, offset, length);
                } else {
                    std::fill(region,
                              region + problemSize * problemSize,
                              masterOutput);
                    e->setOutputFromMemory(outputIndex++, memory, offset, length,
                                           &problemType.operandType);
                }
            }
        };  // NOTE(review): stray ';' after the for-loop body -- harmless empty statement.
        assert(inputIndex == model.inputCount());
        assert(outputIndex == model.outputCount());
    };
1119 
    // Non-partitioned execution.
    WrapperExecution e(&c);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e));
    ASSERT_EQ(e.compute(), Result::NO_ERROR);

    // Copy the outputs of the non-partitioned execution to a save area.
    // Outputs are saved in descriptor order, which matches the outputIndex
    // order used when they were registered by prepareForExecution.
    std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount());
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                std::copy(desc.mVector.begin(),
                          desc.mVector.end(),
                          nonPartitionedOutputs.begin() + outputOffset);
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
                std::copy(region,
                          region + problemSize * problemSize,
                          nonPartitionedOutputs.begin() + outputOffset);
            }
#ifdef VERBOSE
            {
                std::cout << "output[" << outputIndex << "] = {";
                for (auto I = nonPartitionedOutputs.begin() + outputOffset,
                             E = nonPartitionedOutputs.begin() +
                                     outputOffset + problemSize * problemSize;
                     I != E; I++) {
                    std::cout << " " << *I;
                }
                std::cout << " }" << std::endl;
            }
#endif
            outputIndex++;
        }
    }
1159 
    // Partitioned execution.
    WrapperExecution e2(c2);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
    ASSERT_EQ(e2.compute(), Result::NO_ERROR);

    // Compare the outputs of the partitioned execution to the save
    // area containing the outputs of the non-partitioned execution.
    // Exact (bitwise) float equality is intentional: both executions ran the
    // same graph on the same inputs, so any divergence is a partitioning bug.
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            SCOPED_TRACE(outputIndex);
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                ASSERT_TRUE(std::equal(desc.mVector.begin(),
                                       desc.mVector.end(),
                                       nonPartitionedOutputs.begin() + outputOffset));
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
                ASSERT_TRUE(std::equal(region,
                                       region + problemSize * problemSize,
                                       nonPartitionedOutputs.begin() + outputOffset));
            }
            outputIndex++;
        }
    }
}
1189 
1190 }  // namespace
1191 }  // namespace android
1192