1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <ControlFlow.h>
18 #include <HalInterfaces.h>
19 #include <SampleDriver.h>
20 #include <Utils.h>
21 #include <ValidateHal.h>
22 #include <gtest/gtest.h>
23 
24 #include <algorithm>
25 #include <filesystem>
26 #include <functional>
27 #include <iostream>
28 #include <map>
29 #include <memory>
30 #include <numeric>
31 #include <queue>
32 #include <set>
33 #include <string>
34 #include <tuple>
35 #include <type_traits>
36 #include <utility>
37 #include <vector>
38 
39 #include "CompilationBuilder.h"
40 #include "ExecutionPlan.h"
41 #include "HalUtils.h"
42 #include "Manager.h"
43 #include "ModelBuilder.h"
44 #include "NeuralNetworks.h"
45 #include "NeuralNetworksOEM.h"
46 #include "TestNeuralNetworksWrapper.h"
47 #include "TmpDirectoryUtils.h"
48 
49 // Uncomment the following line to generate some debugging output that
50 // may be useful when analyzing failures:
51 //
52 // #define VERBOSE VERBOSE
53 
54 // These tests do whitebox testing of the graph partitioning
55 // algorithm.  It is "whitebox" in the sense that we're not evaluating
56 // whether a particular partitioning is legal, or "good enough"
57 // according to some metric, but whether it exactly matches the
58 // expected behavior of the current partitioning algorithm.
59 //
60 // A key part of the current partitioning algorithm is to determine
61 // which device among the available devices should be the one to
62 // execute a particular operation from the graph.  This determination
63 // is made "locally" -- i.e., it does not depend on the graph
64 // topology, only on the properties of the operation in question.
65 // IDevice::getSupportedOperations() indicates which operations in a
66 // graph can be executed on a device, and IDevice::getCapabilities()
67 // indicates how "good" that device is for executing particular kinds
68 // of operations.  For each operation, the partitioning algorithm
69 // picks the "best" device that is capable of executing that
70 // operation; if no device can do so, then the algorithm picks the
71 // cpu.
72 //
73 // As part of this testing approach, we want to make it easy to
74 // specify which operations in a test graph can be executed on which
75 // devices.  We accomplish this in the following way:
76 // - A unary OEM operation is available.
77 // - There is a collection of operations (each of which has two inputs
78 //   and one output):
79 //   - Eight kinds of operations available at driver version V1_0 or
80 //     later.  They are represented in the graph as ADD or MUL with a
81 //     particular activation function -- two opcodes times four
82 //     activation functions means eight available operation kinds.
83 //     This is a low-level representation detail -- when we specify the
84 //     behavior of the device or build a graph, we do so in terms of
85 //     operation encodings 0..7.
86 //   - Eight kinds of operations available at driver version V1_1 or
87 //     later.  They are represented in the graph as DIV or SUB with
88 //     a particular activation function, exactly analogous to ADD
89 //     and MUL above.  We use operation encodings 8..15 for them.
90 //   - Four kinds of operations available at driver version V1_2 or
91 //     later.  They are represented in the graph as MAXIMUM,
92 //     MINIMUM, POW, or PRELU.  These operations take no activation
93 //     function, so we only get 4 operation kinds, for which we
94 //     use operation encodings 16..19.
// - There is another collection of operations (each of which has one input
//   and one output):
//   - A single operation available at driver version V1_3 or
//     later.  It is represented in the graph as HARD_SWISH.
//     This operation takes no activation function; we use
//     operation encoding 20 for it.
101 
102 // When we instantiate a device for testing purposes, we specify what subset of
103 // those operations the device is able to execute.
104 //
105 // In order to determine whether or not a partitioning matches the
106 // expected partitioning, we check the number of partitions, check
107 // which device each partition targets, and compare each partition's
108 // subgraph, model inputs, model outputs, step model inputs, and
109 // step model outputs against what is expected.  In order to perform
110 // that comparison, we build a model to compare against a partition's
111 // step model and run a graph comparison algorithm on it.  The graph
112 // comparison and the inputs and outputs comparisons are syntactic
113 // rather than semantic comparisons -- they don't allow for
114 // reorderings of inputs and outputs.  Because of this, we need to
115 // know exactly how the partitioning algorithm orders inputs and
116 // outputs in order to construct the models and operand lists to
117 // compare against.  Here are some relevant behaviors of the
118 // partitioning algorithm:
119 //
120 // - It builds a subgraph by walking operations in forward topological
121 //   order, and adding each operation's input operands and output
122 //   operands in index order (input followed by output) when that
123 //   operation is added.  (It does not add an input that has already
124 //   been added.)
125 // - It finds model inputs, model outputs, and step model inputs in
126 //   the order the corresponding operands were added to the subgraph
127 //   (see ExecutionStep methods getModelInputs(), getModelOutputs(),
128 //   getTempsAsStepModelInputs(), getOutputsAsStepModelInputs()).
129 // - It finds temps as step model outputs in numerical order of corresponding
130 //   operand number in the original model (see ExecutionStep method
131 //   getTempsAsStepModelOutputs()).
132 // - When it calls identifyInputsAndOutputs() on the step model, it
133 //   passes inputs from getModelInputs() in order, followed by temps as
134 //   step model inputs from getTempsAsStepModelInputs() in order,
135 //   followed by outputs as step model inputs from
136 //   getOutputsAsStepModelInputs() in order; and it passes outputs from
137 //   getModelOutputs() in order followed by step model outputs from
138 //   getTempsAsStepModelOutputs() in order.
139 //
140 // TODO: Maybe the logic for comparing a partition to an expected
141 //       model should be changed to tolerate reorderings of inputs and
142 //       outputs, so that when we build models and lists to compare
143 //       against, we don't need to worry about input and output
144 //       orderings.  But is there a way to do this that still lets us
145 //       verify that we have the correct relationships between
146 //       an (original) model's inputs and outputs and each step model's
147 //       inputs and outputs, as well as the correct relationship
148 //       between step model inputs and outputs across partitions?
149 
150 namespace {
151 
152 namespace hardware = android::hardware;
153 namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
154 namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
155 namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
156 namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
157 using CompilationBuilder = ::android::nn::CompilationBuilder;
158 using Device = ::android::nn::Device;
159 using DeviceManager = ::android::nn::DeviceManager;
160 using ExecutePreference = ::android::nn::test_wrapper::ExecutePreference;
161 using ExecutePriority = ::android::nn::test_wrapper::ExecutePriority;
162 using ExecutionPlan = ::android::nn::ExecutionPlan;
163 using ExecutionStep = ::android::nn::ExecutionStep;
164 using HalCacheToken = ::android::nn::HalCacheToken;
165 using HalVersion = ::android::nn::HalVersion;
166 using HidlModel = V1_3::Model;
167 using IOType = ::android::nn::IOType;
168 using LogicalStep = ::android::nn::LogicalStep;
169 using ModelBuilder = ::android::nn::ModelBuilder;
170 using Operand = ::android::nn::Operand;
171 using Operation = ::android::nn::Operation;
172 using OptionalTimePoint = ::android::nn::OptionalTimePoint;
173 using Result = ::android::nn::test_wrapper::Result;
174 using SampleDriver = ::android::nn::sample_driver::SampleDriver;
175 using SharedDevice = ::android::nn::SharedDevice;
176 using SourceOperandIndex = ::android::nn::SourceOperandIndex;
177 using StepRole = ::android::nn::StepRole;
178 using WrapperCompilation = ::android::nn::test_wrapper::Compilation;
179 using WrapperExecution = ::android::nn::test_wrapper::Execution;
180 using WrapperModel = ::android::nn::test_wrapper::Model;
181 using WrapperOperandType = ::android::nn::test_wrapper::OperandType;
182 using WrapperSymmPerChannelQuantParams = ::android::nn::test_wrapper::SymmPerChannelQuantParams;
183 using WrapperType = ::android::nn::test_wrapper::Type;
184 using android::sp;
185 
update(V1_3::Capabilities * capabilities,V1_3::OperandType type,float perf)186 void update(V1_3::Capabilities* capabilities, V1_3::OperandType type, float perf) {
187     V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
188     ::android::nn::update(&capabilities->operandPerformance, type, perfInfo);
189 }
190 
lookupExecTime(const V1_3::Capabilities & capabilities,V1_3::OperandType type)191 float lookupExecTime(const V1_3::Capabilities& capabilities, V1_3::OperandType type) {
192     return ::android::nn::lookup(capabilities.operandPerformance, type).execTime;
193 }
194 
min(HalVersion a,HalVersion b)195 HalVersion min(HalVersion a, HalVersion b) {
196     return int32_t(a) < int32_t(b) ? a : b;
197 }
198 
// Number of fused activation function codes (NONE, RELU, RELU1, RELU6); each
// ADD/MUL/DIV/SUB encoding range spans one encoding per activation function.
const uint32_t kNumFuseCodes = 4;
// Sentinel returned by lookupOperation() for an operation with no encoding.
const uint32_t kBadOperation = ~0;

// V1_0 operations (encodings 0..7: ADD 0..3, MUL 4..7)
const uint32_t kFirstEncodingADD = 0;
const uint32_t kFirstEncodingMUL = kFirstEncodingADD + kNumFuseCodes;
const uint32_t kFirstEncodingV1_0 = kFirstEncodingADD;
const uint32_t kLastEncodingV1_0 = kFirstEncodingMUL + kNumFuseCodes - 1;

// V1_1 operations (encodings 8..15: DIV 8..11, SUB 12..15)
const uint32_t kFirstEncodingDIV = kLastEncodingV1_0 + 1;
const uint32_t kFirstEncodingSUB = kFirstEncodingDIV + kNumFuseCodes;
const uint32_t kFirstEncodingV1_1 = kFirstEncodingDIV;
const uint32_t kLastEncodingV1_1 = kFirstEncodingSUB + kNumFuseCodes - 1;

// V1_2 operations (encodings 16..19; these take no activation function, so
// each operation gets exactly one encoding)
const uint32_t kFirstEncodingMAXIMUM = kLastEncodingV1_1 + 1;
const uint32_t kFirstEncodingMINIMUM = kFirstEncodingMAXIMUM + 1;
const uint32_t kFirstEncodingPOW = kFirstEncodingMINIMUM + 1;
const uint32_t kFirstEncodingPRELU = kFirstEncodingPOW + 1;
const uint32_t kFirstEncodingV1_2 = kFirstEncodingMAXIMUM;
const uint32_t kLastEncodingV1_2 = kFirstEncodingPRELU;

// V1_3 operations (encoding 20)
const uint32_t kFirstEncodingHARD_SWISH = kLastEncodingV1_2 + 1;
const uint32_t kFirstEncodingV1_3 = kFirstEncodingHARD_SWISH;
const uint32_t kLastEncodingV1_3 = kFirstEncodingHARD_SWISH;

// Maps each encodable operation type to the first encoding in its range.
const std::map<V1_3::OperationType, uint32_t> operationToFirstEncoding = {
        {V1_3::OperationType::ADD, kFirstEncodingADD},
        {V1_3::OperationType::MUL, kFirstEncodingMUL},
        {V1_3::OperationType::DIV, kFirstEncodingDIV},
        {V1_3::OperationType::SUB, kFirstEncodingSUB},
        {V1_3::OperationType::MAXIMUM, kFirstEncodingMAXIMUM},
        {V1_3::OperationType::MINIMUM, kFirstEncodingMINIMUM},
        {V1_3::OperationType::POW, kFirstEncodingPOW},
        {V1_3::OperationType::PRELU, kFirstEncodingPRELU},
        {V1_3::OperationType::HARD_SWISH, kFirstEncodingHARD_SWISH},
};

// Sorted in reverse order (std::greater) so that we can use map::lower_bound to
// find an entry whose key is numerically less than or equal to a search value.
// mapped_type is (OperandCode, hasFuseCode).
const std::map<uint32_t, std::pair<uint32_t, bool>, std::greater<>> firstEncodingToOperation = {
        {kFirstEncodingADD, {ANEURALNETWORKS_ADD, true}},
        {kFirstEncodingMUL, {ANEURALNETWORKS_MUL, true}},
        {kFirstEncodingDIV, {ANEURALNETWORKS_DIV, true}},
        {kFirstEncodingSUB, {ANEURALNETWORKS_SUB, true}},
        {kFirstEncodingMAXIMUM, {ANEURALNETWORKS_MAXIMUM, false}},
        {kFirstEncodingMINIMUM, {ANEURALNETWORKS_MINIMUM, false}},
        {kFirstEncodingPOW, {ANEURALNETWORKS_POW, false}},
        {kFirstEncodingPRELU, {ANEURALNETWORKS_PRELU, false}},
        {kFirstEncodingHARD_SWISH, {ANEURALNETWORKS_HARD_SWISH, false}},
};
253 
254 // Look up the operation with the specified index in a graph, and return the
255 // operation encoding; or, if for some reason this is not one of the encoded
256 // operations, then return kBadOperation.
lookupOperation(std::function<const V1_3::Operation & (uint32_t)> getOperation,std::function<const V1_3::Operand & (uint32_t)> getOperand,std::function<const uint8_t * (uint32_t)> getValue,uint32_t operationIndex)257 uint32_t lookupOperation(std::function<const V1_3::Operation&(uint32_t)> getOperation,
258                          std::function<const V1_3::Operand&(uint32_t)> getOperand,
259                          std::function<const uint8_t*(uint32_t)> getValue,
260                          uint32_t operationIndex) {
261     const V1_3::Operation& operation = getOperation(operationIndex);
262     switch (operation.type) {
263         case V1_3::OperationType::ADD:
264         case V1_3::OperationType::MUL:
265         case V1_3::OperationType::DIV:
266         case V1_3::OperationType::SUB: {
267             // input2 is the fused activation function
268             const V1_3::Operand& input2 = getOperand(operation.inputs[2]);
269             if ((input2.type == V1_3::OperandType::INT32) &&
270                 (input2.lifetime == V1_3::OperandLifeTime::CONSTANT_COPY)) {
271                 int32_t value;
272                 CHECK_EQ(sizeof(value), input2.location.length);
273                 memcpy(&value, getValue(input2.location.offset), input2.location.length);
274                 return value + operationToFirstEncoding.at(operation.type);
275             }
276             break;
277         }
278         default: {
279             auto it = operationToFirstEncoding.find(operation.type);
280             if (it != operationToFirstEncoding.end()) {
281                 return it->second;
282             }
283             break;
284         }
285     }
286     return kBadOperation;
287 }
288 
lookupOperation(const HidlModel & model,const V1_3::Subgraph & subgraph,uint32_t operationIndex)289 uint32_t lookupOperation(const HidlModel& model, const V1_3::Subgraph& subgraph,
290                          uint32_t operationIndex) {
291     return lookupOperation(
292             [&subgraph](uint32_t index) -> const V1_3::Operation& {
293                 return subgraph.operations[index];
294             },
295             [&subgraph](uint32_t index) -> const V1_3::Operand& {
296                 return subgraph.operands[index];
297             },
298             [&model](uint32_t offset) { return &model.operandValues[offset]; }, operationIndex);
299 }
300 
#ifdef VERBOSE
// Debugging utility: prints a model's HIDL representation, its main-subgraph
// inputs and outputs, and each of its operations to stdout.
void dump(const char* name, const ModelBuilder* model) {
    const HidlModel hidlModel = model->makeHidlModel();
    std::cout << name << ": " << hidlModel << std::endl;
    std::cout << "inputs: " << hidlModel.main.inputIndexes << std::endl;
    std::cout << "outputs: " << hidlModel.main.outputIndexes << std::endl;
    const auto& operations = hidlModel.main.operations;
    for (size_t i = 0; i < operations.size(); ++i) {
        std::cout << "operation[" << i << "]: " << operations[i] << std::endl;
    }
}
#endif
313 
// This is an IDevice for testing purposes.  It only has a few interesting
// properties, all of which are specified as constructor arguments: device
// capabilities; which subset of operation kinds (0..20) does the device
// support; does the device support the OEM operation; does the device support
// other operations.  The subset is represented with a bitmask, in which
// operation kind K corresponds to the bit (1 << K).  The other operations are
// represented by a set of OperationType.
class PartitioningDriver : public SampleDriver {
   public:
    // How the driver responds to the OEM operation:
    enum OEM {
        OEMNo,          // rejected by getSupportedOperations and prepareModel
        OEMIndecisive,  // accepted by getSupportedOperations but not prepareModel
        OEMYes,         // accepted by getSupportedOperations and prepareModel
    };

    // "operationMask" is a bitmask of supported operation encodings (bit
    // (1 << K) for encoding K); "operationTypes" lists additional supported
    // operation types that have no encoding.  OEM_OPERATION must not appear in
    // "operationTypes" -- its support is governed solely by "oem".
    PartitioningDriver(const char* name, const char* version, V1_3::Capabilities capabilities,
                       uint32_t operationMask, OEM oem = OEMNo,
                       std::set<V1_3::OperationType> operationTypes = {})
        : SampleDriver(name),
          mVersionString(version),
          mCapabilities(capabilities),
          mOperationMask(operationMask),
          mOEM(oem),
          mOperationTypes(std::move(operationTypes)) {
        CHECK_EQ(mOperationTypes.count(V1_3::OperationType::OEM_OPERATION), size_t(0));
        if (operationMask) {
            // An encodable operation kind must be expressed through the mask,
            // never through the explicit type set.
            std::for_each(mOperationTypes.begin(), mOperationTypes.end(),
                          [](V1_3::OperationType type) {
                              CHECK_EQ(operationToFirstEncoding.count(type), size_t(0));
                          });
        }
    }
    ~PartitioningDriver() override {}

    hardware::Return<void> getVersionString(getVersionString_cb cb) override {
        cb(V1_0::ErrorStatus::NONE, mVersionString);
        return hardware::Void();
    }

    // Rejects the model with INVALID_ARGUMENT if it contains any OEM operation
    // while mOEM is OEMIndecisive, or if any operation in the main subgraph is
    // unsupported; otherwise delegates to SampleDriver::prepareModel_1_3.
    hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
            const V1_3::Model& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
            const V1_3::OptionalTimePoint& deadline,
            const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
            const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
            const sp<V1_3::IPreparedModelCallback>& callback) override {
        if (mOEM == OEMIndecisive) {
            for (const auto& operation : model.main.operations) {
                if (operation.type == V1_3::OperationType::OEM_OPERATION) {
                    callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
                    return V1_3::ErrorStatus::INVALID_ARGUMENT;
                }
            }
        }

        // NOTE: We verify that all operations in the model are supported.
        V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_3(
                model, [&outStatus](V1_3::ErrorStatus inStatus,
                                    const hardware::hidl_vec<bool>& supportedOperations) {
                    if (inStatus == V1_3::ErrorStatus::NONE) {
                        if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                        [](bool v) { return v; })) {
                            outStatus = V1_3::ErrorStatus::NONE;
                        }
                    }
                });
        if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) {
            return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache,
                                                  dataCache, token, callback);
        } else {
            callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

    hardware::Return<V1_0::DeviceStatus> getStatus() override {
        return V1_0::DeviceStatus::AVAILABLE;
    }

    // Reports the capabilities supplied at construction.
    hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
        cb(V1_3::ErrorStatus::NONE, mCapabilities);
        return hardware::Void();
    }

    hardware::Return<void> getSupportedOperations_1_3(const V1_3::Model& model,
                                                      getSupportedOperations_1_3_cb cb) override {
        if (!android::nn::validateModel(model)) {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
            return hardware::Void();
        }
        cb(V1_3::ErrorStatus::NONE, getSupportedOperationsForSubgraph(model, model.main));
        return hardware::Void();
    }

    hardware::Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb) override {
        cb(V1_0::ErrorStatus::NONE, /*numModelCache=*/1, /*numDataCache=*/1);
        return hardware::Void();
    }

   private:
    // Returns, for each operation of "subgraph", whether this driver supports
    // it.  IF and WHILE listed in mOperationTypes are supported only if every
    // operation of each referenced subgraph is supported (checked
    // recursively); OEM_OPERATION is supported unless mOEM is OEMNo; all other
    // operations are checked against mOperationTypes and mOperationMask.
    std::vector<bool> getSupportedOperationsForSubgraph(const V1_3::Model& model,
                                                        const V1_3::Subgraph& subgraph) {
        // "subgraph" must be the main subgraph or one of the model's
        // referenced subgraphs (compared by address).
        CHECK(&subgraph == &model.main ||
              std::find_if(model.referenced.begin(), model.referenced.end(),
                           [&subgraph](const V1_3::Subgraph& refSubgraph) {
                               return &subgraph == &refSubgraph;
                           }) != model.referenced.end());
        // Given the index (within "subgraph") of a SUBGRAPH operand, returns
        // whether every operation of the referenced subgraph is supported.
        auto supportsEntireSubgraph = [this, &model, &subgraph](uint32_t refSubgraphOperandIndex) {
            CHECK_LT(refSubgraphOperandIndex, subgraph.operands.size());
            const V1_3::Operand& refSubgraphOperand = subgraph.operands[refSubgraphOperandIndex];
            CHECK(refSubgraphOperand.lifetime == V1_3::OperandLifeTime::SUBGRAPH);
            CHECK_LT(refSubgraphOperand.location.offset, model.referenced.size());
            const V1_3::Subgraph& refSubgraph =
                    model.referenced[refSubgraphOperand.location.offset];
            std::vector<bool> supported = getSupportedOperationsForSubgraph(model, refSubgraph);
            return std::all_of(supported.begin(), supported.end(), [](bool x) { return x; });
        };
        const size_t count = subgraph.operations.size();
        std::vector<bool> supported(count);
        for (size_t i = 0; i < count; i++) {
            const V1_3::Operation& operation = subgraph.operations[i];
            if (mOperationTypes.count(operation.type)) {
                if (operation.type == V1_3::OperationType::IF) {
                    namespace op = android::nn::operation_if;
                    CHECK_GE(operation.inputs.size(), op::kFirstInput);
                    supported[i] =
                            supportsEntireSubgraph(operation.inputs[op::kThenModelOperand]) &&
                            supportsEntireSubgraph(operation.inputs[op::kElseModelOperand]);
                } else if (operation.type == V1_3::OperationType::WHILE) {
                    namespace op = android::nn::operation_while;
                    CHECK_GE(operation.inputs.size(), op::kFirstInput);
                    supported[i] =
                            supportsEntireSubgraph(operation.inputs[op::kCondModelOperand]) &&
                            supportsEntireSubgraph(operation.inputs[op::kBodyModelOperand]);
                } else {
                    supported[i] = true;
                }
                continue;
            }
            if (operation.type == V1_3::OperationType::OEM_OPERATION) {
                supported[i] = (mOEM != OEMNo);
                continue;
            }
            // Otherwise the operation is supported iff it has an encoding and
            // that encoding's bit is set in the mask.
            supported[i] = false;
            uint32_t operationEncoding = lookupOperation(model, subgraph, i);
            if ((operationEncoding != kBadOperation) &&
                (mOperationMask & (1 << operationEncoding))) {
                supported[i] = true;
            }
        }
        return supported;
    }

    std::string mVersionString;
    V1_3::Capabilities mCapabilities;
    uint32_t mOperationMask;
    OEM mOEM;
    std::set<V1_3::OperationType> mOperationTypes;
};
473 
474 // Like PartitioningDriver, but implementing 1.2
475 class PartitioningDriverV1_2 : public V1_2::IDevice {
476    public:
PartitioningDriverV1_2(const char * name,const char * version,V1_3::Capabilities capabilities,uint32_t operationMask,PartitioningDriver::OEM oem=PartitioningDriver::OEMNo,std::set<V1_3::OperationType> operationTypes={})477     PartitioningDriverV1_2(const char* name, const char* version, V1_3::Capabilities capabilities,
478                            uint32_t operationMask,
479                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
480                            std::set<V1_3::OperationType> operationTypes = {})
481         : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
482                                                operationTypes)) {}
getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb)483     hardware::Return<void> getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb) override {
484         return mLatestDriver->getCapabilities_1_2(_hidl_cb);
485     }
getSupportedOperations_1_2(const V1_2::Model & model,getSupportedOperations_1_2_cb _hidl_cb)486     hardware::Return<void> getSupportedOperations_1_2(
487             const V1_2::Model& model, getSupportedOperations_1_2_cb _hidl_cb) override {
488         return mLatestDriver->getSupportedOperations_1_2(model, _hidl_cb);
489     }
prepareModel_1_2(const V1_2::Model & model,V1_1::ExecutionPreference preference,const hardware::hidl_vec<hardware::hidl_handle> & modelCache,const hardware::hidl_vec<hardware::hidl_handle> & dataCache,const HalCacheToken & token,const sp<V1_2::IPreparedModelCallback> & actualCallback)490     hardware::Return<V1_0::ErrorStatus> prepareModel_1_2(
491             const V1_2::Model& model, V1_1::ExecutionPreference preference,
492             const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
493             const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
494             const sp<V1_2::IPreparedModelCallback>& actualCallback) override {
495         return mLatestDriver->prepareModel_1_2(model, preference, modelCache, dataCache, token,
496                                                actualCallback);
497     }
getVersionString(getVersionString_cb _hidl_cb)498     hardware::Return<void> getVersionString(getVersionString_cb _hidl_cb) override {
499         return mLatestDriver->getVersionString(_hidl_cb);
500     }
getType(getType_cb _hidl_cb)501     hardware::Return<void> getType(getType_cb _hidl_cb) override {
502         return mLatestDriver->getType(_hidl_cb);
503     }
getSupportedExtensions(getSupportedExtensions_cb _hidl_cb)504     hardware::Return<void> getSupportedExtensions(getSupportedExtensions_cb _hidl_cb) {
505         return mLatestDriver->getSupportedExtensions(_hidl_cb);
506     }
getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb)507     hardware::Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb) {
508         return mLatestDriver->getNumberOfCacheFilesNeeded(_hidl_cb);
509     }
prepareModelFromCache(const hardware::hidl_vec<hardware::hidl_handle> & modelCache,const hardware::hidl_vec<hardware::hidl_handle> & dataCache,const HalCacheToken & token,const sp<V1_2::IPreparedModelCallback> & callback)510     hardware::Return<V1_0::ErrorStatus> prepareModelFromCache(
511             const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
512             const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
513             const sp<V1_2::IPreparedModelCallback>& callback) {
514         return mLatestDriver->prepareModelFromCache(modelCache, dataCache, token, callback);
515     }
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)516     hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
517         return mLatestDriver->getCapabilities_1_1(_hidl_cb);
518     }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)519     hardware::Return<void> getSupportedOperations_1_1(
520             const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
521         return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
522     }
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)523     hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
524             const V1_1::Model& model, V1_1::ExecutionPreference preference,
525             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
526         return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
527     }
getStatus()528     hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)529     hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
530         return mLatestDriver->getCapabilities(_hidl_cb);
531     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)532     hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
533                                                   getSupportedOperations_cb _hidl_cb) override {
534         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
535     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)536     hardware::Return<V1_0::ErrorStatus> prepareModel(
537             const V1_0::Model& model,
538             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
539         return mLatestDriver->prepareModel(model, actualCallback);
540     }
541 
542    private:
543     const sp<V1_3::IDevice> mLatestDriver;
544 };
545 
546 // Like PartitioningDriver, but implementing 1.1
547 class PartitioningDriverV1_1 : public V1_1::IDevice {
548    public:
PartitioningDriverV1_1(const char * name,const char * version,V1_3::Capabilities capabilities,uint32_t operationMask,PartitioningDriver::OEM oem=PartitioningDriver::OEMNo,std::set<V1_3::OperationType> operationTypes={})549     PartitioningDriverV1_1(const char* name, const char* version, V1_3::Capabilities capabilities,
550                            uint32_t operationMask,
551                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
552                            std::set<V1_3::OperationType> operationTypes = {})
553         : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
554                                                operationTypes)) {}
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)555     hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
556         return mLatestDriver->getCapabilities_1_1(_hidl_cb);
557     }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)558     hardware::Return<void> getSupportedOperations_1_1(
559             const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
560         return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
561     }
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)562     hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
563             const V1_1::Model& model, V1_1::ExecutionPreference preference,
564             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
565         return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
566     }
getStatus()567     hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)568     hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
569         return mLatestDriver->getCapabilities(_hidl_cb);
570     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)571     hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
572                                                   getSupportedOperations_cb _hidl_cb) override {
573         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
574     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)575     hardware::Return<V1_0::ErrorStatus> prepareModel(
576             const V1_0::Model& model,
577             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
578         return mLatestDriver->prepareModel(model, actualCallback);
579     }
580 
581    private:
582     const sp<V1_3::IDevice> mLatestDriver;
583 };
584 
585 // Like PartitioningDriver, but implementing 1.0
586 class PartitioningDriverV1_0 : public V1_0::IDevice {
587    public:
PartitioningDriverV1_0(const char * name,const char * version,V1_3::Capabilities capabilities,uint32_t operationMask,PartitioningDriver::OEM oem=PartitioningDriver::OEMNo,std::set<V1_3::OperationType> operationTypes={})588     PartitioningDriverV1_0(const char* name, const char* version, V1_3::Capabilities capabilities,
589                            uint32_t operationMask,
590                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
591                            std::set<V1_3::OperationType> operationTypes = {})
592         : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
593                                                operationTypes)) {}
getCapabilities(getCapabilities_cb _hidl_cb)594     hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
595         return mLatestDriver->getCapabilities(_hidl_cb);
596     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)597     hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
598                                                   getSupportedOperations_cb _hidl_cb) override {
599         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
600     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)601     hardware::Return<V1_0::ErrorStatus> prepareModel(
602             const V1_0::Model& model,
603             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
604         return mLatestDriver->prepareModel(model, actualCallback);
605     }
getStatus()606     hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
607 
608    private:
609     const sp<V1_3::IDevice> mLatestDriver;
610 };
611 
// Describes the shape information carried by a test-created tensor operand:
// whether rank and/or dimensions are specified, and what the dimensions are.
enum class Dimensioned {
    NO,      // either a scalar, or a tensor of either unspecified rank (usually)
             // or specified rank but with no specified dimensions (where
             // specifically stated)
    RANK_1,  // tensor of shape { 0 }    -- i.e., rank 1, unspecified dimensions
    RANK_2,  // tensor of shape { 0, 0 } -- i.e., rank 2, unspecified dimensions
    YES_1,   // tensor of shape { 1 }
    YES_2,   // tensor of shape { 2 }
    YES_4,   // tensor of shape { 4 }
    YES = YES_1
};
623 
dimensions(Dimensioned dimensioned)624 std::vector<uint32_t> dimensions(Dimensioned dimensioned) {
625     switch (dimensioned) {
626         default:
627             EXPECT_TRUE(false) << "Unknown value";
628             FALLTHROUGH_INTENDED;
629         case Dimensioned::NO:
630             return {};
631         case Dimensioned::RANK_1:
632             return {0};
633         case Dimensioned::RANK_2:
634             return {0, 0};
635         case Dimensioned::YES_1:
636             return {1};
637         case Dimensioned::YES_2:
638             return {2};
639         case Dimensioned::YES_4:
640             return {4};
641     }
642 }
643 
644 // "dimensioned" must be a fully specified kind
numberOfElements(Dimensioned dimensioned)645 uint32_t numberOfElements(Dimensioned dimensioned) {
646     auto dims = dimensions(dimensioned);
647     uint32_t result = std::reduce(dims.begin(), dims.end(), 1u, std::multiplies<>());
648     CHECK_GT(result, 0u);
649     return result;
650 }
651 
toString(Dimensioned dimensioned)652 std::string toString(Dimensioned dimensioned) {
653     switch (dimensioned) {
654         default:
655             return "<Unknown value>";
656         case Dimensioned::NO:
657             return "NO";
658         case Dimensioned::RANK_1:
659             return "RANK_1";
660         case Dimensioned::RANK_2:
661             return "RANK_2";
662         case Dimensioned::YES_1:
663             return "YES_1";
664         case Dimensioned::YES_2:
665             return "YES_2";
666         case Dimensioned::YES_4:
667             return "YES_4";
668     }
669 }
670 
// This class adds some simple abstractions and utilities on top of
// WrapperModel.  For example, it provides methods that work in terms of
// operation kind (0..7); and because we care about graph topology rather than
// details of operand types and values, it greatly simplifies the process of
// creating operands.
class PartitioningModel : private WrapperModel {
   public:
    using WrapperModel::finish;
    using WrapperModel::getHandle;
    using WrapperModel::identifyInputsAndOutputs;
    using WrapperModel::isValid;
    using WrapperModel::relaxComputationFloat32toFloat16;
    using WrapperModel::setOperandValue;

    // Create a tensor operand of the specified type, and return the
    // corresponding operand index.
    uint32_t addIntOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_INT32, dimensioned);
    }
    // Create a scalar INT32 operand, optionally with a constant value, and
    // return the corresponding operand index.
    uint32_t addIntScalarOperand(std::optional<int> v = std::nullopt) {
        uint32_t opnd = addOperand(WrapperType::INT32);
        if (v.has_value()) {
            setOperandValue(opnd, &v.value());
        }
        return opnd;
    }
    uint32_t addFloatOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_FLOAT32, dimensioned);
    }
    uint32_t addQuantOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_QUANT8_ASYMM, dimensioned);
    }
    uint32_t addBooleanOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_BOOL8, dimensioned);
    }
    // Create a FLOAT32 tensor operand whose constant value is all zeroes, and
    // return the corresponding operand index.  "dimensioned" must be a fully
    // specified kind (see numberOfElements()).
    uint32_t addFloatZeroOperand(Dimensioned dimensioned = Dimensioned::YES) {
        uint32_t opnd = addFloatOperand(dimensioned);
        std::vector<float> values(numberOfElements(dimensioned), 0.0f);
        uint32_t size = values.size() * sizeof(float);
        // Make sure the values are immediately copied so that it is safe to free the buffer after
        // the setOperandValue call
        CHECK_LE(size, ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        setOperandValue(opnd, values.data(), size);
        return opnd;
    }

    // Create an operand of the specified type, and return the corresponding
    // operand index.
    uint32_t addOperand(WrapperType wrapperType, Dimensioned dimensioned = Dimensioned::YES) {
        switch (static_cast<int>(wrapperType)) {
            // Scalar types: "dimensioned" is irrelevant.
            case ANEURALNETWORKS_BOOL:
            case ANEURALNETWORKS_FLOAT16:
            case ANEURALNETWORKS_FLOAT32:
            case ANEURALNETWORKS_INT32:
            case ANEURALNETWORKS_UINT32:
            case ANEURALNETWORKS_MODEL:
            case ANEURALNETWORKS_OEM_SCALAR:
                return addOperand(WrapperOperandType{wrapperType, {}});

            // Tensor types constructed with dimensions only.
            case ANEURALNETWORKS_TENSOR_BOOL8:
            case ANEURALNETWORKS_TENSOR_FLOAT16:
            case ANEURALNETWORKS_TENSOR_FLOAT32:
            case ANEURALNETWORKS_TENSOR_OEM_BYTE:
                return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned)});

            // Tensor types constructed with a scale (an arbitrary 1.0f here).
            case ANEURALNETWORKS_TENSOR_INT32:
            case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
            case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED:
            case ANEURALNETWORKS_TENSOR_QUANT8_SYMM:
            case ANEURALNETWORKS_TENSOR_QUANT16_ASYMM:
            case ANEURALNETWORKS_TENSOR_QUANT16_SYMM:
                return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned), 1.0f});

            // Per-channel quantized tensors need channel quantization params.
            case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL:
                return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned),
                                                     WrapperSymmPerChannelQuantParams({1.0f}, 0)});

            default:
                ADD_FAILURE() << "Unexpected type " << static_cast<uint32_t>(wrapperType);
                return ~uint32_t(0);
        }
    }

    // Create an operand of the specified operand type, and return the
    // corresponding operand index.
    uint32_t addOperand(const WrapperOperandType& wrapperOperandType) {
        // Remember the type so addOperandOfSameType() can clone it later.
        mWrapperOperandType.push_back(wrapperOperandType);
        return WrapperModel::addOperand(&wrapperOperandType);
    }

    // Create an operation with any number of inputs and one output, specifying
    // the operation type (e.g., ANEURALNETWORKS_ADD), the input operand
    // indexes, and the output type (e.g., WrapperType::TENSOR_FLOAT32).
    // Returns the output operand index.
    uint32_t addExplicitOperationXTo1(ANeuralNetworksOperationType operationType,
                                      const std::vector<uint32_t>& inputs, WrapperType outputType,
                                      Dimensioned dimensionedOutput = Dimensioned::YES) {
        uint32_t output = addOperand(outputType, dimensionedOutput);
        addOperation(operationType, inputs, {output});
        return output;
    }

    // Create a V1_0 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_0 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_0(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_0 - kFirstEncodingV1_0);
        return addOperation2To1(operation + kFirstEncodingV1_0, input0, input1, dimensionedOutput);
    }

    // Create a V1_1 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_1 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_1(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_1 - kFirstEncodingV1_1);
        return addOperation2To1(operation + kFirstEncodingV1_1, input0, input1, dimensionedOutput);
    }

    // Create a V1_2 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_2 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_2(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_2 - kFirstEncodingV1_2);
        return addOperation2To1(operation + kFirstEncodingV1_2, input0, input1, dimensionedOutput);
    }

    // Create a V1_3 operation with one input and one output, specifying the
    // operation kind (where 0 is the first V1_3 operation) and the input
    // operand index.
    // Returns the output operand index.
    uint32_t addOperation1To1V1_3(uint32_t operation, const uint32_t input0,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_3 - kFirstEncodingV1_3);
        return addOperation1To1(operation + kFirstEncodingV1_3, input0, dimensionedOutput);
    }

    // Create an OEM operation with one input and one output,
    // specifying the input operand index.  Returns the output operand
    // index.
    uint32_t addOperationOEM1To1(const uint32_t input,
                                 Dimensioned dimensionedOutput = Dimensioned::YES) {
        uint32_t output = addOperandOfSameType(input, dimensionedOutput);
        addOperation(ANEURALNETWORKS_OEM_OPERATION, {input}, {output});
        return output;
    }

    // Create an IF operation with the given condition operand and two
    // referenced models for the true and false cases.
    void addIfOperation(const uint32_t cond, const PartitioningModel& trueModel,
                        const PartitioningModel& falseModel, const std::vector<uint32_t>& inputs,
                        const std::vector<uint32_t>& outputs) {
        const uint32_t opndTrue = addRefModelOperand(trueModel);
        const uint32_t opndFalse = addRefModelOperand(falseModel);
        // IF inputs: condition, then-model, else-model, then the data inputs.
        std::vector<uint32_t> ifInputs = {cond, opndTrue, opndFalse};
        ifInputs.insert(ifInputs.end(), inputs.begin(), inputs.end());
        addOperation(ANEURALNETWORKS_IF, ifInputs, outputs);
    }

    // Create a WHILE operation with the given condition and body referenced models.
    void addWhileOperation(const PartitioningModel& condModel, const PartitioningModel& bodyModel,
                           const std::vector<uint32_t>& inputs,
                           const std::vector<uint32_t>& outputs) {
        const uint32_t condOperand = addRefModelOperand(condModel);
        const uint32_t bodyOperand = addRefModelOperand(bodyModel);
        // WHILE inputs: condition model, body model, then the loop inputs.
        std::vector<uint32_t> whileInputs = {condOperand, bodyOperand};
        whileInputs.insert(whileInputs.end(), inputs.begin(), inputs.end());
        addOperation(ANEURALNETWORKS_WHILE, whileInputs, outputs);
    }

    // Run the partitioning algorithm to create an ExecutionPlan.
    int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
                         ExecutePreference preference, ExecutePriority priority,
                         const OptionalTimePoint& deadline, ExecutionPlan* plan) {
        return reinterpret_cast<ModelBuilder*>(getHandle())
                ->partitionTheWork(devices, static_cast<uint32_t>(preference),
                                   static_cast<int32_t>(priority), deadline, plan, {});
    }

#ifdef VERBOSE
    // This is a debugging utility function.
    void dump(const char* name) const {
        const ModelBuilder* mb = reinterpret_cast<const ModelBuilder*>(getHandle());
        ::dump(name, mb);
    }
#endif

   private:
    // Create an operation with two inputs and one output, specifying
    // the operation kind and the input operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1(uint32_t operation, const uint32_t input0, const uint32_t input1,
                              Dimensioned dimensionedOutput = Dimensioned::YES) {
        // Find the descriptor (real operation type, and whether it takes an
        // extra scalar input) for this encoding.
        auto it = firstEncodingToOperation.lower_bound(operation);
        CHECK(it != firstEncodingToOperation.end());
        ANeuralNetworksOperationType type = it->second.first;
        if (it->second.second) {
            // The operation takes an extra scalar input whose value is the
            // offset of the encoding from the entry's base encoding
            // (presumably a fused activation code -- see the map's definition).
            int32_t fuseCode = operation - it->first;
            uint32_t input2 = addIntOperand(fuseCode);
            uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
            addOperation(type, {input0, input1, input2}, {output});
            return output;
        } else {
            uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
            addOperation(type, {input0, input1}, {output});
            return output;
        }
    }

    // Create an operation with one input and one output, specifying
    // the operation kind and the input operand index.
    // Returns the output operand index.
    uint32_t addOperation1To1(uint32_t operation, const uint32_t input0,
                              Dimensioned dimensionedOutput = Dimensioned::YES) {
        auto it = firstEncodingToOperation.lower_bound(operation);
        CHECK(it != firstEncodingToOperation.end());
        ANeuralNetworksOperationType type = it->second.first;

        uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
        addOperation(type, {input0}, {output});
        return output;
    }

    // Create a scalar integer operand of the specified value, and
    // return the corresponding operand index.
    uint32_t addIntOperand(int32_t value) {
        uint32_t operand = addOperand(WrapperType::INT32);
        setOperandValue(operand, &value, sizeof(value));
        return operand;
    }

    // Create an operand from a model for control flow graphs.
    uint32_t addRefModelOperand(const PartitioningModel& model) {
        const uint32_t index = addOperand(WrapperType::MODEL);
        WrapperModel::setOperandValueFromModel(index, &model);
        return index;
    }

    // Create an operand of the same type as the specified operand,
    // and return the operand index of the new operand.
    //
    // If a tensor, the new operand will have the same rank as the specified
    // operand.  If dimensioned == Dimensioned::NO, then all dimensions of a new
    // tensor operand will be unspecified.  If dimensioned != Dimensioned::NO,
    // then all dimensions of a new tensor operand will have the implied value
    // (e.g., YES_1 means each dimension will have the value "1").
    uint32_t addOperandOfSameType(uint32_t operand, Dimensioned dimensioned = Dimensioned::YES) {
        WrapperOperandType type = mWrapperOperandType.at(operand);

        // Only kinds that imply at most one dimension value are supported,
        // because that single value is broadcast across all axes of the type.
        const auto d = dimensions(dimensioned);
        EXPECT_TRUE(d.size() <= 1);
        for (auto& dimension : type.dimensions) {
            dimension = (dimensioned == Dimensioned::NO ? 0 : d[0]);
        }

        mWrapperOperandType.push_back(type);
        return WrapperModel::addOperand(&type);
    }

    // operand index to operand type
    std::vector<WrapperOperandType> mWrapperOperandType;
};
938 
939 // This class adds some utilities on top of WrapperCompilation.
940 class PartitioningCompilation : public WrapperCompilation {
941    public:
PartitioningCompilation(const PartitioningModel * model,const std::vector<std::shared_ptr<Device>> & devices)942     PartitioningCompilation(const PartitioningModel* model,
943                             const std::vector<std::shared_ptr<Device>>& devices) {
944         ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
945         CompilationBuilder* c = nullptr;
946         int result = m->createCompilation(&c, devices);
947         EXPECT_EQ(result, 0);
948         mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
949     }
950 
setPartitioning(uint32_t partitioning)951     Result setPartitioning(uint32_t partitioning) {
952         return static_cast<Result>(builder()->forTest_setPartitioning(partitioning));
953     }
954 
955     // Simulate recoverable partitioning failure.
failPartitioning()956     Result failPartitioning() {
957         return static_cast<Result>(
958                 builder()->forTest_failPartitioning(static_cast<int>(Result::OP_FAILED)));
959     }
960 
961     using WrapperCompilation::finish;
962 
getExecutionPlan() const963     const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }
964 
965    private:
builder()966     CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }
967 
builder() const968     const CompilationBuilder* builder() const {
969         return reinterpret_cast<const CompilationBuilder*>(getHandle());
970     }
971 };
972 
// RETURN_TRUE()/RETURN_FALSE(MESSAGE) return from the enclosing function; when
// VERBOSE is defined they first log the line number of the return (and, for
// RETURN_FALSE, an optional stream-style MESSAGE) to aid failure analysis.
#ifdef VERBOSE
#define RETURN_TRUE()                                                 \
    {                                                                 \
        std::cerr << "returning true from " << __LINE__ << std::endl; \
        return true;                                                  \
    }
#else
#define RETURN_TRUE() \
    { return true; }
#endif
#ifdef VERBOSE
#define RETURN_FALSE(MESSAGE)                                                  \
    {                                                                          \
        std::cerr << "returning false from " << __LINE__ MESSAGE << std::endl; \
        return false;                                                          \
    }
#else
#define RETURN_FALSE(MESSAGE) \
    { return false; }
#endif
993 
994 class PartitioningTest : public ::testing::Test {
995    protected:
996     using DynamicTemporariesType = decltype(ExecutionPlan().forTest_flatGetDynamicTemporaries());
997     using RemapVectorType = ExecutionStep::RemapVectorType;
998     using StepModelOutputSetType = ExecutionStep::StepModelOutputSetType;
999 
1000     // Used for PartitioningTest::checkExecutionPlanSteps.
1001     static constexpr const char* kIfStep = "IF";
1002     static constexpr const char* kWhileStep = "WHILE";
1003     static constexpr const char* kGotoStep = "GOTO";
1004 
    // No per-test setup is needed; virtual so subclasses may override.
    virtual void SetUp() {}
1006 
1007     // From a vector of DeviceSpecification, create a vector of
1008     // Devices.
1009     struct DeviceSpecification {
        // Construct from explicit capabilities; uses the default version string.
        DeviceSpecification(const std::string& name, const V1_3::Capabilities& capabilities,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo)
            : mName(name),
              mVersionString(kVersionString),
              mCapabilities(capabilities),
              mOperationMask(operationMask),
              mOEM(oem) {}
        // Uniform performance: "perf" is used for both regular and relaxed execution.
        DeviceSpecification(const std::string& name, float perf, uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : DeviceSpecification(name, perf, perf, operationMask, oem, halVersion,
                                  operationTypes) {}
        // Separate performance for relaxed execution; uses the default version string.
        DeviceSpecification(const std::string& name, float perf, float perfRelaxed,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : DeviceSpecification(name, kVersionString, perf, perfRelaxed, operationMask, oem,
                                  halVersion, operationTypes) {}
        // Explicit version string; uniform performance for regular and relaxed execution.
        DeviceSpecification(const std::string& name, const std::string& version, float perf,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : DeviceSpecification(name, version, perf, perf, operationMask, oem, halVersion,
                                  operationTypes) {}
        // The most general form; the constructors above delegate here.  Builds
        // a V1_3::Capabilities in which every operand type performs at "perf"
        // and relaxed FP16 execution performs at "perfRelaxed".
        DeviceSpecification(const std::string& name, const std::string& version, float perf,
                            float perfRelaxed, uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : mName(name),
              mVersionString(version),
              mHalVersion(halVersion),
              mOperationMask(operationMask),
              mOEM(oem),
              mOperationTypes(std::move(operationTypes)) {
            V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
            V1_0::PerformanceInfo perfRelaxedInfo = {.execTime = perfRelaxed,
                                                     .powerUsage = perfRelaxed};
            mCapabilities = {
                    .relaxedFloat32toFloat16PerformanceScalar = perfRelaxedInfo,
                    .relaxedFloat32toFloat16PerformanceTensor = perfRelaxedInfo,
                    .operandPerformance =
                            ::android::nn::nonExtensionOperandPerformance<HalVersion::V1_3>(
                                    perfInfo),
                    .ifPerformance = perfInfo,
                    .whilePerformance = perfInfo};
        }
        // Per-HAL-version operation masks, combined into one composite mask by
        // makeOperationMask().  Uses uniform performance "perf".
        DeviceSpecification(const std::string& name, float perf, HalVersion halVersion,
                            uint32_t operationMaskV1_0, uint32_t operationMaskV1_1 = 0,
                            uint32_t operationMaskV1_2 = 0, uint32_t operationMaskV1_3 = 0)
            : DeviceSpecification(
                      name, perf, perf,
                      makeOperationMask(halVersion, operationMaskV1_0, operationMaskV1_1,
                                        operationMaskV1_2, operationMaskV1_3)) {
            mHalVersion = halVersion;
        }
1070 
1071         std::string mName;
1072         std::string mVersionString;
1073         V1_3::Capabilities mCapabilities;
1074         HalVersion mHalVersion = HalVersion::LATEST;
1075         uint32_t mOperationMask;
1076         PartitioningDriver::OEM mOEM = PartitioningDriver::OEMNo;
1077         std::set<V1_3::OperationType> mOperationTypes;
1078 
1079         static constexpr char kVersionString[] = "JUST_AN_EXAMPLE";
1080 
1081        private:
        // This function takes four operation masks aligned at the low-order
        // bit -- one mask each for V1_0, V1_1, V1_2, and V1_3 -- and produces
        // a single composite operation mask, formed by shifting each of the
        // input operation masks appropriately and ORing the results together.
        //
        // For convenience, any bits of an input mask that are too high order
        // for that mask are discarded -- this allows ~0 to be a legal input
        // mask.
        //
        // For the sake of example, assume that each low order mask is 4 bits
        // wide, and take some artistic license to write literals in binary.
        // Then:
        //
        //     assert(makeOperationMask(HalVersion::V1_2, 0b0110, 0b1001, 0b0101, 0) ==
        //            0b 0101 1001 0110);
1097         //
1098         // This is used by a DeviceSpecification constructor to build a mask of
1099         // operations to be supported by the device.
makeOperationMask__anonb50294d30111::PartitioningTest::DeviceSpecification1100         static uint32_t makeOperationMask(HalVersion halVersion, uint32_t operationMaskV1_0,
1101                                           uint32_t operationMaskV1_1, uint32_t operationMaskV1_2,
1102                                           uint32_t operationMaskV1_3) {
1103             if (halVersion < HalVersion::V1_3) {
1104                 CHECK(!operationMaskV1_3);
1105             }
1106             if (halVersion < HalVersion::V1_2) {
1107                 CHECK(!operationMaskV1_2);
1108             }
1109             if (halVersion < HalVersion::V1_1) {
1110                 CHECK(!operationMaskV1_1);
1111             }
1112             auto maskOfWidth = [](uint32_t width) -> uint32_t { return (1U << width) - 1; };
1113             static const uint32_t kOperationMaskV1_0 =
1114                     maskOfWidth(kLastEncodingV1_0 - kFirstEncodingV1_0 + 1);
1115             static const uint32_t kOperationMaskV1_1 =
1116                     maskOfWidth(kLastEncodingV1_1 - kFirstEncodingV1_1 + 1);
1117             static const uint32_t kOperationMaskV1_2 =
1118                     maskOfWidth(kLastEncodingV1_2 - kFirstEncodingV1_2 + 1);
1119             static const uint32_t kOperationMaskV1_3 =
1120                     maskOfWidth(kLastEncodingV1_3 - kFirstEncodingV1_3 + 1);
1121             return ((operationMaskV1_0 & kOperationMaskV1_0) << kFirstEncodingV1_0) |
1122                    ((operationMaskV1_1 & kOperationMaskV1_1) << kFirstEncodingV1_1) |
1123                    ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2) |
1124                    ((operationMaskV1_3 & kOperationMaskV1_3) << kFirstEncodingV1_3);
1125         }
1126     };
makeDevices(std::vector<DeviceSpecification> specifications)1127     static std::vector<std::shared_ptr<Device>> makeDevices(
1128             std::vector<DeviceSpecification> specifications) {
1129         std::vector<std::shared_ptr<Device>> devices;
1130         for (const auto& specification : specifications) {
1131             SharedDevice device = nullptr;
1132             switch (specification.mHalVersion) {
1133                 case HalVersion::V1_3:
1134                     device = android::nn::makeSharedDevice(
1135                             specification.mName,
1136                             new PartitioningDriver(specification.mName.c_str(),
1137                                                    specification.mVersionString.c_str(),
1138                                                    specification.mCapabilities,
1139                                                    specification.mOperationMask, specification.mOEM,
1140                                                    specification.mOperationTypes));
1141                     break;
1142                 case HalVersion::V1_2:
1143                     device = android::nn::makeSharedDevice(
1144                             specification.mName,
1145                             new PartitioningDriverV1_2(
1146                                     specification.mName.c_str(),
1147                                     specification.mVersionString.c_str(),
1148                                     specification.mCapabilities, specification.mOperationMask,
1149                                     specification.mOEM, specification.mOperationTypes));
1150                     break;
1151                 case HalVersion::V1_1:
1152                     device = android::nn::makeSharedDevice(
1153                             specification.mName,
1154                             new PartitioningDriverV1_1(
1155                                     specification.mName.c_str(),
1156                                     specification.mVersionString.c_str(),
1157                                     specification.mCapabilities, specification.mOperationMask,
1158                                     specification.mOEM, specification.mOperationTypes));
1159                     break;
1160                 case HalVersion::V1_0:
1161                     device = android::nn::makeSharedDevice(
1162                             specification.mName,
1163                             new PartitioningDriverV1_0(
1164                                     specification.mName.c_str(),
1165                                     specification.mVersionString.c_str(),
1166                                     specification.mCapabilities, specification.mOperationMask,
1167                                     specification.mOEM, specification.mOperationTypes));
1168                     break;
1169                 default:
1170                     ADD_FAILURE() << "Unexpected";
1171             }
1172             auto driverDevice = DeviceManager::forTest_makeDriverDevice(device);
1173             devices.push_back(std::move(driverDevice));
1174         }
1175         devices.push_back(DeviceManager::getCpuDevice());
1176         return devices;
1177     }
1178 
stepsToString(const std::vector<std::string> & steps)1179     static std::string stepsToString(const std::vector<std::string>& steps) {
1180         std::stringstream ss;
1181         ss << "[ ";
1182         for (const auto& step : steps) {
1183             ss << step << " ";
1184         }
1185         ss << "]";
1186         return ss.str();
1187     }
1188 
1189     // Checks the type of each logical step in an execution plan.
1190     // Each entry of "expected" is either: kIfStep for IfStep, kWhileStep for WhileStep,
1191     // kGotoStep for GotoStep, or the device name for ExecutionStep.
checkExecutionPlanSteps(const ExecutionPlan & plan,const std::vector<std::string> & expected)1192     void checkExecutionPlanSteps(const ExecutionPlan& plan,
1193                                  const std::vector<std::string>& expected) {
1194         ASSERT_GT(expected.size(), 0u);
1195 
1196         std::vector<std::string> actual;
1197         if (expected.size() == 1) {
1198             ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
1199             actual.emplace_back(plan.forTest_simpleGetDevice()->getName());
1200         } else {
1201             ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
1202             const auto& steps = plan.forTest_compoundGetSteps();
1203             for (const auto& step : steps) {
1204                 if (step->isIf()) {
1205                     actual.emplace_back(kIfStep);
1206                 } else if (step->isWhile()) {
1207                     actual.emplace_back(kWhileStep);
1208                 } else if (step->isGoto()) {
1209                     actual.emplace_back(kGotoStep);
1210                 } else if (step->isExecution()) {
1211                     actual.emplace_back(step->executionStep()->getDevice()->getName());
1212                 } else {
1213                     ASSERT_FALSE(true) << "Unknown LogicalStep";
1214                 }
1215             }
1216         }
1217         ASSERT_TRUE(actual == expected)
1218                 << "expected: " << stepsToString(expected) << ", actual: " << stepsToString(actual);
1219     }
1220 
    /*-- Graph comparison -----------------------------------------------------------------*/
1222 
1223     // An operand with certain values for its lifetime does not have a
1224     // defining operation in the graph.  For the purposes of the graph
1225     // comparison algorithm, we encode the "defining operation" index of
1226     // such an operand as follows:
1227     // - NO_VALUE       kPseudoDefiningOperationNoValue
1228     // - SUBGRAPH_INPUT kPseudoDefiningOperationModelInput0 + (position in list of inputs)
1229     // - CONSTANT_COPY  kPseudoDefiningOperationConstantCopy0 + (constant value)
1230     //                    Note: For the graphs we build in this test, we
1231     //                          only expect to see 4-byte constants within
1232     //                          a very restricted range, so we only make
1233     //                          room for such constants in our encoding
1234     //                          space.
1235     // We do not expect to see CONSTANT_REFERENCE, and so we do not handle
1236     // it.
1237     //
1238     // The encoding is intended to be relatively human readable; it is not
1239     // designed to represent some optimal balance of ranges for the items
1240     // within its scope (actual operations, inputs, constants).
1241 
    enum PseudoDefiningOperationEncodings : uint32_t {
        // + (position in list of inputs)
        kPseudoDefiningOperationModelInput0 = 0x80000000U,
        // + (constant value); only small 4-byte constants are expected
        kPseudoDefiningOperationConstantCopy0 = 0x90000000U,
        kPseudoDefiningOperationNoValue = 0xeeeeeeeeU,

        // lowest value for special encoding
        kPseudoDefiningOperationBase = 0x80000000U,

        // range of encoded input or constant
        kPseudoDefiningOperationRange = 0x10000000U,
    };
1253 
    // Build a map from operand to defining operation.
    // Operands without a real defining operation (model inputs, NO_VALUE,
    // CONSTANT_COPY) get the pseudo-definition encodings described above.
    // On failure, raises a fatal gtest failure (caller should check
    // HasFatalFailure()).
    // TODO: Replace map with vector?
    void buildDefinitionMap(const ModelBuilder* model, std::map<uint32_t, uint32_t>* defMap) {
        // actual definitions
        ASSERT_LT(model->operationCount(), kPseudoDefiningOperationBase);
        for (uint32_t i = 0, e = model->operationCount(); i < e; i++) {
            const V1_3::Operation& operation = android::nn::convertToV1_3(model->getOperation(i));
            for (uint32_t output : operation.outputs) {
                (*defMap)[output] = i;
            }
        }
        // inputs
        ASSERT_LT(model->inputCount(), kPseudoDefiningOperationRange);
        for (uint32_t i = 0, e = model->inputCount(); i < e; i++) {
            // Encode the input's position in the model input list.
            (*defMap)[model->getInputOperandIndex(i)] = kPseudoDefiningOperationModelInput0 + i;
        }
        // look for NO_VALUE and CONSTANT_COPY
        for (uint32_t i = 0, e = model->operandCount(); i < e; i++) {
            const V1_3::Operand& operand = android::nn::convertToV1_3(model->getOperand(i));
            switch (operand.lifetime) {
                case V1_3::OperandLifeTime::NO_VALUE:
                    (*defMap)[i] = kPseudoDefiningOperationNoValue;
                    break;
                case V1_3::OperandLifeTime::CONSTANT_COPY: {
                    // Only 4-byte constants are expected (see the encoding
                    // comment above); encode the constant's value itself.
                    ASSERT_EQ(operand.location.length, sizeof(uint32_t));
                    uint32_t value;
                    memcpy(&value, model->getPointerToOperandValue(operand.location.offset),
                           sizeof(uint32_t));
                    ASSERT_LT(value, kPseudoDefiningOperationNoValue);
                    (*defMap)[i] = kPseudoDefiningOperationConstantCopy0 + value;
                    break;
                }
                case V1_3::OperandLifeTime::TEMPORARY_VARIABLE:
                case V1_3::OperandLifeTime::SUBGRAPH_INPUT:
                case V1_3::OperandLifeTime::SUBGRAPH_OUTPUT:
                    // already handled
                    break;
                default:
                    FAIL();
                    break;
            }
        }
        // validity check
        ASSERT_EQ(model->operandCount(), defMap->size());
    }
1299 
1300 #ifdef VERBOSE
dump(const char * name,const std::map<uint32_t,uint32_t> * aMap)1301     void dump(const char* name, const std::map<uint32_t, uint32_t>* aMap) {
1302         auto writeNum = [](uint32_t num) {
1303             if (num >= kPseudoDefiningOperationBase) {
1304                 std::cout << "0x" << std::hex << num << std::dec;
1305             } else {
1306                 std::cout << num;
1307             }
1308         };
1309 
1310         std::cout << name << ": { ";
1311         bool gotOne = false;
1312         for (const auto& entry : *aMap) {
1313             if (gotOne) {
1314                 std::cout << ", ";
1315             } else {
1316                 gotOne = true;
1317             }
1318             std::cout << "(";
1319             writeNum(entry.first);
1320             std::cout << ", ";
1321             writeNum(entry.second);
1322             std::cout << ")";
1323         }
1324         std::cout << " }" << std::endl;
1325     }
1326 #endif
1327 
compare(const Operand & operandA,const Operand & operandB)1328     bool compare(const Operand& operandA, const Operand& operandB) {
1329         if (operandA.type != operandB.type || operandA.dimensions != operandB.dimensions ||
1330             operandA.scale != operandB.scale || operandA.zeroPoint != operandB.zeroPoint) {
1331             return false;
1332         }
1333         return true;
1334     }
1335 
1336     // Compare two graphs.  We ignore operand and operation indexes (i.e.,
1337     // two nodes can be the same even if they are numbered differently)
1338     // but we also ignore semantics (e.g., even if an operation kind is
1339     // such that the operand is commutative, we still pay attention to the
1340     // order of its input operands).
1341     //
1342     // The comparison algorithm works by walking modelA from outputs
1343     // towards inputs, along the edge from each operand to its
1344     // defining operation, and then along the edges to the operation's
1345     // input operands.  At each step along the way, we try to match up
1346     // operands and operations from modelA with equivalent operands
1347     // and operations from modelB.
1348     //
1349     // We start by assuming that modelA's outputs and modelB's outputs
1350     // match positionally (e.g., modelA's first output operand is
1351     // equivalent to modelB's first output operand).  Once we've
1352     // discovered two equivalent operands (such as those outputs), we
1353     // place them in a work queue.  We repeatedly pull operands off
1354     // the queue and compare their defining operations and those
1355     // operations' input operands, to discover more pairs of
1356     // equivalent operands.  If we ever find operations that do not
1357     // match (e.g., because operation kind differs), or operands that
1358     // do not match (e.g., because operand type differs); or if we
1359     // ever find a conflict (we've already decided that operand A's
1360     // equivalent operand is B0, but it looks like we need its
1361     // equivalent operand to be B1); then the graphs compare unequal.
1362     // Otherwise, we'll eventually exhaust the work queue, and
1363     // conclude that the graphs compare equal.
1364     //
1365     // As a side effect of the comparison, we produce a map
1366     // *inputsAndOutputsBToA that maps from each of the model input and output
1367     // operand numbers of modelB to the corresponding operand numbers of modelA.
1368     // If the comparison returns false, the contents of the map are undefined.
    // Implements the graph comparison described in the comment block above:
    // walk modelA from outputs toward inputs, pairing up equivalent operands
    // and operations with those of modelB; on success, fill in
    // *inputsAndOutputsBToA.  Uses RETURN_FALSE()/RETURN_TRUE() so failures
    // can be traced under VERBOSE.
    bool compare(const ModelBuilder* modelA, const ModelBuilder* modelB,
                 std::map<uint32_t, uint32_t>* inputsAndOutputsBToA) {
        CHECK(inputsAndOutputsBToA != nullptr);
        EXPECT_TRUE(inputsAndOutputsBToA->empty());

#ifdef VERBOSE
        ::dump("compare(A)", modelA);
        ::dump("compare(B)", modelB);
#endif

        // Quick rejection: equivalent graphs must agree on all counts.
        if (modelA->operandCount() != modelB->operandCount() ||
            modelA->operationCount() != modelB->operationCount() ||
            modelA->inputCount() != modelB->inputCount() ||
            modelA->outputCount() != modelB->outputCount()) {
            RETURN_FALSE();
        }

        // Maps from operand index to index of defining operation.
        std::map<uint32_t, uint32_t> defsA, defsB;
        buildDefinitionMap(modelA, &defsA);
        buildDefinitionMap(modelB, &defsB);
        if (HasFatalFailure()) return false;

        // Maps from operand index in modelA to equivalent operand index
        // in modelB; and from operation index in modelA to equivalent
        // operation index in modelB.
        std::map<uint32_t, uint32_t> equivalentOperandsAToB;
        std::map<uint32_t, uint32_t> equivalentOperationsAToB;

        // Queue of operand indexes from modelA, each of whose defining
        // operations are to be checked for equivalence with modelB.
        std::queue<uint32_t> workQueueOperandsA;

        // Seed operand equivalence map and work queue from model outputs.
        for (uint32_t i = 0, e = modelA->outputCount(); i < e; i++) {
            uint32_t outputA = modelA->getOutputOperandIndex(i);
            uint32_t outputB = modelB->getOutputOperandIndex(i);
            if (!compare(modelA->getOperand(outputA), modelB->getOperand(outputB))) {
#ifdef VERBOSE
                std::cout << "modelA.output[" << i << "] = operand[" << outputA
                          << "] = " << toString(modelA->getOperand(outputA)) << std::endl;
                std::cout << "modelB.output[" << i << "] = operand[" << outputB
                          << "] = " << toString(modelB->getOperand(outputB)) << std::endl;
#endif
                RETURN_FALSE();
            }
            equivalentOperandsAToB[outputA] = outputB;
            workQueueOperandsA.push(outputA);
        }

#ifdef VERBOSE
        dump("defsA", &defsA);
        dump("defsB", &defsB);
#endif

        // Process the queue.
        uint32_t pseudoDefinitionCount = 0;
        while (!workQueueOperandsA.empty()) {
#ifdef VERBOSE
            dump("equivalentOperandsAToB", &equivalentOperandsAToB);
            dump("equivalentOperationsAToB", &equivalentOperationsAToB);
#endif
            uint32_t operandIndexA = workQueueOperandsA.front();
#ifdef VERBOSE
            std::cout << "operandIndexA: " << operandIndexA << std::endl;
#endif
            workQueueOperandsA.pop();
            uint32_t operandIndexB = equivalentOperandsAToB.at(operandIndexA);

            uint32_t operationIndexA = defsA.at(operandIndexA);
            uint32_t operationIndexB = defsB.at(operandIndexB);
            auto it = equivalentOperationsAToB.find(operationIndexA);
            if (it != equivalentOperationsAToB.end()) {
                // Already matched operationIndexA; just confirm consistency.
                if (it->second != operationIndexB) {
                    RETURN_FALSE();
                }
                continue;
            }

            // We haven't identified an equivalent operation for
            // operationIndexA.

            if ((operationIndexA >= kPseudoDefiningOperationBase) !=
                (operationIndexB >= kPseudoDefiningOperationBase)) {
                RETURN_FALSE();
            }
            // Either both operands have pseudo-definitions, or neither
            // does.
            if (operationIndexA >= kPseudoDefiningOperationBase) {
                // Both operands have pseudo-definitions.  The encodings must
                // match exactly (same input position, constant value, or
                // NO_VALUE).
                if (operationIndexA != operationIndexB) {
                    RETURN_FALSE();
                }
                equivalentOperationsAToB[operationIndexA] = operationIndexB;
                ++pseudoDefinitionCount;
                continue;
            }

            // If we get here, neither operation A nor operation B is a
            // pseudo-definition.

            const Operation& operationA = modelA->getOperation(operationIndexA);
            const Operation& operationB = modelB->getOperation(operationIndexB);
            if (operationA.type != operationB.type ||
                operationA.inputs.size() != operationB.inputs.size() ||
                operationA.outputs.size() != operationB.outputs.size()) {
                RETURN_FALSE();
            }
            equivalentOperationsAToB[operationIndexA] = operationIndexB;
            for (uint32_t i = 0, e = operationA.inputs.size(); i < e; i++) {
                uint32_t inputA = operationA.inputs[i];
                uint32_t inputB = operationB.inputs[i];
                auto it = equivalentOperandsAToB.find(inputA);
                if (it != equivalentOperandsAToB.end()) {
                    if (it->second != inputB) {
                        RETURN_FALSE();
                    }
                    continue;
                }
                // We haven't identified an equivalent operand for inputA.
                if (!compare(modelA->getOperand(inputA), modelB->getOperand(inputB))) {
#ifdef VERBOSE
                    std::cout << "modelA.input[" << i << "] = operand[" << inputA
                              << "] = " << toString(modelA->getOperand(inputA)) << std::endl;
                    std::cout << "modelB.input[" << i << "] = operand[" << inputB
                              << "] = " << toString(modelB->getOperand(inputB)) << std::endl;
#endif
                    RETURN_FALSE();
                }
                equivalentOperandsAToB[inputA] = inputB;
                workQueueOperandsA.push(inputA);
            }
        }

        // Validity check
        if (modelA->operandCount() != defsA.size() || modelA->operandCount() != defsB.size() ||
            modelA->operandCount() != equivalentOperandsAToB.size() ||
            modelA->operationCount() + pseudoDefinitionCount != equivalentOperationsAToB.size()) {
            RETURN_FALSE();
        }

        // Build *inputsAndOutputsBToA
        for (uint32_t aInputIndex : modelA->getInputOperandIndexes()) {
            (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aInputIndex)] = aInputIndex;
        }
        for (uint32_t aOutputIndex : modelA->getOutputOperandIndexes()) {
            (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aOutputIndex)] = aOutputIndex;
        }

        RETURN_TRUE();
    }
1520 
1521     /*-------------------------------------------------------------------------------------*/
1522 
1523     // As a side effect of the comparison, we produce a map
1524     // *inputsAndOutputsModelToStep that maps from each of the model input and
1525     // output operand numbers of "model" to the corresponding operand numbers of
1526     // the step model from "step".  If the comparison returns false, the contents
1527     // of the map are undefined.
compare(const ExecutionStep * step,const PartitioningModel * model,std::shared_ptr<Device> device,std::map<uint32_t,uint32_t> * inputsAndOutputsModelToStep)1528     bool compare(const ExecutionStep* step, const PartitioningModel* model,
1529                  std::shared_ptr<Device> device,
1530                  std::map<uint32_t, uint32_t>* inputsAndOutputsModelToStep) {
1531         return (step->getDevice() == device) &&
1532                compare(step->getStepModel(),
1533                        reinterpret_cast<const ModelBuilder*>(model->getHandle()),
1534                        inputsAndOutputsModelToStep);
1535     }
1536 
    // Checks that "logicalStep" is an ExecutionStep on "device" whose step
    // model is equivalent to "model", and that the step's input/output remap
    // vectors and sets match the expected values (which are expressed in
    // terms of "model"'s operand numbering and translated via the map
    // produced by the comparison).  Raises fatal gtest failures on mismatch.
    void compare(const std::shared_ptr<LogicalStep> logicalStep, const PartitioningModel* model,
                 std::shared_ptr<Device> device, const RemapVectorType& modelInputs,
                 const RemapVectorType& modelOutputs, const RemapVectorType& tempsAsStepModelInputs,
                 const StepModelOutputSetType& tempsAsStepModelOutputs,
                 const RemapVectorType& outputsAsStepModelInputs,
                 const std::set<uint32_t>& modelOutputsThatAreDownstreamInputs) {
        ASSERT_TRUE(logicalStep->isExecution());
        const ExecutionStep* step = logicalStep->executionStep();
        // Maps "model" input/output operand numbers to step model operand numbers.
        std::map<uint32_t, uint32_t> inputsAndOutputsModelToStep;
        ASSERT_NO_FATAL_FAILURE(
                ASSERT_TRUE(compare(step, model, device, &inputsAndOutputsModelToStep)));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelInputs(),
                                        modelInputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelOutputs(),
                                        modelOutputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                        step->getTempsAsStepModelInputs(), tempsAsStepModelInputs));
        ASSERT_TRUE(compareStepModelOutputSets(inputsAndOutputsModelToStep,
                                               step->getTempsAsStepModelOutputs(),
                                               tempsAsStepModelOutputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                        step->getOutputsAsStepModelInputs(),
                                        outputsAsStepModelInputs));
        ASSERT_TRUE(modelOutputsThatAreDownstreamInputs ==
                    step->getModelOutputsThatAreDownstreamInputs());
    }
1563 
1564    private:
compareRemapVectors(const std::map<uint32_t,uint32_t> & inputsAndOutputsModelToStep,const RemapVectorType & step,RemapVectorType model)1565     static bool compareRemapVectors(const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
1566                                     const RemapVectorType& step, RemapVectorType model) {
1567         std::transform(model.begin(), model.end(), model.begin(),
1568                        [&inputsAndOutputsModelToStep](const RemapVectorType::value_type& val) {
1569                            return std::make_pair(val.first,
1570                                                  inputsAndOutputsModelToStep.at(val.second));
1571                        });
1572         return step == model;
1573     }
1574 
compareStepModelOutputSets(const std::map<uint32_t,uint32_t> & inputsAndOutputsModelToStep,const StepModelOutputSetType & step,const StepModelOutputSetType & model)1575     static bool compareStepModelOutputSets(
1576             const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
1577             const StepModelOutputSetType& step, const StepModelOutputSetType& model) {
1578         StepModelOutputSetType modelTransformed;
1579         std::transform(
1580                 model.begin(), model.end(), std::inserter(modelTransformed, modelTransformed.end()),
1581                 [&inputsAndOutputsModelToStep](const StepModelOutputSetType::value_type& val) {
1582                     return std::make_pair(val.first, inputsAndOutputsModelToStep.at(val.second));
1583                 });
1584         return step == modelTransformed;
1585     }
1586 };
1587 
TEST_F(PartitioningTest, SimpleModel) {
    // Build a two-operation model: operation kind 0 combines opnd0 and opnd1
    // into opnd2; operation kind 1 combines opnd2 and opnd3 into opnd4.
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addFloatOperand();
    uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3);
    model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (two devices are each capable of everything, one is the best).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesA = makeDevices({{"bad", 0.9, ~0U}, {"good", 0.5, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "good");

    // Simple partition (two devices are each capable of everything, none better than CPU).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesC = makeDevices({{"bad", 1.1, ~0U}, {"bad2", 1.0, ~0U}});
    ExecutionPlan planC;
    ASSERT_EQ(model.partitionTheWork(devicesC, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planC),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planC.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planC.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(planC.forTest_simpleGetDevice(), DeviceManager::getCpuDevice());

    // Compound partition (two devices, each is capable of one of the
    // two operations).  We could do more extensive checking here --
    // for example, verify that each step within the plan has the
    // correct (model and step model)x(inputs and outputs).
    const auto devicesB = makeDevices({{"0", 0.9, 1 << 0}, {"1", 0.5, 1 << 1}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planB.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(2));
    {
        // Build a model to compare against the step model from stepsB[0].
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_0(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[0],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},                         // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, b0Opnd2}},  // tempsAsStepModelOutputs
                        RemapVectorType{},                         // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[1].
        PartitioningModel modelB1;
        uint32_t b1Opnd2 = modelB1.addFloatOperand();
        uint32_t b1Opnd3 = modelB1.addFloatOperand();
        uint32_t b1Opnd4 = modelB1.addOperation2To1V1_0(1, b1Opnd2, b1Opnd3);
        // Note: In the partitioning algorithm, step model inputs follow
        // model inputs.  In the original model "model", opnd2 is not
        // an input; so in the step model "modelB1", the corresponding
        // input b1Opnd2 is a step model input, and must follow the
        // model input b1Opnd3.
        modelB1.identifyInputsAndOutputs({b1Opnd3, b1Opnd2}, {b1Opnd4});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                stepsB[1], &modelB1, devicesB[1], RemapVectorType{{opnd3, b1Opnd3}},  // modelInputs
                RemapVectorType{{opnd4, b1Opnd4}},  // modelOutputs
                RemapVectorType{{opnd2, b1Opnd2}},  // tempsAsStepModelInputs
                StepModelOutputSetType{},           // tempsAsStepModelOutputs
                RemapVectorType{},                  // outputsAsStepModelInputs
                {}));                               // modelOutputsThatAreDownstreamInputs
    }
}
1680 
TEST_F(PartitioningTest, SliceModel) {
    // Whitebox test of HAL-version-based "slicing": the model mixes operations
    // introduced at V1_0, V1_1, V1_2, and V1_3, and we verify how the
    // partitioner distributes them across devices of different HAL versions.
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd0, opnd1);
    uint32_t opnd4 = model.addOperation2To1V1_1(0, opnd0, opnd1);
    uint32_t opnd5 = model.addOperation2To1V1_2(0, opnd2, opnd3);
    uint32_t opnd6 = model.addOperation1To1V1_3(0, opnd2);
    model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5, opnd6});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (V1_0, V1_1, V1_2, V1_3 devices are available; V1_3 has best perf).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    // NOTE(review): the trailing ~0U arguments are presumably per-HAL-version
    // operation masks ("supports everything") -- confirm against makeDevices.
    const auto devicesA = makeDevices({{"V1_0", 0.8, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.5, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_3");

    // Compound partition (V1_0, V1_1, V1_2 devices are available, in decreasing
    // order of performance; model is distributed across all three devices).
    const auto devicesB = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planB.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(4));
    {
        // Build a model to compare against the step model from stepsB[0].
        // This step holds the lone V1_1 operation, so it should land on the
        // "V1_1" device (devicesB[1]).
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_1(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[1],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{{opnd4, b0Opnd2}},                    // modelOutputs
                        RemapVectorType{},         // tempsAsStepModelInputs
                        StepModelOutputSetType{},  // tempsAsStepModelOutputs
                        RemapVectorType{},         // outputsAsStepModelInputs
                        {}));                      // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[1].
        // This step holds both V1_0 operations, so it should land on the
        // "V1_0" device (devicesB[0]).
        PartitioningModel modelB1;
        uint32_t b1Opnd0 = modelB1.addFloatOperand();
        uint32_t b1Opnd1 = modelB1.addFloatOperand();
        uint32_t b1Opnd2 = modelB1.addOperation2To1V1_0(0, b1Opnd0, b1Opnd1);
        uint32_t b1Opnd3 = modelB1.addOperation2To1V1_0(1, b1Opnd0, b1Opnd1);
        modelB1.identifyInputsAndOutputs({b1Opnd0, b1Opnd1}, {b1Opnd2, b1Opnd3});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        // Note that this is also an important test that we can detect
        // modelOutputsThatAreDownstreamInputs.
        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[1], &modelB1, devicesB[0],
                        RemapVectorType{{opnd0, b1Opnd0}, {opnd1, b1Opnd1}},  // modelInputs
                        RemapVectorType{{opnd2, b1Opnd2}},                    // modelOutputs
                        RemapVectorType{},                         // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd3, b1Opnd3}},  // tempsAsStepModelOutputs
                        RemapVectorType{},                         // outputsAsStepModelInputs
                        {0u}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[2].
        // This step holds the V1_3 operation, so it should land on the
        // "V1_3" device (devicesB[3]).
        PartitioningModel modelB2;
        uint32_t b2Opnd0 = modelB2.addFloatOperand();
        uint32_t b2Opnd1 = modelB2.addOperation1To1V1_3(0, b2Opnd0);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs.
        modelB2.identifyInputsAndOutputs({b2Opnd0}, {b2Opnd1});
        modelB2.finish();
        ASSERT_TRUE(modelB2.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[2], &modelB2, devicesB[3], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd6, b2Opnd1}},                    // modelOutputs
                        RemapVectorType{},                  // tempsAsStepModelInputs
                        StepModelOutputSetType{},           // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b2Opnd0}},  // outputsAsStepModelInputs
                        {}));                               // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[3].
        // This step holds the V1_2 operation, so it should land on the
        // "V1_2" device (devicesB[2]).
        PartitioningModel modelB3;
        uint32_t b3Opnd0 = modelB3.addFloatOperand();
        uint32_t b3Opnd1 = modelB3.addFloatOperand();
        uint32_t b3Opnd2 = modelB3.addOperation2To1V1_2(0, b3Opnd0, b3Opnd1);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs.  In the original model "model", opnd3 is a temp and
        // opnd2 is a model output; so in the step model "modelB3", the
        // corresponding inputs b3Opnd1 and b3Opnd0 must appear in
        // that order.
        modelB3.identifyInputsAndOutputs({b3Opnd1, b3Opnd0}, {b3Opnd2});
        modelB3.finish();
        ASSERT_TRUE(modelB3.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[3], &modelB3, devicesB[2], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd5, b3Opnd2}},                    // modelOutputs
                        RemapVectorType{{opnd3, b3Opnd1}},  // tempsAsStepModelInputs
                        StepModelOutputSetType{},           // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b3Opnd0}},  // outputsAsStepModelInputs
                        {}));                               // modelOutputsThatAreDownstreamInputs
    }

    // TODO: Make sure this still works when we have multiple devices
    // of same version available for slicing. An easy (?) choice would
    // be to route the two different V1_0 operations to different
    // devices.
}
1815 
TEST_F(PartitioningTest, SliceModelToEmpty) {
    // A single-operation model whose only operation requires a V1_3 driver.
    PartitioningModel model;
    const uint32_t input = model.addFloatOperand();
    const uint32_t output = model.addOperation1To1V1_3(0, input);
    model.identifyInputsAndOutputs({input}, {output});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Only the V1_3 device can handle any operations in the model, so we
    // expect a SIMPLE plan targeting that device even though it advertises
    // the worst performance of the four.  No need to compare the original
    // model to the model from the plan -- we didn't actually do any
    // partitioning.
    const auto devices = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                      {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                      {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                      {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan plan;
    const int partitionStatus = model.partitionTheWork(
            devices, ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(partitionStatus, ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    const auto chosenDevice = plan.forTest_simpleGetDevice();
    ASSERT_NE(chosenDevice.get(), nullptr);
    ASSERT_EQ(chosenDevice->getName(), "V1_3");
}
1840 
TEST_F(PartitioningTest, Cpu) {
    // Here's a model where some operations execute only on the Cpu.
    // To make things interesting, we produce three partitions --
    // device, cpu, same-device.

    static const uint32_t kCpuOp = 1;
    static const uint32_t kDevOp = 2;

    // The single driver device supports only kDevOp; kCpuOp operations must
    // therefore fall back to the CPU device.
    const auto devices = makeDevices({{"1", 0.5, 1 << kDevOp}});

    PartitioningModel model;

    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();

    // First pair of operations: device-only.
    uint32_t opnd2 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd2);

    // Second pair: CPU-only, consuming the device partition's outputs.
    uint32_t opnd4 = model.addOperation2To1V1_0(kCpuOp, opnd0, opnd3);
    uint32_t opnd5 = model.addOperation2To1V1_0(kCpuOp, opnd2, opnd4);

    uint32_t opnd6 = model.addFloatOperand();

    // Third pair: back on the device, consuming results from both earlier
    // partitions -- hence "same-device" as the first partition.
    uint32_t opnd7 = model.addOperation2To1V1_0(kDevOp, opnd3, opnd5);
    uint32_t opnd8 = model.addOperation2To1V1_0(kDevOp, opnd6, opnd7);

    model.identifyInputsAndOutputs({opnd0, opnd1, opnd6}, {opnd4, opnd8});
    model.finish();
    ASSERT_TRUE(model.isValid());

    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(3));
    {
        const auto& step0 = steps[0];

        // Build a model to compare against the step model from steps[0]
        // (the first device partition: both kDevOp operations).
        PartitioningModel model0;
        uint32_t m0Opnd0 = model0.addFloatOperand();
        uint32_t m0Opnd1 = model0.addFloatOperand();
        uint32_t m0Opnd2 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd1);
        uint32_t m0Opnd3 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd2);
        model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2, m0Opnd3});
        model0.finish();
        ASSERT_TRUE(model0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(step0, &model0, devices[0],
                        RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},  // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, m0Opnd2},
                                               {opnd3, m0Opnd3}},  // tempsAsStepModelOutputs
                        RemapVectorType{},                         // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        const auto& step1 = steps[1];

        // Build a model to compare against the step model from steps[1]
        // (the CPU partition: both kCpuOp operations).
        PartitioningModel model1;
        uint32_t m1Opnd0 = model1.addFloatOperand();
        uint32_t m1Opnd3 = model1.addFloatOperand();
        uint32_t m1Opnd4 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd0, m1Opnd3);
        uint32_t m1Opnd2 = model1.addFloatOperand();
        uint32_t m1Opnd5 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd2, m1Opnd4);
        model1.identifyInputsAndOutputs({m1Opnd0, m1Opnd3, m1Opnd2}, {m1Opnd4, m1Opnd5});
        model1.finish();
        ASSERT_TRUE(model1.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                step1, &model1, DeviceManager::getCpuDevice(),
                RemapVectorType{{opnd0, m1Opnd0}},                    // modelInputs
                RemapVectorType{{opnd4, m1Opnd4}},                    // modelOutputs
                RemapVectorType{{opnd3, m1Opnd3}, {opnd2, m1Opnd2}},  // tempsAsStepModelInputs
                StepModelOutputSetType{{opnd5, m1Opnd5}},             // tempsAsStepModelOutputs
                RemapVectorType{},                                    // outputsAsStepModelInputs
                {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        const auto& step2 = steps[2];

        // Build a model to compare against the step model from steps[2]
        // (the second device partition: the final two kDevOp operations).
        PartitioningModel model2;
        uint32_t m2Opnd3 = model2.addFloatOperand();
        uint32_t m2Opnd5 = model2.addFloatOperand();
        uint32_t m2Opnd7 = model2.addOperation2To1V1_0(kDevOp, m2Opnd3, m2Opnd5);
        uint32_t m2Opnd6 = model2.addFloatOperand();
        uint32_t m2Opnd8 = model2.addOperation2To1V1_0(kDevOp, m2Opnd6, m2Opnd7);
        model2.identifyInputsAndOutputs({m2Opnd6, m2Opnd3, m2Opnd5}, {m2Opnd8});
        model2.finish();
        ASSERT_TRUE(model2.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                step2, &model2, devices[0], RemapVectorType{{opnd6, m2Opnd6}},  // modelInputs
                RemapVectorType{{opnd8, m2Opnd8}},                              // modelOutputs
                RemapVectorType{{opnd3, m2Opnd3}, {opnd5, m2Opnd5}},  // tempsAsStepModelInputs
                StepModelOutputSetType{},                             // tempsAsStepModelOutputs
                RemapVectorType{},                                    // outputsAsStepModelInputs
                {}));  // modelOutputsThatAreDownstreamInputs
    }
}
1948 
TEST_F(PartitioningTest, SetPartitioning) {
    // Model with two operations: operation 0 yields an operand of unknown
    // dimensions (Dimensioned::NO), which is then consumed by operation 1.
    PartitioningModel model;
    const uint32_t opnd0 = model.addFloatOperand();
    const uint32_t opnd1 = model.addFloatOperand();
    const uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1, Dimensioned::NO);
    const uint32_t opnd3 = model.addFloatOperand();
    const uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3);
    model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // One device that can and should execute operation 0.
    const auto devices = makeDevices({{"hw", 0.5, (1 << 0)}});

    // kPartitioningNo: partitioning should not even be attempted, so we
    // expect a SIMPLE plan on CPU.  No need to compare the original model to
    // the model from the plan -- we didn't actually do any partitioning.
    {
        PartitioningCompilation compilation(&model, devices);
        ASSERT_EQ(compilation.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
        ASSERT_EQ(compilation.failPartitioning(), Result::NO_ERROR);
        ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
        const auto& plan = compilation.getExecutionPlan();
        ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(plan.forTest_simpleGetDevice(), DeviceManager::getCpuDevice());
    }

    // kPartitioningWithFallback: partitioning is attempted, we simulate a
    // recoverable failure, and compilation falls back to CPU with a SIMPLE
    // plan, ultimately returning success.  No need to compare the original
    // model to the model from the plan -- we didn't actually do any
    // partitioning.
    {
        PartitioningCompilation compilation(&model, devices);
        ASSERT_EQ(compilation.setPartitioning(DeviceManager::kPartitioningWithFallback),
                  Result::NO_ERROR);
        ASSERT_EQ(compilation.failPartitioning(), Result::NO_ERROR);
        ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
        const auto& plan = compilation.getExecutionPlan();
        ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(plan.forTest_simpleGetDevice(), DeviceManager::getCpuDevice());
    }

    // kPartitioningWithoutFallback: partitioning is attempted, we simulate a
    // recoverable failure, and compilation fails outright.
    {
        PartitioningCompilation compilation(&model, devices);
        ASSERT_EQ(compilation.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
                  Result::NO_ERROR);
        ASSERT_EQ(compilation.failPartitioning(), Result::NO_ERROR);
        ASSERT_EQ(compilation.finish(), Result::OP_FAILED);
        ASSERT_EQ(compilation.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::ERROR);
    }
}
1996 
1997 // Regression test for http://b/69166603:
1998 //     "partitioned compilation and execution yields wrong results when model output is step model
1999 //     input"
TEST_F(PartitioningTest,ModelOutputAsStepModelInput)2000 TEST_F(PartitioningTest, ModelOutputAsStepModelInput) {
2001     PartitioningModel model;
2002     uint32_t opnd0 = model.addFloatOperand();
2003     uint32_t opnd1 = model.addFloatOperand();
2004     uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
2005     uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd2, opnd2);
2006     model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd3});
2007     model.finish();
2008     ASSERT_TRUE(model.isValid());
2009 
2010     // Compound partition (two devices, each is capable of one of the
2011     // two operations).  We could do more extensive checking here --
2012     // for example, verify that each step within the plan has the
2013     // correct (model and step model)x(inputs and outputs).
2014     const auto devices = makeDevices({{"0", 0.5, 1 << 0}, {"1", 0.5, 1 << 1}});
2015     ExecutionPlan plan;
2016     ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
2017                                      ExecutePriority::DEFAULT, {}, &plan),
2018               ANEURALNETWORKS_NO_ERROR);
2019     EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
2020     ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
2021     const auto& steps = plan.forTest_compoundGetSteps();
2022     ASSERT_EQ(steps.size(), size_t(2));
2023     {
2024         // Build a model to compare against the step model from steps[0].
2025         PartitioningModel model0;
2026         uint32_t m0Opnd0 = model0.addFloatOperand();
2027         uint32_t m0Opnd1 = model0.addFloatOperand();
2028         uint32_t m0Opnd2 = model0.addOperation2To1V1_0(0, m0Opnd0, m0Opnd1);
2029         model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2});
2030         model0.finish();
2031         ASSERT_TRUE(model0.isValid());
2032         ASSERT_NO_FATAL_FAILURE(
2033                 compare(steps[0], &model0, devices[0],
2034                         RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}},  // modelInputs
2035                         RemapVectorType{{opnd2, m0Opnd2}},                    // modelOutputs
2036                         RemapVectorType{},         // tempsAsStepModelInputs
2037                         StepModelOutputSetType{},  // tempsAsStepModelOutputs
2038                         RemapVectorType{},         // outputsAsStepModelInputs
2039                         {0u}));                    // modelOutputsThatAreDownstreamInputs
2040     }
2041     {
2042         // Build a model to compare against the step model from steps[1].
2043         PartitioningModel model1;
2044         uint32_t m1Opnd2 = model1.addFloatOperand();
2045         uint32_t m1Opnd3 = model1.addOperation2To1V1_0(1, m1Opnd2, m1Opnd2);
2046         model1.identifyInputsAndOutputs({m1Opnd2}, {m1Opnd3});
2047         model1.finish();
2048         ASSERT_TRUE(model1.isValid());
2049 
2050         ASSERT_NO_FATAL_FAILURE(
2051                 compare(steps[1], &model1, devices[1], RemapVectorType{},  // modelInputs
2052                         RemapVectorType{{opnd3, m1Opnd3}},                 // modelOutputs
2053                         RemapVectorType{},                                 // tempsAsStepModelInputs
2054                         StepModelOutputSetType{},           // tempsAsStepModelOutputs
2055                         RemapVectorType{{opnd2, m1Opnd2}},  // outputsAsStepModelInputs
2056                         {}));                               // modelOutputsThatAreDownstreamInputs
2057     }
2058 }
2059 
TEST_F(PartitioningTest, OemOperations) {
    // Trivial model consisting solely of an OEM operation.
    PartitioningModel model;
    const uint32_t opndIn = model.addFloatOperand();
    const uint32_t opndOut = model.addOperationOEM1To1(opndIn);
    model.identifyInputsAndOutputs({opndIn}, {opndOut});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // The best driver that can run an OEM operation must be chosen, even when
    // it is not better than the CPU.  No need to compare the original model to
    // the model from the plan -- we didn't actually do any partitioning.
    {
        const auto devices = makeDevices({{"badOEM", 1.5, ~0U, PartitioningDriver::OEMYes},
                                          {"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo},
                                          {"goodOEM", 1.2, ~0U, PartitioningDriver::OEMYes}});
        PartitioningCompilation compilation(&model, devices);
        ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
        const auto& plan = compilation.getExecutionPlan();
        ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr);
        ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "goodOEM");
    }

    // No driver can run the OEM operation: expect an error.
    {
        const auto devices = makeDevices({{"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo}});
        PartitioningCompilation compilation(&model, devices);
        ASSERT_EQ(compilation.finish(), Result::BAD_DATA);
    }

    // A driver that can SUPPORT but not PREPARE the OEM operation: expect an
    // error.
    {
        const auto devices =
                makeDevices({{"indecisiveOEM", 0.5, ~0U, PartitioningDriver::OEMIndecisive}});
        PartitioningCompilation compilation(&model, devices);
        ASSERT_NE(compilation.finish(), Result::NO_ERROR);
    }

    // No drivers at all (only CPU fallback): expect an error.
    {
        PartitioningCompilation compilation(&model, makeDevices({}) /* no drivers */);
        ASSERT_EQ(compilation.finish(), Result::BAD_DATA);
    }
}
2098 
TEST_F(PartitioningTest, RelaxedFP) {
    // Two devices: "f32" performs best for a non-relaxed model, while "f16"
    // performs best once the model is relaxed to FP16 computation.
    const auto devices = makeDevices({// Best choice for non-relaxed model.
                                      {"f32", 0.8, 0.9 /* relaxed */, ~0U},
                                      // Best choice for relaxed model.
                                      {"f16", 0.9, 0.8 /* relaxed */, ~0U}});

    // Builds a trivial one-operation model with the given relaxation setting
    // and checks that the resulting SIMPLE plan targets expectedDevice.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto runTrivialTest = [&devices](bool relax, const char* expectedDevice) {
        SCOPED_TRACE(expectedDevice);
        PartitioningModel model;
        const uint32_t lhs = model.addFloatOperand();
        const uint32_t rhs = model.addFloatOperand();
        const uint32_t result = model.addOperation2To1V1_0(0, lhs, rhs);
        model.identifyInputsAndOutputs({lhs, rhs}, {result});
        model.relaxComputationFloat32toFloat16(relax);
        model.finish();
        ASSERT_TRUE(model.isValid());

        ExecutionPlan plan;
        ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                         ExecutePriority::DEFAULT, {}, &plan),
                  ANEURALNETWORKS_NO_ERROR);
        EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
        ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), expectedDevice);
    };

    ASSERT_NO_FATAL_FAILURE(runTrivialTest(false, "f32"));
    ASSERT_NO_FATAL_FAILURE(runTrivialTest(true, "f16"));
}
2131 
TEST_F(PartitioningTest,Perf)2132 TEST_F(PartitioningTest, Perf) {
2133     // The various type names used here are confusing.
2134     //
2135     // OperandType (from HAL file), WrapperType (from NeuralNetworksWrapper.h),
2136     // and OperandCode (from NeuralNetworks.h) are different enums representing
2137     // the same type kind -- e.g., OperandType::FLOAT32, WrapperType::FLOAT32,
2138     // ANEURALNETWORKS_FLOAT32.  Corresponding enumerators have the same value.
2139     //
2140     // WrapperOperandType is the NeuralNetworksWrapper.h representation of a
2141     // full operand type (WrapperType plus dimensions plus other attributes).
2142 
2143     auto TestType = [](V1_3::OperandType operandType) {
2144         if (operandType == V1_3::OperandType::SUBGRAPH) {
2145             // SUBGRAPH capabilities are handled differently.
2146             return;
2147         }
2148         SCOPED_TRACE(toString(operandType));
2149         // Trivial model consisting solely of OEM operation.  We
2150         // pick OEM operation because this allows us to use
2151         // inputs and outputs of any number and type.
2152         PartitioningModel model;
2153         uint32_t opndIn = model.addOperand(static_cast<WrapperType>(operandType));
2154         uint32_t opndOut = model.addOperationOEM1To1(opndIn);
2155         model.identifyInputsAndOutputs({opndIn}, {opndOut});
2156         model.finish();
2157         ASSERT_TRUE(model.isValid());
2158 
2159         const V1_3::Capabilities baseCapabilities = ::android::nn::makeCapabilities(0.5);
2160 
2161         {
2162             // better than base
2163             V1_3::Capabilities goodCapabilities = baseCapabilities;
2164             update(&goodCapabilities, operandType, 0.25);
2165 
2166             const auto devices =
2167                     makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
2168                                  {"good", goodCapabilities, ~0U, PartitioningDriver::OEMYes}});
2169 
2170             // Verify that model will be executed on "good".
2171             // No need to compare the original model to the model from the plan -- we
2172             // didn't actually do any partitioning.
2173             ExecutionPlan plan;
2174             ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
2175                                              ExecutePriority::DEFAULT, {}, &plan),
2176                       ANEURALNETWORKS_NO_ERROR);
2177             EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
2178             ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
2179             ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "good");
2180         }
2181 
2182         {
2183             // worse than base
2184             V1_3::Capabilities badCapabilities = baseCapabilities;
2185             update(&badCapabilities, operandType, 0.75);
2186             const auto devices =
2187                     makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
2188                                  {"bad", badCapabilities, ~0U, PartitioningDriver::OEMYes}});
2189 
2190             // Verify that model will be executed on "base".
2191             // No need to compare the original model to the model from the plan -- we
2192             // didn't actually do any partitioning.
2193             ExecutionPlan plan;
2194             ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
2195                                              ExecutePriority::DEFAULT, {}, &plan),
2196                       ANEURALNETWORKS_NO_ERROR);
2197             EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
2198             ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
2199             ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "base");
2200         }
2201     };
2202 
2203     for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MIN);
2204          type <= static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MAX); ++type) {
2205         TestType(static_cast<V1_3::OperandType>(type));
2206     }
2207     for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MIN);
2208          type <= static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MAX); ++type) {
2209         TestType(static_cast<V1_3::OperandType>(type));
2210     }
2211 }
2212 
TEST_F(PartitioningTest, ZeroInputStepModel) {
    // Build a two-operation model in which the first operation (op0, a V1_3
    // operation) consumes only a constant zero operand, so the partition that
    // contains it has no model input at all.
    PartitioningModel model;
    const uint32_t zeroOperand = model.addFloatZeroOperand();
    const uint32_t op0Result = model.addOperation1To1V1_3(0, zeroOperand);
    const uint32_t modelInput = model.addFloatOperand();
    const uint32_t modelOutput = model.addOperation2To1V1_0(1, op0Result, modelInput);
    model.identifyInputsAndOutputs({modelInput}, {modelOutput});
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // Partitioning would assign op0 to deviceA and op1 to deviceB. Because the
    // deviceA partition would have no model input, the compilation must instead
    // fall back to a single-step plan on the CPU device.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    PartitioningCompilation compilation(&model, devices);
    ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
    const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(compilation.getExecutionPlan(), {cpuDeviceName});
}
2231 
TEST_F(PartitioningTest, ZeroOutputStepModel) {
    // Build a two-operation model in which the result of the V1_3 operation
    // (op0) is both the sole model output and an input to the V1_0 operation
    // (op1), whose own result is not a model output.
    PartitioningModel model;
    const uint32_t input0 = model.addFloatOperand();
    const uint32_t op0Result = model.addOperation1To1V1_3(0, input0);
    const uint32_t input1 = model.addFloatOperand();
    model.addOperation2To1V1_0(1, op0Result, input1);
    model.identifyInputsAndOutputs({input0, input1}, {op0Result});
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // Partitioning would assign op0 to deviceA and op1 to deviceB. Because the
    // deviceB partition would produce no model output, the compilation must
    // instead fall back to a single-step plan on the CPU device.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    PartitioningCompilation compilation(&model, devices);
    ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
    const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(compilation.getExecutionPlan(), {cpuDeviceName});
}
2250 
// Test dynamic temporaries and related parts of the partitioning implementation.
//
// opnd0 = model input                   // tensor to pad
// opnd1 = model input                   // paddings
// opnd2 = PAD(opnd0, opnd1)             // model output
// opnd3 = PAD(opnd0, opnd1)
// opnd4 = ADD(opnd2, opnd3, FUSED_NONE) // model output
2258 class DynamicTemporariesTest : public PartitioningTest {
2259    protected:
2260     // Call these functions in sequence in order to perform the test.
2261     // Call to declareOutputDimensions() can be omitted (see the default values below).
2262     // Call to declareHalVersions() can be omitted (defaults to HalVersion::LATEST).
2263     void declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,
2264                                  bool opnd3PartitionOutputSpecified,
2265                                  bool opnd4ModelOutputSpecified);
2266     void declareHalVersions(HalVersion padDeviceVersion, HalVersion addDeviceVersion);
2267     void makeModelAndValidate();
2268     void compileModelAndComparePlan(bool noFallback = true);
2269     void executeCompilationAndCompareOutput(bool opnd2ModelOutputBigEnough,
2270                                             bool opnd4ModelOutputBigEnough);
2271 
2272     // set by declareOutputDimensions()
2273     bool mOpnd2ModelAndPartitionOutputSpecified = false;
2274     bool mOpnd3PartitionOutputSpecified = false;
2275     bool mOpnd4ModelOutputSpecified = false;
2276 
2277     // set by declareHalVersions()
2278     HalVersion mPadDeviceVersion = HalVersion::LATEST;
2279     HalVersion mAddDeviceVersion = HalVersion::LATEST;
2280     HalVersion mMinDeviceVersion = HalVersion::LATEST;  // minimum of the other two device versions
2281 
2282     // created by makeModelAndValidate()
2283     std::optional<PartitioningModel> mModel;
2284     std::vector<uint32_t> mOpnds;
2285 
2286     // created by compileModelAndComparePlan();
2287     std::optional<PartitioningCompilation> mCompilation;
2288 
supportsOutputOfUnknownRank(HalVersion version)2289     static bool supportsOutputOfUnknownRank(HalVersion version) {
2290         return version >= HalVersion::V1_2;
2291     }
2292 
dimensionedOutput(HalVersion version,bool specified)2293     static Dimensioned dimensionedOutput(HalVersion version, bool specified) {
2294         return specified ? Dimensioned::YES_4
2295                          : supportsOutputOfUnknownRank(version) ? Dimensioned::NO
2296                                                                 : Dimensioned::RANK_1;
2297     }
2298 };
2299 
declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,bool opnd3PartitionOutputSpecified,bool opnd4ModelOutputSpecified)2300 void DynamicTemporariesTest::declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,
2301                                                      bool opnd3PartitionOutputSpecified,
2302                                                      bool opnd4ModelOutputSpecified) {
2303     ASSERT_FALSE(mModel.has_value());
2304     mOpnd2ModelAndPartitionOutputSpecified = opnd2ModelAndPartitionOutputSpecified;
2305     mOpnd3PartitionOutputSpecified = opnd3PartitionOutputSpecified;
2306     mOpnd4ModelOutputSpecified = opnd4ModelOutputSpecified;
2307 }
2308 
declareHalVersions(HalVersion padDeviceVersion,HalVersion addDeviceVersion)2309 void DynamicTemporariesTest::declareHalVersions(HalVersion padDeviceVersion,
2310                                                 HalVersion addDeviceVersion) {
2311     ASSERT_FALSE(mModel.has_value());
2312     mPadDeviceVersion = padDeviceVersion;
2313     mAddDeviceVersion = addDeviceVersion;
2314     mMinDeviceVersion = min(padDeviceVersion, addDeviceVersion);
2315 }
2316 
// Builds the test model described at the top of this class:
//   opnd2 = PAD(opnd0, opnd1)              (model output)
//   opnd3 = PAD(opnd0, opnd1)
//   opnd4 = ADD(opnd2, opnd3, FUSED_NONE)  (model output)
// Each output's dimension mode is chosen per the earlier declare*() calls
// and the minimum device HAL version.
void DynamicTemporariesTest::makeModelAndValidate() {
    ASSERT_FALSE(mModel.has_value());
    mModel = PartitioningModel();

    // Activation scalar consumed by ADD.
    uint32_t opndActivation = mModel->addIntScalarOperand(ANEURALNETWORKS_FUSED_NONE);

    uint32_t opnd0 = mModel->addFloatOperand(Dimensioned::YES_2);  // tensor to pad
    uint32_t opnd1 = mModel->addIntOperand(Dimensioned::RANK_2);   // paddings
    uint32_t opnd2 = mModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_PAD, {opnd0, opnd1}, WrapperType::TENSOR_FLOAT32,
            dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
    uint32_t opnd3 = mModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_PAD, {opnd0, opnd1}, WrapperType::TENSOR_FLOAT32,
            dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
    uint32_t opnd4 = mModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_ADD, {opnd2, opnd3, opndActivation}, WrapperType::TENSOR_FLOAT32,
            dimensionedOutput(mMinDeviceVersion, mOpnd4ModelOutputSpecified));
    mModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4});
    mModel->finish();
    ASSERT_TRUE(mModel->isValid());

    // Saved so later steps can refer to operands by their role.
    mOpnds = {opnd0, opnd1, opnd2, opnd3, opnd4};
}
2340 
// Compiles mModel against two devices -- "pad", which supports only PAD, and
// "add", which supports only ADD -- and verifies the resulting plan in detail.
//
// When noFallback is true: expects a two-step COMPOUND plan (PAD partition on
// devices[0], ADD partition on devices[1]); compares each step model against a
// reference model reconstructed here; and expects operand mOpnds[3] to be a
// dynamic temporary exactly when its dimensions were left unspecified.
// When noFallback is false: expects the no-fallback compilation to fail with
// OP_FAILED, then recompiles with fallback enabled and expects a SIMPLE plan
// on the CPU device.
void DynamicTemporariesTest::compileModelAndComparePlan(bool noFallback) {
    ASSERT_TRUE(mModel.has_value());
    ASSERT_TRUE(!mCompilation.has_value());

    auto devices = makeDevices({{"pad",
                                 0.9,
                                 0U,
                                 PartitioningDriver::OEMNo,
                                 mPadDeviceVersion,
                                 {V1_3::OperationType::PAD}},
                                {"add",
                                 0.9,
                                 0U,
                                 PartitioningDriver::OEMNo,
                                 mAddDeviceVersion,
                                 {V1_3::OperationType::ADD}}});

    mCompilation = PartitioningCompilation(&mModel.value(), devices);
    ASSERT_EQ(mCompilation->setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    if (noFallback) {
        ASSERT_EQ(mCompilation->finish(), Result::NO_ERROR);
        const ExecutionPlan& planA = mCompilation->getExecutionPlan();
        // Only a partition-boundary operand with unspecified dimensions
        // (opnd3) becomes a dynamic temporary.
        EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries() ==
                    (mOpnd3PartitionOutputSpecified ? DynamicTemporariesType{}
                                                    : DynamicTemporariesType{mOpnds[3]}));
        ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
        const auto& stepsA = planA.forTest_compoundGetSteps();
        ASSERT_EQ(stepsA.size(), size_t(2));
        {
            // Build a model to compare against the step model from stepsA[0].
            // Note the role swap below: a0Opnd2 corresponds to mOpnds[3] (the
            // temporary) and a0Opnd3 to mOpnds[2] (the model output), per the
            // remap vectors passed to compare().
            PartitioningModel modelA0;
            uint32_t a0Opnd0 = modelA0.addFloatOperand(Dimensioned::YES_2);
            uint32_t a0Opnd1 = modelA0.addIntOperand(Dimensioned::RANK_2);
            uint32_t a0Opnd2 = modelA0.addExplicitOperationXTo1(
                    ANEURALNETWORKS_PAD, {a0Opnd0, a0Opnd1}, WrapperType::TENSOR_FLOAT32,
                    dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
            uint32_t a0Opnd3 = modelA0.addExplicitOperationXTo1(
                    ANEURALNETWORKS_PAD, {a0Opnd0, a0Opnd1}, WrapperType::TENSOR_FLOAT32,
                    dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
            modelA0.identifyInputsAndOutputs({a0Opnd0, a0Opnd1}, {a0Opnd3, a0Opnd2});
            modelA0.finish();
            ASSERT_TRUE(modelA0.isValid());

            ASSERT_NO_FATAL_FAILURE(compare(
                    stepsA[0], &modelA0, devices[0],
                    RemapVectorType{{mOpnds[0], a0Opnd0}, {mOpnds[1], a0Opnd1}},  // modelInputs
                    RemapVectorType{{mOpnds[2], a0Opnd3}},                        // modelOutputs
                    RemapVectorType{},                             // tempsAsStepModelInputs
                    StepModelOutputSetType{{mOpnds[3], a0Opnd2}},  // tempsAsStepModelOutputs
                    RemapVectorType{},                             // outputsAsStepModelInputs
                    {0u}));  // modelOutputsThatAreDownstreamInputs
        }
        {
            // Build a model to compare against the step model from stepsA[1].
            // It consumes the temporary (opnd3) and the first partition's model
            // output (opnd2) to produce the final ADD result (opnd4).
            PartitioningModel modelA1;
            uint32_t a1Opnd2 = modelA1.addFloatOperand(
                    dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
            uint32_t a1Opnd3 = modelA1.addFloatOperand(
                    dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
            uint32_t a1Opnd4 = modelA1.addOperation2To1V1_0(
                    0, a1Opnd2, a1Opnd3,
                    dimensionedOutput(mMinDeviceVersion, mOpnd4ModelOutputSpecified));
            modelA1.identifyInputsAndOutputs({a1Opnd3, a1Opnd2}, {a1Opnd4});
            modelA1.finish();
            ASSERT_TRUE(modelA1.isValid());

            ASSERT_NO_FATAL_FAILURE(
                    compare(stepsA[1], &modelA1, devices[1], RemapVectorType{},  // modelInputs
                            RemapVectorType{{mOpnds[4], a1Opnd4}},               // modelOutputs
                            RemapVectorType{{mOpnds[3], a1Opnd3}},  // tempsAsStepModelInputs
                            StepModelOutputSetType{},               // tempsAsStepModelOutputs
                            RemapVectorType{{mOpnds[2], a1Opnd2}},  // outputsAsStepModelInputs
                            {}));  // modelOutputsThatAreDownstreamInputs
        }
    } else {
        ASSERT_EQ(mCompilation->finish(), Result::OP_FAILED);
        // Try again, expecting fallback.
        mCompilation = PartitioningCompilation(&mModel.value(), devices);
        ASSERT_EQ(mCompilation->setPartitioning(DeviceManager::kPartitioningWithFallback),
                  Result::NO_ERROR);
        ASSERT_EQ(mCompilation->finish(), Result::NO_ERROR);
        ASSERT_EQ(mCompilation->getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(mCompilation->getExecutionPlan().forTest_simpleGetDevice(),
                  DeviceManager::getCpuDevice());
    }
}
2428 
// Executes the compiled model and verifies its outputs.
// The PAD input is {3, 5} with paddings {{1, 1}}, so each PAD result is the
// 4-element tensor {0, 3, 5, 0}, and the ADD result is twice that (the sum of
// the two identical PAD results). A "not big enough" model output buffer holds
// only 3 of the 4 elements and must cause OUTPUT_INSUFFICIENT_SIZE.
void DynamicTemporariesTest::executeCompilationAndCompareOutput(bool opnd2ModelOutputBigEnough,
                                                                bool opnd4ModelOutputBigEnough) {
    // An output created with fully specified dimensions must always be given
    // a big-enough buffer.
    ASSERT_TRUE(opnd2ModelOutputBigEnough || !mOpnd2ModelAndPartitionOutputSpecified);
    ASSERT_TRUE(opnd4ModelOutputBigEnough || !mOpnd4ModelOutputSpecified);

    ASSERT_TRUE(mCompilation.has_value());
    WrapperExecution e(&mCompilation.value());

    // Input 0: the 2-element tensor to pad.
    WrapperOperandType padTensorValueType(WrapperType::TENSOR_FLOAT32, {2});
    const float padTensorValue[] = {3.0f, 5.0f};
    e.setInput(0, &padTensorValue, &padTensorValueType.operandType);

    // Input 1: pad one element before and one after.
    WrapperOperandType paddingsType(WrapperType::TENSOR_INT32, {1, 2});
    const int paddings[1][2] = {{1, 1}};
    e.setInput(1, &paddings, &paddingsType.operandType);

    // Binds model output "index" to "buffer": 4 elements when bigEnough, else 3.
    // The buffer is pre-filled with -1 so stale contents cannot fake a pass.
    auto setOutput = [&e](uint32_t index, float* buffer, bool bigEnough, bool specified,
                          HalVersion version) {
        const uint32_t elts = bigEnough ? 4 : 3;
        std::fill(buffer, buffer + elts, -1.0f);
        using DimsType = std::vector<uint32_t>;
        WrapperOperandType outputType(
                WrapperType::TENSOR_FLOAT32,
                specified ? DimsType{elts}
                          : supportsOutputOfUnknownRank(version) ? DimsType{} : DimsType{0});
        e.setOutput(index, buffer, elts * sizeof(float), &outputType.operandType);
    };
    float opnd2ModelOutput[4], opnd4ModelOutput[4];
    setOutput(0, opnd2ModelOutput, opnd2ModelOutputBigEnough,
              mOpnd2ModelAndPartitionOutputSpecified, mPadDeviceVersion);
    setOutput(1, opnd4ModelOutput, opnd4ModelOutputBigEnough, mOpnd4ModelOutputSpecified,
              mAddDeviceVersion);

    // Any too-small model output buffer must surface as OUTPUT_INSUFFICIENT_SIZE.
    const Result expectResult = opnd2ModelOutputBigEnough && opnd4ModelOutputBigEnough
                                        ? Result::NO_ERROR
                                        : Result::OUTPUT_INSUFFICIENT_SIZE;
    ASSERT_EQ(e.compute(), expectResult);
    if (expectResult == Result::NO_ERROR) {
        // opnd2 = PAD({3, 5}) = {0, 3, 5, 0}; opnd4 = opnd2 + opnd3 = 2 * opnd2.
        float expected[4] = {0.0f, padTensorValue[0], padTensorValue[1], 0.0f};
        ASSERT_TRUE(std::equal(std::begin(opnd2ModelOutput), std::end(opnd2ModelOutput),
                               std::begin(expected)));
        for (auto& elt : expected) {
            elt *= 2;
        }
        ASSERT_TRUE(std::equal(std::begin(opnd4ModelOutput), std::end(opnd4ModelOutput),
                               std::begin(expected)));
    }
}
2477 
TEST_F(DynamicTemporariesTest, ModelOutputsSufficientSize) {
    // The purpose of this test is to confirm that the partitioner and the
    // runtime can handle a model output of unspecified dimensions but
    // sufficient size that is written by one partition and read by another.

    // opnd3 is fully specified, so no dynamic temporary is created; opnd2 and
    // opnd4 (the model outputs) have unspecified dimensions.
    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2490 
2491 // TODO(b/174851714): Fix the partitioner and re-enable this test.
TEST_F(DynamicTemporariesTest, DISABLED_ModelOutputsSufficientSize_V1_1) {
    // The purpose of this test is to confirm that the partitioner and the
    // runtime can handle a model output of unspecified dimensions but
    // sufficient size that is written by one partition and read by another.
    // Regression test for http://b/174851714.

    // With V1_1 devices, unspecified outputs get known rank but unknown
    // dimensions (see dimensionedOutput()).
    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_1,
                                               /*addDeviceVersion=*/HalVersion::V1_1));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2507 
TEST_F(DynamicTemporariesTest, DynamicTemporariesUnspecifiedOutputs) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly.  Note
    // that all model outputs are of unspecified dimensions but sufficient size.

    // declareOutputDimensions() is deliberately omitted, so all of the
    // "specified" flags keep their default value of false and opnd3 becomes a
    // dynamic temporary.
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2517 
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly.  Note
    // that all model outputs are of specified dimensions.

    // Only opnd3 (the partition-boundary operand) is unspecified, so it is the
    // one dynamic temporary.
    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2530 
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs_V1_2) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly.  Note
    // that all model outputs are of specified dimensions.
    // Regression test for http://b/174851714.

    // Same as DynamicTemporariesSpecifiedOutputs, but pinned to V1_2 devices.
    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_2,
                                               /*addDeviceVersion=*/HalVersion::V1_2));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2546 
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs_V1_1) {
    // The purpose of this test is to confirm that the partitioner cannot produce
    // dynamic temporaries for V1_1 but instead does whole-model CPU fallback.  Note
    // that all model outputs are of specified dimensions.
    // Regression test for http://b/174851714.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_1,
                                               /*addDeviceVersion=*/HalVersion::V1_1));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    // noFallback=false: expect the no-fallback compilation to fail and the
    // fallback compilation to end up as a SIMPLE plan on the CPU device.
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan(false));
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2562 
TEST_F(DynamicTemporariesTest, ModelOutputsInsufficientSizeWithDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the presence of a dynamic temporary.

    // Default dimension flags (all unspecified) make opnd3 a dynamic temporary;
    // both model output buffers are deliberately too small.
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, false));
}
2571 
TEST_F(DynamicTemporariesTest, ModelOutputsInsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary.

    // opnd3 is fully specified, so there is no dynamic temporary; both model
    // output buffers are deliberately too small.
    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, false));
}
2583 
TEST_F(DynamicTemporariesTest, ModelOutput2InsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary.

    // Only the opnd2 output buffer is too small; the opnd4 buffer is big enough.
    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, true));
}
2595 
TEST_F(DynamicTemporariesTest, ModelOutput4InsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary.

    // Only the opnd4 output buffer is too small; the opnd2 buffer is big enough.
    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, false));
}
2607 
2608 // Test token rehashing during the compilation step.
2609 class CacheTest : public PartitioningTest {
2610    protected:
    // Creates a fresh, uniquely named cache directory for each test.
    virtual void SetUp() override {
        PartitioningTest::SetUp();
        // mkdtemp() rewrites the trailing "XXXXXX" in place, so the template
        // must be a mutable character array (not a string literal).
        char cacheDirTemp[] = NN_TMP_DIR "/TestCompilationCachingXXXXXX";
        char* cacheDir = mkdtemp(cacheDirTemp);
        ASSERT_NE(cacheDir, nullptr);
        mCacheDir = cacheDir;
    }
2618 
TearDown()2619     virtual void TearDown() override {
2620         if (!::testing::Test::HasFailure()) {
2621             std::filesystem::remove_all(mCacheDir);
2622         }
2623         PartitioningTest::TearDown();
2624     }
2625 
expectUniqueTokens(const std::vector<std::vector<uint8_t>> & tokens)2626     void expectUniqueTokens(const std::vector<std::vector<uint8_t>>& tokens) {
2627         for (uint32_t i = 0; i < tokens.size(); i++) {
2628             SCOPED_TRACE(i);
2629             for (uint32_t j = i + 1; j < tokens.size(); j++) {
2630                 SCOPED_TRACE(j);
2631                 EXPECT_NE(tokens[i], tokens[j]);
2632             }
2633         }
2634     }
2635 
    // Launch a single run of the partitioner against the provided model and device list with
    // cache token provided as tokenIn. Find the partition for the device with deviceName.
    // Record the transformed token into tokenOut. Two or more partitions may be on the same device.
    // "devicePartitionIndex" specifies the index of the ExecutionStep corresponding to the
    // partition of interest, within the sequence of ExecutionSteps on the target device.
    // If tokenIn is empty, no caching information will be provided to the partitioner.
    void getTransformedCacheTokenSingle(const PartitioningModel& model,
                                        const std::vector<std::shared_ptr<Device>>& devices,
                                        const char* deviceName, const std::vector<uint8_t>& tokenIn,
                                        ExecutePreference preference, ExecutePriority priority,
                                        uint32_t devicePartitionIndex,
                                        std::vector<uint8_t>* tokenOut) {
        // Compile the model and get the execution plan.
        PartitioningCompilation compilation(&model, devices);
        if (!tokenIn.empty()) {
            compilation.setCaching(mCacheDir.c_str(), tokenIn);
        }
        compilation.setPreference(preference);
        compilation.setPriority(priority);
        ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
        const ExecutionPlan& plan = compilation.getExecutionPlan();

        // Find the cache info for the device.
        const uint8_t* token = nullptr;
        if (plan.forTest_getKind() == ExecutionPlan::Kind::SIMPLE) {
            // A SIMPLE plan has a single partition, so only index 0 is meaningful.
            ASSERT_EQ(devicePartitionIndex, 0u);
            ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), deviceName);
            token = plan.forTest_simpleGetCacheToken();
        } else if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) {
            const auto& steps = plan.forTest_compoundGetSteps();
            // Count only execution steps on the target device; the
            // devicePartitionIndex-th such step is the partition of interest.
            uint32_t executionStepCount = 0;
            for (const auto& step : steps) {
                if (step->isExecution() &&
                    step->executionStep()->getDevice()->getName() == deviceName) {
                    if (devicePartitionIndex == executionStepCount) {
                        token = step->executionStep()->forTest_getCacheToken();
                        break;
                    }
                    executionStepCount++;
                }
            }
        } else {
            FAIL();
        }

        // Retrieve the transformed token from the cache info. An empty tokenOut
        // means no token was recorded for the requested partition.
        if (token == nullptr) {
            tokenOut->clear();
        } else {
            tokenOut->resize(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN);
            std::copy(token, token + ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, tokenOut->begin());
        }
    }
2689 
2690     // A wrapper of getTransformedCacheTokenSingle, which runs getTransformedCacheTokenSingle
2691     // multiple times and checks if the transformation provides consistent result.
2692     // Two or more partitions may be on the same device. "devicePartitionIndex" specifies the index
2693     // of the ExecutionStep corresponding to the partition of interest, within the sequence of
2694     // ExecutionSteps on the target device.
getTransformedCacheToken(const PartitioningModel & model,const std::vector<std::shared_ptr<Device>> & devices,const char * deviceName,const std::vector<uint8_t> & tokenIn,ExecutePreference preference,ExecutePriority priority,std::vector<uint8_t> * tokenOut,uint32_t devicePartitionIndex=0)2695     void getTransformedCacheToken(const PartitioningModel& model,
2696                                   const std::vector<std::shared_ptr<Device>>& devices,
2697                                   const char* deviceName, const std::vector<uint8_t>& tokenIn,
2698                                   ExecutePreference preference, ExecutePriority priority,
2699                                   std::vector<uint8_t>* tokenOut,
2700                                   uint32_t devicePartitionIndex = 0) {
2701         getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference, priority,
2702                                        devicePartitionIndex, tokenOut);
2703 
2704         // Test if the runtime maps to the same cache token every time for the same compilation
2705         // setup.
2706         for (uint32_t i = 0; i < 10; i++) {
2707             std::vector<uint8_t> token;
2708             SCOPED_TRACE(i);
2709             getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference,
2710                                            priority, devicePartitionIndex, &token);
2711             EXPECT_EQ(*tokenOut, token);
2712         }
2713     }
2714 
createModelForCachingTests(PartitioningModel * model)2715     void createModelForCachingTests(PartitioningModel* model) {
2716         uint32_t opnd0 = model->addFloatOperand();
2717         uint32_t opnd1 = model->addFloatOperand();
2718         uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1);
2719         uint32_t opnd3 = model->addFloatOperand();
2720         uint32_t opnd4 = model->addOperation2To1V1_0(1, opnd2, opnd3);
2721         model->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
2722         model->finish();
2723         ASSERT_TRUE(model->isValid());
2724     }
2725 
2726     // The first model returned in "models" is the main model.
createControlFlowModelForCachingTests(std::vector<std::unique_ptr<PartitioningModel>> * models)2727     void createControlFlowModelForCachingTests(
2728             std::vector<std::unique_ptr<PartitioningModel>>* models) {
2729         CHECK(models != nullptr);
2730 
2731         auto trueModel = std::make_unique<PartitioningModel>();
2732         {
2733             const uint32_t opnd0 = trueModel->addFloatOperand();
2734             const uint32_t opnd1 = trueModel->addFloatOperand();
2735             const uint32_t opnd2 = trueModel->addOperation2To1V1_0(0, opnd0, opnd1);
2736             trueModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2737             trueModel->finish();
2738             ASSERT_TRUE(trueModel->isValid());
2739         }
2740 
2741         auto falseModel = std::make_unique<PartitioningModel>();
2742         {
2743             const uint32_t opnd0 = falseModel->addFloatOperand();
2744             const uint32_t opnd1 = falseModel->addFloatOperand();
2745             const uint32_t opnd2 = falseModel->addOperation2To1V1_0(0, opnd0, opnd1);
2746             falseModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2747             falseModel->finish();
2748             ASSERT_TRUE(falseModel->isValid());
2749         }
2750 
2751         auto mainModel = std::make_unique<PartitioningModel>();
2752         {
2753             const uint32_t opnd0 = mainModel->addBooleanOperand();
2754             const uint32_t opnd1 = mainModel->addFloatOperand();
2755             const uint32_t opnd2 = mainModel->addFloatOperand();
2756             const uint32_t opnd3 = mainModel->addFloatOperand();
2757             mainModel->addIfOperation(opnd0, *trueModel, *falseModel, {opnd1, opnd2}, {opnd3});
2758             mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
2759             mainModel->finish();
2760             ASSERT_TRUE(mainModel->isValid());
2761         }
2762 
2763         models->clear();
2764         models->push_back(std::move(mainModel));
2765         models->push_back(std::move(trueModel));
2766         models->push_back(std::move(falseModel));
2767     }
2768 
2769     std::string mCacheDir;
2770 };
2771 
2772 // Test the case when no token is provided by the application and the execution plan has a
2773 // simple body.
TEST_F(CacheTest,CacheTokenNoneSimpleBody)2774 TEST_F(CacheTest, CacheTokenNoneSimpleBody) {
2775     PartitioningModel model;
2776     createModelForCachingTests(&model);
2777 
2778     // deviceA can execute the whole model.
2779     const auto deviceA = makeDevices({
2780             {"deviceA", 0.5, ~0U},
2781     });
2782 
2783     std::vector<uint8_t> tokenIn, tokenOut;
2784     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2785                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2786                              &tokenOut);
2787     EXPECT_TRUE(tokenOut.empty());
2788 }
2789 
2790 // Test if the runtime maps to different cache tokens for devices with different names in
2791 // execution plan with a simple body.
TEST_F(CacheTest,CacheTokenDifferentDeviceNamesSimpleBody)2792 TEST_F(CacheTest, CacheTokenDifferentDeviceNamesSimpleBody) {
2793     PartitioningModel model;
2794     createModelForCachingTests(&model);
2795 
2796     // Two devices that can both execute the whole model.
2797     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2798     const auto deviceB = makeDevices({{"deviceB", 0.5, ~0U}});
2799 
2800     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2801     std::vector<uint8_t> deviceAToken, deviceBToken;
2802     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2803                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2804                              &deviceAToken);
2805     getTransformedCacheToken(model, deviceB, "deviceB", tokenIn,
2806                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2807                              &deviceBToken);
2808     expectUniqueTokens({deviceAToken, deviceBToken});
2809 }
2810 
2811 // Test if the runtime maps to different cache tokens for devices with different version strings in
2812 // execution plan with a simple body.
TEST_F(CacheTest,CacheTokenDifferentDeviceVersionStringsSimpleBody)2813 TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsSimpleBody) {
2814     PartitioningModel model;
2815     createModelForCachingTests(&model);
2816 
2817     // Two devices that can both execute the whole model.
2818     const auto deviceA_1_0 = makeDevices({{"deviceA", "1.0", 0.5, ~0U}});
2819     const auto deviceA_1_1 = makeDevices({{"deviceA", "1.1", 0.5, ~0U}});
2820 
2821     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2822     std::vector<uint8_t> deviceA_1_0_Token, deviceA_1_1_Token;
2823     getTransformedCacheToken(model, deviceA_1_0, "deviceA", tokenIn,
2824                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2825                              &deviceA_1_0_Token);
2826     getTransformedCacheToken(model, deviceA_1_1, "deviceA", tokenIn,
2827                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2828                              &deviceA_1_1_Token);
2829     expectUniqueTokens({deviceA_1_0_Token, deviceA_1_1_Token});
2830 }
2831 
2832 // Test if the runtime maps to different cache tokens for compilations with different preferences
2833 // in execution plan with a simple body.
TEST_F(CacheTest,CacheTokenDifferentPreferencesSimpleBody)2834 TEST_F(CacheTest, CacheTokenDifferentPreferencesSimpleBody) {
2835     PartitioningModel model;
2836     createModelForCachingTests(&model);
2837 
2838     // One device that can execute the whole model.
2839     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2840 
2841     std::vector<uint8_t> fastToken, powerToken, sustainedToken;
2842     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2843     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2844                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2845                              &fastToken);
2846     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2847                              ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
2848                              &powerToken);
2849     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2850                              ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
2851                              &sustainedToken);
2852     expectUniqueTokens({fastToken, powerToken, sustainedToken});
2853 }
2854 
2855 // TODO (b/207721221): add test for AIDL compilation hints.
2856 // Test if the runtime maps to different cache tokens for compilations with different priorities
2857 // in execution plan with a simple body.
TEST_F(CacheTest,CacheTokenDifferentPrioritiesSimpleBody)2858 TEST_F(CacheTest, CacheTokenDifferentPrioritiesSimpleBody) {
2859     PartitioningModel model;
2860     createModelForCachingTests(&model);
2861 
2862     // One device that can execute the whole model.
2863     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2864 
2865     std::vector<uint8_t> lowToken, mediumToken, highToken;
2866     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2867     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2868                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
2869                              &lowToken);
2870     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2871                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
2872                              &mediumToken);
2873     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2874                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
2875                              &highToken);
2876     expectUniqueTokens({lowToken, mediumToken, highToken});
2877 }
2878 
2879 // Test if the runtime maps to different cache tokens for compilations with different tokens
2880 // provided by application in execution plan with a simple body.
TEST_F(CacheTest,CacheTokenDifferentTokensSimpleBody)2881 TEST_F(CacheTest, CacheTokenDifferentTokensSimpleBody) {
2882     PartitioningModel model;
2883     createModelForCachingTests(&model);
2884 
2885     // One device that can execute the whole model.
2886     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2887 
2888     std::vector<uint8_t> tokenOut1, tokenOut2;
2889     std::vector<uint8_t> tokenIn1(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2890     std::vector<uint8_t> tokenIn2(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
2891     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn1,
2892                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2893                              &tokenOut1);
2894     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn2,
2895                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2896                              &tokenOut2);
2897     expectUniqueTokens({tokenOut1, tokenOut2});
2898 }
2899 
2900 // Test the case when no token is provided by the application and the execution plan has a
2901 // compound body.
TEST_F(CacheTest,CacheTokenNoneCompoundBody)2902 TEST_F(CacheTest, CacheTokenNoneCompoundBody) {
2903     PartitioningModel model;
2904     createModelForCachingTests(&model);
2905 
2906     // DeviceA executes the first operation only.
2907     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2908 
2909     std::vector<uint8_t> tokenIn, tokenOut;
2910     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2911                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2912                              &tokenOut);
2913     EXPECT_TRUE(tokenOut.empty());
2914     getTransformedCacheToken(model, devices, "deviceB", tokenIn,
2915                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2916                              &tokenOut);
2917     EXPECT_TRUE(tokenOut.empty());
2918 }
2919 
2920 // Test if the runtime maps to different cache tokens for devices with different names in
2921 // execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentDeviceNamesCompoundBody)2922 TEST_F(CacheTest, CacheTokenDifferentDeviceNamesCompoundBody) {
2923     PartitioningModel model;
2924     createModelForCachingTests(&model);
2925 
2926     // DeviceA executes the first operation only.
2927     const auto devices1 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});
2928     // DeviceB executes the first operation only.
2929     const auto devices2 = makeDevices({{"deviceB", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});
2930 
2931     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2932     std::vector<uint8_t> deviceAToken, deviceBToken;
2933     getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
2934                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2935                              &deviceAToken);
2936     getTransformedCacheToken(model, devices2, "deviceB", tokenIn,
2937                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2938                              &deviceBToken);
2939     expectUniqueTokens({deviceAToken, deviceBToken});
2940 }
2941 
2942 // Test if the runtime maps to different cache tokens for devices with different names in
2943 // execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentDeviceVersionStringsCompoundBody)2944 TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsCompoundBody) {
2945     PartitioningModel model;
2946     createModelForCachingTests(&model);
2947 
2948     // DeviceA executes the first operation only.
2949     const auto devices1 = makeDevices({{"deviceA", "1.0", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2950     // DeviceB executes the first operation only.
2951     const auto devices2 = makeDevices({{"deviceA", "1.1", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2952 
2953     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2954     std::vector<uint8_t> deviceA_1_0_Token, deviceA_1_1_Token;
2955     getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
2956                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2957                              &deviceA_1_0_Token);
2958     getTransformedCacheToken(model, devices2, "deviceA", tokenIn,
2959                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2960                              &deviceA_1_1_Token);
2961     expectUniqueTokens({deviceA_1_0_Token, deviceA_1_1_Token});
2962 }
2963 
2964 // Test if the runtime maps to different cache tokens for compilations with different preferences
2965 // in execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentPreferencesCompoundBody)2966 TEST_F(CacheTest, CacheTokenDifferentPreferencesCompoundBody) {
2967     PartitioningModel model;
2968     createModelForCachingTests(&model);
2969 
2970     // DeviceA executes the first operation only.
2971     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2972 
2973     std::vector<uint8_t> fastToken, powerToken, sustainedToken;
2974     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2975     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2976                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2977                              &fastToken);
2978     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2979                              ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
2980                              &powerToken);
2981     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2982                              ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
2983                              &sustainedToken);
2984     expectUniqueTokens({fastToken, powerToken, sustainedToken});
2985 }
2986 
2987 // Test if the runtime maps to different cache tokens for compilations with different priorities
2988 // in execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentPrioritiesCompoundBody)2989 TEST_F(CacheTest, CacheTokenDifferentPrioritiesCompoundBody) {
2990     PartitioningModel model;
2991     createModelForCachingTests(&model);
2992 
2993     // DeviceA executes the first operation only.
2994     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2995 
2996     std::vector<uint8_t> lowToken, mediumToken, highToken;
2997     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2998     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2999                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
3000                              &lowToken);
3001     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
3002                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
3003                              &mediumToken);
3004     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
3005                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
3006                              &highToken);
3007     expectUniqueTokens({lowToken, mediumToken, highToken});
3008 }
3009 
3010 // Test if the runtime maps to different cache tokens for compilations with different tokens
3011 // provided by application in execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentTokensCompoundBody)3012 TEST_F(CacheTest, CacheTokenDifferentTokensCompoundBody) {
3013     PartitioningModel model;
3014     createModelForCachingTests(&model);
3015 
3016     // DeviceA executes the first operation only.
3017     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
3018 
3019     std::vector<uint8_t> tokenOut1, tokenOut2;
3020     std::vector<uint8_t> tokenIn1(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
3021     std::vector<uint8_t> tokenIn2(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
3022     getTransformedCacheToken(model, devices, "deviceA", tokenIn1,
3023                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3024                              &tokenOut1);
3025     getTransformedCacheToken(model, devices, "deviceA", tokenIn2,
3026                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3027                              &tokenOut2);
3028     expectUniqueTokens({tokenOut1, tokenOut2});
3029 }
3030 
3031 // Test if the runtime maps to different cache tokens for compilations with different partitioning
3032 // outcome in execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentPartitionsCompoundBody)3033 TEST_F(CacheTest, CacheTokenDifferentPartitionsCompoundBody) {
3034     PartitioningModel model;
3035     createModelForCachingTests(&model);
3036 
3037     // DeviceA executes the whole model.
3038     const auto devices1 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 0U}});
3039     // DeviceA executes the first operation only.
3040     const auto devices2 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
3041     // DeviceA executes the second operation only.
3042     const auto devices3 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 0}});
3043 
3044     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
3045     std::vector<uint8_t> tokenOut1, tokenOut2, tokenOut3;
3046     getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
3047                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3048                              &tokenOut1);
3049     getTransformedCacheToken(model, devices2, "deviceA", tokenIn,
3050                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3051                              &tokenOut2);
3052     getTransformedCacheToken(model, devices3, "deviceA", tokenIn,
3053                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3054                              &tokenOut3);
3055     expectUniqueTokens({tokenOut1, tokenOut2, tokenOut3});
3056 }
3057 
3058 // Test if the runtime maps different referenced models to different cache tokens.
TEST_F(CacheTest,CacheTokenDifferentReferenceModelPartitions)3059 TEST_F(CacheTest, CacheTokenDifferentReferenceModelPartitions) {
3060     std::vector<std::unique_ptr<PartitioningModel>> models;
3061     createControlFlowModelForCachingTests(&models);
3062     const auto& main = *models[0];
3063 
3064     // DeviceA executes the two referenced models but does not support IF.
3065     // There will be two partitions on deviceA.
3066     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}});
3067 
3068     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
3069     std::vector<uint8_t> tokenOut1, tokenOut2;
3070     getTransformedCacheToken(main, devices, "deviceA", tokenIn,
3071                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3072                              &tokenOut1, /*devicePartitionIndex=*/0);
3073     getTransformedCacheToken(main, devices, "deviceA", tokenIn,
3074                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3075                              &tokenOut2, /*devicePartitionIndex=*/1);
3076     expectUniqueTokens({tokenOut1, tokenOut2});
3077 }
3078 
// Very basic tests of some of the PerformanceInfo functionality.
// Placed in this file because partitioning is the consumer of this functionality.
// Empty fixture: the tests below need no shared setup or state.
class PerfTest : public ::testing::Test {};
3082 
TEST_F(PerfTest, Lookup) {
    // Derive an arbitrary (but reproducible) performance value from an OperandType.
    // We'll use this to ensure that we can save and then recover a type's performance.
    auto typePerf = [](V1_3::OperandType type) { return float(static_cast<uint32_t>(type)); };

    V1_3::Capabilities capabilities = ::android::nn::makeCapabilities(-1.0f);

    // Invokes "fn" on every operand type in the inclusive range [lo, hi].
    const auto forEachTypeIn = [](V1_3::OperandTypeRange lo, V1_3::OperandTypeRange hi,
                                  auto&& fn) {
        for (uint32_t raw = static_cast<uint32_t>(lo); raw <= static_cast<uint32_t>(hi); ++raw) {
            fn(static_cast<V1_3::OperandType>(raw));
        }
    };

    // Record a distinct performance value for every fundamental and OEM type.
    const auto store = [&](V1_3::OperandType operandType) {
        update(&capabilities, operandType, typePerf(operandType));
    };
    forEachTypeIn(V1_3::OperandTypeRange::FUNDAMENTAL_MIN,
                  V1_3::OperandTypeRange::FUNDAMENTAL_MAX, store);
    forEachTypeIn(V1_3::OperandTypeRange::OEM_MIN, V1_3::OperandTypeRange::OEM_MAX, store);

    // Make sure lookup retrieves the values stored by update. (SUBGRAPH lies in
    // the fundamental range only, so the skip never fires for OEM types.)
    const auto check = [&](V1_3::OperandType operandType) {
        if (operandType == V1_3::OperandType::SUBGRAPH) {
            // SUBGRAPH capabilities are handled differently.
            return;
        }
        SCOPED_TRACE(toString(operandType));
        EXPECT_EQ(lookupExecTime(capabilities, operandType), typePerf(operandType));
    };
    forEachTypeIn(V1_3::OperandTypeRange::FUNDAMENTAL_MIN,
                  V1_3::OperandTypeRange::FUNDAMENTAL_MAX, check);
    forEachTypeIn(V1_3::OperandTypeRange::OEM_MIN, V1_3::OperandTypeRange::OEM_MAX, check);

    // A type outside every known range must report the "unknown" execution time.
    const auto missingType = static_cast<V1_3::OperandType>(
            static_cast<uint32_t>(V1_3::OperandTypeRange::BASE_MAX) + 1);
    EXPECT_EQ(lookupExecTime(capabilities, missingType), FLT_MAX);
}
3126 
3127 class ControlFlowPartitioningTest : public PartitioningTest {
3128    protected:
3129     // opnd0 --> +-----+
3130     //           | op0 | --> opnd2
3131     // opnd1 --> +-----+
createBranchOrBodyModel(Dimensioned dimensioned)3132     std::unique_ptr<PartitioningModel> createBranchOrBodyModel(Dimensioned dimensioned) {
3133         auto model = std::make_unique<PartitioningModel>();
3134         const uint32_t opnd0 = model->addFloatOperand(dimensioned);
3135         const uint32_t opnd1 = model->addFloatOperand(dimensioned);
3136         const uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1, dimensioned);
3137         model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3138         model->finish();
3139         EXPECT_TRUE(model->isValid());
3140         return model;
3141     }
3142 
3143     // opnd0 --> +-------+
3144     //           | EQUAL | --> opnd2
3145     // opnd1 --> +-------+
createCondModel(Dimensioned dimensioned)3146     std::unique_ptr<PartitioningModel> createCondModel(Dimensioned dimensioned) {
3147         auto model = std::make_unique<PartitioningModel>();
3148         const uint32_t opnd0 = model->addFloatOperand(dimensioned);
3149         const uint32_t opnd1 = model->addFloatOperand(dimensioned);
3150         const uint32_t opnd2 = model->addExplicitOperationXTo1(
3151                 ANEURALNETWORKS_EQUAL, {opnd0, opnd1}, WrapperType::TENSOR_BOOL8);
3152         model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3153         model->finish();
3154         EXPECT_TRUE(model->isValid());
3155         return model;
3156     }
3157 
3158     // opnd0 --> +----+
3159     // opnd1 --> | IF | --> opnd3
3160     // opnd2 --> +----+
createIfModel(Dimensioned dimensionedMain=Dimensioned::YES,Dimensioned dimensionedThen=Dimensioned::YES,Dimensioned dimensionedElse=Dimensioned::YES)3161     std::vector<std::unique_ptr<PartitioningModel>> createIfModel(
3162             Dimensioned dimensionedMain = Dimensioned::YES,
3163             Dimensioned dimensionedThen = Dimensioned::YES,
3164             Dimensioned dimensionedElse = Dimensioned::YES) {
3165         auto thenModel = createBranchOrBodyModel(dimensionedThen);
3166         auto elseModel = createBranchOrBodyModel(dimensionedElse);
3167 
3168         auto mainModel = std::make_unique<PartitioningModel>();
3169         const uint32_t opnd0 = mainModel->addBooleanOperand();
3170         const uint32_t opnd1 = mainModel->addFloatOperand(dimensionedMain);
3171         const uint32_t opnd2 = mainModel->addFloatOperand(dimensionedMain);
3172         const uint32_t opnd3 = mainModel->addFloatOperand(dimensionedMain);
3173         mainModel->addIfOperation(opnd0, *thenModel, *elseModel, {opnd1, opnd2}, {opnd3});
3174         mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
3175         mainModel->finish();
3176         EXPECT_TRUE(mainModel->isValid());
3177 
3178         std::vector<std::unique_ptr<PartitioningModel>> models;
3179         models.push_back(std::move(mainModel));
3180         models.push_back(std::move(thenModel));
3181         models.push_back(std::move(elseModel));
3182         return std::move(models);
3183     }
3184 
3185     // opnd0 --> +-------+
3186     //           | WHILE | --> opnd2
3187     // opnd1 --> +-------+
createWhileModel(Dimensioned dimensionedMain=Dimensioned::YES,Dimensioned dimensionedCond=Dimensioned::YES,Dimensioned dimensionedBody=Dimensioned::YES)3188     std::vector<std::unique_ptr<PartitioningModel>> createWhileModel(
3189             Dimensioned dimensionedMain = Dimensioned::YES,
3190             Dimensioned dimensionedCond = Dimensioned::YES,
3191             Dimensioned dimensionedBody = Dimensioned::YES) {
3192         auto condModel = createCondModel(dimensionedCond);
3193         auto bodyModel = createBranchOrBodyModel(dimensionedBody);
3194 
3195         auto mainModel = std::make_unique<PartitioningModel>();
3196         const uint32_t opnd0 = mainModel->addFloatOperand(dimensionedMain);
3197         const uint32_t opnd1 = mainModel->addFloatOperand(dimensionedMain);
3198         const uint32_t opnd2 = mainModel->addFloatOperand(dimensionedMain);
3199         mainModel->addWhileOperation(*condModel, *bodyModel, {opnd0, opnd1}, {opnd2});
3200         mainModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3201         mainModel->finish();
3202         EXPECT_TRUE(mainModel->isValid());
3203 
3204         std::vector<std::unique_ptr<PartitioningModel>> models;
3205         models.push_back(std::move(mainModel));
3206         models.push_back(std::move(condModel));
3207         models.push_back(std::move(bodyModel));
3208         return std::move(models);
3209     }
3210 
3211     void testIfUnknownSize(Dimensioned dimensionedMain, Dimensioned dimensionedThen,
3212                            Dimensioned dimensionedElse);
3213     void testWhileUnknownSize(Dimensioned dimensionedMain, Dimensioned dimensionedThen,
3214                               Dimensioned dimensionedElse);
3215 };
3216 
// IF must be interpreted by the runtime when the device supports the referenced
// models but not the IF operation itself.
TEST_F(ControlFlowPartitioningTest, IF_Interpreted) {
    const auto models = createIfModel();

    const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});

    ExecutionPlan plan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    checkExecutionPlanSteps(plan, {kIfStep, "V1_0", kGotoStep, "V1_0"});
}
3229 
// WHILE must be interpreted by the runtime when the device supports the body
// model but neither WHILE nor the condition model (EQUAL is unsupported on V1_0),
// forcing the condition onto the CPU fallback device.
TEST_F(ControlFlowPartitioningTest, WHILE_Interpreted) {
    const auto models = createWhileModel();

    const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});

    ExecutionPlan plan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(plan, {kWhileStep, cpuDeviceName, kGotoStep, "V1_0", kGotoStep});
}
3244 
// A device supporting every operation (including IF) yields a simple-body plan.
TEST_F(ControlFlowPartitioningTest, IF_SimplePlan) {
    const auto models = createIfModel();

    const auto devices = makeDevices({{"ALL",
                                       0.9,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::IF}}});

    ExecutionPlan plan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    checkExecutionPlanSteps(plan, {"ALL"});
}
3262 
// A device supporting every operation (including WHILE and EQUAL) yields a
// simple-body plan.
TEST_F(ControlFlowPartitioningTest, WHILE_SimplePlan) {
    const auto models = createWhileModel();

    const auto devices = makeDevices({{"ALL",
                                       0.9,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::WHILE, V1_3::OperationType::EQUAL}}});

    ExecutionPlan plan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    checkExecutionPlanSteps(plan, {"ALL"});
}
3280 
testIfUnknownSize(Dimensioned dimensionedMain,Dimensioned dimensionedThen,Dimensioned dimensionedElse)3281 void ControlFlowPartitioningTest::testIfUnknownSize(Dimensioned dimensionedMain,
3282                                                     Dimensioned dimensionedThen,
3283                                                     Dimensioned dimensionedElse) {
3284     if (dimensionedMain != Dimensioned::NO && dimensionedThen != Dimensioned::NO &&
3285         dimensionedElse != Dimensioned::NO) {
3286         // No unknown size.
3287         return;
3288     }
3289 
3290     const auto models = createIfModel(dimensionedMain, dimensionedThen, dimensionedElse);
3291 
3292     // The device supports all operations but the partitioner ignores its IF
3293     // support due to http://b/159076604#comment5.
3294     const auto devices = makeDevices({{"ALL",
3295                                        0.9,
3296                                        ~0U,
3297                                        PartitioningDriver::OEMNo,
3298                                        HalVersion::LATEST,
3299                                        {V1_3::OperationType::IF}}});
3300 
3301     ExecutionPlan plan;
3302     ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3303                                           ExecutePriority::DEFAULT, {}, &plan),
3304               ANEURALNETWORKS_NO_ERROR);
3305     // The control flow interpreter does not support unknown size (b/132458982).
3306     checkExecutionPlanSteps(plan, {DeviceManager::getCpuDevice()->getName()});
3307 }
3308 
TEST_F(ControlFlowPartitioningTest, IF_UnknownSize) {
    // Exercise every combination of known/unknown operand sizes across the
    // main, then, and else models.
    constexpr Dimensioned kConfigurations[] = {Dimensioned::NO, Dimensioned::YES};
    for (Dimensioned mainDim : kConfigurations) {
        SCOPED_TRACE(testing::Message() << "dimensionedMain: " << toString(mainDim));
        for (Dimensioned thenDim : kConfigurations) {
            SCOPED_TRACE(testing::Message() << "dimensionedThen: " << toString(thenDim));
            for (Dimensioned elseDim : kConfigurations) {
                SCOPED_TRACE(testing::Message() << "dimensionedElse: " << toString(elseDim));
                testIfUnknownSize(mainDim, thenDim, elseDim);
            }
        }
    }
}
3323 
testWhileUnknownSize(Dimensioned dimensionedMain,Dimensioned dimensionedCond,Dimensioned dimensionedBody)3324 void ControlFlowPartitioningTest::testWhileUnknownSize(Dimensioned dimensionedMain,
3325                                                        Dimensioned dimensionedCond,
3326                                                        Dimensioned dimensionedBody) {
3327     if (dimensionedMain != Dimensioned::NO && dimensionedCond != Dimensioned::NO &&
3328         dimensionedBody != Dimensioned::NO) {
3329         // No unknown size.
3330         return;
3331     }
3332 
3333     const auto models = createWhileModel(dimensionedMain, dimensionedCond, dimensionedBody);
3334 
3335     // The device supports all operations but the partitioner ignores its WHILE
3336     // support due to http://b/159076604#comment5.
3337     const auto devices = makeDevices({{"ALL",
3338                                        0.9,
3339                                        ~0U,
3340                                        PartitioningDriver::OEMNo,
3341                                        HalVersion::LATEST,
3342                                        {V1_3::OperationType::WHILE, V1_3::OperationType::EQUAL}}});
3343 
3344     ExecutionPlan plan;
3345     ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3346                                           ExecutePriority::DEFAULT, {}, &plan),
3347               ANEURALNETWORKS_NO_ERROR);
3348     // The control flow interpreter does not support unknown size (b/132458982).
3349     checkExecutionPlanSteps(plan, {DeviceManager::getCpuDevice()->getName()});
3350 }
3351 
TEST_F(ControlFlowPartitioningTest, WHILE_UnknownSize) {
    // Exercise every combination of known/unknown operand sizes across the
    // main, condition, and body models.
    constexpr Dimensioned kConfigurations[] = {Dimensioned::NO, Dimensioned::YES};
    for (Dimensioned mainDim : kConfigurations) {
        SCOPED_TRACE(testing::Message() << "dimensionedMain: " << toString(mainDim));
        for (Dimensioned condDim : kConfigurations) {
            SCOPED_TRACE(testing::Message() << "dimensionedCond: " << toString(condDim));
            for (Dimensioned bodyDim : kConfigurations) {
                SCOPED_TRACE(testing::Message() << "dimensionedBody: " << toString(bodyDim));
                testWhileUnknownSize(mainDim, condDim, bodyDim);
            }
        }
    }
}
3366 
3367 // Test the memory step role analysis of the partitioning implementation.
3368 class MemoryStepRoleTest : public PartitioningTest {
3369    protected:
3370     // A tuple of {device_name, input/output}
3371     using TestStepRole = std::tuple<std::string, IOType>;
3372 
SetUp()3373     void SetUp() override {
3374         PartitioningTest::SetUp();
3375         mModel = std::make_unique<PartitioningModel>();
3376     }
3377 
toString(SourceOperandIndex index)3378     static std::string toString(SourceOperandIndex index) {
3379         return "{" + std::to_string(index.first) + ", " + std::to_string(index.second) + "}";
3380     }
3381 
toString(const std::set<TestStepRole> & roles)3382     static std::string toString(const std::set<TestStepRole>& roles) {
3383         std::stringstream ss;
3384         ss << "[ ";
3385         for (const auto& [deviceName, type] : roles) {
3386             ss << "{" << deviceName << ", " << (type == IOType::INPUT ? "INPUT" : "OUTPUT") << "} ";
3387         }
3388         ss << "]";
3389         return ss.str();
3390     }
3391 
finishAndPartitionModelForDevices(const std::vector<std::shared_ptr<Device>> & devices)3392     void finishAndPartitionModelForDevices(const std::vector<std::shared_ptr<Device>>& devices) {
3393         mModel->finish();
3394         ASSERT_TRUE(mModel->isValid());
3395         ASSERT_EQ(mModel->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3396                                            ExecutePriority::DEFAULT, {}, &mPlan),
3397                   ANEURALNETWORKS_NO_ERROR);
3398     }
3399 
checkStepRolesOfInput(uint32_t index,const std::set<TestStepRole> & expected) const3400     void checkStepRolesOfInput(uint32_t index, const std::set<TestStepRole>& expected) const {
3401         SCOPED_TRACE("Input: " + std::to_string(index));
3402         std::set<TestStepRole> actual;
3403         mPlan.forEachStepRoleOfInput(
3404                 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3405                     actual.emplace(preparedModel->getDevice()->getName(), type);
3406                 });
3407         EXPECT_TRUE(expected == actual)
3408                 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3409     }
3410 
checkStepRolesOfOutput(uint32_t index,const std::set<TestStepRole> & expected) const3411     void checkStepRolesOfOutput(uint32_t index, const std::set<TestStepRole>& expected) const {
3412         SCOPED_TRACE("Output: " + std::to_string(index));
3413         std::set<TestStepRole> actual;
3414         mPlan.forEachStepRoleOfOutput(
3415                 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3416                     actual.emplace(preparedModel->getDevice()->getName(), type);
3417                 });
3418         EXPECT_TRUE(expected == actual)
3419                 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3420     }
3421 
checkStepRolesOfSourceOperand(SourceOperandIndex index,const std::set<TestStepRole> & expected) const3422     void checkStepRolesOfSourceOperand(SourceOperandIndex index,
3423                                        const std::set<TestStepRole>& expected) const {
3424         SCOPED_TRACE("SourceOperandIndex: " + toString(index));
3425         std::set<TestStepRole> actual;
3426         mPlan.forTest_compoundForEachStepRoleOfSourceOperand(
3427                 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3428                     actual.emplace(preparedModel->getDevice()->getName(), type);
3429                 });
3430         EXPECT_TRUE(expected == actual)
3431                 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3432     }
3433 
3434     std::unique_ptr<PartitioningModel> mModel;
3435     ExecutionPlan mPlan;
3436 };
3437 
3438 // Test a graph with 3 operations, each operation in a separate partition:
3439 //     opnd2 = OP0(opnd0, opnd1)
3440 //     opnd4 = OP1(opnd1, opnd3)
3441 //     opnd5 = OP2(opnd2, opnd4)
TEST_F(MemoryStepRoleTest,NoControlFlow)3442 TEST_F(MemoryStepRoleTest, NoControlFlow) {
3443     const uint32_t opnd0 = mModel->addFloatOperand();
3444     const uint32_t opnd1 = mModel->addFloatOperand();
3445     const uint32_t opnd2 = mModel->addOperation2To1V1_0(0, opnd0, opnd1);
3446     const uint32_t opnd3 = mModel->addFloatOperand();
3447     const uint32_t opnd4 = mModel->addOperation2To1V1_0(1, opnd1, opnd3);
3448     const uint32_t opnd5 = mModel->addOperation2To1V1_0(2, opnd2, opnd4);
3449     mModel->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd2, opnd5});
3450 
3451     // This will result in 3 partitions:
3452     // deviceA handles op0, deviceB handles op1, deviceC handles op2.
3453     const auto devices = makeDevices(
3454             {{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}, {"deviceC", 0.5, 1 << 2}});
3455     finishAndPartitionModelForDevices(devices);
3456     checkExecutionPlanSteps(mPlan, {"deviceB", "deviceA", "deviceC"});
3457 
3458     // Check the step roles of the main model inputs and outputs:
3459     //
3460     // input0 and input2 are each exclusive for a single partition.
3461     checkStepRolesOfInput(0, {{"deviceA", IOType::INPUT}});
3462     checkStepRolesOfInput(2, {{"deviceB", IOType::INPUT}});
3463     // input1 is shared by two operations in different partitions.
3464     checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3465     // output0 is a model output that is a downstream input.
3466     checkStepRolesOfOutput(0, {{"deviceA", IOType::OUTPUT}, {"deviceC", IOType::INPUT}});
3467     // output1 is only used in a single partition.
3468     checkStepRolesOfOutput(1, {{"deviceC", IOType::OUTPUT}});
3469 
3470     // Check the step roles of the partition boundary temporaries that we will allocate memory on
3471     // behalf of (see ExecutionPlan::makeController for the allocation logic):
3472     //
3473     // opnd4 is a partition boundary temporary.
3474     checkStepRolesOfSourceOperand({0, opnd4},
3475                                   {{"deviceB", IOType::OUTPUT}, {"deviceC", IOType::INPUT}});
3476 }
3477 
3478 // Test a graph with an interpreted IF operation.
TEST_F(MemoryStepRoleTest,InterpretedIf)3479 TEST_F(MemoryStepRoleTest, InterpretedIf) {
3480     auto thenModel = std::make_unique<PartitioningModel>();
3481     const uint32_t thenOpnd0 = thenModel->addFloatOperand();
3482     const uint32_t thenOpnd1 = thenModel->addFloatOperand();
3483     const uint32_t thenOpnd2 = thenModel->addOperation2To1V1_0(0, thenOpnd0, thenOpnd1);
3484     thenModel->identifyInputsAndOutputs({thenOpnd0, thenOpnd1}, {thenOpnd2});
3485     thenModel->finish();
3486     EXPECT_TRUE(thenModel->isValid());
3487 
3488     auto elseModel = std::make_unique<PartitioningModel>();
3489     const uint32_t elseOpnd0 = elseModel->addFloatOperand();
3490     const uint32_t elseOpnd1 = elseModel->addFloatOperand();
3491     const uint32_t elseOpnd2 = elseModel->addOperation2To1V1_0(1, elseOpnd0, elseOpnd1);
3492     elseModel->identifyInputsAndOutputs({elseOpnd0, elseOpnd1}, {elseOpnd2});
3493     elseModel->finish();
3494     EXPECT_TRUE(elseModel->isValid());
3495 
3496     const uint32_t mainOpnd0 = mModel->addBooleanOperand();
3497     const uint32_t mainOpnd1 = mModel->addFloatOperand();
3498     const uint32_t mainOpnd2 = mModel->addFloatOperand();
3499     const uint32_t mainOpnd3 = mModel->addFloatOperand();
3500     mModel->addIfOperation(mainOpnd0, *thenModel, *elseModel, {mainOpnd1, mainOpnd2}, {mainOpnd3});
3501     mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});
3502 
3503     // deviceA handles op0, deviceB handles op1.
3504     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
3505     finishAndPartitionModelForDevices(devices);
3506     checkExecutionPlanSteps(mPlan, {kIfStep, "deviceA", kGotoStep, "deviceB"});
3507 
3508     // Check the step roles of the main model inputs and outputs:
3509     //
3510     // input0 is a condition operand of the interpreted IF that will only be read by the runtime.
3511     checkStepRolesOfInput(0, {});
3512     // input1 and input2 are outer inputs of the interpreted IF. The memories may be directly used
3513     // by the input operands of the then and else model.
3514     checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3515     checkStepRolesOfInput(2, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3516     // output0 is the outer output of the interpreted IF. The memory may be directly
3517     // used by the output operands of the then and else model.
3518     checkStepRolesOfOutput(0, {{"deviceA", IOType::OUTPUT}, {"deviceB", IOType::OUTPUT}});
3519 
3520     // There is no partition boundary temporary in this model that we will allocate memory on
3521     // behalf of (see ExecutionPlan::makeController for the allocation logic).
3522 }
3523 
// Test a graph with an interpreted WHILE operation.
//
// Main model:
//     mainOpnd3 = WHILE(condModel, bodyModel, mainOpnd0, mainOpnd1, mainOpnd2)
TEST_F(MemoryStepRoleTest, InterpretedWhile) {
    // Condition model:
    //     condOpnd3 = OP0(condOpnd0, condOpnd1)
    //     condOpnd4 = EQUAL(condOpnd2, condOpnd3)
    auto condModel = std::make_unique<PartitioningModel>();
    const uint32_t condOpnd0 = condModel->addFloatOperand();
    const uint32_t condOpnd1 = condModel->addFloatOperand();
    const uint32_t condOpnd2 = condModel->addFloatOperand();
    const uint32_t condOpnd3 = condModel->addOperation2To1V1_0(0, condOpnd0, condOpnd1);
    // The condition output must be a TENSOR_BOOL8 produced by EQUAL.
    const uint32_t condOpnd4 = condModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_EQUAL, {condOpnd2, condOpnd3}, WrapperType::TENSOR_BOOL8);
    condModel->identifyInputsAndOutputs({condOpnd0, condOpnd1, condOpnd2}, {condOpnd4});
    condModel->finish();
    EXPECT_TRUE(condModel->isValid());

    // Body model:
    //     bodyOpnd3 = OP1(bodyOpnd0, bodyOpnd1)
    //     bodyOpnd4 = OP1(bodyOpnd0, bodyOpnd2)
    auto bodyModel = std::make_unique<PartitioningModel>();
    const uint32_t bodyOpnd0 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd1 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd2 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd3 = bodyModel->addOperation2To1V1_0(1, bodyOpnd0, bodyOpnd1);
    const uint32_t bodyOpnd4 = bodyModel->addOperation2To1V1_0(1, bodyOpnd0, bodyOpnd2);
    bodyModel->identifyInputsAndOutputs({bodyOpnd0, bodyOpnd1, bodyOpnd2}, {bodyOpnd3, bodyOpnd4});
    bodyModel->finish();
    EXPECT_TRUE(bodyModel->isValid());

    const uint32_t mainOpnd0 = mModel->addFloatOperand();
    const uint32_t mainOpnd1 = mModel->addFloatOperand();
    const uint32_t mainOpnd2 = mModel->addFloatOperand();
    const uint32_t mainOpnd3 = mModel->addFloatOperand();
    mModel->addWhileOperation(*condModel, *bodyModel, {mainOpnd0, mainOpnd1, mainOpnd2},
                              {mainOpnd3});
    mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});

    // deviceA handles the cond model, deviceB handles the body model.
    // deviceA is the only device supporting EQUAL; deviceB's mask (1 << 1)
    // covers the OP1 operations of the body model.
    const auto devices = makeDevices({{"deviceA",
                                       0.8,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::EQUAL}},
                                      {"deviceB", 0.5, 1 << 1}});
    finishAndPartitionModelForDevices(devices);
    checkExecutionPlanSteps(mPlan, {kWhileStep, "deviceA", kGotoStep, "deviceB", kGotoStep});

    // The subgraph indexes of the condition and body models of the WHILE operation.
    // (Subgraph 0 is the main model.)
    const uint32_t condModelIndex = 1;
    const uint32_t bodyModelIndex = 2;

    // Check the step roles of the main model inputs and outputs:
    //
    // input0 (input-output), input1 (state-only), and input2 (input-only) are outer inputs of the
    // interpreted WHILE. The memories may be directly used by the input operands of the condition
    // and body models.
    checkStepRolesOfInput(0, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    checkStepRolesOfInput(2, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    // output0 is an outer output of the interpreted WHILE that will only be written by the runtime.
    checkStepRolesOfOutput(0, {});

    // Check the step roles of the partition boundary temporaries that we will allocate memory on
    // behalf of (see ExecutionPlan::makeController for the allocation logic):
    //
    // condOpnd4 is output of the interpreted WHILE condition model.
    checkStepRolesOfSourceOperand({condModelIndex, condOpnd4}, {{"deviceA", IOType::OUTPUT}});
    // bodyOpnd3 (input-output) and bodyOpnd4 (state-only) are outputs of the interpreted WHILE body
    // model. The memories may be directly used by the input operands of the condition and body
    // models.
    checkStepRolesOfSourceOperand(
            {bodyModelIndex, bodyOpnd3},
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceB", IOType::OUTPUT}});
    checkStepRolesOfSourceOperand(
            {bodyModelIndex, bodyOpnd4},
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceB", IOType::OUTPUT}});
}
3602 
// Test a graph with nested interpreted control flow operations: a WHILE operation with IF operation
// in the body model.
//
// Main model:
//     mainOpnd3 = WHILE(condModel, bodyModel, mainOpnd0, mainOpnd1, mainOpnd2)
// where the body model contains:
//     bodyOpnd3 = IF(bodyOpnd2, thenModel, elseModel, bodyOpnd0, bodyOpnd1)
TEST_F(MemoryStepRoleTest, NestedInterpretedControlFlow) {
    // Condition model: condOpnd3 = EQUAL(condOpnd0, condOpnd1).
    // condOpnd2 (boolean) is an input the condition model never reads; it
    // exists so the condition and body models have matching input lists.
    auto condModel = std::make_unique<PartitioningModel>();
    const uint32_t condOpnd0 = condModel->addFloatOperand();
    const uint32_t condOpnd1 = condModel->addFloatOperand();
    const uint32_t condOpnd2 = condModel->addBooleanOperand();
    const uint32_t condOpnd3 = condModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_EQUAL, {condOpnd0, condOpnd1}, WrapperType::TENSOR_BOOL8);
    condModel->identifyInputsAndOutputs({condOpnd0, condOpnd1, condOpnd2}, {condOpnd3});
    condModel->finish();
    EXPECT_TRUE(condModel->isValid());

    // Then model: thenOpnd2 = OP0(thenOpnd0, thenOpnd1).
    auto thenModel = std::make_unique<PartitioningModel>();
    const uint32_t thenOpnd0 = thenModel->addFloatOperand();
    const uint32_t thenOpnd1 = thenModel->addFloatOperand();
    const uint32_t thenOpnd2 = thenModel->addOperation2To1V1_0(0, thenOpnd0, thenOpnd1);
    thenModel->identifyInputsAndOutputs({thenOpnd0, thenOpnd1}, {thenOpnd2});
    thenModel->finish();
    EXPECT_TRUE(thenModel->isValid());

    // Else model: elseOpnd2 = OP1(elseOpnd0, elseOpnd1).
    auto elseModel = std::make_unique<PartitioningModel>();
    const uint32_t elseOpnd0 = elseModel->addFloatOperand();
    const uint32_t elseOpnd1 = elseModel->addFloatOperand();
    const uint32_t elseOpnd2 = elseModel->addOperation2To1V1_0(1, elseOpnd0, elseOpnd1);
    elseModel->identifyInputsAndOutputs({elseOpnd0, elseOpnd1}, {elseOpnd2});
    elseModel->finish();
    EXPECT_TRUE(elseModel->isValid());

    // Body model: bodyOpnd3 = IF(bodyOpnd2, then, else, bodyOpnd0, bodyOpnd1).
    auto bodyModel = std::make_unique<PartitioningModel>();
    const uint32_t bodyOpnd0 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd1 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd2 = bodyModel->addBooleanOperand();
    const uint32_t bodyOpnd3 = bodyModel->addFloatOperand();
    bodyModel->addIfOperation(bodyOpnd2, *thenModel, *elseModel, {bodyOpnd0, bodyOpnd1},
                              {bodyOpnd3});
    bodyModel->identifyInputsAndOutputs({bodyOpnd0, bodyOpnd1, bodyOpnd2}, {bodyOpnd3});
    bodyModel->finish();
    EXPECT_TRUE(bodyModel->isValid());

    const uint32_t mainOpnd0 = mModel->addFloatOperand();
    const uint32_t mainOpnd1 = mModel->addFloatOperand();
    const uint32_t mainOpnd2 = mModel->addBooleanOperand();
    const uint32_t mainOpnd3 = mModel->addFloatOperand();
    mModel->addWhileOperation(*condModel, *bodyModel, {mainOpnd0, mainOpnd1, mainOpnd2},
                              {mainOpnd3});
    mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});

    // deviceA handles the cond model, deviceB handles the then model,
    // deviceC handles the else model.
    // deviceA is the only device supporting EQUAL; deviceB's mask (1 << 0)
    // covers OP0 and deviceC's mask (1 << 1) covers OP1.
    const auto devices = makeDevices({{"deviceA",
                                       0.8,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::EQUAL}},
                                      {"deviceB", 0.5, 1 << 0},
                                      {"deviceC", 0.5, 1 << 1}});
    finishAndPartitionModelForDevices(devices);
    checkExecutionPlanSteps(mPlan, {kWhileStep, "deviceA", kGotoStep, kIfStep, "deviceB", kGotoStep,
                                    "deviceC", kGotoStep});

    // The subgraph indexes of the condition and body models of the WHILE operation.
    // (Subgraph 0 is the main model.)
    const uint32_t condModelIndex = 1;
    const uint32_t bodyModelIndex = 2;

    // Check the step roles of the main model inputs and outputs:
    //
    // input0 and input1 are outer inputs of the interpreted WHILE. The memories may be directly
    // used by the input operands of the condition and body models, and then be directly used by the
    // input operands of the then and else model of the interpreted IF in the body model.
    checkStepRolesOfInput(
            0,
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceC", IOType::INPUT}});
    checkStepRolesOfInput(
            1,
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceC", IOType::INPUT}});
    // input2 is also an outer input of the interpreted WHILE. The memory has no step role in the
    // condition model. In the body model, the memory will be used by the condition operand of the
    // interpreted IF that will only be read by the runtime.
    checkStepRolesOfInput(2, {});
    // output0 is an outer output of the interpreted WHILE that will only be written by the runtime.
    checkStepRolesOfOutput(0, {});

    // Check the step roles of the partition boundary temporaries that we will allocate memory on
    // behalf of (see ExecutionPlan::makeController for the allocation logic):
    //
    // condOpnd3 is output of the interpreted WHILE condition model.
    checkStepRolesOfSourceOperand({condModelIndex, condOpnd3}, {{"deviceA", IOType::OUTPUT}});
    // bodyOpnd3 is output of the interpreted WHILE body model. The memories may be directly used by
    // the input operands of the condition and body models, and then be directly used by the
    // input operands of the then and else model of the interpreted IF in the body model.
    checkStepRolesOfSourceOperand({bodyModelIndex, bodyOpnd3}, {{"deviceA", IOType::INPUT},
                                                                {"deviceB", IOType::INPUT},
                                                                {"deviceB", IOType::OUTPUT},
                                                                {"deviceC", IOType::INPUT},
                                                                {"deviceC", IOType::OUTPUT}});
}
3701 
3702 }  // namespace
3703