/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <gtest/gtest.h>

#include <algorithm>
#include <filesystem>
#include <functional>
#include <map>
#include <memory>
#include <queue>
#include <set>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "CompilationBuilder.h"
#include "ControlFlow.h"
#include "ExecutionPlan.h"
#include "HalInterfaces.h"
#include "Manager.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"
#include "NeuralNetworksOEM.h"
#include "SampleDriver.h"
#include "TestNeuralNetworksWrapper.h"
#include "Utils.h"
#include "ValidateHal.h"

// Uncomment the following line to generate some debugging output that
// may be useful when analyzing failures:
//
// #define VERBOSE VERBOSE

// These tests do whitebox testing of the graph partitioning
// algorithm.  It is "whitebox" in the sense that we're not evaluating
// whether a particular partitioning is legal, or "good enough"
// according to some metric, but whether it exactly matches the
// expected behavior of the current partitioning algorithm.
//
// A key part of the current partitioning algorithm is to determine
// which device among the available devices should be the one to
// execute a particular operation from the graph.  This determination
// is made "locally" -- i.e., it does not depend on the graph
// topology, only on the properties of the operation in question.
// IDevice::getSupportedOperations() indicates which operations in a
// graph can be executed on a device, and IDevice::getCapabilities()
// indicates how "good" that device is for executing particular kinds
// of operations.  For each operation, the partitioning algorithm
// picks the "best" device that is capable of executing that
// operation; if no device can do so, then the algorithm picks the
// cpu.
//
// As part of this testing approach, we want to make it easy to
// specify which operations in a test graph can be executed on which
// devices.  We accomplish this in the following way:
// - A unary OEM operation is available.
// - There is a collection of operations (each of which has two inputs
//   and one output):
//   - Eight kinds of operations available at driver version V1_0 or
//     later.  They are represented in the graph as ADD or MUL with a
//     particular activation function -- two opcodes times four
//     activation functions means eight available operation kinds.
//     This is a low-level representation detail -- when we specify the
//     behavior of the device or build a graph, we do so in terms of
//     operation encodings 0..7.
//   - Eight kinds of operations available at driver version V1_1 or
//     later.  They are represented in the graph as DIV or SUB with
//     a particular activation function, exactly analogous to ADD
//     and MUL above.  We use operation encodings 8..15 for them.
//   - Four kinds of operations available at driver version V1_2 or
//     later.  They are represented in the graph as MAXIMUM,
//     MINIMUM, POW, or PRELU.  These operations take no activation
//     function, so we only get 4 operation kinds, for which we
//     use operation encodings 16..19.
// - There is another collection of operations (each of which has one input
//   and one output):
//   - A single kind of operation available at driver version V1_3 or
//     later.  It is represented in the graph as HARD_SWISH.  This
//     operation takes no activation function; we use operation
//     encoding 20 for it.
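//
// To illustrate the encoding scheme (this example is illustrative only and
// follows from the encoding constants defined later in this file): the
// encoding of a fused operation is its opcode's first encoding plus its fuse
// code.  For example, encoding 5 is MUL (first encoding 4) plus fuse code 1
// (ANEURALNETWORKS_FUSED_RELU), and encoding 13 is SUB (first encoding 12)
// plus fuse code 1.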

// When we instantiate a device for testing purposes, we specify what subset of
// those operations the device is able to execute.
//
// In order to determine whether or not a partitioning matches the
// expected partitioning, we check the number of partitions, check
// which device each partition targets, and compare each partition's
// subgraph, model inputs, model outputs, step model inputs, and
// step model outputs against what is expected.  In order to perform
// that comparison, we build a model to compare against a partition's
// step model and run a graph comparison algorithm on it.  The graph
// comparison and the inputs and outputs comparisons are syntactic
// rather than semantic comparisons -- they don't allow for
// reorderings of inputs and outputs.  Because of this, we need to
// know exactly how the partitioning algorithm orders inputs and
// outputs in order to construct the models and operand lists to
// compare against.  Here are some relevant behaviors of the
// partitioning algorithm:
//
// - It builds a subgraph by walking operations in forward topological
//   order, and adding each operation's input operands and output
//   operands in index order (input followed by output) when that
//   operation is added.  (It does not add an input that has already
//   been added.)
// - It finds model inputs, model outputs, and step model inputs in
//   the order the corresponding operands were added to the subgraph
//   (see ExecutionStep methods getModelInputs(), getModelOutputs(),
//   getTempsAsStepModelInputs(), getOutputsAsStepModelInputs()).
// - It finds temps as step model outputs in numerical order of corresponding
//   operand number in the original model (see ExecutionStep method
//   getTempsAsStepModelOutputs()).
// - When it calls identifyInputsAndOutputs() on the step model, it
//   passes inputs from getModelInputs() in order, followed by temps as
//   step model inputs from getTempsAsStepModelInputs() in order,
//   followed by outputs as step model inputs from
//   getOutputsAsStepModelInputs() in order; and it passes outputs from
//   getModelOutputs() in order followed by step model outputs from
//   getTempsAsStepModelOutputs() in order.
//
// TODO: Maybe the logic for comparing a partition to an expected
//       model should be changed to tolerate reorderings of inputs and
//       outputs, so that when we build models and lists to compare
//       against, we don't need to worry about input and output
//       orderings.  But is there a way to do this that still lets us
//       verify that we have the correct relationships between
//       an (original) model's inputs and outputs and each step model's
//       inputs and outputs, as well as the correct relationship
//       between step model inputs and outputs across partitions?

namespace {

using namespace android::nn::hal;
using CompilationBuilder = ::android::nn::CompilationBuilder;
using Deadline = ::android::nn::Deadline;
using Device = ::android::nn::Device;
using DeviceManager = ::android::nn::DeviceManager;
using ExecutePreference = ::android::nn::test_wrapper::ExecutePreference;
using ExecutePriority = ::android::nn::test_wrapper::ExecutePriority;
using ExecutionPlan = ::android::nn::ExecutionPlan;
using ExecutionStep = ::android::nn::ExecutionStep;
using HalVersion = ::android::nn::HalVersion;
using HidlModel = V1_3::Model;
using LogicalStep = ::android::nn::LogicalStep;
using ModelBuilder = ::android::nn::ModelBuilder;
using Result = ::android::nn::test_wrapper::Result;
using SampleDriver = ::android::nn::sample_driver::SampleDriver;
using WrapperCompilation = ::android::nn::test_wrapper::Compilation;
using WrapperModel = ::android::nn::test_wrapper::Model;
using WrapperOperandType = ::android::nn::test_wrapper::OperandType;
using WrapperSymmPerChannelQuantParams = ::android::nn::test_wrapper::SymmPerChannelQuantParams;
using WrapperType = ::android::nn::test_wrapper::Type;

template <typename T>
using MQDescriptorSync = ::android::hardware::MQDescriptorSync<T>;

constexpr Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};

Capabilities makeCapabilities(float perf) {
    PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
    return {.relaxedFloat32toFloat16PerformanceScalar = perfInfo,
            .relaxedFloat32toFloat16PerformanceTensor = perfInfo,
            .operandPerformance =
                    ::android::nn::nonExtensionOperandPerformance<HalVersion::V1_3>(perfInfo),
            .ifPerformance = perfInfo,
            .whilePerformance = perfInfo};
}

void update(Capabilities* capabilities, OperandType type, float perf) {
    PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
    ::android::nn::update(&capabilities->operandPerformance, type, perfInfo);
}

float lookupExecTime(const Capabilities& capabilities, OperandType type) {
    return ::android::nn::lookup(capabilities.operandPerformance, type).execTime;
}
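
// A sketch of how these helpers compose (illustrative only): a device that is
// generally fast (relative execution time 0.5) but slow for TENSOR_FLOAT32
// operands could be described as
//
//     Capabilities capabilities = makeCapabilities(0.5);
//     update(&capabilities, OperandType::TENSOR_FLOAT32, 2.0);
//     // lookupExecTime(capabilities, OperandType::TENSOR_FLOAT32) now returns 2.0.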

const uint32_t kNumFuseCodes = 4;
const uint32_t kBadOperation = ~0;

// V1_0 operations
const uint32_t kFirstEncodingADD = 0;
const uint32_t kFirstEncodingMUL = kFirstEncodingADD + kNumFuseCodes;
const uint32_t kFirstEncodingV1_0 = kFirstEncodingADD;
const uint32_t kLastEncodingV1_0 = kFirstEncodingMUL + kNumFuseCodes - 1;

// V1_1 operations
const uint32_t kFirstEncodingDIV = kLastEncodingV1_0 + 1;
const uint32_t kFirstEncodingSUB = kFirstEncodingDIV + kNumFuseCodes;
const uint32_t kFirstEncodingV1_1 = kFirstEncodingDIV;
const uint32_t kLastEncodingV1_1 = kFirstEncodingSUB + kNumFuseCodes - 1;

// V1_2 operations
const uint32_t kFirstEncodingMAXIMUM = kLastEncodingV1_1 + 1;
const uint32_t kFirstEncodingMINIMUM = kFirstEncodingMAXIMUM + 1;
const uint32_t kFirstEncodingPOW = kFirstEncodingMINIMUM + 1;
const uint32_t kFirstEncodingPRELU = kFirstEncodingPOW + 1;
const uint32_t kFirstEncodingV1_2 = kFirstEncodingMAXIMUM;
const uint32_t kLastEncodingV1_2 = kFirstEncodingPRELU;

// V1_3 operations
const uint32_t kFirstEncodingHARD_SWISH = kLastEncodingV1_2 + 1;
const uint32_t kFirstEncodingV1_3 = kFirstEncodingHARD_SWISH;
const uint32_t kLastEncodingV1_3 = kFirstEncodingHARD_SWISH;

const std::map<OperationType, uint32_t> operationToFirstEncoding = {
        {OperationType::ADD, kFirstEncodingADD},
        {OperationType::MUL, kFirstEncodingMUL},
        {OperationType::DIV, kFirstEncodingDIV},
        {OperationType::SUB, kFirstEncodingSUB},
        {OperationType::MAXIMUM, kFirstEncodingMAXIMUM},
        {OperationType::MINIMUM, kFirstEncodingMINIMUM},
        {OperationType::POW, kFirstEncodingPOW},
        {OperationType::PRELU, kFirstEncodingPRELU},
        {OperationType::HARD_SWISH, kFirstEncodingHARD_SWISH},
};

// Sorted in reverse order (std::greater) so that we can use map::lower_bound to
// find an entry whose key is numerically less than or equal to a search value.
// mapped_type is (OperationCode, hasFuseCode).
const std::map<uint32_t, std::pair<uint32_t, bool>, std::greater<>> firstEncodingToOperation = {
        {kFirstEncodingADD, {ANEURALNETWORKS_ADD, true}},
        {kFirstEncodingMUL, {ANEURALNETWORKS_MUL, true}},
        {kFirstEncodingDIV, {ANEURALNETWORKS_DIV, true}},
        {kFirstEncodingSUB, {ANEURALNETWORKS_SUB, true}},
        {kFirstEncodingMAXIMUM, {ANEURALNETWORKS_MAXIMUM, false}},
        {kFirstEncodingMINIMUM, {ANEURALNETWORKS_MINIMUM, false}},
        {kFirstEncodingPOW, {ANEURALNETWORKS_POW, false}},
        {kFirstEncodingPRELU, {ANEURALNETWORKS_PRELU, false}},
        {kFirstEncodingHARD_SWISH, {ANEURALNETWORKS_HARD_SWISH, false}},
};
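
// For example (illustrative only): firstEncodingToOperation.lower_bound(6)
// finds the MUL entry, because under this ordering kFirstEncodingMUL == 4 is
// the largest key that is <= 6.  Encoding 6 therefore decodes to
// ANEURALNETWORKS_MUL with fuse code 6 - 4 == 2.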

// Look up the operation with the specified index in a graph, and return the
// operation encoding; or, if for some reason this is not one of the encoded
// operations, then return kBadOperation.
uint32_t lookupOperation(std::function<const Operation&(uint32_t)> getOperation,
                         std::function<const Operand&(uint32_t)> getOperand,
                         std::function<const uint8_t*(uint32_t)> getValue,
                         uint32_t operationIndex) {
    const Operation& operation = getOperation(operationIndex);
    switch (operation.type) {
        case OperationType::ADD:
        case OperationType::MUL:
        case OperationType::DIV:
        case OperationType::SUB: {
            // input2 is the fused activation function
            const Operand& input2 = getOperand(operation.inputs[2]);
            if ((input2.type == OperandType::INT32) &&
                (input2.lifetime == OperandLifeTime::CONSTANT_COPY)) {
                int32_t value;
                CHECK_EQ(sizeof(value), input2.location.length);
                memcpy(&value, getValue(input2.location.offset), input2.location.length);
                return value + operationToFirstEncoding.at(operation.type);
            }
            break;
        }
        default: {
            auto it = operationToFirstEncoding.find(operation.type);
            if (it != operationToFirstEncoding.end()) {
                return it->second;
            }
            break;
        }
    }
    return kBadOperation;
}

uint32_t lookupOperation(const HidlModel& model, const Subgraph& subgraph,
                         uint32_t operationIndex) {
    return lookupOperation(
            [&subgraph](uint32_t index) -> const Operation& { return subgraph.operations[index]; },
            [&subgraph](uint32_t index) -> const Operand& { return subgraph.operands[index]; },
            [&model](uint32_t offset) { return &model.operandValues[offset]; }, operationIndex);
}

#ifdef VERBOSE
// This is a debugging utility function.
void dump(const char* name, const ModelBuilder* model) {
    const HidlModel hidlModel = model->makeHidlModel();
    std::cout << name << ": " << toString(hidlModel) << std::endl;
    std::cout << "inputs: " << toString(hidlModel.main.inputIndexes) << std::endl;
    std::cout << "outputs: " << toString(hidlModel.main.outputIndexes) << std::endl;
    for (size_t i = 0, e = hidlModel.main.operations.size(); i < e; i++) {
        std::cout << "operation[" << i << "]: " << toString(hidlModel.main.operations[i])
                  << std::endl;
    }
}
#endif

// This is an IDevice for testing purposes.  It only has a few interesting
// properties, all of which are specified as constructor arguments: device
// capabilities; which subset of operation kinds (0..20) the device supports;
// whether the device supports the OEM operation; and which other operation
// types the device supports.  The subset of operation kinds is represented
// with a bitmask, in which operation kind K corresponds to the bit (1 << K).
// The other operations are represented by a set of OperationType.
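//
// For example (illustrative only -- the name, version string, and mask are
// arbitrary), a driver that supports exactly operation kinds 0 and 12 (ADD
// and SUB, each with FUSED_NONE) and rejects the OEM operation could be
// created as:
//
//     sp<PartitioningDriver> driver = new PartitioningDriver(
//             "example", "exampleVersion", makeCapabilities(0.5),
//             (1 << 0) | (1 << 12), PartitioningDriver::OEMNo);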
class PartitioningDriver : public SampleDriver {
   private:
    // Dummy class -- a prepared model must not be nullptr.
    class PartitioningPreparedModel : public IPreparedModel {
       public:
        Return<V1_0::ErrorStatus> execute(const V1_0::Request&,
                                          const sp<V1_0::IExecutionCallback>&) override {
            return V1_0::ErrorStatus::DEVICE_UNAVAILABLE;
        }
        Return<V1_0::ErrorStatus> execute_1_2(const V1_0::Request&, MeasureTiming,
                                              const sp<V1_2::IExecutionCallback>&) override {
            return V1_0::ErrorStatus::DEVICE_UNAVAILABLE;
        }
        Return<V1_3::ErrorStatus> execute_1_3(const V1_3::Request&, MeasureTiming,
                                              const OptionalTimePoint&,
                                              const OptionalTimeoutDuration&,
                                              const sp<V1_3::IExecutionCallback>&) override {
            return V1_3::ErrorStatus::DEVICE_UNAVAILABLE;
        }
        Return<void> executeSynchronously(const V1_0::Request&, MeasureTiming,
                                          executeSynchronously_cb cb) override {
            cb(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, {}, kBadTiming);
            return Void();
        }
        Return<void> executeSynchronously_1_3(const V1_3::Request&, MeasureTiming,
                                              const OptionalTimePoint&,
                                              const OptionalTimeoutDuration&,
                                              executeSynchronously_1_3_cb cb) override {
            cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, {}, kBadTiming);
            return Void();
        }
        Return<void> configureExecutionBurst(
                const sp<V1_2::IBurstCallback>& /*callback*/,
                const MQDescriptorSync<V1_2::FmqRequestDatum>& /*requestChannel*/,
                const MQDescriptorSync<V1_2::FmqResultDatum>& /*resultChannel*/,
                configureExecutionBurst_cb cb) override {
            cb(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, nullptr);
            return Void();
        }
        Return<void> executeFenced(const Request&, const hidl_vec<hidl_handle>&, MeasureTiming,
                                   const OptionalTimePoint&, const OptionalTimeoutDuration&,
                                   const OptionalTimeoutDuration&, executeFenced_cb cb) {
            cb(ErrorStatus::DEVICE_UNAVAILABLE, hidl_handle(nullptr), nullptr);
            return Void();
        }
    };

   public:
    enum OEM {
        OEMNo,          // rejected by getSupportedOperations and prepareModel
        OEMIndecisive,  // accepted by getSupportedOperations but not prepareModel
        OEMYes,         // accepted by getSupportedOperations and prepareModel
    };

    PartitioningDriver(const char* name, const char* version, Capabilities capabilities,
                       uint32_t operationMask, OEM oem = OEMNo,
                       std::set<OperationType> operationTypes = {})
        : SampleDriver(name),
          mVersionString(version),
          mCapabilities(capabilities),
          mOperationMask(operationMask),
          mOEM(oem),
          mOperationTypes(std::move(operationTypes)) {
        CHECK_EQ(mOperationTypes.count(OperationType::OEM_OPERATION), size_t(0));
        std::for_each(mOperationTypes.begin(), mOperationTypes.end(), [](OperationType type) {
            CHECK_EQ(operationToFirstEncoding.count(type), size_t(0));
        });
    }
    ~PartitioningDriver() override {}

    Return<void> getVersionString(getVersionString_cb cb) override {
        cb(V1_0::ErrorStatus::NONE, mVersionString);
        return Void();
    }

    Return<V1_3::ErrorStatus> prepareModel_1_3(
            const Model& model, ExecutionPreference, Priority, const OptionalTimePoint&,
            const hidl_vec<hidl_handle>&, const hidl_vec<hidl_handle>&, const CacheToken&,
            const sp<V1_3::IPreparedModelCallback>& cb) override {
        V1_3::ErrorStatus status = V1_3::ErrorStatus::NONE;
        if (mOEM != OEMYes) {
            for (const auto& operation : model.main.operations) {
                if (operation.type == OperationType::OEM_OPERATION) {
                    status = V1_3::ErrorStatus::INVALID_ARGUMENT;
                    break;
                }
            }
        }
        cb->notify_1_3(status, new PartitioningPreparedModel);
        return status;
    }

    Return<DeviceStatus> getStatus() override { return DeviceStatus::AVAILABLE; }

    Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
        cb(V1_3::ErrorStatus::NONE, mCapabilities);
        return Void();
    }

    Return<void> getSupportedOperations_1_3(const Model& model,
                                            getSupportedOperations_1_3_cb cb) override {
        if (!android::nn::validateModel(model)) {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
            return Void();
        }
        cb(V1_3::ErrorStatus::NONE, getSupportedOperationsForSubgraph(model, model.main));
        return Void();
    }

    Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb) override {
        cb(V1_0::ErrorStatus::NONE, /*numModelCache=*/1, /*numDataCache=*/1);
        return Void();
    }

    Return<V1_0::ErrorStatus> prepareModelFromCache(
            const hidl_vec<hidl_handle>&, const hidl_vec<hidl_handle>&, const CacheToken&,
            const sp<V1_2::IPreparedModelCallback>& callback) override {
        callback->notify_1_2(V1_0::ErrorStatus::NONE, new PartitioningPreparedModel);
        return V1_0::ErrorStatus::NONE;
    }

   private:
    std::vector<bool> getSupportedOperationsForSubgraph(const Model& model,
                                                        const Subgraph& subgraph) {
        auto supportsEntireSubgraph = [this, &model, &subgraph](uint32_t refSubgraphOperandIndex) {
            const Operand& refSubgraphOperand = subgraph.operands[refSubgraphOperandIndex];
            const Subgraph& refSubgraph = model.referenced[refSubgraphOperand.location.offset];
            std::vector<bool> supported = getSupportedOperationsForSubgraph(model, refSubgraph);
            return std::all_of(supported.begin(), supported.end(), [](bool x) { return x; });
        };
        const size_t count = subgraph.operations.size();
        std::vector<bool> supported(count);
        for (size_t i = 0; i < count; i++) {
            const Operation operation = subgraph.operations[i];
            if (mOperationTypes.count(operation.type)) {
                if (operation.type == OperationType::IF) {
                    namespace op = android::nn::operation_if;
                    supported[i] =
                            supportsEntireSubgraph(operation.inputs[op::kThenModelOperand]) &&
                            supportsEntireSubgraph(operation.inputs[op::kElseModelOperand]);
                } else if (operation.type == OperationType::WHILE) {
                    namespace op = android::nn::operation_while;
                    supported[i] =
                            supportsEntireSubgraph(operation.inputs[op::kCondModelOperand]) &&
                            supportsEntireSubgraph(operation.inputs[op::kBodyModelOperand]);
                } else {
                    supported[i] = true;
                }
                continue;
            }
            if (operation.type == OperationType::OEM_OPERATION) {
                supported[i] = (mOEM != OEMNo);
                continue;
            }
            supported[i] = false;
            uint32_t operationEncoding = lookupOperation(model, subgraph, i);
            if ((operationEncoding != kBadOperation) &&
                (mOperationMask & (1 << operationEncoding))) {
                supported[i] = true;
            }
        }
        return supported;
    }

    std::string mVersionString;
    Capabilities mCapabilities;
    uint32_t mOperationMask;
    OEM mOEM;
    std::set<OperationType> mOperationTypes;
};

// Like PartitioningDriver, but implementing 1.2
class PartitioningDriverV1_2 : public V1_2::IDevice {
   public:
    PartitioningDriverV1_2(const char* name, const char* version, Capabilities capabilities,
                           uint32_t operationMask,
                           PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                           std::set<OperationType> operationTypes = {})
        : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
                                               operationTypes)) {}
    Return<void> getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities_1_2(_hidl_cb);
    }
    Return<void> getSupportedOperations_1_2(const V1_2::Model& model,
                                            getSupportedOperations_1_2_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations_1_2(model, _hidl_cb);
    }
    Return<V1_0::ErrorStatus> prepareModel_1_2(
            const V1_2::Model& model, ExecutionPreference preference,
            const hidl_vec<hidl_handle>& modelCache, const hidl_vec<hidl_handle>& dataCache,
            const CacheToken& token,
            const sp<V1_2::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel_1_2(model, preference, modelCache, dataCache, token,
                                               actualCallback);
    }
    Return<void> getVersionString(getVersionString_cb _hidl_cb) override {
        return mLatestDriver->getVersionString(_hidl_cb);
    }
    Return<void> getType(getType_cb _hidl_cb) override { return mLatestDriver->getType(_hidl_cb); }
    Return<void> getSupportedExtensions(getSupportedExtensions_cb _hidl_cb) {
        return mLatestDriver->getSupportedExtensions(_hidl_cb);
    }
    Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb) {
        return mLatestDriver->getNumberOfCacheFilesNeeded(_hidl_cb);
    }
    Return<V1_0::ErrorStatus> prepareModelFromCache(
            const hidl_vec<hidl_handle>& modelCache, const hidl_vec<hidl_handle>& dataCache,
            const CacheToken& token, const sp<V1_2::IPreparedModelCallback>& callback) {
        return mLatestDriver->prepareModelFromCache(modelCache, dataCache, token, callback);
    }
    Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities_1_1(_hidl_cb);
    }
    Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
                                            getSupportedOperations_1_1_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
    }
    Return<V1_0::ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, ExecutionPreference preference,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
    }
    Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
    Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities(_hidl_cb);
    }
    Return<void> getSupportedOperations(const V1_0::Model& model,
                                        getSupportedOperations_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations(model, _hidl_cb);
    }
    Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel(model, actualCallback);
    }

   private:
    const sp<V1_3::IDevice> mLatestDriver;
};

// Like PartitioningDriver, but implementing 1.1
class PartitioningDriverV1_1 : public V1_1::IDevice {
   public:
    PartitioningDriverV1_1(const char* name, const char* version, Capabilities capabilities,
                           uint32_t operationMask,
                           PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                           std::set<OperationType> operationTypes = {})
        : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
                                               operationTypes)) {}
    Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities_1_1(_hidl_cb);
    }
    Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
                                            getSupportedOperations_1_1_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
    }
    Return<V1_0::ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, ExecutionPreference preference,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
    }
    Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
    Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities(_hidl_cb);
    }
    Return<void> getSupportedOperations(const V1_0::Model& model,
                                        getSupportedOperations_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations(model, _hidl_cb);
    }
    Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel(model, actualCallback);
    }

   private:
    const sp<V1_3::IDevice> mLatestDriver;
};

// Like PartitioningDriver, but implementing 1.0
class PartitioningDriverV1_0 : public V1_0::IDevice {
   public:
    PartitioningDriverV1_0(const char* name, const char* version, Capabilities capabilities,
                           uint32_t operationMask,
                           PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                           std::set<OperationType> operationTypes = {})
        : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
                                               operationTypes)) {}
    Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities(_hidl_cb);
    }
    Return<void> getSupportedOperations(const V1_0::Model& model,
                                        getSupportedOperations_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations(model, _hidl_cb);
    }
    Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel(model, actualCallback);
    }
    Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }

   private:
    const sp<V1_3::IDevice> mLatestDriver;
};

// This class adds some simple abstractions and utilities on top of
// WrapperModel.  For example, it provides methods that work in terms of
// operation kind (0..20); and because we care about graph topology rather than
// details of operand types and values, it greatly simplifies the process of
// creating operands.
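//
// A typical test builds a small graph and then partitions it; for example
// (a minimal sketch -- the variable names and the devices vector are
// placeholders):
//
//     PartitioningModel model;
//     uint32_t opnd0 = model.addFloatOperand();
//     uint32_t opnd1 = model.addFloatOperand();
//     uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);  // kind 0: ADD, FUSED_NONE
//     model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
//     model.finish();
//     ExecutionPlan plan;
//     model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
//                            ExecutePriority::DEFAULT, {}, &plan);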
class PartitioningModel : private WrapperModel {
   public:
    using WrapperModel::finish;
    using WrapperModel::getHandle;
    using WrapperModel::identifyInputsAndOutputs;
    using WrapperModel::isValid;
    using WrapperModel::relaxComputationFloat32toFloat16;

    enum class Dimensioned { NO, YES };

    // Create a tensor operand of the specified type, and return the
    // corresponding operand index.
    uint32_t addFloatOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_FLOAT32, dimensioned);
    }
    uint32_t addQuantOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_QUANT8_ASYMM, dimensioned);
    }
    uint32_t addBooleanOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_BOOL8, dimensioned);
    }

    // Create an operand of the specified type, and return the corresponding
    // operand index.
    uint32_t addOperand(WrapperType wrapperType, Dimensioned dimensioned = Dimensioned::YES) {
        auto dimensions = [dimensioned]() -> std::vector<uint32_t> {
            if (dimensioned == Dimensioned::YES) {
                return {1};
            } else {
                return {};
            }
        };

        switch (static_cast<int>(wrapperType)) {
            case ANEURALNETWORKS_BOOL:
            case ANEURALNETWORKS_FLOAT16:
            case ANEURALNETWORKS_FLOAT32:
            case ANEURALNETWORKS_INT32:
            case ANEURALNETWORKS_UINT32:
            case ANEURALNETWORKS_MODEL:
            case ANEURALNETWORKS_OEM_SCALAR: {
                return addOperand(WrapperOperandType{wrapperType, {}});
            }

            case ANEURALNETWORKS_TENSOR_BOOL8:
            case ANEURALNETWORKS_TENSOR_FLOAT16:
            case ANEURALNETWORKS_TENSOR_FLOAT32:
            case ANEURALNETWORKS_TENSOR_OEM_BYTE: {
                return addOperand(WrapperOperandType{wrapperType, dimensions()});
            }

            case ANEURALNETWORKS_TENSOR_INT32:
            case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
            case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED:
            case ANEURALNETWORKS_TENSOR_QUANT8_SYMM:
            case ANEURALNETWORKS_TENSOR_QUANT16_ASYMM:
            case ANEURALNETWORKS_TENSOR_QUANT16_SYMM: {
                return addOperand(WrapperOperandType{wrapperType, dimensions(), 1.0f});
            }

            case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL: {
                return addOperand(WrapperOperandType{wrapperType, dimensions(),
                                                     WrapperSymmPerChannelQuantParams({1.0f}, 0)});
            }

            default:
                ADD_FAILURE() << "Unexpected type " << static_cast<uint32_t>(wrapperType);
                return ~uint32_t(0);
        }
    }

    // Create an operand of the specified operand type, and return the
    // corresponding operand index.
    uint32_t addOperand(const WrapperOperandType& wrapperOperandType) {
        mWrapperOperandType.push_back(wrapperOperandType);
        return WrapperModel::addOperand(&wrapperOperandType);
    }

    // Create an operation with any number of inputs and one output, specifying
    // the operation type (e.g., ANEURALNETWORKS_ADD), the input operand
    // indexes, and the output type (e.g., WrapperType::TENSOR_FLOAT32).
    // Returns the output operand index.
    uint32_t addExplicitOperationXTo1(ANeuralNetworksOperationType operationType,
                                      const std::vector<uint32_t>& inputs, WrapperType outputType,
                                      Dimensioned dimensionedOutput = Dimensioned::YES) {
        uint32_t output = addOperand(outputType, dimensionedOutput);
        addOperation(operationType, inputs, {output});
        return output;
    }

    // Create a V1_0 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_0 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_0(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_0 - kFirstEncodingV1_0);
        return addOperation2To1(operation + kFirstEncodingV1_0, input0, input1, dimensionedOutput);
    }

    // Create a V1_1 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_1 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_1(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_1 - kFirstEncodingV1_1);
        return addOperation2To1(operation + kFirstEncodingV1_1, input0, input1, dimensionedOutput);
    }

    // Create a V1_2 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_2 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_2(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_2 - kFirstEncodingV1_2);
        return addOperation2To1(operation + kFirstEncodingV1_2, input0, input1, dimensionedOutput);
    }

    // Create a V1_3 operation with one input and one output, specifying the
    // operation kind (where 0 is the first V1_3 operation) and the input
    // operand index.
    // Returns the output operand index.
    uint32_t addOperation1To1V1_3(uint32_t operation, const uint32_t input0,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_3 - kFirstEncodingV1_3);
        return addOperation1To1(operation + kFirstEncodingV1_3, input0, dimensionedOutput);
    }

    // Create an OEM operation with one input and one output,
    // specifying the input operand index.  Returns the output operand
    // index.
    uint32_t addOperationOEM1To1(const uint32_t input,
                                 Dimensioned dimensionedOutput = Dimensioned::YES) {
        uint32_t output = addOperandOfSameType(input, dimensionedOutput);
        addOperation(ANEURALNETWORKS_OEM_OPERATION, {input}, {output});
        return output;
    }

    // Create an IF operation with the given condition operand and two
    // referenced models for the true and false cases.
    void addIfOperation(const uint32_t cond, const PartitioningModel& trueModel,
                        const PartitioningModel& falseModel, const std::vector<uint32_t>& inputs,
                        const std::vector<uint32_t>& outputs) {
        const uint32_t opndTrue = addRefModelOperand(trueModel);
        const uint32_t opndFalse = addRefModelOperand(falseModel);
        std::vector<uint32_t> ifInputs = {cond, opndTrue, opndFalse};
        ifInputs.insert(ifInputs.end(), inputs.begin(), inputs.end());
        addOperation(ANEURALNETWORKS_IF, ifInputs, outputs);
    }

    // Create a WHILE operation with the given condition and body referenced models.
    void addWhileOperation(const PartitioningModel& condModel, const PartitioningModel& bodyModel,
                           const std::vector<uint32_t>& inputs,
                           const std::vector<uint32_t>& outputs) {
        const uint32_t condOperand = addRefModelOperand(condModel);
        const uint32_t bodyOperand = addRefModelOperand(bodyModel);
        std::vector<uint32_t> whileInputs = {condOperand, bodyOperand};
        whileInputs.insert(whileInputs.end(), inputs.begin(), inputs.end());
        addOperation(ANEURALNETWORKS_WHILE, whileInputs, outputs);
    }

    // Run the partitioning algorithm to create an ExecutionPlan.
    int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
                         ExecutePreference preference, ExecutePriority priority,
                         const std::optional<Deadline>& deadline, ExecutionPlan* plan) {
        return reinterpret_cast<ModelBuilder*>(getHandle())
                ->partitionTheWork(devices, static_cast<uint32_t>(preference),
                                   static_cast<int32_t>(priority), deadline, plan);
    }

#ifdef VERBOSE
    // This is a debugging utility function.
    void dump(const char* name) const {
        const ModelBuilder* mb = reinterpret_cast<const ModelBuilder*>(getHandle());
        ::dump(name, mb);
    }
#endif

   private:
    // Create an operation with two inputs and one output, specifying
    // the operation kind and the input operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1(uint32_t operation, const uint32_t input0, const uint32_t input1,
                              Dimensioned dimensionedOutput = Dimensioned::YES) {
        auto it = firstEncodingToOperation.lower_bound(operation);
        CHECK(it != firstEncodingToOperation.end());
        ANeuralNetworksOperationType type = it->second.first;
        if (it->second.second) {
            int32_t fuseCode = operation - it->first;
            uint32_t input2 = addIntOperand(fuseCode);
            uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
            addOperation(type, {input0, input1, input2}, {output});
            return output;
        } else {
            uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
            addOperation(type, {input0, input1}, {output});
            return output;
        }
    }

    // Create an operation with one input and one output, specifying
    // the operation kind and the input operand index.
    // Returns the output operand index.
    uint32_t addOperation1To1(uint32_t operation, const uint32_t input0,
                              Dimensioned dimensionedOutput = Dimensioned::YES) {
        auto it = firstEncodingToOperation.lower_bound(operation);
        CHECK(it != firstEncodingToOperation.end());
        ANeuralNetworksOperationType type = it->second.first;

        uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
        addOperation(type, {input0}, {output});
        return output;
    }

    // Create a scalar integer operand of the specified value, and
    // return the corresponding operand index.
    uint32_t addIntOperand(int32_t value) {
        uint32_t operand = addOperand(WrapperType::INT32);
        setOperandValue(operand, &value, sizeof(value));
        return operand;
    }

    // Create an operand from a model for control flow graphs.
    uint32_t addRefModelOperand(const PartitioningModel& model) {
        const uint32_t index = addOperand(WrapperType::MODEL);
        WrapperModel::setOperandValueFromModel(index, &model);
        return index;
    }

    // Create an operand of the same type as the specified operand,
    // and return the operand index of the new operand.
    uint32_t addOperandOfSameType(uint32_t operand, Dimensioned dimensioned = Dimensioned::YES) {
        WrapperOperandType type = mWrapperOperandType.at(operand);
        for (auto& dimension : type.dimensions) {
            dimension = (dimensioned == Dimensioned::YES);
        }
        mWrapperOperandType.push_back(type);
        return WrapperModel::addOperand(&type);
    }

    // operand index to operand type
    std::vector<WrapperOperandType> mWrapperOperandType;
};

// This class adds some utilities on top of WrapperCompilation.
class PartitioningCompilation : public WrapperCompilation {
   public:
    PartitioningCompilation(const PartitioningModel* model,
                            const std::vector<std::shared_ptr<Device>>& devices) {
        ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
        CompilationBuilder* c = nullptr;
        int result = m->createCompilation(&c, devices);
        EXPECT_EQ(result, 0);
        mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
    }

    Result setPartitioning(uint32_t partitioning) {
        return static_cast<Result>(builder()->setPartitioning(partitioning));
    }

    using WrapperCompilation::finish;

    const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }

   private:
    CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }

    const CompilationBuilder* builder() const {
        return reinterpret_cast<const CompilationBuilder*>(getHandle());
    }
};
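
// In a test, a PartitioningCompilation is typically used like this (a minimal
// sketch -- the model and devices variables are placeholders, and the
// partitioning constant is assumed to come from DeviceManager):
//
//     PartitioningCompilation compilation(&model, devices);
//     ASSERT_EQ(compilation.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
//               Result::NO_ERROR);
//     ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
//     const ExecutionPlan& plan = compilation.getExecutionPlan();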

#ifdef VERBOSE
#define RETURN_TRUE()                                                 \
    {                                                                 \
        std::cerr << "returning true from " << __LINE__ << std::endl; \
        return true;                                                  \
    }
#else
#define RETURN_TRUE() \
    { return true; }
#endif
#ifdef VERBOSE
#define RETURN_FALSE(MESSAGE)                                                  \
    {                                                                          \
        std::cerr << "returning false from " << __LINE__ MESSAGE << std::endl; \
        return false;                                                          \
    }
#else
#define RETURN_FALSE(MESSAGE) \
    { return false; }
#endif

class PartitioningTest : public ::testing::Test {
   protected:
    using RemapVectorType = ExecutionStep::RemapVectorType;
    using StepModelOutputSetType = ExecutionStep::StepModelOutputSetType;

    virtual void SetUp() {}

    // From a vector of DeviceSpecification, create a vector of
    // Devices.
    struct DeviceSpecification {
        DeviceSpecification(const std::string& name, const Capabilities& capabilities,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo)
            : mName(name),
              mVersionString(kVersionString),
              mCapabilities(capabilities),
              mOperationMask(operationMask),
              mOEM(oem) {}
        DeviceSpecification(const std::string& name, float perf, uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            std::set<OperationType> operationTypes = {})
            : DeviceSpecification(name, perf, perf, operationMask, oem, operationTypes) {}
        DeviceSpecification(const std::string& name, float perf, float perfRelaxed,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            std::set<OperationType> operationTypes = {})
            : DeviceSpecification(name, kVersionString, perf, perfRelaxed, operationMask, oem,
                                  operationTypes) {}
        DeviceSpecification(const std::string& name, const std::string& version, float perf,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            std::set<OperationType> operationTypes = {})
            : DeviceSpecification(name, version, perf, perf, operationMask, oem, operationTypes) {}
        DeviceSpecification(const std::string& name, const std::string& version, float perf,
                            float perfRelaxed, uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            std::set<OperationType> operationTypes = {})
            : mName(name),
              mVersionString(version),
              mOperationMask(operationMask),
              mOEM(oem),
              mOperationTypes(std::move(operationTypes)) {
            PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
            PerformanceInfo perfRelaxedInfo = {.execTime = perfRelaxed, .powerUsage = perfRelaxed};
            mCapabilities = {
                    .relaxedFloat32toFloat16PerformanceScalar = perfRelaxedInfo,
                    .relaxedFloat32toFloat16PerformanceTensor = perfRelaxedInfo,
                    .operandPerformance =
                            ::android::nn::nonExtensionOperandPerformance<HalVersion::V1_3>(
                                    perfInfo),
                    .ifPerformance = perfInfo,
                    .whilePerformance = perfInfo};
        }
        DeviceSpecification(const std::string& name, float perf, HalVersion halVersion,
                            uint32_t operationMaskV1_0, uint32_t operationMaskV1_1 = 0,
                            uint32_t operationMaskV1_2 = 0, uint32_t operationMaskV1_3 = 0)
            : DeviceSpecification(
                      name, perf, perf,
                      makeOperationMask(halVersion, operationMaskV1_0, operationMaskV1_1,
                                        operationMaskV1_2, operationMaskV1_3)) {
            mHalVersion = halVersion;
        }

        std::string mName;
        std::string mVersionString;
        Capabilities mCapabilities;
        HalVersion mHalVersion = HalVersion::LATEST;
        uint32_t mOperationMask;
        PartitioningDriver::OEM mOEM = PartitioningDriver::OEMNo;
        std::set<OperationType> mOperationTypes;

        static constexpr char kVersionString[] = "JUST_AN_EXAMPLE";

990        private:
        // This function takes four operation masks aligned at the low-order
        // bit -- one mask each for V1_0, V1_1, V1_2, and V1_3 -- and produces a
        // single composite operation mask, formed by shifting each of the input
        // operation masks appropriately and ORing the results together.
        //
        // For convenience, any bits of an input mask that are too high order
        // for that mask are discarded -- this allows ~0 to be a legal input
        // mask.
        //
        // For the sake of example, assume that each low order mask is 4 bits
        // wide, and take some artistic license to write literals in binary.
        // Then:
        //
        //     assert(makeOperationMask(HalVersion::V1_2, 0b0110, 0b1001, 0b0101, 0) ==
        //            0b 0101 1001 0110);
        //
        // This is used by a DeviceSpecification constructor to build a mask of
        // operations to be supported by the device.
        static uint32_t makeOperationMask(HalVersion halVersion, uint32_t operationMaskV1_0,
                                          uint32_t operationMaskV1_1, uint32_t operationMaskV1_2,
                                          uint32_t operationMaskV1_3) {
            if (halVersion < HalVersion::V1_3) {
                CHECK(!operationMaskV1_3);
            }
            if (halVersion < HalVersion::V1_2) {
                CHECK(!operationMaskV1_2);
            }
            if (halVersion < HalVersion::V1_1) {
                CHECK(!operationMaskV1_1);
            }
            auto maskOfWidth = [](uint32_t width) -> uint32_t { return (1U << width) - 1; };
            static const uint32_t kOperationMaskV1_0 =
                    maskOfWidth(kLastEncodingV1_0 - kFirstEncodingV1_0 + 1);
            static const uint32_t kOperationMaskV1_1 =
                    maskOfWidth(kLastEncodingV1_1 - kFirstEncodingV1_1 + 1);
            static const uint32_t kOperationMaskV1_2 =
                    maskOfWidth(kLastEncodingV1_2 - kFirstEncodingV1_2 + 1);
            static const uint32_t kOperationMaskV1_3 =
                    maskOfWidth(kLastEncodingV1_3 - kFirstEncodingV1_3 + 1);
            return ((operationMaskV1_0 & kOperationMaskV1_0) << kFirstEncodingV1_0) |
                   ((operationMaskV1_1 & kOperationMaskV1_1) << kFirstEncodingV1_1) |
                   ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2) |
                   ((operationMaskV1_3 & kOperationMaskV1_3) << kFirstEncodingV1_3);
        }
    };
    static std::vector<std::shared_ptr<Device>> makeDevices(
            std::vector<DeviceSpecification> specifications) {
        std::vector<std::shared_ptr<Device>> devices;
        for (const auto& specification : specifications) {
            V1_0::IDevice* halDriver = nullptr;
            switch (specification.mHalVersion) {
                case HalVersion::V1_3:
                    halDriver = new PartitioningDriver(
                            specification.mName.c_str(), specification.mVersionString.c_str(),
                            specification.mCapabilities, specification.mOperationMask,
                            specification.mOEM, specification.mOperationTypes);
                    break;
                case HalVersion::V1_2:
                    halDriver = new PartitioningDriverV1_2(
                            specification.mName.c_str(), specification.mVersionString.c_str(),
                            specification.mCapabilities, specification.mOperationMask,
                            specification.mOEM, specification.mOperationTypes);
                    break;
                case HalVersion::V1_1:
                    halDriver = new PartitioningDriverV1_1(
                            specification.mName.c_str(), specification.mVersionString.c_str(),
                            specification.mCapabilities, specification.mOperationMask,
                            specification.mOEM, specification.mOperationTypes);
                    break;
                case HalVersion::V1_0:
                    halDriver = new PartitioningDriverV1_0(
                            specification.mName.c_str(), specification.mVersionString.c_str(),
                            specification.mCapabilities, specification.mOperationMask,
                            specification.mOEM, specification.mOperationTypes);
                    break;
                default:
                    ADD_FAILURE() << "Unexpected";
            }
            auto device = DeviceManager::forTest_makeDriverDevice(specification.mName, halDriver);
            devices.push_back(device);
        }
        devices.push_back(DeviceManager::getCpuDevice());
        return devices;
    }
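
    // For example (an illustrative sketch only; the tests below construct their
    // own device lists), a set of two driver devices plus the implicit CPU
    // fallback could be obtained with
    //     const auto devices = makeDevices({{"fast", 0.5, ~0U}, {"slow", 1.0, ~0U}});
    // where each DeviceSpecification gives a device name, a uniform performance
    // value, and a mask of the operation encodings the device supports.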

    /*-- Graph comparison -----------------------------------------------------------------*/

    // An operand with certain values for its lifetime does not have a
    // defining operation in the graph.  For the purposes of the graph
    // comparison algorithm, we encode the "defining operation" index of
    // such an operand as follows:
    // - NO_VALUE       kPseudoDefiningOperationNoValue
    // - SUBGRAPH_INPUT kPseudoDefiningOperationModelInput0 + (position in list of inputs)
    // - CONSTANT_COPY  kPseudoDefiningOperationConstantCopy0 + (constant value)
    //                    Note: For the graphs we build in this test, we
    //                          only expect to see 4-byte constants within
    //                          a very restricted range, so we only make
    //                          room for such constants in our encoding
    //                          space.
    // We do not expect to see CONSTANT_REFERENCE, and so we do not handle
    // it.
    //
    // The encoding is intended to be relatively human readable; it is not
    // designed to represent some optimal balance of ranges for the items
    // within its scope (actual operations, inputs, constants).

    enum PseudoDefiningOperationEncodings : uint32_t {
        kPseudoDefiningOperationModelInput0 = 0x80000000U,
        kPseudoDefiningOperationConstantCopy0 = 0x90000000U,
        kPseudoDefiningOperationNoValue = 0xeeeeeeeeU,

        // lowest value for special encoding
        kPseudoDefiningOperationBase = 0x80000000U,

        // range of encoded input or constant
        kPseudoDefiningOperationRange = 0x10000000U,
    };
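
    // A couple of illustrative encodings (a sketch only, not used directly by
    // the tests):
    //     model input #2        -> kPseudoDefiningOperationModelInput0 + 2   == 0x80000002
    //     4-byte constant 7     -> kPseudoDefiningOperationConstantCopy0 + 7 == 0x90000007
    //     operand with NO_VALUE -> kPseudoDefiningOperationNoValue           == 0xeeeeeeee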

    // Build a map from operand to defining operation.
    // TODO: Replace map with vector?
    void buildDefinitionMap(const ModelBuilder* model, std::map<uint32_t, uint32_t>* defMap) {
        // actual definitions
        ASSERT_LT(model->operationCount(), kPseudoDefiningOperationBase);
        for (uint32_t i = 0, e = model->operationCount(); i < e; i++) {
            const Operation& operation = model->getOperation(i);
            for (uint32_t output : operation.outputs) {
                (*defMap)[output] = i;
            }
        }
        // inputs
        ASSERT_LT(model->inputCount(), kPseudoDefiningOperationRange);
        for (uint32_t i = 0, e = model->inputCount(); i < e; i++) {
            (*defMap)[model->getInputOperandIndex(i)] = kPseudoDefiningOperationModelInput0 + i;
        }
        // look for NO_VALUE and CONSTANT_COPY
        for (uint32_t i = 0, e = model->operandCount(); i < e; i++) {
            const Operand& operand = model->getOperand(i);
            switch (operand.lifetime) {
                case OperandLifeTime::NO_VALUE:
                    (*defMap)[i] = kPseudoDefiningOperationNoValue;
                    break;
                case OperandLifeTime::CONSTANT_COPY: {
                    ASSERT_EQ(operand.location.length, sizeof(uint32_t));
                    uint32_t value;
                    memcpy(&value, model->getPointerToOperandValue(operand.location.offset),
                           sizeof(uint32_t));
                    ASSERT_LT(value, kPseudoDefiningOperationNoValue);
                    (*defMap)[i] = kPseudoDefiningOperationConstantCopy0 + value;
                    break;
                }
                case OperandLifeTime::TEMPORARY_VARIABLE:
                case OperandLifeTime::SUBGRAPH_INPUT:
                case OperandLifeTime::SUBGRAPH_OUTPUT:
                    // already handled
                    break;
                default:
                    FAIL();
                    break;
            }
        }
        // sanity check
        ASSERT_EQ(model->operandCount(), defMap->size());
    }

#ifdef VERBOSE
    void dump(const char* name, const std::map<uint32_t, uint32_t>* aMap) {
        auto writeNum = [](uint32_t num) {
            if (num >= kPseudoDefiningOperationBase) {
                std::cout << "0x" << std::hex << num << std::dec;
            } else {
                std::cout << num;
            }
        };

        std::cout << name << ": { ";
        bool gotOne = false;
        for (const auto& entry : *aMap) {
            if (gotOne) {
                std::cout << ", ";
            } else {
                gotOne = true;
            }
            std::cout << "(";
            writeNum(entry.first);
            std::cout << ", ";
            writeNum(entry.second);
            std::cout << ")";
        }
        std::cout << " }" << std::endl;
    }
#endif

    bool compare(const Operand& operandA, const Operand& operandB) {
        if (operandA.type != operandB.type || operandA.dimensions != operandB.dimensions ||
            operandA.numberOfConsumers != operandB.numberOfConsumers ||
            operandA.scale != operandB.scale || operandA.zeroPoint != operandB.zeroPoint) {
            return false;
        }
        return true;
    }

    // Compare two graphs.  We ignore operand and operation indexes (i.e.,
    // two nodes can be the same even if they are numbered differently)
    // but we also ignore semantics (e.g., even for a commutative operation
    // kind, we still pay attention to the order of its input operands).
    //
    // The comparison algorithm works by walking modelA from outputs
    // towards inputs, along the edge from each operand to its
    // defining operation, and then along the edges to the operation's
    // input operands.  At each step along the way, we try to match up
    // operands and operations from modelA with equivalent operands
    // and operations from modelB.
    //
    // We start by assuming that modelA's outputs and modelB's outputs
    // match positionally (e.g., modelA's first output operand is
    // equivalent to modelB's first output operand).  Once we've
    // discovered two equivalent operands (such as those outputs), we
    // place them in a work queue.  We repeatedly pull operands off
    // the queue and compare their defining operations and those
    // operations' input operands, to discover more pairs of
    // equivalent operands.  If we ever find operations that do not
    // match (e.g., because operation kind differs), or operands that
    // do not match (e.g., because operand type differs); or if we
    // ever find a conflict (we've already decided that operand A's
    // equivalent operand is B0, but it looks like we need its
    // equivalent operand to be B1); then the graphs compare unequal.
    // Otherwise, we'll eventually exhaust the work queue, and
    // conclude that the graphs compare equal.
    //
    // As a side effect of the comparison, we produce a map
    // *inputsAndOutputsBToA that maps from each of the model input and output
    // operand numbers of modelB to the corresponding operand numbers of modelA.
    // If the comparison returns false, the contents of the map are undefined.
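    //
    // For instance (an illustrative sketch only): if modelB's operand 3 is a
    // model input equivalent to modelA's operand 5, then after a successful
    // comparison (*inputsAndOutputsBToA)[3] == 5.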
    bool compare(const ModelBuilder* modelA, const ModelBuilder* modelB,
                 std::map<uint32_t, uint32_t>* inputsAndOutputsBToA) {
        CHECK(inputsAndOutputsBToA != nullptr);
        EXPECT_TRUE(inputsAndOutputsBToA->empty());

#ifdef VERBOSE
        ::dump("compare(A)", modelA);
        ::dump("compare(B)", modelB);
#endif

        if (modelA->operandCount() != modelB->operandCount() ||
            modelA->operationCount() != modelB->operationCount() ||
            modelA->inputCount() != modelB->inputCount() ||
            modelA->outputCount() != modelB->outputCount()) {
            RETURN_FALSE();
        }

        // Maps from operand index to index of defining operation.
        std::map<uint32_t, uint32_t> defsA, defsB;
        buildDefinitionMap(modelA, &defsA);
        buildDefinitionMap(modelB, &defsB);
        if (HasFatalFailure()) return false;

        // Maps from operand index in modelA to equivalent operand index
        // in modelB; and from operation index in modelA to equivalent
        // operation index in modelB.
        std::map<uint32_t, uint32_t> equivalentOperandsAToB;
        std::map<uint32_t, uint32_t> equivalentOperationsAToB;

        // Queue of operand indexes from modelA, each of whose defining
        // operations are to be checked for equivalence with modelB.
        std::queue<uint32_t> workQueueOperandsA;

        // Seed operand equivalence map and work queue from model outputs.
        for (uint32_t i = 0, e = modelA->outputCount(); i < e; i++) {
            uint32_t outputA = modelA->getOutputOperandIndex(i);
            uint32_t outputB = modelB->getOutputOperandIndex(i);
            if (!compare(modelA->getOperand(outputA), modelB->getOperand(outputB))) {
                RETURN_FALSE();
            }
            equivalentOperandsAToB[outputA] = outputB;
            workQueueOperandsA.push(outputA);
        }

#ifdef VERBOSE
        dump("defsA", &defsA);
        dump("defsB", &defsB);
#endif

        // Process the queue.
        uint32_t pseudoDefinitionCount = 0;
        while (!workQueueOperandsA.empty()) {
#ifdef VERBOSE
            dump("equivalentOperandsAToB", &equivalentOperandsAToB);
            dump("equivalentOperationsAToB", &equivalentOperationsAToB);
#endif
            uint32_t operandIndexA = workQueueOperandsA.front();
#ifdef VERBOSE
            std::cout << "operandIndexA: " << operandIndexA << std::endl;
#endif
            workQueueOperandsA.pop();
            uint32_t operandIndexB = equivalentOperandsAToB.at(operandIndexA);

            uint32_t operationIndexA = defsA.at(operandIndexA);
            uint32_t operationIndexB = defsB.at(operandIndexB);
            auto it = equivalentOperationsAToB.find(operationIndexA);
            if (it != equivalentOperationsAToB.end()) {
                if (it->second != operationIndexB) {
                    RETURN_FALSE();
                }
                continue;
            }

            // We haven't identified an equivalent operation for
            // operationIndexA.

            if ((operationIndexA >= kPseudoDefiningOperationBase) !=
                (operationIndexB >= kPseudoDefiningOperationBase)) {
                RETURN_FALSE();
            }
            // Either both operands have pseudo-definitions, or neither
            // does.
            if (operationIndexA >= kPseudoDefiningOperationBase) {
                // Both operands have pseudo-definitions.
                if (operationIndexA != operationIndexB) {
                    RETURN_FALSE();
                }
                equivalentOperationsAToB[operationIndexA] = operationIndexB;
                ++pseudoDefinitionCount;
                continue;
            }

            // If we get here, neither operation A nor operation B is a
            // pseudo-definition.

            const Operation& operationA = modelA->getOperation(operationIndexA);
            const Operation& operationB = modelB->getOperation(operationIndexB);
            if (operationA.type != operationB.type ||
                operationA.inputs.size() != operationB.inputs.size() ||
                operationA.outputs.size() != operationB.outputs.size()) {
                RETURN_FALSE();
            }
            equivalentOperationsAToB[operationIndexA] = operationIndexB;
            for (uint32_t i = 0, e = operationA.inputs.size(); i < e; i++) {
                uint32_t inputA = operationA.inputs[i];
                uint32_t inputB = operationB.inputs[i];
                auto it = equivalentOperandsAToB.find(inputA);
                if (it != equivalentOperandsAToB.end()) {
                    if (it->second != inputB) {
                        RETURN_FALSE();
                    }
                    continue;
                }
                // We haven't identified an equivalent operand for inputA.
                if (!compare(modelA->getOperand(inputA), modelB->getOperand(inputB))) {
                    RETURN_FALSE();
                }
                equivalentOperandsAToB[inputA] = inputB;
                workQueueOperandsA.push(inputA);
            }
        }

        // Sanity check
        if (modelA->operandCount() != defsA.size() || modelA->operandCount() != defsB.size() ||
            modelA->operandCount() != equivalentOperandsAToB.size() ||
            modelA->operationCount() + pseudoDefinitionCount != equivalentOperationsAToB.size()) {
            RETURN_FALSE();
        }

        // Build *inputsAndOutputsBToA
        for (uint32_t aInputIndex : modelA->getInputOperandIndexes()) {
            (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aInputIndex)] = aInputIndex;
        }
        for (uint32_t aOutputIndex : modelA->getOutputOperandIndexes()) {
            (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aOutputIndex)] = aOutputIndex;
        }

        RETURN_TRUE();
    }

    /*-------------------------------------------------------------------------------------*/

    // As a side effect of the comparison, we produce a map
    // *inputsAndOutputsModelToStep that maps from each of the model input and
    // output operand numbers of "model" to the corresponding operand numbers of
    // the step model from "step".  If the comparison returns false, the contents
    // of the map are undefined.
    bool compare(const ExecutionStep* step, const PartitioningModel* model,
                 std::shared_ptr<Device> device,
                 std::map<uint32_t, uint32_t>* inputsAndOutputsModelToStep) {
        return (step->getDevice() == device) &&
               compare(step->getStepModel(),
                       reinterpret_cast<const ModelBuilder*>(model->getHandle()),
                       inputsAndOutputsModelToStep);
    }

    void compare(const std::shared_ptr<LogicalStep> logicalStep, const PartitioningModel* model,
                 std::shared_ptr<Device> device, const RemapVectorType& modelInputs,
                 const RemapVectorType& modelOutputs, const RemapVectorType& tempsAsStepModelInputs,
                 const StepModelOutputSetType& tempsAsStepModelOutputs,
                 const RemapVectorType& outputsAsStepModelInputs) {
        ASSERT_TRUE(logicalStep->isExecution());
        const ExecutionStep* step = logicalStep->executionStep();
        std::map<uint32_t, uint32_t> inputsAndOutputsModelToStep;
        ASSERT_NO_FATAL_FAILURE(
                ASSERT_TRUE(compare(step, model, device, &inputsAndOutputsModelToStep)));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelInputs(),
                                        modelInputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelOutputs(),
                                        modelOutputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                        step->getTempsAsStepModelInputs(), tempsAsStepModelInputs));
        ASSERT_TRUE(compareStepModelOutputSets(inputsAndOutputsModelToStep,
                                               step->getTempsAsStepModelOutputs(),
                                               tempsAsStepModelOutputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                        step->getOutputsAsStepModelInputs(),
                                        outputsAsStepModelInputs));
    }

   private:
    static bool compareRemapVectors(const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
                                    const RemapVectorType& step, RemapVectorType model) {
        std::transform(model.begin(), model.end(), model.begin(),
                       [&inputsAndOutputsModelToStep](const RemapVectorType::value_type& val) {
                           return std::make_pair(val.first,
                                                 inputsAndOutputsModelToStep.at(val.second));
                       });
        return step == model;
    }

    static bool compareStepModelOutputSets(
            const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
            const StepModelOutputSetType& step, const StepModelOutputSetType& model) {
        StepModelOutputSetType modelTransformed;
        std::transform(
                model.begin(), model.end(), std::inserter(modelTransformed, modelTransformed.end()),
                [&inputsAndOutputsModelToStep](const StepModelOutputSetType::value_type& val) {
                    return std::make_pair(val.first, inputsAndOutputsModelToStep.at(val.second));
                });
        return step == modelTransformed;
    }
};

TEST_F(PartitioningTest, SimpleModel) {
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addFloatOperand();
    uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3);
    model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (two devices are each capable of everything, one is the best).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesA = makeDevices({{"bad", 0.9, ~0U}, {"good", 0.5, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "good");

    // Simple partition (two devices are each capable of everything, none better than CPU).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesC = makeDevices({{"bad", 1.1, ~0U}, {"bad2", 1.0, ~0U}});
    ExecutionPlan planC;
    ASSERT_EQ(model.partitionTheWork(devicesC, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planC),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(planC.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(planC.forTest_simpleGetDevice(), DeviceManager::getCpuDevice());

    // Compound partition (two devices, each is capable of one of the
    // two operations).  We could do more extensive checking here --
    // for example, verify that each step within the plan has the
    // correct (model and step model)x(inputs and outputs).
    const auto devicesB = makeDevices({{"0", 0.9, 1 << 0}, {"1", 0.5, 1 << 1}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(2));
    {
        // Build a model to compare against the step model from stepsB[0].
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_0(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[0],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},                         // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, b0Opnd2}},  // tempsAsStepModelOutputs
                        RemapVectorType{}));                       // outputsAsStepModelInputs
    }
    {
        // Build a model to compare against the step model from stepsB[1].
        PartitioningModel modelB1;
        uint32_t b1Opnd2 = modelB1.addFloatOperand();
        uint32_t b1Opnd3 = modelB1.addFloatOperand();
        uint32_t b1Opnd4 = modelB1.addOperation2To1V1_0(1, b1Opnd2, b1Opnd3);
        // Note: In the partitioning algorithm, step model inputs follow
        // model inputs.  In the original model "model", opnd2 is not
        // an input; so in the step model "modelB1", the corresponding
        // input b1Opnd2 is a step model input, and must follow the
        // model input b1Opnd3.
        modelB1.identifyInputsAndOutputs({b1Opnd3, b1Opnd2}, {b1Opnd4});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                stepsB[1], &modelB1, devicesB[1], RemapVectorType{{opnd3, b1Opnd3}},  // modelInputs
                RemapVectorType{{opnd4, b1Opnd4}},  // modelOutputs
                RemapVectorType{{opnd2, b1Opnd2}},  // tempsAsStepModelInputs
                StepModelOutputSetType{},           // tempsAsStepModelOutputs
                RemapVectorType{}));                // outputsAsStepModelInputs
    }
}

TEST_F(PartitioningTest, SliceModel) {
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd0, opnd1);
    uint32_t opnd4 = model.addOperation2To1V1_1(0, opnd0, opnd1);
    uint32_t opnd5 = model.addOperation2To1V1_2(0, opnd2, opnd3);
    uint32_t opnd6 = model.addOperation1To1V1_3(0, opnd2);
    model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5, opnd6});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (V1_0, V1_1, V1_2, V1_3 devices are available; V1_3 has best perf).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesA = makeDevices({{"V1_0", 0.8, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.5, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_3");

    // Compound partition (V1_0, V1_1, V1_2, V1_3 devices are available, in decreasing
    // order of performance; model is distributed across all four devices).
    const auto devicesB = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(4));
    {
        // Build a model to compare against the step model from stepsB[0].
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_1(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[1],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{{opnd4, b0Opnd2}},                    // modelOutputs
                        RemapVectorType{},         // tempsAsStepModelInputs
                        StepModelOutputSetType{},  // tempsAsStepModelOutputs
                        RemapVectorType{}));       // outputsAsStepModelInputs
    }
    {
        // Build a model to compare against the step model from stepsB[1].
        PartitioningModel modelB1;
        uint32_t b1Opnd0 = modelB1.addFloatOperand();
        uint32_t b1Opnd1 = modelB1.addFloatOperand();
        uint32_t b1Opnd2 = modelB1.addOperation2To1V1_0(0, b1Opnd0, b1Opnd1);
        uint32_t b1Opnd3 = modelB1.addOperation2To1V1_0(1, b1Opnd0, b1Opnd1);
        modelB1.identifyInputsAndOutputs({b1Opnd0, b1Opnd1}, {b1Opnd2, b1Opnd3});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[1], &modelB1, devicesB[0],
                        RemapVectorType{{opnd0, b1Opnd0}, {opnd1, b1Opnd1}},  // modelInputs
                        RemapVectorType{{opnd2, b1Opnd2}},                    // modelOutputs
                        RemapVectorType{},                         // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd3, b1Opnd3}},  // tempsAsStepModelOutputs
                        RemapVectorType{}));                       // outputsAsStepModelInputs
    }
    {
        // Build a model to compare against the step model from stepsB[2].
        PartitioningModel modelB2;
        uint32_t b2Opnd0 = modelB2.addFloatOperand();
        uint32_t b2Opnd1 = modelB2.addOperation1To1V1_3(0, b2Opnd0);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs.
        modelB2.identifyInputsAndOutputs({b2Opnd0}, {b2Opnd1});
        modelB2.finish();
        ASSERT_TRUE(modelB2.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[2], &modelB2, devicesB[3], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd6, b2Opnd1}},                    // modelOutputs
                        RemapVectorType{},                    // tempsAsStepModelInputs
                        StepModelOutputSetType{},             // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b2Opnd0}}));  // outputsAsStepModelInputs
    }
    {
        // Build a model to compare against the step model from stepsB[3].
        PartitioningModel modelB3;
        uint32_t b3Opnd0 = modelB3.addFloatOperand();
        uint32_t b3Opnd1 = modelB3.addFloatOperand();
        uint32_t b3Opnd2 = modelB3.addOperation2To1V1_2(0, b3Opnd0, b3Opnd1);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs.  In the original model "model", opnd3 is a temp and
        // opnd2 is a model output; so in the step model "modelB3", the
        // corresponding inputs b3Opnd1 and b3Opnd0 must appear in
        // that order.
        modelB3.identifyInputsAndOutputs({b3Opnd1, b3Opnd0}, {b3Opnd2});
        modelB3.finish();
        ASSERT_TRUE(modelB3.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[3], &modelB3, devicesB[2], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd5, b3Opnd2}},                    // modelOutputs
                        RemapVectorType{{opnd3, b3Opnd1}},    // tempsAsStepModelInputs
                        StepModelOutputSetType{},             // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b3Opnd0}}));  // outputsAsStepModelInputs
    }

    // TODO: Make sure this still works when we have multiple devices
    // of same version available for slicing. An easy (?) choice would
    // be to route the two different V1_0 operations to different
    // devices.
}

TEST_F(PartitioningTest, SliceModelToEmpty) {
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addOperation1To1V1_3(0, opnd0);
    model.identifyInputsAndOutputs({opnd0}, {opnd1});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Only the V1_3 device can handle any operations in the model.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devices = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                      {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                      {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                      {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "V1_3");
}

TEST_F(PartitioningTest, Cpu) {
    // Here's a model where some operations execute only on the Cpu.
    // To make things interesting, we produce three partitions --
    // device, cpu, same-device.

    static const uint32_t kCpuOp = 1;
    static const uint32_t kDevOp = 2;

    const auto devices = makeDevices({{"1", 0.5, 1 << kDevOp}});

    PartitioningModel model;

    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();

    uint32_t opnd2 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd2);

    uint32_t opnd4 = model.addOperation2To1V1_0(kCpuOp, opnd0, opnd3);
    uint32_t opnd5 = model.addOperation2To1V1_0(kCpuOp, opnd2, opnd4);

    uint32_t opnd6 = model.addFloatOperand();

    uint32_t opnd7 = model.addOperation2To1V1_0(kDevOp, opnd3, opnd5);
    uint32_t opnd8 = model.addOperation2To1V1_0(kDevOp, opnd6, opnd7);

    model.identifyInputsAndOutputs({opnd0, opnd1, opnd6}, {opnd4, opnd8});
    model.finish();
    ASSERT_TRUE(model.isValid());

    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(3));
    {
        const auto& step0 = steps[0];

        // Build a model to compare against the step model from steps[0].
        PartitioningModel model0;
        uint32_t m0Opnd0 = model0.addFloatOperand();
        uint32_t m0Opnd1 = model0.addFloatOperand();
        uint32_t m0Opnd2 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd1);
        uint32_t m0Opnd3 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd2);
        model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2, m0Opnd3});
        model0.finish();
        ASSERT_TRUE(model0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(step0, &model0, devices[0],
                        RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},  // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, m0Opnd2},
                                               {opnd3, m0Opnd3}},  // tempsAsStepModelOutputs
                        RemapVectorType{}));                       // outputsAsStepModelInputs
    }
    {
        const auto& step1 = steps[1];

        // Build a model to compare against the step model from steps[1].
        PartitioningModel model1;
        uint32_t m1Opnd0 = model1.addFloatOperand();
        uint32_t m1Opnd3 = model1.addFloatOperand();
        uint32_t m1Opnd4 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd0, m1Opnd3);
        uint32_t m1Opnd2 = model1.addFloatOperand();
        uint32_t m1Opnd5 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd2, m1Opnd4);
        model1.identifyInputsAndOutputs({m1Opnd0, m1Opnd3, m1Opnd2}, {m1Opnd4, m1Opnd5});
        model1.finish();
        ASSERT_TRUE(model1.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                step1, &model1, DeviceManager::getCpuDevice(),
                RemapVectorType{{opnd0, m1Opnd0}},                    // modelInputs
                RemapVectorType{{opnd4, m1Opnd4}},                    // modelOutputs
                RemapVectorType{{opnd3, m1Opnd3}, {opnd2, m1Opnd2}},  // tempsAsStepModelInputs
                StepModelOutputSetType{{opnd5, m1Opnd5}},             // tempsAsStepModelOutputs
                RemapVectorType{}));                                  // outputsAsStepModelInputs
    }
    {
        const auto& step2 = steps[2];

        // Build a model to compare against the step model from steps[2].
        PartitioningModel model2;
        uint32_t m2Opnd3 = model2.addFloatOperand();
        uint32_t m2Opnd5 = model2.addFloatOperand();
        uint32_t m2Opnd7 = model2.addOperation2To1V1_0(kDevOp, m2Opnd3, m2Opnd5);
        uint32_t m2Opnd6 = model2.addFloatOperand();
        uint32_t m2Opnd8 = model2.addOperation2To1V1_0(kDevOp, m2Opnd6, m2Opnd7);
        model2.identifyInputsAndOutputs({m2Opnd6, m2Opnd3, m2Opnd5}, {m2Opnd8});
        model2.finish();
        ASSERT_TRUE(model2.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                step2, &model2, devices[0], RemapVectorType{{opnd6, m2Opnd6}},  // modelInputs
                RemapVectorType{{opnd8, m2Opnd8}},                              // modelOutputs
                RemapVectorType{{opnd3, m2Opnd3}, {opnd5, m2Opnd5}},  // tempsAsStepModelInputs
                StepModelOutputSetType{},                             // tempsAsStepModelOutputs
                RemapVectorType{}));                                  // outputsAsStepModelInputs
    }
}

TEST_F(PartitioningTest, SetPartitioning) {
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 =
            model.addOperation2To1V1_0(0, opnd0, opnd1, PartitioningModel::Dimensioned::NO);
    uint32_t opnd3 = model.addFloatOperand();
    uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3);
    model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // We expect that we cannot successfully partition, because we
    // have an intermediate operand (opnd2) without dimensions, and
    // this is not currently handled.

    // One device that can and should execute operation 0.
    const auto devices = makeDevices({{"hw", 0.5, (1 << 0)}});

    // Test kPartitioningNo.  We should not even attempt partitioning,
    // so there should be a SIMPLE plan on CPU.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    PartitioningCompilation cPNo(&model, devices);
    ASSERT_EQ(cPNo.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
    ASSERT_EQ(cPNo.finish(), Result::NO_ERROR);
    ASSERT_EQ(cPNo.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(cPNo.getExecutionPlan().forTest_simpleGetDevice(), DeviceManager::getCpuDevice());

    // Test kPartitioningWithFallback.  We should attempt
    // partitioning, reach the end of the partitioning process (so we
    // have an unsuccessful execution plan), discover the dimensionless
    // intermediate operand, then fallback to CPU with a SIMPLE plan, and
    // finally return success.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    PartitioningCompilation cPWithFallback(&model, devices);
    ASSERT_EQ(cPWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
              Result::NO_ERROR);
    ASSERT_EQ(cPWithFallback.finish(), Result::NO_ERROR);
    ASSERT_EQ(cPWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(cPWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
              DeviceManager::getCpuDevice());

    // Test kPartitioningWithoutFallback.  We should attempt
    // partitioning, and fail.
    PartitioningCompilation cPWithoutFallback(&model, devices);
    ASSERT_EQ(cPWithoutFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    ASSERT_EQ(cPWithoutFallback.finish(), Result::OP_FAILED);
    ASSERT_TRUE(cPWithoutFallback.getExecutionPlan().forTest_hasStepModelOutputsOfUnknownSize());
    ASSERT_EQ(cPWithoutFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::ERROR);
}

// Regression test for http://b/69166603:
//     "partitioned compilation and execution yields wrong results when model output is step model
//     input"
TEST_F(PartitioningTest, ModelOutputAsStepModelInput) {
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd2, opnd2);
    model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd3});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Compound partition (two devices, each is capable of one of the
    // two operations).  We could do more extensive checking here --
    // for example, verify that each step within the plan has the
    // correct (model and step model)x(inputs and outputs).
    const auto devices = makeDevices({{"0", 0.5, 1 << 0}, {"1", 0.5, 1 << 1}});
    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(2));
    {
        // Build a model to compare against the step model from steps[0].
        PartitioningModel model0;
        uint32_t m0Opnd0 = model0.addFloatOperand();
        uint32_t m0Opnd1 = model0.addFloatOperand();
        uint32_t m0Opnd2 = model0.addOperation2To1V1_0(0, m0Opnd0, m0Opnd1);
        model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2});
        model0.finish();
        ASSERT_TRUE(model0.isValid());
        ASSERT_NO_FATAL_FAILURE(
                compare(steps[0], &model0, devices[0],
                        RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}},  // modelInputs
                        RemapVectorType{{opnd2, m0Opnd2}},                    // modelOutputs
                        RemapVectorType{},         // tempsAsStepModelInputs
                        StepModelOutputSetType{},  // tempsAsStepModelOutputs
                        RemapVectorType{}));       // outputsAsStepModelInputs
    }
    {
        // Build a model to compare against the step model from steps[1].
        PartitioningModel model1;
        uint32_t m1Opnd2 = model1.addFloatOperand();
        uint32_t m1Opnd3 = model1.addOperation2To1V1_0(1, m1Opnd2, m1Opnd2);
        model1.identifyInputsAndOutputs({m1Opnd2}, {m1Opnd3});
        model1.finish();
        ASSERT_TRUE(model1.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(steps[1], &model1, devices[1], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd3, m1Opnd3}},                 // modelOutputs
                        RemapVectorType{},                                 // tempsAsStepModelInputs
                        StepModelOutputSetType{},             // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, m1Opnd2}}));  // outputsAsStepModelInputs
    }
}

TEST_F(PartitioningTest, OemOperations) {
    // Trivial model consisting solely of an OEM operation.
    PartitioningModel model;
    uint32_t opndIn = model.addFloatOperand();
    uint32_t opndOut = model.addOperationOEM1To1(opndIn);
    model.identifyInputsAndOutputs({opndIn}, {opndOut});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Verify that the best driver that can run an OEM operation is
    // used, even if it is not better than the CPU.
1897     // No need to compare the original model to the model from the plan -- we
1898     // didn't actually do any partitioning.
1899     const auto devicesBestOEM = makeDevices({{"badOEM", 1.5, ~0U, PartitioningDriver::OEMYes},
1900                                              {"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo},
1901                                              {"goodOEM", 1.2, ~0U, PartitioningDriver::OEMYes}});
1902     PartitioningCompilation compilationBestOEM(&model, devicesBestOEM);
1903     ASSERT_EQ(compilationBestOEM.finish(), Result::NO_ERROR);
1904     const auto& planBestOEM = compilationBestOEM.getExecutionPlan();
1905     ASSERT_EQ(planBestOEM.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
1906     ASSERT_NE(planBestOEM.forTest_simpleGetDevice().get(), nullptr);
1907     ASSERT_EQ(planBestOEM.forTest_simpleGetDevice()->getName(), "goodOEM");
1908 
1909     // Verify that we get an error if no driver can run an OEM operation.
1910     const auto devicesNoOEM = makeDevices({{"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo}});
1911     PartitioningCompilation compilationNoOEM(&model, devicesNoOEM);
1912     ASSERT_EQ(compilationNoOEM.finish(), Result::BAD_DATA);
1913 
1914     // Verify that we get an error if a driver can SUPPORT but not PREPARE an OEM operation.
1915     const auto devicesIndecisiveOEM =
1916             makeDevices({{"indecisiveOEM", 0.5, ~0U, PartitioningDriver::OEMIndecisive}});
1917     PartitioningCompilation compilationIndecisiveOEM(&model, devicesIndecisiveOEM);
1918     ASSERT_NE(compilationIndecisiveOEM.finish(), Result::NO_ERROR);
1919 
1920     // Verify that we get an error if there are no drivers (only CPU fallback).
1921     PartitioningCompilation compilationNoDrivers(&model, makeDevices({}) /* no drivers */);
1922     ASSERT_EQ(compilationNoDrivers.finish(), Result::BAD_DATA);
1923 }
1924 
TEST_F(PartitioningTest,RelaxedFP)1925 TEST_F(PartitioningTest, RelaxedFP) {
1926     const auto devices = makeDevices({// Best choice for non-relaxed model.
1927                                       {"f32", 0.8, 0.9 /* relaxed */, ~0U},
1928                                       // Best choice for relaxed model.
1929                                       {"f16", 0.9, 0.8 /* relaxed */, ~0U}});
1930 
1931     auto TrivialTest = [&devices](bool doRelax, const char* expectDevice) {
1932         // Trivial model consisting solely of one operation.
1933         SCOPED_TRACE(expectDevice);
1934         PartitioningModel model;
1935         uint32_t opnd0 = model.addFloatOperand();
1936         uint32_t opnd1 = model.addFloatOperand();
1937         uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
1938         model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
1939         model.relaxComputationFloat32toFloat16(doRelax);
1940         model.finish();
1941         ASSERT_TRUE(model.isValid());
1942         // Verify that the model will be executed on the appropriate device.
1943         // No need to compare the original model to the model from the plan -- we
1944         // didn't actually do any partitioning.
1945         ExecutionPlan plan;
1946         ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
1947                                          ExecutePriority::DEFAULT, {}, &plan),
1948                   ANEURALNETWORKS_NO_ERROR);
1949         ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
1950         ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), expectDevice);
1951     };
1952 
1953     ASSERT_NO_FATAL_FAILURE(TrivialTest(false, "f32"));
1954     ASSERT_NO_FATAL_FAILURE(TrivialTest(true, "f16"));
1955 }
1956 
1957 TEST_F(PartitioningTest, Perf) {
1958     // The various type names used here are confusing.
1959     //
1960     // OperandType (from HAL file), WrapperType (from NeuralNetworksWrapper.h),
1961     // and OperandCode (from NeuralNetworks.h) are different enums representing
1962     // the same type kind -- e.g., OperandType::FLOAT32, WrapperType::FLOAT32,
1963     // ANEURALNETWORKS_FLOAT32.  Corresponding enumerators have the same value.
1964     //
1965     // WrapperOperandType is the NeuralNetworksWrapper.h representation of a
1966     // full operand type (WrapperType plus dimensions plus other attributes).
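    //
    // For illustration only (a minimal sketch, not exercised by this test), the
    // correspondence between these enums can be expressed as:
    //
    //     static_assert(static_cast<int32_t>(OperandType::TENSOR_FLOAT32) ==
    //                   ANEURALNETWORKS_TENSOR_FLOAT32);
    //     static_assert(static_cast<int32_t>(WrapperType::TENSOR_FLOAT32) ==
    //                   ANEURALNETWORKS_TENSOR_FLOAT32);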
1967 
1968     auto TestType = [](OperandType operandType) {
1969         if (operandType == OperandType::SUBGRAPH) {
1970             // SUBGRAPH capabilities are handled differently.
1971             return;
1972         }
1973         SCOPED_TRACE(toString(operandType));
1974         // Trivial model consisting solely of an OEM operation.  We
1975         // pick OEM operation because this allows us to use
1976         // inputs and outputs of any number and type.
1977         PartitioningModel model;
1978         uint32_t opndIn = model.addOperand(static_cast<WrapperType>(operandType));
1979         uint32_t opndOut = model.addOperationOEM1To1(opndIn);
1980         model.identifyInputsAndOutputs({opndIn}, {opndOut});
1981         model.finish();
1982         ASSERT_TRUE(model.isValid());
1983 
1984         const Capabilities baseCapabilities = makeCapabilities(0.5);
1985 
1986         {
1987             // better than base
1988             Capabilities goodCapabilities = baseCapabilities;
1989             update(&goodCapabilities, operandType, 0.25);
1990 
1991             const auto devices =
1992                     makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
1993                                  {"good", goodCapabilities, ~0U, PartitioningDriver::OEMYes}});
1994 
1995             // Verify that model will be executed on "good".
1996             // No need to compare the original model to the model from the plan -- we
1997             // didn't actually do any partitioning.
1998             ExecutionPlan plan;
1999             ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
2000                                              ExecutePriority::DEFAULT, {}, &plan),
2001                       ANEURALNETWORKS_NO_ERROR);
2002             ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
2003             ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "good");
2004         }
2005 
2006         {
2007             // worse than base
2008             Capabilities badCapabilities = baseCapabilities;
2009             update(&badCapabilities, operandType, 0.75);
2010             const auto devices =
2011                     makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
2012                                  {"bad", badCapabilities, ~0U, PartitioningDriver::OEMYes}});
2013 
2014             // Verify that model will be executed on "base".
2015             // No need to compare the original model to the model from the plan -- we
2016             // didn't actually do any partitioning.
2017             ExecutionPlan plan;
2018             ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
2019                                              ExecutePriority::DEFAULT, {}, &plan),
2020                       ANEURALNETWORKS_NO_ERROR);
2021             ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
2022             ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "base");
2023         }
2024     };
2025 
2026     for (uint32_t type = static_cast<uint32_t>(OperandTypeRange::FUNDAMENTAL_MIN);
2027          type <= static_cast<uint32_t>(OperandTypeRange::FUNDAMENTAL_MAX); ++type) {
2028         TestType(static_cast<OperandType>(type));
2029     }
2030     for (uint32_t type = static_cast<uint32_t>(OperandTypeRange::OEM_MIN);
2031          type <= static_cast<uint32_t>(OperandTypeRange::OEM_MAX); ++type) {
2032         TestType(static_cast<OperandType>(type));
2033     }
2034 }
2035 
2036 // Test token rehashing during the compilation step.
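//
// For context, an application opts into compilation caching roughly as follows
// (a minimal sketch; error handling omitted, and the cache directory path is
// only an example):
//
//     uint8_t token[ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN] = {};  // app-chosen bytes
//     ANeuralNetworksCompilation_setCaching(compilation, "/data/local/tmp/cache", token);
//
// The runtime then derives ("rehashes") a per-partition cache token from the
// application-provided token together with properties of the compilation --
// device name and version string, execution preference and priority, and the
// contents of the partition.  The tests below check that this derivation is
// deterministic and that distinct compilation setups yield distinct tokens.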
2037 class CacheTest : public PartitioningTest {
2038    protected:
2039     virtual void SetUp() override {
2040         PartitioningTest::SetUp();
2041         char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX";
2042         char* cacheDir = mkdtemp(cacheDirTemp);
2043         ASSERT_NE(cacheDir, nullptr);
2044         mCacheDir = cacheDir;
2045     }
2046 
2047     virtual void TearDown() override {
2048         if (!::testing::Test::HasFailure()) {
2049             std::filesystem::remove_all(mCacheDir);
2050         }
2051         PartitioningTest::TearDown();
2052     }
2053 
2054     void expectUniqueTokens(const std::vector<std::vector<uint8_t>>& tokens) {
2055         for (uint32_t i = 0; i < tokens.size(); i++) {
2056             SCOPED_TRACE(i);
2057             for (uint32_t j = i + 1; j < tokens.size(); j++) {
2058                 SCOPED_TRACE(j);
2059                 EXPECT_NE(tokens[i], tokens[j]);
2060             }
2061         }
2062     }
2063 
2064     // Launch a single run of the partitioner against the provided model and device list with
2065     // cache token provided as tokenIn. Find the partition for the device with deviceName.
2066     // Record the transformed token into tokenOut. Two or more partitions may be on the same device.
2067     // "devicePartitionIndex" specifies the index of the ExecutionStep corresponding to the
2068     // partition of interest, within the sequence of ExecutionSteps on the target device.
2069     // If tokenIn is empty, no caching information will be provided to the partitioner.
2070     void getTransformedCacheTokenSingle(const PartitioningModel& model,
2071                                         const std::vector<std::shared_ptr<Device>>& devices,
2072                                         const char* deviceName, const std::vector<uint8_t>& tokenIn,
2073                                         ExecutePreference preference, ExecutePriority priority,
2074                                         uint32_t devicePartitionIndex,
2075                                         std::vector<uint8_t>* tokenOut) {
2076         // Compile the model and get the execution plan.
2077         PartitioningCompilation compilation(&model, devices);
2078         if (!tokenIn.empty()) {
2079             compilation.setCaching(mCacheDir.c_str(), tokenIn);
2080         }
2081         compilation.setPreference(preference);
2082         compilation.setPriority(priority);
2083         ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
2084         const ExecutionPlan& plan = compilation.getExecutionPlan();
2085 
2086         // Find the cache info for the device.
2087         const uint8_t* token = nullptr;
2088         if (plan.forTest_getKind() == ExecutionPlan::Kind::SIMPLE) {
2089             ASSERT_EQ(devicePartitionIndex, 0u);
2090             ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), deviceName);
2091             token = plan.forTest_simpleGetCacheToken();
2092         } else if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) {
2093             const auto& steps = plan.forTest_compoundGetSteps();
2094             uint32_t executionStepCount = 0;
2095             for (const auto& step : steps) {
2096                 if (step->isExecution() &&
2097                     step->executionStep()->getDevice()->getName() == deviceName) {
2098                     if (devicePartitionIndex == executionStepCount) {
2099                         token = step->executionStep()->forTest_getCacheToken();
2100                         break;
2101                     }
2102                     executionStepCount++;
2103                 }
2104             }
2105         } else {
2106             FAIL();
2107         }
2108 
2109         // Retrieve the transformed token from the cache info.
2110         if (token == nullptr) {
2111             tokenOut->clear();
2112         } else {
2113             tokenOut->resize(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN);
2114             std::copy(token, token + ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, tokenOut->begin());
2115         }
2116     }
2117 
2118     // A wrapper of getTransformedCacheTokenSingle, which runs getTransformedCacheTokenSingle
2119     // multiple times and checks that the transformation produces a consistent result.
2120     // Two or more partitions may be on the same device. "devicePartitionIndex" specifies the index
2121     // of the ExecutionStep corresponding to the partition of interest, within the sequence of
2122     // ExecutionSteps on the target device.
2123     void getTransformedCacheToken(const PartitioningModel& model,
2124                                   const std::vector<std::shared_ptr<Device>>& devices,
2125                                   const char* deviceName, const std::vector<uint8_t>& tokenIn,
2126                                   ExecutePreference preference, ExecutePriority priority,
2127                                   std::vector<uint8_t>* tokenOut,
2128                                   uint32_t devicePartitionIndex = 0) {
2129         getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference, priority,
2130                                        devicePartitionIndex, tokenOut);
2131 
2132         // Test if the runtime maps to the same cache token every time for the same compilation
2133         // setup.
2134         for (uint32_t i = 0; i < 10; i++) {
2135             std::vector<uint8_t> token;
2136             SCOPED_TRACE(i);
2137             getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference,
2138                                            priority, devicePartitionIndex, &token);
2139             EXPECT_EQ(*tokenOut, token);
2140         }
2141     }
2142 
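    // Creates a two-operation model used by most of the caching tests below,
    // following the diagram convention used elsewhere in this file (operation
    // encodings are shown in the boxes):
    //
    //     opnd0 --> +-----+
    //               | (0) | --> opnd2 --> +-----+
    //     opnd1 --> +-----+               | (1) | --> opnd4
    //     opnd3 ------------------------> +-----+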
2143     void createModelForCachingTests(PartitioningModel* model) {
2144         uint32_t opnd0 = model->addFloatOperand();
2145         uint32_t opnd1 = model->addFloatOperand();
2146         uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1);
2147         uint32_t opnd3 = model->addFloatOperand();
2148         uint32_t opnd4 = model->addOperation2To1V1_0(1, opnd2, opnd3);
2149         model->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
2150         model->finish();
2151         ASSERT_TRUE(model->isValid());
2152     }
2153 
2154     // The first model returned in "models" is the main model.
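    // The main model has the following structure (the true and false branch
    // models each consist of a single two-input operation with encoding 0):
    //
    //     opnd0 --> +----+
    //     opnd1 --> | IF | --> opnd3
    //     opnd2 --> +----+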
2155     void createControlFlowModelForCachingTests(
2156             std::vector<std::unique_ptr<PartitioningModel>>* models) {
2157         CHECK(models != nullptr);
2158 
2159         auto trueModel = std::make_unique<PartitioningModel>();
2160         {
2161             const uint32_t opnd0 = trueModel->addFloatOperand();
2162             const uint32_t opnd1 = trueModel->addFloatOperand();
2163             const uint32_t opnd2 = trueModel->addOperation2To1V1_0(0, opnd0, opnd1);
2164             trueModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2165             trueModel->finish();
2166             ASSERT_TRUE(trueModel->isValid());
2167         }
2168 
2169         auto falseModel = std::make_unique<PartitioningModel>();
2170         {
2171             const uint32_t opnd0 = falseModel->addFloatOperand();
2172             const uint32_t opnd1 = falseModel->addFloatOperand();
2173             const uint32_t opnd2 = falseModel->addOperation2To1V1_0(0, opnd0, opnd1);
2174             falseModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2175             falseModel->finish();
2176             ASSERT_TRUE(falseModel->isValid());
2177         }
2178 
2179         auto mainModel = std::make_unique<PartitioningModel>();
2180         {
2181             const uint32_t opnd0 = mainModel->addBooleanOperand();
2182             const uint32_t opnd1 = mainModel->addFloatOperand();
2183             const uint32_t opnd2 = mainModel->addFloatOperand();
2184             const uint32_t opnd3 = mainModel->addFloatOperand();
2185             mainModel->addIfOperation(opnd0, *trueModel, *falseModel, {opnd1, opnd2}, {opnd3});
2186             mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
2187             mainModel->finish();
2188             ASSERT_TRUE(mainModel->isValid());
2189         }
2190 
2191         models->clear();
2192         models->push_back(std::move(mainModel));
2193         models->push_back(std::move(trueModel));
2194         models->push_back(std::move(falseModel));
2195     }
2196 
2197     std::string mCacheDir;
2198 };
2199 
2200 // Test the case when no token is provided by the application and the execution plan has a
2201 // simple body.
2202 TEST_F(CacheTest, CacheTokenNoneSimpleBody) {
2203     PartitioningModel model;
2204     createModelForCachingTests(&model);
2205 
2206     // deviceA can execute the whole model.
2207     const auto deviceA = makeDevices({
2208             {"deviceA", 0.5, ~0U},
2209     });
2210 
2211     std::vector<uint8_t> tokenIn, tokenOut;
2212     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2213                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2214                              &tokenOut);
2215     EXPECT_TRUE(tokenOut.empty());
2216 }
2217 
2218 // Test if the runtime maps to different cache tokens for devices with different names in
2219 // execution plan with a simple body.
2220 TEST_F(CacheTest, CacheTokenDifferentDeviceNamesSimpleBody) {
2221     PartitioningModel model;
2222     createModelForCachingTests(&model);
2223 
2224     // Two devices that can both execute the whole model.
2225     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2226     const auto deviceB = makeDevices({{"deviceB", 0.5, ~0U}});
2227 
2228     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2229     std::vector<uint8_t> deviceAToken, deviceBToken;
2230     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2231                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2232                              &deviceAToken);
2233     getTransformedCacheToken(model, deviceB, "deviceB", tokenIn,
2234                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2235                              &deviceBToken);
2236     expectUniqueTokens({deviceAToken, deviceBToken});
2237 }
2238 
2239 // Test if the runtime maps to different cache tokens for devices with different version strings in
2240 // execution plan with a simple body.
2241 TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsSimpleBody) {
2242     PartitioningModel model;
2243     createModelForCachingTests(&model);
2244 
2245     // Two devices that can both execute the whole model.
2246     const auto deviceA_1_0 = makeDevices({{"deviceA", "1.0", 0.5, ~0U}});
2247     const auto deviceA_1_1 = makeDevices({{"deviceA", "1.1", 0.5, ~0U}});
2248 
2249     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2250     std::vector<uint8_t> deviceA_1_0_Token, deviceA_1_1_Token;
2251     getTransformedCacheToken(model, deviceA_1_0, "deviceA", tokenIn,
2252                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2253                              &deviceA_1_0_Token);
2254     getTransformedCacheToken(model, deviceA_1_1, "deviceA", tokenIn,
2255                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2256                              &deviceA_1_1_Token);
2257     expectUniqueTokens({deviceA_1_0_Token, deviceA_1_1_Token});
2258 }
2259 
2260 // Test if the runtime maps to different cache tokens for compilations with different preferences
2261 // in execution plan with a simple body.
2262 TEST_F(CacheTest, CacheTokenDifferentPreferencesSimpleBody) {
2263     PartitioningModel model;
2264     createModelForCachingTests(&model);
2265 
2266     // One device that can execute the whole model.
2267     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2268 
2269     std::vector<uint8_t> fastToken, powerToken, sustainedToken;
2270     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2271     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2272                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2273                              &fastToken);
2274     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2275                              ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
2276                              &powerToken);
2277     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2278                              ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
2279                              &sustainedToken);
2280     expectUniqueTokens({fastToken, powerToken, sustainedToken});
2281 }
2282 
2283 // Test if the runtime maps to different cache tokens for compilations with different priorities
2284 // in execution plan with a simple body.
2285 TEST_F(CacheTest, CacheTokenDifferentPrioritiesSimpleBody) {
2286     PartitioningModel model;
2287     createModelForCachingTests(&model);
2288 
2289     // One device that can execute the whole model.
2290     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2291 
2292     std::vector<uint8_t> lowToken, mediumToken, highToken;
2293     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2294     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2295                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
2296                              &lowToken);
2297     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2298                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
2299                              &mediumToken);
2300     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2301                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
2302                              &highToken);
2303     expectUniqueTokens({lowToken, mediumToken, highToken});
2304 }
2305 
2306 // Test if the runtime maps to different cache tokens for compilations with different tokens
2307 // provided by application in execution plan with a simple body.
2308 TEST_F(CacheTest, CacheTokenDifferentTokensSimpleBody) {
2309     PartitioningModel model;
2310     createModelForCachingTests(&model);
2311 
2312     // One device that can execute the whole model.
2313     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2314 
2315     std::vector<uint8_t> tokenOut1, tokenOut2;
2316     std::vector<uint8_t> tokenIn1(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2317     std::vector<uint8_t> tokenIn2(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
2318     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn1,
2319                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2320                              &tokenOut1);
2321     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn2,
2322                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2323                              &tokenOut2);
2324     expectUniqueTokens({tokenOut1, tokenOut2});
2325 }
2326 
2327 // Test the case when no token is provided by the application and the execution plan has a
2328 // compound body.
2329 TEST_F(CacheTest, CacheTokenNoneCompoundBody) {
2330     PartitioningModel model;
2331     createModelForCachingTests(&model);
2332 
2333     // DeviceA executes the first operation only.
2334     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2335 
2336     std::vector<uint8_t> tokenIn, tokenOut;
2337     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2338                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2339                              &tokenOut);
2340     EXPECT_TRUE(tokenOut.empty());
2341     getTransformedCacheToken(model, devices, "deviceB", tokenIn,
2342                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2343                              &tokenOut);
2344     EXPECT_TRUE(tokenOut.empty());
2345 }
2346 
2347 // Test if the runtime maps to different cache tokens for devices with different names in
2348 // execution plan with a compound body.
2349 TEST_F(CacheTest, CacheTokenDifferentDeviceNamesCompoundBody) {
2350     PartitioningModel model;
2351     createModelForCachingTests(&model);
2352 
2353     // DeviceA executes the first operation only.
2354     const auto devices1 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});
2355     // DeviceB executes the first operation only.
2356     const auto devices2 = makeDevices({{"deviceB", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});
2357 
2358     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2359     std::vector<uint8_t> deviceAToken, deviceBToken;
2360     getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
2361                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2362                              &deviceAToken);
2363     getTransformedCacheToken(model, devices2, "deviceB", tokenIn,
2364                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2365                              &deviceBToken);
2366     expectUniqueTokens({deviceAToken, deviceBToken});
2367 }
2368 
2369 // Test if the runtime maps to different cache tokens for devices with different version
2370 // strings in execution plan with a compound body.
2371 TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsCompoundBody) {
2372     PartitioningModel model;
2373     createModelForCachingTests(&model);
2374 
2375     // DeviceA (version 1.0) executes the first operation only.
2376     const auto devices1 = makeDevices({{"deviceA", "1.0", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2377     // DeviceA (version 1.1) executes the first operation only.
2378     const auto devices2 = makeDevices({{"deviceA", "1.1", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2379 
2380     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2381     std::vector<uint8_t> deviceA_1_0_Token, deviceA_1_1_Token;
2382     getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
2383                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2384                              &deviceA_1_0_Token);
2385     getTransformedCacheToken(model, devices2, "deviceA", tokenIn,
2386                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2387                              &deviceA_1_1_Token);
2388     expectUniqueTokens({deviceA_1_0_Token, deviceA_1_1_Token});
2389 }
2390 
2391 // Test if the runtime maps to different cache tokens for compilations with different preferences
2392 // in execution plan with a compound body.
2393 TEST_F(CacheTest, CacheTokenDifferentPreferencesCompoundBody) {
2394     PartitioningModel model;
2395     createModelForCachingTests(&model);
2396 
2397     // DeviceA executes the first operation only.
2398     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2399 
2400     std::vector<uint8_t> fastToken, powerToken, sustainedToken;
2401     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2402     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2403                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2404                              &fastToken);
2405     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2406                              ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
2407                              &powerToken);
2408     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2409                              ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
2410                              &sustainedToken);
2411     expectUniqueTokens({fastToken, powerToken, sustainedToken});
2412 }
2413 
2414 // Test if the runtime maps to different cache tokens for compilations with different priorities
2415 // in execution plan with a compound body.
2416 TEST_F(CacheTest, CacheTokenDifferentPrioritiesCompoundBody) {
2417     PartitioningModel model;
2418     createModelForCachingTests(&model);
2419 
2420     // DeviceA executes the first operation only.
2421     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2422 
2423     std::vector<uint8_t> lowToken, mediumToken, highToken;
2424     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2425     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2426                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
2427                              &lowToken);
2428     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2429                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
2430                              &mediumToken);
2431     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2432                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
2433                              &highToken);
2434     expectUniqueTokens({lowToken, mediumToken, highToken});
2435 }
2436 
2437 // Test if the runtime maps to different cache tokens for compilations with different tokens
2438 // provided by application in execution plan with a compound body.
2439 TEST_F(CacheTest, CacheTokenDifferentTokensCompoundBody) {
2440     PartitioningModel model;
2441     createModelForCachingTests(&model);
2442 
2443     // DeviceA executes the first operation only.
2444     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2445 
2446     std::vector<uint8_t> tokenOut1, tokenOut2;
2447     std::vector<uint8_t> tokenIn1(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2448     std::vector<uint8_t> tokenIn2(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
2449     getTransformedCacheToken(model, devices, "deviceA", tokenIn1,
2450                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2451                              &tokenOut1);
2452     getTransformedCacheToken(model, devices, "deviceA", tokenIn2,
2453                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2454                              &tokenOut2);
2455     expectUniqueTokens({tokenOut1, tokenOut2});
2456 }
2457 
2458 // Test if the runtime maps to different cache tokens for compilations with different partitioning
2459 // outcome in execution plan with a compound body.
2460 TEST_F(CacheTest, CacheTokenDifferentPartitionsCompoundBody) {
2461     PartitioningModel model;
2462     createModelForCachingTests(&model);
2463 
2464     // DeviceA executes the whole model.
2465     const auto devices1 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 0U}});
2466     // DeviceA executes the first operation only.
2467     const auto devices2 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2468     // DeviceA executes the second operation only.
2469     const auto devices3 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 0}});
2470 
2471     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2472     std::vector<uint8_t> tokenOut1, tokenOut2, tokenOut3;
2473     getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
2474                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2475                              &tokenOut1);
2476     getTransformedCacheToken(model, devices2, "deviceA", tokenIn,
2477                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2478                              &tokenOut2);
2479     getTransformedCacheToken(model, devices3, "deviceA", tokenIn,
2480                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2481                              &tokenOut3);
2482     expectUniqueTokens({tokenOut1, tokenOut2, tokenOut3});
2483 }
2484 
2485 // Test if the runtime maps different referenced models to different cache tokens.
2486 TEST_F(CacheTest, CacheTokenDifferentReferenceModelPartitions) {
2487     std::vector<std::unique_ptr<PartitioningModel>> models;
2488     createControlFlowModelForCachingTests(&models);
2489     const auto& main = *models[0];
2490 
2491     // DeviceA executes the two referenced models but does not support IF.
2492     // There will be two partitions on deviceA.
2493     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}});
2494 
2495     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2496     std::vector<uint8_t> tokenOut1, tokenOut2;
2497     getTransformedCacheToken(main, devices, "deviceA", tokenIn,
2498                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2499                              &tokenOut1, /*devicePartitionIndex=*/0);
2500     getTransformedCacheToken(main, devices, "deviceA", tokenIn,
2501                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2502                              &tokenOut2, /*devicePartitionIndex=*/1);
2503     expectUniqueTokens({tokenOut1, tokenOut2});
2504 }
2505 
2506 // Very basic tests of some of the PerformanceInfo functionality.
2507 // Placed in this file because partitioning is the consumer of this functionality.
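// (In the HAL, PerformanceInfo is a pair of floats -- execTime and powerUsage --
// where lower values indicate better performance; Capabilities records this
// information per operand type.  The update() and lookupExecTime() helpers used
// below write and read that per-type information, respectively.)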
2508 class PerfTest : public ::testing::Test {};
2509 
2510 TEST_F(PerfTest, Lookup) {
2511     // Derive an arbitrary (but reproducible) performance value from an OperandType.
2512     // We'll use this to ensure that we can save and then recover a type's performance.
2513     auto typePerf = [](OperandType type) { return float(static_cast<uint32_t>(type)); };
2514 
2515     Capabilities capabilities = makeCapabilities(-1.0f);
2516 
2517     for (uint32_t type = static_cast<uint32_t>(OperandTypeRange::FUNDAMENTAL_MIN);
2518          type <= static_cast<uint32_t>(OperandTypeRange::FUNDAMENTAL_MAX); ++type) {
2519         OperandType operandType = static_cast<OperandType>(type);
2520         update(&capabilities, operandType, typePerf(operandType));
2521     }
2522     for (uint32_t type = static_cast<uint32_t>(OperandTypeRange::OEM_MIN);
2523          type <= static_cast<uint32_t>(OperandTypeRange::OEM_MAX); ++type) {
2524         OperandType operandType = static_cast<OperandType>(type);
2525         update(&capabilities, operandType, typePerf(operandType));
2526     }
2527 
2528     // Make sure lookup retrieves the values stored by update
2529 
2530     for (uint32_t type = static_cast<uint32_t>(OperandTypeRange::FUNDAMENTAL_MIN);
2531          type <= static_cast<uint32_t>(OperandTypeRange::FUNDAMENTAL_MAX); ++type) {
2532         OperandType operandType = static_cast<OperandType>(type);
2533         if (operandType == OperandType::SUBGRAPH) {
2534             // SUBGRAPH capabilities are handled differently.
2535             continue;
2536         }
2537         SCOPED_TRACE(toString(operandType));
2538         EXPECT_EQ(lookupExecTime(capabilities, operandType), typePerf(operandType));
2539     }
2540     for (uint32_t type = static_cast<uint32_t>(OperandTypeRange::OEM_MIN);
2541          type <= static_cast<uint32_t>(OperandTypeRange::OEM_MAX); ++type) {
2542         OperandType operandType = static_cast<OperandType>(type);
2543         SCOPED_TRACE(toString(operandType));
2544         EXPECT_EQ(lookupExecTime(capabilities, operandType), typePerf(operandType));
2545     }
2546 
2547     // Check the behavior of a missing type
2548 
2549     OperandType operandType =
2550             static_cast<OperandType>(static_cast<uint32_t>(OperandTypeRange::BASE_MAX) + 1);
2551     EXPECT_EQ(lookupExecTime(capabilities, operandType), FLT_MAX);
2552 }
2553 
2554 class ControlFlowPartitioningTest : public PartitioningTest {
2555    protected:
2556     // opnd0 --> +-----+
2557     //           | ADD | --> opnd2
2558     // opnd1 --> +-----+
2559     std::unique_ptr<PartitioningModel> createBranchOrBodyModel() {
2560         auto model = std::make_unique<PartitioningModel>();
2561         const uint32_t opnd0 = model->addFloatOperand();
2562         const uint32_t opnd1 = model->addFloatOperand();
2563         const uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1);
2564         model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2565         model->finish();
2566         EXPECT_TRUE(model->isValid());
2567         return model;
2568     }
2569 
2570     // opnd0 --> +-------+
2571     //           | EQUAL | --> opnd2
2572     // opnd1 --> +-------+
2573     std::unique_ptr<PartitioningModel> createCondModel() {
2574         auto model = std::make_unique<PartitioningModel>();
2575         const uint32_t opnd0 = model->addFloatOperand();
2576         const uint32_t opnd1 = model->addFloatOperand();
2577         const uint32_t opnd2 = model->addExplicitOperationXTo1(
2578                 ANEURALNETWORKS_EQUAL, {opnd0, opnd1}, WrapperType::TENSOR_BOOL8);
2579         model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2580         model->finish();
2581         EXPECT_TRUE(model->isValid());
2582         return model;
2583     }
2584 
2585     // opnd0 --> +----+
2586     // opnd1 --> | IF | --> opnd3
2587     // opnd2 --> +----+
2588     std::vector<std::unique_ptr<PartitioningModel>> createIfModel(
2589             bool firstOuterInputUnknownSize = false) {
2590         auto thenModel = createBranchOrBodyModel();
2591         auto elseModel = createBranchOrBodyModel();
2592 
2593         auto mainModel = std::make_unique<PartitioningModel>();
2594         const uint32_t opnd0 = mainModel->addBooleanOperand();
2595         const uint32_t opnd1 = mainModel->addFloatOperand(
2596                 firstOuterInputUnknownSize ? PartitioningModel::Dimensioned::NO
2597                                            : PartitioningModel::Dimensioned::YES);
2598         const uint32_t opnd2 = mainModel->addFloatOperand();
2599         const uint32_t opnd3 = mainModel->addFloatOperand();
2600         mainModel->addIfOperation(opnd0, *thenModel, *elseModel, {opnd1, opnd2}, {opnd3});
2601         mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
2602         mainModel->finish();
2603         EXPECT_TRUE(mainModel->isValid());
2604 
2605         std::vector<std::unique_ptr<PartitioningModel>> models;
2606         models.push_back(std::move(mainModel));
2607         models.push_back(std::move(thenModel));
2608         models.push_back(std::move(elseModel));
2609         return models;
2610     }
2611 
2612     // opnd0 --> +-------+
2613     //           | WHILE | --> opnd2
2614     // opnd1 --> +-------+
2615     std::vector<std::unique_ptr<PartitioningModel>> createWhileModel(
2616             bool firstOuterInputUnknownSize = false) {
2617         auto condModel = createCondModel();
2618         auto bodyModel = createBranchOrBodyModel();
2619 
2620         auto mainModel = std::make_unique<PartitioningModel>();
2621         const uint32_t opnd0 = mainModel->addFloatOperand(
2622                 firstOuterInputUnknownSize ? PartitioningModel::Dimensioned::NO
2623                                            : PartitioningModel::Dimensioned::YES);
2624         const uint32_t opnd1 = mainModel->addFloatOperand();
2625         const uint32_t opnd2 = mainModel->addFloatOperand();
2626         mainModel->addWhileOperation(*condModel, *bodyModel, {opnd0, opnd1}, {opnd2});
2627         mainModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2628         mainModel->finish();
2629         EXPECT_TRUE(mainModel->isValid());
2630 
2631         std::vector<std::unique_ptr<PartitioningModel>> models;
2632         models.push_back(std::move(mainModel));
2633         models.push_back(std::move(condModel));
2634         models.push_back(std::move(bodyModel));
2635         return models;
2636     }
2637 };
2638 
2639 TEST_F(ControlFlowPartitioningTest, IF_Interpreted) {
2640     const auto models = createIfModel();
2641 
2642     // The device supports the referenced models but does not support IF.
2643     const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});
2644 
2645     ExecutionPlan plan;
2646     ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
2647                                           ExecutePriority::DEFAULT, {}, &plan),
2648               ANEURALNETWORKS_NO_ERROR);
2649     ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
2650     const auto& steps = plan.forTest_compoundGetSteps();
2651     ASSERT_EQ(steps.size(), size_t(4));
2652     ASSERT_TRUE(steps[0]->isIf());
2653     ASSERT_TRUE(steps[1]->isExecution());
2654     ASSERT_TRUE(steps[2]->isGoto());
2655     ASSERT_TRUE(steps[3]->isExecution());
2656     ASSERT_EQ(steps[1]->executionStep()->getDevice()->getName(), "V1_0");
2657     ASSERT_EQ(steps[3]->executionStep()->getDevice()->getName(), "V1_0");
2658 }
2659 
2660 TEST_F(ControlFlowPartitioningTest, WHILE_Interpreted) {
2661     const auto models = createWhileModel();
2662 
2663     // The device supports the body model but does not support WHILE or the
2664     // condition model (because of EQUAL).
2665     const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});
2666 
2667     ExecutionPlan plan;
2668     ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
2669                                           ExecutePriority::DEFAULT, {}, &plan),
2670               ANEURALNETWORKS_NO_ERROR);
2671     ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
2672     const auto& steps = plan.forTest_compoundGetSteps();
2673     ASSERT_EQ(steps.size(), size_t(5));
2674     ASSERT_TRUE(steps[0]->isWhile());
2675     ASSERT_TRUE(steps[1]->isExecution());
2676     ASSERT_TRUE(steps[2]->isGoto());
2677     ASSERT_TRUE(steps[3]->isExecution());
2678     ASSERT_TRUE(steps[4]->isGoto());
2679     ASSERT_EQ(steps[1]->executionStep()->getDevice()->getName(),
2680               DeviceManager::getCpuDevice()->getName());
2681     ASSERT_EQ(steps[3]->executionStep()->getDevice()->getName(), "V1_0");
2682 }
2683 
2684 TEST_F(ControlFlowPartitioningTest, IF_SimplePlan) {
2685     const auto models = createIfModel();
2686 
2687     // The device supports all operations.
2688     const auto devices =
2689             makeDevices({{"ALL", 0.9, ~0U, PartitioningDriver::OEMNo, {OperationType::IF}}});
2690 
2691     ExecutionPlan plan;
2692     ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
2693                                           ExecutePriority::DEFAULT, {}, &plan),
2694               ANEURALNETWORKS_NO_ERROR);
2695     ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
2696     ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "ALL");
2697 }
2698 
2699 TEST_F(ControlFlowPartitioningTest, WHILE_SimplePlan) {
2700     const auto models = createWhileModel();
2701 
2702     // The device supports all operations.
2703     const auto devices = makeDevices({{"ALL",
2704                                        0.9,
2705                                        ~0U,
2706                                        PartitioningDriver::OEMNo,
2707                                        {OperationType::WHILE, OperationType::EQUAL}}});
2708 
2709     ExecutionPlan plan;
2710     ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
2711                                           ExecutePriority::DEFAULT, {}, &plan),
2712               ANEURALNETWORKS_NO_ERROR);
2713     ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
2714     ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "ALL");
2715 }
2716 
2717 TEST_F(ControlFlowPartitioningTest, IF_UnknownSize) {
2718     const auto models = createIfModel(/*firstOuterInputUnknownSize=*/true);
2719 
2720     // The device supports all operations but the partitioner ignores its IF
2721     // support due to http://b/159076604#comment5.
2722     const auto devices =
2723             makeDevices({{"ALL", 0.9, ~0U, PartitioningDriver::OEMNo, {OperationType::IF}}});
2724 
2725     ExecutionPlan plan;
2726     ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
2727                                           ExecutePriority::DEFAULT, {}, &plan),
2728               ANEURALNETWORKS_NO_ERROR);
2729     // The control flow interpreter does not support unknown size (b/132458982).
2730     ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
2731     ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), DeviceManager::getCpuDevice()->getName());
2732 }
2733 
2734 TEST_F(ControlFlowPartitioningTest, WHILE_UnknownSize) {
2735     const auto models = createWhileModel(/*firstOuterInputUnknownSize=*/true);
2736 
2737     // The device supports all operations but the partitioner ignores its WHILE
2738     // support due to http://b/159076604#comment5.
2739     const auto devices = makeDevices({{"ALL",
2740                                        0.9,
2741                                        ~0U,
2742                                        PartitioningDriver::OEMNo,
2743                                        {OperationType::WHILE, OperationType::EQUAL}}});
2744 
2745     ExecutionPlan plan;
2746     ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
2747                                           ExecutePriority::DEFAULT, {}, &plan),
2748               ANEURALNETWORKS_NO_ERROR);
2749     // The control flow interpreter does not support unknown size (b/132458982).
2750     ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
2751     ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), DeviceManager::getCpuDevice()->getName());
2752 }
2753 
2754 }  // namespace
2755