1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <gtest/gtest.h>
18
19 #include <algorithm>
20 #include <filesystem>
21 #include <functional>
22 #include <map>
23 #include <memory>
24 #include <queue>
25 #include <set>
26 #include <string>
27 #include <type_traits>
28 #include <utility>
29 #include <vector>
30
31 #include "CompilationBuilder.h"
32 #include "ControlFlow.h"
33 #include "ExecutionPlan.h"
34 #include "HalInterfaces.h"
35 #include "Manager.h"
36 #include "ModelBuilder.h"
37 #include "NeuralNetworks.h"
38 #include "NeuralNetworksOEM.h"
39 #include "SampleDriver.h"
40 #include "TestNeuralNetworksWrapper.h"
41 #include "Utils.h"
42 #include "ValidateHal.h"
43
44 // Uncomment the following line to generate some debugging output that
45 // may be useful when analyzing failures:
46 //
47 // #define VERBOSE VERBOSE
48
49 // These tests do whitebox testing of the graph partitioning
50 // algorithm. It is "whitebox" in the sense that we're not evaluating
51 // whether a particular partitioning is legal, or "good enough"
52 // according to some metric, but whether it exactly matches the
53 // expected behavior of the current partitioning algorithm.
54 //
55 // A key part of the current partitioning algorithm is to determine
56 // which device among the available devices should be the one to
57 // execute a particular operation from the graph. This determination
58 // is made "locally" -- i.e., it does not depend on the graph
59 // topology, only on the properties of the operation in question.
60 // IDevice::getSupportedOperations() indicates which operations in a
61 // graph can be executed on a device, and IDevice::getCapabilities()
62 // indicates how "good" that device is for executing particular kinds
63 // of operations. For each operation, the partitioning algorithm
64 // picks the "best" device that is capable of executing that
65 // operation; if no device can do so, then the algorithm picks the
66 // cpu.
67 //
68 // As part of this testing approach, we want to make it easy to
69 // specify which operations in a test graph can be executed on which
70 // devices. We accomplish this in the following way:
71 // - A unary OEM operation is available.
72 // - There is a collection of operations (each of which has two inputs
73 // and one output):
74 // - Eight kinds of operations available at driver version V1_0 or
75 // later. They are represented in the graph as ADD or MUL with a
76 // particular activation function -- two opcodes times four
77 // activation functions means eight available operation kinds.
78 // This is a low-level representation detail -- when we specify the
79 // behavior of the device or build a graph, we do so in terms of
80 // operation encodings 0..7.
81 // - Eight kinds of operations available at driver version V1_1 or
82 // later. They are represented in the graph as DIV or SUB with
83 // a particular activation function, exactly analogous to ADD
84 // and MUL above. We use operation encodings 8..15 for them.
85 // - Four kinds of operations available at driver version V1_2 or
86 // later. They are represented in the graph as MAXIMUM,
87 // MINIMUM, POW, or PRELU. These operations take no activation
88 // function, so we only get 4 operation kinds, for which we
89 // use operation encodings 16..19.
90 // - There is another collection of operations (each of which has one input
91 // and one output):
92 // - Single operation available at driver version V1_3 or
93 // later. It is represented in the graph as HARD_SWISH.
94 // These operations take no activation function, for which we
95 // use operation encodings 20..20.
96
97 // When we instantiate a device for testing purposes, we specify what subset of
98 // those operations the device is able to execute.
99 //
100 // In order to determine whether or not a partitioning matches the
101 // expected partitioning, we check the number of partitions, check
102 // which device each partition targets, and compare each partition's
103 // subgraph, model inputs, model outputs, step model inputs, and
104 // step model outputs against what is expected. In order to perform
105 // that comparison, we build a model to compare against a partition's
106 // step model and run a graph comparison algorithm on it. The graph
107 // comparison and the inputs and outputs comparisons are syntactic
108 // rather than semantic comparisons -- they don't allow for
109 // reorderings of inputs and outputs. Because of this, we need to
110 // know exactly how the partitioning algorithm orders inputs and
111 // outputs in order to construct the models and operand lists to
112 // compare against. Here are some relevant behaviors of the
113 // partitioning algorithm:
114 //
115 // - It builds a subgraph by walking operations in forward topological
116 // order, and adding each operation's input operands and output
117 // operands in index order (input followed by output) when that
118 // operation is added. (It does not add an input that has already
119 // been added.)
120 // - It finds model inputs, model outputs, and step model inputs in
121 // the order the corresponding operands were added to the subgraph
122 // (see ExecutionStep methods getModelInputs(), getModelOutputs(),
123 // getTempsAsStepModelInputs(), getOutputsAsStepModelInputs()).
124 // - It finds temps as step model outputs in numerical order of corresponding
125 // operand number in the original model (see ExecutionStep method
126 // getTempsAsStepModelOutputs()).
127 // - When it calls identifyInputsAndOutputs() on the step model, it
128 // passes inputs from getModelInputs() in order, followed by temps as
129 // step model inputs from getTempsAsStepModelInputs() in order,
130 // followed by outputs as step model inputs from
131 // getOutputsAsStepModelInputs() in order; and it passes outputs from
132 // getModelOutputs() in order followed by step model outputs from
133 // getTempsAsStepModelOutputs() in order.
134 //
135 // TODO: Maybe the logic for comparing a partition to an expected
136 // model should be changed to tolerate reorderings of inputs and
137 // outputs, so that when we build models and lists to compare
138 // against, we don't need to worry about input and output
139 // orderings. But is there a way to do this that still lets us
140 // verify that we have the correct relationships between
141 // an (original) model's inputs and outputs and each step model's
142 // inputs and outputs, as well as the correct relationship
143 // between step model inputs and outputs across partitions?
144
145 namespace {
146
147 using namespace android::nn::hal;
148 using CompilationBuilder = ::android::nn::CompilationBuilder;
149 using Deadline = ::android::nn::Deadline;
150 using Device = ::android::nn::Device;
151 using DeviceManager = ::android::nn::DeviceManager;
152 using ExecutePreference = ::android::nn::test_wrapper::ExecutePreference;
153 using ExecutePriority = ::android::nn::test_wrapper::ExecutePriority;
154 using ExecutionPlan = ::android::nn::ExecutionPlan;
155 using ExecutionStep = ::android::nn::ExecutionStep;
156 using HalVersion = ::android::nn::HalVersion;
157 using HidlModel = V1_3::Model;
158 using LogicalStep = ::android::nn::LogicalStep;
159 using ModelBuilder = ::android::nn::ModelBuilder;
160 using Result = ::android::nn::test_wrapper::Result;
161 using SampleDriver = ::android::nn::sample_driver::SampleDriver;
162 using WrapperCompilation = ::android::nn::test_wrapper::Compilation;
163 using WrapperModel = ::android::nn::test_wrapper::Model;
164 using WrapperOperandType = ::android::nn::test_wrapper::OperandType;
165 using WrapperSymmPerChannelQuantParams = ::android::nn::test_wrapper::SymmPerChannelQuantParams;
166 using WrapperType = ::android::nn::test_wrapper::Type;
167
// Shorthand for the HIDL synchronized fast-message-queue descriptor type
// used by configureExecutionBurst().
template <typename T>
using MQDescriptorSync = ::android::hardware::MQDescriptorSync<T>;

// Timing reported on failed executions; UINT64_MAX in both fields means
// "no measurement available".
constexpr Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
172
makeCapabilities(float perf)173 Capabilities makeCapabilities(float perf) {
174 PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
175 return {.relaxedFloat32toFloat16PerformanceScalar = perfInfo,
176 .relaxedFloat32toFloat16PerformanceTensor = perfInfo,
177 .operandPerformance =
178 ::android::nn::nonExtensionOperandPerformance<HalVersion::V1_3>(perfInfo),
179 .ifPerformance = perfInfo,
180 .whilePerformance = perfInfo};
181 };
182
update(Capabilities * capabilities,OperandType type,float perf)183 void update(Capabilities* capabilities, OperandType type, float perf) {
184 PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
185 ::android::nn::update(&capabilities->operandPerformance, type, perfInfo);
186 }
187
lookupExecTime(const Capabilities & capabilities,OperandType type)188 float lookupExecTime(const Capabilities& capabilities, OperandType type) {
189 return ::android::nn::lookup(capabilities.operandPerformance, type).execTime;
190 }
191
// Number of fused activation function codes (NONE, RELU, RELU1, RELU6).
// Each of ADD/MUL/DIV/SUB contributes one operation kind per fuse code.
const uint32_t kNumFuseCodes = 4;
// Sentinel returned by lookupOperation() for an operation that has no
// encoding (e.g. OEM_OPERATION, or a non-constant activation operand).
const uint32_t kBadOperation = ~0;

// V1_0 operations: encodings 0..7 (ADD and MUL, one per fuse code).
const uint32_t kFirstEncodingADD = 0;
const uint32_t kFirstEncodingMUL = kFirstEncodingADD + kNumFuseCodes;
const uint32_t kFirstEncodingV1_0 = kFirstEncodingADD;
const uint32_t kLastEncodingV1_0 = kFirstEncodingMUL + kNumFuseCodes - 1;

// V1_1 operations: encodings 8..15 (DIV and SUB, one per fuse code).
const uint32_t kFirstEncodingDIV = kLastEncodingV1_0 + 1;
const uint32_t kFirstEncodingSUB = kFirstEncodingDIV + kNumFuseCodes;
const uint32_t kFirstEncodingV1_1 = kFirstEncodingDIV;
const uint32_t kLastEncodingV1_1 = kFirstEncodingSUB + kNumFuseCodes - 1;

// V1_2 operations: encodings 16..19 (no activation function, one encoding each).
const uint32_t kFirstEncodingMAXIMUM = kLastEncodingV1_1 + 1;
const uint32_t kFirstEncodingMINIMUM = kFirstEncodingMAXIMUM + 1;
const uint32_t kFirstEncodingPOW = kFirstEncodingMINIMUM + 1;
const uint32_t kFirstEncodingPRELU = kFirstEncodingPOW + 1;
const uint32_t kFirstEncodingV1_2 = kFirstEncodingMAXIMUM;
const uint32_t kLastEncodingV1_2 = kFirstEncodingPRELU;

// V1_3 operations: encoding 20 (HARD_SWISH only).
const uint32_t kFirstEncodingHARD_SWISH = kLastEncodingV1_2 + 1;
const uint32_t kFirstEncodingV1_3 = kFirstEncodingHARD_SWISH;
const uint32_t kLastEncodingV1_3 = kFirstEncodingHARD_SWISH;

// Maps each encodable OperationType to the first (lowest) encoding assigned
// to it; operations with a fuse code span kNumFuseCodes consecutive values.
const std::map<OperationType, uint32_t> operationToFirstEncoding = {
        {OperationType::ADD, kFirstEncodingADD},
        {OperationType::MUL, kFirstEncodingMUL},
        {OperationType::DIV, kFirstEncodingDIV},
        {OperationType::SUB, kFirstEncodingSUB},
        {OperationType::MAXIMUM, kFirstEncodingMAXIMUM},
        {OperationType::MINIMUM, kFirstEncodingMINIMUM},
        {OperationType::POW, kFirstEncodingPOW},
        {OperationType::PRELU, kFirstEncodingPRELU},
        {OperationType::HARD_SWISH, kFirstEncodingHARD_SWISH},
};

// Inverse of the above, from first encoding back to the NDK operation code.
// Sorted in reverse order (std::greater) so that we can use map::lower_bound to
// find an entry whose key is numerically less than or equal to a search value.
// mapped_type is (OperandCode, hasFuseCode).
const std::map<uint32_t, std::pair<uint32_t, bool>, std::greater<>> firstEncodingToOperation = {
        {kFirstEncodingADD, {ANEURALNETWORKS_ADD, true}},
        {kFirstEncodingMUL, {ANEURALNETWORKS_MUL, true}},
        {kFirstEncodingDIV, {ANEURALNETWORKS_DIV, true}},
        {kFirstEncodingSUB, {ANEURALNETWORKS_SUB, true}},
        {kFirstEncodingMAXIMUM, {ANEURALNETWORKS_MAXIMUM, false}},
        {kFirstEncodingMINIMUM, {ANEURALNETWORKS_MINIMUM, false}},
        {kFirstEncodingPOW, {ANEURALNETWORKS_POW, false}},
        {kFirstEncodingPRELU, {ANEURALNETWORKS_PRELU, false}},
        {kFirstEncodingHARD_SWISH, {ANEURALNETWORKS_HARD_SWISH, false}},
};
246
247 // Look up the operation with the specified index in a graph, and return the
248 // operation encoding; or, if for some reason this is not one of the encoded
249 // operations, then return kBadOperation.
lookupOperation(std::function<const Operation & (uint32_t)> getOperation,std::function<const Operand & (uint32_t)> getOperand,std::function<const uint8_t * (uint32_t)> getValue,uint32_t operationIndex)250 uint32_t lookupOperation(std::function<const Operation&(uint32_t)> getOperation,
251 std::function<const Operand&(uint32_t)> getOperand,
252 std::function<const uint8_t*(uint32_t)> getValue,
253 uint32_t operationIndex) {
254 const Operation& operation = getOperation(operationIndex);
255 switch (operation.type) {
256 case OperationType::ADD:
257 case OperationType::MUL:
258 case OperationType::DIV:
259 case OperationType::SUB: {
260 // input2 is the fused activation function
261 const Operand& input2 = getOperand(operation.inputs[2]);
262 if ((input2.type == OperandType::INT32) &&
263 (input2.lifetime == OperandLifeTime::CONSTANT_COPY)) {
264 int32_t value;
265 CHECK_EQ(sizeof(value), input2.location.length);
266 memcpy(&value, getValue(input2.location.offset), input2.location.length);
267 return value + operationToFirstEncoding.at(operation.type);
268 }
269 break;
270 }
271 default: {
272 auto it = operationToFirstEncoding.find(operation.type);
273 if (it != operationToFirstEncoding.end()) {
274 return it->second;
275 }
276 break;
277 }
278 }
279 return kBadOperation;
280 }
281
lookupOperation(const HidlModel & model,const Subgraph & subgraph,uint32_t operationIndex)282 uint32_t lookupOperation(const HidlModel& model, const Subgraph& subgraph,
283 uint32_t operationIndex) {
284 return lookupOperation(
285 [&subgraph](uint32_t index) -> const Operation& { return subgraph.operations[index]; },
286 [&subgraph](uint32_t index) -> const Operand& { return subgraph.operands[index]; },
287 [&model](uint32_t offset) { return &model.operandValues[offset]; }, operationIndex);
288 }
289
290 #ifdef VERBOSE
// This is a debugging utility function: prints a HIDL rendering of *model to
// stdout -- the full model, its main-subgraph input and output operand
// indexes, and each operation of the main subgraph on its own line.
void dump(const char* name, const ModelBuilder* model) {
    const HidlModel hidlModel = model->makeHidlModel();
    std::cout << name << ": " << toString(hidlModel) << std::endl;
    std::cout << "inputs: " << toString(hidlModel.main.inputIndexes) << std::endl;
    std::cout << "outputs: " << toString(hidlModel.main.outputIndexes) << std::endl;
    for (size_t i = 0, e = hidlModel.main.operations.size(); i < e; i++) {
        std::cout << "operation[" << i << "]: " << toString(hidlModel.main.operations[i])
                  << std::endl;
    }
}
302 #endif
303
304 // This is an IDevice for testing purposes. It only has a few interesting
305 // properties, all of which are specified as constructor arguments: device
// capabilities; which subset of operation kinds (0..20) does the device
307 // support; does the device support the OEM operation; does the device support
308 // other operations. The subset is represented with a bitmask, in which
309 // operation kind K corresponds to the bit (1 << K). The other operations are
310 // represented by a set of OperationType.
311 class PartitioningDriver : public SampleDriver {
312 private:
313 // Dummy class -- a prepared model must not be nullptr.
314 class PartitioningPreparedModel : public IPreparedModel {
315 public:
execute(const V1_0::Request &,const sp<V1_0::IExecutionCallback> &)316 Return<V1_0::ErrorStatus> execute(const V1_0::Request&,
317 const sp<V1_0::IExecutionCallback>&) override {
318 return V1_0::ErrorStatus::DEVICE_UNAVAILABLE;
319 }
execute_1_2(const V1_0::Request &,MeasureTiming,const sp<V1_2::IExecutionCallback> &)320 Return<V1_0::ErrorStatus> execute_1_2(const V1_0::Request&, MeasureTiming,
321 const sp<V1_2::IExecutionCallback>&) override {
322 return V1_0::ErrorStatus::DEVICE_UNAVAILABLE;
323 }
execute_1_3(const V1_3::Request &,MeasureTiming,const OptionalTimePoint &,const OptionalTimeoutDuration &,const sp<V1_3::IExecutionCallback> &)324 Return<V1_3::ErrorStatus> execute_1_3(const V1_3::Request&, MeasureTiming,
325 const OptionalTimePoint&,
326 const OptionalTimeoutDuration&,
327 const sp<V1_3::IExecutionCallback>&) override {
328 return V1_3::ErrorStatus::DEVICE_UNAVAILABLE;
329 }
executeSynchronously(const V1_0::Request &,MeasureTiming,executeSynchronously_cb cb)330 Return<void> executeSynchronously(const V1_0::Request&, MeasureTiming,
331 executeSynchronously_cb cb) override {
332 cb(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, {}, kBadTiming);
333 return Void();
334 }
executeSynchronously_1_3(const V1_3::Request &,MeasureTiming,const OptionalTimePoint &,const OptionalTimeoutDuration &,executeSynchronously_1_3_cb cb)335 Return<void> executeSynchronously_1_3(const V1_3::Request&, MeasureTiming,
336 const OptionalTimePoint&,
337 const OptionalTimeoutDuration&,
338 executeSynchronously_1_3_cb cb) override {
339 cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, {}, kBadTiming);
340 return Void();
341 }
configureExecutionBurst(const sp<V1_2::IBurstCallback> &,const MQDescriptorSync<V1_2::FmqRequestDatum> &,const MQDescriptorSync<V1_2::FmqResultDatum> &,configureExecutionBurst_cb cb)342 Return<void> configureExecutionBurst(
343 const sp<V1_2::IBurstCallback>& /*callback*/,
344 const MQDescriptorSync<V1_2::FmqRequestDatum>& /*requestChannel*/,
345 const MQDescriptorSync<V1_2::FmqResultDatum>& /*resultChannel*/,
346 configureExecutionBurst_cb cb) override {
347 cb(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, nullptr);
348 return Void();
349 }
executeFenced(const Request &,const hidl_vec<hidl_handle> &,MeasureTiming,const OptionalTimePoint &,const OptionalTimeoutDuration &,const OptionalTimeoutDuration &,executeFenced_cb cb)350 Return<void> executeFenced(const Request&, const hidl_vec<hidl_handle>&, MeasureTiming,
351 const OptionalTimePoint&, const OptionalTimeoutDuration&,
352 const OptionalTimeoutDuration&, executeFenced_cb cb) {
353 cb(ErrorStatus::DEVICE_UNAVAILABLE, hidl_handle(nullptr), nullptr);
354 return Void();
355 }
356 };
357
358 public:
359 enum OEM {
360 OEMNo, // rejected by getSupportedOperations and prepareModel
361 OEMIndecisive, // accepted by getSupportedOperations but not prepareModel
362 OEMYes, // accepted by getSupportedOperations and prepareModel
363 };
364
PartitioningDriver(const char * name,const char * version,Capabilities capabilities,uint32_t operationMask,OEM oem=OEMNo,std::set<OperationType> operationTypes={})365 PartitioningDriver(const char* name, const char* version, Capabilities capabilities,
366 uint32_t operationMask, OEM oem = OEMNo,
367 std::set<OperationType> operationTypes = {})
368 : SampleDriver(name),
369 mVersionString(version),
370 mCapabilities(capabilities),
371 mOperationMask(operationMask),
372 mOEM(oem),
373 mOperationTypes(std::move(operationTypes)) {
374 CHECK_EQ(mOperationTypes.count(OperationType::OEM_OPERATION), size_t(0));
__anon83dd480d0502(OperationType type) 375 std::for_each(mOperationTypes.begin(), mOperationTypes.end(), [](OperationType type) {
376 CHECK_EQ(operationToFirstEncoding.count(type), size_t(0));
377 });
378 }
~PartitioningDriver()379 ~PartitioningDriver() override {}
380
getVersionString(getVersionString_cb cb)381 Return<void> getVersionString(getVersionString_cb cb) override {
382 cb(V1_0::ErrorStatus::NONE, mVersionString);
383 return Void();
384 }
385
prepareModel_1_3(const Model & model,ExecutionPreference,Priority,const OptionalTimePoint &,const hidl_vec<hidl_handle> &,const hidl_vec<hidl_handle> &,const CacheToken &,const sp<V1_3::IPreparedModelCallback> & cb)386 Return<V1_3::ErrorStatus> prepareModel_1_3(
387 const Model& model, ExecutionPreference, Priority, const OptionalTimePoint&,
388 const hidl_vec<hidl_handle>&, const hidl_vec<hidl_handle>&, const CacheToken&,
389 const sp<V1_3::IPreparedModelCallback>& cb) override {
390 V1_3::ErrorStatus status = V1_3::ErrorStatus::NONE;
391 if (mOEM != OEMYes) {
392 for (const auto& operation : model.main.operations) {
393 if (operation.type == OperationType::OEM_OPERATION) {
394 status = V1_3::ErrorStatus::INVALID_ARGUMENT;
395 break;
396 }
397 }
398 }
399 cb->notify_1_3(status, new PartitioningPreparedModel);
400 return status;
401 }
402
getStatus()403 Return<DeviceStatus> getStatus() override { return DeviceStatus::AVAILABLE; }
404
getCapabilities_1_3(getCapabilities_1_3_cb cb)405 Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
406 cb(V1_3::ErrorStatus::NONE, mCapabilities);
407 return Void();
408 }
409
getSupportedOperations_1_3(const Model & model,getSupportedOperations_1_3_cb cb)410 Return<void> getSupportedOperations_1_3(const Model& model,
411 getSupportedOperations_1_3_cb cb) override {
412 if (!android::nn::validateModel(model)) {
413 cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
414 return Void();
415 }
416 cb(V1_3::ErrorStatus::NONE, getSupportedOperationsForSubgraph(model, model.main));
417 return Void();
418 }
419
getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)420 Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb) override {
421 cb(V1_0::ErrorStatus::NONE, /*numModelCache=*/1, /*numDataCache=*/1);
422 return Void();
423 }
424
prepareModelFromCache(const hidl_vec<hidl_handle> &,const hidl_vec<hidl_handle> &,const CacheToken &,const sp<V1_2::IPreparedModelCallback> & callback)425 Return<V1_0::ErrorStatus> prepareModelFromCache(
426 const hidl_vec<hidl_handle>&, const hidl_vec<hidl_handle>&, const CacheToken&,
427 const sp<V1_2::IPreparedModelCallback>& callback) override {
428 callback->notify_1_2(V1_0::ErrorStatus::NONE, new PartitioningPreparedModel);
429 return V1_0::ErrorStatus::NONE;
430 }
431
432 private:
getSupportedOperationsForSubgraph(const Model & model,const Subgraph & subgraph)433 std::vector<bool> getSupportedOperationsForSubgraph(const Model& model,
434 const Subgraph& subgraph) {
435 auto supportsEntireSubgraph = [this, &model, &subgraph](uint32_t refSubgraphOperandIndex) {
436 const Operand& refSubgraphOperand = subgraph.operands[refSubgraphOperandIndex];
437 const Subgraph& refSubgraph = model.referenced[refSubgraphOperand.location.offset];
438 std::vector<bool> supported = getSupportedOperationsForSubgraph(model, refSubgraph);
439 return std::all_of(supported.begin(), supported.end(), [](bool x) { return x; });
440 };
441 const size_t count = subgraph.operations.size();
442 std::vector<bool> supported(count);
443 for (size_t i = 0; i < count; i++) {
444 const Operation operation = subgraph.operations[i];
445 if (mOperationTypes.count(operation.type)) {
446 if (operation.type == OperationType::IF) {
447 namespace op = android::nn::operation_if;
448 supported[i] =
449 supportsEntireSubgraph(operation.inputs[op::kThenModelOperand]) &&
450 supportsEntireSubgraph(operation.inputs[op::kElseModelOperand]);
451 } else if (operation.type == OperationType::WHILE) {
452 namespace op = android::nn::operation_while;
453 supported[i] =
454 supportsEntireSubgraph(operation.inputs[op::kCondModelOperand]) &&
455 supportsEntireSubgraph(operation.inputs[op::kBodyModelOperand]);
456 } else {
457 supported[i] = true;
458 }
459 continue;
460 }
461 if (operation.type == OperationType::OEM_OPERATION) {
462 supported[i] = (mOEM != OEMNo);
463 continue;
464 }
465 supported[i] = false;
466 uint32_t operationEncoding = lookupOperation(model, subgraph, i);
467 if ((operationEncoding != kBadOperation) &&
468 (mOperationMask & (1 << operationEncoding))) {
469 supported[i] = true;
470 }
471 }
472 return supported;
473 }
474
475 std::string mVersionString;
476 Capabilities mCapabilities;
477 uint32_t mOperationMask;
478 OEM mOEM;
479 std::set<OperationType> mOperationTypes;
480 };
481
482 // Like PartitioningDriver, but implementing 1.2
483 class PartitioningDriverV1_2 : public V1_2::IDevice {
484 public:
PartitioningDriverV1_2(const char * name,const char * version,Capabilities capabilities,uint32_t operationMask,PartitioningDriver::OEM oem=PartitioningDriver::OEMNo,std::set<OperationType> operationTypes={})485 PartitioningDriverV1_2(const char* name, const char* version, Capabilities capabilities,
486 uint32_t operationMask,
487 PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
488 std::set<OperationType> operationTypes = {})
489 : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
490 operationTypes)) {}
getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb)491 Return<void> getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb) override {
492 return mLatestDriver->getCapabilities_1_2(_hidl_cb);
493 }
getSupportedOperations_1_2(const V1_2::Model & model,getSupportedOperations_1_2_cb _hidl_cb)494 Return<void> getSupportedOperations_1_2(const V1_2::Model& model,
495 getSupportedOperations_1_2_cb _hidl_cb) override {
496 return mLatestDriver->getSupportedOperations_1_2(model, _hidl_cb);
497 }
prepareModel_1_2(const V1_2::Model & model,ExecutionPreference preference,const hidl_vec<hidl_handle> & modelCache,const hidl_vec<hidl_handle> & dataCache,const CacheToken & token,const sp<V1_2::IPreparedModelCallback> & actualCallback)498 Return<V1_0::ErrorStatus> prepareModel_1_2(
499 const V1_2::Model& model, ExecutionPreference preference,
500 const hidl_vec<hidl_handle>& modelCache, const hidl_vec<hidl_handle>& dataCache,
501 const CacheToken& token,
502 const sp<V1_2::IPreparedModelCallback>& actualCallback) override {
503 return mLatestDriver->prepareModel_1_2(model, preference, modelCache, dataCache, token,
504 actualCallback);
505 }
getVersionString(getVersionString_cb _hidl_cb)506 Return<void> getVersionString(getVersionString_cb _hidl_cb) override {
507 return mLatestDriver->getVersionString(_hidl_cb);
508 }
getType(getType_cb _hidl_cb)509 Return<void> getType(getType_cb _hidl_cb) override { return mLatestDriver->getType(_hidl_cb); }
getSupportedExtensions(getSupportedExtensions_cb _hidl_cb)510 Return<void> getSupportedExtensions(getSupportedExtensions_cb _hidl_cb) {
511 return mLatestDriver->getSupportedExtensions(_hidl_cb);
512 }
getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb)513 Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb) {
514 return mLatestDriver->getNumberOfCacheFilesNeeded(_hidl_cb);
515 }
prepareModelFromCache(const hidl_vec<hidl_handle> & modelCache,const hidl_vec<hidl_handle> & dataCache,const CacheToken & token,const sp<V1_2::IPreparedModelCallback> & callback)516 Return<V1_0::ErrorStatus> prepareModelFromCache(
517 const hidl_vec<hidl_handle>& modelCache, const hidl_vec<hidl_handle>& dataCache,
518 const CacheToken& token, const sp<V1_2::IPreparedModelCallback>& callback) {
519 return mLatestDriver->prepareModelFromCache(modelCache, dataCache, token, callback);
520 }
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)521 Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
522 return mLatestDriver->getCapabilities_1_1(_hidl_cb);
523 }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)524 Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
525 getSupportedOperations_1_1_cb _hidl_cb) override {
526 return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
527 }
prepareModel_1_1(const V1_1::Model & model,ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)528 Return<V1_0::ErrorStatus> prepareModel_1_1(
529 const V1_1::Model& model, ExecutionPreference preference,
530 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
531 return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
532 }
getStatus()533 Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)534 Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
535 return mLatestDriver->getCapabilities(_hidl_cb);
536 }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)537 Return<void> getSupportedOperations(const V1_0::Model& model,
538 getSupportedOperations_cb _hidl_cb) override {
539 return mLatestDriver->getSupportedOperations(model, _hidl_cb);
540 }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)541 Return<V1_0::ErrorStatus> prepareModel(
542 const V1_0::Model& model,
543 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
544 return mLatestDriver->prepareModel(model, actualCallback);
545 }
546
547 private:
548 const sp<V1_3::IDevice> mLatestDriver;
549 };
550
551 // Like PartitioningDriver, but implementing 1.1
552 class PartitioningDriverV1_1 : public V1_1::IDevice {
553 public:
PartitioningDriverV1_1(const char * name,const char * version,Capabilities capabilities,uint32_t operationMask,PartitioningDriver::OEM oem=PartitioningDriver::OEMNo,std::set<OperationType> operationTypes={})554 PartitioningDriverV1_1(const char* name, const char* version, Capabilities capabilities,
555 uint32_t operationMask,
556 PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
557 std::set<OperationType> operationTypes = {})
558 : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
559 operationTypes)) {}
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)560 Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
561 return mLatestDriver->getCapabilities_1_1(_hidl_cb);
562 }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)563 Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
564 getSupportedOperations_1_1_cb _hidl_cb) override {
565 return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
566 }
prepareModel_1_1(const V1_1::Model & model,ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)567 Return<V1_0::ErrorStatus> prepareModel_1_1(
568 const V1_1::Model& model, ExecutionPreference preference,
569 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
570 return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
571 }
getStatus()572 Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)573 Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
574 return mLatestDriver->getCapabilities(_hidl_cb);
575 }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)576 Return<void> getSupportedOperations(const V1_0::Model& model,
577 getSupportedOperations_cb _hidl_cb) override {
578 return mLatestDriver->getSupportedOperations(model, _hidl_cb);
579 }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)580 Return<V1_0::ErrorStatus> prepareModel(
581 const V1_0::Model& model,
582 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
583 return mLatestDriver->prepareModel(model, actualCallback);
584 }
585
586 private:
587 const sp<V1_3::IDevice> mLatestDriver;
588 };
589
590 // Like PartitioningDriver, but implementing 1.0
class PartitioningDriverV1_0 : public V1_0::IDevice {
   public:
    // Wraps a full PartitioningDriver but exposes only the V1_0 HIDL
    // interface, so tests can simulate a device that predates the later
    // NN HAL versions.
    PartitioningDriverV1_0(const char* name, const char* version, Capabilities capabilities,
                           uint32_t operationMask,
                           PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                           std::set<OperationType> operationTypes = {})
        : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
                                               operationTypes)) {}
    // Forwards the capabilities query to the wrapped driver.
    Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities(_hidl_cb);
    }
    // Forwards the supported-operations query to the wrapped driver.
    Return<void> getSupportedOperations(const V1_0::Model& model,
                                        getSupportedOperations_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations(model, _hidl_cb);
    }
    // Forwards a V1_0 model compilation request to the wrapped driver.
    Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel(model, actualCallback);
    }
    // Reports device status by delegating to the wrapped driver.
    Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }

   private:
    // The real driver implementation; held as V1_3 because PartitioningDriver
    // implements the latest interface.
    const sp<V1_3::IDevice> mLatestDriver;
};
616
617 // This class adds some simple abstractions and utilities on top of
618 // WrapperModel. For example, it provides methods that work in terms of
619 // operation kind (0..7); and because we care about graph topology rather than
620 // details of operand types and values, it greatly simplifies the process of
621 // creating operands.
class PartitioningModel : private WrapperModel {
   public:
    using WrapperModel::finish;
    using WrapperModel::getHandle;
    using WrapperModel::identifyInputsAndOutputs;
    using WrapperModel::isValid;
    using WrapperModel::relaxComputationFloat32toFloat16;

    // Controls whether a created operand gets a trivial dimension
    // specification ({1}) or is left with unspecified dimensions ({}).
    enum class Dimensioned { NO, YES };

    // Create a tensor operand of the specified type, and return the
    // corresponding operand index.
    uint32_t addFloatOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_FLOAT32, dimensioned);
    }
    uint32_t addQuantOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_QUANT8_ASYMM, dimensioned);
    }
    uint32_t addBooleanOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_BOOL8, dimensioned);
    }

    // Create an operand of the specified type, and return the corresponding
    // operand index.
    uint32_t addOperand(WrapperType wrapperType, Dimensioned dimensioned = Dimensioned::YES) {
        // Dimensions used for tensor types: {1} if dimensioned, {} otherwise.
        auto dimensions = [dimensioned]() -> std::vector<uint32_t> {
            if (dimensioned == Dimensioned::YES) {
                return {1};
            } else {
                return {};
            }
        };

        // The cast to int lets us switch over values (e.g., the OEM types)
        // that have no corresponding WrapperType enumerator.
        switch (static_cast<int>(wrapperType)) {
            // Scalar types: constructed without dimensions or quantization.
            case ANEURALNETWORKS_BOOL:
            case ANEURALNETWORKS_FLOAT16:
            case ANEURALNETWORKS_FLOAT32:
            case ANEURALNETWORKS_INT32:
            case ANEURALNETWORKS_UINT32:
            case ANEURALNETWORKS_MODEL:
            case ANEURALNETWORKS_OEM_SCALAR: {
                return addOperand(WrapperOperandType{wrapperType, {}});
            }

            // Tensor types constructed from dimensions alone.
            case ANEURALNETWORKS_TENSOR_BOOL8:
            case ANEURALNETWORKS_TENSOR_FLOAT16:
            case ANEURALNETWORKS_TENSOR_FLOAT32:
            case ANEURALNETWORKS_TENSOR_OEM_BYTE: {
                return addOperand(WrapperOperandType{wrapperType, dimensions()});
            }

            // Tensor types constructed with an explicit scale; 1.0f is an
            // arbitrary but valid choice for these topology-only tests.
            case ANEURALNETWORKS_TENSOR_INT32:
            case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
            case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED:
            case ANEURALNETWORKS_TENSOR_QUANT8_SYMM:
            case ANEURALNETWORKS_TENSOR_QUANT16_ASYMM:
            case ANEURALNETWORKS_TENSOR_QUANT16_SYMM: {
                return addOperand(WrapperOperandType{wrapperType, dimensions(), 1.0f});
            }

            // Per-channel quantization requires channel quant parameters.
            case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL: {
                return addOperand(WrapperOperandType{wrapperType, dimensions(),
                                                     WrapperSymmPerChannelQuantParams({1.0f}, 0)});
            }

            default:
                ADD_FAILURE() << "Unexpected type " << static_cast<uint32_t>(wrapperType);
                return ~uint32_t(0);
        }
    }

    // Create an operand of the specified operand type, and return the
    // corresponding operand index.
    uint32_t addOperand(const WrapperOperandType& wrapperOperandType) {
        // Remember the type so addOperandOfSameType() can clone it later.
        mWrapperOperandType.push_back(wrapperOperandType);
        return WrapperModel::addOperand(&wrapperOperandType);
    }

    // Create an operation with any number of inputs and one output, specifying
    // the operation type (e.g., ANEURALNETWORKS_ADD), the input operand
    // indexes, and the output type (e.g., WrapperType::TENSOR_FLOAT32).
    // Returns the output operand index.
    uint32_t addExplicitOperationXTo1(ANeuralNetworksOperationType operationType,
                                      const std::vector<uint32_t>& inputs, WrapperType outputType,
                                      Dimensioned dimensionedOutput = Dimensioned::YES) {
        uint32_t output = addOperand(outputType, dimensionedOutput);
        addOperation(operationType, inputs, {output});
        return output;
    }

    // Create a V1_0 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_0 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_0(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_0 - kFirstEncodingV1_0);
        return addOperation2To1(operation + kFirstEncodingV1_0, input0, input1, dimensionedOutput);
    }

    // Create a V1_1 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_1 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_1(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_1 - kFirstEncodingV1_1);
        return addOperation2To1(operation + kFirstEncodingV1_1, input0, input1, dimensionedOutput);
    }

    // Create a V1_2 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_2 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_2(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_2 - kFirstEncodingV1_2);
        return addOperation2To1(operation + kFirstEncodingV1_2, input0, input1, dimensionedOutput);
    }

    // Create a V1_3 operation with one input and one output, specifying the
    // operation kind (where 0 is the first V1_3 operation) and the input
    // operand index.
    // Returns the output operand index.
    uint32_t addOperation1To1V1_3(uint32_t operation, const uint32_t input0,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_3 - kFirstEncodingV1_3);
        return addOperation1To1(operation + kFirstEncodingV1_3, input0, dimensionedOutput);
    }

    // Create an OEM operation with one input and one output,
    // specifying the input operand index. Returns the output operand
    // index.
    uint32_t addOperationOEM1To1(const uint32_t input,
                                 Dimensioned dimensionedOutput = Dimensioned::YES) {
        uint32_t output = addOperandOfSameType(input, dimensionedOutput);
        addOperation(ANEURALNETWORKS_OEM_OPERATION, {input}, {output});
        return output;
    }

    // Create an IF operation with the given condition operand and two
    // referenced models for the true and false cases.
    void addIfOperation(const uint32_t cond, const PartitioningModel& trueModel,
                        const PartitioningModel& falseModel, const std::vector<uint32_t>& inputs,
                        const std::vector<uint32_t>& outputs) {
        const uint32_t opndTrue = addRefModelOperand(trueModel);
        const uint32_t opndFalse = addRefModelOperand(falseModel);
        // IF inputs: condition, then-model, else-model, then the caller's inputs.
        std::vector<uint32_t> ifInputs = {cond, opndTrue, opndFalse};
        ifInputs.insert(ifInputs.end(), inputs.begin(), inputs.end());
        addOperation(ANEURALNETWORKS_IF, ifInputs, outputs);
    }

    // Create a WHILE operation with the given condition and body referenced models.
    void addWhileOperation(const PartitioningModel& condModel, const PartitioningModel& bodyModel,
                           const std::vector<uint32_t>& inputs,
                           const std::vector<uint32_t>& outputs) {
        const uint32_t condOperand = addRefModelOperand(condModel);
        const uint32_t bodyOperand = addRefModelOperand(bodyModel);
        // WHILE inputs: condition model, body model, then the caller's inputs.
        std::vector<uint32_t> whileInputs = {condOperand, bodyOperand};
        whileInputs.insert(whileInputs.end(), inputs.begin(), inputs.end());
        addOperation(ANEURALNETWORKS_WHILE, whileInputs, outputs);
    }

    // Run the partitioning algorithm to create an ExecutionPlan.
    int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
                         ExecutePreference preference, ExecutePriority priority,
                         const std::optional<Deadline>& deadline, ExecutionPlan* plan) {
        // The wrapper's handle is the underlying ModelBuilder.
        return reinterpret_cast<ModelBuilder*>(getHandle())
                ->partitionTheWork(devices, static_cast<uint32_t>(preference),
                                   static_cast<int32_t>(priority), deadline, plan);
    }

#ifdef VERBOSE
    // This is a debugging utility function.
    void dump(const char* name) const {
        const ModelBuilder* mb = reinterpret_cast<const ModelBuilder*>(getHandle());
        ::dump(name, mb);
    }
#endif

   private:
    // Create an operation with two inputs and one output, specifying
    // the operation kind and the input operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1(uint32_t operation, const uint32_t input0, const uint32_t input1,
                              Dimensioned dimensionedOutput = Dimensioned::YES) {
        // Look up the encoded operation kind in firstEncodingToOperation
        // (declared elsewhere in this file); the entry yields the actual
        // operation type and whether it takes a trailing fuse-code input.
        auto it = firstEncodingToOperation.lower_bound(operation);
        CHECK(it != firstEncodingToOperation.end());
        ANeuralNetworksOperationType type = it->second.first;
        if (it->second.second) {
            // The offset within the encoding range doubles as the fuse code.
            int32_t fuseCode = operation - it->first;
            uint32_t input2 = addIntOperand(fuseCode);
            uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
            addOperation(type, {input0, input1, input2}, {output});
            return output;
        } else {
            uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
            addOperation(type, {input0, input1}, {output});
            return output;
        }
    }

    // Create an operation with one input and one output, specifying
    // the operation kind and the input operand index.
    // Returns the output operand index.
    uint32_t addOperation1To1(uint32_t operation, const uint32_t input0,
                              Dimensioned dimensionedOutput = Dimensioned::YES) {
        auto it = firstEncodingToOperation.lower_bound(operation);
        CHECK(it != firstEncodingToOperation.end());
        ANeuralNetworksOperationType type = it->second.first;

        uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
        addOperation(type, {input0}, {output});
        return output;
    }

    // Create a scalar integer operand of the specified value, and
    // return the corresponding operand index.
    uint32_t addIntOperand(int32_t value) {
        uint32_t operand = addOperand(WrapperType::INT32);
        setOperandValue(operand, &value, sizeof(value));
        return operand;
    }

    // Create an operand from a model for control flow graphs.
    uint32_t addRefModelOperand(const PartitioningModel& model) {
        const uint32_t index = addOperand(WrapperType::MODEL);
        WrapperModel::setOperandValueFromModel(index, &model);
        return index;
    }

    // Create an operand of the same type as the specified operand,
    // and return the operand index of the new operand.
    uint32_t addOperandOfSameType(uint32_t operand, Dimensioned dimensioned = Dimensioned::YES) {
        WrapperOperandType type = mWrapperOperandType.at(operand);
        // Overwrite each dimension: the bool converts to 1 (dimensioned) or
        // 0, i.e. unspecified (not dimensioned).
        for (auto& dimension : type.dimensions) {
            dimension = (dimensioned == Dimensioned::YES);
        }
        mWrapperOperandType.push_back(type);
        return WrapperModel::addOperand(&type);
    }

    // operand index to operand type
    std::vector<WrapperOperandType> mWrapperOperandType;
};
867
868 // This class adds some utilities on top of WrapperCompilation.
869 class PartitioningCompilation : public WrapperCompilation {
870 public:
PartitioningCompilation(const PartitioningModel * model,const std::vector<std::shared_ptr<Device>> & devices)871 PartitioningCompilation(const PartitioningModel* model,
872 const std::vector<std::shared_ptr<Device>>& devices) {
873 ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
874 CompilationBuilder* c = nullptr;
875 int result = m->createCompilation(&c, devices);
876 EXPECT_EQ(result, 0);
877 mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
878 }
879
setPartitioning(uint32_t partitioning)880 Result setPartitioning(uint32_t partitioning) {
881 return static_cast<Result>(builder()->setPartitioning(partitioning));
882 }
883
884 using WrapperCompilation::finish;
885
getExecutionPlan() const886 const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }
887
888 private:
builder()889 CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }
890
builder() const891 const CompilationBuilder* builder() const {
892 return reinterpret_cast<const CompilationBuilder*>(getHandle());
893 }
894 };
895
896 #ifdef VERBOSE
897 #define RETURN_TRUE() \
898 { \
899 std::cerr << "returning true from " << __LINE__ << std::endl; \
900 return true; \
901 }
902 #else
903 #define RETURN_TRUE() \
904 { return true; }
905 #endif
906 #ifdef VERBOSE
907 #define RETURN_FALSE(MESSAGE) \
908 { \
909 std::cerr << "returning false from " << __LINE__ MESSAGE << std::endl; \
910 return false; \
911 }
912 #else
913 #define RETURN_FALSE(MESSAGE) \
914 { return false; }
915 #endif
916
917 class PartitioningTest : public ::testing::Test {
918 protected:
919 using RemapVectorType = ExecutionStep::RemapVectorType;
920 using StepModelOutputSetType = ExecutionStep::StepModelOutputSetType;
921
    // No per-test setup is required for this fixture.
    virtual void SetUp() {}
923
924 // From a vector of DeviceSpecification, create a vector of
925 // Devices.
    struct DeviceSpecification {
        // Fully-explicit form: the caller supplies a complete Capabilities.
        DeviceSpecification(const std::string& name, const Capabilities& capabilities,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo)
            : mName(name),
              mVersionString(kVersionString),
              mCapabilities(capabilities),
              mOperationMask(operationMask),
              mOEM(oem) {}
        // Uniform performance: the same perf value is used for regular and
        // relaxed-precision execution.
        DeviceSpecification(const std::string& name, float perf, uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            std::set<OperationType> operationTypes = {})
            : DeviceSpecification(name, perf, perf, operationMask, oem, operationTypes) {}
        // Default version string.
        DeviceSpecification(const std::string& name, float perf, float perfRelaxed,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            std::set<OperationType> operationTypes = {})
            : DeviceSpecification(name, kVersionString, perf, perfRelaxed, operationMask, oem,
                                  operationTypes) {}
        // Explicit version string, uniform performance.
        DeviceSpecification(const std::string& name, const std::string& version, float perf,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            std::set<OperationType> operationTypes = {})
            : DeviceSpecification(name, version, perf, perf, operationMask, oem, operationTypes) {}
        // Primary constructor: builds a Capabilities in which every operand
        // type gets execTime/powerUsage = perf and relaxed-precision
        // execution gets perfRelaxed.
        DeviceSpecification(const std::string& name, const std::string& version, float perf,
                            float perfRelaxed, uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            std::set<OperationType> operationTypes = {})
            : mName(name),
              mVersionString(version),
              mOperationMask(operationMask),
              mOEM(oem),
              mOperationTypes(std::move(operationTypes)) {
            PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
            PerformanceInfo perfRelaxedInfo = {.execTime = perfRelaxed, .powerUsage = perfRelaxed};
            mCapabilities = {
                    .relaxedFloat32toFloat16PerformanceScalar = perfRelaxedInfo,
                    .relaxedFloat32toFloat16PerformanceTensor = perfRelaxedInfo,
                    .operandPerformance =
                            ::android::nn::nonExtensionOperandPerformance<HalVersion::V1_3>(
                                    perfInfo),
                    .ifPerformance = perfInfo,
                    .whilePerformance = perfInfo};
        }
        // Version-limited device: per-HAL-version operation masks are combined
        // into a single composite mask (see makeOperationMask() below).
        DeviceSpecification(const std::string& name, float perf, HalVersion halVersion,
                            uint32_t operationMaskV1_0, uint32_t operationMaskV1_1 = 0,
                            uint32_t operationMaskV1_2 = 0, uint32_t operationMaskV1_3 = 0)
            : DeviceSpecification(
                      name, perf, perf,
                      makeOperationMask(halVersion, operationMaskV1_0, operationMaskV1_1,
                                        operationMaskV1_2, operationMaskV1_3)) {
            mHalVersion = halVersion;
        }

        std::string mName;
        std::string mVersionString;
        Capabilities mCapabilities;
        HalVersion mHalVersion = HalVersion::LATEST;
        uint32_t mOperationMask;
        PartitioningDriver::OEM mOEM = PartitioningDriver::OEMNo;
        std::set<OperationType> mOperationTypes;

        static constexpr char kVersionString[] = "JUST_AN_EXAMPLE";

       private:
        // This function takes three operation masks aligned at the low-order
        // bit -- one mask each for V1_0, V1_1, and V1_2 -- and produces a single
        // composite operation mask, formed by shifting each of the input
        // operation masks appropriately and ORing the results together.
        //
        // For convenience, any bits of an input mask that are too high order
        // for that mask are discarded -- this allows ~0 to be a legal input
        // mask.
        //
        // For the sake of example, assume that each low order mask is 4 bits
        // wide, and take some artistic license to write literals in binary.
        // Then:
        //
        //     assert(makeOperationMask(HalVersion::V1_2, 0b0110, 0b1001, 0b0101) ==
        //            0b 0101 1001 0110);
        //
        // This is used by a DeviceSpecification constructor to build a mask of
        // operations to be supported by the device.
        static uint32_t makeOperationMask(HalVersion halVersion, uint32_t operationMaskV1_0,
                                          uint32_t operationMaskV1_1, uint32_t operationMaskV1_2,
                                          uint32_t operationMaskV1_3) {
            // A device of a given HAL version must not claim operations
            // introduced in later versions.
            if (halVersion < HalVersion::V1_3) {
                CHECK(!operationMaskV1_3);
            }
            if (halVersion < HalVersion::V1_2) {
                CHECK(!operationMaskV1_2);
            }
            if (halVersion < HalVersion::V1_1) {
                CHECK(!operationMaskV1_1);
            }
            auto maskOfWidth = [](uint32_t width) -> uint32_t { return (1U << width) - 1; };
            static const uint32_t kOperationMaskV1_0 =
                    maskOfWidth(kLastEncodingV1_0 - kFirstEncodingV1_0 + 1);
            static const uint32_t kOperationMaskV1_1 =
                    maskOfWidth(kLastEncodingV1_1 - kFirstEncodingV1_1 + 1);
            static const uint32_t kOperationMaskV1_2 =
                    maskOfWidth(kLastEncodingV1_2 - kFirstEncodingV1_2 + 1);
            static const uint32_t kOperationMaskV1_3 =
                    maskOfWidth(kLastEncodingV1_3 - kFirstEncodingV1_3 + 1);
            return ((operationMaskV1_0 & kOperationMaskV1_0) << kFirstEncodingV1_0) |
                   ((operationMaskV1_1 & kOperationMaskV1_1) << kFirstEncodingV1_1) |
                   ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2) |
                   ((operationMaskV1_3 & kOperationMaskV1_3) << kFirstEncodingV1_3);
        }
    };
makeDevices(std::vector<DeviceSpecification> specifications)1036 static std::vector<std::shared_ptr<Device>> makeDevices(
1037 std::vector<DeviceSpecification> specifications) {
1038 std::vector<std::shared_ptr<Device>> devices;
1039 for (const auto& specification : specifications) {
1040 V1_0::IDevice* halDriver = nullptr;
1041 switch (specification.mHalVersion) {
1042 case HalVersion::V1_3:
1043 halDriver = new PartitioningDriver(
1044 specification.mName.c_str(), specification.mVersionString.c_str(),
1045 specification.mCapabilities, specification.mOperationMask,
1046 specification.mOEM, specification.mOperationTypes);
1047 break;
1048 case HalVersion::V1_2:
1049 halDriver = new PartitioningDriverV1_2(
1050 specification.mName.c_str(), specification.mVersionString.c_str(),
1051 specification.mCapabilities, specification.mOperationMask,
1052 specification.mOEM, specification.mOperationTypes);
1053 break;
1054 case HalVersion::V1_1:
1055 halDriver = new PartitioningDriverV1_1(
1056 specification.mName.c_str(), specification.mVersionString.c_str(),
1057 specification.mCapabilities, specification.mOperationMask,
1058 specification.mOEM, specification.mOperationTypes);
1059 break;
1060 case HalVersion::V1_0:
1061 halDriver = new PartitioningDriverV1_0(
1062 specification.mName.c_str(), specification.mVersionString.c_str(),
1063 specification.mCapabilities, specification.mOperationMask,
1064 specification.mOEM, specification.mOperationTypes);
1065 break;
1066 default:
1067 ADD_FAILURE() << "Unexpected";
1068 }
1069 auto device = DeviceManager::forTest_makeDriverDevice(specification.mName, halDriver);
1070 devices.push_back(device);
1071 }
1072 devices.push_back(DeviceManager::getCpuDevice());
1073 return devices;
1074 }
1075
    /*-- Graph comparison -----------------------------------------------------------------*/
1077
1078 // An operand with certain values for its lifetime does not have a
1079 // defining operation in the graph. For the purposes of the graph
1080 // comparison algorithm, we encode the "defining operation" index of
1081 // such an operand as follows:
1082 // - NO_VALUE kPseudoDefiningOperationNoValue
1083 // - SUBGRAPH_INPUT kPseudoDefiningOperationModelInput0 + (position in list of inputs)
1084 // - CONSTANT_COPY kPseudoDefiningOperationConstantCopy0 + (constant value)
1085 // Note: For the graphs we build in this test, we
1086 // only expect to see 4-byte constants within
1087 // a very restricted range, so we only make
1088 // room for such constants in our encoding
1089 // space.
1090 // We do not expect to see CONSTANT_REFERENCE, and so we do not handle
1091 // it.
1092 //
1093 // The encoding is intended to be relatively human readable; it is not
1094 // designed to represent some optimal balance of ranges for the items
1095 // within its scope (actual operations, inputs, constants).
1096
    // Encoded "defining operation" values for operands that have no real
    // defining operation; see the comment block above for the full scheme.
    enum PseudoDefiningOperationEncodings : uint32_t {
        kPseudoDefiningOperationModelInput0 = 0x80000000U,    // + position in input list
        kPseudoDefiningOperationConstantCopy0 = 0x90000000U,  // + 4-byte constant value
        kPseudoDefiningOperationNoValue = 0xeeeeeeeeU,

        // lowest value for special encoding
        kPseudoDefiningOperationBase = 0x80000000U,

        // range of encoded input or constant
        kPseudoDefiningOperationRange = 0x10000000U,
    };
1108
1109 // Build a map from operand to defining operation.
1110 // TODO: Replace map with vector?
buildDefinitionMap(const ModelBuilder * model,std::map<uint32_t,uint32_t> * defMap)1111 void buildDefinitionMap(const ModelBuilder* model, std::map<uint32_t, uint32_t>* defMap) {
1112 // actual definitions
1113 ASSERT_LT(model->operationCount(), kPseudoDefiningOperationBase);
1114 for (uint32_t i = 0, e = model->operationCount(); i < e; i++) {
1115 const Operation& operation = model->getOperation(i);
1116 for (uint32_t output : operation.outputs) {
1117 (*defMap)[output] = i;
1118 }
1119 }
1120 // inputs
1121 ASSERT_LT(model->inputCount(), kPseudoDefiningOperationRange);
1122 for (uint32_t i = 0, e = model->inputCount(); i < e; i++) {
1123 (*defMap)[model->getInputOperandIndex(i)] = kPseudoDefiningOperationModelInput0 + i;
1124 }
1125 // look for NO_VALUE and CONSTANT_COPY
1126 for (uint32_t i = 0, e = model->operandCount(); i < e; i++) {
1127 const Operand& operand = model->getOperand(i);
1128 switch (operand.lifetime) {
1129 case OperandLifeTime::NO_VALUE:
1130 (*defMap)[i] = kPseudoDefiningOperationNoValue;
1131 break;
1132 case OperandLifeTime::CONSTANT_COPY: {
1133 ASSERT_EQ(operand.location.length, sizeof(uint32_t));
1134 uint32_t value;
1135 memcpy(&value, model->getPointerToOperandValue(operand.location.offset),
1136 sizeof(uint32_t));
1137 ASSERT_LT(value, kPseudoDefiningOperationNoValue);
1138 (*defMap)[i] = kPseudoDefiningOperationConstantCopy0 + value;
1139 break;
1140 }
1141 case OperandLifeTime::TEMPORARY_VARIABLE:
1142 case OperandLifeTime::SUBGRAPH_INPUT:
1143 case OperandLifeTime::SUBGRAPH_OUTPUT:
1144 // already handled
1145 break;
1146 default:
1147 FAIL();
1148 break;
1149 }
1150 }
1151 // sanity check
1152 ASSERT_EQ(model->operandCount(), defMap->size());
1153 }
1154
1155 #ifdef VERBOSE
dump(const char * name,const std::map<uint32_t,uint32_t> * aMap)1156 void dump(const char* name, const std::map<uint32_t, uint32_t>* aMap) {
1157 auto writeNum = [](uint32_t num) {
1158 if (num >= kPseudoDefiningOperationBase) {
1159 std::cout << "0x" << std::hex << num << std::dec;
1160 } else {
1161 std::cout << num;
1162 }
1163 };
1164
1165 std::cout << name << ": { ";
1166 bool gotOne = false;
1167 for (const auto& entry : *aMap) {
1168 if (gotOne) {
1169 std::cout << ", ";
1170 } else {
1171 gotOne = true;
1172 }
1173 std::cout << "(";
1174 writeNum(entry.first);
1175 std::cout << ", ";
1176 writeNum(entry.second);
1177 std::cout << ")";
1178 }
1179 std::cout << " }" << std::endl;
1180 }
1181 #endif
1182
compare(const Operand & operandA,const Operand & operandB)1183 bool compare(const Operand& operandA, const Operand& operandB) {
1184 if (operandA.type != operandB.type || operandA.dimensions != operandB.dimensions ||
1185 operandA.numberOfConsumers != operandB.numberOfConsumers ||
1186 operandA.scale != operandB.scale || operandA.zeroPoint != operandB.zeroPoint) {
1187 return false;
1188 }
1189 return true;
1190 }
1191
1192 // Compare two graphs. We ignore operand and operation indexes (i.e.,
1193 // two nodes can be the same even if they are numbered differently)
1194 // but we also ignore semantics (e.g., even if an operation kind is
1195 // such that the operand is commutative, we still pay attention to the
1196 // order of its input operands).
1197 //
1198 // The comparison algorithm works by walking modelA from outputs
1199 // towards inputs, along the edge from each operand to its
1200 // defining operation, and then along the edges to the operation's
1201 // input operands. At each step along the way, we try to match up
1202 // operands and operations from modelA with equivalent operands
1203 // and operations from modelB.
1204 //
1205 // We start by assuming that modelA's outputs and modelB's outputs
1206 // match positionally (e.g., modelA's first output operand is
1207 // equivalent to modelB's first output operand). Once we've
1208 // discovered two equivalent operands (such as those outputs), we
1209 // place them in a work queue. We repeatedly pull operands off
1210 // the queue and compare their defining operations and those
1211 // operations' input operands, to discover more pairs of
1212 // equivalent operands. If we ever find operations that do not
1213 // match (e.g., because operation kind differs), or operands that
1214 // do not match (e.g., because operand type differs); or if we
1215 // ever find a conflict (we've already decided that operand A's
1216 // equivalent operand is B0, but it looks like we need its
1217 // equivalent operand to be B1); then the graphs compare unequal.
1218 // Otherwise, we'll eventually exhaust the work queue, and
1219 // conclude that the graphs compare equal.
1220 //
1221 // As a side effect of the comparison, we produce a map
1222 // *inputsAndOutputsBToA that maps from each of the model input and output
1223 // operand numbers of modelB to the corresponding operand numbers of modelA.
1224 // If the comparison returns false, the contents of the map are undefined.
compare(const ModelBuilder * modelA,const ModelBuilder * modelB,std::map<uint32_t,uint32_t> * inputsAndOutputsBToA)1225 bool compare(const ModelBuilder* modelA, const ModelBuilder* modelB,
1226 std::map<uint32_t, uint32_t>* inputsAndOutputsBToA) {
1227 CHECK(inputsAndOutputsBToA != nullptr);
1228 EXPECT_TRUE(inputsAndOutputsBToA->empty());
1229
1230 #ifdef VERBOSE
1231 ::dump("compare(A)", modelA);
1232 ::dump("compare(B)", modelB);
1233 #endif
1234
1235 if (modelA->operandCount() != modelB->operandCount() ||
1236 modelA->operationCount() != modelB->operationCount() ||
1237 modelA->inputCount() != modelB->inputCount() ||
1238 modelA->outputCount() != modelB->outputCount()) {
1239 RETURN_FALSE();
1240 }
1241
1242 // Maps from operand index to index of defining operation.
1243 std::map<uint32_t, uint32_t> defsA, defsB;
1244 buildDefinitionMap(modelA, &defsA);
1245 buildDefinitionMap(modelB, &defsB);
1246 if (HasFatalFailure()) return false;
1247
1248 // Maps from operand index in modelA to equivalent operand index
1249 // in modelB; and from operation index in modelA to equivalent
1250 // operation index in modelB.
1251 std::map<uint32_t, uint32_t> equivalentOperandsAToB;
1252 std::map<uint32_t, uint32_t> equivalentOperationsAToB;
1253
1254 // Queue of operand indexes from modelA, each of whose defining
1255 // operations are to be checked for equivalence with modelB.
1256 std::queue<uint32_t> workQueueOperandsA;
1257
1258 // Seed operand equivalence map and work queue from model outputs.
1259 for (uint32_t i = 0, e = modelA->outputCount(); i < e; i++) {
1260 uint32_t outputA = modelA->getOutputOperandIndex(i);
1261 uint32_t outputB = modelB->getOutputOperandIndex(i);
1262 if (!compare(modelA->getOperand(outputA), modelB->getOperand(outputB))) {
1263 RETURN_FALSE();
1264 }
1265 equivalentOperandsAToB[outputA] = outputB;
1266 workQueueOperandsA.push(outputA);
1267 }
1268
1269 #ifdef VERBOSE
1270 dump("defsA", &defsA);
1271 dump("defsB", &defsB);
1272 #endif
1273
1274 // Process the queue.
1275 uint32_t pseudoDefinitionCount = 0;
1276 while (!workQueueOperandsA.empty()) {
1277 #ifdef VERBOSE
1278 dump("equivalentOperandsAToB", &equivalentOperandsAToB);
1279 dump("equivalentOperationsAToB", &equivalentOperationsAToB);
1280 #endif
1281 uint32_t operandIndexA = workQueueOperandsA.front();
1282 #ifdef VERBOSE
1283 std::cout << "operandIndexA: " << operandIndexA << std::endl;
1284 #endif
1285 workQueueOperandsA.pop();
1286 uint32_t operandIndexB = equivalentOperandsAToB.at(operandIndexA);
1287
1288 uint32_t operationIndexA = defsA.at(operandIndexA);
1289 uint32_t operationIndexB = defsB.at(operandIndexB);
1290 auto it = equivalentOperationsAToB.find(operationIndexA);
1291 if (it != equivalentOperationsAToB.end()) {
1292 if (it->second != operationIndexB) {
1293 RETURN_FALSE();
1294 }
1295 continue;
1296 }
1297
1298 // We haven't identified an equivalent operation for
1299 // operationIndexA.
1300
1301 if ((operationIndexA >= kPseudoDefiningOperationBase) !=
1302 (operationIndexB >= kPseudoDefiningOperationBase)) {
1303 RETURN_FALSE();
1304 }
1305 // Either both operands have pseudo-definitions, or neither
1306 // does.
1307 if (operationIndexA >= kPseudoDefiningOperationBase) {
1308 // Both operands have pseudo-definitions.
1309 if (operationIndexA != operationIndexB) {
1310 RETURN_FALSE();
1311 }
1312 equivalentOperationsAToB[operationIndexA] = operationIndexB;
1313 ++pseudoDefinitionCount;
1314 continue;
1315 }
1316
1317 // If we get here, neither operation A nor operation B is a
1318 // pseudo-definition.
1319
1320 const Operation& operationA = modelA->getOperation(operationIndexA);
1321 const Operation& operationB = modelB->getOperation(operationIndexB);
1322 if (operationA.type != operationB.type ||
1323 operationA.inputs.size() != operationB.inputs.size() ||
1324 operationA.outputs.size() != operationB.outputs.size()) {
1325 RETURN_FALSE();
1326 }
1327 equivalentOperationsAToB[operationIndexA] = operationIndexB;
1328 for (uint32_t i = 0, e = operationA.inputs.size(); i < e; i++) {
1329 uint32_t inputA = operationA.inputs[i];
1330 uint32_t inputB = operationB.inputs[i];
1331 auto it = equivalentOperandsAToB.find(inputA);
1332 if (it != equivalentOperandsAToB.end()) {
1333 if (it->second != inputB) {
1334 RETURN_FALSE();
1335 }
1336 continue;
1337 }
1338 // We haven't identified an equivalent operand for inputA.
1339 if (!compare(modelA->getOperand(inputA), modelB->getOperand(inputB))) {
1340 RETURN_FALSE();
1341 }
1342 equivalentOperandsAToB[inputA] = inputB;
1343 workQueueOperandsA.push(inputA);
1344 }
1345 }
1346
1347 // Sanity check
1348 if (modelA->operandCount() != defsA.size() || modelA->operandCount() != defsB.size() ||
1349 modelA->operandCount() != equivalentOperandsAToB.size() ||
1350 modelA->operationCount() + pseudoDefinitionCount != equivalentOperationsAToB.size()) {
1351 RETURN_FALSE();
1352 }
1353
1354 // Build *inputsAndOutputsBToA
1355 for (uint32_t aInputIndex : modelA->getInputOperandIndexes()) {
1356 (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aInputIndex)] = aInputIndex;
1357 }
1358 for (uint32_t aOutputIndex : modelA->getOutputOperandIndexes()) {
1359 (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aOutputIndex)] = aOutputIndex;
1360 }
1361
1362 RETURN_TRUE();
1363 }
1364
1365 /*-------------------------------------------------------------------------------------*/
1366
1367 // As a side effect of the comparison, we produce a map
1368 // *inputsAndOutputsModelToStep that maps from each of the model input and
1369 // output operand numbers of "model" to the corresponding operand numbers of
1370 // the step model from "step". If the comparison returns false, the contents
1371 // of the map are undefined.
compare(const ExecutionStep * step,const PartitioningModel * model,std::shared_ptr<Device> device,std::map<uint32_t,uint32_t> * inputsAndOutputsModelToStep)1372 bool compare(const ExecutionStep* step, const PartitioningModel* model,
1373 std::shared_ptr<Device> device,
1374 std::map<uint32_t, uint32_t>* inputsAndOutputsModelToStep) {
1375 return (step->getDevice() == device) &&
1376 compare(step->getStepModel(),
1377 reinterpret_cast<const ModelBuilder*>(model->getHandle()),
1378 inputsAndOutputsModelToStep);
1379 }
1380
    // Checks one logical step of an execution plan against its expected
    // properties: that it is an execution step assigned to "device", that its
    // step model is equivalent to "model", and that the step's recorded remap
    // vectors/sets match the expected ones. The expected remaps are expressed
    // with the *original* model's operand numbers on the left and the expected
    // step model's ("model") operand numbers on the right; the helper
    // comparators translate the latter through inputsAndOutputsModelToStep.
    // Uses gtest fatal assertions -- callers wrap invocations in
    // ASSERT_NO_FATAL_FAILURE().
    void compare(const std::shared_ptr<LogicalStep> logicalStep, const PartitioningModel* model,
                 std::shared_ptr<Device> device, const RemapVectorType& modelInputs,
                 const RemapVectorType& modelOutputs, const RemapVectorType& tempsAsStepModelInputs,
                 const StepModelOutputSetType& tempsAsStepModelOutputs,
                 const RemapVectorType& outputsAsStepModelInputs) {
        ASSERT_TRUE(logicalStep->isExecution());
        const ExecutionStep* step = logicalStep->executionStep();
        // Filled in (as a side effect) by the device/step-model comparison
        // below; maps model operand numbers to step model operand numbers.
        std::map<uint32_t, uint32_t> inputsAndOutputsModelToStep;
        ASSERT_NO_FATAL_FAILURE(
                ASSERT_TRUE(compare(step, model, device, &inputsAndOutputsModelToStep)));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelInputs(),
                                        modelInputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelOutputs(),
                                        modelOutputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                        step->getTempsAsStepModelInputs(), tempsAsStepModelInputs));
        ASSERT_TRUE(compareStepModelOutputSets(inputsAndOutputsModelToStep,
                                               step->getTempsAsStepModelOutputs(),
                                               tempsAsStepModelOutputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                        step->getOutputsAsStepModelInputs(),
                                        outputsAsStepModelInputs));
    }
1404
1405 private:
compareRemapVectors(const std::map<uint32_t,uint32_t> & inputsAndOutputsModelToStep,const RemapVectorType & step,RemapVectorType model)1406 static bool compareRemapVectors(const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
1407 const RemapVectorType& step, RemapVectorType model) {
1408 std::transform(model.begin(), model.end(), model.begin(),
1409 [&inputsAndOutputsModelToStep](const RemapVectorType::value_type& val) {
1410 return std::make_pair(val.first,
1411 inputsAndOutputsModelToStep.at(val.second));
1412 });
1413 return step == model;
1414 }
1415
compareStepModelOutputSets(const std::map<uint32_t,uint32_t> & inputsAndOutputsModelToStep,const StepModelOutputSetType & step,const StepModelOutputSetType & model)1416 static bool compareStepModelOutputSets(
1417 const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
1418 const StepModelOutputSetType& step, const StepModelOutputSetType& model) {
1419 StepModelOutputSetType modelTransformed;
1420 std::transform(
1421 model.begin(), model.end(), std::inserter(modelTransformed, modelTransformed.end()),
1422 [&inputsAndOutputsModelToStep](const StepModelOutputSetType::value_type& val) {
1423 return std::make_pair(val.first, inputsAndOutputsModelToStep.at(val.second));
1424 });
1425 return step == modelTransformed;
1426 }
1427 };
1428
TEST_F(PartitioningTest, SimpleModel) {
    // Two-operation model: opnd2 = op0(opnd0, opnd1); opnd4 = op1(opnd2, opnd3).
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addFloatOperand();
    uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3);
    model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (two devices are each capable of everything, one is the best).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesA = makeDevices({{"bad", 0.9, ~0U}, {"good", 0.5, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    // Everything should go to the single best ("good", lower perf value) device.
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "good");

    // Simple partition (two devices are each capable of everything, none better than CPU).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesC = makeDevices({{"bad", 1.1, ~0U}, {"bad2", 1.0, ~0U}});
    ExecutionPlan planC;
    ASSERT_EQ(model.partitionTheWork(devicesC, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planC),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(planC.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(planC.forTest_simpleGetDevice(), DeviceManager::getCpuDevice());

    // Compound partition (two devices, each is capable of one of the
    // two operations). We could do more extensive checking here --
    // for example, verify that each step within the plan has the
    // correct (model and step model)x(inputs and outputs).
    // Device "0" supports only operation 0; device "1" supports only operation 1.
    const auto devicesB = makeDevices({{"0", 0.9, 1 << 0}, {"1", 0.5, 1 << 1}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(2));
    {
        // Build a model to compare against the step model from stepsB[0].
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_0(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[0],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},                         // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, b0Opnd2}},  // tempsAsStepModelOutputs
                        RemapVectorType{}));                       // outputsAsStepModelInputs;
    }
    {
        // Build a model to compare against the step model from stepsB[1].
        PartitioningModel modelB1;
        uint32_t b1Opnd2 = modelB1.addFloatOperand();
        uint32_t b1Opnd3 = modelB1.addFloatOperand();
        uint32_t b1Opnd4 = modelB1.addOperation2To1V1_0(1, b1Opnd2, b1Opnd3);
        // Note: In the partitioning algorithm, step model inputs follow
        // model inputs.  In the original model "model", opnd2 is not
        // an input; so in the step model "modelB1", the corresponding
        // input b1Opnd2 is a step model input, and must follow the
        // model input b1Opnd3.
        modelB1.identifyInputsAndOutputs({b1Opnd3, b1Opnd2}, {b1Opnd4});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                stepsB[1], &modelB1, devicesB[1],
                RemapVectorType{{opnd3, b1Opnd3}},  // modelInputs
                RemapVectorType{{opnd4, b1Opnd4}},  // modelOutputs
                RemapVectorType{{opnd2, b1Opnd2}},  // tempsAsStepModelInputs
                StepModelOutputSetType{},           // tempsAsStepModelOutputs
                RemapVectorType{}));                // outputsAsStepModelInputs
    }
}
1516
TEST_F(PartitioningTest, SliceModel) {
    // A model containing operations from four different HAL versions
    // (V1_0 through V1_3), so that it can be "sliced" across drivers of
    // different versions.
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd0, opnd1);
    uint32_t opnd4 = model.addOperation2To1V1_1(0, opnd0, opnd1);
    uint32_t opnd5 = model.addOperation2To1V1_2(0, opnd2, opnd3);
    uint32_t opnd6 = model.addOperation1To1V1_3(0, opnd2);
    model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5, opnd6});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (V1_0, V1_1, V1_2, V1_3 devices are available; V1_3 has best perf).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesA = makeDevices({{"V1_0", 0.8, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.5, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_3");

    // Compound partition (V1_0, V1_1, V1_2 devices are available, in decreasing
    // order of performance; model is distributed across all three devices).
    const auto devicesB = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(4));
    {
        // Build a model to compare against the step model from stepsB[0].
        // The lone V1_1 operation goes to the "V1_1" device.
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_1(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[1],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{{opnd4, b0Opnd2}},                    // modelOutputs
                        RemapVectorType{},         // tempsAsStepModelInputs
                        StepModelOutputSetType{},  // tempsAsStepModelOutputs
                        RemapVectorType{}));       // outputsAsStepModelInputs
    }
    {
        // Build a model to compare against the step model from stepsB[1].
        // Both V1_0 operations go to the "V1_0" device.
        PartitioningModel modelB1;
        uint32_t b1Opnd0 = modelB1.addFloatOperand();
        uint32_t b1Opnd1 = modelB1.addFloatOperand();
        uint32_t b1Opnd2 = modelB1.addOperation2To1V1_0(0, b1Opnd0, b1Opnd1);
        uint32_t b1Opnd3 = modelB1.addOperation2To1V1_0(1, b1Opnd0, b1Opnd1);
        modelB1.identifyInputsAndOutputs({b1Opnd0, b1Opnd1}, {b1Opnd2, b1Opnd3});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[1], &modelB1, devicesB[0],
                        RemapVectorType{{opnd0, b1Opnd0}, {opnd1, b1Opnd1}},  // modelInputs
                        RemapVectorType{{opnd2, b1Opnd2}},                    // modelOutputs
                        RemapVectorType{},                          // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd3, b1Opnd3}},   // tempsAsStepModelOutputs
                        RemapVectorType{}));                        // outputsAsStepModelInputs
    }
    {
        // Build a model to compare against the step model from stepsB[2].
        // The V1_3 operation goes to the "V1_3" device.
        PartitioningModel modelB2;
        uint32_t b2Opnd0 = modelB2.addFloatOperand();
        uint32_t b2Opnd1 = modelB2.addOperation1To1V1_3(0, b2Opnd0);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs.
        modelB2.identifyInputsAndOutputs({b2Opnd0}, {b2Opnd1});
        modelB2.finish();
        ASSERT_TRUE(modelB2.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[2], &modelB2, devicesB[3],
                        RemapVectorType{},                  // modelInputs
                        RemapVectorType{{opnd6, b2Opnd1}},  // modelOutputs
                        RemapVectorType{},                  // tempsAsStepModelInputs
                        StepModelOutputSetType{},           // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b2Opnd0}}));  // outputsAsStepModelInputs
    }
    {
        // Build a model to compare against the step model from stepsB[3].
        // The V1_2 operation goes to the "V1_2" device.
        PartitioningModel modelB3;
        uint32_t b3Opnd0 = modelB3.addFloatOperand();
        uint32_t b3Opnd1 = modelB3.addFloatOperand();
        uint32_t b3Opnd2 = modelB3.addOperation2To1V1_2(0, b3Opnd0, b3Opnd1);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs.  In the original model "model", opnd3 is a temp and
        // opnd2 is a model output; so in the step model "modelB3", the
        // corresponding inputs b3Opnd1 and b3Opnd0 must appear in
        // that order.
        modelB3.identifyInputsAndOutputs({b3Opnd1, b3Opnd0}, {b3Opnd2});
        modelB3.finish();
        ASSERT_TRUE(modelB3.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[3], &modelB3, devicesB[2],
                        RemapVectorType{},                    // modelInputs
                        RemapVectorType{{opnd5, b3Opnd2}},    // modelOutputs
                        RemapVectorType{{opnd3, b3Opnd1}},    // tempsAsStepModelInputs
                        StepModelOutputSetType{},             // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b3Opnd0}}));  // outputsAsStepModelInputs
    }

    // TODO: Make sure this still works when we have multiple devices
    // of same version available for slicing. An easy (?) choice would
    // be to route the two different V1_0 operations to different
    // devices.
}
1643
TEST_F(PartitioningTest, SliceModelToEmpty) {
    // One-operation model whose only operation is a V1_3 operation, so
    // slicing for older HAL versions leaves nothing behind.
    PartitioningModel model;
    uint32_t input = model.addFloatOperand();
    uint32_t output = model.addOperation1To1V1_3(0, input);
    model.identifyInputsAndOutputs({input}, {output});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Only the V1_3 device can handle any operations in the model.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devices = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                      {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                      {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                      {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    // Expect a SIMPLE plan routing the whole model to the V1_3 driver.
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "V1_3");
}
1667
TEST_F(PartitioningTest, Cpu) {
    // Here's a model where some operations execute only on the Cpu.
    // To make things interesting, we produce three partitions --
    // device, cpu, same-device.

    static const uint32_t kCpuOp = 1;
    static const uint32_t kDevOp = 2;

    // The lone driver supports only operation kDevOp (mask 1 << kDevOp), so
    // kCpuOp operations must fall back to the CPU device.
    const auto devices = makeDevices({{"1", 0.5, 1 << kDevOp}});

    PartitioningModel model;

    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();

    // First device partition: two kDevOp operations.
    uint32_t opnd2 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd2);

    // CPU partition: two kCpuOp operations consuming the device results.
    uint32_t opnd4 = model.addOperation2To1V1_0(kCpuOp, opnd0, opnd3);
    uint32_t opnd5 = model.addOperation2To1V1_0(kCpuOp, opnd2, opnd4);

    uint32_t opnd6 = model.addFloatOperand();

    // Second device partition: two more kDevOp operations consuming the
    // CPU results.
    uint32_t opnd7 = model.addOperation2To1V1_0(kDevOp, opnd3, opnd5);
    uint32_t opnd8 = model.addOperation2To1V1_0(kDevOp, opnd6, opnd7);

    model.identifyInputsAndOutputs({opnd0, opnd1, opnd6}, {opnd4, opnd8});
    model.finish();
    ASSERT_TRUE(model.isValid());

    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(3));
    {
        const auto& step0 = steps[0];

        // Build a model to compare against the step model from steps[0].
        PartitioningModel model0;
        uint32_t m0Opnd0 = model0.addFloatOperand();
        uint32_t m0Opnd1 = model0.addFloatOperand();
        uint32_t m0Opnd2 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd1);
        uint32_t m0Opnd3 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd2);
        model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2, m0Opnd3});
        model0.finish();
        ASSERT_TRUE(model0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(step0, &model0, devices[0],
                        RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},  // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, m0Opnd2},
                                               {opnd3, m0Opnd3}},  // tempsAsStepModelOutputs
                        RemapVectorType{}));                       // outputsAsStepModelInputs
    }
    {
        const auto& step1 = steps[1];

        // Build a model to compare against the step model from steps[1].
        PartitioningModel model1;
        uint32_t m1Opnd0 = model1.addFloatOperand();
        uint32_t m1Opnd3 = model1.addFloatOperand();
        uint32_t m1Opnd4 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd0, m1Opnd3);
        uint32_t m1Opnd2 = model1.addFloatOperand();
        uint32_t m1Opnd5 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd2, m1Opnd4);
        model1.identifyInputsAndOutputs({m1Opnd0, m1Opnd3, m1Opnd2}, {m1Opnd4, m1Opnd5});
        model1.finish();
        ASSERT_TRUE(model1.isValid());

        // This middle partition runs on the CPU fallback device.
        ASSERT_NO_FATAL_FAILURE(compare(
                step1, &model1, DeviceManager::getCpuDevice(),
                RemapVectorType{{opnd0, m1Opnd0}},                    // modelInputs
                RemapVectorType{{opnd4, m1Opnd4}},                    // modelOutputs
                RemapVectorType{{opnd3, m1Opnd3}, {opnd2, m1Opnd2}},  // tempsAsStepModelInputs
                StepModelOutputSetType{{opnd5, m1Opnd5}},             // tempsAsStepModelOutputs
                RemapVectorType{}));                                  // outputsAsStepModelInputs
    }
    {
        const auto& step2 = steps[2];

        // Build a model to compare against the step model from steps[2].
        PartitioningModel model2;
        uint32_t m2Opnd3 = model2.addFloatOperand();
        uint32_t m2Opnd5 = model2.addFloatOperand();
        uint32_t m2Opnd7 = model2.addOperation2To1V1_0(kDevOp, m2Opnd3, m2Opnd5);
        uint32_t m2Opnd6 = model2.addFloatOperand();
        uint32_t m2Opnd8 = model2.addOperation2To1V1_0(kDevOp, m2Opnd6, m2Opnd7);
        model2.identifyInputsAndOutputs({m2Opnd6, m2Opnd3, m2Opnd5}, {m2Opnd8});
        model2.finish();
        ASSERT_TRUE(model2.isValid());

        // The final partition returns to the same driver device as steps[0].
        ASSERT_NO_FATAL_FAILURE(compare(
                step2, &model2, devices[0],
                RemapVectorType{{opnd6, m2Opnd6}},                    // modelInputs
                RemapVectorType{{opnd8, m2Opnd8}},                    // modelOutputs
                RemapVectorType{{opnd3, m2Opnd3}, {opnd5, m2Opnd5}},  // tempsAsStepModelInputs
                StepModelOutputSetType{},                             // tempsAsStepModelOutputs
                RemapVectorType{}));                                  // outputsAsStepModelInputs
    }
}
1771
TEST_F(PartitioningTest, SetPartitioning) {
    // Two-operation model in which the intermediate operand (opnd2) is
    // deliberately created without dimensions.
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 =
            model.addOperation2To1V1_0(0, opnd0, opnd1, PartitioningModel::Dimensioned::NO);
    uint32_t opnd3 = model.addFloatOperand();
    uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3);
    model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // We expect that we cannot successfully partition, because we
    // have an intermediate operand (opnd2) without dimensions, and
    // this is not currently handled.

    // One device that can and should execute operation 0.
    const auto devices = makeDevices({{"hw", 0.5, (1 << 0)}});

    // Test kPartitioningNo.  We should not even attempt partitioning,
    // so there should be a SIMPLE plan on CPU.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    PartitioningCompilation cPNo(&model, devices);
    ASSERT_EQ(cPNo.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
    ASSERT_EQ(cPNo.finish(), Result::NO_ERROR);
    ASSERT_EQ(cPNo.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(cPNo.getExecutionPlan().forTest_simpleGetDevice(), DeviceManager::getCpuDevice());

    // Test kPartitioningWithFallback.  We should attempt
    // partitioning, reach the end of the partitioning process (so we
    // have an unsuccessful execution plan), discover the dimensionless
    // intermediate operand, then fallback to CPU with a SIMPLE plan, and
    // finally return success.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    PartitioningCompilation cPWithFallback(&model, devices);
    ASSERT_EQ(cPWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
              Result::NO_ERROR);
    ASSERT_EQ(cPWithFallback.finish(), Result::NO_ERROR);
    ASSERT_EQ(cPWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(cPWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
              DeviceManager::getCpuDevice());

    // Test kPartitioningWithoutFallback.  We should attempt
    // partitioning, and fail.
    PartitioningCompilation cPWithoutFallback(&model, devices);
    ASSERT_EQ(cPWithoutFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    ASSERT_EQ(cPWithoutFallback.finish(), Result::OP_FAILED);
    ASSERT_TRUE(cPWithoutFallback.getExecutionPlan().forTest_hasStepModelOutputsOfUnknownSize());
    ASSERT_EQ(cPWithoutFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::ERROR);
}
1825
1826 // Regression test for http://b/69166603:
1827 // "partitioned compilation and execution yields wrong results when model output is step model
1828 // input"
TEST_F(PartitioningTest, ModelOutputAsStepModelInput) {
    // Two-operation model in which a model *output* (opnd2) is also
    // consumed by the second operation -- the situation from b/69166603.
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd2, opnd2);
    model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd3});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Compound partition (two devices, each is capable of one of the
    // two operations). We could do more extensive checking here --
    // for example, verify that each step within the plan has the
    // correct (model and step model)x(inputs and outputs).
    const auto devices = makeDevices({{"0", 0.5, 1 << 0}, {"1", 0.5, 1 << 1}});
    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(2));
    {
        // Build a model to compare against the step model from steps[0].
        PartitioningModel model0;
        uint32_t m0Opnd0 = model0.addFloatOperand();
        uint32_t m0Opnd1 = model0.addFloatOperand();
        uint32_t m0Opnd2 = model0.addOperation2To1V1_0(0, m0Opnd0, m0Opnd1);
        model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2});
        model0.finish();
        ASSERT_TRUE(model0.isValid());
        ASSERT_NO_FATAL_FAILURE(
                compare(steps[0], &model0, devices[0],
                        RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}},  // modelInputs
                        RemapVectorType{{opnd2, m0Opnd2}},                    // modelOutputs
                        RemapVectorType{},         // tempsAsStepModelInputs
                        StepModelOutputSetType{},  // tempsAsStepModelOutputs
                        RemapVectorType{}));       // outputsAsStepModelInputs
    }
    {
        // Build a model to compare against the step model from steps[1].
        // Here opnd2 -- a model output produced by steps[0] -- arrives as an
        // outputsAsStepModelInputs entry rather than as a temp.
        PartitioningModel model1;
        uint32_t m1Opnd2 = model1.addFloatOperand();
        uint32_t m1Opnd3 = model1.addOperation2To1V1_0(1, m1Opnd2, m1Opnd2);
        model1.identifyInputsAndOutputs({m1Opnd2}, {m1Opnd3});
        model1.finish();
        ASSERT_TRUE(model1.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(steps[1], &model1, devices[1],
                        RemapVectorType{},                    // modelInputs
                        RemapVectorType{{opnd3, m1Opnd3}},    // modelOutputs
                        RemapVectorType{},                    // tempsAsStepModelInputs
                        StepModelOutputSetType{},             // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, m1Opnd2}}));  // outputsAsStepModelInputs
    }
}
1885
TEST_F(PartitioningTest, OemOperations) {
    // Trivial model consisting solely of an OEM operation.
    PartitioningModel model;
    uint32_t oemIn = model.addFloatOperand();
    uint32_t oemOut = model.addOperationOEM1To1(oemIn);
    model.identifyInputsAndOutputs({oemIn}, {oemOut});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Verify that the best driver that can run an OEM operation is
    // used, even if it is not better than the CPU.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto bestOemDevices = makeDevices({{"badOEM", 1.5, ~0U, PartitioningDriver::OEMYes},
                                             {"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo},
                                             {"goodOEM", 1.2, ~0U, PartitioningDriver::OEMYes}});
    PartitioningCompilation bestOemCompilation(&model, bestOemDevices);
    ASSERT_EQ(bestOemCompilation.finish(), Result::NO_ERROR);
    const auto& bestOemPlan = bestOemCompilation.getExecutionPlan();
    ASSERT_EQ(bestOemPlan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(bestOemPlan.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(bestOemPlan.forTest_simpleGetDevice()->getName(), "goodOEM");

    // Verify that we get an error if no driver can run an OEM operation.
    const auto noOemDevices = makeDevices({{"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo}});
    PartitioningCompilation noOemCompilation(&model, noOemDevices);
    ASSERT_EQ(noOemCompilation.finish(), Result::BAD_DATA);

    // Verify that we get an error if a driver can SUPPORT but not PREPARE an OEM operation.
    const auto indecisiveOemDevices =
            makeDevices({{"indecisiveOEM", 0.5, ~0U, PartitioningDriver::OEMIndecisive}});
    PartitioningCompilation indecisiveOemCompilation(&model, indecisiveOemDevices);
    ASSERT_NE(indecisiveOemCompilation.finish(), Result::NO_ERROR);

    // Verify that we get an error if there are no drivers (only CPU fallback).
    PartitioningCompilation driverlessCompilation(&model, makeDevices({}) /* no drivers */);
    ASSERT_EQ(driverlessCompilation.finish(), Result::BAD_DATA);
}
1924
TEST_F(PartitioningTest, RelaxedFP) {
    const auto devices = makeDevices({// Best choice for non-relaxed model.
                                      {"f32", 0.8, 0.9 /* relaxed */, ~0U},
                                      // Best choice for relaxed model.
                                      {"f16", 0.9, 0.8 /* relaxed */, ~0U}});

    // Builds a trivial one-operation model, optionally relaxed to FP16
    // computation, and verifies that partitioning selects expectDevice.
    auto checkDeviceChoice = [&devices](bool doRelax, const char* expectDevice) {
        SCOPED_TRACE(expectDevice);
        PartitioningModel model;
        uint32_t lhs = model.addFloatOperand();
        uint32_t rhs = model.addFloatOperand();
        uint32_t result = model.addOperation2To1V1_0(0, lhs, rhs);
        model.identifyInputsAndOutputs({lhs, rhs}, {result});
        model.relaxComputationFloat32toFloat16(doRelax);
        model.finish();
        ASSERT_TRUE(model.isValid());
        // Verify that the model will be executed on the appropriate device.
        // No need to compare the original model to the model from the plan --
        // we didn't actually do any partitioning.
        ExecutionPlan plan;
        ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                         ExecutePriority::DEFAULT, {}, &plan),
                  ANEURALNETWORKS_NO_ERROR);
        ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), expectDevice);
    };

    ASSERT_NO_FATAL_FAILURE(checkDeviceChoice(false, "f32"));
    ASSERT_NO_FATAL_FAILURE(checkDeviceChoice(true, "f16"));
}
1956
TEST_F(PartitioningTest, Perf) {
    // A note on naming, since the type names here are confusing:
    //
    // OperandType (HAL), WrapperType (NeuralNetworksWrapper.h), and
    // OperandCode (NeuralNetworks.h) are distinct enums describing the same
    // type kind -- e.g., OperandType::FLOAT32, WrapperType::FLOAT32,
    // ANEURALNETWORKS_FLOAT32 -- with matching enumerator values.
    //
    // WrapperOperandType is the NeuralNetworksWrapper.h description of a full
    // operand type (WrapperType plus dimensions plus other attributes).

    const auto checkPerfForType = [](OperandType operandType) {
        if (operandType == OperandType::SUBGRAPH) {
            // SUBGRAPH capabilities are handled differently.
            return;
        }
        SCOPED_TRACE(toString(operandType));
        // A trivial model consisting solely of an OEM operation. The OEM
        // operation is convenient because it permits inputs and outputs of
        // any number and type.
        PartitioningModel model;
        const uint32_t input = model.addOperand(static_cast<WrapperType>(operandType));
        const uint32_t output = model.addOperationOEM1To1(input);
        model.identifyInputsAndOutputs({input}, {output});
        model.finish();
        ASSERT_TRUE(model.isValid());

        const Capabilities baseCapabilities = makeCapabilities(0.5);

        // Verifies that the partitioner produces a SIMPLE plan targeting
        // "expectedDevice". No partitioning takes place, so there is no need
        // to compare the plan's model against the original.
        const auto expectSimpleExecutionOn = [&model](const auto& devices,
                                                      const char* expectedDevice) {
            ExecutionPlan plan;
            ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                             ExecutePriority::DEFAULT, {}, &plan),
                      ANEURALNETWORKS_NO_ERROR);
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
            ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), expectedDevice);
        };

        {
            // A device that performs better than base for this type.
            Capabilities goodCapabilities = baseCapabilities;
            update(&goodCapabilities, operandType, 0.25);
            ASSERT_NO_FATAL_FAILURE(expectSimpleExecutionOn(
                    makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
                                 {"good", goodCapabilities, ~0U, PartitioningDriver::OEMYes}}),
                    "good"));
        }

        {
            // A device that performs worse than base for this type.
            Capabilities badCapabilities = baseCapabilities;
            update(&badCapabilities, operandType, 0.75);
            ASSERT_NO_FATAL_FAILURE(expectSimpleExecutionOn(
                    makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
                                 {"bad", badCapabilities, ~0U, PartitioningDriver::OEMYes}}),
                    "base"));
        }
    };

    for (uint32_t type = static_cast<uint32_t>(OperandTypeRange::FUNDAMENTAL_MIN);
         type <= static_cast<uint32_t>(OperandTypeRange::FUNDAMENTAL_MAX); ++type) {
        checkPerfForType(static_cast<OperandType>(type));
    }
    for (uint32_t type = static_cast<uint32_t>(OperandTypeRange::OEM_MIN);
         type <= static_cast<uint32_t>(OperandTypeRange::OEM_MAX); ++type) {
        checkPerfForType(static_cast<OperandType>(type));
    }
}
2035
2036 // Test token rehashing during the compilation step.
class CacheTest : public PartitioningTest {
   protected:
    // Creates a fresh scratch directory to hold compilation cache files.
    // A failure of mkdtemp() fails the test immediately.
    virtual void SetUp() override {
        PartitioningTest::SetUp();
        char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX";
        char* cacheDir = mkdtemp(cacheDirTemp);
        ASSERT_NE(cacheDir, nullptr);
        mCacheDir = cacheDir;
    }

    // Removes the scratch directory -- but only when the test passed, so that
    // the cache contents remain available for inspection after a failure.
    virtual void TearDown() override {
        if (!::testing::Test::HasFailure()) {
            std::filesystem::remove_all(mCacheDir);
        }
        PartitioningTest::TearDown();
    }

    // Expects every pair of tokens in "tokens" to differ (pairwise uniqueness).
    void expectUniqueTokens(const std::vector<std::vector<uint8_t>>& tokens) {
        for (uint32_t i = 0; i < tokens.size(); i++) {
            SCOPED_TRACE(i);
            for (uint32_t j = i + 1; j < tokens.size(); j++) {
                SCOPED_TRACE(j);
                EXPECT_NE(tokens[i], tokens[j]);
            }
        }
    }

    // Launch a single run of the partitioner against the provided model and device list with
    // cache token provided as tokenIn. Find the partition for the device with deviceName.
    // Record the transformed token into tokenOut. Two or more partitions may be on the same device.
    // "devicePartitionIndex" specifies the index of the ExecutionStep corresponding to the
    // partition of interest, within the sequence of ExecutionSteps on the target device.
    // If tokenIn is empty, no caching information will be provided to the partitioner,
    // and tokenOut is cleared.
    void getTransformedCacheTokenSingle(const PartitioningModel& model,
                                        const std::vector<std::shared_ptr<Device>>& devices,
                                        const char* deviceName, const std::vector<uint8_t>& tokenIn,
                                        ExecutePreference preference, ExecutePriority priority,
                                        uint32_t devicePartitionIndex,
                                        std::vector<uint8_t>* tokenOut) {
        // Compile the model and get the execution plan.
        PartitioningCompilation compilation(&model, devices);
        if (!tokenIn.empty()) {
            compilation.setCaching(mCacheDir.c_str(), tokenIn);
        }
        compilation.setPreference(preference);
        compilation.setPriority(priority);
        ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
        const ExecutionPlan& plan = compilation.getExecutionPlan();

        // Find the cache info for the device.
        const uint8_t* token = nullptr;
        if (plan.forTest_getKind() == ExecutionPlan::Kind::SIMPLE) {
            // A simple plan has exactly one partition, so only index 0 is valid.
            ASSERT_EQ(devicePartitionIndex, 0u);
            ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), deviceName);
            token = plan.forTest_simpleGetCacheToken();
        } else if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) {
            // Walk the steps in plan order, counting only execution steps on
            // the target device until the requested partition index is reached.
            const auto& steps = plan.forTest_compoundGetSteps();
            uint32_t executionStepCount = 0;
            for (const auto& step : steps) {
                if (step->isExecution() &&
                    step->executionStep()->getDevice()->getName() == deviceName) {
                    if (devicePartitionIndex == executionStepCount) {
                        token = step->executionStep()->forTest_getCacheToken();
                        break;
                    }
                    executionStepCount++;
                }
            }
        } else {
            FAIL();
        }

        // Retrieve the transformed token from the cache info.
        if (token == nullptr) {
            tokenOut->clear();
        } else {
            tokenOut->resize(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN);
            std::copy(token, token + ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, tokenOut->begin());
        }
    }

    // A wrapper of getTransformedCacheTokenSingle, which runs getTransformedCacheTokenSingle
    // multiple times and checks if the transformation provides consistent result.
    // Two or more partitions may be on the same device. "devicePartitionIndex" specifies the index
    // of the ExecutionStep corresponding to the partition of interest, within the sequence of
    // ExecutionSteps on the target device.
    void getTransformedCacheToken(const PartitioningModel& model,
                                  const std::vector<std::shared_ptr<Device>>& devices,
                                  const char* deviceName, const std::vector<uint8_t>& tokenIn,
                                  ExecutePreference preference, ExecutePriority priority,
                                  std::vector<uint8_t>* tokenOut,
                                  uint32_t devicePartitionIndex = 0) {
        getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference, priority,
                                       devicePartitionIndex, tokenOut);

        // Test if the runtime maps to the same cache token every time for the same compilation
        // setup.
        for (uint32_t i = 0; i < 10; i++) {
            std::vector<uint8_t> token;
            SCOPED_TRACE(i);
            getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference,
                                           priority, devicePartitionIndex, &token);
            EXPECT_EQ(*tokenOut, token);
        }
    }

    // Builds a two-operation chained model -- (opnd0 op0 opnd1) op1 opnd3 ->
    // opnd4 -- used by most cache token tests; the two operations allow the
    // model to be split across two devices in the compound-body tests.
    void createModelForCachingTests(PartitioningModel* model) {
        uint32_t opnd0 = model->addFloatOperand();
        uint32_t opnd1 = model->addFloatOperand();
        uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1);
        uint32_t opnd3 = model->addFloatOperand();
        uint32_t opnd4 = model->addOperation2To1V1_0(1, opnd2, opnd3);
        model->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
        model->finish();
        ASSERT_TRUE(model->isValid());
    }

    // Builds an IF model whose "then" and "else" branches are separate
    // single-operation referenced models.
    // The first model returned in "models" is the main model; the referenced
    // models must outlive it, which is why all three are returned together.
    void createControlFlowModelForCachingTests(
            std::vector<std::unique_ptr<PartitioningModel>>* models) {
        CHECK(models != nullptr);

        auto trueModel = std::make_unique<PartitioningModel>();
        {
            const uint32_t opnd0 = trueModel->addFloatOperand();
            const uint32_t opnd1 = trueModel->addFloatOperand();
            const uint32_t opnd2 = trueModel->addOperation2To1V1_0(0, opnd0, opnd1);
            trueModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
            trueModel->finish();
            ASSERT_TRUE(trueModel->isValid());
        }

        auto falseModel = std::make_unique<PartitioningModel>();
        {
            const uint32_t opnd0 = falseModel->addFloatOperand();
            const uint32_t opnd1 = falseModel->addFloatOperand();
            const uint32_t opnd2 = falseModel->addOperation2To1V1_0(0, opnd0, opnd1);
            falseModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
            falseModel->finish();
            ASSERT_TRUE(falseModel->isValid());
        }

        auto mainModel = std::make_unique<PartitioningModel>();
        {
            const uint32_t opnd0 = mainModel->addBooleanOperand();
            const uint32_t opnd1 = mainModel->addFloatOperand();
            const uint32_t opnd2 = mainModel->addFloatOperand();
            const uint32_t opnd3 = mainModel->addFloatOperand();
            mainModel->addIfOperation(opnd0, *trueModel, *falseModel, {opnd1, opnd2}, {opnd3});
            mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
            mainModel->finish();
            ASSERT_TRUE(mainModel->isValid());
        }

        models->clear();
        models->push_back(std::move(mainModel));
        models->push_back(std::move(trueModel));
        models->push_back(std::move(falseModel));
    }

    // Scratch directory that holds compilation cache files for one test run.
    std::string mCacheDir;
};
2199
2200 // Test the case when no token is provided by the application and the execution plan has a
2201 // simple body.
TEST_F(CacheTest, CacheTokenNoneSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device capable of executing the entire model.
    const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});

    // When the application supplies no token, the plan must carry no
    // transformed token either.
    const std::vector<uint8_t> emptyTokenIn;
    std::vector<uint8_t> tokenOut;
    getTransformedCacheToken(model, deviceA, "deviceA", emptyTokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenOut);
    EXPECT_TRUE(tokenOut.empty());
}
2217
2218 // Test if the runtime maps to different cache tokens for devices with different names in
2219 // execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentDeviceNamesSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // Each device alone can execute the whole model.
    const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
    const auto deviceB = makeDevices({{"deviceB", 0.5, ~0U}});

    const std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenForA, tokenForB;
    getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForA);
    getTransformedCacheToken(model, deviceB, "deviceB", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForB);
    // Different device names must hash to different transformed tokens.
    expectUniqueTokens({tokenForA, tokenForB});
}
2238
2239 // Test if the runtime maps to different cache tokens for devices with different version strings in
2240 // execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // The same device name with two different version strings; either device
    // can execute the whole model.
    const auto deviceA_1_0 = makeDevices({{"deviceA", "1.0", 0.5, ~0U}});
    const auto deviceA_1_1 = makeDevices({{"deviceA", "1.1", 0.5, ~0U}});

    const std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenFor_1_0, tokenFor_1_1;
    getTransformedCacheToken(model, deviceA_1_0, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenFor_1_0);
    getTransformedCacheToken(model, deviceA_1_1, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenFor_1_1);
    // Different version strings must hash to different transformed tokens.
    expectUniqueTokens({tokenFor_1_0, tokenFor_1_1});
}
2259
2260 // Test if the runtime maps to different cache tokens for compilations with different preferences
2261 // in execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentPreferencesSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device capable of executing the entire model.
    const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});

    const std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> fastToken, powerToken, sustainedToken;
    // Compile the same model three times, varying only the preference.
    getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &fastToken);
    getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
                             ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
                             &powerToken);
    getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
                             ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
                             &sustainedToken);
    // Each preference must hash to a different transformed token.
    expectUniqueTokens({fastToken, powerToken, sustainedToken});
}
2282
2283 // Test if the runtime maps to different cache tokens for compilations with different priorities
2284 // in execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentPrioritiesSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device capable of executing the entire model.
    const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});

    const std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> lowToken, mediumToken, highToken;
    // Compile the same model three times, varying only the priority.
    getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
                             &lowToken);
    getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
                             &mediumToken);
    getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
                             &highToken);
    // Each priority must hash to a different transformed token.
    expectUniqueTokens({lowToken, mediumToken, highToken});
}
2305
2306 // Test if the runtime maps to different cache tokens for compilations with different tokens
2307 // provided by application in execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentTokensSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device capable of executing the entire model.
    const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});

    // Two distinct application-supplied tokens.
    const std::vector<uint8_t> tokenIn1(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    const std::vector<uint8_t> tokenIn2(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
    std::vector<uint8_t> tokenOut1, tokenOut2;
    getTransformedCacheToken(model, deviceA, "deviceA", tokenIn1,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenOut1);
    getTransformedCacheToken(model, deviceA, "deviceA", tokenIn2,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenOut2);
    // Different input tokens must hash to different transformed tokens.
    expectUniqueTokens({tokenOut1, tokenOut2});
}
2326
2327 // Test the case when no token is provided by the application and the execution plan has a
2328 // compound body.
TEST_F(CacheTest, CacheTokenNoneCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // DeviceA executes the first operation only; deviceB takes the second.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    // With no application token, neither partition gets a transformed token.
    const std::vector<uint8_t> emptyTokenIn;
    std::vector<uint8_t> tokenOut;
    getTransformedCacheToken(model, devices, "deviceA", emptyTokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenOut);
    EXPECT_TRUE(tokenOut.empty());
    getTransformedCacheToken(model, devices, "deviceB", emptyTokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenOut);
    EXPECT_TRUE(tokenOut.empty());
}
2346
2347 // Test if the runtime maps to different cache tokens for devices with different names in
2348 // execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentDeviceNamesCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // In devices1, deviceA executes the first operation only; in devices2,
    // deviceB executes the first operation only. The other operation goes to
    // deviceC in both cases.
    const auto devices1 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});
    const auto devices2 = makeDevices({{"deviceB", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});

    const std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenForA, tokenForB;
    getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForA);
    getTransformedCacheToken(model, devices2, "deviceB", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForB);
    // Different device names must hash to different transformed tokens.
    expectUniqueTokens({tokenForA, tokenForB});
}
2368
// Test if the runtime maps to different cache tokens for devices with different version strings
// in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // DeviceA (version 1.0) executes the first operation only.
    const auto devices1 = makeDevices({{"deviceA", "1.0", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    // DeviceA (version 1.1) executes the first operation only.
    const auto devices2 = makeDevices({{"deviceA", "1.1", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> deviceA_1_0_Token, deviceA_1_1_Token;
    getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &deviceA_1_0_Token);
    getTransformedCacheToken(model, devices2, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &deviceA_1_1_Token);
    // A different version string must yield a different transformed token.
    expectUniqueTokens({deviceA_1_0_Token, deviceA_1_1_Token});
}
2390
2391 // Test if the runtime maps to different cache tokens for compilations with different preferences
2392 // in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentPreferencesCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // DeviceA executes the first operation only; deviceB takes the second.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    const std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> fastToken, powerToken, sustainedToken;
    // Compile the same model three times, varying only the preference.
    getTransformedCacheToken(model, devices, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &fastToken);
    getTransformedCacheToken(model, devices, "deviceA", tokenIn,
                             ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
                             &powerToken);
    getTransformedCacheToken(model, devices, "deviceA", tokenIn,
                             ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
                             &sustainedToken);
    // Each preference must hash to a different transformed token.
    expectUniqueTokens({fastToken, powerToken, sustainedToken});
}
2413
2414 // Test if the runtime maps to different cache tokens for compilations with different priorities
2415 // in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentPrioritiesCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // DeviceA executes the first operation only; deviceB takes the second.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    const std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> lowToken, mediumToken, highToken;
    // Compile the same model three times, varying only the priority.
    getTransformedCacheToken(model, devices, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
                             &lowToken);
    getTransformedCacheToken(model, devices, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
                             &mediumToken);
    getTransformedCacheToken(model, devices, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
                             &highToken);
    // Each priority must hash to a different transformed token.
    expectUniqueTokens({lowToken, mediumToken, highToken});
}
2436
2437 // Test if the runtime maps to different cache tokens for compilations with different tokens
2438 // provided by application in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentTokensCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // DeviceA executes the first operation only; deviceB takes the second.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    // Two distinct application-supplied tokens.
    const std::vector<uint8_t> tokenIn1(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    const std::vector<uint8_t> tokenIn2(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
    std::vector<uint8_t> tokenOut1, tokenOut2;
    getTransformedCacheToken(model, devices, "deviceA", tokenIn1,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenOut1);
    getTransformedCacheToken(model, devices, "deviceA", tokenIn2,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenOut2);
    // Different input tokens must hash to different transformed tokens.
    expectUniqueTokens({tokenOut1, tokenOut2});
}
2457
2458 // Test if the runtime maps to different cache tokens for compilations with different partitioning
2459 // outcome in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentPartitionsCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // Three device mixes that yield three different partitionings:
    //   devices1: deviceA executes the whole model;
    //   devices2: deviceA executes the first operation only;
    //   devices3: deviceA executes the second operation only.
    const auto devices1 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 0U}});
    const auto devices2 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    const auto devices3 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 0}});

    const std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenOut1, tokenOut2, tokenOut3;
    getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenOut1);
    getTransformedCacheToken(model, devices2, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenOut2);
    getTransformedCacheToken(model, devices3, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenOut3);
    // Each partitioning outcome must hash to a different transformed token.
    expectUniqueTokens({tokenOut1, tokenOut2, tokenOut3});
}
2484
2485 // Test if the runtime maps different referenced models to different cache tokens.
TEST_F(CacheTest, CacheTokenDifferentReferenceModelPartitions) {
    std::vector<std::unique_ptr<PartitioningModel>> models;
    createControlFlowModelForCachingTests(&models);
    const auto& mainModel = *models[0];

    // DeviceA supports the two referenced models but not IF itself, so the
    // plan contains two separate partitions on deviceA.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}});

    const std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> firstPartitionToken, secondPartitionToken;
    getTransformedCacheToken(mainModel, devices, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &firstPartitionToken, /*devicePartitionIndex=*/0);
    getTransformedCacheToken(mainModel, devices, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &secondPartitionToken, /*devicePartitionIndex=*/1);
    // The two partitions must carry different transformed tokens even though
    // they run on the same device.
    expectUniqueTokens({firstPartitionToken, secondPartitionToken});
}
2505
2506 // Very basic tests of some of the PerformanceInfo functionality.
2507 // Placed in this file because partitioning is the consumer of this functionality.
// Trivial fixture: these tests need no shared setup or state.
class PerfTest : public ::testing::Test {};
2509
TEST_F(PerfTest, Lookup) {
    // Derive an arbitrary but reproducible performance value from an
    // OperandType so that we can verify lookup returns exactly what update
    // stored.
    const auto perfForType = [](OperandType type) {
        return float(static_cast<uint32_t>(type));
    };

    // Apply "action" to every OperandType in the inclusive range [first, last].
    const auto forEachType = [](OperandTypeRange first, OperandTypeRange last, auto&& action) {
        for (uint32_t type = static_cast<uint32_t>(first);
             type <= static_cast<uint32_t>(last); ++type) {
            action(static_cast<OperandType>(type));
        }
    };

    Capabilities capabilities = makeCapabilities(-1.0f);

    // Store a distinct performance value for every fundamental and OEM type.
    const auto store = [&capabilities, &perfForType](OperandType operandType) {
        update(&capabilities, operandType, perfForType(operandType));
    };
    forEachType(OperandTypeRange::FUNDAMENTAL_MIN, OperandTypeRange::FUNDAMENTAL_MAX, store);
    forEachType(OperandTypeRange::OEM_MIN, OperandTypeRange::OEM_MAX, store);

    // Make sure lookup retrieves the values stored by update.
    forEachType(OperandTypeRange::FUNDAMENTAL_MIN, OperandTypeRange::FUNDAMENTAL_MAX,
                [&](OperandType operandType) {
                    if (operandType == OperandType::SUBGRAPH) {
                        // SUBGRAPH capabilities are handled differently.
                        return;
                    }
                    SCOPED_TRACE(toString(operandType));
                    EXPECT_EQ(lookupExecTime(capabilities, operandType), perfForType(operandType));
                });
    forEachType(OperandTypeRange::OEM_MIN, OperandTypeRange::OEM_MAX,
                [&](OperandType operandType) {
                    SCOPED_TRACE(toString(operandType));
                    EXPECT_EQ(lookupExecTime(capabilities, operandType), perfForType(operandType));
                });

    // A type that was never stored must report the worst possible exec time.
    const OperandType missingType =
            static_cast<OperandType>(static_cast<uint32_t>(OperandTypeRange::BASE_MAX) + 1);
    EXPECT_EQ(lookupExecTime(capabilities, missingType), FLT_MAX);
}
2553
2554 class ControlFlowPartitioningTest : public PartitioningTest {
2555 protected:
2556 // opnd0 --> +-----+
2557 // | ADD | --> opnd2
2558 // opnd1 --> +-----+
createBranchOrBodyModel()2559 std::unique_ptr<PartitioningModel> createBranchOrBodyModel() {
2560 auto model = std::make_unique<PartitioningModel>();
2561 const uint32_t opnd0 = model->addFloatOperand();
2562 const uint32_t opnd1 = model->addFloatOperand();
2563 const uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1);
2564 model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2565 model->finish();
2566 EXPECT_TRUE(model->isValid());
2567 return model;
2568 }
2569
2570 // opnd0 --> +-------+
2571 // | EQUAL | --> opnd2
2572 // opnd1 --> +-------+
createCondModel()2573 std::unique_ptr<PartitioningModel> createCondModel() {
2574 auto model = std::make_unique<PartitioningModel>();
2575 const uint32_t opnd0 = model->addFloatOperand();
2576 const uint32_t opnd1 = model->addFloatOperand();
2577 const uint32_t opnd2 = model->addExplicitOperationXTo1(
2578 ANEURALNETWORKS_EQUAL, {opnd0, opnd1}, WrapperType::TENSOR_BOOL8);
2579 model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2580 model->finish();
2581 EXPECT_TRUE(model->isValid());
2582 return model;
2583 }
2584
2585 // opnd0 --> +----+
2586 // opnd1 --> | IF | --> opnd3
2587 // opnd2 --> +----+
createIfModel(bool firstOuterInputUnknownSize=false)2588 std::vector<std::unique_ptr<PartitioningModel>> createIfModel(
2589 bool firstOuterInputUnknownSize = false) {
2590 auto thenModel = createBranchOrBodyModel();
2591 auto elseModel = createBranchOrBodyModel();
2592
2593 auto mainModel = std::make_unique<PartitioningModel>();
2594 const uint32_t opnd0 = mainModel->addBooleanOperand();
2595 const uint32_t opnd1 = mainModel->addFloatOperand(
2596 firstOuterInputUnknownSize ? PartitioningModel::Dimensioned::NO
2597 : PartitioningModel::Dimensioned::YES);
2598 const uint32_t opnd2 = mainModel->addFloatOperand();
2599 const uint32_t opnd3 = mainModel->addFloatOperand();
2600 mainModel->addIfOperation(opnd0, *thenModel, *elseModel, {opnd1, opnd2}, {opnd3});
2601 mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
2602 mainModel->finish();
2603 EXPECT_TRUE(mainModel->isValid());
2604
2605 std::vector<std::unique_ptr<PartitioningModel>> models;
2606 models.push_back(std::move(mainModel));
2607 models.push_back(std::move(thenModel));
2608 models.push_back(std::move(elseModel));
2609 return std::move(models);
2610 }
2611
2612 // opnd0 --> +-------+
2613 // | WHILE | --> opnd2
2614 // opnd1 --> +-------+
createWhileModel(bool firstOuterInputUnknownSize=false)2615 std::vector<std::unique_ptr<PartitioningModel>> createWhileModel(
2616 bool firstOuterInputUnknownSize = false) {
2617 auto condModel = createCondModel();
2618 auto bodyModel = createBranchOrBodyModel();
2619
2620 auto mainModel = std::make_unique<PartitioningModel>();
2621 const uint32_t opnd0 = mainModel->addFloatOperand(
2622 firstOuterInputUnknownSize ? PartitioningModel::Dimensioned::NO
2623 : PartitioningModel::Dimensioned::YES);
2624 const uint32_t opnd1 = mainModel->addFloatOperand();
2625 const uint32_t opnd2 = mainModel->addFloatOperand();
2626 mainModel->addWhileOperation(*condModel, *bodyModel, {opnd0, opnd1}, {opnd2});
2627 mainModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2628 mainModel->finish();
2629 EXPECT_TRUE(mainModel->isValid());
2630
2631 std::vector<std::unique_ptr<PartitioningModel>> models;
2632 models.push_back(std::move(mainModel));
2633 models.push_back(std::move(condModel));
2634 models.push_back(std::move(bodyModel));
2635 return std::move(models);
2636 }
2637 };
2638
TEST_F(ControlFlowPartitioningTest, IF_Interpreted) {
    const auto models = createIfModel();

    // A driver that can run the referenced branch models but has no IF
    // support, forcing the framework to interpret the IF.
    const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});

    ExecutionPlan plan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);

    // Interpreted IF shape: [IF, then-execution, goto, else-execution].
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(4));
    ASSERT_TRUE(steps[0]->isIf());
    ASSERT_TRUE(steps[1]->isExecution());
    ASSERT_TRUE(steps[2]->isGoto());
    ASSERT_TRUE(steps[3]->isExecution());
    // Both branch executions land on the driver.
    for (const size_t stepIndex : {size_t(1), size_t(3)}) {
        ASSERT_EQ(steps[stepIndex]->executionStep()->getDevice()->getName(), "V1_0");
    }
}
2659
TEST_F(ControlFlowPartitioningTest, WHILE_Interpreted) {
    const auto models = createWhileModel();

    // A driver that can run the body model but supports neither WHILE nor
    // the condition model (the condition uses EQUAL, not available in V1_0).
    const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});

    ExecutionPlan plan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);

    // Interpreted WHILE shape: [WHILE, cond-execution, goto, body-execution, goto].
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(5));
    ASSERT_TRUE(steps[0]->isWhile());
    ASSERT_TRUE(steps[1]->isExecution());
    ASSERT_TRUE(steps[2]->isGoto());
    ASSERT_TRUE(steps[3]->isExecution());
    ASSERT_TRUE(steps[4]->isGoto());
    // The unsupported condition falls back to the CPU; the body runs on the driver.
    ASSERT_EQ(steps[1]->executionStep()->getDevice()->getName(),
              DeviceManager::getCpuDevice()->getName());
    ASSERT_EQ(steps[3]->executionStep()->getDevice()->getName(), "V1_0");
}
2683
TEST_F(ControlFlowPartitioningTest, IF_SimplePlan) {
    const auto models = createIfModel();

    // A single device that handles every operation, including IF itself.
    const auto devices =
            makeDevices({{"ALL", 0.9, ~0U, PartitioningDriver::OEMNo, {OperationType::IF}}});

    ExecutionPlan plan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);

    // Everything fits on one device, so the plan stays SIMPLE (no interpretation).
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "ALL");
}
2698
TEST_F(ControlFlowPartitioningTest, WHILE_SimplePlan) {
    const auto models = createWhileModel();

    // A single device that handles every operation, including WHILE and the
    // EQUAL used by the condition model.
    const auto devices = makeDevices({{"ALL",
                                       0.9,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       {OperationType::WHILE, OperationType::EQUAL}}});

    ExecutionPlan plan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);

    // Everything fits on one device, so the plan stays SIMPLE (no interpretation).
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "ALL");
}
2716
TEST_F(ControlFlowPartitioningTest, IF_UnknownSize) {
    const auto models = createIfModel(/*firstOuterInputUnknownSize=*/true);

    // The device supports everything, but the partitioner ignores its IF
    // support due to http://b/159076604#comment5.
    const auto devices =
            makeDevices({{"ALL", 0.9, ~0U, PartitioningDriver::OEMNo, {OperationType::IF}}});

    ExecutionPlan plan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);

    // The control flow interpreter does not support unknown size (b/132458982),
    // so the whole model falls back to the CPU as a SIMPLE plan.
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), DeviceManager::getCpuDevice()->getName());
}
2733
TEST_F(ControlFlowPartitioningTest, WHILE_UnknownSize) {
    const auto models = createWhileModel(/*firstOuterInputUnknownSize=*/true);

    // The device supports everything, but the partitioner ignores its WHILE
    // support due to http://b/159076604#comment5.
    const auto devices = makeDevices({{"ALL",
                                       0.9,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       {OperationType::WHILE, OperationType::EQUAL}}});

    ExecutionPlan plan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);

    // The control flow interpreter does not support unknown size (b/132458982),
    // so the whole model falls back to the CPU as a SIMPLE plan.
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), DeviceManager::getCpuDevice()->getName());
}
2753
2754 } // namespace
2755