1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <ControlFlow.h>
18 #include <HalInterfaces.h>
19 #include <SampleDriver.h>
20 #include <Utils.h>
21 #include <ValidateHal.h>
22 #include <gtest/gtest.h>
23
24 #include <algorithm>
25 #include <filesystem>
26 #include <functional>
27 #include <iostream>
28 #include <map>
29 #include <memory>
30 #include <numeric>
31 #include <queue>
32 #include <set>
33 #include <string>
34 #include <tuple>
35 #include <type_traits>
36 #include <utility>
37 #include <vector>
38
39 #include "CompilationBuilder.h"
40 #include "ExecutionPlan.h"
41 #include "HalUtils.h"
42 #include "Manager.h"
43 #include "ModelBuilder.h"
44 #include "NeuralNetworks.h"
45 #include "NeuralNetworksOEM.h"
46 #include "TestNeuralNetworksWrapper.h"
47 #include "TmpDirectoryUtils.h"
48
49 // Uncomment the following line to generate some debugging output that
50 // may be useful when analyzing failures:
51 //
52 // #define VERBOSE VERBOSE
53
54 // These tests do whitebox testing of the graph partitioning
55 // algorithm. It is "whitebox" in the sense that we're not evaluating
56 // whether a particular partitioning is legal, or "good enough"
57 // according to some metric, but whether it exactly matches the
58 // expected behavior of the current partitioning algorithm.
59 //
60 // A key part of the current partitioning algorithm is to determine
61 // which device among the available devices should be the one to
62 // execute a particular operation from the graph. This determination
63 // is made "locally" -- i.e., it does not depend on the graph
64 // topology, only on the properties of the operation in question.
65 // IDevice::getSupportedOperations() indicates which operations in a
66 // graph can be executed on a device, and IDevice::getCapabilities()
67 // indicates how "good" that device is for executing particular kinds
68 // of operations. For each operation, the partitioning algorithm
69 // picks the "best" device that is capable of executing that
// operation; if no device can do so, then the algorithm picks the
// CPU.
72 //
73 // As part of this testing approach, we want to make it easy to
74 // specify which operations in a test graph can be executed on which
75 // devices. We accomplish this in the following way:
76 // - A unary OEM operation is available.
77 // - There is a collection of operations (each of which has two inputs
78 // and one output):
79 // - Eight kinds of operations available at driver version V1_0 or
80 // later. They are represented in the graph as ADD or MUL with a
81 // particular activation function -- two opcodes times four
82 // activation functions means eight available operation kinds.
83 // This is a low-level representation detail -- when we specify the
84 // behavior of the device or build a graph, we do so in terms of
85 // operation encodings 0..7.
86 // - Eight kinds of operations available at driver version V1_1 or
87 // later. They are represented in the graph as DIV or SUB with
88 // a particular activation function, exactly analogous to ADD
89 // and MUL above. We use operation encodings 8..15 for them.
90 // - Four kinds of operations available at driver version V1_2 or
91 // later. They are represented in the graph as MAXIMUM,
92 // MINIMUM, POW, or PRELU. These operations take no activation
93 // function, so we only get 4 operation kinds, for which we
94 // use operation encodings 16..19.
95 // - There is another collection of operations (each of which has one input
96 // and one output):
97 // - Single operation available at driver version V1_3 or
98 // later. It is represented in the graph as HARD_SWISH.
99 // These operations take no activation function, for which we
100 // use operation encodings 20..20.
101
102 // When we instantiate a device for testing purposes, we specify what subset of
103 // those operations the device is able to execute.
104 //
105 // In order to determine whether or not a partitioning matches the
106 // expected partitioning, we check the number of partitions, check
107 // which device each partition targets, and compare each partition's
108 // subgraph, model inputs, model outputs, step model inputs, and
109 // step model outputs against what is expected. In order to perform
110 // that comparison, we build a model to compare against a partition's
111 // step model and run a graph comparison algorithm on it. The graph
112 // comparison and the inputs and outputs comparisons are syntactic
113 // rather than semantic comparisons -- they don't allow for
114 // reorderings of inputs and outputs. Because of this, we need to
115 // know exactly how the partitioning algorithm orders inputs and
116 // outputs in order to construct the models and operand lists to
117 // compare against. Here are some relevant behaviors of the
118 // partitioning algorithm:
119 //
120 // - It builds a subgraph by walking operations in forward topological
121 // order, and adding each operation's input operands and output
122 // operands in index order (input followed by output) when that
123 // operation is added. (It does not add an input that has already
124 // been added.)
125 // - It finds model inputs, model outputs, and step model inputs in
126 // the order the corresponding operands were added to the subgraph
127 // (see ExecutionStep methods getModelInputs(), getModelOutputs(),
128 // getTempsAsStepModelInputs(), getOutputsAsStepModelInputs()).
129 // - It finds temps as step model outputs in numerical order of corresponding
130 // operand number in the original model (see ExecutionStep method
131 // getTempsAsStepModelOutputs()).
132 // - When it calls identifyInputsAndOutputs() on the step model, it
133 // passes inputs from getModelInputs() in order, followed by temps as
134 // step model inputs from getTempsAsStepModelInputs() in order,
135 // followed by outputs as step model inputs from
136 // getOutputsAsStepModelInputs() in order; and it passes outputs from
137 // getModelOutputs() in order followed by step model outputs from
138 // getTempsAsStepModelOutputs() in order.
139 //
140 // TODO: Maybe the logic for comparing a partition to an expected
141 // model should be changed to tolerate reorderings of inputs and
142 // outputs, so that when we build models and lists to compare
143 // against, we don't need to worry about input and output
144 // orderings. But is there a way to do this that still lets us
145 // verify that we have the correct relationships between
146 // an (original) model's inputs and outputs and each step model's
147 // inputs and outputs, as well as the correct relationship
148 // between step model inputs and outputs across partitions?
149
150 namespace {
151
152 namespace hardware = android::hardware;
153 namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
154 namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
155 namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
156 namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
157 using CompilationBuilder = ::android::nn::CompilationBuilder;
158 using Device = ::android::nn::Device;
159 using DeviceManager = ::android::nn::DeviceManager;
160 using ExecutePreference = ::android::nn::test_wrapper::ExecutePreference;
161 using ExecutePriority = ::android::nn::test_wrapper::ExecutePriority;
162 using ExecutionPlan = ::android::nn::ExecutionPlan;
163 using ExecutionStep = ::android::nn::ExecutionStep;
164 using HalCacheToken = ::android::nn::HalCacheToken;
165 using HalVersion = ::android::nn::HalVersion;
166 using HidlModel = V1_3::Model;
167 using IOType = ::android::nn::IOType;
168 using LogicalStep = ::android::nn::LogicalStep;
169 using ModelBuilder = ::android::nn::ModelBuilder;
170 using Operand = ::android::nn::Operand;
171 using Operation = ::android::nn::Operation;
172 using OptionalTimePoint = ::android::nn::OptionalTimePoint;
173 using Result = ::android::nn::test_wrapper::Result;
174 using SampleDriver = ::android::nn::sample_driver::SampleDriver;
175 using SharedDevice = ::android::nn::SharedDevice;
176 using SourceOperandIndex = ::android::nn::SourceOperandIndex;
177 using StepRole = ::android::nn::StepRole;
178 using WrapperCompilation = ::android::nn::test_wrapper::Compilation;
179 using WrapperExecution = ::android::nn::test_wrapper::Execution;
180 using WrapperModel = ::android::nn::test_wrapper::Model;
181 using WrapperOperandType = ::android::nn::test_wrapper::OperandType;
182 using WrapperSymmPerChannelQuantParams = ::android::nn::test_wrapper::SymmPerChannelQuantParams;
183 using WrapperType = ::android::nn::test_wrapper::Type;
184 using android::sp;
185
update(V1_3::Capabilities * capabilities,V1_3::OperandType type,float perf)186 void update(V1_3::Capabilities* capabilities, V1_3::OperandType type, float perf) {
187 V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
188 ::android::nn::update(&capabilities->operandPerformance, type, perfInfo);
189 }
190
lookupExecTime(const V1_3::Capabilities & capabilities,V1_3::OperandType type)191 float lookupExecTime(const V1_3::Capabilities& capabilities, V1_3::OperandType type) {
192 return ::android::nn::lookup(capabilities.operandPerformance, type).execTime;
193 }
194
min(HalVersion a,HalVersion b)195 HalVersion min(HalVersion a, HalVersion b) {
196 return int32_t(a) < int32_t(b) ? a : b;
197 }
198
// Number of fused activation function codes (NONE, RELU, RELU1, RELU6). ADD,
// MUL, DIV, and SUB each get one encoding per fuse code.
const uint32_t kNumFuseCodes = 4;
// Sentinel returned by lookupOperation() when an operation has no encoding.
const uint32_t kBadOperation = ~0;

// V1_0 operations
const uint32_t kFirstEncodingADD = 0;
const uint32_t kFirstEncodingMUL = kFirstEncodingADD + kNumFuseCodes;
const uint32_t kFirstEncodingV1_0 = kFirstEncodingADD;
const uint32_t kLastEncodingV1_0 = kFirstEncodingMUL + kNumFuseCodes - 1;

// V1_1 operations
const uint32_t kFirstEncodingDIV = kLastEncodingV1_0 + 1;
const uint32_t kFirstEncodingSUB = kFirstEncodingDIV + kNumFuseCodes;
const uint32_t kFirstEncodingV1_1 = kFirstEncodingDIV;
const uint32_t kLastEncodingV1_1 = kFirstEncodingSUB + kNumFuseCodes - 1;

// V1_2 operations (no fuse code, so one encoding per operation)
const uint32_t kFirstEncodingMAXIMUM = kLastEncodingV1_1 + 1;
const uint32_t kFirstEncodingMINIMUM = kFirstEncodingMAXIMUM + 1;
const uint32_t kFirstEncodingPOW = kFirstEncodingMINIMUM + 1;
const uint32_t kFirstEncodingPRELU = kFirstEncodingPOW + 1;
const uint32_t kFirstEncodingV1_2 = kFirstEncodingMAXIMUM;
const uint32_t kLastEncodingV1_2 = kFirstEncodingPRELU;

// V1_3 operations (no fuse code)
const uint32_t kFirstEncodingHARD_SWISH = kLastEncodingV1_2 + 1;
const uint32_t kFirstEncodingV1_3 = kFirstEncodingHARD_SWISH;
const uint32_t kLastEncodingV1_3 = kFirstEncodingHARD_SWISH;

// Maps each encodable operation type to the first encoding assigned to it;
// for the fused types (ADD/MUL/DIV/SUB) the actual encoding is this value
// plus the fuse code.
const std::map<V1_3::OperationType, uint32_t> operationToFirstEncoding = {
        {V1_3::OperationType::ADD, kFirstEncodingADD},
        {V1_3::OperationType::MUL, kFirstEncodingMUL},
        {V1_3::OperationType::DIV, kFirstEncodingDIV},
        {V1_3::OperationType::SUB, kFirstEncodingSUB},
        {V1_3::OperationType::MAXIMUM, kFirstEncodingMAXIMUM},
        {V1_3::OperationType::MINIMUM, kFirstEncodingMINIMUM},
        {V1_3::OperationType::POW, kFirstEncodingPOW},
        {V1_3::OperationType::PRELU, kFirstEncodingPRELU},
        {V1_3::OperationType::HARD_SWISH, kFirstEncodingHARD_SWISH},
};

// Sorted in reverse order (std::greater) so that we can use map::lower_bound to
// find an entry whose key is numerically less than or equal to a search value.
// mapped_type is (OperandCode, hasFuseCode).
const std::map<uint32_t, std::pair<uint32_t, bool>, std::greater<>> firstEncodingToOperation = {
        {kFirstEncodingADD, {ANEURALNETWORKS_ADD, true}},
        {kFirstEncodingMUL, {ANEURALNETWORKS_MUL, true}},
        {kFirstEncodingDIV, {ANEURALNETWORKS_DIV, true}},
        {kFirstEncodingSUB, {ANEURALNETWORKS_SUB, true}},
        {kFirstEncodingMAXIMUM, {ANEURALNETWORKS_MAXIMUM, false}},
        {kFirstEncodingMINIMUM, {ANEURALNETWORKS_MINIMUM, false}},
        {kFirstEncodingPOW, {ANEURALNETWORKS_POW, false}},
        {kFirstEncodingPRELU, {ANEURALNETWORKS_PRELU, false}},
        {kFirstEncodingHARD_SWISH, {ANEURALNETWORKS_HARD_SWISH, false}},
};
253
254 // Look up the operation with the specified index in a graph, and return the
255 // operation encoding; or, if for some reason this is not one of the encoded
256 // operations, then return kBadOperation.
lookupOperation(std::function<const V1_3::Operation & (uint32_t)> getOperation,std::function<const V1_3::Operand & (uint32_t)> getOperand,std::function<const uint8_t * (uint32_t)> getValue,uint32_t operationIndex)257 uint32_t lookupOperation(std::function<const V1_3::Operation&(uint32_t)> getOperation,
258 std::function<const V1_3::Operand&(uint32_t)> getOperand,
259 std::function<const uint8_t*(uint32_t)> getValue,
260 uint32_t operationIndex) {
261 const V1_3::Operation& operation = getOperation(operationIndex);
262 switch (operation.type) {
263 case V1_3::OperationType::ADD:
264 case V1_3::OperationType::MUL:
265 case V1_3::OperationType::DIV:
266 case V1_3::OperationType::SUB: {
267 // input2 is the fused activation function
268 const V1_3::Operand& input2 = getOperand(operation.inputs[2]);
269 if ((input2.type == V1_3::OperandType::INT32) &&
270 (input2.lifetime == V1_3::OperandLifeTime::CONSTANT_COPY)) {
271 int32_t value;
272 CHECK_EQ(sizeof(value), input2.location.length);
273 memcpy(&value, getValue(input2.location.offset), input2.location.length);
274 return value + operationToFirstEncoding.at(operation.type);
275 }
276 break;
277 }
278 default: {
279 auto it = operationToFirstEncoding.find(operation.type);
280 if (it != operationToFirstEncoding.end()) {
281 return it->second;
282 }
283 break;
284 }
285 }
286 return kBadOperation;
287 }
288
lookupOperation(const HidlModel & model,const V1_3::Subgraph & subgraph,uint32_t operationIndex)289 uint32_t lookupOperation(const HidlModel& model, const V1_3::Subgraph& subgraph,
290 uint32_t operationIndex) {
291 return lookupOperation(
292 [&subgraph](uint32_t index) -> const V1_3::Operation& {
293 return subgraph.operations[index];
294 },
295 [&subgraph](uint32_t index) -> const V1_3::Operand& {
296 return subgraph.operands[index];
297 },
298 [&model](uint32_t offset) { return &model.operandValues[offset]; }, operationIndex);
299 }
300
301 #ifdef VERBOSE
// This is a debugging utility function: prints a ModelBuilder's HIDL form --
// the whole model, the main subgraph's input/output indexes, and each
// operation individually (the per-operation dump is easier to read than the
// full-model dump when diagnosing partitioning failures).
void dump(const char* name, const ModelBuilder* model) {
    const HidlModel hidlModel = model->makeHidlModel();
    std::cout << name << ": " << hidlModel << std::endl;
    std::cout << "inputs: " << hidlModel.main.inputIndexes << std::endl;
    std::cout << "outputs: " << hidlModel.main.outputIndexes << std::endl;
    for (size_t i = 0, e = hidlModel.main.operations.size(); i < e; i++) {
        std::cout << "operation[" << i << "]: " << hidlModel.main.operations[i] << std::endl;
    }
}
312 #endif
313
314 // This is an IDevice for testing purposes. It only has a few interesting
315 // properties, all of which are specified as constructor arguments: device
316 // capabilities; which subset of operation kinds (0..19) does the device
317 // support; does the device support the OEM operation; does the device support
318 // other operations. The subset is represented with a bitmask, in which
319 // operation kind K corresponds to the bit (1 << K). The other operations are
320 // represented by a set of OperationType.
321 class PartitioningDriver : public SampleDriver {
322 public:
323 enum OEM {
324 OEMNo, // rejected by getSupportedOperations and prepareModel
325 OEMIndecisive, // accepted by getSupportedOperations but not prepareModel
326 OEMYes, // accepted by getSupportedOperations and prepareModel
327 };
328
PartitioningDriver(const char * name,const char * version,V1_3::Capabilities capabilities,uint32_t operationMask,OEM oem=OEMNo,std::set<V1_3::OperationType> operationTypes={})329 PartitioningDriver(const char* name, const char* version, V1_3::Capabilities capabilities,
330 uint32_t operationMask, OEM oem = OEMNo,
331 std::set<V1_3::OperationType> operationTypes = {})
332 : SampleDriver(name),
333 mVersionString(version),
334 mCapabilities(capabilities),
335 mOperationMask(operationMask),
336 mOEM(oem),
337 mOperationTypes(std::move(operationTypes)) {
338 CHECK_EQ(mOperationTypes.count(V1_3::OperationType::OEM_OPERATION), size_t(0));
339 if (operationMask) {
340 std::for_each(mOperationTypes.begin(), mOperationTypes.end(),
__anonb50294d30502(V1_3::OperationType type) 341 [](V1_3::OperationType type) {
342 CHECK_EQ(operationToFirstEncoding.count(type), size_t(0));
343 });
344 }
345 }
~PartitioningDriver()346 ~PartitioningDriver() override {}
347
getVersionString(getVersionString_cb cb)348 hardware::Return<void> getVersionString(getVersionString_cb cb) override {
349 cb(V1_0::ErrorStatus::NONE, mVersionString);
350 return hardware::Void();
351 }
352
prepareModel_1_3(const V1_3::Model & model,V1_1::ExecutionPreference preference,V1_3::Priority priority,const V1_3::OptionalTimePoint & deadline,const hardware::hidl_vec<hardware::hidl_handle> & modelCache,const hardware::hidl_vec<hardware::hidl_handle> & dataCache,const HalCacheToken & token,const sp<V1_3::IPreparedModelCallback> & callback)353 hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
354 const V1_3::Model& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
355 const V1_3::OptionalTimePoint& deadline,
356 const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
357 const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
358 const sp<V1_3::IPreparedModelCallback>& callback) override {
359 if (mOEM == OEMIndecisive) {
360 for (const auto& operation : model.main.operations) {
361 if (operation.type == V1_3::OperationType::OEM_OPERATION) {
362 callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
363 return V1_3::ErrorStatus::INVALID_ARGUMENT;
364 }
365 }
366 }
367
368 // NOTE: We verify that all operations in the model are supported.
369 V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT;
370 auto ret = getSupportedOperations_1_3(
371 model, [&outStatus](V1_3::ErrorStatus inStatus,
372 const hardware::hidl_vec<bool>& supportedOperations) {
373 if (inStatus == V1_3::ErrorStatus::NONE) {
374 if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
375 [](bool v) { return v; })) {
376 outStatus = V1_3::ErrorStatus::NONE;
377 }
378 }
379 });
380 if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) {
381 return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache,
382 dataCache, token, callback);
383 } else {
384 callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
385 return V1_3::ErrorStatus::INVALID_ARGUMENT;
386 }
387 }
388
getStatus()389 hardware::Return<V1_0::DeviceStatus> getStatus() override {
390 return V1_0::DeviceStatus::AVAILABLE;
391 }
392
getCapabilities_1_3(getCapabilities_1_3_cb cb)393 hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
394 cb(V1_3::ErrorStatus::NONE, mCapabilities);
395 return hardware::Void();
396 }
397
getSupportedOperations_1_3(const V1_3::Model & model,getSupportedOperations_1_3_cb cb)398 hardware::Return<void> getSupportedOperations_1_3(const V1_3::Model& model,
399 getSupportedOperations_1_3_cb cb) override {
400 if (!android::nn::validateModel(model)) {
401 cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
402 return hardware::Void();
403 }
404 cb(V1_3::ErrorStatus::NONE, getSupportedOperationsForSubgraph(model, model.main));
405 return hardware::Void();
406 }
407
getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)408 hardware::Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb) override {
409 cb(V1_0::ErrorStatus::NONE, /*numModelCache=*/1, /*numDataCache=*/1);
410 return hardware::Void();
411 }
412
413 private:
getSupportedOperationsForSubgraph(const V1_3::Model & model,const V1_3::Subgraph & subgraph)414 std::vector<bool> getSupportedOperationsForSubgraph(const V1_3::Model& model,
415 const V1_3::Subgraph& subgraph) {
416 CHECK(&subgraph == &model.main ||
417 std::find_if(model.referenced.begin(), model.referenced.end(),
418 [&subgraph](const V1_3::Subgraph& refSubgraph) {
419 return &subgraph == &refSubgraph;
420 }) != model.referenced.end());
421 auto supportsEntireSubgraph = [this, &model, &subgraph](uint32_t refSubgraphOperandIndex) {
422 CHECK_LT(refSubgraphOperandIndex, subgraph.operands.size());
423 const V1_3::Operand& refSubgraphOperand = subgraph.operands[refSubgraphOperandIndex];
424 CHECK(refSubgraphOperand.lifetime == V1_3::OperandLifeTime::SUBGRAPH);
425 CHECK_LT(refSubgraphOperand.location.offset, model.referenced.size());
426 const V1_3::Subgraph& refSubgraph =
427 model.referenced[refSubgraphOperand.location.offset];
428 std::vector<bool> supported = getSupportedOperationsForSubgraph(model, refSubgraph);
429 return std::all_of(supported.begin(), supported.end(), [](bool x) { return x; });
430 };
431 const size_t count = subgraph.operations.size();
432 std::vector<bool> supported(count);
433 for (size_t i = 0; i < count; i++) {
434 const V1_3::Operation& operation = subgraph.operations[i];
435 if (mOperationTypes.count(operation.type)) {
436 if (operation.type == V1_3::OperationType::IF) {
437 namespace op = android::nn::operation_if;
438 CHECK_GE(operation.inputs.size(), op::kFirstInput);
439 supported[i] =
440 supportsEntireSubgraph(operation.inputs[op::kThenModelOperand]) &&
441 supportsEntireSubgraph(operation.inputs[op::kElseModelOperand]);
442 } else if (operation.type == V1_3::OperationType::WHILE) {
443 namespace op = android::nn::operation_while;
444 CHECK_GE(operation.inputs.size(), op::kFirstInput);
445 supported[i] =
446 supportsEntireSubgraph(operation.inputs[op::kCondModelOperand]) &&
447 supportsEntireSubgraph(operation.inputs[op::kBodyModelOperand]);
448 } else {
449 supported[i] = true;
450 }
451 continue;
452 }
453 if (operation.type == V1_3::OperationType::OEM_OPERATION) {
454 supported[i] = (mOEM != OEMNo);
455 continue;
456 }
457 supported[i] = false;
458 uint32_t operationEncoding = lookupOperation(model, subgraph, i);
459 if ((operationEncoding != kBadOperation) &&
460 (mOperationMask & (1 << operationEncoding))) {
461 supported[i] = true;
462 }
463 }
464 return supported;
465 }
466
467 std::string mVersionString;
468 V1_3::Capabilities mCapabilities;
469 uint32_t mOperationMask;
470 OEM mOEM;
471 std::set<V1_3::OperationType> mOperationTypes;
472 };
473
474 // Like PartitioningDriver, but implementing 1.2
475 class PartitioningDriverV1_2 : public V1_2::IDevice {
476 public:
PartitioningDriverV1_2(const char * name,const char * version,V1_3::Capabilities capabilities,uint32_t operationMask,PartitioningDriver::OEM oem=PartitioningDriver::OEMNo,std::set<V1_3::OperationType> operationTypes={})477 PartitioningDriverV1_2(const char* name, const char* version, V1_3::Capabilities capabilities,
478 uint32_t operationMask,
479 PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
480 std::set<V1_3::OperationType> operationTypes = {})
481 : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
482 operationTypes)) {}
getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb)483 hardware::Return<void> getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb) override {
484 return mLatestDriver->getCapabilities_1_2(_hidl_cb);
485 }
getSupportedOperations_1_2(const V1_2::Model & model,getSupportedOperations_1_2_cb _hidl_cb)486 hardware::Return<void> getSupportedOperations_1_2(
487 const V1_2::Model& model, getSupportedOperations_1_2_cb _hidl_cb) override {
488 return mLatestDriver->getSupportedOperations_1_2(model, _hidl_cb);
489 }
prepareModel_1_2(const V1_2::Model & model,V1_1::ExecutionPreference preference,const hardware::hidl_vec<hardware::hidl_handle> & modelCache,const hardware::hidl_vec<hardware::hidl_handle> & dataCache,const HalCacheToken & token,const sp<V1_2::IPreparedModelCallback> & actualCallback)490 hardware::Return<V1_0::ErrorStatus> prepareModel_1_2(
491 const V1_2::Model& model, V1_1::ExecutionPreference preference,
492 const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
493 const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
494 const sp<V1_2::IPreparedModelCallback>& actualCallback) override {
495 return mLatestDriver->prepareModel_1_2(model, preference, modelCache, dataCache, token,
496 actualCallback);
497 }
getVersionString(getVersionString_cb _hidl_cb)498 hardware::Return<void> getVersionString(getVersionString_cb _hidl_cb) override {
499 return mLatestDriver->getVersionString(_hidl_cb);
500 }
getType(getType_cb _hidl_cb)501 hardware::Return<void> getType(getType_cb _hidl_cb) override {
502 return mLatestDriver->getType(_hidl_cb);
503 }
getSupportedExtensions(getSupportedExtensions_cb _hidl_cb)504 hardware::Return<void> getSupportedExtensions(getSupportedExtensions_cb _hidl_cb) {
505 return mLatestDriver->getSupportedExtensions(_hidl_cb);
506 }
getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb)507 hardware::Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb) {
508 return mLatestDriver->getNumberOfCacheFilesNeeded(_hidl_cb);
509 }
prepareModelFromCache(const hardware::hidl_vec<hardware::hidl_handle> & modelCache,const hardware::hidl_vec<hardware::hidl_handle> & dataCache,const HalCacheToken & token,const sp<V1_2::IPreparedModelCallback> & callback)510 hardware::Return<V1_0::ErrorStatus> prepareModelFromCache(
511 const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
512 const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
513 const sp<V1_2::IPreparedModelCallback>& callback) {
514 return mLatestDriver->prepareModelFromCache(modelCache, dataCache, token, callback);
515 }
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)516 hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
517 return mLatestDriver->getCapabilities_1_1(_hidl_cb);
518 }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)519 hardware::Return<void> getSupportedOperations_1_1(
520 const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
521 return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
522 }
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)523 hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
524 const V1_1::Model& model, V1_1::ExecutionPreference preference,
525 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
526 return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
527 }
getStatus()528 hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)529 hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
530 return mLatestDriver->getCapabilities(_hidl_cb);
531 }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)532 hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
533 getSupportedOperations_cb _hidl_cb) override {
534 return mLatestDriver->getSupportedOperations(model, _hidl_cb);
535 }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)536 hardware::Return<V1_0::ErrorStatus> prepareModel(
537 const V1_0::Model& model,
538 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
539 return mLatestDriver->prepareModel(model, actualCallback);
540 }
541
542 private:
543 const sp<V1_3::IDevice> mLatestDriver;
544 };
545
// Like PartitioningDriver, but implementing 1.1. Every call is forwarded to a
// wrapped latest-version PartitioningDriver; only the HAL interface version
// visible to the runtime differs.
class PartitioningDriverV1_1 : public V1_1::IDevice {
   public:
    PartitioningDriverV1_1(const char* name, const char* version, V1_3::Capabilities capabilities,
                           uint32_t operationMask,
                           PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                           std::set<V1_3::OperationType> operationTypes = {})
        : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
                                               operationTypes)) {}
    hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities_1_1(_hidl_cb);
    }
    hardware::Return<void> getSupportedOperations_1_1(
            const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
    }
    hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, V1_1::ExecutionPreference preference,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
    }
    hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
    hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities(_hidl_cb);
    }
    hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
                                                  getSupportedOperations_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations(model, _hidl_cb);
    }
    hardware::Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel(model, actualCallback);
    }

   private:
    // Owns the wrapped latest-version driver that does all the real work.
    const sp<V1_3::IDevice> mLatestDriver;
};
584
// Like PartitioningDriver, but implementing 1.0. Every call is forwarded to a
// wrapped latest-version PartitioningDriver; only the HAL interface version
// visible to the runtime differs.
class PartitioningDriverV1_0 : public V1_0::IDevice {
   public:
    PartitioningDriverV1_0(const char* name, const char* version, V1_3::Capabilities capabilities,
                           uint32_t operationMask,
                           PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                           std::set<V1_3::OperationType> operationTypes = {})
        : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
                                               operationTypes)) {}
    hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities(_hidl_cb);
    }
    hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
                                                  getSupportedOperations_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations(model, _hidl_cb);
    }
    hardware::Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel(model, actualCallback);
    }
    hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }

   private:
    // Owns the wrapped latest-version driver that does all the real work.
    const sp<V1_3::IDevice> mLatestDriver;
};
611
// How (whether) a test-created operand's dimensions are specified.
enum class Dimensioned {
    NO,      // either a scalar, or a tensor of either unspecified rank (usually)
             // or specified rank but with no specified dimensions (where
             // specifically stated)
    RANK_1,  // tensor of shape { 0 } -- i.e., rank 1, unspecified dimensions
    RANK_2,  // tensor of shape { 0, 0 } -- i.e., rank 2, unspecified dimensions
    YES_1,   // tensor of shape { 1 }
    YES_2,   // tensor of shape { 2 }
    YES_4,   // tensor of shape { 4 }
    YES = YES_1
};
623
dimensions(Dimensioned dimensioned)624 std::vector<uint32_t> dimensions(Dimensioned dimensioned) {
625 switch (dimensioned) {
626 default:
627 EXPECT_TRUE(false) << "Unknown value";
628 FALLTHROUGH_INTENDED;
629 case Dimensioned::NO:
630 return {};
631 case Dimensioned::RANK_1:
632 return {0};
633 case Dimensioned::RANK_2:
634 return {0, 0};
635 case Dimensioned::YES_1:
636 return {1};
637 case Dimensioned::YES_2:
638 return {2};
639 case Dimensioned::YES_4:
640 return {4};
641 }
642 }
643
644 // "dimensioned" must be a fully specified kind
numberOfElements(Dimensioned dimensioned)645 uint32_t numberOfElements(Dimensioned dimensioned) {
646 auto dims = dimensions(dimensioned);
647 uint32_t result = std::reduce(dims.begin(), dims.end(), 1u, std::multiplies<>());
648 CHECK_GT(result, 0u);
649 return result;
650 }
651
toString(Dimensioned dimensioned)652 std::string toString(Dimensioned dimensioned) {
653 switch (dimensioned) {
654 default:
655 return "<Unknown value>";
656 case Dimensioned::NO:
657 return "NO";
658 case Dimensioned::RANK_1:
659 return "RANK_1";
660 case Dimensioned::RANK_2:
661 return "RANK_2";
662 case Dimensioned::YES_1:
663 return "YES_1";
664 case Dimensioned::YES_2:
665 return "YES_2";
666 case Dimensioned::YES_4:
667 return "YES_4";
668 }
669 }
670
// This class adds some simple abstractions and utilities on top of
// WrapperModel. For example, it provides methods that work in terms of
// operation kind (0..7); and because we care about graph topology rather than
// details of operand types and values, it greatly simplifies the process of
// creating operands.
class PartitioningModel : private WrapperModel {
   public:
    // Expose the parts of the WrapperModel interface that the tests use directly.
    using WrapperModel::finish;
    using WrapperModel::getHandle;
    using WrapperModel::identifyInputsAndOutputs;
    using WrapperModel::isValid;
    using WrapperModel::relaxComputationFloat32toFloat16;
    using WrapperModel::setOperandValue;

    // Create a tensor operand of the specified type, and return the
    // corresponding operand index.
    uint32_t addIntOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_INT32, dimensioned);
    }
    // Create a scalar INT32 operand, setting its value if "v" is provided,
    // and return the corresponding operand index.
    uint32_t addIntScalarOperand(std::optional<int> v = std::nullopt) {
        uint32_t opnd = addOperand(WrapperType::INT32);
        if (v.has_value()) {
            setOperandValue(opnd, &v.value());
        }
        return opnd;
    }
    uint32_t addFloatOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_FLOAT32, dimensioned);
    }
    uint32_t addQuantOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_QUANT8_ASYMM, dimensioned);
    }
    uint32_t addBooleanOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_BOOL8, dimensioned);
    }
    // Create a constant TENSOR_FLOAT32 operand filled with zeros, and return
    // the corresponding operand index. "dimensioned" must be a fully
    // specified kind (numberOfElements() requires it).
    uint32_t addFloatZeroOperand(Dimensioned dimensioned = Dimensioned::YES) {
        uint32_t opnd = addFloatOperand(dimensioned);
        std::vector<float> values(numberOfElements(dimensioned), 0.0f);
        uint32_t size = values.size() * sizeof(float);
        // Make sure the values are immediately copied so that it is safe to free the buffer after
        // the setOperandValue call
        CHECK_LE(size, ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        setOperandValue(opnd, values.data(), size);
        return opnd;
    }

    // Create an operand of the specified type, and return the corresponding
    // operand index. For tensor types, "dimensioned" controls the shape;
    // quantized (and TENSOR_INT32) types get a scale of 1.0f.
    uint32_t addOperand(WrapperType wrapperType, Dimensioned dimensioned = Dimensioned::YES) {
        switch (static_cast<int>(wrapperType)) {
            case ANEURALNETWORKS_BOOL:
            case ANEURALNETWORKS_FLOAT16:
            case ANEURALNETWORKS_FLOAT32:
            case ANEURALNETWORKS_INT32:
            case ANEURALNETWORKS_UINT32:
            case ANEURALNETWORKS_MODEL:
            case ANEURALNETWORKS_OEM_SCALAR:
                // Scalars carry no dimensions.
                return addOperand(WrapperOperandType{wrapperType, {}});

            case ANEURALNETWORKS_TENSOR_BOOL8:
            case ANEURALNETWORKS_TENSOR_FLOAT16:
            case ANEURALNETWORKS_TENSOR_FLOAT32:
            case ANEURALNETWORKS_TENSOR_OEM_BYTE:
                return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned)});

            case ANEURALNETWORKS_TENSOR_INT32:
            case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
            case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED:
            case ANEURALNETWORKS_TENSOR_QUANT8_SYMM:
            case ANEURALNETWORKS_TENSOR_QUANT16_ASYMM:
            case ANEURALNETWORKS_TENSOR_QUANT16_SYMM:
                // These types require a scale; an arbitrary 1.0f suffices for
                // topology-focused tests.
                return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned), 1.0f});

            case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL:
                return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned),
                                                     WrapperSymmPerChannelQuantParams({1.0f}, 0)});

            default:
                ADD_FAILURE() << "Unexpected type " << static_cast<uint32_t>(wrapperType);
                return ~uint32_t(0);
        }
    }

    // Create an operand of the specified operand type, and return the
    // corresponding operand index.
    uint32_t addOperand(const WrapperOperandType& wrapperOperandType) {
        // Remember the type so addOperandOfSameType() can clone it later.
        mWrapperOperandType.push_back(wrapperOperandType);
        return WrapperModel::addOperand(&wrapperOperandType);
    }

    // Create an operation with any number of inputs and one output, specifying
    // the operation type (e.g., ANEURALNETWORKS_ADD), the input operand
    // indexes, and the output type (e.g., WrapperType::TENSOR_FLOAT32).
    // Returns the output operand index.
    uint32_t addExplicitOperationXTo1(ANeuralNetworksOperationType operationType,
                                      const std::vector<uint32_t>& inputs, WrapperType outputType,
                                      Dimensioned dimensionedOutput = Dimensioned::YES) {
        uint32_t output = addOperand(outputType, dimensionedOutput);
        addOperation(operationType, inputs, {output});
        return output;
    }

    // Create a V1_0 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_0 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_0(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_0 - kFirstEncodingV1_0);
        return addOperation2To1(operation + kFirstEncodingV1_0, input0, input1, dimensionedOutput);
    }

    // Create a V1_1 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_1 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_1(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_1 - kFirstEncodingV1_1);
        return addOperation2To1(operation + kFirstEncodingV1_1, input0, input1, dimensionedOutput);
    }

    // Create a V1_2 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_2 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_2(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_2 - kFirstEncodingV1_2);
        return addOperation2To1(operation + kFirstEncodingV1_2, input0, input1, dimensionedOutput);
    }

    // Create a V1_3 operation with one input and one output, specifying the
    // operation kind (where 0 is the first V1_3 operation) and the input
    // operand index.
    // Returns the output operand index.
    uint32_t addOperation1To1V1_3(uint32_t operation, const uint32_t input0,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_3 - kFirstEncodingV1_3);
        return addOperation1To1(operation + kFirstEncodingV1_3, input0, dimensionedOutput);
    }

    // Create an OEM operation with one input and one output,
    // specifying the input operand index. Returns the output operand
    // index.
    uint32_t addOperationOEM1To1(const uint32_t input,
                                 Dimensioned dimensionedOutput = Dimensioned::YES) {
        uint32_t output = addOperandOfSameType(input, dimensionedOutput);
        addOperation(ANEURALNETWORKS_OEM_OPERATION, {input}, {output});
        return output;
    }

    // Create an IF operation with the given condition operand and two
    // referenced models for the true and false cases.
    void addIfOperation(const uint32_t cond, const PartitioningModel& trueModel,
                        const PartitioningModel& falseModel, const std::vector<uint32_t>& inputs,
                        const std::vector<uint32_t>& outputs) {
        const uint32_t opndTrue = addRefModelOperand(trueModel);
        const uint32_t opndFalse = addRefModelOperand(falseModel);
        // IF input layout: condition, then-model, else-model, then the payload inputs.
        std::vector<uint32_t> ifInputs = {cond, opndTrue, opndFalse};
        ifInputs.insert(ifInputs.end(), inputs.begin(), inputs.end());
        addOperation(ANEURALNETWORKS_IF, ifInputs, outputs);
    }

    // Create a WHILE operation with the given condition and body referenced models.
    void addWhileOperation(const PartitioningModel& condModel, const PartitioningModel& bodyModel,
                           const std::vector<uint32_t>& inputs,
                           const std::vector<uint32_t>& outputs) {
        const uint32_t condOperand = addRefModelOperand(condModel);
        const uint32_t bodyOperand = addRefModelOperand(bodyModel);
        // WHILE input layout: condition model, body model, then the payload inputs.
        std::vector<uint32_t> whileInputs = {condOperand, bodyOperand};
        whileInputs.insert(whileInputs.end(), inputs.begin(), inputs.end());
        addOperation(ANEURALNETWORKS_WHILE, whileInputs, outputs);
    }

    // Run the partitioning algorithm to create an ExecutionPlan.
    int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
                         ExecutePreference preference, ExecutePriority priority,
                         const OptionalTimePoint& deadline, ExecutionPlan* plan) {
        return reinterpret_cast<ModelBuilder*>(getHandle())
                ->partitionTheWork(devices, static_cast<uint32_t>(preference),
                                   static_cast<int32_t>(priority), deadline, plan, {});
    }

#ifdef VERBOSE
    // This is a debugging utility function.
    void dump(const char* name) const {
        const ModelBuilder* mb = reinterpret_cast<const ModelBuilder*>(getHandle());
        ::dump(name, mb);
    }
#endif

   private:
    // Create an operation with two inputs and one output, specifying
    // the operation kind and the input operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1(uint32_t operation, const uint32_t input0, const uint32_t input1,
                              Dimensioned dimensionedOutput = Dimensioned::YES) {
        // NOTE(review): this relies on the ordering of firstEncodingToOperation
        // (declared earlier in this file, outside this view) so that
        // lower_bound() lands on the entry for this operation's encoding
        // range -- confirm against the map's comparator.
        auto it = firstEncodingToOperation.lower_bound(operation);
        CHECK(it != firstEncodingToOperation.end());
        ANeuralNetworksOperationType type = it->second.first;
        if (it->second.second) {
            // This operation takes a fused activation code as a third input;
            // the code is encoded as the offset from the range's first encoding.
            int32_t fuseCode = operation - it->first;
            uint32_t input2 = addIntOperand(fuseCode);
            uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
            addOperation(type, {input0, input1, input2}, {output});
            return output;
        } else {
            uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
            addOperation(type, {input0, input1}, {output});
            return output;
        }
    }

    // Create an operation with one input and one output, specifying
    // the operation kind and the input operand index.
    // Returns the output operand index.
    uint32_t addOperation1To1(uint32_t operation, const uint32_t input0,
                              Dimensioned dimensionedOutput = Dimensioned::YES) {
        // NOTE(review): same lower_bound()/ordering caveat as addOperation2To1().
        auto it = firstEncodingToOperation.lower_bound(operation);
        CHECK(it != firstEncodingToOperation.end());
        ANeuralNetworksOperationType type = it->second.first;

        uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
        addOperation(type, {input0}, {output});
        return output;
    }

    // Create a scalar integer operand of the specified value, and
    // return the corresponding operand index.
    uint32_t addIntOperand(int32_t value) {
        uint32_t operand = addOperand(WrapperType::INT32);
        setOperandValue(operand, &value, sizeof(value));
        return operand;
    }

    // Create an operand from a model for control flow graphs.
    uint32_t addRefModelOperand(const PartitioningModel& model) {
        const uint32_t index = addOperand(WrapperType::MODEL);
        WrapperModel::setOperandValueFromModel(index, &model);
        return index;
    }

    // Create an operand of the same type as the specified operand,
    // and return the operand index of the new operand.
    //
    // If a tensor, the new operand will have the same rank as the specified
    // operand. If dimensioned == Dimensioned::NO, then all dimensions of a new
    // tensor operand will be unspecified. If dimensioned != Dimensioned::NO,
    // then all dimensions of a new tensor operand will have the implied value
    // (e.g., YES_1 means each dimension will have the value "1").
    uint32_t addOperandOfSameType(uint32_t operand, Dimensioned dimensioned = Dimensioned::YES) {
        WrapperOperandType type = mWrapperOperandType.at(operand);

        // Only zero- or one-dimension kinds are supported here, because the
        // single value d[0] is broadcast across every dimension of the clone.
        const auto d = dimensions(dimensioned);
        EXPECT_TRUE(d.size() <= 1);
        for (auto& dimension : type.dimensions) {
            dimension = (dimensioned == Dimensioned::NO ? 0 : d[0]);
        }

        mWrapperOperandType.push_back(type);
        return WrapperModel::addOperand(&type);
    }

    // operand index to operand type
    std::vector<WrapperOperandType> mWrapperOperandType;
};
938
939 // This class adds some utilities on top of WrapperCompilation.
940 class PartitioningCompilation : public WrapperCompilation {
941 public:
PartitioningCompilation(const PartitioningModel * model,const std::vector<std::shared_ptr<Device>> & devices)942 PartitioningCompilation(const PartitioningModel* model,
943 const std::vector<std::shared_ptr<Device>>& devices) {
944 ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
945 CompilationBuilder* c = nullptr;
946 int result = m->createCompilation(&c, devices);
947 EXPECT_EQ(result, 0);
948 mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
949 }
950
setPartitioning(uint32_t partitioning)951 Result setPartitioning(uint32_t partitioning) {
952 return static_cast<Result>(builder()->forTest_setPartitioning(partitioning));
953 }
954
955 // Simulate recoverable partitioning failure.
failPartitioning()956 Result failPartitioning() {
957 return static_cast<Result>(
958 builder()->forTest_failPartitioning(static_cast<int>(Result::OP_FAILED)));
959 }
960
961 using WrapperCompilation::finish;
962
getExecutionPlan() const963 const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }
964
965 private:
builder()966 CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }
967
builder() const968 const CompilationBuilder* builder() const {
969 return reinterpret_cast<const CompilationBuilder*>(getHandle());
970 }
971 };
972
#ifdef VERBOSE
// Used by the graph-comparison predicates below: return true/false from the
// enclosing function, logging the decision point in VERBOSE builds. MESSAGE
// is pasted after __LINE__, so when non-empty it must begin with "<<".
#define RETURN_TRUE()                                                 \
    {                                                                 \
        std::cerr << "returning true from " << __LINE__ << std::endl; \
        return true;                                                  \
    }
#else
#define RETURN_TRUE() \
    { return true; }
#endif
#ifdef VERBOSE
#define RETURN_FALSE(MESSAGE)                                                  \
    {                                                                          \
        std::cerr << "returning false from " << __LINE__ MESSAGE << std::endl; \
        return false;                                                          \
    }
#else
#define RETURN_FALSE(MESSAGE) \
    { return false; }
#endif
993
994 class PartitioningTest : public ::testing::Test {
995 protected:
996 using DynamicTemporariesType = decltype(ExecutionPlan().forTest_flatGetDynamicTemporaries());
997 using RemapVectorType = ExecutionStep::RemapVectorType;
998 using StepModelOutputSetType = ExecutionStep::StepModelOutputSetType;
999
1000 // Used for PartitioningTest::checkExecutionPlanSteps.
1001 static constexpr const char* kIfStep = "IF";
1002 static constexpr const char* kWhileStep = "WHILE";
1003 static constexpr const char* kGotoStep = "GOTO";
1004
    // No per-test initialization is needed; present to satisfy ::testing::Test.
    virtual void SetUp() {}
1006
    // Specification of a test device. makeDevices() (below) turns a vector of
    // these into a vector of Devices.
    struct DeviceSpecification {
        // Fully explicit capabilities; version string defaults to kVersionString.
        DeviceSpecification(const std::string& name, const V1_3::Capabilities& capabilities,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo)
            : mName(name),
              mVersionString(kVersionString),
              mCapabilities(capabilities),
              mOperationMask(operationMask),
              mOEM(oem) {}
        // Uniform performance "perf" for both regular and relaxed execution.
        DeviceSpecification(const std::string& name, float perf, uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : DeviceSpecification(name, perf, perf, operationMask, oem, halVersion,
                                  operationTypes) {}
        // Distinct relaxed (FP16) performance "perfRelaxed".
        DeviceSpecification(const std::string& name, float perf, float perfRelaxed,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : DeviceSpecification(name, kVersionString, perf, perfRelaxed, operationMask, oem,
                                  halVersion, operationTypes) {}
        // Explicit version string with uniform performance.
        DeviceSpecification(const std::string& name, const std::string& version, float perf,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : DeviceSpecification(name, version, perf, perf, operationMask, oem, halVersion,
                                  operationTypes) {}
        // The most general form: the constructors above delegate (directly or
        // indirectly) here, which synthesizes full V1_3::Capabilities from the
        // scalar performance numbers.
        DeviceSpecification(const std::string& name, const std::string& version, float perf,
                            float perfRelaxed, uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : mName(name),
              mVersionString(version),
              mHalVersion(halVersion),
              mOperationMask(operationMask),
              mOEM(oem),
              mOperationTypes(std::move(operationTypes)) {
            V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
            V1_0::PerformanceInfo perfRelaxedInfo = {.execTime = perfRelaxed,
                                                     .powerUsage = perfRelaxed};
            // Apply the same performance to every operand type, IF, and WHILE.
            mCapabilities = {
                    .relaxedFloat32toFloat16PerformanceScalar = perfRelaxedInfo,
                    .relaxedFloat32toFloat16PerformanceTensor = perfRelaxedInfo,
                    .operandPerformance =
                            ::android::nn::nonExtensionOperandPerformance<HalVersion::V1_3>(
                                    perfInfo),
                    .ifPerformance = perfInfo,
                    .whilePerformance = perfInfo};
        }
        // Per-HAL-version operation masks, combined via makeOperationMask().
        DeviceSpecification(const std::string& name, float perf, HalVersion halVersion,
                            uint32_t operationMaskV1_0, uint32_t operationMaskV1_1 = 0,
                            uint32_t operationMaskV1_2 = 0, uint32_t operationMaskV1_3 = 0)
            : DeviceSpecification(
                      name, perf, perf,
                      makeOperationMask(halVersion, operationMaskV1_0, operationMaskV1_1,
                                        operationMaskV1_2, operationMaskV1_3)) {
            mHalVersion = halVersion;
        }

        std::string mName;
        std::string mVersionString;
        V1_3::Capabilities mCapabilities;
        HalVersion mHalVersion = HalVersion::LATEST;
        uint32_t mOperationMask;
        PartitioningDriver::OEM mOEM = PartitioningDriver::OEMNo;
        std::set<V1_3::OperationType> mOperationTypes;

        static constexpr char kVersionString[] = "JUST_AN_EXAMPLE";

       private:
        // This function takes three operation masks aligned at the low-order
        // bit -- one mask each for V1_0, V1_1, and V1_2 -- and produces a single
        // composite operation mask, formed by shifting each of the input
        // operation masks appropriately and ORing the results together.
        //
        // For convenience, any bits of an input mask that are too high order
        // for that mask are discarded -- this allows ~0 to be a legal input
        // mask.
        //
        // For the sake of example, assume that each low order mask is 4 bits
        // wide, and take some artistic license to write literals in binary.
        // Then:
        //
        //     assert(makeOperationMask(HalVersion::V1_2, 0b0110, 0b1001, 0b0101) ==
        //            0b 0101 1001 0110);
        //
        // This is used by a DeviceSpecification constructor to build a mask of
        // operations to be supported by the device.
        static uint32_t makeOperationMask(HalVersion halVersion, uint32_t operationMaskV1_0,
                                          uint32_t operationMaskV1_1, uint32_t operationMaskV1_2,
                                          uint32_t operationMaskV1_3) {
            // A device may not claim operations newer than its HAL version.
            if (halVersion < HalVersion::V1_3) {
                CHECK(!operationMaskV1_3);
            }
            if (halVersion < HalVersion::V1_2) {
                CHECK(!operationMaskV1_2);
            }
            if (halVersion < HalVersion::V1_1) {
                CHECK(!operationMaskV1_1);
            }
            auto maskOfWidth = [](uint32_t width) -> uint32_t { return (1U << width) - 1; };
            static const uint32_t kOperationMaskV1_0 =
                    maskOfWidth(kLastEncodingV1_0 - kFirstEncodingV1_0 + 1);
            static const uint32_t kOperationMaskV1_1 =
                    maskOfWidth(kLastEncodingV1_1 - kFirstEncodingV1_1 + 1);
            static const uint32_t kOperationMaskV1_2 =
                    maskOfWidth(kLastEncodingV1_2 - kFirstEncodingV1_2 + 1);
            static const uint32_t kOperationMaskV1_3 =
                    maskOfWidth(kLastEncodingV1_3 - kFirstEncodingV1_3 + 1);
            return ((operationMaskV1_0 & kOperationMaskV1_0) << kFirstEncodingV1_0) |
                   ((operationMaskV1_1 & kOperationMaskV1_1) << kFirstEncodingV1_1) |
                   ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2) |
                   ((operationMaskV1_3 & kOperationMaskV1_3) << kFirstEncodingV1_3);
        }
    };
    // From a vector of DeviceSpecification, create a vector of Devices: one
    // driver-backed device per specification (wrapped in the adapter matching
    // its HAL version), with the CPU fallback device always appended last.
    static std::vector<std::shared_ptr<Device>> makeDevices(
            std::vector<DeviceSpecification> specifications) {
        std::vector<std::shared_ptr<Device>> devices;
        for (const auto& specification : specifications) {
            SharedDevice device = nullptr;
            switch (specification.mHalVersion) {
                case HalVersion::V1_3:
                    device = android::nn::makeSharedDevice(
                            specification.mName,
                            new PartitioningDriver(specification.mName.c_str(),
                                                   specification.mVersionString.c_str(),
                                                   specification.mCapabilities,
                                                   specification.mOperationMask, specification.mOEM,
                                                   specification.mOperationTypes));
                    break;
                case HalVersion::V1_2:
                    device = android::nn::makeSharedDevice(
                            specification.mName,
                            new PartitioningDriverV1_2(
                                    specification.mName.c_str(),
                                    specification.mVersionString.c_str(),
                                    specification.mCapabilities, specification.mOperationMask,
                                    specification.mOEM, specification.mOperationTypes));
                    break;
                case HalVersion::V1_1:
                    device = android::nn::makeSharedDevice(
                            specification.mName,
                            new PartitioningDriverV1_1(
                                    specification.mName.c_str(),
                                    specification.mVersionString.c_str(),
                                    specification.mCapabilities, specification.mOperationMask,
                                    specification.mOEM, specification.mOperationTypes));
                    break;
                case HalVersion::V1_0:
                    device = android::nn::makeSharedDevice(
                            specification.mName,
                            new PartitioningDriverV1_0(
                                    specification.mName.c_str(),
                                    specification.mVersionString.c_str(),
                                    specification.mCapabilities, specification.mOperationMask,
                                    specification.mOEM, specification.mOperationTypes));
                    break;
                default:
                    ADD_FAILURE() << "Unexpected";
            }
            auto driverDevice = DeviceManager::forTest_makeDriverDevice(device);
            devices.push_back(std::move(driverDevice));
        }
        // The CPU fallback device is always available to the partitioner.
        devices.push_back(DeviceManager::getCpuDevice());
        return devices;
    }
1178
stepsToString(const std::vector<std::string> & steps)1179 static std::string stepsToString(const std::vector<std::string>& steps) {
1180 std::stringstream ss;
1181 ss << "[ ";
1182 for (const auto& step : steps) {
1183 ss << step << " ";
1184 }
1185 ss << "]";
1186 return ss.str();
1187 }
1188
    // Checks the type of each logical step in an execution plan.
    // Each entry of "expected" is either: kIfStep for IfStep, kWhileStep for WhileStep,
    // kGotoStep for GotoStep, or the device name for ExecutionStep.
    void checkExecutionPlanSteps(const ExecutionPlan& plan,
                                 const std::vector<std::string>& expected) {
        ASSERT_GT(expected.size(), 0u);

        std::vector<std::string> actual;
        if (expected.size() == 1) {
            // A single expected step must correspond to a SIMPLE (unpartitioned) plan.
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
            actual.emplace_back(plan.forTest_simpleGetDevice()->getName());
        } else {
            // Multiple steps imply a COMPOUND plan; collect one name per step.
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
            const auto& steps = plan.forTest_compoundGetSteps();
            for (const auto& step : steps) {
                if (step->isIf()) {
                    actual.emplace_back(kIfStep);
                } else if (step->isWhile()) {
                    actual.emplace_back(kWhileStep);
                } else if (step->isGoto()) {
                    actual.emplace_back(kGotoStep);
                } else if (step->isExecution()) {
                    actual.emplace_back(step->executionStep()->getDevice()->getName());
                } else {
                    ASSERT_FALSE(true) << "Unknown LogicalStep";
                }
            }
        }
        ASSERT_TRUE(actual == expected)
                << "expected: " << stepsToString(expected) << ", actual: " << stepsToString(actual);
    }
1220
1221 /*-- Graph comparision ----------------------------------------------------------------*/
1222
1223 // An operand with certain values for its lifetime does not have a
1224 // defining operation in the graph. For the purposes of the graph
1225 // comparison algorithm, we encode the "defining operation" index of
1226 // such an operand as follows:
1227 // - NO_VALUE kPseudoDefiningOperationNoValue
1228 // - SUBGRAPH_INPUT kPseudoDefiningOperationModelInput0 + (position in list of inputs)
1229 // - CONSTANT_COPY kPseudoDefiningOperationConstantCopy0 + (constant value)
1230 // Note: For the graphs we build in this test, we
1231 // only expect to see 4-byte constants within
1232 // a very restricted range, so we only make
1233 // room for such constants in our encoding
1234 // space.
1235 // We do not expect to see CONSTANT_REFERENCE, and so we do not handle
1236 // it.
1237 //
1238 // The encoding is intended to be relatively human readable; it is not
1239 // designed to represent some optimal balance of ranges for the items
1240 // within its scope (actual operations, inputs, constants).
1241
    // Encodings of "pseudo defining operations" for operands that have no real
    // defining operation; see the block comment above for the scheme.
    enum PseudoDefiningOperationEncodings : uint32_t {
        kPseudoDefiningOperationModelInput0 = 0x80000000U,
        kPseudoDefiningOperationConstantCopy0 = 0x90000000U,
        kPseudoDefiningOperationNoValue = 0xeeeeeeeeU,

        // lowest value for special encoding
        kPseudoDefiningOperationBase = 0x80000000U,

        // range of encoded input or constant
        kPseudoDefiningOperationRange = 0x10000000U,
    };
1253
    // Build a map from operand to defining operation.
    // TODO: Replace map with vector?
    void buildDefinitionMap(const ModelBuilder* model, std::map<uint32_t, uint32_t>* defMap) {
        // actual definitions: each operation defines its outputs
        ASSERT_LT(model->operationCount(), kPseudoDefiningOperationBase);
        for (uint32_t i = 0, e = model->operationCount(); i < e; i++) {
            const V1_3::Operation& operation = android::nn::convertToV1_3(model->getOperation(i));
            for (uint32_t output : operation.outputs) {
                (*defMap)[output] = i;
            }
        }
        // inputs: encoded as kPseudoDefiningOperationModelInput0 + position
        ASSERT_LT(model->inputCount(), kPseudoDefiningOperationRange);
        for (uint32_t i = 0, e = model->inputCount(); i < e; i++) {
            (*defMap)[model->getInputOperandIndex(i)] = kPseudoDefiningOperationModelInput0 + i;
        }
        // look for NO_VALUE and CONSTANT_COPY
        for (uint32_t i = 0, e = model->operandCount(); i < e; i++) {
            const V1_3::Operand& operand = android::nn::convertToV1_3(model->getOperand(i));
            switch (operand.lifetime) {
                case V1_3::OperandLifeTime::NO_VALUE:
                    (*defMap)[i] = kPseudoDefiningOperationNoValue;
                    break;
                case V1_3::OperandLifeTime::CONSTANT_COPY: {
                    // Only 4-byte constants are expected (see encoding comment above).
                    ASSERT_EQ(operand.location.length, sizeof(uint32_t));
                    uint32_t value;
                    memcpy(&value, model->getPointerToOperandValue(operand.location.offset),
                           sizeof(uint32_t));
                    // NOTE(review): this bounds the constant by
                    // kPseudoDefiningOperationNoValue, which is looser than the
                    // kPseudoDefiningOperationRange the encoding comment
                    // implies -- confirm whether that is intended.
                    ASSERT_LT(value, kPseudoDefiningOperationNoValue);
                    (*defMap)[i] = kPseudoDefiningOperationConstantCopy0 + value;
                    break;
                }
                case V1_3::OperandLifeTime::TEMPORARY_VARIABLE:
                case V1_3::OperandLifeTime::SUBGRAPH_INPUT:
                case V1_3::OperandLifeTime::SUBGRAPH_OUTPUT:
                    // already handled
                    break;
                default:
                    FAIL();
                    break;
            }
        }
        // validity check: every operand must have exactly one (pseudo) definition
        ASSERT_EQ(model->operandCount(), defMap->size());
    }
1299
1300 #ifdef VERBOSE
dump(const char * name,const std::map<uint32_t,uint32_t> * aMap)1301 void dump(const char* name, const std::map<uint32_t, uint32_t>* aMap) {
1302 auto writeNum = [](uint32_t num) {
1303 if (num >= kPseudoDefiningOperationBase) {
1304 std::cout << "0x" << std::hex << num << std::dec;
1305 } else {
1306 std::cout << num;
1307 }
1308 };
1309
1310 std::cout << name << ": { ";
1311 bool gotOne = false;
1312 for (const auto& entry : *aMap) {
1313 if (gotOne) {
1314 std::cout << ", ";
1315 } else {
1316 gotOne = true;
1317 }
1318 std::cout << "(";
1319 writeNum(entry.first);
1320 std::cout << ", ";
1321 writeNum(entry.second);
1322 std::cout << ")";
1323 }
1324 std::cout << " }" << std::endl;
1325 }
1326 #endif
1327
compare(const Operand & operandA,const Operand & operandB)1328 bool compare(const Operand& operandA, const Operand& operandB) {
1329 if (operandA.type != operandB.type || operandA.dimensions != operandB.dimensions ||
1330 operandA.scale != operandB.scale || operandA.zeroPoint != operandB.zeroPoint) {
1331 return false;
1332 }
1333 return true;
1334 }
1335
    // Compare two graphs. We ignore operand and operation indexes (i.e.,
    // two nodes can be the same even if they are numbered differently)
    // but we also ignore semantics (e.g., even if an operation kind is
    // such that the operand is commutative, we still pay attention to the
    // order of its input operands).
    //
    // The comparison algorithm works by walking modelA from outputs
    // towards inputs, along the edge from each operand to its
    // defining operation, and then along the edges to the operation's
    // input operands. At each step along the way, we try to match up
    // operands and operations from modelA with equivalent operands
    // and operations from modelB.
    //
    // We start by assuming that modelA's outputs and modelB's outputs
    // match positionally (e.g., modelA's first output operand is
    // equivalent to modelB's first output operand). Once we've
    // discovered two equivalent operands (such as those outputs), we
    // place them in a work queue. We repeatedly pull operands off
    // the queue and compare their defining operations and those
    // operations' input operands, to discover more pairs of
    // equivalent operands. If we ever find operations that do not
    // match (e.g., because operation kind differs), or operands that
    // do not match (e.g., because operand type differs); or if we
    // ever find a conflict (we've already decided that operand A's
    // equivalent operand is B0, but it looks like we need its
    // equivalent operand to be B1); then the graphs compare unequal.
    // Otherwise, we'll eventually exhaust the work queue, and
    // conclude that the graphs compare equal.
    //
    // As a side effect of the comparison, we produce a map
    // *inputsAndOutputsBToA that maps from each of the model input and output
    // operand numbers of modelB to the corresponding operand numbers of modelA.
    // If the comparison returns false, the contents of the map are undefined.
    bool compare(const ModelBuilder* modelA, const ModelBuilder* modelB,
                 std::map<uint32_t, uint32_t>* inputsAndOutputsBToA) {
        CHECK(inputsAndOutputsBToA != nullptr);
        EXPECT_TRUE(inputsAndOutputsBToA->empty());

#ifdef VERBOSE
        ::dump("compare(A)", modelA);
        ::dump("compare(B)", modelB);
#endif

        // Cheap structural screen: graphs of different sizes cannot match.
        if (modelA->operandCount() != modelB->operandCount() ||
            modelA->operationCount() != modelB->operationCount() ||
            modelA->inputCount() != modelB->inputCount() ||
            modelA->outputCount() != modelB->outputCount()) {
            RETURN_FALSE();
        }

        // Maps from operand index to index of defining operation.
        std::map<uint32_t, uint32_t> defsA, defsB;
        buildDefinitionMap(modelA, &defsA);
        buildDefinitionMap(modelB, &defsB);
        // buildDefinitionMap() reports problems via fatal gtest assertions;
        // bail out here if either call failed.
        if (HasFatalFailure()) return false;

        // Maps from operand index in modelA to equivalent operand index
        // in modelB; and from operation index in modelA to equivalent
        // operation index in modelB.
        std::map<uint32_t, uint32_t> equivalentOperandsAToB;
        std::map<uint32_t, uint32_t> equivalentOperationsAToB;

        // Queue of operand indexes from modelA, each of whose defining
        // operations are to be checked for equivalence with modelB.
        std::queue<uint32_t> workQueueOperandsA;

        // Seed operand equivalence map and work queue from model outputs.
        for (uint32_t i = 0, e = modelA->outputCount(); i < e; i++) {
            uint32_t outputA = modelA->getOutputOperandIndex(i);
            uint32_t outputB = modelB->getOutputOperandIndex(i);
            if (!compare(modelA->getOperand(outputA), modelB->getOperand(outputB))) {
#ifdef VERBOSE
                std::cout << "modelA.output[" << i << "] = operand[" << outputA
                          << "] = " << toString(modelA->getOperand(outputA)) << std::endl;
                std::cout << "modelB.output[" << i << "] = operand[" << outputB
                          << "] = " << toString(modelB->getOperand(outputB)) << std::endl;
#endif
                RETURN_FALSE();
            }
            equivalentOperandsAToB[outputA] = outputB;
            workQueueOperandsA.push(outputA);
        }

#ifdef VERBOSE
        dump("defsA", &defsA);
        dump("defsB", &defsB);
#endif

        // Process the queue.  pseudoDefinitionCount tracks how many matched
        // "operations" were pseudo-definitions, for the final sanity check.
        uint32_t pseudoDefinitionCount = 0;
        while (!workQueueOperandsA.empty()) {
#ifdef VERBOSE
            dump("equivalentOperandsAToB", &equivalentOperandsAToB);
            dump("equivalentOperationsAToB", &equivalentOperationsAToB);
#endif
            uint32_t operandIndexA = workQueueOperandsA.front();
#ifdef VERBOSE
            std::cout << "operandIndexA: " << operandIndexA << std::endl;
#endif
            workQueueOperandsA.pop();
            // Anything on the queue already has an equivalent, so at() is safe.
            uint32_t operandIndexB = equivalentOperandsAToB.at(operandIndexA);

            uint32_t operationIndexA = defsA.at(operandIndexA);
            uint32_t operationIndexB = defsB.at(operandIndexB);
            auto it = equivalentOperationsAToB.find(operationIndexA);
            if (it != equivalentOperationsAToB.end()) {
                // Already matched this defining operation: it must match
                // consistently, and there is nothing further to explore.
                if (it->second != operationIndexB) {
                    RETURN_FALSE();
                }
                continue;
            }

            // We haven't identified an equivalent operation for
            // operationIndexA.

            if ((operationIndexA >= kPseudoDefiningOperationBase) !=
                (operationIndexB >= kPseudoDefiningOperationBase)) {
                RETURN_FALSE();
            }
            // Either both operands have pseudo-definitions, or neither
            // does.
            if (operationIndexA >= kPseudoDefiningOperationBase) {
                // Both operands have pseudo-definitions.  The encodings
                // embed the input position / constant value, so equality
                // of the encodings is the equivalence test.
                if (operationIndexA != operationIndexB) {
                    RETURN_FALSE();
                }
                equivalentOperationsAToB[operationIndexA] = operationIndexB;
                ++pseudoDefinitionCount;
                continue;
            }

            // If we get here, neither operation A nor operation B is a
            // pseudo-definition.

            const Operation& operationA = modelA->getOperation(operationIndexA);
            const Operation& operationB = modelB->getOperation(operationIndexB);
            if (operationA.type != operationB.type ||
                operationA.inputs.size() != operationB.inputs.size() ||
                operationA.outputs.size() != operationB.outputs.size()) {
                RETURN_FALSE();
            }
            equivalentOperationsAToB[operationIndexA] = operationIndexB;
            // Match the operations' inputs positionally, enqueueing newly
            // discovered operand equivalences for further exploration.
            for (uint32_t i = 0, e = operationA.inputs.size(); i < e; i++) {
                uint32_t inputA = operationA.inputs[i];
                uint32_t inputB = operationB.inputs[i];
                auto it = equivalentOperandsAToB.find(inputA);
                if (it != equivalentOperandsAToB.end()) {
                    if (it->second != inputB) {
                        RETURN_FALSE();
                    }
                    continue;
                }
                // We haven't identified an equivalent operand for inputA.
                if (!compare(modelA->getOperand(inputA), modelB->getOperand(inputB))) {
#ifdef VERBOSE
                    std::cout << "modelA.input[" << i << "] = operand[" << inputA
                              << "] = " << toString(modelA->getOperand(inputA)) << std::endl;
                    std::cout << "modelB.input[" << i << "] = operand[" << inputB
                              << "] = " << toString(modelB->getOperand(inputB)) << std::endl;
#endif
                    RETURN_FALSE();
                }
                equivalentOperandsAToB[inputA] = inputB;
                workQueueOperandsA.push(inputA);
            }
        }

        // Validity check: the walk must have visited every operand and every
        // operation (real or pseudo) exactly once.
        if (modelA->operandCount() != defsA.size() || modelA->operandCount() != defsB.size() ||
            modelA->operandCount() != equivalentOperandsAToB.size() ||
            modelA->operationCount() + pseudoDefinitionCount != equivalentOperationsAToB.size()) {
            RETURN_FALSE();
        }

        // Build *inputsAndOutputsBToA
        for (uint32_t aInputIndex : modelA->getInputOperandIndexes()) {
            (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aInputIndex)] = aInputIndex;
        }
        for (uint32_t aOutputIndex : modelA->getOutputOperandIndexes()) {
            (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aOutputIndex)] = aOutputIndex;
        }

        RETURN_TRUE();
    }
1520
1521 /*-------------------------------------------------------------------------------------*/
1522
1523 // As a side effect of the comparison, we produce a map
1524 // *inputsAndOutputsModelToStep that maps from each of the model input and
1525 // output operand numbers of "model" to the corresponding operand numbers of
1526 // the step model from "step". If the comparison returns false, the contents
1527 // of the map are undefined.
compare(const ExecutionStep * step,const PartitioningModel * model,std::shared_ptr<Device> device,std::map<uint32_t,uint32_t> * inputsAndOutputsModelToStep)1528 bool compare(const ExecutionStep* step, const PartitioningModel* model,
1529 std::shared_ptr<Device> device,
1530 std::map<uint32_t, uint32_t>* inputsAndOutputsModelToStep) {
1531 return (step->getDevice() == device) &&
1532 compare(step->getStepModel(),
1533 reinterpret_cast<const ModelBuilder*>(model->getHandle()),
1534 inputsAndOutputsModelToStep);
1535 }
1536
    // Verifies that "logicalStep" is an ExecutionStep that runs "model" on
    // "device" with exactly the given input/output remap vectors and
    // downstream-input set.  Reports mismatches through gtest assertions
    // (callers wrap this in ASSERT_NO_FATAL_FAILURE).  The expected remap
    // vectors are expressed in terms of "model"'s operand numbering; the
    // inputsAndOutputsModelToStep map produced by the graph comparison is used
    // to translate them into the step model's numbering before comparing.
    void compare(const std::shared_ptr<LogicalStep> logicalStep, const PartitioningModel* model,
                 std::shared_ptr<Device> device, const RemapVectorType& modelInputs,
                 const RemapVectorType& modelOutputs, const RemapVectorType& tempsAsStepModelInputs,
                 const StepModelOutputSetType& tempsAsStepModelOutputs,
                 const RemapVectorType& outputsAsStepModelInputs,
                 const std::set<uint32_t>& modelOutputsThatAreDownstreamInputs) {
        ASSERT_TRUE(logicalStep->isExecution());
        const ExecutionStep* step = logicalStep->executionStep();
        std::map<uint32_t, uint32_t> inputsAndOutputsModelToStep;
        // The graph comparison may itself raise fatal failures (via
        // buildDefinitionMap); propagate them before using its side-effect map.
        ASSERT_NO_FATAL_FAILURE(
                ASSERT_TRUE(compare(step, model, device, &inputsAndOutputsModelToStep)));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelInputs(),
                                        modelInputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelOutputs(),
                                        modelOutputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                        step->getTempsAsStepModelInputs(), tempsAsStepModelInputs));
        ASSERT_TRUE(compareStepModelOutputSets(inputsAndOutputsModelToStep,
                                               step->getTempsAsStepModelOutputs(),
                                               tempsAsStepModelOutputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                        step->getOutputsAsStepModelInputs(),
                                        outputsAsStepModelInputs));
        ASSERT_TRUE(modelOutputsThatAreDownstreamInputs ==
                    step->getModelOutputsThatAreDownstreamInputs());
    }
1563
1564 private:
compareRemapVectors(const std::map<uint32_t,uint32_t> & inputsAndOutputsModelToStep,const RemapVectorType & step,RemapVectorType model)1565 static bool compareRemapVectors(const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
1566 const RemapVectorType& step, RemapVectorType model) {
1567 std::transform(model.begin(), model.end(), model.begin(),
1568 [&inputsAndOutputsModelToStep](const RemapVectorType::value_type& val) {
1569 return std::make_pair(val.first,
1570 inputsAndOutputsModelToStep.at(val.second));
1571 });
1572 return step == model;
1573 }
1574
compareStepModelOutputSets(const std::map<uint32_t,uint32_t> & inputsAndOutputsModelToStep,const StepModelOutputSetType & step,const StepModelOutputSetType & model)1575 static bool compareStepModelOutputSets(
1576 const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
1577 const StepModelOutputSetType& step, const StepModelOutputSetType& model) {
1578 StepModelOutputSetType modelTransformed;
1579 std::transform(
1580 model.begin(), model.end(), std::inserter(modelTransformed, modelTransformed.end()),
1581 [&inputsAndOutputsModelToStep](const StepModelOutputSetType::value_type& val) {
1582 return std::make_pair(val.first, inputsAndOutputsModelToStep.at(val.second));
1583 });
1584 return step == modelTransformed;
1585 }
1586 };
1587
// Two-operation model under three device configurations: a single best device
// (SIMPLE plan on it), no device better than CPU (SIMPLE plan on CPU), and two
// devices that each support exactly one of the operations (COMPOUND plan with
// one step per device).
TEST_F(PartitioningTest, SimpleModel) {
    // opnd4 = op1(op0(opnd0, opnd1), opnd3) -- a two-operation chain.
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addFloatOperand();
    uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3);
    model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (two devices are each capable of everything, one is the best).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesA = makeDevices({{"bad", 0.9, ~0U}, {"good", 0.5, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    // Lower perf value ("good" at 0.5) wins under PREFER_LOW_POWER.
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "good");

    // Simple partition (two devices are each capable of everything, none better than CPU).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesC = makeDevices({{"bad", 1.1, ~0U}, {"bad2", 1.0, ~0U}});
    ExecutionPlan planC;
    ASSERT_EQ(model.partitionTheWork(devicesC, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planC),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planC.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planC.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(planC.forTest_simpleGetDevice(), DeviceManager::getCpuDevice());

    // Compound partition (two devices, each is capable of one of the
    // two operations). We could do more extensive checking here --
    // for example, verify that each step within the plan has the
    // correct (model and step model)x(inputs and outputs).
    // Device "0" supports only operation kind 0, device "1" only kind 1.
    const auto devicesB = makeDevices({{"0", 0.9, 1 << 0}, {"1", 0.5, 1 << 1}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planB.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(2));
    {
        // Build a model to compare against the step model from stepsB[0].
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_0(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[0],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},                          // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, b0Opnd2}},   // tempsAsStepModelOutputs
                        RemapVectorType{},                          // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[1].
        PartitioningModel modelB1;
        uint32_t b1Opnd2 = modelB1.addFloatOperand();
        uint32_t b1Opnd3 = modelB1.addFloatOperand();
        uint32_t b1Opnd4 = modelB1.addOperation2To1V1_0(1, b1Opnd2, b1Opnd3);
        // Note: In the partitioning algorithm, step model inputs follow
        // model inputs. In the original model "model", opnd2 is not
        // an input; so in the step model "modelB1", the corresponding
        // input b1Opnd2 is a step model input, and must follow the
        // model input b1Opnd3.
        modelB1.identifyInputsAndOutputs({b1Opnd3, b1Opnd2}, {b1Opnd4});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                stepsB[1], &modelB1, devicesB[1], RemapVectorType{{opnd3, b1Opnd3}},  // modelInputs
                RemapVectorType{{opnd4, b1Opnd4}},        // modelOutputs
                RemapVectorType{{opnd2, b1Opnd2}},        // tempsAsStepModelInputs
                StepModelOutputSetType{},                 // tempsAsStepModelOutputs
                RemapVectorType{},                        // outputsAsStepModelInputs
                {}));  // modelOutputsThatAreDownstreamInputs
    }
}
1680
// Model mixing operations introduced at different HAL versions (V1_0 .. V1_3):
// with a best all-capable V1_3 device we get a SIMPLE plan; with devices whose
// performance decreases as HAL version increases, each operation is routed to
// the oldest (fastest) device that supports it, yielding a four-step COMPOUND
// plan.
TEST_F(PartitioningTest, SliceModel) {
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    // One operation per HAL version; opnd5/opnd6 additionally consume results
    // of earlier operations, forcing cross-step data flow when partitioned.
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd0, opnd1);
    uint32_t opnd4 = model.addOperation2To1V1_1(0, opnd0, opnd1);
    uint32_t opnd5 = model.addOperation2To1V1_2(0, opnd2, opnd3);
    uint32_t opnd6 = model.addOperation1To1V1_3(0, opnd2);
    model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5, opnd6});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (V1_0, V1_1, V1_2, V1_3 devices are available; V1_3 has best perf).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesA = makeDevices({{"V1_0", 0.8, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.5, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_3");

    // Compound partition (V1_0, V1_1, V1_2 devices are available, in decreasing
    // order of performance; model is distributed across all three devices).
    const auto devicesB = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planB.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(4));
    {
        // Build a model to compare against the step model from stepsB[0].
        // Step 0: the lone V1_1 operation, on the "V1_1" device (devicesB[1]).
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_1(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[1],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{{opnd4, b0Opnd2}},                    // modelOutputs
                        RemapVectorType{},         // tempsAsStepModelInputs
                        StepModelOutputSetType{},  // tempsAsStepModelOutputs
                        RemapVectorType{},         // outputsAsStepModelInputs
                        {}));                      // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[1].
        // Step 1: both V1_0 operations, on the "V1_0" device (devicesB[0]).
        PartitioningModel modelB1;
        uint32_t b1Opnd0 = modelB1.addFloatOperand();
        uint32_t b1Opnd1 = modelB1.addFloatOperand();
        uint32_t b1Opnd2 = modelB1.addOperation2To1V1_0(0, b1Opnd0, b1Opnd1);
        uint32_t b1Opnd3 = modelB1.addOperation2To1V1_0(1, b1Opnd0, b1Opnd1);
        modelB1.identifyInputsAndOutputs({b1Opnd0, b1Opnd1}, {b1Opnd2, b1Opnd3});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        // Note that this is also an important test that we can detect
        // modelOutputsThatAreDownstreamInputs.
        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[1], &modelB1, devicesB[0],
                        RemapVectorType{{opnd0, b1Opnd0}, {opnd1, b1Opnd1}},  // modelInputs
                        RemapVectorType{{opnd2, b1Opnd2}},                    // modelOutputs
                        RemapVectorType{},                         // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd3, b1Opnd3}},  // tempsAsStepModelOutputs
                        RemapVectorType{},                         // outputsAsStepModelInputs
                        {0u}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[2].
        // Step 2: the V1_3 operation, consuming model output opnd2 as input.
        PartitioningModel modelB2;
        uint32_t b2Opnd0 = modelB2.addFloatOperand();
        uint32_t b2Opnd1 = modelB2.addOperation1To1V1_3(0, b2Opnd0);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs.
        modelB2.identifyInputsAndOutputs({b2Opnd0}, {b2Opnd1});
        modelB2.finish();
        ASSERT_TRUE(modelB2.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[2], &modelB2, devicesB[3], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd6, b2Opnd1}},                    // modelOutputs
                        RemapVectorType{},                  // tempsAsStepModelInputs
                        StepModelOutputSetType{},           // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b2Opnd0}},  // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[3].
        // Step 3: the V1_2 operation, consuming the temp opnd3 and the model
        // output opnd2.
        PartitioningModel modelB3;
        uint32_t b3Opnd0 = modelB3.addFloatOperand();
        uint32_t b3Opnd1 = modelB3.addFloatOperand();
        uint32_t b3Opnd2 = modelB3.addOperation2To1V1_2(0, b3Opnd0, b3Opnd1);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs. In the original model "model", opnd3 is a temp and
        // opnd2 is a model output; so in the step model "modelB3", the
        // corresponding inputs b3Opnd1 and b3Opnd0 must appear in
        // that order.
        modelB3.identifyInputsAndOutputs({b3Opnd1, b3Opnd0}, {b3Opnd2});
        modelB3.finish();
        ASSERT_TRUE(modelB3.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[3], &modelB3, devicesB[2], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd5, b3Opnd2}},                    // modelOutputs
                        RemapVectorType{{opnd3, b3Opnd1}},  // tempsAsStepModelInputs
                        StepModelOutputSetType{},           // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b3Opnd0}},  // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }

    // TODO: Make sure this still works when we have multiple devices
    // of same version available for slicing. An easy (?) choice would
    // be to route the two different V1_0 operations to different
    // devices.
}
1815
// A single V1_3 operation: only the V1_3 device can run anything, so the
// partitioner must produce a SIMPLE plan on that device even though it is the
// slowest one offered.
TEST_F(PartitioningTest, SliceModelToEmpty) {
    PartitioningModel model;
    uint32_t inputOperand = model.addFloatOperand();
    uint32_t outputOperand = model.addOperation1To1V1_3(0, inputOperand);
    model.identifyInputsAndOutputs({inputOperand}, {outputOperand});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Only the V1_3 device can handle any operations in the model.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devices = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                      {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                      {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                      {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "V1_3");
}
1840
TEST_F(PartitioningTest, Cpu) {
    // Here's a model where some operations execute only on the Cpu.
    // To make things interesting, we produce three partitions --
    // device, cpu, same-device.

    static const uint32_t kCpuOp = 1;
    static const uint32_t kDevOp = 2;

    // The one accelerator device supports only kDevOp; kCpuOp operations must
    // fall back to the CPU device.
    const auto devices = makeDevices({{"1", 0.5, 1 << kDevOp}});

    PartitioningModel model;

    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();

    // Two device-capable operations...
    uint32_t opnd2 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd2);

    // ...followed by two CPU-only operations consuming their results...
    uint32_t opnd4 = model.addOperation2To1V1_0(kCpuOp, opnd0, opnd3);
    uint32_t opnd5 = model.addOperation2To1V1_0(kCpuOp, opnd2, opnd4);

    uint32_t opnd6 = model.addFloatOperand();

    // ...followed by two more device-capable operations, forcing a third
    // partition back on the same device.
    uint32_t opnd7 = model.addOperation2To1V1_0(kDevOp, opnd3, opnd5);
    uint32_t opnd8 = model.addOperation2To1V1_0(kDevOp, opnd6, opnd7);

    model.identifyInputsAndOutputs({opnd0, opnd1, opnd6}, {opnd4, opnd8});
    model.finish();
    ASSERT_TRUE(model.isValid());

    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(3));
    {
        const auto& step0 = steps[0];

        // Build a model to compare against the step model from steps[0].
        // Step 0: the first two kDevOp operations, on the accelerator.
        PartitioningModel model0;
        uint32_t m0Opnd0 = model0.addFloatOperand();
        uint32_t m0Opnd1 = model0.addFloatOperand();
        uint32_t m0Opnd2 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd1);
        uint32_t m0Opnd3 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd2);
        model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2, m0Opnd3});
        model0.finish();
        ASSERT_TRUE(model0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(step0, &model0, devices[0],
                        RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},  // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, m0Opnd2},
                                               {opnd3, m0Opnd3}},  // tempsAsStepModelOutputs
                        RemapVectorType{},                         // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        const auto& step1 = steps[1];

        // Build a model to compare against the step model from steps[1].
        // Step 1: the two kCpuOp operations, on the CPU device.
        PartitioningModel model1;
        uint32_t m1Opnd0 = model1.addFloatOperand();
        uint32_t m1Opnd3 = model1.addFloatOperand();
        uint32_t m1Opnd4 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd0, m1Opnd3);
        uint32_t m1Opnd2 = model1.addFloatOperand();
        uint32_t m1Opnd5 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd2, m1Opnd4);
        model1.identifyInputsAndOutputs({m1Opnd0, m1Opnd3, m1Opnd2}, {m1Opnd4, m1Opnd5});
        model1.finish();
        ASSERT_TRUE(model1.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                step1, &model1, DeviceManager::getCpuDevice(),
                RemapVectorType{{opnd0, m1Opnd0}},                    // modelInputs
                RemapVectorType{{opnd4, m1Opnd4}},                    // modelOutputs
                RemapVectorType{{opnd3, m1Opnd3}, {opnd2, m1Opnd2}},  // tempsAsStepModelInputs
                StepModelOutputSetType{{opnd5, m1Opnd5}},             // tempsAsStepModelOutputs
                RemapVectorType{},                                    // outputsAsStepModelInputs
                {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        const auto& step2 = steps[2];

        // Build a model to compare against the step model from steps[2].
        // Step 2: the last two kDevOp operations, back on the accelerator.
        PartitioningModel model2;
        uint32_t m2Opnd3 = model2.addFloatOperand();
        uint32_t m2Opnd5 = model2.addFloatOperand();
        uint32_t m2Opnd7 = model2.addOperation2To1V1_0(kDevOp, m2Opnd3, m2Opnd5);
        uint32_t m2Opnd6 = model2.addFloatOperand();
        uint32_t m2Opnd8 = model2.addOperation2To1V1_0(kDevOp, m2Opnd6, m2Opnd7);
        model2.identifyInputsAndOutputs({m2Opnd6, m2Opnd3, m2Opnd5}, {m2Opnd8});
        model2.finish();
        ASSERT_TRUE(model2.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                step2, &model2, devices[0], RemapVectorType{{opnd6, m2Opnd6}},  // modelInputs
                RemapVectorType{{opnd8, m2Opnd8}},                              // modelOutputs
                RemapVectorType{{opnd3, m2Opnd3}, {opnd5, m2Opnd5}},  // tempsAsStepModelInputs
                StepModelOutputSetType{},                             // tempsAsStepModelOutputs
                RemapVectorType{},                                    // outputsAsStepModelInputs
                {}));  // modelOutputsThatAreDownstreamInputs
    }
}
1948
// Exercises the three partitioning modes (kPartitioningNo,
// kPartitioningWithFallback, kPartitioningWithoutFallback) in the presence of
// a simulated recoverable partitioning failure.
TEST_F(PartitioningTest, SetPartitioning) {
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    // opnd2 is created without specified dimensions (Dimensioned::NO).
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1, Dimensioned::NO);
    uint32_t opnd3 = model.addFloatOperand();
    uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3);
    model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // One device that can and should execute operation 0.
    const auto devices = makeDevices({{"hw", 0.5, (1 << 0)}});

    // Test kPartitioningNo. We should not even attempt partitioning,
    // so there should be a SIMPLE plan on CPU.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    PartitioningCompilation cPNo(&model, devices);
    ASSERT_EQ(cPNo.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
    // failPartitioning() arms a simulated failure; with kPartitioningNo it
    // should never trigger because partitioning is skipped entirely.
    ASSERT_EQ(cPNo.failPartitioning(), Result::NO_ERROR);
    ASSERT_EQ(cPNo.finish(), Result::NO_ERROR);
    ASSERT_EQ(cPNo.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(cPNo.getExecutionPlan().forTest_simpleGetDevice(), DeviceManager::getCpuDevice());

    // Test kPartitioningWithFallback. We should attempt partitioning, simulate
    // a recoverable failure, then fallback to CPU with a SIMPLE plan, and
    // finally return success. No need to compare the original model to the
    // model from the plan -- we didn't actually do any partitioning.
    PartitioningCompilation cPWithFallback(&model, devices);
    ASSERT_EQ(cPWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
              Result::NO_ERROR);
    ASSERT_EQ(cPWithFallback.failPartitioning(), Result::NO_ERROR);
    ASSERT_EQ(cPWithFallback.finish(), Result::NO_ERROR);
    ASSERT_EQ(cPWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(cPWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
              DeviceManager::getCpuDevice());

    // Test kPartitioningWithoutFallback. We should attempt partitioning,
    // simulate a recoverable failure, and fail.
    PartitioningCompilation cPWithoutFallback(&model, devices);
    ASSERT_EQ(cPWithoutFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    ASSERT_EQ(cPWithoutFallback.failPartitioning(), Result::NO_ERROR);
    ASSERT_EQ(cPWithoutFallback.finish(), Result::OP_FAILED);
    ASSERT_EQ(cPWithoutFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::ERROR);
}
1996
1997 // Regression test for http://b/69166603:
1998 // "partitioned compilation and execution yields wrong results when model output is step model
1999 // input"
TEST_F(PartitioningTest, ModelOutputAsStepModelInput) {
    // Model: out1 = op1(out0, out0), where out0 = op0(in0, in1). Both out0
    // and out1 are model outputs, so out0 is a model output that is also a
    // step model input.
    PartitioningModel model;
    const uint32_t in0 = model.addFloatOperand();
    const uint32_t in1 = model.addFloatOperand();
    const uint32_t out0 = model.addOperation2To1V1_0(0, in0, in1);
    const uint32_t out1 = model.addOperation2To1V1_0(1, out0, out0);
    model.identifyInputsAndOutputs({in0, in1}, {out0, out1});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Compound partition (two devices, each capable of one of the two
    // operations). We could do more extensive checking here -- for example,
    // verify that each step within the plan has the correct (model and step
    // model)x(inputs and outputs).
    const auto devices = makeDevices({{"0", 0.5, 1 << 0}, {"1", 0.5, 1 << 1}});
    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(2));
    {
        // Reference model to compare against the step model from steps[0].
        PartitioningModel step0Model;
        const uint32_t s0In0 = step0Model.addFloatOperand();
        const uint32_t s0In1 = step0Model.addFloatOperand();
        const uint32_t s0Out = step0Model.addOperation2To1V1_0(0, s0In0, s0In1);
        step0Model.identifyInputsAndOutputs({s0In0, s0In1}, {s0Out});
        step0Model.finish();
        ASSERT_TRUE(step0Model.isValid());
        ASSERT_NO_FATAL_FAILURE(
                compare(steps[0], &step0Model, devices[0],
                        RemapVectorType{{in0, s0In0}, {in1, s0In1}},  // modelInputs
                        RemapVectorType{{out0, s0Out}},               // modelOutputs
                        RemapVectorType{},                            // tempsAsStepModelInputs
                        StepModelOutputSetType{},                     // tempsAsStepModelOutputs
                        RemapVectorType{},                            // outputsAsStepModelInputs
                        {0u}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        // Reference model to compare against the step model from steps[1].
        PartitioningModel step1Model;
        const uint32_t s1In = step1Model.addFloatOperand();
        const uint32_t s1Out = step1Model.addOperation2To1V1_0(1, s1In, s1In);
        step1Model.identifyInputsAndOutputs({s1In}, {s1Out});
        step1Model.finish();
        ASSERT_TRUE(step1Model.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(steps[1], &step1Model, devices[1],
                        RemapVectorType{},               // modelInputs
                        RemapVectorType{{out1, s1Out}},  // modelOutputs
                        RemapVectorType{},               // tempsAsStepModelInputs
                        StepModelOutputSetType{},        // tempsAsStepModelOutputs
                        RemapVectorType{{out0, s1In}},   // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
}
2059
TEST_F(PartitioningTest, OemOperations) {
    // Trivial model consisting solely of an OEM operation.
    PartitioningModel model;
    const uint32_t input = model.addFloatOperand();
    const uint32_t output = model.addOperationOEM1To1(input);
    model.identifyInputsAndOutputs({input}, {output});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // The best driver that can run an OEM operation must be chosen, even
    // when it is not better than the CPU. No need to compare the original
    // model to the model from the plan -- we didn't actually do any
    // partitioning.
    {
        const auto devices = makeDevices({{"badOEM", 1.5, ~0U, PartitioningDriver::OEMYes},
                                          {"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo},
                                          {"goodOEM", 1.2, ~0U, PartitioningDriver::OEMYes}});
        PartitioningCompilation compilation(&model, devices);
        ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
        const auto& plan = compilation.getExecutionPlan();
        ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr);
        ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "goodOEM");
    }

    // Expect an error when no driver can run an OEM operation.
    {
        const auto devices = makeDevices({{"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo}});
        PartitioningCompilation compilation(&model, devices);
        ASSERT_EQ(compilation.finish(), Result::BAD_DATA);
    }

    // Expect an error when a driver can SUPPORT but not PREPARE an OEM operation.
    {
        const auto devices =
                makeDevices({{"indecisiveOEM", 0.5, ~0U, PartitioningDriver::OEMIndecisive}});
        PartitioningCompilation compilation(&model, devices);
        ASSERT_NE(compilation.finish(), Result::NO_ERROR);
    }

    // Expect an error when there are no drivers at all (only CPU fallback).
    {
        PartitioningCompilation compilation(&model, makeDevices({}) /* no drivers */);
        ASSERT_EQ(compilation.finish(), Result::BAD_DATA);
    }
}
2098
TEST_F(PartitioningTest, RelaxedFP) {
    const auto devices = makeDevices({// Best choice for non-relaxed model.
                                      {"f32", 0.8, 0.9 /* relaxed */, ~0U},
                                      // Best choice for relaxed model.
                                      {"f16", 0.9, 0.8 /* relaxed */, ~0U}});

    // Builds a one-operation model, optionally relaxed to FP16, and verifies
    // that the plan is SIMPLE on the expected device. No need to compare the
    // original model to the model from the plan -- we didn't actually do any
    // partitioning.
    const auto runTrivialModel = [&devices](bool relax, const char* expectedDevice) {
        SCOPED_TRACE(expectedDevice);
        PartitioningModel model;
        const uint32_t lhs = model.addFloatOperand();
        const uint32_t rhs = model.addFloatOperand();
        const uint32_t result = model.addOperation2To1V1_0(0, lhs, rhs);
        model.identifyInputsAndOutputs({lhs, rhs}, {result});
        model.relaxComputationFloat32toFloat16(relax);
        model.finish();
        ASSERT_TRUE(model.isValid());

        ExecutionPlan plan;
        ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                         ExecutePriority::DEFAULT, {}, &plan),
                  ANEURALNETWORKS_NO_ERROR);
        EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
        ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), expectedDevice);
    };

    ASSERT_NO_FATAL_FAILURE(runTrivialModel(false, "f32"));
    ASSERT_NO_FATAL_FAILURE(runTrivialModel(true, "f16"));
}
2131
TEST_F(PartitioningTest, Perf) {
    // A note on the type names used here, which can be confusing:
    //
    // OperandType (from HAL file), WrapperType (from NeuralNetworksWrapper.h),
    // and OperandCode (from NeuralNetworks.h) are different enums representing
    // the same type kind -- e.g., OperandType::FLOAT32, WrapperType::FLOAT32,
    // ANEURALNETWORKS_FLOAT32. Corresponding enumerators have the same value.
    //
    // WrapperOperandType is the NeuralNetworksWrapper.h representation of a
    // full operand type (WrapperType plus dimensions plus other attributes).

    const auto checkType = [](V1_3::OperandType operandType) {
        if (operandType == V1_3::OperandType::SUBGRAPH) {
            // SUBGRAPH capabilities are handled differently.
            return;
        }
        SCOPED_TRACE(toString(operandType));
        // Trivial model consisting solely of an OEM operation, chosen because
        // OEM operations allow inputs and outputs of any number and type.
        PartitioningModel model;
        const uint32_t input = model.addOperand(static_cast<WrapperType>(operandType));
        const uint32_t output = model.addOperationOEM1To1(input);
        model.identifyInputsAndOutputs({input}, {output});
        model.finish();
        ASSERT_TRUE(model.isValid());

        const V1_3::Capabilities baseCapabilities = ::android::nn::makeCapabilities(0.5);

        // Verifies that partitioning yields a SIMPLE plan on the expected
        // device. No need to compare the original model to the model from the
        // plan -- we didn't actually do any partitioning.
        const auto expectSimplePlanOn = [&model](const auto& devices, const char* expectedDevice) {
            ExecutionPlan plan;
            ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                             ExecutePriority::DEFAULT, {}, &plan),
                      ANEURALNETWORKS_NO_ERROR);
            EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
            ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), expectedDevice);
        };

        {
            // A device with better performance than "base" for this operand
            // type; it should win.
            V1_3::Capabilities goodCapabilities = baseCapabilities;
            update(&goodCapabilities, operandType, 0.25);

            const auto devices =
                    makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
                                 {"good", goodCapabilities, ~0U, PartitioningDriver::OEMYes}});
            ASSERT_NO_FATAL_FAILURE(expectSimplePlanOn(devices, "good"));
        }

        {
            // A device with worse performance than "base" for this operand
            // type; "base" should win.
            V1_3::Capabilities badCapabilities = baseCapabilities;
            update(&badCapabilities, operandType, 0.75);
            const auto devices =
                    makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
                                 {"bad", badCapabilities, ~0U, PartitioningDriver::OEMYes}});
            ASSERT_NO_FATAL_FAILURE(expectSimplePlanOn(devices, "base"));
        }
    };

    // Exercise every fundamental and OEM operand type.
    for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MIN);
         type <= static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MAX); ++type) {
        checkType(static_cast<V1_3::OperandType>(type));
    }
    for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MIN);
         type <= static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MAX); ++type) {
        checkType(static_cast<V1_3::OperandType>(type));
    }
}
2212
TEST_F(PartitioningTest, ZeroInputStepModel) {
    // Model whose first operation consumes only a constant (zero) operand.
    PartitioningModel model;
    const uint32_t constant = model.addFloatZeroOperand();
    const uint32_t temp = model.addOperation1To1V1_3(0, constant);
    const uint32_t input = model.addFloatOperand();
    const uint32_t output = model.addOperation2To1V1_0(1, temp, input);
    model.identifyInputsAndOutputs({input}, {output});
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // This will result in 2 partitions: deviceA handles op0, deviceB handles op1.
    // The partition for deviceA does not have any model input, and should result
    // in full CPU fallback.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    PartitioningCompilation compilation(&model, devices);
    ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
    const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(compilation.getExecutionPlan(), {cpuDeviceName});
}
2231
TEST_F(PartitioningTest, ZeroOutputStepModel) {
    // Model whose second operation's result is not a model output.
    PartitioningModel model;
    const uint32_t input0 = model.addFloatOperand();
    const uint32_t middle = model.addOperation1To1V1_3(0, input0);
    const uint32_t input1 = model.addFloatOperand();
    model.addOperation2To1V1_0(1, middle, input1);
    model.identifyInputsAndOutputs({input0, input1}, {middle});
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // This will result in 2 partitions: deviceA handles op0, deviceB handles op1.
    // The partition for deviceB does not have any model output, and should result
    // in full CPU fallback.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    PartitioningCompilation compilation(&model, devices);
    ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
    const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(compilation.getExecutionPlan(), {cpuDeviceName});
}
2250
2251 // Test dynamic temporaries and related parts of the partitioning implementation.
2252 //
// opnd0 = model input                      // tensor to pad
// opnd1 = model input                      // paddings
// opnd2 = PAD(opnd0, opnd1)                // model output
// opnd3 = PAD(opnd0, opnd1)
// opnd4 = ADD(opnd2, opnd3, FUSED_NONE)    // model output
2258 class DynamicTemporariesTest : public PartitioningTest {
2259 protected:
2260 // Call these functions in sequence in order to perform the test.
2261 // Call to declareOutputDimensions() can be omitted (see the default values below).
2262 // Call to declareHalVersions() can be omitted (defaults to HalVersion::LATEST).
2263 void declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,
2264 bool opnd3PartitionOutputSpecified,
2265 bool opnd4ModelOutputSpecified);
2266 void declareHalVersions(HalVersion padDeviceVersion, HalVersion addDeviceVersion);
2267 void makeModelAndValidate();
2268 void compileModelAndComparePlan(bool noFallback = true);
2269 void executeCompilationAndCompareOutput(bool opnd2ModelOutputBigEnough,
2270 bool opnd4ModelOutputBigEnough);
2271
2272 // set by declareOutputDimensions()
2273 bool mOpnd2ModelAndPartitionOutputSpecified = false;
2274 bool mOpnd3PartitionOutputSpecified = false;
2275 bool mOpnd4ModelOutputSpecified = false;
2276
2277 // set by declareHalVersions()
2278 HalVersion mPadDeviceVersion = HalVersion::LATEST;
2279 HalVersion mAddDeviceVersion = HalVersion::LATEST;
2280 HalVersion mMinDeviceVersion = HalVersion::LATEST; // minimum of the other two device versions
2281
2282 // created by makeModelAndValidate()
2283 std::optional<PartitioningModel> mModel;
2284 std::vector<uint32_t> mOpnds;
2285
2286 // created by compileModelAndComparePlan();
2287 std::optional<PartitioningCompilation> mCompilation;
2288
supportsOutputOfUnknownRank(HalVersion version)2289 static bool supportsOutputOfUnknownRank(HalVersion version) {
2290 return version >= HalVersion::V1_2;
2291 }
2292
dimensionedOutput(HalVersion version,bool specified)2293 static Dimensioned dimensionedOutput(HalVersion version, bool specified) {
2294 return specified ? Dimensioned::YES_4
2295 : supportsOutputOfUnknownRank(version) ? Dimensioned::NO
2296 : Dimensioned::RANK_1;
2297 }
2298 };
2299
declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,bool opnd3PartitionOutputSpecified,bool opnd4ModelOutputSpecified)2300 void DynamicTemporariesTest::declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,
2301 bool opnd3PartitionOutputSpecified,
2302 bool opnd4ModelOutputSpecified) {
2303 ASSERT_FALSE(mModel.has_value());
2304 mOpnd2ModelAndPartitionOutputSpecified = opnd2ModelAndPartitionOutputSpecified;
2305 mOpnd3PartitionOutputSpecified = opnd3PartitionOutputSpecified;
2306 mOpnd4ModelOutputSpecified = opnd4ModelOutputSpecified;
2307 }
2308
declareHalVersions(HalVersion padDeviceVersion,HalVersion addDeviceVersion)2309 void DynamicTemporariesTest::declareHalVersions(HalVersion padDeviceVersion,
2310 HalVersion addDeviceVersion) {
2311 ASSERT_FALSE(mModel.has_value());
2312 mPadDeviceVersion = padDeviceVersion;
2313 mAddDeviceVersion = addDeviceVersion;
2314 mMinDeviceVersion = min(padDeviceVersion, addDeviceVersion);
2315 }
2316
makeModelAndValidate()2317 void DynamicTemporariesTest::makeModelAndValidate() {
2318 ASSERT_FALSE(mModel.has_value());
2319 mModel = PartitioningModel();
2320
2321 uint32_t opndActivation = mModel->addIntScalarOperand(ANEURALNETWORKS_FUSED_NONE);
2322
2323 uint32_t opnd0 = mModel->addFloatOperand(Dimensioned::YES_2); // tensor to pad
2324 uint32_t opnd1 = mModel->addIntOperand(Dimensioned::RANK_2); // paddings
2325 uint32_t opnd2 = mModel->addExplicitOperationXTo1(
2326 ANEURALNETWORKS_PAD, {opnd0, opnd1}, WrapperType::TENSOR_FLOAT32,
2327 dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
2328 uint32_t opnd3 = mModel->addExplicitOperationXTo1(
2329 ANEURALNETWORKS_PAD, {opnd0, opnd1}, WrapperType::TENSOR_FLOAT32,
2330 dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
2331 uint32_t opnd4 = mModel->addExplicitOperationXTo1(
2332 ANEURALNETWORKS_ADD, {opnd2, opnd3, opndActivation}, WrapperType::TENSOR_FLOAT32,
2333 dimensionedOutput(mMinDeviceVersion, mOpnd4ModelOutputSpecified));
2334 mModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4});
2335 mModel->finish();
2336 ASSERT_TRUE(mModel->isValid());
2337
2338 mOpnds = {opnd0, opnd1, opnd2, opnd3, opnd4};
2339 }
2340
// Compiles mModel against two devices -- "pad" (supports only PAD) and "add"
// (supports only ADD) -- so the plan has one partition per device.
//
// noFallback == true: expect a successful compilation with a two-step
// COMPOUND plan; each step model is compared against a locally-built
// reference model, and mOpnds[3] is expected to be a dynamic temporary
// exactly when its dimensions were not declared specified.
//
// noFallback == false: expect the without-fallback compilation to fail, then
// retry with fallback and expect a SIMPLE plan on the CPU.
void DynamicTemporariesTest::compileModelAndComparePlan(bool noFallback) {
    ASSERT_TRUE(mModel.has_value());
    ASSERT_TRUE(!mCompilation.has_value());

    auto devices = makeDevices({{"pad",
                                 0.9,
                                 0U,
                                 PartitioningDriver::OEMNo,
                                 mPadDeviceVersion,
                                 {V1_3::OperationType::PAD}},
                                {"add",
                                 0.9,
                                 0U,
                                 PartitioningDriver::OEMNo,
                                 mAddDeviceVersion,
                                 {V1_3::OperationType::ADD}}});

    mCompilation = PartitioningCompilation(&mModel.value(), devices);
    ASSERT_EQ(mCompilation->setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    if (noFallback) {
        ASSERT_EQ(mCompilation->finish(), Result::NO_ERROR);
        const ExecutionPlan& planA = mCompilation->getExecutionPlan();
        // mOpnds[3] (the second PAD output) is consumed only by the ADD step;
        // it must be tracked as a dynamic temporary iff its dimensions were
        // left unspecified.
        EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries() ==
                    (mOpnd3PartitionOutputSpecified ? DynamicTemporariesType{}
                                                    : DynamicTemporariesType{mOpnds[3]}));
        ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
        const auto& stepsA = planA.forTest_compoundGetSteps();
        ASSERT_EQ(stepsA.size(), size_t(2));
        {
            // Build a model to compare against the step model from stepsA[0].
            // NOTE: operand numbering differs from makeModelAndValidate():
            // a0Opnd2 corresponds to mOpnds[3] and a0Opnd3 to mOpnds[2], as
            // shown by the remap vectors passed to compare() below.
            PartitioningModel modelA0;
            uint32_t a0Opnd0 = modelA0.addFloatOperand(Dimensioned::YES_2);
            uint32_t a0Opnd1 = modelA0.addIntOperand(Dimensioned::RANK_2);
            uint32_t a0Opnd2 = modelA0.addExplicitOperationXTo1(
                    ANEURALNETWORKS_PAD, {a0Opnd0, a0Opnd1}, WrapperType::TENSOR_FLOAT32,
                    dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
            uint32_t a0Opnd3 = modelA0.addExplicitOperationXTo1(
                    ANEURALNETWORKS_PAD, {a0Opnd0, a0Opnd1}, WrapperType::TENSOR_FLOAT32,
                    dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
            modelA0.identifyInputsAndOutputs({a0Opnd0, a0Opnd1}, {a0Opnd3, a0Opnd2});
            modelA0.finish();
            ASSERT_TRUE(modelA0.isValid());

            ASSERT_NO_FATAL_FAILURE(compare(
                    stepsA[0], &modelA0, devices[0],
                    RemapVectorType{{mOpnds[0], a0Opnd0}, {mOpnds[1], a0Opnd1}},  // modelInputs
                    RemapVectorType{{mOpnds[2], a0Opnd3}},                        // modelOutputs
                    RemapVectorType{},  // tempsAsStepModelInputs
                    StepModelOutputSetType{{mOpnds[3], a0Opnd2}},  // tempsAsStepModelOutputs
                    RemapVectorType{},                             // outputsAsStepModelInputs
                    {0u}));  // modelOutputsThatAreDownstreamInputs
        }
        {
            // Build a model to compare against the step model from stepsA[1].
            PartitioningModel modelA1;
            uint32_t a1Opnd2 = modelA1.addFloatOperand(
                    dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
            uint32_t a1Opnd3 = modelA1.addFloatOperand(
                    dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
            uint32_t a1Opnd4 = modelA1.addOperation2To1V1_0(
                    0, a1Opnd2, a1Opnd3,
                    dimensionedOutput(mMinDeviceVersion, mOpnd4ModelOutputSpecified));
            modelA1.identifyInputsAndOutputs({a1Opnd3, a1Opnd2}, {a1Opnd4});
            modelA1.finish();
            ASSERT_TRUE(modelA1.isValid());

            ASSERT_NO_FATAL_FAILURE(
                    compare(stepsA[1], &modelA1, devices[1], RemapVectorType{},  // modelInputs
                            RemapVectorType{{mOpnds[4], a1Opnd4}},               // modelOutputs
                            RemapVectorType{{mOpnds[3], a1Opnd3}},  // tempsAsStepModelInputs
                            StepModelOutputSetType{},               // tempsAsStepModelOutputs
                            RemapVectorType{{mOpnds[2], a1Opnd2}},  // outputsAsStepModelInputs
                            {}));  // modelOutputsThatAreDownstreamInputs
        }
    } else {
        // The no-fallback compilation must fail.
        ASSERT_EQ(mCompilation->finish(), Result::OP_FAILED);
        // Try again, expecting fallback to a SIMPLE plan on the CPU.
        mCompilation = PartitioningCompilation(&mModel.value(), devices);
        ASSERT_EQ(mCompilation->setPartitioning(DeviceManager::kPartitioningWithFallback),
                  Result::NO_ERROR);
        ASSERT_EQ(mCompilation->finish(), Result::NO_ERROR);
        ASSERT_EQ(mCompilation->getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(mCompilation->getExecutionPlan().forTest_simpleGetDevice(),
                  DeviceManager::getCpuDevice());
    }
}
2428
// Executes the compilation from compileModelAndComparePlan() and checks the
// outputs. PAD of {3, 5} with paddings {{1, 1}} yields {0, 3, 5, 0} (four
// elements); ADD then doubles it. Each model output buffer is provided as
// either "big enough" (4 elements) or deliberately undersized (3 elements);
// any undersized buffer must make compute() return OUTPUT_INSUFFICIENT_SIZE.
void DynamicTemporariesTest::executeCompilationAndCompareOutput(bool opnd2ModelOutputBigEnough,
                                                                bool opnd4ModelOutputBigEnough) {
    // An output declared with fully-specified dimensions cannot be paired
    // with an undersized buffer in this test.
    ASSERT_TRUE(opnd2ModelOutputBigEnough || !mOpnd2ModelAndPartitionOutputSpecified);
    ASSERT_TRUE(opnd4ModelOutputBigEnough || !mOpnd4ModelOutputSpecified);

    ASSERT_TRUE(mCompilation.has_value());
    WrapperExecution e(&mCompilation.value());

    // Input 0: the tensor to pad.
    WrapperOperandType padTensorValueType(WrapperType::TENSOR_FLOAT32, {2});
    const float padTensorValue[] = {3.0f, 5.0f};
    e.setInput(0, &padTensorValue, &padTensorValueType.operandType);

    // Input 1: one element of padding before and after.
    WrapperOperandType paddingsType(WrapperType::TENSOR_INT32, {1, 2});
    const int paddings[1][2] = {{1, 1}};
    e.setInput(1, &paddings, &paddingsType.operandType);

    // Binds a model output buffer of 4 (big enough) or 3 (undersized)
    // elements, pre-filled with -1.0f sentinels. The declared dimensions are
    // {elts} when specified; otherwise unknown rank ({}) when the device's
    // HAL version supports it, else unknown-dimension rank 1 ({0}).
    auto setOutput = [&e](uint32_t index, float* buffer, bool bigEnough, bool specified,
                          HalVersion version) {
        const uint32_t elts = bigEnough ? 4 : 3;
        std::fill(buffer, buffer + elts, -1.0f);
        using DimsType = std::vector<uint32_t>;
        WrapperOperandType outputType(
                WrapperType::TENSOR_FLOAT32,
                specified ? DimsType{elts}
                          : supportsOutputOfUnknownRank(version) ? DimsType{} : DimsType{0});
        e.setOutput(index, buffer, elts * sizeof(float), &outputType.operandType);
    };
    float opnd2ModelOutput[4], opnd4ModelOutput[4];
    setOutput(0, opnd2ModelOutput, opnd2ModelOutputBigEnough,
              mOpnd2ModelAndPartitionOutputSpecified, mPadDeviceVersion);
    setOutput(1, opnd4ModelOutput, opnd4ModelOutputBigEnough, mOpnd4ModelOutputSpecified,
              mAddDeviceVersion);

    const Result expectResult = opnd2ModelOutputBigEnough && opnd4ModelOutputBigEnough
                                        ? Result::NO_ERROR
                                        : Result::OUTPUT_INSUFFICIENT_SIZE;
    ASSERT_EQ(e.compute(), expectResult);
    if (expectResult == Result::NO_ERROR) {
        // Output 0 is the padded tensor; output 1 is its elementwise double.
        float expected[4] = {0.0f, padTensorValue[0], padTensorValue[1], 0.0f};
        ASSERT_TRUE(std::equal(std::begin(opnd2ModelOutput), std::end(opnd2ModelOutput),
                               std::begin(expected)));
        for (auto& elt : expected) {
            elt *= 2;
        }
        ASSERT_TRUE(std::equal(std::begin(opnd4ModelOutput), std::end(opnd4ModelOutput),
                               std::begin(expected)));
    }
}
2477
TEST_F(DynamicTemporariesTest, ModelOutputsSufficientSize) {
    // The purpose of this test is to confirm that the partitioner and the
    // runtime can handle a model output of unspecified dimensions but
    // sufficient size that is written by one partition and read by another.
    // opnd3 is specified, so no dynamic temporary is expected.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    // Both output buffers are big enough, so execution should succeed.
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2490
2491 // TODO(b/174851714): Fix the partitioner and re-enable this test.
TEST_F(DynamicTemporariesTest, DISABLED_ModelOutputsSufficientSize_V1_1) {
    // The purpose of this test is to confirm that the partitioner and the
    // runtime can handle a model output of unspecified dimensions but
    // sufficient size that is written by one partition and read by another.
    // Same as ModelOutputsSufficientSize, but with both devices at HAL V1_1.
    // Regression test for http://b/174851714.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_1,
                                               /*addDeviceVersion=*/HalVersion::V1_1));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2507
TEST_F(DynamicTemporariesTest, DynamicTemporariesUnspecifiedOutputs) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly. Note
    // that all model outputs are of unspecified dimensions but sufficient size.
    // declareOutputDimensions() is deliberately omitted: all three
    // declaration flags default to false, so opnd3 becomes a dynamic
    // temporary.

    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2517
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly. Note
    // that all model outputs are of specified dimensions; only opnd3 (the
    // partition-internal output) is unspecified, making it a dynamic
    // temporary.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2530
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs_V1_2) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly. Note
    // that all model outputs are of specified dimensions.
    // Same as DynamicTemporariesSpecifiedOutputs, but with both devices at
    // HAL V1_2. Regression test for http://b/174851714.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_2,
                                               /*addDeviceVersion=*/HalVersion::V1_2));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2546
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs_V1_1) {
    // The purpose of this test is to confirm that the partitioner cannot produce
    // dynamic temporaries for V1_1 but instead does whole-model CPU fallback. Note
    // that all model outputs are of specified dimensions.
    // Regression test for http://b/174851714.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_1,
                                               /*addDeviceVersion=*/HalVersion::V1_1));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    // noFallback=false: the no-fallback compilation is expected to fail, and
    // the retry with fallback to produce a SIMPLE CPU plan.
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan(false));
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2562
TEST_F(DynamicTemporariesTest, ModelOutputsInsufficientSizeWithDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the presence of a dynamic temporary.
    // declareOutputDimensions() is omitted, so all outputs default to
    // unspecified and opnd3 becomes a dynamic temporary.

    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    // Both output buffers are undersized; expect OUTPUT_INSUFFICIENT_SIZE.
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, false));
}
2571
TEST_F(DynamicTemporariesTest, ModelOutputsInsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary
    // (opnd3 is specified, so no dynamic temporary is created).

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    // Both output buffers are undersized; expect OUTPUT_INSUFFICIENT_SIZE.
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, false));
}
2583
TEST_F(DynamicTemporariesTest, ModelOutput2InsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary
    // (opnd3 is specified, so no dynamic temporary is created).

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    // Only the opnd2 buffer is undersized; expect OUTPUT_INSUFFICIENT_SIZE.
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, true));
}
2595
TEST_F(DynamicTemporariesTest, ModelOutput4InsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary.

    // Fully specify the partition-boundary operand's dimensions so that no
    // dynamic temporary remains in the plan.
    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    // (true, false): only model output 4 is undersized -- consistent with the
    // test name and with the Output2 variant's (false, true) above.
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, false));
}
2607
2608 // Test token rehashing during the compilation step.
2609 class CacheTest : public PartitioningTest {
2610 protected:
SetUp()2611 virtual void SetUp() override {
2612 PartitioningTest::SetUp();
2613 char cacheDirTemp[] = NN_TMP_DIR "/TestCompilationCachingXXXXXX";
2614 char* cacheDir = mkdtemp(cacheDirTemp);
2615 ASSERT_NE(cacheDir, nullptr);
2616 mCacheDir = cacheDir;
2617 }
2618
TearDown()2619 virtual void TearDown() override {
2620 if (!::testing::Test::HasFailure()) {
2621 std::filesystem::remove_all(mCacheDir);
2622 }
2623 PartitioningTest::TearDown();
2624 }
2625
expectUniqueTokens(const std::vector<std::vector<uint8_t>> & tokens)2626 void expectUniqueTokens(const std::vector<std::vector<uint8_t>>& tokens) {
2627 for (uint32_t i = 0; i < tokens.size(); i++) {
2628 SCOPED_TRACE(i);
2629 for (uint32_t j = i + 1; j < tokens.size(); j++) {
2630 SCOPED_TRACE(j);
2631 EXPECT_NE(tokens[i], tokens[j]);
2632 }
2633 }
2634 }
2635
2636 // Launch a single run of the partitioner against the provided model and device list with
2637 // cache token privided as tokenIn. Find the partition for the device with deviceName.
2638 // Record the transformed token into tokenOut. Two or more partitions may be on the same device.
2639 // "devicePartitionIndex" specifies the index of the ExecutionStep corresponding to the
2640 // partition of interest, within the sequence of ExecutionSteps on the target device.
2641 // If tokenIn is empty, no caching information will be provided to the partitioner.
getTransformedCacheTokenSingle(const PartitioningModel & model,const std::vector<std::shared_ptr<Device>> & devices,const char * deviceName,const std::vector<uint8_t> & tokenIn,ExecutePreference preference,ExecutePriority priority,uint32_t devicePartitionIndex,std::vector<uint8_t> * tokenOut)2642 void getTransformedCacheTokenSingle(const PartitioningModel& model,
2643 const std::vector<std::shared_ptr<Device>>& devices,
2644 const char* deviceName, const std::vector<uint8_t>& tokenIn,
2645 ExecutePreference preference, ExecutePriority priority,
2646 uint32_t devicePartitionIndex,
2647 std::vector<uint8_t>* tokenOut) {
2648 // Compile the model and get the execution plan.
2649 PartitioningCompilation compilation(&model, devices);
2650 if (!tokenIn.empty()) {
2651 compilation.setCaching(mCacheDir.c_str(), tokenIn);
2652 }
2653 compilation.setPreference(preference);
2654 compilation.setPriority(priority);
2655 ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
2656 const ExecutionPlan& plan = compilation.getExecutionPlan();
2657
2658 // Find the cache info for the device.
2659 const uint8_t* token = nullptr;
2660 if (plan.forTest_getKind() == ExecutionPlan::Kind::SIMPLE) {
2661 ASSERT_EQ(devicePartitionIndex, 0u);
2662 ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), deviceName);
2663 token = plan.forTest_simpleGetCacheToken();
2664 } else if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) {
2665 const auto& steps = plan.forTest_compoundGetSteps();
2666 uint32_t executionStepCount = 0;
2667 for (const auto& step : steps) {
2668 if (step->isExecution() &&
2669 step->executionStep()->getDevice()->getName() == deviceName) {
2670 if (devicePartitionIndex == executionStepCount) {
2671 token = step->executionStep()->forTest_getCacheToken();
2672 break;
2673 }
2674 executionStepCount++;
2675 }
2676 }
2677 } else {
2678 FAIL();
2679 }
2680
2681 // Retrieve the transformed token from the cache info.
2682 if (token == nullptr) {
2683 tokenOut->clear();
2684 } else {
2685 tokenOut->resize(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN);
2686 std::copy(token, token + ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, tokenOut->begin());
2687 }
2688 }
2689
2690 // A wrapper of getTransformedCacheTokenSingle, which runs getTransformedCacheTokenSingle
2691 // multiple times and checks if the transformation provides consistent result.
2692 // Two or more partitions may be on the same device. "devicePartitionIndex" specifies the index
2693 // of the ExecutionStep corresponding to the partition of interest, within the sequence of
2694 // ExecutionSteps on the target device.
getTransformedCacheToken(const PartitioningModel & model,const std::vector<std::shared_ptr<Device>> & devices,const char * deviceName,const std::vector<uint8_t> & tokenIn,ExecutePreference preference,ExecutePriority priority,std::vector<uint8_t> * tokenOut,uint32_t devicePartitionIndex=0)2695 void getTransformedCacheToken(const PartitioningModel& model,
2696 const std::vector<std::shared_ptr<Device>>& devices,
2697 const char* deviceName, const std::vector<uint8_t>& tokenIn,
2698 ExecutePreference preference, ExecutePriority priority,
2699 std::vector<uint8_t>* tokenOut,
2700 uint32_t devicePartitionIndex = 0) {
2701 getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference, priority,
2702 devicePartitionIndex, tokenOut);
2703
2704 // Test if the runtime maps to the same cache token every time for the same compilation
2705 // setup.
2706 for (uint32_t i = 0; i < 10; i++) {
2707 std::vector<uint8_t> token;
2708 SCOPED_TRACE(i);
2709 getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference,
2710 priority, devicePartitionIndex, &token);
2711 EXPECT_EQ(*tokenOut, token);
2712 }
2713 }
2714
createModelForCachingTests(PartitioningModel * model)2715 void createModelForCachingTests(PartitioningModel* model) {
2716 uint32_t opnd0 = model->addFloatOperand();
2717 uint32_t opnd1 = model->addFloatOperand();
2718 uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1);
2719 uint32_t opnd3 = model->addFloatOperand();
2720 uint32_t opnd4 = model->addOperation2To1V1_0(1, opnd2, opnd3);
2721 model->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
2722 model->finish();
2723 ASSERT_TRUE(model->isValid());
2724 }
2725
2726 // The first model returned in "models" is the main model.
createControlFlowModelForCachingTests(std::vector<std::unique_ptr<PartitioningModel>> * models)2727 void createControlFlowModelForCachingTests(
2728 std::vector<std::unique_ptr<PartitioningModel>>* models) {
2729 CHECK(models != nullptr);
2730
2731 auto trueModel = std::make_unique<PartitioningModel>();
2732 {
2733 const uint32_t opnd0 = trueModel->addFloatOperand();
2734 const uint32_t opnd1 = trueModel->addFloatOperand();
2735 const uint32_t opnd2 = trueModel->addOperation2To1V1_0(0, opnd0, opnd1);
2736 trueModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2737 trueModel->finish();
2738 ASSERT_TRUE(trueModel->isValid());
2739 }
2740
2741 auto falseModel = std::make_unique<PartitioningModel>();
2742 {
2743 const uint32_t opnd0 = falseModel->addFloatOperand();
2744 const uint32_t opnd1 = falseModel->addFloatOperand();
2745 const uint32_t opnd2 = falseModel->addOperation2To1V1_0(0, opnd0, opnd1);
2746 falseModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2747 falseModel->finish();
2748 ASSERT_TRUE(falseModel->isValid());
2749 }
2750
2751 auto mainModel = std::make_unique<PartitioningModel>();
2752 {
2753 const uint32_t opnd0 = mainModel->addBooleanOperand();
2754 const uint32_t opnd1 = mainModel->addFloatOperand();
2755 const uint32_t opnd2 = mainModel->addFloatOperand();
2756 const uint32_t opnd3 = mainModel->addFloatOperand();
2757 mainModel->addIfOperation(opnd0, *trueModel, *falseModel, {opnd1, opnd2}, {opnd3});
2758 mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
2759 mainModel->finish();
2760 ASSERT_TRUE(mainModel->isValid());
2761 }
2762
2763 models->clear();
2764 models->push_back(std::move(mainModel));
2765 models->push_back(std::move(trueModel));
2766 models->push_back(std::move(falseModel));
2767 }
2768
2769 std::string mCacheDir;
2770 };
2771
2772 // Test the case when no token is provided by the application and the execution plan has a
2773 // simple body.
TEST_F(CacheTest, CacheTokenNoneSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device capable of running the entire model, so the plan has a simple body.
    const auto devices = makeDevices({{"deviceA", 0.5, ~0U}});

    // An empty input token means the application supplied no caching information.
    const std::vector<uint8_t> emptyToken;
    std::vector<uint8_t> transformedToken;
    getTransformedCacheToken(model, devices, "deviceA", emptyToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &transformedToken);
    // With no application token, no transformed token should be produced either.
    EXPECT_TRUE(transformedToken.empty());
}
2789
2790 // Test if the runtime maps to different cache tokens for devices with different names in
2791 // execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentDeviceNamesSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // Two single-device configurations; each device can run the whole model.
    const auto devicesWithA = makeDevices({{"deviceA", 0.5, ~0U}});
    const auto devicesWithB = makeDevices({{"deviceB", 0.5, ~0U}});

    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenForA, tokenForB;
    getTransformedCacheToken(model, devicesWithA, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForA);
    getTransformedCacheToken(model, devicesWithB, "deviceB", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForB);
    // The device name feeds into token rehashing, so the two tokens must differ.
    expectUniqueTokens({tokenForA, tokenForB});
}
2810
2811 // Test if the runtime maps to different cache tokens for devices with different version strings in
2812 // execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // The same device name with two different version strings; each configuration
    // can run the whole model on its own.
    const auto devicesV10 = makeDevices({{"deviceA", "1.0", 0.5, ~0U}});
    const auto devicesV11 = makeDevices({{"deviceA", "1.1", 0.5, ~0U}});

    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenV10, tokenV11;
    getTransformedCacheToken(model, devicesV10, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenV10);
    getTransformedCacheToken(model, devicesV11, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenV11);
    // The version string feeds into token rehashing, so the two tokens must differ.
    expectUniqueTokens({tokenV10, tokenV11});
}
2831
2832 // Test if the runtime maps to different cache tokens for compilations with different preferences
2833 // in execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentPreferencesSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device capable of running the entire model.
    const auto devices = makeDevices({{"deviceA", 0.5, ~0U}});

    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenFast, tokenLowPower, tokenSustained;
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenFast);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
                             &tokenLowPower);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
                             &tokenSustained);
    // The execution preference feeds into token rehashing, so all three must differ.
    expectUniqueTokens({tokenFast, tokenLowPower, tokenSustained});
}
2854
2855 // TODO (b/207721221): add test for AIDL compilation hints.
2856 // Test if the runtime maps to different cache tokens for compilations with different priorities
2857 // in execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentPrioritiesSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device capable of running the entire model.
    const auto devices = makeDevices({{"deviceA", 0.5, ~0U}});

    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenLow, tokenMedium, tokenHigh;
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
                             &tokenLow);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
                             &tokenMedium);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
                             &tokenHigh);
    // The execution priority feeds into token rehashing, so all three must differ.
    expectUniqueTokens({tokenLow, tokenMedium, tokenHigh});
}
2878
2879 // Test if the runtime maps to different cache tokens for compilations with different tokens
2880 // provided by application in execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentTokensSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device capable of running the entire model.
    const auto devices = makeDevices({{"deviceA", 0.5, ~0U}});

    // Two distinct application-provided tokens: all bytes 0 vs. all bytes 1.
    const std::vector<uint8_t> zerosToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    const std::vector<uint8_t> onesToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
    std::vector<uint8_t> transformedZeros, transformedOnes;
    getTransformedCacheToken(model, devices, "deviceA", zerosToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &transformedZeros);
    getTransformedCacheToken(model, devices, "deviceA", onesToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &transformedOnes);
    // Different application tokens must map to different transformed tokens.
    expectUniqueTokens({transformedZeros, transformedOnes});
}
2899
2900 // Test the case when no token is provided by the application and the execution plan has a
2901 // compound body.
TEST_F(CacheTest, CacheTokenNoneCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // deviceA is left with the first operation only (deviceB takes the other),
    // producing a compound-body plan.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    // An empty input token means the application supplied no caching information.
    const std::vector<uint8_t> emptyToken;
    std::vector<uint8_t> transformedToken;
    getTransformedCacheToken(model, devices, "deviceA", emptyToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &transformedToken);
    EXPECT_TRUE(transformedToken.empty());
    getTransformedCacheToken(model, devices, "deviceB", emptyToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &transformedToken);
    // Neither step should get a transformed token when no application token exists.
    EXPECT_TRUE(transformedToken.empty());
}
2919
2920 // Test if the runtime maps to different cache tokens for devices with different names in
2921 // execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentDeviceNamesCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // In each configuration, deviceC takes the second operation, leaving the first
    // operation to deviceA (respectively deviceB) -- a compound-body plan.
    const auto devicesWithA = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});
    const auto devicesWithB = makeDevices({{"deviceB", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});

    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenForA, tokenForB;
    getTransformedCacheToken(model, devicesWithA, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForA);
    getTransformedCacheToken(model, devicesWithB, "deviceB", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForB);
    // Same partition shape, different device names -> different tokens.
    expectUniqueTokens({tokenForA, tokenForB});
}
2941
// Test if the runtime maps to different cache tokens for devices with different version strings
// in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // The same partitioning in both configurations (deviceA executes the first
    // operation only; deviceB takes the second), but deviceA reports a different
    // version string in each.
    const auto devicesV10 = makeDevices({{"deviceA", "1.0", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    const auto devicesV11 = makeDevices({{"deviceA", "1.1", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenV10, tokenV11;
    getTransformedCacheToken(model, devicesV10, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenV10);
    getTransformedCacheToken(model, devicesV11, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenV11);
    // The version string feeds into token rehashing, so the two tokens must differ.
    expectUniqueTokens({tokenV10, tokenV11});
}
2963
2964 // Test if the runtime maps to different cache tokens for compilations with different preferences
2965 // in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentPreferencesCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // deviceA is left with the first operation only (deviceB takes the other),
    // producing a compound-body plan.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenFast, tokenLowPower, tokenSustained;
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenFast);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
                             &tokenLowPower);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
                             &tokenSustained);
    // The execution preference feeds into token rehashing, so all three must differ.
    expectUniqueTokens({tokenFast, tokenLowPower, tokenSustained});
}
2986
2987 // Test if the runtime maps to different cache tokens for compilations with different priorities
2988 // in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentPrioritiesCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // deviceA is left with the first operation only (deviceB takes the other),
    // producing a compound-body plan.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenLow, tokenMedium, tokenHigh;
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
                             &tokenLow);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
                             &tokenMedium);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
                             &tokenHigh);
    // The execution priority feeds into token rehashing, so all three must differ.
    expectUniqueTokens({tokenLow, tokenMedium, tokenHigh});
}
3009
3010 // Test if the runtime maps to different cache tokens for compilations with different tokens
3011 // provided by application in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentTokensCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // deviceA is left with the first operation only (deviceB takes the other),
    // producing a compound-body plan.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    // Two distinct application-provided tokens: all bytes 0 vs. all bytes 1.
    const std::vector<uint8_t> zerosToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    const std::vector<uint8_t> onesToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
    std::vector<uint8_t> transformedZeros, transformedOnes;
    getTransformedCacheToken(model, devices, "deviceA", zerosToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &transformedZeros);
    getTransformedCacheToken(model, devices, "deviceA", onesToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &transformedOnes);
    // Different application tokens must map to different transformed tokens.
    expectUniqueTokens({transformedZeros, transformedOnes});
}
3030
3031 // Test if the runtime maps to different cache tokens for compilations with different partitioning
3032 // outcome in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentPartitionsCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // Three configurations that partition the same model differently: deviceA
    // gets the whole model, only the first operation, or only the second operation.
    const auto wholeModel = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 0U}});
    const auto firstOpOnly = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    const auto secondOpOnly = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 0}});

    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> wholeToken, firstOpToken, secondOpToken;
    getTransformedCacheToken(model, wholeModel, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &wholeToken);
    getTransformedCacheToken(model, firstOpOnly, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &firstOpToken);
    getTransformedCacheToken(model, secondOpOnly, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &secondOpToken);
    // Different partitions of the same model must map to different tokens.
    expectUniqueTokens({wholeToken, firstOpToken, secondOpToken});
}
3057
3058 // Test if the runtime maps different referenced models to different cache tokens.
TEST_F(CacheTest, CacheTokenDifferentReferenceModelPartitions) {
    std::vector<std::unique_ptr<PartitioningModel>> models;
    createControlFlowModelForCachingTests(&models);
    const auto& mainModel = *models[0];

    // deviceA can run both referenced models but does not support IF, so it ends
    // up with two separate partitions (ExecutionSteps).
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}});

    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> firstPartitionToken, secondPartitionToken;
    getTransformedCacheToken(mainModel, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &firstPartitionToken, /*devicePartitionIndex=*/0);
    getTransformedCacheToken(mainModel, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &secondPartitionToken, /*devicePartitionIndex=*/1);
    // Partitions derived from different referenced models must map to different tokens.
    expectUniqueTokens({firstPartitionToken, secondPartitionToken});
}
3078
3079 // Very basic tests of some of the PerformanceInfo functionality.
3080 // Placed in this file because partitioning is the consumer of this functionality.
// Stateless fixture: the PerfTest cases need no shared setup or teardown.
class PerfTest : public ::testing::Test {};
3082
TEST_F(PerfTest, Lookup) {
    // Map each OperandType to an arbitrary (but reproducible) performance value,
    // so we can verify that lookup returns exactly what update stored.
    const auto perfOf = [](V1_3::OperandType type) {
        return static_cast<float>(static_cast<uint32_t>(type));
    };

    // Apply fn to every OperandType in the inclusive range [lo, hi].
    const auto forEachTypeIn = [](V1_3::OperandTypeRange lo, V1_3::OperandTypeRange hi,
                                  auto&& fn) {
        for (uint32_t t = static_cast<uint32_t>(lo); t <= static_cast<uint32_t>(hi); ++t) {
            fn(static_cast<V1_3::OperandType>(t));
        }
    };

    V1_3::Capabilities capabilities = ::android::nn::makeCapabilities(-1.0f);

    // Store a distinct performance value for every fundamental and OEM type.
    forEachTypeIn(V1_3::OperandTypeRange::FUNDAMENTAL_MIN,
                  V1_3::OperandTypeRange::FUNDAMENTAL_MAX,
                  [&](V1_3::OperandType opType) { update(&capabilities, opType, perfOf(opType)); });
    forEachTypeIn(V1_3::OperandTypeRange::OEM_MIN, V1_3::OperandTypeRange::OEM_MAX,
                  [&](V1_3::OperandType opType) { update(&capabilities, opType, perfOf(opType)); });

    // Make sure lookup retrieves the values stored by update.
    forEachTypeIn(V1_3::OperandTypeRange::FUNDAMENTAL_MIN,
                  V1_3::OperandTypeRange::FUNDAMENTAL_MAX, [&](V1_3::OperandType opType) {
                      if (opType == V1_3::OperandType::SUBGRAPH) {
                          // SUBGRAPH capabilities are handled differently.
                          return;
                      }
                      SCOPED_TRACE(toString(opType));
                      EXPECT_EQ(lookupExecTime(capabilities, opType), perfOf(opType));
                  });
    forEachTypeIn(V1_3::OperandTypeRange::OEM_MIN, V1_3::OperandTypeRange::OEM_MAX,
                  [&](V1_3::OperandType opType) {
                      SCOPED_TRACE(toString(opType));
                      EXPECT_EQ(lookupExecTime(capabilities, opType), perfOf(opType));
                  });

    // A type that was never stored must report FLT_MAX as its execution time.
    const auto missingType = static_cast<V1_3::OperandType>(
            static_cast<uint32_t>(V1_3::OperandTypeRange::BASE_MAX) + 1);
    EXPECT_EQ(lookupExecTime(capabilities, missingType), FLT_MAX);
}
3126
3127 class ControlFlowPartitioningTest : public PartitioningTest {
3128 protected:
3129 // opnd0 --> +-----+
3130 // | op0 | --> opnd2
3131 // opnd1 --> +-----+
createBranchOrBodyModel(Dimensioned dimensioned)3132 std::unique_ptr<PartitioningModel> createBranchOrBodyModel(Dimensioned dimensioned) {
3133 auto model = std::make_unique<PartitioningModel>();
3134 const uint32_t opnd0 = model->addFloatOperand(dimensioned);
3135 const uint32_t opnd1 = model->addFloatOperand(dimensioned);
3136 const uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1, dimensioned);
3137 model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3138 model->finish();
3139 EXPECT_TRUE(model->isValid());
3140 return model;
3141 }
3142
3143 // opnd0 --> +-------+
3144 // | EQUAL | --> opnd2
3145 // opnd1 --> +-------+
createCondModel(Dimensioned dimensioned)3146 std::unique_ptr<PartitioningModel> createCondModel(Dimensioned dimensioned) {
3147 auto model = std::make_unique<PartitioningModel>();
3148 const uint32_t opnd0 = model->addFloatOperand(dimensioned);
3149 const uint32_t opnd1 = model->addFloatOperand(dimensioned);
3150 const uint32_t opnd2 = model->addExplicitOperationXTo1(
3151 ANEURALNETWORKS_EQUAL, {opnd0, opnd1}, WrapperType::TENSOR_BOOL8);
3152 model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3153 model->finish();
3154 EXPECT_TRUE(model->isValid());
3155 return model;
3156 }
3157
3158 // opnd0 --> +----+
3159 // opnd1 --> | IF | --> opnd3
3160 // opnd2 --> +----+
createIfModel(Dimensioned dimensionedMain=Dimensioned::YES,Dimensioned dimensionedThen=Dimensioned::YES,Dimensioned dimensionedElse=Dimensioned::YES)3161 std::vector<std::unique_ptr<PartitioningModel>> createIfModel(
3162 Dimensioned dimensionedMain = Dimensioned::YES,
3163 Dimensioned dimensionedThen = Dimensioned::YES,
3164 Dimensioned dimensionedElse = Dimensioned::YES) {
3165 auto thenModel = createBranchOrBodyModel(dimensionedThen);
3166 auto elseModel = createBranchOrBodyModel(dimensionedElse);
3167
3168 auto mainModel = std::make_unique<PartitioningModel>();
3169 const uint32_t opnd0 = mainModel->addBooleanOperand();
3170 const uint32_t opnd1 = mainModel->addFloatOperand(dimensionedMain);
3171 const uint32_t opnd2 = mainModel->addFloatOperand(dimensionedMain);
3172 const uint32_t opnd3 = mainModel->addFloatOperand(dimensionedMain);
3173 mainModel->addIfOperation(opnd0, *thenModel, *elseModel, {opnd1, opnd2}, {opnd3});
3174 mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
3175 mainModel->finish();
3176 EXPECT_TRUE(mainModel->isValid());
3177
3178 std::vector<std::unique_ptr<PartitioningModel>> models;
3179 models.push_back(std::move(mainModel));
3180 models.push_back(std::move(thenModel));
3181 models.push_back(std::move(elseModel));
3182 return std::move(models);
3183 }
3184
3185 // opnd0 --> +-------+
3186 // | WHILE | --> opnd2
3187 // opnd1 --> +-------+
createWhileModel(Dimensioned dimensionedMain=Dimensioned::YES,Dimensioned dimensionedCond=Dimensioned::YES,Dimensioned dimensionedBody=Dimensioned::YES)3188 std::vector<std::unique_ptr<PartitioningModel>> createWhileModel(
3189 Dimensioned dimensionedMain = Dimensioned::YES,
3190 Dimensioned dimensionedCond = Dimensioned::YES,
3191 Dimensioned dimensionedBody = Dimensioned::YES) {
3192 auto condModel = createCondModel(dimensionedCond);
3193 auto bodyModel = createBranchOrBodyModel(dimensionedBody);
3194
3195 auto mainModel = std::make_unique<PartitioningModel>();
3196 const uint32_t opnd0 = mainModel->addFloatOperand(dimensionedMain);
3197 const uint32_t opnd1 = mainModel->addFloatOperand(dimensionedMain);
3198 const uint32_t opnd2 = mainModel->addFloatOperand(dimensionedMain);
3199 mainModel->addWhileOperation(*condModel, *bodyModel, {opnd0, opnd1}, {opnd2});
3200 mainModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3201 mainModel->finish();
3202 EXPECT_TRUE(mainModel->isValid());
3203
3204 std::vector<std::unique_ptr<PartitioningModel>> models;
3205 models.push_back(std::move(mainModel));
3206 models.push_back(std::move(condModel));
3207 models.push_back(std::move(bodyModel));
3208 return std::move(models);
3209 }
3210
3211 void testIfUnknownSize(Dimensioned dimensionedMain, Dimensioned dimensionedThen,
3212 Dimensioned dimensionedElse);
3213 void testWhileUnknownSize(Dimensioned dimensionedMain, Dimensioned dimensionedThen,
3214 Dimensioned dimensionedElse);
3215 };
3216
TEST_F(ControlFlowPartitioningTest, IF_Interpreted) {
    const auto ifModels = createIfModel();

    // A V1_0 device: it can run the referenced branch models but has no IF
    // support, so the runtime must interpret the IF itself.
    const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});

    ExecutionPlan executionPlan;
    const auto status = ifModels[0]->partitionTheWork(
            devices, ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT, {},
            &executionPlan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    checkExecutionPlanSteps(executionPlan, {kIfStep, "V1_0", kGotoStep, "V1_0"});
}
3229
TEST_F(ControlFlowPartitioningTest, WHILE_Interpreted) {
    const auto whileModels = createWhileModel();

    // The V1_0 device can run the body model but supports neither WHILE nor the
    // condition model (because of EQUAL), so the condition falls back to the CPU
    // device and the WHILE itself is interpreted by the runtime.
    const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});

    ExecutionPlan executionPlan;
    const auto status = whileModels[0]->partitionTheWork(
            devices, ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT, {},
            &executionPlan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(executionPlan,
                            {kWhileStep, cpuDeviceName, kGotoStep, "V1_0", kGotoStep});
}
3244
// A device that supports IF itself gets the whole model as a single step.
TEST_F(ControlFlowPartitioningTest, IF_SimplePlan) {
    const auto models = createIfModel();

    // The device supports all operations.
    const auto devices = makeDevices(
            {{"ALL", 0.9, ~0U, PartitioningDriver::OEMNo, HalVersion::LATEST,
              {V1_3::OperationType::IF}}});

    ExecutionPlan plan;
    const int result = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(result, ANEURALNETWORKS_NO_ERROR);
    checkExecutionPlanSteps(plan, {"ALL"});
}
3262
// A device that supports WHILE and EQUAL itself gets the whole model as a
// single step.
TEST_F(ControlFlowPartitioningTest, WHILE_SimplePlan) {
    const auto models = createWhileModel();

    // The device supports all operations.
    const auto devices = makeDevices(
            {{"ALL", 0.9, ~0U, PartitioningDriver::OEMNo, HalVersion::LATEST,
              {V1_3::OperationType::WHILE, V1_3::OperationType::EQUAL}}});

    ExecutionPlan plan;
    const int result = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(result, ANEURALNETWORKS_NO_ERROR);
    checkExecutionPlanSteps(plan, {"ALL"});
}
3280
testIfUnknownSize(Dimensioned dimensionedMain,Dimensioned dimensionedThen,Dimensioned dimensionedElse)3281 void ControlFlowPartitioningTest::testIfUnknownSize(Dimensioned dimensionedMain,
3282 Dimensioned dimensionedThen,
3283 Dimensioned dimensionedElse) {
3284 if (dimensionedMain != Dimensioned::NO && dimensionedThen != Dimensioned::NO &&
3285 dimensionedElse != Dimensioned::NO) {
3286 // No unknown size.
3287 return;
3288 }
3289
3290 const auto models = createIfModel(dimensionedMain, dimensionedThen, dimensionedElse);
3291
3292 // The device supports all operations but the partitioner ignores its IF
3293 // support due to http://b/159076604#comment5.
3294 const auto devices = makeDevices({{"ALL",
3295 0.9,
3296 ~0U,
3297 PartitioningDriver::OEMNo,
3298 HalVersion::LATEST,
3299 {V1_3::OperationType::IF}}});
3300
3301 ExecutionPlan plan;
3302 ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3303 ExecutePriority::DEFAULT, {}, &plan),
3304 ANEURALNETWORKS_NO_ERROR);
3305 // The control flow interpreter does not support unknown size (b/132458982).
3306 checkExecutionPlanSteps(plan, {DeviceManager::getCpuDevice()->getName()});
3307 }
3308
// Exercises every combination of known/unknown dimensions for the main, then,
// and else models of an IF.
TEST_F(ControlFlowPartitioningTest, IF_UnknownSize) {
    for (Dimensioned dimensionedMain : {Dimensioned::NO, Dimensioned::YES}) {
        SCOPED_TRACE(testing::Message() << "dimensionedMain: " << toString(dimensionedMain));
        for (Dimensioned dimensionedThen : {Dimensioned::NO, Dimensioned::YES}) {
            SCOPED_TRACE(testing::Message() << "dimensionedThen: " << toString(dimensionedThen));
            for (Dimensioned dimensionedElse : {Dimensioned::NO, Dimensioned::YES}) {
                SCOPED_TRACE(testing::Message()
                             << "dimensionedElse: " << toString(dimensionedElse));
                testIfUnknownSize(dimensionedMain, dimensionedThen, dimensionedElse);
            }
        }
    }
}
3323
testWhileUnknownSize(Dimensioned dimensionedMain,Dimensioned dimensionedCond,Dimensioned dimensionedBody)3324 void ControlFlowPartitioningTest::testWhileUnknownSize(Dimensioned dimensionedMain,
3325 Dimensioned dimensionedCond,
3326 Dimensioned dimensionedBody) {
3327 if (dimensionedMain != Dimensioned::NO && dimensionedCond != Dimensioned::NO &&
3328 dimensionedBody != Dimensioned::NO) {
3329 // No unknown size.
3330 return;
3331 }
3332
3333 const auto models = createWhileModel(dimensionedMain, dimensionedCond, dimensionedBody);
3334
3335 // The device supports all operations but the partitioner ignores its WHILE
3336 // support due to http://b/159076604#comment5.
3337 const auto devices = makeDevices({{"ALL",
3338 0.9,
3339 ~0U,
3340 PartitioningDriver::OEMNo,
3341 HalVersion::LATEST,
3342 {V1_3::OperationType::WHILE, V1_3::OperationType::EQUAL}}});
3343
3344 ExecutionPlan plan;
3345 ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3346 ExecutePriority::DEFAULT, {}, &plan),
3347 ANEURALNETWORKS_NO_ERROR);
3348 // The control flow interpreter does not support unknown size (b/132458982).
3349 checkExecutionPlanSteps(plan, {DeviceManager::getCpuDevice()->getName()});
3350 }
3351
// Exercises every combination of known/unknown dimensions for the main, cond,
// and body models of a WHILE.
TEST_F(ControlFlowPartitioningTest, WHILE_UnknownSize) {
    for (Dimensioned dimensionedMain : {Dimensioned::NO, Dimensioned::YES}) {
        SCOPED_TRACE(testing::Message() << "dimensionedMain: " << toString(dimensionedMain));
        for (Dimensioned dimensionedCond : {Dimensioned::NO, Dimensioned::YES}) {
            SCOPED_TRACE(testing::Message() << "dimensionedCond: " << toString(dimensionedCond));
            for (Dimensioned dimensionedBody : {Dimensioned::NO, Dimensioned::YES}) {
                SCOPED_TRACE(testing::Message()
                             << "dimensionedBody: " << toString(dimensionedBody));
                testWhileUnknownSize(dimensionedMain, dimensionedCond, dimensionedBody);
            }
        }
    }
}
3366
3367 // Test the memory step role analysis of the partitioning implementation.
3368 class MemoryStepRoleTest : public PartitioningTest {
3369 protected:
3370 // A tuple of {device_name, input/output}
3371 using TestStepRole = std::tuple<std::string, IOType>;
3372
SetUp()3373 void SetUp() override {
3374 PartitioningTest::SetUp();
3375 mModel = std::make_unique<PartitioningModel>();
3376 }
3377
toString(SourceOperandIndex index)3378 static std::string toString(SourceOperandIndex index) {
3379 return "{" + std::to_string(index.first) + ", " + std::to_string(index.second) + "}";
3380 }
3381
toString(const std::set<TestStepRole> & roles)3382 static std::string toString(const std::set<TestStepRole>& roles) {
3383 std::stringstream ss;
3384 ss << "[ ";
3385 for (const auto& [deviceName, type] : roles) {
3386 ss << "{" << deviceName << ", " << (type == IOType::INPUT ? "INPUT" : "OUTPUT") << "} ";
3387 }
3388 ss << "]";
3389 return ss.str();
3390 }
3391
finishAndPartitionModelForDevices(const std::vector<std::shared_ptr<Device>> & devices)3392 void finishAndPartitionModelForDevices(const std::vector<std::shared_ptr<Device>>& devices) {
3393 mModel->finish();
3394 ASSERT_TRUE(mModel->isValid());
3395 ASSERT_EQ(mModel->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3396 ExecutePriority::DEFAULT, {}, &mPlan),
3397 ANEURALNETWORKS_NO_ERROR);
3398 }
3399
checkStepRolesOfInput(uint32_t index,const std::set<TestStepRole> & expected) const3400 void checkStepRolesOfInput(uint32_t index, const std::set<TestStepRole>& expected) const {
3401 SCOPED_TRACE("Input: " + std::to_string(index));
3402 std::set<TestStepRole> actual;
3403 mPlan.forEachStepRoleOfInput(
3404 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3405 actual.emplace(preparedModel->getDevice()->getName(), type);
3406 });
3407 EXPECT_TRUE(expected == actual)
3408 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3409 }
3410
checkStepRolesOfOutput(uint32_t index,const std::set<TestStepRole> & expected) const3411 void checkStepRolesOfOutput(uint32_t index, const std::set<TestStepRole>& expected) const {
3412 SCOPED_TRACE("Output: " + std::to_string(index));
3413 std::set<TestStepRole> actual;
3414 mPlan.forEachStepRoleOfOutput(
3415 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3416 actual.emplace(preparedModel->getDevice()->getName(), type);
3417 });
3418 EXPECT_TRUE(expected == actual)
3419 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3420 }
3421
checkStepRolesOfSourceOperand(SourceOperandIndex index,const std::set<TestStepRole> & expected) const3422 void checkStepRolesOfSourceOperand(SourceOperandIndex index,
3423 const std::set<TestStepRole>& expected) const {
3424 SCOPED_TRACE("SourceOperandIndex: " + toString(index));
3425 std::set<TestStepRole> actual;
3426 mPlan.forTest_compoundForEachStepRoleOfSourceOperand(
3427 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3428 actual.emplace(preparedModel->getDevice()->getName(), type);
3429 });
3430 EXPECT_TRUE(expected == actual)
3431 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3432 }
3433
3434 std::unique_ptr<PartitioningModel> mModel;
3435 ExecutionPlan mPlan;
3436 };
3437
3438 // Test a graph with 3 operations, each operation in a separate partition:
3439 // opnd2 = OP0(opnd0, opnd1)
3440 // opnd4 = OP1(opnd1, opnd3)
3441 // opnd5 = OP2(opnd2, opnd4)
TEST_F(MemoryStepRoleTest,NoControlFlow)3442 TEST_F(MemoryStepRoleTest, NoControlFlow) {
3443 const uint32_t opnd0 = mModel->addFloatOperand();
3444 const uint32_t opnd1 = mModel->addFloatOperand();
3445 const uint32_t opnd2 = mModel->addOperation2To1V1_0(0, opnd0, opnd1);
3446 const uint32_t opnd3 = mModel->addFloatOperand();
3447 const uint32_t opnd4 = mModel->addOperation2To1V1_0(1, opnd1, opnd3);
3448 const uint32_t opnd5 = mModel->addOperation2To1V1_0(2, opnd2, opnd4);
3449 mModel->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd2, opnd5});
3450
3451 // This will result in 3 partitions:
3452 // deviceA handles op0, deviceB handles op1, deviceC handles op2.
3453 const auto devices = makeDevices(
3454 {{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}, {"deviceC", 0.5, 1 << 2}});
3455 finishAndPartitionModelForDevices(devices);
3456 checkExecutionPlanSteps(mPlan, {"deviceB", "deviceA", "deviceC"});
3457
3458 // Check the step roles of the main model inputs and outputs:
3459 //
3460 // input0 and input2 are each exclusive for a single partition.
3461 checkStepRolesOfInput(0, {{"deviceA", IOType::INPUT}});
3462 checkStepRolesOfInput(2, {{"deviceB", IOType::INPUT}});
3463 // input1 is shared by two operations in different partitions.
3464 checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3465 // output0 is a model output that is a downstream input.
3466 checkStepRolesOfOutput(0, {{"deviceA", IOType::OUTPUT}, {"deviceC", IOType::INPUT}});
3467 // output1 is only used in a single partition.
3468 checkStepRolesOfOutput(1, {{"deviceC", IOType::OUTPUT}});
3469
3470 // Check the step roles of the partition boundary temporaries that we will allocate memory on
3471 // behalf of (see ExecutionPlan::makeController for the allocation logic):
3472 //
3473 // opnd4 is a partition boundary temporary.
3474 checkStepRolesOfSourceOperand({0, opnd4},
3475 {{"deviceB", IOType::OUTPUT}, {"deviceC", IOType::INPUT}});
3476 }
3477
3478 // Test a graph with an interpreted IF operation.
TEST_F(MemoryStepRoleTest,InterpretedIf)3479 TEST_F(MemoryStepRoleTest, InterpretedIf) {
3480 auto thenModel = std::make_unique<PartitioningModel>();
3481 const uint32_t thenOpnd0 = thenModel->addFloatOperand();
3482 const uint32_t thenOpnd1 = thenModel->addFloatOperand();
3483 const uint32_t thenOpnd2 = thenModel->addOperation2To1V1_0(0, thenOpnd0, thenOpnd1);
3484 thenModel->identifyInputsAndOutputs({thenOpnd0, thenOpnd1}, {thenOpnd2});
3485 thenModel->finish();
3486 EXPECT_TRUE(thenModel->isValid());
3487
3488 auto elseModel = std::make_unique<PartitioningModel>();
3489 const uint32_t elseOpnd0 = elseModel->addFloatOperand();
3490 const uint32_t elseOpnd1 = elseModel->addFloatOperand();
3491 const uint32_t elseOpnd2 = elseModel->addOperation2To1V1_0(1, elseOpnd0, elseOpnd1);
3492 elseModel->identifyInputsAndOutputs({elseOpnd0, elseOpnd1}, {elseOpnd2});
3493 elseModel->finish();
3494 EXPECT_TRUE(elseModel->isValid());
3495
3496 const uint32_t mainOpnd0 = mModel->addBooleanOperand();
3497 const uint32_t mainOpnd1 = mModel->addFloatOperand();
3498 const uint32_t mainOpnd2 = mModel->addFloatOperand();
3499 const uint32_t mainOpnd3 = mModel->addFloatOperand();
3500 mModel->addIfOperation(mainOpnd0, *thenModel, *elseModel, {mainOpnd1, mainOpnd2}, {mainOpnd3});
3501 mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});
3502
3503 // deviceA handles op0, deviceB handles op1.
3504 const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
3505 finishAndPartitionModelForDevices(devices);
3506 checkExecutionPlanSteps(mPlan, {kIfStep, "deviceA", kGotoStep, "deviceB"});
3507
3508 // Check the step roles of the main model inputs and outputs:
3509 //
3510 // input0 is a condition operand of the interpreted IF that will only be read by the runtime.
3511 checkStepRolesOfInput(0, {});
3512 // input1 and input2 are outer inputs of the interpreted IF. The memories may be directly used
3513 // by the input operands of the then and else model.
3514 checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3515 checkStepRolesOfInput(2, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3516 // output0 is the outer output of the interpreted IF. The memory may be directly
3517 // used by the output operands of the then and else model.
3518 checkStepRolesOfOutput(0, {{"deviceA", IOType::OUTPUT}, {"deviceB", IOType::OUTPUT}});
3519
3520 // There is no partition boundary temporary in this model that we will allocate memory on
3521 // behalf of (see ExecutionPlan::makeController for the allocation logic).
3522 }
3523
3524 // Test a graph with an interpreted WHILE operation.
TEST_F(MemoryStepRoleTest,InterpretedWhile)3525 TEST_F(MemoryStepRoleTest, InterpretedWhile) {
3526 // Condition model:
3527 // condOpnd3 = OP0(condOpnd0, condOpnd1)
3528 // condOpnd4 = EQUAL(condOpnd2, condOpnd3)
3529 auto condModel = std::make_unique<PartitioningModel>();
3530 const uint32_t condOpnd0 = condModel->addFloatOperand();
3531 const uint32_t condOpnd1 = condModel->addFloatOperand();
3532 const uint32_t condOpnd2 = condModel->addFloatOperand();
3533 const uint32_t condOpnd3 = condModel->addOperation2To1V1_0(0, condOpnd0, condOpnd1);
3534 const uint32_t condOpnd4 = condModel->addExplicitOperationXTo1(
3535 ANEURALNETWORKS_EQUAL, {condOpnd2, condOpnd3}, WrapperType::TENSOR_BOOL8);
3536 condModel->identifyInputsAndOutputs({condOpnd0, condOpnd1, condOpnd2}, {condOpnd4});
3537 condModel->finish();
3538 EXPECT_TRUE(condModel->isValid());
3539
3540 // Body model:
3541 // bodyOpnd3 = OP1(bodyOpnd0, bodyOpnd1)
3542 // bodyOpnd4 = OP1(bodyOpnd0, bodyOpnd2)
3543 auto bodyModel = std::make_unique<PartitioningModel>();
3544 const uint32_t bodyOpnd0 = bodyModel->addFloatOperand();
3545 const uint32_t bodyOpnd1 = bodyModel->addFloatOperand();
3546 const uint32_t bodyOpnd2 = bodyModel->addFloatOperand();
3547 const uint32_t bodyOpnd3 = bodyModel->addOperation2To1V1_0(1, bodyOpnd0, bodyOpnd1);
3548 const uint32_t bodyOpnd4 = bodyModel->addOperation2To1V1_0(1, bodyOpnd0, bodyOpnd2);
3549 bodyModel->identifyInputsAndOutputs({bodyOpnd0, bodyOpnd1, bodyOpnd2}, {bodyOpnd3, bodyOpnd4});
3550 bodyModel->finish();
3551 EXPECT_TRUE(bodyModel->isValid());
3552
3553 const uint32_t mainOpnd0 = mModel->addFloatOperand();
3554 const uint32_t mainOpnd1 = mModel->addFloatOperand();
3555 const uint32_t mainOpnd2 = mModel->addFloatOperand();
3556 const uint32_t mainOpnd3 = mModel->addFloatOperand();
3557 mModel->addWhileOperation(*condModel, *bodyModel, {mainOpnd0, mainOpnd1, mainOpnd2},
3558 {mainOpnd3});
3559 mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});
3560
3561 // deviceA handles the cond model, deviceB handles the body model.
3562 const auto devices = makeDevices({{"deviceA",
3563 0.8,
3564 ~0U,
3565 PartitioningDriver::OEMNo,
3566 HalVersion::LATEST,
3567 {V1_3::OperationType::EQUAL}},
3568 {"deviceB", 0.5, 1 << 1}});
3569 finishAndPartitionModelForDevices(devices);
3570 checkExecutionPlanSteps(mPlan, {kWhileStep, "deviceA", kGotoStep, "deviceB", kGotoStep});
3571
3572 // The subgraph indexes of the condition and body models of the WHILE operation.
3573 const uint32_t condModelIndex = 1;
3574 const uint32_t bodyModelIndex = 2;
3575
3576 // Check the step roles of the main model inputs and outputs:
3577 //
3578 // input0 (input-output), input1 (state-only), and input2 (input-only) are outer inputs of the
3579 // interpreted WHILE. The memories may be directly used by the input operands of the condition
3580 // and body models.
3581 checkStepRolesOfInput(0, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3582 checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3583 checkStepRolesOfInput(2, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3584 // output0 is an outer output of the interpreted WHILE that will only be written by the runtime.
3585 checkStepRolesOfOutput(0, {});
3586
3587 // Check the step roles of the partition boundary temporaries that we will allocate memory on
3588 // behalf of (see ExecutionPlan::makeController for the allocation logic):
3589 //
3590 // condOpnd4 is output of the interpreted WHILE condition model.
3591 checkStepRolesOfSourceOperand({condModelIndex, condOpnd4}, {{"deviceA", IOType::OUTPUT}});
3592 // bodyOpnd3 (input-output) and bodyOpnd4 (state-only) are outputs of the interpreted WHILE body
3593 // model. The memories may be directly used by the input operands of the condition and body
3594 // models.
3595 checkStepRolesOfSourceOperand(
3596 {bodyModelIndex, bodyOpnd3},
3597 {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceB", IOType::OUTPUT}});
3598 checkStepRolesOfSourceOperand(
3599 {bodyModelIndex, bodyOpnd4},
3600 {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceB", IOType::OUTPUT}});
3601 }
3602
3603 // Test a graph with nested interpreted control flow operations: a WHILE operation with IF operation
3604 // in the body model.
TEST_F(MemoryStepRoleTest,NestedInterpretedControlFlow)3605 TEST_F(MemoryStepRoleTest, NestedInterpretedControlFlow) {
3606 auto condModel = std::make_unique<PartitioningModel>();
3607 const uint32_t condOpnd0 = condModel->addFloatOperand();
3608 const uint32_t condOpnd1 = condModel->addFloatOperand();
3609 const uint32_t condOpnd2 = condModel->addBooleanOperand();
3610 const uint32_t condOpnd3 = condModel->addExplicitOperationXTo1(
3611 ANEURALNETWORKS_EQUAL, {condOpnd0, condOpnd1}, WrapperType::TENSOR_BOOL8);
3612 condModel->identifyInputsAndOutputs({condOpnd0, condOpnd1, condOpnd2}, {condOpnd3});
3613 condModel->finish();
3614 EXPECT_TRUE(condModel->isValid());
3615
3616 auto thenModel = std::make_unique<PartitioningModel>();
3617 const uint32_t thenOpnd0 = thenModel->addFloatOperand();
3618 const uint32_t thenOpnd1 = thenModel->addFloatOperand();
3619 const uint32_t thenOpnd2 = thenModel->addOperation2To1V1_0(0, thenOpnd0, thenOpnd1);
3620 thenModel->identifyInputsAndOutputs({thenOpnd0, thenOpnd1}, {thenOpnd2});
3621 thenModel->finish();
3622 EXPECT_TRUE(thenModel->isValid());
3623
3624 auto elseModel = std::make_unique<PartitioningModel>();
3625 const uint32_t elseOpnd0 = elseModel->addFloatOperand();
3626 const uint32_t elseOpnd1 = elseModel->addFloatOperand();
3627 const uint32_t elseOpnd2 = elseModel->addOperation2To1V1_0(1, elseOpnd0, elseOpnd1);
3628 elseModel->identifyInputsAndOutputs({elseOpnd0, elseOpnd1}, {elseOpnd2});
3629 elseModel->finish();
3630 EXPECT_TRUE(elseModel->isValid());
3631
3632 auto bodyModel = std::make_unique<PartitioningModel>();
3633 const uint32_t bodyOpnd0 = bodyModel->addFloatOperand();
3634 const uint32_t bodyOpnd1 = bodyModel->addFloatOperand();
3635 const uint32_t bodyOpnd2 = bodyModel->addBooleanOperand();
3636 const uint32_t bodyOpnd3 = bodyModel->addFloatOperand();
3637 bodyModel->addIfOperation(bodyOpnd2, *thenModel, *elseModel, {bodyOpnd0, bodyOpnd1},
3638 {bodyOpnd3});
3639 bodyModel->identifyInputsAndOutputs({bodyOpnd0, bodyOpnd1, bodyOpnd2}, {bodyOpnd3});
3640 bodyModel->finish();
3641 EXPECT_TRUE(bodyModel->isValid());
3642
3643 const uint32_t mainOpnd0 = mModel->addFloatOperand();
3644 const uint32_t mainOpnd1 = mModel->addFloatOperand();
3645 const uint32_t mainOpnd2 = mModel->addBooleanOperand();
3646 const uint32_t mainOpnd3 = mModel->addFloatOperand();
3647 mModel->addWhileOperation(*condModel, *bodyModel, {mainOpnd0, mainOpnd1, mainOpnd2},
3648 {mainOpnd3});
3649 mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});
3650
3651 // deviceA handles the cond model, deviceB handles the then model,
3652 // deviceC handles the else model.
3653 const auto devices = makeDevices({{"deviceA",
3654 0.8,
3655 ~0U,
3656 PartitioningDriver::OEMNo,
3657 HalVersion::LATEST,
3658 {V1_3::OperationType::EQUAL}},
3659 {"deviceB", 0.5, 1 << 0},
3660 {"deviceC", 0.5, 1 << 1}});
3661 finishAndPartitionModelForDevices(devices);
3662 checkExecutionPlanSteps(mPlan, {kWhileStep, "deviceA", kGotoStep, kIfStep, "deviceB", kGotoStep,
3663 "deviceC", kGotoStep});
3664
3665 // The subgraph indexes of the condition and body models of the WHILE operation.
3666 const uint32_t condModelIndex = 1;
3667 const uint32_t bodyModelIndex = 2;
3668
3669 // Check the step roles of the main model inputs and outputs:
3670 //
3671 // input0 and input1 are outer inputs of the interpreted WHILE. The memories may be directly
3672 // used by the input operands of the condition and body models, and then be directly used by the
3673 // input operands of the then and else model of the interpreted IF in the body model.
3674 checkStepRolesOfInput(
3675 0,
3676 {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceC", IOType::INPUT}});
3677 checkStepRolesOfInput(
3678 1,
3679 {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceC", IOType::INPUT}});
3680 // input2 is also an outer input of the interpreted WHILE. The memory has no step role in the
3681 // condition model. In the body model, the memory will be used by the condition operand of the
3682 // interpreted IF that will only be read by the runtime.
3683 checkStepRolesOfInput(2, {});
3684 // output0 is an outer output of the interpreted WHILE that will only be written by the runtime.
3685 checkStepRolesOfOutput(0, {});
3686
3687 // Check the step roles of the partition boundary temporaries that we will allocate memory on
3688 // behalf of (see ExecutionPlan::makeController for the allocation logic):
3689 //
3690 // condOpnd2 is output of the interpreted WHILE condition model.
3691 checkStepRolesOfSourceOperand({condModelIndex, condOpnd3}, {{"deviceA", IOType::OUTPUT}});
3692 // bodyOpnd3 is output of the interpreted WHILE body model. The memories may be directly used by
3693 // the input operands of the condition and body models, and then be directly used by the
3694 // input operands of the then and else model of the interpreted IF in the body model.
3695 checkStepRolesOfSourceOperand({bodyModelIndex, bodyOpnd3}, {{"deviceA", IOType::INPUT},
3696 {"deviceB", IOType::INPUT},
3697 {"deviceB", IOType::OUTPUT},
3698 {"deviceC", IOType::INPUT},
3699 {"deviceC", IOType::OUTPUT}});
3700 }
3701
3702 } // namespace
3703