/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <cmath>
#include <complex>
#include <functional>
#include <initializer_list>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/string_type.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/testing/util.h"  // IWYU pragma: keep
#include "tensorflow/lite/tools/optimize/quantization_utils.h"
#include "tensorflow/lite/tools/optimize/sparsity/format_converter.h"
#include "tensorflow/lite/type_to_tflitetype.h"

namespace tflite {

// A gmock matcher that checks that elements of a float vector match expected
// values to within a given tolerance.
std::vector<::testing::Matcher<float>> ArrayFloatNear(
    const std::vector<float>& values, float max_abs_error = 1e-5);

// A gmock matcher that checks that elements of a complex vector match expected
// values to within a given tolerance.
std::vector<::testing::Matcher<std::complex<float>>> ArrayComplex64Near(
    const std::vector<std::complex<float>>& values, float max_abs_error = 1e-5);
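
// A hedged usage sketch for these matchers (`m` and `out` are assumed to be a
// SingleOpModel and the index of a float output tensor, as in the
// SingleOpModel example further below):
//
//   EXPECT_THAT(m.ExtractVector<float>(out),
//               ::testing::ElementsAreArray(ArrayFloatNear({0.1f, 0.2f})));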

template <typename T>
inline std::vector<T> Quantize(const std::vector<float>& data, float scale,
                               int32_t zero_point) {
  std::vector<T> q;
  for (const auto& f : data) {
    q.push_back(static_cast<T>(std::max<float>(
        std::numeric_limits<T>::min(),
        std::min<float>(std::numeric_limits<T>::max(),
                        std::round(zero_point + (f / scale))))));
  }
  return q;
}

template <typename T>
inline std::vector<float> Dequantize(const std::vector<T>& data, float scale,
                                     int32_t zero_point) {
  std::vector<float> f;
  f.reserve(data.size());
  for (const T& q : data) {
    f.push_back(scale * (q - zero_point));
  }
  return f;
}
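
// For illustration (a hedged example, not exercised by any test here): with
// scale = 0.5 and zero_point = 0, Quantize<int8_t>({1.0f, -2.0f}, 0.5, 0)
// yields {2, -4}, and Dequantize<int8_t>({2, -4}, 0.5, 0) recovers
// {1.0f, -2.0f}.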

// A test model that contains a single operator. All operator inputs and
// outputs are external to the model, so the tests can directly access them.
// Typical usage:
//    SingleOpModel m;
//    int a = m.AddInput({TensorType_FLOAT32, a_shape});
//    int b = m.AddInput({TensorType_FLOAT32, b_shape});
//    int c = m.AddOutput({TensorType_FLOAT32, {}});
//    m.SetBuiltinOp(...);
//    m.BuildInterpreter({GetShape(a), GetShape(b)});
//    m.PopulateTensor(a, {...});
//    m.PopulateTensor(b, {...});
//    m.Invoke();
//    EXPECT_THAT(m.ExtractVector<float>(c), ArrayFloatNear({...}));
//

// A helper struct to construct test tensors. This is particularly useful for
// quantized tensors, which must have their scale and zero_point defined before
// the actual data is known. This mimics what happens in practice: quantization
// parameters are calculated during training or post-training.
struct TensorData {
  // NOLINTNEXTLINE
  TensorData(TensorType type = TensorType_FLOAT32, std::vector<int> shape = {},
             float min = 0.0f, float max = 0.0f, float scale = 0.0f,
             int32_t zero_point = 0, bool per_channel_quantization = false,
             std::vector<float> per_channel_quantization_scales = {},
             std::vector<int64_t> per_channel_quantization_offsets = {},
             int32_t channel_index = 0, std::vector<int> traversal_order = {},
             std::vector<TfLiteDimensionType> format = {},
             std::vector<int> block_size = {}, std::vector<int> block_map = {},
             std::vector<int> shape_signature = {})
      : type(type),
        shape(shape),
        min(min),
        max(max),
        scale(scale),
        zero_point(zero_point),
        per_channel_quantization(per_channel_quantization),
        per_channel_quantization_scales(
            std::move(per_channel_quantization_scales)),
        per_channel_quantization_offsets(
            std::move(per_channel_quantization_offsets)),
        channel_index(channel_index),
        traversal_order(traversal_order),
        format(format),
        block_size(block_size),
        block_map(block_map),
        shape_signature(shape_signature) {}
  TensorType type;
  std::vector<int> shape;
  float min;
  float max;
  float scale;
  int32_t zero_point;
  bool per_channel_quantization;
  std::vector<float> per_channel_quantization_scales;
  std::vector<int64_t> per_channel_quantization_offsets;
  int32_t channel_index;
  std::vector<int> traversal_order;
  std::vector<TfLiteDimensionType> format;
  std::vector<int> block_size;
  std::vector<int> block_map;
  std::vector<int> shape_signature;
};
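
// A hedged construction sketch (the shapes and ranges below are made-up
// values, chosen only to illustrate the fields): when a min/max range is
// given, SingleOpModel::AddTensor() later derives scale and zero_point from
// it; alternatively an explicit scale/zero_point pair can be set directly.
//
//   TensorData uint8_input{TensorType_UINT8, /*shape=*/{1, 2, 2, 1},
//                          /*min=*/-63.5f, /*max=*/64.0f};
//   TensorData int8_output{TensorType_INT8, /*shape=*/{1, 4},
//                          /*min=*/0, /*max=*/0,
//                          /*scale=*/1.0f / 128, /*zero_point=*/0};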

class SingleOpResolver : public OpResolver {
 public:
  SingleOpResolver(const BuiltinOperator op, TfLiteRegistration* registration,
                   int version = 1)
      : op_(op), registration_(*registration) {
    registration_.builtin_code = static_cast<int32_t>(op);
    registration_.version = version;
  }
  const TfLiteRegistration* FindOp(BuiltinOperator op,
                                   int version) const override {
    if (op == op_) {
      return &registration_;
    }
    return nullptr;
  }
  const TfLiteRegistration* FindOp(const char* op,
                                   int version) const override {
    return nullptr;
  }

 private:
  const BuiltinOperator op_;
  TfLiteRegistration registration_;
};

class SingleOpModel {
 public:
  SingleOpModel() {}
  ~SingleOpModel();

  // Set a delegate that is applied right after the graph is prepared. This is
  // useful for testing other runtimes like the NNAPI or GPU delegates.
  void SetDelegate(TfLiteDelegate* delegate) { delegate_ = delegate; }

  TfLiteStatus ApplyDelegate();

  // Copying or assignment is disallowed to simplify ownership semantics.
  SingleOpModel(const SingleOpModel&) = delete;
  SingleOpModel& operator=(const SingleOpModel&) = delete;

  // Add a TensorType input tensor and return its index.
  int AddInput(const TensorData& t);
  int AddVariableInput(const TensorData& t);

  int AddIntermediate(TensorType type, const std::vector<float>& scale,
                      const std::vector<int64_t>& zero_point);

  // Templated version of AddConstInput().
  template <typename T>
  int AddConstInput(const TensorData& t, std::initializer_list<T> data) {
    int id = 0;
    if (t.per_channel_quantization) {
      id = AddTensorPerChannelQuant(t, data);
    } else {
      id = AddTensor(t, data);
    }
    inputs_.push_back(id);
    return id;
  }
  template <typename T>
  int AddConstInput(TensorType type, std::initializer_list<T> data,
                    std::initializer_list<int> shape) {
    return AddConstInput(TensorData{type, shape}, data);
  }

  // TODO(b/166202747): Use a better way to do type specialization. Reduce
  // duplicate code in the two functions below.
  int AddConstSparseInput(const TensorData& t,
                          const std::vector<int8_t>& data) {
    int id = tensors_.size();
    const int dims_count = t.traversal_order.size();
    std::vector<int8_t> dense_data(data);

    tflite::optimize::sparsity::FormatConverter<int8_t> converter(
        t.shape, t.traversal_order, t.format, t.block_size, t.block_map);
    converter.DenseToSparse(dense_data.data());

    const auto& dim_metadata = converter.GetDimMetadata();
    const auto& sparse_data = converter.GetData();

    // Build sparsity parameter.
    std::vector<flatbuffers::Offset<DimensionMetadata>> fb_dim_metadata(
        dims_count);
    for (int i = 0; i < dims_count; i++) {
      const int metadata_idx = 2 * i;
      if (i < t.shape.size() &&
          t.format[t.traversal_order[i]] == kTfLiteDimSparseCSR) {
        auto array_segments =
            CreateInt32Vector(builder_,
                              builder_.CreateVector(dim_metadata[metadata_idx]))
                .Union();
        auto array_indices =
            CreateInt32Vector(
                builder_, builder_.CreateVector(dim_metadata[metadata_idx + 1]))
                .Union();
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_SPARSE_CSR, 0,
            SparseIndexVector_Int32Vector, array_segments,
            SparseIndexVector_Int32Vector, array_indices);
      } else {
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_DENSE, dim_metadata[metadata_idx][0]);
      }
    }

    flatbuffers::Offset<SparsityParameters> s_param = CreateSparsityParameters(
        builder_, builder_.CreateVector(t.traversal_order),
        builder_.CreateVector(t.block_map),
        builder_.CreateVector(fb_dim_metadata));

    int buffer_id = 0;
    if (!data.empty()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add compressed data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer = builder_.CreateVector(
          reinterpret_cast<const uint8_t*>(sparse_data.data()),
          sparse_data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(
        builder_, builder_.CreateVector<int>(t.shape), t.type,
        /*buffer=*/buffer_id,
        /*name=*/0, /*quantization=*/0, /*is_variable=*/false, s_param));

    inputs_.push_back(id);
    tensor_data_[id] = t;

    return id;
  }

  // Add a constant sparse tensor as input.
  template <typename T>
  int AddConstSparseInput(const TensorData& t, const std::vector<T>& data,
                          bool symmetric_quantize = false) {
    int id = tensors_.size();
    const int dims_count = t.traversal_order.size();
    std::vector<T> dense_data(data);

    tflite::optimize::sparsity::FormatConverter<T> converter(
        t.shape, t.traversal_order, t.format, t.block_size, t.block_map);
    converter.DenseToSparse(dense_data.data());

    const auto dim_metadata = converter.GetDimMetadata();
    const auto sparse_data = converter.GetData();

    // Build sparsity parameter.
    std::vector<flatbuffers::Offset<DimensionMetadata>> fb_dim_metadata(
        dims_count);
    for (int i = 0; i < dims_count; i++) {
      const int metadata_idx = 2 * i;
      if (i < t.shape.size() &&
          t.format[t.traversal_order[i]] == kTfLiteDimSparseCSR) {
        auto array_segments =
            CreateInt32Vector(builder_,
                              builder_.CreateVector(dim_metadata[metadata_idx]))
                .Union();
        auto array_indices =
            CreateInt32Vector(
                builder_, builder_.CreateVector(dim_metadata[metadata_idx + 1]))
                .Union();
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_SPARSE_CSR, 0,
            SparseIndexVector_Int32Vector, array_segments,
            SparseIndexVector_Int32Vector, array_indices);
      } else {
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_DENSE, dim_metadata[metadata_idx][0]);
      }
    }

    flatbuffers::Offset<SparsityParameters> s_param = CreateSparsityParameters(
        builder_, builder_.CreateVector(t.traversal_order),
        builder_.CreateVector(t.block_map),
        builder_.CreateVector(fb_dim_metadata));

    flatbuffers::Offset<QuantizationParameters> q_params = 0;
    int buffer_id = 0;
    if (!data.empty()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add compressed data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      if (symmetric_quantize) {
        const int length = sparse_data.size();
        std::vector<int8_t> q(length);
        float min, max, scaling_factor;
        tensor_utils::SymmetricQuantizeFloats(
            sparse_data.data(), length, q.data(), &min, &max, &scaling_factor);
        q_params = CreateQuantizationParameters(
            builder_, 0, 0, builder_.CreateVector<float>({scaling_factor}),
            builder_.CreateVector<int64_t>({0}));
        auto data_buffer = builder_.CreateVector(
            reinterpret_cast<const uint8_t*>(q.data()), q.size());
        buffers_.push_back(CreateBuffer(builder_, data_buffer));
      } else {
        auto data_buffer = builder_.CreateVector(
            reinterpret_cast<const uint8_t*>(sparse_data.data()),
            sizeof(T) * sparse_data.size());
        buffers_.push_back(CreateBuffer(builder_, data_buffer));
      }
    }

    tensors_.push_back(
        CreateTensor(builder_, builder_.CreateVector<int>(t.shape),
                     symmetric_quantize ? TensorType_INT8 : t.type,
                     /*buffer=*/buffer_id,
                     /*name=*/0, q_params, /*is_variable=*/false, s_param));

    inputs_.push_back(id);
    tensor_data_[id] = t;

    return id;
  }

  // Add a null input tensor (optional input) and return kTfLiteOptionalTensor.
  int AddNullInput();

  // Add a TensorType output tensor and return its index.
  int AddOutput(const TensorData& t);

  template <typename T>
  void QuantizeAndPopulate(int index, const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto q = Quantize<T>(data, t->params.scale, t->params.zero_point);
    PopulateTensor(index, 0, q.data(), q.data() + q.size());
  }

  void SymmetricQuantizeAndPopulate(int index,
                                    const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, reinterpret_cast<uint8_t*>(q.data()),
                   reinterpret_cast<uint8_t*>(q.data() + q.size()));
  }

  void SignedSymmetricQuantizeAndPopulate(int index,
                                          const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size());
  }

  // Quantize and populate data for filter with per channel quantization.
  void PerChannelSymmetricQuantizeAndPopulate(
      int index, const std::vector<float>& input_data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    const int channel_index = params->quantized_dimension;

    std::vector<int32_t> shape(t->dims->size);
    for (size_t i = 0; i < shape.size(); ++i) {
      shape[i] = t->dims->data[i];
    }
    const int32_t num_inputs = input_data.size();
    const int32_t num_channel = shape[channel_index];
    std::vector<int8_t> quantized_output(num_inputs);
    std::vector<float> scales_inv(num_channel);
    for (int i = 0; i < num_channel; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      scales_inv[i] = 1.0f / scale;
    }
    optimize::utils::SymmetricPerChannelQuantizeValues(
        input_data.data(), scales_inv, shape, channel_index, &quantized_output);

    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }

  template <typename T>
  void PerChannelQuantizeBiasPopulateTensor(
      const std::vector<float>& input_data, int index,
      TfLiteAffineQuantization* params) {
    const int32_t num_inputs = input_data.size();
    std::vector<T> quantized_output(num_inputs);
    for (int i = 0; i < num_inputs; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      quantized_output[i] = input_data[i] / scale;
    }
  }

  template <typename T>
  void PerChannelQuantizeBiasPopulateTensor(
      int index, const std::vector<float>& input_data,
      const TfLiteAffineQuantization* params) {
    const int32_t num_inputs = input_data.size();
    std::vector<T> quantized_output(num_inputs);
    for (int i = 0; i < num_inputs; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      quantized_output[i] = input_data[i] / scale;
    }
    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }

  // Quantize and populate data for bias with per channel quantization.
  void PerChannelQuantizeBias(int index, const std::vector<float>& input_data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    CHECK(t->type == kTfLiteInt32 || t->type == kTfLiteInt64);
    if (t->type == kTfLiteInt32) {
      PerChannelQuantizeBiasPopulateTensor<int32_t>(index, input_data, params);
    } else {
      PerChannelQuantizeBiasPopulateTensor<int64_t>(index, input_data, params);
    }
  }

  const std::vector<int>& GetShape(int id) { return tensor_data_.at(id).shape; }

  float GetScale(int id) { return tensor_data_.at(id).scale; }
  int32_t GetZeroPoint(int id) { return tensor_data_.at(id).zero_point; }

  // Define the operator in this model.
  void SetBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    flatbuffers::Offset<void> builtin_options);
  void SetCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration);

  // Allocate tensors and apply delegate.
  // Note that this is called by default in BuildInterpreter().
  void AllocateAndDelegate(bool apply_delegate);

  // Build the interpreter for this model. Also, resize and allocate all
  // tensors given the shapes of the inputs.
  // Note: 'apply_delegate' also serves to tell whether default TfLite delegates
  // should be applied implicitly for a test case. For example, when testing the
  // specific implementation of a TfLite delegate, it might be necessary to set
  // this to false.
  void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
                        int num_threads, bool allow_fp32_relax_to_fp16,
                        bool apply_delegate, bool allocate_and_delegate = true);

  void BuildInterpreter(std::vector<std::vector<int>> input_shapes);

  // Executes inference, asserting success.
  void Invoke();

  // Executes inference *without* asserting success.
  TfLiteStatus InvokeUnchecked();

  void PopulateStringTensor(int index, const std::vector<string>& content) {
    auto tensor = interpreter_->tensor(index);
    DynamicBuffer buf;
    for (const string& s : content) {
      buf.AddString(s.data(), s.length());
    }
    buf.WriteToTensor(tensor, /*new_shape=*/nullptr);
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with vector-taking variant below.
  template <typename T>
  void PopulateTensor(int index, const std::initializer_list<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK_EQ(t->type, typeToTfLiteType<T>())
          << "Type mismatch for tensor with index " << index << ". Requested "
          << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
          << TfLiteTypeGetName(t->type) << ".";
      LOG(FATAL) << "Unknown tensor error.";
    }
    for (const T& f : data) {
      *v = f;
      ++v;
    }
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with initializer_list-taking variant
  // above.
  template <typename T>
  void PopulateTensor(int index, const std::vector<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK_EQ(t->type, typeToTfLiteType<T>())
          << "Type mismatch for tensor with index " << index << ". Requested "
          << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
          << TfLiteTypeGetName(t->type) << ".";
      LOG(FATAL) << "Unknown tensor error.";
    }
    for (const T& f : data) {
      *v = f;
      ++v;
    }
  }

  // Partially populate the tensor, starting at the given offset.
  template <typename T>
  void PopulateTensor(int index, int offset, T* begin, T* end) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK(v) << "Type mismatch for tensor with index " << index
               << ". Requested " << typeToTfLiteType<T>() << ", got "
               << t->type;
    }
    memcpy(v + offset, begin, (end - begin) * sizeof(T));
  }

  // Return a vector with the flattened contents of a tensor.
  template <typename T>
  std::vector<T> ExtractVector(int index) const {
    const T* v = interpreter_->typed_tensor<T>(index);
    const auto* tensor = interpreter_->tensor(index);
    CHECK(v) << "Could not extract vector at index: " << index;
    int tensor_size;
    if (tensor->sparsity) {
      // Getting the size of the sparse buffer this way is based on the
      // assumption that the last dimension of the tensor is a compressed
      // dimension.
      tensor_size = tensor->sparsity
                        ->dim_metadata[tensor->sparsity->dim_metadata_size - 1]
                        .array_indices->size;
    } else {
      tensor_size = GetTensorSize(index);
    }

    return std::vector<T>(v, v + tensor_size);
  }

  // Return the TFLite model buffer, only available after BuildInterpreter.
  const uint8_t* GetModelBuffer() { return builder_.GetBufferPointer(); }

  std::vector<int> GetTensorShape(int index) {
    std::vector<int> result;
    TfLiteTensor* t = interpreter_->tensor(index);
    result.reserve(t->dims->size);
    for (int i = 0; i < t->dims->size; ++i) {
      result.push_back(t->dims->data[i]);
    }
    return result;
  }

  void SetNumThreads(int num_threads) {
    CHECK(interpreter_ != nullptr);
    interpreter_->SetNumThreads(num_threads);
  }

  void SetResolver(std::unique_ptr<OpResolver> resolver) {
    resolver_ = std::move(resolver);
  }

  // Indicate whether the test has the NNAPI delegate applied.
  static bool GetForceUseNnapi();
  int CountOpsExecutedByCpuKernel();

 protected:
  int32_t GetTensorSize(int index) const;

  flatbuffers::FlatBufferBuilder builder_;
  std::unique_ptr<tflite::Interpreter> interpreter_;
  std::unique_ptr<OpResolver> resolver_;

  std::vector<flatbuffers::Offset<OperatorCode>> opcodes_;
  std::vector<flatbuffers::Offset<Operator>> operators_;
  std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;

  template <typename T>
  int AddTensor(TensorData t, std::initializer_list<T> data,
                bool is_variable = false) {
    int id = tensors_.size();

    // This is slightly different depending on whether we are adding a
    // quantized or a regular tensor.
    bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);

    flatbuffers::Offset<QuantizationParameters> q_params = 0;

    if (is_quantized) {
      if (t.min != 0 || t.max != 0) {
        if (t.type == TensorType_UINT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<uint8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT32) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int32_t>(t.min, t.max);
        } else if (t.type == TensorType_INT16) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int16_t>(t.min, t.max);
        } else {
          LOG(FATAL) << "No support for the requested quantized type";
        }
        t.min = 0;
        t.max = 0;
      }

      q_params = CreateQuantizationParameters(
          builder_, /*min=*/0, /*max=*/0,
          builder_.CreateVector<float>({t.scale}),
          builder_.CreateVector<int64_t>({t.zero_point}));
    }

    int buffer_id = 0;
    if (data.size()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer =
          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
                                sizeof(T) * data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(
        builder_, builder_.CreateVector<int>(t.shape), t.type,
        /*buffer=*/buffer_id,
        /*name=*/0, q_params, is_variable,
        /*sparsity=*/0, builder_.CreateVector<int>(t.shape_signature)));

    tensor_data_[id] = t;

    return id;
  }

 private:
  template <typename T>
  std::pair<float, int32_t> QuantizationParams(float f_min, float f_max) {
    int32_t zero_point = 0;
    float scale = 0;
    const T qmin = std::numeric_limits<T>::min();
    const T qmax = std::numeric_limits<T>::max();
    const float qmin_double = qmin;
    const float qmax_double = qmax;
    // 0 should always be a representable value. Let's assume that the initial
    // min,max range contains 0.
    CHECK_LE(f_min, 0);
    CHECK_GE(f_max, 0);
    if (f_min == f_max) {
      // Special case where the min,max range is a point. Should be {0}.
      CHECK_EQ(f_min, 0);
      CHECK_EQ(f_max, 0);
      return {scale, zero_point};
    }

    // General case.
    //
    // First determine the scale.
    scale = (f_max - f_min) / (qmax_double - qmin_double);

    // Zero-point computation.
    // First the initial floating-point computation. The zero-point can be
    // determined from solving an affine equation for any known pair
    // (real value, corresponding quantized value).
    // We know two such pairs: (rmin, qmin) and (rmax, qmax).
    // The arithmetic error on the zero point computed from either pair
    // will be roughly machine_epsilon * (sum of absolute values of terms),
    // so we want to use the variant that adds the smaller terms.
    const float zero_point_from_min = qmin_double - f_min / scale;
    const float zero_point_from_max = qmax_double - f_max / scale;

    const float zero_point_from_min_error =
        std::abs(qmin_double) + std::abs(f_min / scale);

    const float zero_point_from_max_error =
        std::abs(qmax_double) + std::abs(f_max / scale);

    const float zero_point_double =
        zero_point_from_min_error < zero_point_from_max_error
            ? zero_point_from_min
            : zero_point_from_max;

    // Now we need to nudge the zero point to be an integer
    // (our zero points are integer, and this is motivated by the requirement
    // to be able to represent the real value "0" exactly as a quantized value,
    // which is required in multiple places, for example in Im2col with SAME
    // padding).

    T nudged_zero_point = 0;
    if (zero_point_double < qmin_double) {
      nudged_zero_point = qmin;
    } else if (zero_point_double > qmax_double) {
      nudged_zero_point = qmax;
    } else {
      nudged_zero_point = static_cast<T>(std::round(zero_point_double));
    }

    // The zero point should always be in the range of quantized values,
    // [qmin, qmax].
    CHECK_GE(nudged_zero_point, qmin);
    CHECK_LE(nudged_zero_point, qmax);

    zero_point = nudged_zero_point;
    // Finally, return the values.
    return {scale, zero_point};
  }
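
  // A worked example for QuantizationParams() above (illustrative values
  // only): for uint8_t with f_min = -63.5 and f_max = 64,
  // scale = 127.5 / 255 = 0.5; both candidate zero points agree
  // (0 - (-63.5 / 0.5) = 127 and 255 - (64 / 0.5) = 127), so the function
  // returns {0.5, 127}.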

  int AddTensorPerChannelQuant(const TensorData& t) {
    // Type does not matter when adding empty data.
    return AddTensorPerChannelQuant<uint8_t>(t, {});
  }

  template <typename T>
  int AddTensorPerChannelQuant(const TensorData& t,
                               const std::initializer_list<T>& data) {
    const int id = tensors_.size();
    flatbuffers::Offset<QuantizationParameters> q_params = 0;
    q_params = CreateQuantizationParameters(
        builder_, /*min=*/0, /*max=*/0,
        /*scale=*/
        builder_.CreateVector<float>(t.per_channel_quantization_scales),
        /*zero point=*/
        builder_.CreateVector<int64_t>(t.per_channel_quantization_offsets),
        QuantizationDetails_NONE, 0, t.channel_index);

    int buffer_id = 0;
    if (data.size()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer =
          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
                                sizeof(T) * data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(
        CreateTensor(builder_, builder_.CreateVector<int>(t.shape), t.type,
                     /*buffer=*/buffer_id,
                     /*name=*/0, q_params, /*is_variable=*/false));
    tensor_data_[id] = t;
    return id;
  }

  std::vector<int8_t> QuantizeTensor(int index,
                                     const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    const int length = data.size();
    std::vector<int8_t> q(length);
    float min, max, scaling_factor;
    tensor_utils::SymmetricQuantizeFloats(data.data(), length, q.data(), &min,
                                          &max, &scaling_factor);
    // Update quantization params.
    t->params.scale = scaling_factor;
    t->params.zero_point = 0;
    // Populate the new quantization params.
    TfLiteQuantizationFree(&t->quantization);
    t->quantization.type = kTfLiteAffineQuantization;
    auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
        malloc(sizeof(TfLiteAffineQuantization)));
    affine_quantization->quantized_dimension = 0;
    affine_quantization->scale = TfLiteFloatArrayCreate(1);
    affine_quantization->zero_point = TfLiteIntArrayCreate(1);
    affine_quantization->scale->data[0] = scaling_factor;
    affine_quantization->zero_point->data[0] = 0;
    t->quantization.params = affine_quantization;
    return q;
  }

  // Checks if acceleration has been done as expected.
  // Currently supports only NNAPI.
  // It verifies whether the test was configured to run with NNAPI acceleration
  // (SetForceUseNnapi(true)).
  // If so, it checks that:
  // - the test case has been listed in the list of nnapi-accelerated cases
  // - the test is running on a device (NNAPI has been loaded)
  //
  // The list of nnapi-accelerated test cases is a file containing regexes that
  // include or exclude specific test cases, plus the minimum Android SDK
  // version the acceleration should be enabled for. For example, to enable the
  // test BorderFloat in TopKV2OpTest only from android_sdk_version 29:
  //
  // TopKV2OpTest/BorderFloat,29
  //
  // And to have it always excluded while enabling all other Float tests
  // (the order of the rules is important; the first one matching is used):
  //
  // -TopKV2OpTest/BorderFloat
  // TopKV2OpTest/.+Float

  void ValidateAcceleration();

  // If the test was configured to use NNAPI and NNAPI was actually loaded,
  // checks if the single operation in the model has been accelerated.
  void ExpectOpAcceleratedWithNnapi(const std::string& test_id);

  std::map<int, TensorData> tensor_data_;
  std::vector<int32_t> inputs_;
  std::vector<int32_t> intermediates_;
  std::vector<int32_t> outputs_;
  std::vector<flatbuffers::Offset<Tensor>> tensors_;
  std::vector<flatbuffers::Offset<Buffer>> buffers_;
  TfLiteDelegate* delegate_ = nullptr;
  int num_applied_delegates_ = 0;
};

// Populate string tensors.
template <>
inline void SingleOpModel::PopulateTensor<string>(
    int index, const std::initializer_list<string>& data) {
  PopulateStringTensor(index, data);
}

// Base class for single op unit tests.
// The tests are parameterized to test multiple kernels for a single op.
// The parameters are strings like "optimized" and "reference" to have better
// readability in test reports.
//
// To use this class:
// * Define a constant map from strings to TfLiteRegistration.
// * Implement a test class that inherits SingleOpTest.
// * Instantiate the test cases with SingleOpTest::GetKernelTags helper
//   function.
// * Call GetRegistration to get the TfLiteRegistration to be used before
//   building the interpreter.
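//
// A hedged instantiation sketch (MyOpTest and kKernelMapForTest are
// hypothetical names standing in for a real test class and kernel map):
//
//   INSTANTIATE_TEST_SUITE_P(
//       MyOpTest, MyOpTest,
//       ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMapForTest)));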
class SingleOpTest : public ::testing::TestWithParam<string> {
 public:
  static std::vector<string> GetKernelTags(
      const std::map<string, TfLiteRegistration*>& kernel_map) {
    std::vector<string> tags;
    tags.reserve(kernel_map.size());
    for (const auto& it : kernel_map) {
      tags.push_back(it.first);
    }
    return tags;
  }

 protected:
  virtual const std::map<string, TfLiteRegistration*>& GetKernelMap() = 0;
  TfLiteRegistration* GetRegistration() {
    return GetKernelMap().at(GetParam());
  }
};

// Returns the corresponding TensorType given the type T.
template <typename T>
TensorType GetTensorType() {
  if (std::is_same<T, float>::value) return TensorType_FLOAT32;
  if (std::is_same<T, TfLiteFloat16>::value) return TensorType_FLOAT16;
  if (std::is_same<T, double>::value) return TensorType_FLOAT64;
  if (std::is_same<T, int8_t>::value) return TensorType_INT8;
  if (std::is_same<T, int16_t>::value) return TensorType_INT16;
  if (std::is_same<T, int32_t>::value) return TensorType_INT32;
  if (std::is_same<T, uint32_t>::value) return TensorType_UINT32;
  if (std::is_same<T, int64_t>::value) return TensorType_INT64;
  if (std::is_same<T, uint8_t>::value) return TensorType_UINT8;
  if (std::is_same<T, string>::value) return TensorType_STRING;
  if (std::is_same<T, bool>::value) return TensorType_BOOL;
  return TensorType_MIN;  // default value
}

// Strings have a special implementation that is in test_util.cc.
template <>
std::vector<string> SingleOpModel::ExtractVector(int index) const;

// The TypeUnion struct specializations hold a collection of related types.
// Each struct holds: 1. a primitive type (e.g. float), 2. a TensorType (e.g.
// TensorType_FLOAT32), and 3. a TfLiteType (e.g. kTfLiteFloat32). The latter
// two are actually enum values and not raw types, but these specializations
// make it easy to use gUnit Typed Test Suite:
// https://github.com/google/googletest/blob/master/googletest/docs/advanced.md#typed-tests
template <typename T>
struct TypeUnion;
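
// A hedged typed-test sketch (ArithmeticTest and ArithmeticTypes are made-up
// names; only the googletest TYPED_TEST machinery and the members above are
// assumed):
//
//   template <typename T>
//   class ArithmeticTest : public ::testing::Test {};
//   using ArithmeticTypes = ::testing::Types<float, int32_t, int8_t>;
//   TYPED_TEST_SUITE(ArithmeticTest, ArithmeticTypes);
//   TYPED_TEST(ArithmeticTest, TensorTypeMatchesScalarType) {
//     using U = TypeUnion<TypeParam>;
//     EXPECT_EQ(GetTensorType<typename U::ScalarType>(), U::tensor_type);
//   }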

template <>
struct TypeUnion<float> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_FLOAT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteFloat32;
  typedef float ScalarType;
};

template <>
struct TypeUnion<int32_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt32;
  typedef int32_t ScalarType;
};

template <>
struct TypeUnion<uint32_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_UINT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt32;
  typedef uint32_t ScalarType;
};

template <>
struct TypeUnion<int16_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT16;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt16;
  typedef int16_t ScalarType;
};

template <>
struct TypeUnion<int8_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT8;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt8;
  typedef int8_t ScalarType;
};

template <>
struct TypeUnion<uint8_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_UINT8;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt8;
  typedef uint8_t ScalarType;
};

class MultiOpModel : public SingleOpModel {
 public:
  MultiOpModel() : SingleOpModel() {}
  ~MultiOpModel() {}

  void AddBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    const flatbuffers::Offset<void>& builtin_options,
                    const std::vector<int32_t>& inputs,
                    const std::vector<int32_t>& outputs);

  void AddCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration,
                   const std::vector<int32_t>& inputs,
                   const std::vector<int32_t>& outputs);

  template <typename T>
  int AddInnerTensor(TensorData t) {
    return AddTensor<T>(t, {}, false);
  }
};
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_