1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
16 #define TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
17 
18 #include <complex>
19 #include <vector>
20 
21 #include <gmock/gmock.h>
22 #include <gtest/gtest.h>
23 
24 #include "tensorflow/core/platform/logging.h"
25 #include "tensorflow/lite/interpreter.h"
26 #include "tensorflow/lite/kernels/internal/tensor_utils.h"
27 #include "tensorflow/lite/kernels/register.h"
28 #include "tensorflow/lite/model.h"
29 #include "tensorflow/lite/string_util.h"
30 #include "tensorflow/lite/testing/util.h"
31 #include "tensorflow/lite/tools/optimize/quantization_utils.h"
32 
33 namespace tflite {
34 
// Returns gmock matchers that check that the elements of a float vector match
// `values` to within a given tolerance (`max_abs_error`, 1e-5 by default).
// Only declared here; the definition is not part of this header.
std::vector<::testing::Matcher<float>> ArrayFloatNear(
    const std::vector<float>& values, float max_abs_error = 1e-5);
39 
// Returns gmock matchers that check that the elements of a complex vector
// match `values` to within a given tolerance (`max_abs_error`, 1e-5 by
// default). Only declared here; the definition is not part of this header.
std::vector<::testing::Matcher<std::complex<float>>> ArrayComplex64Near(
    const std::vector<std::complex<float>>& values, float max_abs_error = 1e-5);
44 
// Quantizes every value of `data` as round(zero_point + f / scale), saturated
// to the representable range of T. The result has one entry per input value.
//
// The saturation is carried out in double precision. For 32-bit T the float
// conversion of numeric_limits<T>::max() rounds up to a value that no longer
// fits in T, so clamping in float and casting back would be undefined
// behavior for inputs at or beyond the upper bound.
template <typename T>
inline std::vector<T> Quantize(const std::vector<float>& data, float scale,
                               int32_t zero_point) {
  const double q_min = static_cast<double>(std::numeric_limits<T>::min());
  const double q_max = static_cast<double>(std::numeric_limits<T>::max());
  std::vector<T> q;
  q.reserve(data.size());
  for (float f : data) {
    // The affine transform itself stays in single precision.
    const float rounded = std::round(zero_point + (f / scale));
    // Same min/max nesting as before, so a NaN input still saturates to the
    // upper bound instead of reaching the cast.
    q.push_back(static_cast<T>(
        std::max<double>(q_min, std::min<double>(q_max, rounded))));
  }
  return q;
}
57 
// Maps quantized values back to floats. Every output element is
// scale * (q - zero_point) for the corresponding input element q.
template <typename T>
inline std::vector<float> Dequantize(const std::vector<T>& data, float scale,
                                     int32_t zero_point) {
  std::vector<float> dequantized;
  for (auto it = data.begin(); it != data.end(); ++it) {
    const T quantized = *it;
    dequantized.push_back(scale * (quantized - zero_point));
  }
  return dequantized;
}
67 
// A test model that contains a single operator. All operator inputs and
// outputs are external to the model, so the tests can directly access them.
70 // Typical usage:
71 //    SingleOpModel m;
72 //    int a = m.AddInput({TensorType_FLOAT32, a_shape});
73 //    int b = m.AddInput({TensorType_FLOAT32, b_shape});
74 //    int c = m.AddOutput({TensorType_FLOAT32, {}});
75 //    m.SetBuiltinOp(...);
76 //    m.BuildInterpreter({GetShape(a), GetShape(b)});
77 //    m.PopulateTensor(a, {...});
78 //    m.PopulateTensor(b, {...});
79 //    m.Invoke();
80 //    EXPECT_THAT(m.ExtractVector<float>(c), ArrayFloatNear({...}));
81 //
82 
83 // A helper struct to construct test tensors. This is particularly useful for
84 // quantized tensor which must have their scale and zero_point defined before
85 // the actual data is known. This mimics what happens in practice: quantization
86 // parameters are calculated during training or post training..
87 struct TensorData {
88   TensorData(TensorType type = TensorType_FLOAT32, std::vector<int> shape = {},
89              float min = 0.0f, float max = 0.0f, float scale = 0.0f,
90              int32_t zero_point = 0, bool per_channel_quantization = false,
91              std::vector<float> per_channel_quantization_scales = {},
92              std::vector<int64_t> per_channel_quantization_offsets = {},
93              int32_t channel_index = 0)
typeTensorData94       : type(type),
95         shape(shape),
96         min(min),
97         max(max),
98         scale(scale),
99         zero_point(zero_point),
100         per_channel_quantization(per_channel_quantization),
101         per_channel_quantization_scales(
102             std::move(per_channel_quantization_scales)),
103         per_channel_quantization_offsets(
104             std::move(per_channel_quantization_offsets)),
105         channel_index(channel_index) {}
106   TensorType type;
107   std::vector<int> shape;
108   float min;
109   float max;
110   float scale;
111   int32_t zero_point;
112   bool per_channel_quantization;
113   std::vector<float> per_channel_quantization_scales;
114   std::vector<int64_t> per_channel_quantization_offsets;
115   int32_t channel_index;
116 };
117 
118 class SingleOpResolver : public OpResolver {
119  public:
SingleOpResolver(const BuiltinOperator op,TfLiteRegistration * registration)120   SingleOpResolver(const BuiltinOperator op, TfLiteRegistration* registration)
121       : op_(op), registration_(*registration) {
122     registration_.builtin_code = static_cast<int32_t>(op);
123     registration_.version = 1;
124   }
FindOp(BuiltinOperator op,int version)125   const TfLiteRegistration* FindOp(BuiltinOperator op,
126                                    int version) const override {
127     if (op == op_) {
128       return &registration_;
129     }
130     return nullptr;
131   }
FindOp(const char * op,int version)132   const TfLiteRegistration* FindOp(const char* op, int version) const override {
133     return nullptr;
134   }
135 
136  private:
137   const BuiltinOperator op_;
138   TfLiteRegistration registration_;
139 };
140 
141 class SingleOpModel {
142  public:
SingleOpModel()143   SingleOpModel() {}
~SingleOpModel()144   ~SingleOpModel() {}
145 
146   // Set a function callback that is run right after graph is prepared
147   // that allows applying external delegates. This is useful for testing
148   // other runtimes like NN API or GPU.
SetApplyDelegate(std::function<void (Interpreter *)> apply_delegate_fn)149   void SetApplyDelegate(std::function<void(Interpreter*)> apply_delegate_fn) {
150     apply_delegate_fn_ = apply_delegate_fn;
151   }
152 
153   // Copying or assignment is disallowed to simplify ownership semantics.
154   SingleOpModel(const SingleOpModel&) = delete;
155   SingleOpModel& operator=(const SingleOpModel&) = delete;
156 
157   // Add a TensorType input tensor and return its index.
158   int AddInput(TensorType type, bool is_variable = false) {
159     return AddInput(TensorData{type}, is_variable);
160   }
161   int AddInput(const TensorData& t, bool is_variable = false);
162 
163   // Templated version of AddConstInput().
164   template <typename T>
AddConstInput(TensorType type,std::initializer_list<T> data,std::initializer_list<int> shape)165   int AddConstInput(TensorType type, std::initializer_list<T> data,
166                     std::initializer_list<int> shape) {
167     int id = AddTensor(TensorData{type, shape}, data);
168     inputs_.push_back(id);
169     return id;
170   }
171 
172   // Add a null input tensor (optional input) and return kOptionalTensor.
173   int AddNullInput();
174 
175   // Add a TensorType output tensor and return its index.
AddOutput(TensorType type)176   int AddOutput(TensorType type) { return AddOutput(TensorData{type}); }
177   int AddOutput(const TensorData& t);
178 
179   template <typename T>
QuantizeAndPopulate(int index,const std::vector<float> & data)180   void QuantizeAndPopulate(int index, const std::vector<float>& data) {
181     TfLiteTensor* t = interpreter_->tensor(index);
182     auto q = Quantize<T>(data, t->params.scale, t->params.zero_point);
183     PopulateTensor(index, 0, q.data(), q.data() + q.size());
184   }
185 
SymmetricQuantizeAndPopulate(int index,const std::vector<float> & data)186   void SymmetricQuantizeAndPopulate(int index, const std::vector<float>& data) {
187     std::vector<int8_t> q = QuantizeTensor(index, data);
188     PopulateTensor(index, /*offset=*/0, reinterpret_cast<uint8_t*>(q.data()),
189                    reinterpret_cast<uint8_t*>(q.data() + q.size()));
190   }
191 
SignedSymmetricQuantizeAndPopulate(int index,const std::vector<float> & data)192   void SignedSymmetricQuantizeAndPopulate(int index,
193                                           const std::vector<float>& data) {
194     std::vector<int8_t> q = QuantizeTensor(index, data);
195     PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size());
196   }
197 
198   // Quantize and populate data for filter with per channel quantization.
PerChannelSymmetricQuantizeAndPopulate(int index,const std::vector<float> & input_data)199   void PerChannelSymmetricQuantizeAndPopulate(
200       int index, const std::vector<float>& input_data) {
201     TfLiteTensor* t = interpreter_->tensor(index);
202     auto* params =
203         reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
204     const int channel_index = params->quantized_dimension;
205 
206     std::vector<int32_t> shape(t->dims->size);
207     for (int i = 0; i < shape.size(); ++i) {
208       shape[i] = t->dims->data[i];
209     }
210     const int32_t num_inputs = input_data.size();
211     const int32_t num_channel = shape[channel_index];
212     std::vector<int8_t> quantized_output(num_inputs);
213     std::vector<float> scales_inv(num_channel);
214     for (int i = 0; i < num_channel; ++i) {
215       scales_inv[i] = 1.0f / params->scale->data[i];
216     }
217     optimize::utils::SymmetricPerChannelQuantizeValues(
218         input_data.data(), scales_inv, shape, channel_index, &quantized_output);
219 
220     PopulateTensor(index, /*offset=*/0, quantized_output.data(),
221                    quantized_output.data() + quantized_output.size());
222   }
223 
224   // Quantize and populate data for bias with per channel quantization.
PerChannelQuantizeBias(int index,const std::vector<float> & input_data)225   void PerChannelQuantizeBias(int index, const std::vector<float>& input_data) {
226     const int32_t num_inputs = input_data.size();
227     std::vector<int32_t> quantized_output(num_inputs);
228     TfLiteTensor* t = interpreter_->tensor(index);
229     auto* params =
230         reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
231     for (int i = 0; i < num_inputs; ++i) {
232       quantized_output[i] = input_data[i] * params->scale->data[i];
233     }
234     PopulateTensor(index, /*offset=*/0, quantized_output.data(),
235                    quantized_output.data() + quantized_output.size());
236   }
237 
GetShape(int id)238   const std::vector<int>& GetShape(int id) { return tensor_data_.at(id).shape; }
239 
GetScale(int id)240   float GetScale(int id) { return tensor_data_.at(id).scale; }
GetZeroPoint(int id)241   int32_t GetZeroPoint(int id) { return tensor_data_.at(id).zero_point; }
242 
243   // Define the operator in this model.
244   void SetBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
245                     flatbuffers::Offset<void> builtin_options);
246   void SetCustomOp(const string& name,
247                    const std::vector<uint8_t>& custom_option,
248                    const std::function<TfLiteRegistration*()>& registeration);
249 
250   // Build the interpreter for this model. Also, resize and allocate all
251   // tensors given the shapes of the inputs.
252   void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
253                         bool allow_fp32_relax_to_fp16 = false);
254 
255   void Invoke();
256 
PopulateStringTensor(int index,const std::vector<string> & content)257   void PopulateStringTensor(int index, const std::vector<string>& content) {
258     auto tensor = interpreter_->tensor(index);
259     DynamicBuffer buf;
260     for (const string& s : content) {
261       buf.AddString(s.data(), s.length());
262     }
263     buf.WriteToTensor(tensor, /*new_shape=*/nullptr);
264   }
265 
266   // Populate the tensor given its index.
267   // TODO(b/110696148) clean up and merge with vector-taking variant below.
268   template <typename T>
PopulateTensor(int index,const std::initializer_list<T> & data)269   void PopulateTensor(int index, const std::initializer_list<T>& data) {
270     T* v = interpreter_->typed_tensor<T>(index);
271     if (!v) {
272       auto* t = interpreter_->tensor(index);
273       CHECK(t) << "No tensor with index " << index << ".";
274       CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
275       CHECK(v) << "Type mismatch for tensor with index " << index
276                << ". Requested " << typeToTfLiteType<T>() << ", got "
277                << t->type;
278     }
279     for (T f : data) {
280       *v = f;
281       ++v;
282     }
283   }
284 
285   // Populate the tensor given its index.
286   // TODO(b/110696148) clean up and merge with initializer_list-taking variant
287   // above.
288   template <typename T>
PopulateTensor(int index,const std::vector<T> & data)289   void PopulateTensor(int index, const std::vector<T>& data) {
290     T* v = interpreter_->typed_tensor<T>(index);
291     if (!v) {
292       auto* t = interpreter_->tensor(index);
293       CHECK(t) << "No tensor with index " << index << ".";
294       CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
295       CHECK(v) << "Type mismatch for tensor with index " << index
296                << ". Requested " << typeToTfLiteType<T>() << ", got "
297                << t->type;
298     }
299     for (T f : data) {
300       *v = f;
301       ++v;
302     }
303   }
304 
305   // Partially populate the tensor, starting at the given offset.
306   template <typename T>
PopulateTensor(int index,int offset,T * begin,T * end)307   void PopulateTensor(int index, int offset, T* begin, T* end) {
308     T* v = interpreter_->typed_tensor<T>(index);
309     memcpy(v + offset, begin, (end - begin) * sizeof(T));
310   }
311 
312   // Return a vector with the flattened contents of a tensor.
313   template <typename T>
ExtractVector(int index)314   std::vector<T> ExtractVector(int index) {
315     T* v = interpreter_->typed_tensor<T>(index);
316     CHECK(v);
317     return std::vector<T>(v, v + GetTensorSize(index));
318   }
319 
GetTensorShape(int index)320   std::vector<int> GetTensorShape(int index) {
321     std::vector<int> result;
322     TfLiteTensor* t = interpreter_->tensor(index);
323     for (int i = 0; i < t->dims->size; ++i) {
324       result.push_back(t->dims->data[i]);
325     }
326     return result;
327   }
328 
SetNumThreads(int num_threads)329   void SetNumThreads(int num_threads) {
330     interpreter_->SetNumThreads(num_threads);
331   }
332 
SetResolver(std::unique_ptr<OpResolver> resolver)333   void SetResolver(std::unique_ptr<OpResolver> resolver) {
334     resolver_ = std::move(resolver);
335   }
336 
337  protected:
338   int32_t GetTensorSize(int index) const;
339 
340   flatbuffers::FlatBufferBuilder builder_;
341   std::unique_ptr<tflite::Interpreter> interpreter_;
342   std::unique_ptr<OpResolver> resolver_;
343 
344  private:
345   // TODO(gavinbelson): sync this method with
346   // //tensorflow/lite/kernels/internal/quantization_util.h?l=31
347   template <typename T>
QuantizationParams(float f_min,float f_max)348   std::pair<float, int32_t> QuantizationParams(float f_min, float f_max) {
349     // These are required by many quantized operations.
350     CHECK_LE(f_min, 0);
351     CHECK_GE(f_max, 0);
352     T q_min = std::numeric_limits<T>::min();
353     T q_max = std::numeric_limits<T>::max();
354     float range = q_max - q_min;
355     float scale = (f_max - f_min) / range;
356     int32_t zero_point = std::min(
357         q_max,
358         std::max(q_min, static_cast<T>(std::round(q_min - f_min / scale))));
359     return {scale, zero_point};
360   }
361 
AddTensorPerChannelQuant(TensorData t)362   int AddTensorPerChannelQuant(TensorData t) {
363     const int id = tensors_.size();
364     flatbuffers::Offset<QuantizationParameters> q_params = 0;
365     q_params = CreateQuantizationParameters(
366         builder_, /*min=*/0, /*max=*/0,
367         /*scale=*/
368         builder_.CreateVector<float>(t.per_channel_quantization_scales),
369         /*zero point=*/
370         builder_.CreateVector<int64_t>(t.per_channel_quantization_offsets),
371         QuantizationDetails_NONE, 0, t.channel_index);
372     tensors_.push_back(
373         CreateTensor(builder_, builder_.CreateVector<int>(t.shape), t.type,
374                      /*buffer=*/0,
375                      /*name=*/0, q_params, /*is_variable=*/false));
376     tensor_data_[id] = t;
377     return id;
378   }
379 
380   template <typename T>
381   int AddTensor(TensorData t, std::initializer_list<T> data,
382                 bool is_variable = false) {
383     int id = tensors_.size();
384 
385     // This is slightly different depending on whether we are adding a
386     // quantized or a regular tensor.
387     bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);
388 
389     flatbuffers::Offset<QuantizationParameters> q_params = 0;
390 
391     if (is_quantized) {
392       if (t.min != 0 || t.max != 0) {
393         if (t.type == TensorType_UINT8) {
394           std::tie(t.scale, t.zero_point) =
395               QuantizationParams<uint8_t>(t.min, t.max);
396         } else if (t.type == TensorType_INT8) {
397           std::tie(t.scale, t.zero_point) =
398               QuantizationParams<int8_t>(t.min, t.max);
399         } else if (t.type == TensorType_INT32) {
400           std::tie(t.scale, t.zero_point) =
401               QuantizationParams<int32_t>(t.min, t.max);
402         } else if (t.type == TensorType_INT16) {
403           std::tie(t.scale, t.zero_point) =
404               QuantizationParams<int16_t>(t.min, t.max);
405         } else {
406           LOG(FATAL) << "No support for the requested quantized type";
407         }
408         t.min = 0;
409         t.max = 0;
410       }
411 
412       q_params = CreateQuantizationParameters(
413           builder_, /*min=*/0, /*max=*/0,
414           builder_.CreateVector<float>({t.scale}),
415           builder_.CreateVector<int64_t>({t.zero_point}));
416     }
417 
418     int buffer_id = 0;
419     if (data.size()) {
420       // Initialize buffers list with empty buffer to allow for non-const
421       // tensors.
422       if (buffers_.empty()) {
423         buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
424       }
425 
426       // Add data as a Buffer to buffers list.
427       buffer_id = buffers_.size();
428       auto data_buffer =
429           builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
430                                 sizeof(T) * data.size());
431       buffers_.push_back(CreateBuffer(builder_, data_buffer));
432     }
433 
434     tensors_.push_back(CreateTensor(builder_,
435                                     builder_.CreateVector<int>(t.shape), t.type,
436                                     /*buffer=*/buffer_id,
437                                     /*name=*/0, q_params, is_variable));
438 
439     tensor_data_[id] = t;
440 
441     return id;
442   }
443 
QuantizeTensor(int index,const std::vector<float> & data)444   std::vector<int8_t> QuantizeTensor(int index,
445                                      const std::vector<float>& data) {
446     TfLiteTensor* t = interpreter_->tensor(index);
447     const int length = data.size();
448     std::vector<int8_t> q(length);
449     float min, max, scaling_factor;
450     tensor_utils::SymmetricQuantizeFloats(data.data(), length, q.data(), &min,
451                                           &max, &scaling_factor);
452     // Update quantization params.
453     t->params.scale = scaling_factor;
454     t->params.zero_point = 0;
455     // Populate the new quantization params.
456     TfLiteQuantizationFree(&t->quantization);
457     t->quantization.type = kTfLiteAffineQuantization;
458     auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
459         malloc(sizeof(TfLiteAffineQuantization)));
460     affine_quantization->quantized_dimension = 0;
461     affine_quantization->scale = TfLiteFloatArrayCreate(1);
462     affine_quantization->zero_point = TfLiteIntArrayCreate(1);
463     affine_quantization->scale->data[0] = scaling_factor;
464     affine_quantization->zero_point->data[0] = 0;
465     t->quantization.params = affine_quantization;
466     return q;
467   }
468 
469   std::map<int, TensorData> tensor_data_;
470   std::vector<int32_t> inputs_;
471   std::vector<int32_t> outputs_;
472   std::vector<flatbuffers::Offset<Tensor>> tensors_;
473   std::vector<flatbuffers::Offset<OperatorCode>> opcodes_;
474   std::vector<flatbuffers::Offset<Operator>> operators_;
475   std::vector<flatbuffers::Offset<Buffer>> buffers_;
476   std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;
477   // A function pointer that gets called after the interpreter is created but
478   // before evaluation happens. This is useful for applying a delegate.
479   std::function<void(Interpreter*)> apply_delegate_fn_;
480 };
481 
482 // Base class for single op unit tests.
483 // The tests are parameterized to test multiple kernels for a single op.
484 // The parameters are strings like "optimized" and "reference" to have better
485 // readability in test reports.
486 //
487 // To use this class:
488 // * Define a constant map from strings to TfLiteRegistration.
489 // * Implement a test class that inherits SingleOpTest.
490 // * Instantiate the test cases with SingleOpTest::GetKernelTags helper
491 //   function.
492 // * Call GetRegistration to get the TfLiteRegistration to be used before
493 //   building the interpreter.
494 class SingleOpTest : public ::testing::TestWithParam<string> {
495  public:
GetKernelTags(const std::map<string,TfLiteRegistration * > & kernel_map)496   static std::vector<string> GetKernelTags(
497       const std::map<string, TfLiteRegistration*>& kernel_map) {
498     std::vector<string> tags;
499     for (auto it : kernel_map) {
500       tags.push_back(it.first);
501     }
502     return tags;
503   }
504 
505  protected:
506   virtual const std::map<string, TfLiteRegistration*>& GetKernelMap() = 0;
GetRegistration()507   TfLiteRegistration* GetRegistration() {
508     return GetKernelMap().at(GetParam());
509   }
510 };
511 
512 // Returns the corresponding TensorType given the type T.
513 template <typename T>
GetTensorType()514 TensorType GetTensorType() {
515   if (std::is_same<T, float>::value) return TensorType_FLOAT32;
516   if (std::is_same<T, int32_t>::value) return TensorType_INT32;
517   if (std::is_same<T, uint8_t>::value) return TensorType_UINT8;
518   if (std::is_same<T, string>::value) return TensorType_STRING;
519   return TensorType_MIN;  // default value
520 }
521 
// Explicit specialization of SingleOpModel::ExtractVector for string tensors.
// It is only declared here; strings have a special implementation that is in
// test_util.cc.
template <>
std::vector<string> SingleOpModel::ExtractVector(int index);
525 }  // namespace tflite
526 
527 #endif  // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
528