/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_

#include <cmath>
#include <complex>
#include <cstring>
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include "tensorflow/core/platform/logging.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/testing/util.h"
#include "tensorflow/lite/tools/optimize/quantization_utils.h"

namespace tflite {

// A gmock matcher that checks that the elements of a float vector match to
// within a given tolerance.
std::vector<::testing::Matcher<float>> ArrayFloatNear(
    const std::vector<float>& values, float max_abs_error = 1e-5);

// A gmock matcher that checks that the elements of a complex vector match to
// within a given tolerance.
std::vector<::testing::Matcher<std::complex<float>>> ArrayComplex64Near(
    const std::vector<std::complex<float>>& values, float max_abs_error = 1e-5);
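
// A minimal usage sketch (the `output` vector and its values are hypothetical,
// not part of this header): the matchers are typically wrapped in
// ::testing::ElementsAreArray.
//   std::vector<float> output = {0.1f, 0.2f};
//   EXPECT_THAT(output,
//               ::testing::ElementsAreArray(ArrayFloatNear({0.1f, 0.2f})));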

template <typename T>
inline std::vector<T> Quantize(const std::vector<float>& data, float scale,
                               int32_t zero_point) {
  std::vector<T> q;
  for (float f : data) {
    q.push_back(static_cast<T>(std::max<float>(
        std::numeric_limits<T>::min(),
        std::min<float>(std::numeric_limits<T>::max(),
                        std::round(zero_point + (f / scale))))));
  }
  return q;
}

template <typename T>
inline std::vector<float> Dequantize(const std::vector<T>& data, float scale,
                                     int32_t zero_point) {
  std::vector<float> f;
  for (T q : data) {
    f.push_back(scale * (q - zero_point));
  }
  return f;
}
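
// A worked round trip under assumed parameters (scale = 0.5f, zero_point = 0;
// the values are purely illustrative):
//   Quantize<int8_t>({1.0f, -2.5f}, 0.5f, 0) yields {2, -5}, and
//   Dequantize<int8_t>({2, -5}, 0.5f, 0) recovers {1.0f, -2.5f}.
// Values that would fall outside the integral type's range are clamped to
// std::numeric_limits<T>::min()/max().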

// A test model that contains a single operator. All operator inputs and
// outputs are external to the model, so the tests can directly access them.
// Typical usage:
//   SingleOpModel m;
//   int a = m.AddInput({TensorType_FLOAT32, a_shape});
//   int b = m.AddInput({TensorType_FLOAT32, b_shape});
//   int c = m.AddOutput({TensorType_FLOAT32, {}});
//   m.SetBuiltinOp(...);
//   m.BuildInterpreter({GetShape(a), GetShape(b)});
//   m.PopulateTensor(a, {...});
//   m.PopulateTensor(b, {...});
//   m.Invoke();
//   EXPECT_THAT(m.ExtractVector<float>(c), ArrayFloatNear({...}));

// A helper struct to construct test tensors. This is particularly useful for
// quantized tensors, which must have their scale and zero_point defined before
// the actual data is known. This mimics what happens in practice: quantization
// parameters are calculated during training or post-training.
struct TensorData {
  TensorData(TensorType type = TensorType_FLOAT32, std::vector<int> shape = {},
             float min = 0.0f, float max = 0.0f, float scale = 0.0f,
             int32_t zero_point = 0, bool per_channel_quantization = false,
             std::vector<float> per_channel_quantization_scales = {},
             std::vector<int64_t> per_channel_quantization_offsets = {},
             int32_t channel_index = 0)
      : type(type),
        shape(shape),
        min(min),
        max(max),
        scale(scale),
        zero_point(zero_point),
        per_channel_quantization(per_channel_quantization),
        per_channel_quantization_scales(
            std::move(per_channel_quantization_scales)),
        per_channel_quantization_offsets(
            std::move(per_channel_quantization_offsets)),
        channel_index(channel_index) {}
  TensorType type;
  std::vector<int> shape;
  float min;
  float max;
  float scale;
  int32_t zero_point;
  bool per_channel_quantization;
  std::vector<float> per_channel_quantization_scales;
  std::vector<int64_t> per_channel_quantization_offsets;
  int32_t channel_index;
};
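
// Construction sketch (the shape and range are illustrative): an asymmetric
// quantized tensor can be described by its float range alone; scale and
// zero_point are then derived from {min, max} when the tensor is added to a
// model.
//   TensorData input{TensorType_UINT8, /*shape=*/{1, 2, 2, 1},
//                    /*min=*/-63.5f, /*max=*/64.0f};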

class SingleOpResolver : public OpResolver {
 public:
  SingleOpResolver(const BuiltinOperator op, TfLiteRegistration* registration)
      : op_(op), registration_(*registration) {
    registration_.builtin_code = static_cast<int32_t>(op);
    registration_.version = 1;
  }
  const TfLiteRegistration* FindOp(BuiltinOperator op,
                                   int version) const override {
    if (op == op_) {
      return &registration_;
    }
    return nullptr;
  }
  const TfLiteRegistration* FindOp(const char* op,
                                   int version) const override {
    return nullptr;
  }

 private:
  const BuiltinOperator op_;
  TfLiteRegistration registration_;
};

class SingleOpModel {
 public:
  SingleOpModel() {}
  ~SingleOpModel() {}

  // Set a function callback that is run right after the graph is prepared and
  // that allows applying external delegates. This is useful for testing other
  // runtimes, such as the NN API or GPU delegates.
  void SetApplyDelegate(std::function<void(Interpreter*)> apply_delegate_fn) {
    apply_delegate_fn_ = apply_delegate_fn;
  }

  // Copying or assignment is disallowed to simplify ownership semantics.
  SingleOpModel(const SingleOpModel&) = delete;
  SingleOpModel& operator=(const SingleOpModel&) = delete;

  // Add a TensorType input tensor and return its index.
  int AddInput(TensorType type, bool is_variable = false) {
    return AddInput(TensorData{type}, is_variable);
  }
  int AddInput(const TensorData& t, bool is_variable = false);

  // Templated version of AddConstInput().
  template <typename T>
  int AddConstInput(TensorType type, std::initializer_list<T> data,
                    std::initializer_list<int> shape) {
    int id = AddTensor(TensorData{type, shape}, data);
    inputs_.push_back(id);
    return id;
  }

  // Add a null input tensor (optional input) and return kOptionalTensor.
  int AddNullInput();

  // Add a TensorType output tensor and return its index.
  int AddOutput(TensorType type) { return AddOutput(TensorData{type}); }
  int AddOutput(const TensorData& t);

  template <typename T>
  void QuantizeAndPopulate(int index, const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto q = Quantize<T>(data, t->params.scale, t->params.zero_point);
    PopulateTensor(index, 0, q.data(), q.data() + q.size());
  }

  void SymmetricQuantizeAndPopulate(int index,
                                    const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, reinterpret_cast<uint8_t*>(q.data()),
                   reinterpret_cast<uint8_t*>(q.data() + q.size()));
  }

  void SignedSymmetricQuantizeAndPopulate(int index,
                                          const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size());
  }

  // Quantize and populate data for filter with per channel quantization.
  void PerChannelSymmetricQuantizeAndPopulate(
      int index, const std::vector<float>& input_data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    const int channel_index = params->quantized_dimension;

    std::vector<int32_t> shape(t->dims->size);
    for (size_t i = 0; i < shape.size(); ++i) {
      shape[i] = t->dims->data[i];
    }
    const int32_t num_inputs = input_data.size();
    const int32_t num_channel = shape[channel_index];
    std::vector<int8_t> quantized_output(num_inputs);
    std::vector<float> scales_inv(num_channel);
    for (int i = 0; i < num_channel; ++i) {
      scales_inv[i] = 1.0f / params->scale->data[i];
    }
    optimize::utils::SymmetricPerChannelQuantizeValues(
        input_data.data(), scales_inv, shape, channel_index,
        &quantized_output);

    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }

  // Quantize and populate data for bias with per channel quantization.
  void PerChannelQuantizeBias(int index,
                              const std::vector<float>& input_data) {
    const int32_t num_inputs = input_data.size();
    std::vector<int32_t> quantized_output(num_inputs);
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    for (int i = 0; i < num_inputs; ++i) {
      // Quantize: q = f / scale (the bias zero_point is 0; the assignment
      // truncates toward zero).
      quantized_output[i] = input_data[i] / params->scale->data[i];
    }
    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }
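
  // Usage sketch for the two helpers above (the `filter` and `bias` indices
  // and all values are hypothetical): after BuildInterpreter() on a model
  // whose filter tensor was added with per_channel_quantization = true,
  //   m.PerChannelSymmetricQuantizeAndPopulate(filter,
  //                                            {1.0f, 2.0f, 3.0f, 4.0f});
  //   m.PerChannelQuantizeBias(bias, {0.5f, -0.5f});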

  const std::vector<int>& GetShape(int id) { return tensor_data_.at(id).shape; }

  float GetScale(int id) { return tensor_data_.at(id).scale; }
  int32_t GetZeroPoint(int id) { return tensor_data_.at(id).zero_point; }

  // Define the operator in this model.
  void SetBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    flatbuffers::Offset<void> builtin_options);
  void SetCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration);

  // Build the interpreter for this model. Also, resize and allocate all
  // tensors given the shapes of the inputs.
  void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
                        bool allow_fp32_relax_to_fp16 = false);

  void Invoke();

  void PopulateStringTensor(int index, const std::vector<string>& content) {
    auto tensor = interpreter_->tensor(index);
    DynamicBuffer buf;
    for (const string& s : content) {
      buf.AddString(s.data(), s.length());
    }
    buf.WriteToTensor(tensor, /*new_shape=*/nullptr);
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with vector-taking variant below.
  template <typename T>
  void PopulateTensor(int index, const std::initializer_list<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK(v) << "Type mismatch for tensor with index " << index
               << ". Requested " << typeToTfLiteType<T>() << ", got "
               << t->type;
    }
    for (T f : data) {
      *v = f;
      ++v;
    }
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with initializer_list-taking variant
  // above.
  template <typename T>
  void PopulateTensor(int index, const std::vector<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK(v) << "Type mismatch for tensor with index " << index
               << ". Requested " << typeToTfLiteType<T>() << ", got "
               << t->type;
    }
    for (T f : data) {
      *v = f;
      ++v;
    }
  }

  // Partially populate the tensor, starting at the given offset.
  template <typename T>
  void PopulateTensor(int index, int offset, T* begin, T* end) {
    T* v = interpreter_->typed_tensor<T>(index);
    memcpy(v + offset, begin, (end - begin) * sizeof(T));
  }

  // Return a vector with the flattened contents of a tensor.
  template <typename T>
  std::vector<T> ExtractVector(int index) {
    T* v = interpreter_->typed_tensor<T>(index);
    CHECK(v);
    return std::vector<T>(v, v + GetTensorSize(index));
  }

  std::vector<int> GetTensorShape(int index) {
    std::vector<int> result;
    TfLiteTensor* t = interpreter_->tensor(index);
    for (int i = 0; i < t->dims->size; ++i) {
      result.push_back(t->dims->data[i]);
    }
    return result;
  }

  void SetNumThreads(int num_threads) {
    interpreter_->SetNumThreads(num_threads);
  }

  void SetResolver(std::unique_ptr<OpResolver> resolver) {
    resolver_ = std::move(resolver);
  }

 protected:
  int32_t GetTensorSize(int index) const;

  flatbuffers::FlatBufferBuilder builder_;
  std::unique_ptr<tflite::Interpreter> interpreter_;
  std::unique_ptr<OpResolver> resolver_;

 private:
  // TODO(gavinbelson): sync this method with
  // //tensorflow/lite/kernels/internal/quantization_util.h?l=31
  template <typename T>
  std::pair<float, int32_t> QuantizationParams(float f_min, float f_max) {
    // These are required by many quantized operations.
    CHECK_LE(f_min, 0);
    CHECK_GE(f_max, 0);
    T q_min = std::numeric_limits<T>::min();
    T q_max = std::numeric_limits<T>::max();
    // Compute the range in float to avoid signed overflow for wide types.
    float range = static_cast<float>(q_max) - static_cast<float>(q_min);
    float scale = (f_max - f_min) / range;
    int32_t zero_point = std::min(
        q_max,
        std::max(q_min, static_cast<T>(std::round(q_min - f_min / scale))));
    return {scale, zero_point};
  }
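
  // Worked example (numbers are illustrative): for T = uint8_t with
  // f_min = -1.0f and f_max = 1.0f, range = 255.0f, so
  // scale = 2.0f / 255 (~0.00784f) and
  // zero_point = round(0 - (-1.0f) / scale) = 128, which already lies in
  // [0, 255] and is therefore not clamped.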

  int AddTensorPerChannelQuant(TensorData t) {
    const int id = tensors_.size();
    flatbuffers::Offset<QuantizationParameters> q_params = 0;
    q_params = CreateQuantizationParameters(
        builder_, /*min=*/0, /*max=*/0,
        /*scale=*/
        builder_.CreateVector<float>(t.per_channel_quantization_scales),
        /*zero point=*/
        builder_.CreateVector<int64_t>(t.per_channel_quantization_offsets),
        QuantizationDetails_NONE, 0, t.channel_index);
    tensors_.push_back(
        CreateTensor(builder_, builder_.CreateVector<int>(t.shape), t.type,
                     /*buffer=*/0,
                     /*name=*/0, q_params, /*is_variable=*/false));
    tensor_data_[id] = t;
    return id;
  }

  template <typename T>
  int AddTensor(TensorData t, std::initializer_list<T> data,
                bool is_variable = false) {
    int id = tensors_.size();

    // This is slightly different depending on whether we are adding a
    // quantized or a regular tensor.
    bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);

    flatbuffers::Offset<QuantizationParameters> q_params = 0;

    if (is_quantized) {
      if (t.min != 0 || t.max != 0) {
        if (t.type == TensorType_UINT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<uint8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT32) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int32_t>(t.min, t.max);
        } else if (t.type == TensorType_INT16) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int16_t>(t.min, t.max);
        } else {
          LOG(FATAL) << "No support for the requested quantized type";
        }
        t.min = 0;
        t.max = 0;
      }

      q_params = CreateQuantizationParameters(
          builder_, /*min=*/0, /*max=*/0,
          builder_.CreateVector<float>({t.scale}),
          builder_.CreateVector<int64_t>({t.zero_point}));
    }

    int buffer_id = 0;
    if (data.size()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer =
          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
                                sizeof(T) * data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(builder_,
                                    builder_.CreateVector<int>(t.shape), t.type,
                                    /*buffer=*/buffer_id,
                                    /*name=*/0, q_params, is_variable));

    tensor_data_[id] = t;

    return id;
  }

  std::vector<int8_t> QuantizeTensor(int index,
                                     const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    const int length = data.size();
    std::vector<int8_t> q(length);
    float min, max, scaling_factor;
    tensor_utils::SymmetricQuantizeFloats(data.data(), length, q.data(), &min,
                                          &max, &scaling_factor);
    // Update quantization params.
    t->params.scale = scaling_factor;
    t->params.zero_point = 0;
    // Populate the new quantization params.
    TfLiteQuantizationFree(&t->quantization);
    t->quantization.type = kTfLiteAffineQuantization;
    auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
        malloc(sizeof(TfLiteAffineQuantization)));
    affine_quantization->quantized_dimension = 0;
    affine_quantization->scale = TfLiteFloatArrayCreate(1);
    affine_quantization->zero_point = TfLiteIntArrayCreate(1);
    affine_quantization->scale->data[0] = scaling_factor;
    affine_quantization->zero_point->data[0] = 0;
    t->quantization.params = affine_quantization;
    return q;
  }

  std::map<int, TensorData> tensor_data_;
  std::vector<int32_t> inputs_;
  std::vector<int32_t> outputs_;
  std::vector<flatbuffers::Offset<Tensor>> tensors_;
  std::vector<flatbuffers::Offset<OperatorCode>> opcodes_;
  std::vector<flatbuffers::Offset<Operator>> operators_;
  std::vector<flatbuffers::Offset<Buffer>> buffers_;
  std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;
  // A function that gets called after the interpreter is created but before
  // evaluation happens. This is useful for applying a delegate.
  std::function<void(Interpreter*)> apply_delegate_fn_;
};

// Base class for single op unit tests.
// The tests are parameterized to test multiple kernels for a single op.
// The parameters are strings like "optimized" and "reference" to have better
// readability in test reports.
//
// To use this class:
// * Define a constant map from strings to TfLiteRegistration.
// * Implement a test class that inherits SingleOpTest.
// * Instantiate the test cases with the SingleOpTest::GetKernelTags helper
//   function.
// * Call GetRegistration to get the TfLiteRegistration to be used before
//   building the interpreter.
// An instantiation of this pattern is sketched after the class definition
// below.
class SingleOpTest : public ::testing::TestWithParam<string> {
 public:
  static std::vector<string> GetKernelTags(
      const std::map<string, TfLiteRegistration*>& kernel_map) {
    std::vector<string> tags;
    for (const auto& it : kernel_map) {
      tags.push_back(it.first);
    }
    return tags;
  }

 protected:
  virtual const std::map<string, TfLiteRegistration*>& GetKernelMap() = 0;
  TfLiteRegistration* GetRegistration() {
    return GetKernelMap().at(GetParam());
  }
};
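
// A sketch of the pattern described above. MyOpTest and kKernelMap are
// hypothetical names, Register_ADD is only an example registration, and
// INSTANTIATE_TEST_SUITE_P is spelled INSTANTIATE_TEST_CASE_P on older
// googletest releases.
//   const std::map<string, TfLiteRegistration*> kKernelMap = {
//       {"Reference", ops::builtin::Register_ADD()},
//   };
//   class MyOpTest : public SingleOpTest {
//    protected:
//     const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
//       return kKernelMap;
//     }
//   };
//   TEST_P(MyOpTest, SimpleTest) {
//     // Build a SingleOpModel using GetRegistration() and verify its output.
//   }
//   INSTANTIATE_TEST_SUITE_P(
//       KernelVariants, MyOpTest,
//       ::testing::ValuesIn(SingleOpTest::GetKernelTags(kKernelMap)));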

// Returns the corresponding TensorType given the type T.
template <typename T>
TensorType GetTensorType() {
  if (std::is_same<T, float>::value) return TensorType_FLOAT32;
  if (std::is_same<T, int32_t>::value) return TensorType_INT32;
  if (std::is_same<T, uint8_t>::value) return TensorType_UINT8;
  if (std::is_same<T, string>::value) return TensorType_STRING;
  return TensorType_MIN;  // default value
}

// Strings have a special implementation that is in test_util.cc.
template <>
std::vector<string> SingleOpModel::ExtractVector(int index);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_