1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/common/quantization_util.h"
17 
18 #include <stdint.h>
19 
20 #include <algorithm>
21 #include <limits>
22 #include <memory>
23 #include <vector>
24 
25 #include <gmock/gmock.h>
26 #include <gtest/gtest.h>
27 #include "absl/container/flat_hash_map.h"
28 #include "absl/status/status.h"
29 #include "tensorflow/lite/c/common.h"
30 #include "tensorflow/lite/util.h"
31 
32 using ::testing::Eq;
33 using ::testing::FloatNear;
34 using ::testing::Pointwise;
35 
36 namespace tflite {
37 namespace gpu {
38 namespace {
39 
BuildTfLiteIntArray(const std::vector<int> & data)40 std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> BuildTfLiteIntArray(
41     const std::vector<int>& data) {
42   std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> result(
43       TfLiteIntArrayCreate(data.size()));
44   std::copy(data.begin(), data.end(), result->data);
45   return result;
46 }
47 
48 // TODO(b/158578883): this function is copied from the Micro codebase. Consider
49 // moving to a shared location.
PopulateContext(std::vector<TfLiteTensor> & tensors,TfLiteContext & context)50 void PopulateContext(std::vector<TfLiteTensor>& tensors,
51                      TfLiteContext& context) {
52   context.tensors_size = tensors.size();
53   context.tensors = tensors.data();
54   context.recommended_num_threads = 1;
55 }
56 
57 // TODO(b/158578883): this function is copied from the Micro codebase. Consider
58 // moving to a shared location.
ElementCount(const TfLiteIntArray & dims)59 int ElementCount(const TfLiteIntArray& dims) {
60   int result = 1;
61   for (int i = 0; i < dims.size; ++i) {
62     result *= dims.data[i];
63   }
64   return result;
65 }
66 
67 // TODO(b/158578883): this function is copied from the Micro codebase. Consider
68 // moving to a shared location.
// Returns the quantization scale for mapping the real range [min, max] onto
// the full range of T:
//   (max - min) / (numeric_limits<T>::max() - numeric_limits<T>::min()).
template <typename T>
inline float ScaleFromMinMax(const float min, const float max) {
  using Limits = std::numeric_limits<T>;
  // The span of T is computed in double, so the subtraction is not performed
  // in T's (or int's) arithmetic.
  const double quantized_span =
      static_cast<double>(Limits::max()) - Limits::min();
  const float real_span = max - min;
  return static_cast<float>(real_span / quantized_span);
}
74 
75 // TODO(b/158578883): this function is copied from the Micro codebase. Consider
76 // moving to a shared location.
77 template <typename T>
ZeroPointFromMinMax(const float min,const float max)78 inline int ZeroPointFromMinMax(const float min, const float max) {
79   return static_cast<int>(std::numeric_limits<T>::min()) +
80          static_cast<int>(-min / ScaleFromMinMax<T>(min, max) + 0.5f);
81 }
82 
83 // TODO(b/158578883): this function is copied from the Micro codebase. Consider
84 // moving to a shared location.
CreateQuantizedTensor(const int8_t * data,TfLiteIntArray * dims,const char * name,float min,float max,bool is_variable)85 TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims,
86                                    const char* name, float min, float max,
87                                    bool is_variable) {
88   TfLiteTensor result;
89   result.type = kTfLiteInt8;
90   result.data.int8 = const_cast<int8_t*>(data);
91   result.dims = dims;
92   result.params = {ScaleFromMinMax<int8_t>(min, max),
93                    ZeroPointFromMinMax<int8_t>(min, max)};
94   result.allocation_type = kTfLiteMemNone;
95   result.bytes = ElementCount(*dims) * sizeof(int8_t);
96   result.allocation = nullptr;
97   result.name = name;
98   result.is_variable = is_variable;
99   return result;
100 }
101 
102 // TODO(b/158578883): this function is copied from the Micro codebase. Consider
103 // moving to a shared location.
CreateQuantizedTensor(const uint8_t * data,TfLiteIntArray * dims,const char * name,float min,float max,bool is_variable)104 TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims,
105                                    const char* name, float min, float max,
106                                    bool is_variable) {
107   TfLiteTensor result;
108   result.type = kTfLiteUInt8;
109   result.data.uint8 = const_cast<uint8_t*>(data);
110   result.dims = dims;
111   result.params = {ScaleFromMinMax<uint8_t>(min, max),
112                    ZeroPointFromMinMax<uint8_t>(min, max)};
113   result.allocation_type = kTfLiteMemNone;
114   result.bytes = ElementCount(*dims) * sizeof(uint8_t);
115   result.allocation = nullptr;
116   result.name = name;
117   result.is_variable = false;
118   return result;
119 }
120 
121 // TODO(b/158578883): this function is copied from the Micro codebase. Consider
122 // moving to a shared location.
CreateTensor(TfLiteIntArray * dims,const char * name,bool is_variable)123 TfLiteTensor CreateTensor(TfLiteIntArray* dims, const char* name,
124                           bool is_variable) {
125   TfLiteTensor result;
126   result.dims = dims;
127   result.name = name;
128   result.params = {};
129   result.quantization = {kTfLiteNoQuantization, nullptr};
130   result.is_variable = is_variable;
131   result.allocation_type = kTfLiteMemNone;
132   result.allocation = nullptr;
133   return result;
134 }
135 
136 // TODO(b/158578883): this function is copied from the Micro codebase. Consider
137 // moving to a shared location.
CreateFloatTensor(const float * data,TfLiteIntArray * dims,const char * name,bool is_variable)138 TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims,
139                                const char* name, bool is_variable) {
140   TfLiteTensor result = CreateTensor(dims, name, is_variable);
141   result.type = kTfLiteFloat32;
142   result.data.f = const_cast<float*>(data);
143   result.bytes = ElementCount(*dims) * sizeof(float);
144   return result;
145 }
146 
// DequantizeInputs with an int8 source tensor and uint32_t tensor indices:
// the quantized values {-3, ..., 3} with range [-12.8, 12.7] are expected to
// land in the float tensor as {-0.3, ..., 0.3}.
TEST(DequantizeInputs, Int8) {
  auto shape = BuildTfLiteIntArray({1, 3, 2, 1});
  std::vector<int8_t> quantized_values = {-3, -2, -1, 1, 2, 3};
  std::vector<float> dequantized_data(quantized_values.size());

  // Index 0 is the quantized source, index 1 the float destination; both
  // tensors share the same dims array.
  std::vector<TfLiteTensor> tensors{
      CreateQuantizedTensor(quantized_values.data(), shape.get(), "input",
                            /*min=*/-12.8f, /*max=*/12.7f,
                            /*is_variable=*/false),
      CreateFloatTensor(dequantized_data.data(), shape.get(), "input_dequant",
                        /*is_variable=*/true)};
  TfLiteContext context;
  PopulateContext(tensors, context);

  // Maps the float tensor (index 1) to its quantized counterpart (index 0).
  absl::flat_hash_map<int, int> quant_conversion_map = {{1, 0}};
  std::vector<uint32_t> input_indices = {1};

  const auto status =
      DequantizeInputs(&context, input_indices, quant_conversion_map);
  EXPECT_TRUE(status.ok());
  EXPECT_THAT(dequantized_data,
              Pointwise(FloatNear(1e-6), {-0.3, -0.2, -0.1, 0.1, 0.2, 0.3}));
}
171 
// DequantizeInputs with a uint8 source tensor and int64_t tensor indices:
// the quantized values {0, ..., 5} with range [0, 25.5] are expected to land
// in the float tensor as {0.0, ..., 0.5}.
TEST(DequantizeInputs, UInt8) {
  auto shape = BuildTfLiteIntArray({1, 3, 2, 1});
  std::vector<uint8_t> quantized_values = {0, 1, 2, 3, 4, 5};
  std::vector<float> dequantized_data(quantized_values.size());

  // Index 0 is the quantized source, index 1 the float destination; both
  // tensors share the same dims array.
  std::vector<TfLiteTensor> tensors{
      CreateQuantizedTensor(quantized_values.data(), shape.get(), "input",
                            /*min=*/0.0f, /*max=*/25.5f,
                            /*is_variable=*/false),
      CreateFloatTensor(dequantized_data.data(), shape.get(), "input_dequant",
                        /*is_variable=*/true)};
  TfLiteContext context;
  PopulateContext(tensors, context);

  // Maps the float tensor (index 1) to its quantized counterpart (index 0).
  absl::flat_hash_map<int, int> quant_conversion_map = {{1, 0}};
  std::vector<int64_t> input_indices = {1};

  const auto status =
      DequantizeInputs(&context, input_indices, quant_conversion_map);
  EXPECT_TRUE(status.ok());
  EXPECT_THAT(dequantized_data,
              Pointwise(FloatNear(1e-6), {0.0, 0.1, 0.2, 0.3, 0.4, 0.5}));
}
196 
// QuantizeOutputs with an int8 destination tensor and uint32_t tensor
// indices: the float values {-0.3, ..., 0.3} with range [-12.8, 12.7] are
// expected to be written to the quantized tensor as {-3, ..., 3}.
TEST(QuantizeOutputs, Int8) {
  auto shape = BuildTfLiteIntArray({1, 3, 2, 1});
  std::vector<float> float_values = {-0.3, -0.2, -0.1, 0.1, 0.2, 0.3};
  std::vector<int8_t> quantized_data(float_values.size());

  // Index 0 is the float source, index 1 the quantized destination; both
  // tensors share the same dims array.
  std::vector<TfLiteTensor> tensors{
      CreateFloatTensor(float_values.data(), shape.get(), "output",
                        /*is_variable=*/false),
      CreateQuantizedTensor(quantized_data.data(), shape.get(), "output_quant",
                            /*min=*/-12.8f, /*max=*/12.7f,
                            /*is_variable=*/true)};
  TfLiteContext context;
  PopulateContext(tensors, context);

  // Maps the float tensor (index 0) to its quantized counterpart (index 1).
  absl::flat_hash_map<int, int> quant_conversion_map = {{0, 1}};
  std::vector<uint32_t> output_indices = {0};

  const auto status =
      QuantizeOutputs(&context, output_indices, quant_conversion_map);
  EXPECT_TRUE(status.ok());
  EXPECT_THAT(quantized_data, Pointwise(Eq(), {-3, -2, -1, 1, 2, 3}));
}
218 
// QuantizeOutputs with a uint8 destination tensor and int64_t tensor indices:
// the float values {0.0, ..., 0.5} with range [0, 25.5] are expected to be
// written to the quantized tensor as {0, ..., 5}.
TEST(QuantizeOutputs, UInt8) {
  auto shape = BuildTfLiteIntArray({1, 3, 2, 1});
  std::vector<float> float_values = {0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
  std::vector<uint8_t> quantized_data(float_values.size());

  // Index 0 is the float source, index 1 the quantized destination; both
  // tensors share the same dims array.
  std::vector<TfLiteTensor> tensors{
      CreateFloatTensor(float_values.data(), shape.get(), "output",
                        /*is_variable=*/false),
      CreateQuantizedTensor(quantized_data.data(), shape.get(), "output_quant",
                            /*min=*/0.0f, /*max=*/25.5f,
                            /*is_variable=*/true)};
  TfLiteContext context;
  PopulateContext(tensors, context);

  // Maps the float tensor (index 0) to its quantized counterpart (index 1).
  absl::flat_hash_map<int, int> quant_conversion_map = {{0, 1}};
  std::vector<int64_t> output_indices = {0};

  const auto status =
      QuantizeOutputs(&context, output_indices, quant_conversion_map);
  EXPECT_TRUE(status.ok());
  EXPECT_THAT(quantized_data, Pointwise(Eq(), {0, 1, 2, 3, 4, 5}));
}
240 
241 }  // namespace
242 }  // namespace gpu
243 }  // namespace tflite
244