// Quantized calculation utilities.
// TODO(vddang): Replace this with tensorflow/lite/kernels/internal/tensor_utils(common).h
// after TFLite module has been synced.

#ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_QUANTUTILS_H
#define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_QUANTUTILS_H

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wsign-compare"
#include <public/gemmlowp.h>
#pragma clang diagnostic pop

#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>
#include <memory>
#include <type_traits>

#include "LegacyUtils.h"
#include "OperationsExecutionUtils.h"

namespace android {
namespace nn {

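// Rescales a 32-bit accumulator by a real-valued factor expressed as a fixed-point multiplier
// plus a power-of-two exponent: the result is approximately
// x * quantized_multiplier * 2^(shift - 31), with rounding and saturation handled by gemmlowp.
// A positive shift is applied as a left shift before the high multiply, a negative shift as a
// rounding right shift afterwards.
//
// Illustrative sketch of how a real scale is typically decomposed for this call
// (QuantizeScale is a hypothetical helper, not part of this header):
//
//   void QuantizeScale(double scale, int32_t* quantized_multiplier, int* shift) {
//       if (scale == 0.0) {
//           *quantized_multiplier = 0;
//           *shift = 0;
//           return;
//       }
//       const double q = std::frexp(scale, shift);  // scale == q * 2^shift, q in [0.5, 1)
//       int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
//       if (q_fixed == (1LL << 31)) {  // Rounding can push q up to exactly 1.0.
//           q_fixed /= 2;
//           ++*shift;
//       }
//       *quantized_multiplier = static_cast<int32_t>(q_fixed);
//   }
//
//   // Then MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift) ~= x * scale.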
inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift) {
    using gemmlowp::RoundingDivideByPOT;
    using gemmlowp::SaturatingRoundingDoublingHighMul;
    int left_shift = shift > 0 ? shift : 0;
    int right_shift = shift > 0 ? 0 : -shift;
    return RoundingDivideByPOT(
            SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier),
            right_shift);
}

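// Quantized matrix x batched-vector product with accumulation: for every batch and output row,
//   output[batch][row] = clamp(output[batch][row] + output_zp
//                              + rescale(dot(weights_row, input_batch) + bias[row]))
// where the weights are row-major [n_output x n_input], the input is [n_batch x n_input], the
// rescale is MultiplyByQuantizedMultiplier(multiplier, shift), and the clamp is to the range of
// T (int8_t or int16_t).
//
// A minimal usage sketch with made-up sizes (all values below are illustrative only):
//
//   // One batch, 3 inputs, 2 outputs; (multiplier, shift) below encodes an effective scale of 1.
//   const int8_t input[3] = {10, -3, 7};
//   const int8_t weights[2 * 3] = {1, 2, 3, -1, -2, -3};  // row-major [n_output x n_input]
//   const int32_t bias[2] = {100, -100};
//   int16_t out[2] = {0, 0};                              // accumulated into, so zero-init
//   MatrixBatchVectorMultiplyAccumulate(input, bias, weights, /*multiplier=*/1073741824,
//                                       /*shift=*/1, /*n_batch=*/1, /*n_input=*/3,
//                                       /*n_output=*/2, /*output_zp=*/0, out);
//   // out ends up as {125, -125}.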
template <typename T>
void MatrixBatchVectorMultiplyAccumulate(const int8_t* input, const int32_t* bias,
                                         const int8_t* input_to_gate_weights, int32_t multiplier,
                                         int32_t shift, int32_t n_batch, int32_t n_input,
                                         int32_t n_output, int32_t output_zp, T* output) {
    const int16_t output_max = std::numeric_limits<T>::max();
    const int16_t output_min = std::numeric_limits<T>::min();
    for (int batch = 0; batch < n_batch; ++batch) {
        for (int row = 0; row < n_output; ++row) {
            int32_t acc = bias[row];
            for (int col = 0; col < n_input; ++col) {
                int8_t input_val = input[batch * n_input + col];
                int8_t weights_val = input_to_gate_weights[row * n_input + col];
                acc += input_val * weights_val;
            }
            acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
            acc += output_zp;
            acc += output[batch * n_output + row];
            if (acc > output_max) {
                acc = output_max;
            }
            if (acc < output_min) {
                acc = output_min;
            }
            output[batch * n_output + row] = static_cast<T>(acc);
        }
    }
}

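// Portable count-leading-zeros for unsigned integer types, e.g.
// CountLeadingZeros<uint32_t>(1) == 31 and CountLeadingZeros<uint32_t>(0) == 32
// (a zero input returns the full bit width of T).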
template <typename T>
int CountLeadingZeros(T integer_input) {
    static_assert(std::is_unsigned<T>::value, "Only unsigned integer types handled.");
#if defined(__GNUC__)
    return integer_input ? __builtin_clz(integer_input) : std::numeric_limits<T>::digits;
#else
    if (integer_input == 0) {
        return std::numeric_limits<T>::digits;
    }

    const T one_in_leading_positive = static_cast<T>(1) << (std::numeric_limits<T>::digits - 1);
    int leading_zeros = 0;
    while (integer_input < one_in_leading_positive) {
        integer_input <<= 1;
        ++leading_zeros;
    }
    return leading_zeros;
#endif
}

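// Computes a (quantized multiplier, shift) pair approximating 1/sqrt(input) via a fixed-point
// Newton-Raphson iteration, so that a caller can divide by a standard deviation with
// MultiplyByQuantizedMultiplier(value, *output_inv_sqrt, *output_shift); the layer normalization
// path below typically consumes it this way. Callers typically pass reverse_shift == -1 to flip
// the returned shift from the right-shift-positive convention to the left-shift-positive
// convention used by MultiplyByQuantizedMultiplier. Returns false (via NN_RET_CHECK) if input is
// negative.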
inline bool GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
                                             int32_t* output_inv_sqrt, int* output_shift) {
    NN_RET_CHECK_GE(input, 0);
    if (input <= 1) {
        // Handle the input value 1 separately to avoid overflow in that case
        // in the general computation below. Also handle 0 as if it
        // were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid
        // but rare/unrealistic input value. We can expect both to occur in some
        // incompletely trained models, but probably not in fully trained models.
        *output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
        *output_shift = 0;
        return true;
    }

    *output_shift = 11;
    while (input >= (1 << 29)) {
        input /= 4;
        ++*output_shift;
    }
    const unsigned max_left_shift_bits = CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
    const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
    const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
    *output_shift -= left_shift_bit_pairs;
    input <<= 2 * left_shift_bit_pairs;
    NN_RET_CHECK_GE(input, (1 << 27));
    NN_RET_CHECK_LT(input, (1 << 29));
    using gemmlowp::FixedPoint;
    using gemmlowp::Rescale;
    using gemmlowp::SaturatingRoundingMultiplyByPOT;
    // Using 3 integer bits gives us enough room for the internal arithmetic in
    // this Newton-Raphson iteration.
    using F3 = FixedPoint<int32_t, 3>;
    using F0 = FixedPoint<int32_t, 0>;
    const F3 fixedpoint_input = F3::FromRaw(input >> 1);
    const F3 fixedpoint_half_input = SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
    const F3 fixedpoint_half_three =
            GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
    // Newton-Raphson iteration.
    // Naive unoptimized starting guess: x = 1.
    F3 x = F3::One();
    // Naive unoptimized number of iterations: 5.
    for (int i = 0; i < 5; i++) {
        const F3 x3 = Rescale<3>(x * x * x);
        x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
    }
    const F0 fixedpoint_half_sqrt_2 =
            GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
    x = x * fixedpoint_half_sqrt_2;
    *output_inv_sqrt = x.raw();
    if (*output_shift < 0) {
        *output_inv_sqrt <<= -*output_shift;
        *output_shift = 0;
    }
    // Convert right shift (right is positive) to left shift.
    *output_shift *= reverse_shift;
    return true;
}

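// Integer layer normalization, mirroring the TFLite reference this header tracks: each batch row
// of n_input int16 activations is normalized to zero mean and unit variance, then rescaled by
// layer_norm_weights using the (layer_norm_scale_a, layer_norm_scale_b) quantized
// multiplier/shift pair, offset by bias, and written back as int16.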
void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights, const int32_t* bias,
                    int32_t layer_norm_scale_a, int32_t layer_norm_scale_b, int32_t variance_limit,
                    int n_batch, int n_input, int16_t* output);

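// Per the TFLite reference implementation, accumulates scalar times the row sums of an int8
// [n_row x n_col] matrix into output: output[r] += scalar * sum_c matrix[r][c].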
void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar, int32_t n_row,
                                    int32_t n_col, int32_t* output);

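// Precomputes, per output row, bias[r] plus zero_point times the row sum of weight[r][*] into a
// freshly allocated int32 array (*output). Folding the zero-point term into the bias this way
// lets the quantized matmul inner loops use plain int8 dot products without a per-element
// zero-point correction. (Callers are expected to pass the appropriately signed zero point for
// their convention.)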
bool PrecomputeZeroPointTimesWeightWithBias(int32_t zero_point, const int8_t* weight_tensor,
                                            const Shape& weight_shape, const int32_t* bias_tensor,
                                            std::unique_ptr<int32_t[]>* output);

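// Element-wise fixed-point logistic function; following the TFLite reference, the input is
// interpreted as Q3.12 and the output is produced in Q0.15.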
void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input, int16_t* output);

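// Element-wise fixed-point tanh. IntegerBits selects the input's Q format
// (Q<IntegerBits>.<15 - IntegerBits>; e.g. ApplyTanh<3> treats the input as Q3.12); the output
// is always Q0.15.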
template <int IntegerBits>
void ApplyTanh(const int16_t* input, int32_t n_batch, int32_t n_input, int16_t* output) {
    using FX = gemmlowp::FixedPoint<std::int16_t, IntegerBits>;
    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
    for (int batch = 0; batch < n_batch; ++batch) {
        for (int i = 0; i < n_input; ++i) {
            const int index = batch * n_input + i;
            FX tanh_input = FX::FromRaw(input[index]);
            F0 tanh_output = gemmlowp::tanh(tanh_input);
            output[index] = tanh_output.raw();
        }
    }
}

inline void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch, int32_t n_input,
                      int16_t* output) {
    assert(integer_bits <= 6);
#define DISPATCH_TANH(i)                                   \
    case i:                                                \
        ApplyTanh<i>(input, n_batch, n_input, output);     \
        break;
    switch (integer_bits) {
        DISPATCH_TANH(0);
        DISPATCH_TANH(1);
        DISPATCH_TANH(2);
        DISPATCH_TANH(3);
        DISPATCH_TANH(4);
        DISPATCH_TANH(5);
        DISPATCH_TANH(6);
        default:
            return;
    }
#undef DISPATCH_TANH
}

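// Element-wise multiply of two batched int16 vectors. Per the TFLite reference this header
// tracks, the first overload keeps int16 output and applies a simple rounding right shift to the
// 32-bit product; the second requantizes the product to int8 with a (multiplier, shift) pair
// plus the output_zp offset, clamping to the int8 range.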
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch, int n_input, int shift,
              int16_t* output);
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int32_t multiplier, int32_t shift,
              int32_t n_batch, int32_t n_input, int32_t output_zp, int8_t* output);

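// Returns true iff x is (very nearly) an exact power of two, storing the rounded exponent in
// *log2_result; e.g. CheckedLog2(0.25f, &e) is expected to yield true with e == -2.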
bool CheckedLog2(const float x, int* log2_result);

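// Element-wise add of two batched int16 vectors; per the TFLite reference, the 32-bit sum is
// clamped back to the int16 range.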
void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch, int n_input,
              int16_t* output);

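// Computes result[i] = 1.0 - vector[i] in Q0.15, where 1.0 is represented by 32767
// (e.g. turning a gate activation g into its complement 1 - g).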
inline void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) {
    static const int16_t kOne = 32767;
    for (int v = 0; v < v_size; v++) {
        *result++ = kOne - *vector++;
    }
}

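// In-place clipping: per the TFLite reference, every element of `input` is clamped to
// [-clipping_value, clipping_value].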
void CwiseClipping(int16_t* input, const int16_t clipping_value, int32_t n_batch, int32_t n_input);

void CwiseClipping(int8_t* input, const int8_t clipping_value, int32_t n_batch, int32_t n_input);

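// Per the TFLite reference: for each batch b and element i,
//   result[b][i] += rescale(vector[i] * batch_vector[b][i])
// where the rescale uses the (multiplier, shift) pair and the sum is clamped to the int16 range.
// The same `vector` of length v_size is broadcast across all n_batch rows of batch_vector.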
void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
                                             const int16_t* batch_vector, int n_batch,
                                             int32_t multiplier, int shift, int16_t* result);

}  // namespace nn
}  // namespace android

#endif  // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_QUANTUTILS_H