1 // Copyright 2016 The Gemmlowp Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // test_fixedpoint.cc: unit tests covering the fixedpoint/ directory.
16 
17 #define GEMMLOWP_ENABLE_FIXEDPOINT_CONSTANTS_CHECKS
18 
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <random>
#include <vector>
25 
26 #include "../fixedpoint/fixedpoint.h"
27 #include "test.h"
28 
29 namespace gemmlowp {
30 
31 namespace {
32 
33 template <typename T>
Load(const typename FixedPointRawTypeTraits<T>::ScalarRawType * src)34 T Load(const typename FixedPointRawTypeTraits<T>::ScalarRawType* src) {
35   return *src;
36 }
37 template <typename T>
Store(typename FixedPointRawTypeTraits<T>::ScalarRawType * dst,T v)38 void Store(typename FixedPointRawTypeTraits<T>::ScalarRawType* dst, T v) {
39   *dst = v;
40 }
#ifdef GEMMLOWP_NEON
// NEON specializations of Load/Store. Each one forwards to the matching
// vld1q_* / vst1q_* intrinsic for the int32x4_t and int16x8_t vector types.
template <>
int32x4_t Load<int32x4_t>(const std::int32_t* src) {
  const int32x4_t lanes = vld1q_s32(src);
  return lanes;
}
template <>
void Store<int32x4_t>(std::int32_t* dst, int32x4_t v) {
  vst1q_s32(dst, v);
}
template <>
int16x8_t Load<int16x8_t>(const std::int16_t* src) {
  const int16x8_t lanes = vld1q_s16(src);
  return lanes;
}
template <>
void Store<int16x8_t>(std::int16_t* dst, int16x8_t v) {
  vst1q_s16(dst, v);
}
#endif
#ifdef GEMMLOWP_SSE4
// SSE4 specializations of Load/Store. All of them go through the unaligned
// _mm_loadu_si128 / _mm_storeu_si128 intrinsics; the int16x8_m128i variants
// additionally wrap / unwrap the __m128i held in the wrapper's `v` member.
template <>
__m128i Load<__m128i>(const std::int32_t* src) {
  const __m128i* vector_src = reinterpret_cast<const __m128i*>(src);
  return _mm_loadu_si128(vector_src);
}
template <>
void Store<__m128i>(std::int32_t* dst, __m128i v) {
  __m128i* vector_dst = reinterpret_cast<__m128i*>(dst);
  _mm_storeu_si128(vector_dst, v);
}
template <>
int16x8_m128i Load<int16x8_m128i>(const std::int16_t* src) {
  const __m128i* vector_src = reinterpret_cast<const __m128i*>(src);
  return to_int16x8_m128i(_mm_loadu_si128(vector_src));
}
template <>
void Store<int16x8_m128i>(std::int16_t* dst, int16x8_m128i v) {
  __m128i* vector_dst = reinterpret_cast<__m128i*>(dst);
  _mm_storeu_si128(vector_dst, v.v);
}
#endif
#ifdef GEMMLOWP_MSA
// MSA specializations of Load/Store, built on the __builtin_msa_ld_* and
// __builtin_msa_st_* builtins with a zero offset. The loads cast away
// constness before handing the pointer to the builtin.
template <>
v4i32 Load<v4i32>(const std::int32_t* src) {
  std::int32_t* mutable_src = const_cast<std::int32_t*>(src);
  return __builtin_msa_ld_w(mutable_src, 0);
}
template <>
void Store<v4i32>(std::int32_t* dst, v4i32 v) {
  __builtin_msa_st_w(v, dst, 0);
}
template <>
v8i16 Load<v8i16>(const std::int16_t* src) {
  std::int16_t* mutable_src = const_cast<std::int16_t*>(src);
  return __builtin_msa_ld_h(mutable_src, 0);
}
template <>
void Store<v8i16>(std::int16_t* dst, v8i16 v) {
  __builtin_msa_st_h(v, dst, 0);
}
#endif
96 
#ifdef GEMMLOWP_AVX2
// AVX2 specializations of Load/Store. All of them go through the unaligned
// _mm256_loadu_si256 / _mm256_storeu_si256 intrinsics; the int16x16_m256i
// variants additionally wrap / unwrap the __m256i held in the wrapper's `v`
// member.
template <>
__m256i Load<__m256i>(const std::int32_t* src) {
  const __m256i* vector_src = reinterpret_cast<const __m256i*>(src);
  return _mm256_loadu_si256(vector_src);
}

template <>
void Store<__m256i>(std::int32_t* dst, __m256i v) {
  __m256i* vector_dst = reinterpret_cast<__m256i*>(dst);
  _mm256_storeu_si256(vector_dst, v);
}

template <>
int16x16_m256i Load<int16x16_m256i>(const std::int16_t* src) {
  const __m256i* vector_src = reinterpret_cast<const __m256i*>(src);
  return to_int16x16_m256i(_mm256_loadu_si256(vector_src));
}

template <>
void Store<int16x16_m256i>(std::int16_t* dst, int16x16_m256i v) {
  __m256i* vector_dst = reinterpret_cast<__m256i*>(dst);
  _mm256_storeu_si256(vector_dst, v.v);
}
#endif
119 
120 template <typename tSimdType>
121 class TestFixedPoint {
122  public:
123   using SimdType = tSimdType;
124   using SimdTypeTraits = FixedPointRawTypeTraits<SimdType>;
125   using ScalarType = typename SimdTypeTraits::ScalarRawType;
126   static constexpr int kSimdLanes = SimdTypeTraits::kLanes;
127   static constexpr int kScalarTypeBits = 8 * sizeof(ScalarType);
128 
129   // Explanation of UnaryOpBase, its *Op subclasses below, and TestUnaryOp:
130   // Most (though not all) of the fixedpoint functionality being tested
131   // consists of functions taking one fixedpoint value and returning one
132   // fixedpoint value, e.g. "exp" or "tanh". We call them "unary operators".
133   // We factor a lot of testing boilerplate into a common TestUnaryOp function
134   // taking a "unary op" object that fully describes the function to be tested.
135   // These objects inherit UnaryOpBase mostly as a means to share some default
136   // values for some properties.
137   //
138   // An important design element here is that the fixed-point values are passed
139   // around as raw integers (e.g. int32_t or SIMD types such as int32x4_t), not
140   // as higher-level FixedPoint objects. The motivation for this design is 1) to
141   // avoid having to templatize everything in the tIntegerBits parameter of
142   // class FixedPoint, and 2) to allow directly testing low-level functions
  // operating on raw types (e.g. RoundingDivideByPOT) without needlessly
  // requiring wrapping raw values in FixedPoint objects.
146   class UnaryOpBase {
147    public:
148     // Min bound of the input range of this op. For example, an op only handling
149     // nonnegative values would return 0.
MinInput() const150     ScalarType MinInput() const {
151       return std::numeric_limits<ScalarType>::min();
152     }
153     // Max bound of the input range of this op. For example, an op only handling
154     // nonpositive values would return 0.
MaxInput() const155     ScalarType MaxInput() const {
156       return std::numeric_limits<ScalarType>::max();
157     }
158     // Tolerated difference between actual and reference ScalarType values.
159     // Note that the corresponding real-numbers tolerance depends on the number
160     // of integer bits of the fixed-point representation of the results of this
161     // op.
162     // For example, for an op returning fixed-point values with 0 integer bits,
163     // the correspondence between real-number values and raw values is
164     // real_number = (2^31) * raw_value.
Tolerance() const165     ScalarType Tolerance() const { return 0; }
166   };
167 
168   // Op wrapping RoundingDivideByPOT
169   class RoundingDivideByPOTOp final : public UnaryOpBase {
170    public:
RoundingDivideByPOTOp(int exponent)171     RoundingDivideByPOTOp(int exponent) : exponent_(exponent) {}
ReferenceOp(ScalarType x) const172     ScalarType ReferenceOp(ScalarType x) const {
173       const double d = static_cast<double>(x) / (1ll << exponent_);
174       return static_cast<ScalarType>(std::round(d));
175     }
176     template <typename RawType>
Op(RawType x) const177     RawType Op(RawType x) const {
178       return RoundingDivideByPOT(x, exponent_);
179     }
180 
181    private:
182     const int exponent_;
183   };
184 
185   // Op wrapping SaturatingRoundingMultiplyByPOT
186   template <int tExponent>
187   class SaturatingRoundingMultiplyByPOTOp final : public UnaryOpBase {
188    public:
ReferenceOp(ScalarType x) const189     ScalarType ReferenceOp(ScalarType x) const {
190       const double d = static_cast<double>(x) * std::pow(2., tExponent);
191       const double clamp_min = std::numeric_limits<ScalarType>::min();
192       const double clamp_max = std::numeric_limits<ScalarType>::max();
193       const double clamped = std::min(clamp_max, std::max(clamp_min, d));
194       return static_cast<ScalarType>(std::round(clamped));
195     }
196     template <typename RawType>
Op(RawType x) const197     RawType Op(RawType x) const {
198       return SaturatingRoundingMultiplyByPOT<tExponent>(x);
199     }
200   };
201 
202   // Op wrapping exp_on_interval_between_negative_one_quarter_and_0_excl
203   class ExpOnIntervalBetweenNegativeOneQuarterAnd0ExclOp final
204       : public UnaryOpBase {
205    public:
MinInput() const206     ScalarType MinInput() const { return -(1 << (kScalarTypeBits - 3)); }
MaxInput() const207     ScalarType MaxInput() const { return 0; }
Tolerance() const208     ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 500 : 1; }
ReferenceOp(ScalarType x) const209     ScalarType ReferenceOp(ScalarType x) const {
210       using F = FixedPoint<ScalarType, 0>;
211       const double d = ToDouble(F::FromRaw(x));
212       const double e = std::exp(d);
213       return F::FromDouble(e).raw();
214     }
215     template <typename RawType>
Op(RawType x) const216     RawType Op(RawType x) const {
217       using F = FixedPoint<RawType, 0>;
218       const F f = F::FromRaw(x);
219       const F e = exp_on_interval_between_negative_one_quarter_and_0_excl(f);
220       return e.raw();
221     }
222   };
223 
224   // Op wrapping exp_on_negative_values
225   template <int tIntegerBits>
226   class ExpOnNegativeValuesOp final : public UnaryOpBase {
227    public:
MaxInput() const228     ScalarType MaxInput() const { return 0; }
Tolerance() const229     ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 500 : 2; }
ReferenceOp(ScalarType x) const230     ScalarType ReferenceOp(ScalarType x) const {
231       using F = FixedPoint<ScalarType, tIntegerBits>;
232       using F0 = FixedPoint<ScalarType, 0>;
233       const double d = ToDouble(F::FromRaw(x));
234       const double e = std::exp(d);
235       return F0::FromDouble(e).raw();
236     }
237     template <typename RawType>
Op(RawType x) const238     RawType Op(RawType x) const {
239       using F = FixedPoint<RawType, tIntegerBits>;
240       const F f = F::FromRaw(x);
241       return exp_on_negative_values(f).raw();
242     }
243   };
244 
245   // Op wrapping one_minus_x_over_one_plus_x_for_x_in_0_1
246   class OneMinusXOverOnePlusXForXIn01Op final : public UnaryOpBase {
247    public:
MinInput() const248     ScalarType MinInput() const { return 0; }
Tolerance() const249     ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 12 : 11; }
ReferenceOp(ScalarType x) const250     ScalarType ReferenceOp(ScalarType x) const {
251       using F = FixedPoint<ScalarType, 0>;
252       const double d = ToDouble(F::FromRaw(x));
253       const double e = (1 - d) / (1 + d);
254       return F::FromDouble(e).raw();
255     }
256     template <typename RawType>
Op(RawType x) const257     RawType Op(RawType x) const {
258       using F = FixedPoint<RawType, 0>;
259       const F f = F::FromRaw(x);
260       return one_minus_x_over_one_plus_x_for_x_in_0_1(f).raw();
261     }
262   };
263 
264   // Op wrapping tanh
265   template <int tIntegerBits>
266   class TanhOp final : public UnaryOpBase {
267    public:
Tolerance() const268     ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 310 : 12; }
ReferenceOp(ScalarType x) const269     ScalarType ReferenceOp(ScalarType x) const {
270       using F = FixedPoint<ScalarType, tIntegerBits>;
271       using F0 = FixedPoint<ScalarType, 0>;
272       const double d = ToDouble(F::FromRaw(x));
273       const double e = std::tanh(d);
274       return F0::FromDouble(e).raw();
275     }
276     template <typename RawType>
Op(RawType x) const277     RawType Op(RawType x) const {
278       using F = FixedPoint<RawType, tIntegerBits>;
279       const F f = F::FromRaw(x);
280       return tanh(f).raw();
281     }
282   };
283 
284   // Op wrapping one_over_one_plus_x_for_x_in_0_1
285   class OneOverOnePlusXForXIn01Op final : public UnaryOpBase {
286    public:
MinInput() const287     ScalarType MinInput() const { return 0; }
Tolerance() const288     ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 6 : 5; }
ReferenceOp(ScalarType x) const289     ScalarType ReferenceOp(ScalarType x) const {
290       using F = FixedPoint<ScalarType, 0>;
291       const double d = ToDouble(F::FromRaw(x));
292       const double e = 1 / (1 + d);
293       return F::FromDouble(e).raw();
294     }
295     template <typename RawType>
Op(RawType x) const296     RawType Op(RawType x) const {
297       using F = FixedPoint<RawType, 0>;
298       const F f = F::FromRaw(x);
299       return one_over_one_plus_x_for_x_in_0_1(f).raw();
300     }
301   };
302 
303   // Op wrapping logistic
304   template <int tIntegerBits>
305   class LogisticOp final : public UnaryOpBase {
306    public:
Tolerance() const307     ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 155 : 6; }
ReferenceOp(ScalarType x) const308     ScalarType ReferenceOp(ScalarType x) const {
309       using F = FixedPoint<ScalarType, tIntegerBits>;
310       using F0 = FixedPoint<ScalarType, 0>;
311       const double d = ToDouble(F::FromRaw(x));
312       const double e = 1 / (1 + std::exp(-d));
313       return F0::FromDouble(e).raw();
314     }
315     template <typename RawType>
Op(RawType x) const316     RawType Op(RawType x) const {
317       using F = FixedPoint<RawType, tIntegerBits>;
318       const F f = F::FromRaw(x);
319       return logistic(f).raw();
320     }
321   };
322 
  // Tests a given op on a given list of ScalarType input values.
324   template <typename tUnaryOpType>
TestUnaryOp(const tUnaryOpType & unary_op,const std::vector<ScalarType> & testvals)325   void TestUnaryOp(const tUnaryOpType& unary_op,
326                    const std::vector<ScalarType>& testvals) {
327     Check(0 == (testvals.size() % kSimdLanes));
328     for (std::size_t i = 0; i < testvals.size(); i += kSimdLanes) {
329       // First, clamp input values accoding to the MinInput() and MaxInput()
330       // bounds returned by the op.
331       ScalarType input[kSimdLanes] = {0};
332       for (std::size_t j = 0; j < kSimdLanes; j++) {
333         const ScalarType raw_input = testvals[i + j];
334         input[j] = std::min(unary_op.MaxInput(),
335                             std::max(unary_op.MinInput(), raw_input));
336       }
337       // Compute reference results and check that the actual results on
338       // scalar inputs agree with them, to the Tolerance() returned by the op.
339       ScalarType reference[kSimdLanes] = {0};
340       ScalarType actual_scalar[kSimdLanes] = {0};
341       for (std::size_t j = 0; j < kSimdLanes; j++) {
342         reference[j] = unary_op.ReferenceOp(input[j]);
343         actual_scalar[j] = unary_op.Op(input[j]);
344         const std::int64_t diff = static_cast<std::int64_t>(actual_scalar[j]) -
345                                   static_cast<std::int64_t>(reference[j]);
346         if (std::abs(diff) > unary_op.Tolerance()) {
347           fprintf(stderr, "abs(diff) (%" PRId64 ") > tolerance (%d)\n", diff,
348                   unary_op.Tolerance());
349         }
350         Check(std::abs(diff) <= unary_op.Tolerance());
351       }
352       // Check that the actual results on SIMD inputs agree *exactly* with the
353       // actual results on scalar inputs. I.e. SIMD must make absolutely no
354       // difference
355       // to the results, regardless of the fact that both scalar and SIMD
356       // results may differ from the reference results.
357       ScalarType actual_simd[kSimdLanes] = {0};
358       Store<SimdType>(actual_simd, unary_op.Op(Load<SimdType>(input)));
359       for (std::size_t j = 0; j < kSimdLanes; j++) {
360         if (actual_simd[j] != actual_scalar[j]) {
361           fprintf(stderr, "SIMD (%d) != scalar (%d)\n", actual_simd[j],
362                   actual_scalar[j]);
363         }
364         Check(actual_simd[j] == actual_scalar[j]);
365       }
366     }
367   }
368 
369   template <int tIntegerBits>
test_convert(FixedPoint<ScalarType,tIntegerBits> x)370   void test_convert(FixedPoint<ScalarType, tIntegerBits> x) {
371     typedef FixedPoint<ScalarType, tIntegerBits> F;
372     F y = F::FromDouble(ToDouble(x));
373     Check(y == x);
374   }
375 
376   template <int tIntegerBits_a, int tIntegerBits_b>
test_Rescale(FixedPoint<ScalarType,tIntegerBits_a> a)377   void test_Rescale(FixedPoint<ScalarType, tIntegerBits_a> a) {
378     FixedPoint<ScalarType, tIntegerBits_b> actual = Rescale<tIntegerBits_b>(a);
379     FixedPoint<ScalarType, tIntegerBits_b> expected =
380         FixedPoint<ScalarType, tIntegerBits_b>::FromDouble(ToDouble(a));
381     Check(actual == expected);
382   }
383 
384   template <int tIntegerBits_a, int tIntegerBits_b>
test_Rescale(const std::vector<ScalarType> & testvals)385   void test_Rescale(const std::vector<ScalarType>& testvals) {
386     for (auto a : testvals) {
387       FixedPoint<ScalarType, tIntegerBits_a> aq;
388       aq.raw() = a;
389       test_Rescale<tIntegerBits_a, tIntegerBits_b>(aq);
390     }
391   }
392 
393   template <int tIntegerBits_a, int tIntegerBits_b>
test_mul(FixedPoint<ScalarType,tIntegerBits_a> a,FixedPoint<ScalarType,tIntegerBits_b> b)394   void test_mul(FixedPoint<ScalarType, tIntegerBits_a> a,
395                 FixedPoint<ScalarType, tIntegerBits_b> b) {
396     static const int ProductIntegerBits = tIntegerBits_a + tIntegerBits_b;
397     using ProductFixedPoint = FixedPoint<ScalarType, ProductIntegerBits>;
398     ProductFixedPoint ab;
399     ab = a * b;
400     double a_double = ToDouble(a);
401     double b_double = ToDouble(b);
402     double ab_double = a_double * b_double;
403     ProductFixedPoint expected = ProductFixedPoint::FromDouble(ab_double);
404     std::int64_t diff = std::int64_t(ab.raw()) - std::int64_t(expected.raw());
405     Check(std::abs(diff) <= 1);
406   }
407 
408   template <int tIntegerBits_a, int tIntegerBits_b>
test_mul(const std::vector<ScalarType> & testvals)409   void test_mul(const std::vector<ScalarType>& testvals) {
410     for (auto a : testvals) {
411       for (auto b : testvals) {
412         FixedPoint<ScalarType, tIntegerBits_a> aq;
413         FixedPoint<ScalarType, tIntegerBits_b> bq;
414         aq.raw() = a;
415         bq.raw() = b;
416         test_mul(aq, bq);
417       }
418     }
419   }
420 
421   template <int tExponent, int tIntegerBits_a>
test_ExactMulByPot(FixedPoint<ScalarType,tIntegerBits_a> a)422   void test_ExactMulByPot(FixedPoint<ScalarType, tIntegerBits_a> a) {
423     double x = ToDouble(a) * std::pow(2.0, tExponent);
424     double y = ToDouble(ExactMulByPot<tExponent>(a));
425     Check(x == y);
426   }
427 
428   template <int tExponent, int tIntegerBits_a>
test_ExactMulByPot(const std::vector<ScalarType> & testvals)429   void test_ExactMulByPot(const std::vector<ScalarType>& testvals) {
430     for (auto a : testvals) {
431       FixedPoint<ScalarType, tIntegerBits_a> aq;
432       aq.raw() = a;
433       test_ExactMulByPot<tExponent, tIntegerBits_a>(aq);
434     }
435   }
436 
437   // Make the list of test values to test each op against.
MakeTestVals()438   std::vector<ScalarType> MakeTestVals() {
439     std::vector<ScalarType> testvals;
440 
441     for (int i = 0; i < kScalarTypeBits - 1; i++) {
442       testvals.push_back((1 << i) - 2);
443       testvals.push_back((1 << i) - 1);
444       testvals.push_back((1 << i));
445       testvals.push_back((1 << i) + 1);
446       testvals.push_back((1 << i) + 2);
447       testvals.push_back(-(1 << i) - 2);
448       testvals.push_back(-(1 << i) - 1);
449       testvals.push_back(-(1 << i));
450       testvals.push_back(-(1 << i) + 1);
451       testvals.push_back(-(1 << i) + 2);
452     }
453     testvals.push_back(std::numeric_limits<ScalarType>::min());
454     testvals.push_back(std::numeric_limits<ScalarType>::min() + 1);
455     testvals.push_back(std::numeric_limits<ScalarType>::min() + 2);
456     testvals.push_back(std::numeric_limits<ScalarType>::max() - 2);
457     testvals.push_back(std::numeric_limits<ScalarType>::max() - 1);
458     testvals.push_back(std::numeric_limits<ScalarType>::max());
459 
460     std::mt19937 random_engine;
461     std::uniform_int_distribution<ScalarType> uniform_distribution(
462         std::numeric_limits<ScalarType>::min(),
463         std::numeric_limits<ScalarType>::max());
464     for (int i = 0; i < 1000; i++) {
465       testvals.push_back(uniform_distribution(random_engine));
466     }
467 
468     // SIMD tests will require the length of testvals to be a multiple
469     // of SIMD vector size.
470     while (testvals.size() % kSimdLanes) {
471       testvals.push_back(0);
472     }
473 
474     std::sort(testvals.begin(), testvals.end());
475     return testvals;
476   }
477 
RunTests(const char * msg)478   void RunTests(const char* msg) {
479     const std::vector<ScalarType> testvals = MakeTestVals();
480 
481     for (int s = 0; s < kScalarTypeBits; s++) {
482       TestUnaryOp(RoundingDivideByPOTOp(s), testvals);
483     }
484 
485     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<1 - kScalarTypeBits>(),
486                 testvals);
487     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<2 - kScalarTypeBits>(),
488                 testvals);
489     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<3 - kScalarTypeBits>(),
490                 testvals);
491     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<14 - kScalarTypeBits>(),
492                 testvals);
493     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<15 - kScalarTypeBits>(),
494                 testvals);
495     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-15>(), testvals);
496     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-4>(), testvals);
497     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-3>(), testvals);
498     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-2>(), testvals);
499     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-1>(), testvals);
500     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<0>(), testvals);
501     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<1>(), testvals);
502     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<2>(), testvals);
503     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<3>(), testvals);
504     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<4>(), testvals);
505     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<15>(), testvals);
506     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<kScalarTypeBits - 15>(),
507                 testvals);
508     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<kScalarTypeBits - 14>(),
509                 testvals);
510     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<kScalarTypeBits - 3>(),
511                 testvals);
512     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<kScalarTypeBits - 2>(),
513                 testvals);
514     TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<kScalarTypeBits - 1>(),
515                 testvals);
516 
517     TestUnaryOp(ExpOnIntervalBetweenNegativeOneQuarterAnd0ExclOp(), testvals);
518     TestUnaryOp(ExpOnNegativeValuesOp<0>(), testvals);
519     TestUnaryOp(ExpOnNegativeValuesOp<1>(), testvals);
520     TestUnaryOp(ExpOnNegativeValuesOp<2>(), testvals);
521     TestUnaryOp(ExpOnNegativeValuesOp<3>(), testvals);
522     TestUnaryOp(ExpOnNegativeValuesOp<4>(), testvals);
523     TestUnaryOp(ExpOnNegativeValuesOp<5>(), testvals);
524     TestUnaryOp(ExpOnNegativeValuesOp<6>(), testvals);
525 
526     TestUnaryOp(OneMinusXOverOnePlusXForXIn01Op(), testvals);
527     TestUnaryOp(TanhOp<0>(), testvals);
528     TestUnaryOp(TanhOp<1>(), testvals);
529     TestUnaryOp(TanhOp<2>(), testvals);
530     TestUnaryOp(TanhOp<3>(), testvals);
531     TestUnaryOp(TanhOp<4>(), testvals);
532     TestUnaryOp(TanhOp<5>(), testvals);
533     TestUnaryOp(TanhOp<6>(), testvals);
534 
535     TestUnaryOp(OneOverOnePlusXForXIn01Op(), testvals);
536     TestUnaryOp(LogisticOp<0>(), testvals);
537     TestUnaryOp(LogisticOp<1>(), testvals);
538     TestUnaryOp(LogisticOp<2>(), testvals);
539     TestUnaryOp(LogisticOp<3>(), testvals);
540     TestUnaryOp(LogisticOp<4>(), testvals);
541     TestUnaryOp(LogisticOp<5>(), testvals);
542     TestUnaryOp(LogisticOp<6>(), testvals);
543 
544     for (auto a : testvals) {
545       FixedPoint<ScalarType, 4> x;
546       x.raw() = a;
547       test_convert(x);
548     }
549 
550     test_mul<0, 0>(testvals);
551     test_mul<0, 1>(testvals);
552     test_mul<2, 0>(testvals);
553     test_mul<1, 1>(testvals);
554     test_mul<4, 4>(testvals);
555     test_mul<3, 5>(testvals);
556     test_mul<7, 2>(testvals);
557     test_mul<kScalarTypeBits / 2 - 1, kScalarTypeBits / 2 - 2>(testvals);
558 
559     test_Rescale<0, 0>(testvals);
560     test_Rescale<0, 1>(testvals);
561     test_Rescale<2, 0>(testvals);
562     test_Rescale<4, 4>(testvals);
563     test_Rescale<4, 5>(testvals);
564     test_Rescale<6, 3>(testvals);
565     test_Rescale<13, 9>(testvals);
566 
567     test_ExactMulByPot<0, 0>(testvals);
568     test_ExactMulByPot<0, 4>(testvals);
569     test_ExactMulByPot<1, 4>(testvals);
570     test_ExactMulByPot<3, 2>(testvals);
571     test_ExactMulByPot<-4, 5>(testvals);
572     test_ExactMulByPot<-2, 6>(testvals);
573 
574     fprintf(stderr, "PASS (%s)\n", msg);
575   }
576 };
577 
578 }  // end anonymous namespace
579 
580 }  // end namespace gemmlowp
581 
main()582 int main() {
583   gemmlowp::TestFixedPoint<std::int32_t>().RunTests("Scalar int32");
584   gemmlowp::TestFixedPoint<std::int16_t>().RunTests("Scalar int16");
585 #ifdef GEMMLOWP_SSE4
586   gemmlowp::TestFixedPoint<__m128i>().RunTests("SSE4 __m128i = int32x4");
587   gemmlowp::TestFixedPoint<gemmlowp::int16x8_m128i>().RunTests(
588       "SSE4 __m128i = int16x8");
589 #endif
590 #ifdef GEMMLOWP_NEON
591   gemmlowp::TestFixedPoint<int32x4_t>().RunTests("NEON int32x4_t");
592   gemmlowp::TestFixedPoint<int16x8_t>().RunTests("NEON int16x8_t");
593 #endif
594 #ifdef GEMMLOWP_MSA
595   gemmlowp::TestFixedPoint<v4i32>().RunTests("MSA v4i32");
596   gemmlowp::TestFixedPoint<v8i16>().RunTests("MSA v8i16");
597 #endif
598 #ifdef GEMMLOWP_AVX2
599   gemmlowp::TestFixedPoint<__m256i>().RunTests("AVX __m256i");
600   gemmlowp::TestFixedPoint<gemmlowp::int16x16_m256i>().RunTests(
601       "AVX2 __m256i = int16x16");
602 #endif
603 }
604