1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
8 #include <gtest/gtest.h>
9 
10 #include <algorithm>
11 #include <cassert>
12 #include <cmath>
13 #include <cstddef>
14 #include <cstdint>
15 #include <functional>
16 #include <random>
17 #include <vector>
18 
19 #include <xnnpack.h>
20 #include <xnnpack/AlignedAllocator.h>
21 #include <xnnpack/params.h>
22 
23 
24 class IBilinearMicrokernelTester {
25  public:
pixels(uint32_t pixels)26   inline IBilinearMicrokernelTester& pixels(uint32_t pixels) {
27     assert(pixels >= 1);
28     this->pixels_ = pixels;
29     return *this;
30   }
31 
pixels()32   inline uint32_t pixels() const {
33     return this->pixels_;
34   }
35 
channels(uint32_t channels)36   inline IBilinearMicrokernelTester& channels(uint32_t channels) {
37     assert(channels >= 1);
38     this->channels_ = channels;
39     return *this;
40   }
41 
channels()42   inline uint32_t channels() const {
43     return this->channels_;
44   }
45 
input_offset(uint32_t input_offset)46   inline IBilinearMicrokernelTester& input_offset(uint32_t input_offset) {
47     this->input_offset_ = input_offset;
48     return *this;
49   }
50 
input_offset()51   inline uint32_t input_offset() const {
52     return this->input_offset_;
53   }
54 
output_stride(uint32_t output_stride)55   inline IBilinearMicrokernelTester& output_stride(uint32_t output_stride) {
56     assert(output_stride != 0);
57     this->output_stride_ = output_stride;
58     return *this;
59   }
60 
output_stride()61   inline uint32_t output_stride() const {
62     if (this->output_stride_ == 0) {
63       return channels();
64     } else {
65       assert(this->output_stride_ >= channels());
66       return this->output_stride_;
67     }
68   }
69 
iterations(size_t iterations)70   inline IBilinearMicrokernelTester& iterations(size_t iterations) {
71     this->iterations_ = iterations;
72     return *this;
73   }
74 
iterations()75   inline size_t iterations() const {
76     return this->iterations_;
77   }
78 
input_stride(uint32_t input_stride)79   inline IBilinearMicrokernelTester& input_stride(uint32_t input_stride) {
80     assert(input_stride != 0);
81     this->input_stride_ = input_stride;
82     return *this;
83   }
84 
input_stride()85   inline uint32_t input_stride() const {
86     if (this->input_stride_ == 0) {
87       return 4 * pixels();
88     } else {
89       assert(this->input_stride_ >= 4 * pixels());
90       return this->input_stride_;
91     }
92   }
93 
Test(xnn_f32_ibilinear_ukernel_function ibilinear)94   void Test(xnn_f32_ibilinear_ukernel_function ibilinear) const {
95     std::random_device random_device;
96     auto rng = std::mt19937(random_device());
97     auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
98 
99     std::vector<const float*> indirection(pixels() * 4);
100     std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + indirection.size() * channels());
101     std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
102     std::vector<float> output((pixels() - 1) * output_stride() + channels());
103     std::vector<float> output_ref(pixels() * channels());
104 
105     for (size_t iteration = 0; iteration < iterations(); iteration++) {
106       std::generate(input.begin(), input.end(), std::ref(f32rng));
107       std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
108       std::fill(output.begin(), output.end(), nanf(""));
109 
110       for (size_t i = 0; i < indirection.size(); i++) {
111         indirection[i] = input.data() + i * channels() - input_offset();
112       }
113       std::shuffle(indirection.begin(), indirection.end(), rng);
114 
115       // Compute reference results.
116       for (size_t i = 0; i < pixels(); i++) {
117         for (size_t c = 0; c < channels(); c++) {
118           const float alpha_h = packed_weights[i * 2 + 0];
119           const float alpha_v = packed_weights[i * 2 + 1];
120           output_ref[i * channels() + c] =
121             indirection[i * 4 + 0][c + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
122             indirection[i * 4 + 1][c + input_offset()] * alpha_h * (1.0f - alpha_v) +
123             indirection[i * 4 + 2][c + input_offset()] * (1.0f - alpha_h) * alpha_v +
124             indirection[i * 4 + 3][c + input_offset()] * alpha_h * alpha_v;
125         }
126       }
127 
128       // Call optimized micro-kernel.
129       ibilinear(
130         pixels(), channels() * sizeof(float),
131         indirection.data(), input_offset() * sizeof(float),
132         packed_weights.data(), output.data(),
133         (output_stride() - channels()) * sizeof(float));
134 
135       // Verify results.
136       for (size_t i = 0; i < pixels(); i++) {
137         for (size_t c = 0; c < channels(); c++) {
138           ASSERT_NEAR(
139               output_ref[i * channels() + c],
140               output[i * output_stride() + c],
141               std::abs(output_ref[i * channels() + c]) * 1.0e-4)
142             << "i = " << i << ", channel = " << c;
143         }
144       }
145     }
146   }
147 
TestCHW(xnn_f32_ibilinear_chw_ukernel_function ibilinear)148   void TestCHW(xnn_f32_ibilinear_chw_ukernel_function ibilinear) const {
149     std::random_device random_device;
150     auto rng = std::mt19937(random_device());
151     auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
152 
153     std::vector<const float*> indirection(pixels() * 2);
154     std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + (channels() - 1) * input_stride() + 4 * pixels());
155     std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
156     std::vector<float> output(pixels() * channels());
157     std::vector<float> output_ref(pixels() * channels());
158 
159     for (size_t iteration = 0; iteration < iterations(); iteration++) {
160       std::generate(input.begin(), input.end(), std::ref(f32rng));
161       std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
162       std::fill(output.begin(), output.end(), nanf(""));
163 
164       // Indirection will point to the even ("left") pixels of the input.
165       // The kernels will expect "right" pixels to be placed right next to them.
166       for (size_t i = 0; i < indirection.size(); i++) {
167         const float* left_corner = input.data() + 2 * i - input_offset();
168         indirection[i] = left_corner;
169       }
170       std::shuffle(indirection.begin(), indirection.end(), rng);
171 
172       // Compute reference results.
173       for (size_t i = 0; i < pixels(); i++) {
174         for (size_t c = 0; c < channels(); c++) {
175           const float alpha_h = packed_weights[i * 2 + 0];
176           const float alpha_v = packed_weights[i * 2 + 1];
177           // `c * pixels() + i` because the output is NCHW.
178           output_ref[c * pixels() + i] =
179             // `c * indirection.size()` because the input is NCHW.
180             (indirection[i * 2 + 0] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
181             (indirection[i * 2 + 0] + 1)[c * input_stride() + input_offset()] * alpha_h * (1.0f - alpha_v) +
182             (indirection[i * 2 + 1] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * alpha_v +
183             (indirection[i * 2 + 1] + 1)[c * input_stride() + input_offset()] * alpha_h * alpha_v;
184         }
185       }
186 
187       // Call optimized micro-kernel.
188       ibilinear(
189         pixels(), channels(),
190         indirection.data(), input_offset() * sizeof(float),
191         packed_weights.data(), output.data(), input_stride() * sizeof(float));
192 
193       // Verify results.
194       for (size_t c = 0; c < channels(); c++) {
195         for (size_t i = 0; i < pixels(); i++) {
196           ASSERT_NEAR(
197               output_ref[c * pixels() + i],
198               output[c * pixels() + i],
199               std::abs(output_ref[c * pixels() + i]) * 1.0e-4)
200             << "i = " << i << ", channel = " << c;
201         }
202       }
203     }
204   }
205 
206  private:
207   uint32_t channels_{1};
208   uint32_t pixels_{1};
209   uint32_t output_stride_{0};
210   uint32_t input_stride_{0};
211   uint32_t input_offset_{0};
212   size_t iterations_{3};
213 };
214