1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #pragma once 10 11 #include <gtest/gtest.h> 12 13 #include <algorithm> 14 #include <cassert> 15 #include <cstddef> 16 #include <cstdlib> 17 #include <functional> 18 #include <limits> 19 #include <random> 20 #include <vector> 21 22 #include <xnnpack.h> 23 #include <xnnpack/params-init.h> 24 #include <xnnpack/params.h> 25 26 27 class MaxPoolMicrokernelTester { 28 public: 29 enum class Variant { 30 Native, 31 Scalar, 32 }; 33 output_pixels(size_t output_pixels)34 inline MaxPoolMicrokernelTester& output_pixels(size_t output_pixels) { 35 assert(output_pixels != 0); 36 this->output_pixels_ = output_pixels; 37 return *this; 38 } 39 output_pixels()40 inline size_t output_pixels() const { 41 return this->output_pixels_; 42 } 43 step(size_t step)44 inline MaxPoolMicrokernelTester& step(size_t step) { 45 assert(step != 0); 46 this->step_ = step; 47 return *this; 48 } 49 step()50 inline size_t step() const { 51 return this->step_; 52 } 53 input_offset(size_t input_offset)54 inline MaxPoolMicrokernelTester& input_offset(size_t input_offset) { 55 assert(input_offset != 0); 56 this->input_offset_ = input_offset; 57 return *this; 58 } 59 input_offset()60 inline size_t input_offset() const { 61 return this->input_offset_; 62 } 63 pooling_elements(size_t pooling_elements)64 inline MaxPoolMicrokernelTester& pooling_elements(size_t pooling_elements) { 65 assert(pooling_elements != 0); 66 this->pooling_elements_ = pooling_elements; 67 return *this; 68 } 69 pooling_elements()70 inline size_t pooling_elements() const { 71 return this->pooling_elements_; 72 } 73 packed_pooling_elements()74 inline size_t packed_pooling_elements() const { 75 if (pooling_elements() <= primary_pooling_tile()) { 76 return primary_pooling_tile(); 77 } else { 78 return (pooling_elements() - primary_pooling_tile()) % incremental_pooling_tile() == 0 ? pooling_elements() : ((pooling_elements() - primary_pooling_tile()) / incremental_pooling_tile() + 1) * incremental_pooling_tile() + primary_pooling_tile(); 79 } 80 } 81 pooling_tile(size_t primary_tile,size_t incremental_tile)82 inline MaxPoolMicrokernelTester& pooling_tile(size_t primary_tile, size_t incremental_tile) { 83 assert(primary_tile != 0); 84 this->primary_pooling_tile_ = primary_tile; 85 this->incremental_pooling_tile_ = incremental_tile; 86 return *this; 87 } 88 primary_pooling_tile(size_t primary_pooling_tile)89 inline MaxPoolMicrokernelTester& primary_pooling_tile(size_t primary_pooling_tile) { 90 assert(primary_pooling_tile != 0); 91 this->primary_pooling_tile_ = primary_pooling_tile; 92 return *this; 93 } 94 primary_pooling_tile()95 inline size_t primary_pooling_tile() const { 96 return this->primary_pooling_tile_; 97 } 98 incremental_pooling_tile(size_t incremental_pooling_tile)99 inline MaxPoolMicrokernelTester& incremental_pooling_tile(size_t incremental_pooling_tile) { 100 assert(incremental_pooling_tile != 0); 101 this->incremental_pooling_tile_ = incremental_pooling_tile; 102 return *this; 103 } 104 incremental_pooling_tile()105 inline size_t incremental_pooling_tile() const { 106 return this->incremental_pooling_tile_; 107 } 108 channels(size_t channels)109 inline MaxPoolMicrokernelTester& channels(size_t channels) { 110 assert(channels != 0); 111 this->channels_ = channels; 112 return *this; 113 } 114 channels()115 inline size_t channels() const { 116 return this->channels_; 117 } 118 output_stride(size_t output_stride)119 inline MaxPoolMicrokernelTester& output_stride(size_t output_stride) { 120 assert(output_stride != 0); 121 this->output_stride_ = output_stride; 122 return *this; 123 } 124 output_stride()125 inline size_t output_stride() const { 126 if (this->output_stride_ == 0) { 127 return channels(); 128 } else { 129 assert(this->output_stride_ >= channels()); 130 return this->output_stride_; 131 } 132 } 133 qmin(uint8_t qmin)134 inline MaxPoolMicrokernelTester& qmin(uint8_t qmin) { 135 this->qmin_ = qmin; 136 return *this; 137 } 138 qmin()139 inline uint8_t qmin() const { 140 return this->qmin_; 141 } 142 qmax(uint8_t qmax)143 inline MaxPoolMicrokernelTester& qmax(uint8_t qmax) { 144 this->qmax_ = qmax; 145 return *this; 146 } 147 qmax()148 inline uint8_t qmax() const { 149 return this->qmax_; 150 } 151 iterations(size_t iterations)152 inline MaxPoolMicrokernelTester& iterations(size_t iterations) { 153 this->iterations_ = iterations; 154 return *this; 155 } 156 iterations()157 inline size_t iterations() const { 158 return this->iterations_; 159 } 160 161 void Test(xnn_u8_maxpool_ukernel_function maxpool, Variant variant = Variant::Native) const { 162 std::random_device random_device; 163 auto rng = std::mt19937(random_device()); 164 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng); 165 166 std::vector<const uint8_t*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements()); 167 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + 168 indirect_input.size() * channels()); 169 std::vector<uint8_t> output(XNN_EXTRA_BYTES / sizeof(uint8_t) + 170 (output_pixels() - 1) * output_stride() + channels()); 171 std::vector<uint8_t> output_ref(output_pixels() * channels()); 172 for (size_t iteration = 0; iteration < iterations(); iteration++) { 173 do { 174 std::generate(input.begin(), input.end(), std::ref(u8rng)); 175 } while (input.size() > 1 && *std::max_element(input.cbegin(), input.cend()) == *std::min_element(input.cbegin(), input.cend())); 176 std::fill(output.begin(), output.end(), 0xA5); 177 178 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) { 179 indirect_input[i] = input.data() + i * channels() - input_offset(); 180 } 181 std::shuffle(indirect_input.begin(), 182 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng); 183 184 // Prepare parameters. 185 xnn_u8_minmax_params params = { }; 186 switch (variant) { 187 case Variant::Native: 188 params = xnn_init_u8_minmax_params(qmin(), qmax()); 189 break; 190 case Variant::Scalar: 191 params = xnn_init_scalar_u8_minmax_params(qmin(), qmax()); 192 break; 193 } 194 195 // Compute reference results. 196 for (size_t x = 0; x < output_pixels(); x++) { 197 for (size_t c = 0; c < channels(); c++) { 198 uint8_t max_value = 0; 199 for (size_t p = 0; p < pooling_elements(); p++) { 200 max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]); 201 } 202 max_value = std::min(max_value, qmax()); 203 max_value = std::max(max_value, qmin()); 204 output_ref[x * channels() + c] = max_value; 205 } 206 } 207 208 // Call optimized micro-kernel. 209 maxpool(output_pixels(), pooling_elements(), channels(), 210 indirect_input.data(), input_offset() * sizeof(uint8_t), output.data(), 211 (step() - packed_pooling_elements()) * sizeof(void*), 212 (output_stride() - channels()) * sizeof(uint8_t), 213 ¶ms); 214 215 // Verify results. 216 for (size_t x = 0; x < output_pixels(); x++) { 217 for (size_t c = 0; c < channels(); c++) { 218 ASSERT_GE(uint32_t(output[x * output_stride() + c]), uint32_t(qmin())) 219 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 220 << ", pooling elements = " << pooling_elements() << ", step = " << step() 221 << ", input offset = " << input_offset(); 222 ASSERT_LE(uint32_t(output[x * output_stride() + c]), uint32_t(qmax())) 223 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 224 << ", pooling elements = " << pooling_elements() << ", step = " << step() 225 << ", input offset = " << input_offset(); 226 ASSERT_EQ(uint32_t(output_ref[x * channels() + c]), uint32_t(output[x * output_stride() + c])) 227 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 228 << ", pooling elements = " << pooling_elements() << ", step = " << step() 229 << ", input offset = " << input_offset(); 230 } 231 } 232 } 233 } 234 235 void Test(xnn_f32_maxpool_ukernel_function maxpool, Variant variant = Variant::Native) const { 236 std::random_device random_device; 237 auto rng = std::mt19937(random_device()); 238 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng); 239 240 std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements()); 241 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + 242 ((output_pixels() - 1) * step() + pooling_elements()) * channels()); 243 std::vector<float> output(XNN_EXTRA_BYTES / sizeof(float) + 244 (output_pixels() - 1) * output_stride() + channels()); 245 std::vector<float> output_ref(output_pixels() * channels()); 246 for (size_t iteration = 0; iteration < iterations(); iteration++) { 247 std::generate(input.begin(), input.end(), std::ref(f32rng)); 248 std::fill(output.begin(), output.end(), nanf("")); 249 250 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) { 251 indirect_input[i] = input.data() + i * channels() - input_offset(); 252 } 253 std::shuffle(indirect_input.begin(), 254 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng); 255 256 // Compute reference results, without clamping. 257 for (size_t x = 0; x < output_pixels(); x++) { 258 for (size_t c = 0; c < channels(); c++) { 259 float max_value = -std::numeric_limits<float>::infinity(); 260 for (size_t p = 0; p < pooling_elements(); p++) { 261 max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]); 262 } 263 output_ref[x * channels() + c] = max_value; 264 } 265 } 266 267 // Compute clamping parameters. 268 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); 269 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); 270 const float accumulated_range = accumulated_max - accumulated_min; 271 const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range; 272 const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range; 273 274 275 // Prepare parameters. 276 xnn_f32_minmax_params params = { }; 277 switch (variant) { 278 case Variant::Native: 279 params = xnn_init_f32_minmax_params(output_min, output_max); 280 break; 281 case Variant::Scalar: 282 params = xnn_init_scalar_f32_minmax_params(output_min, output_max); 283 break; 284 } 285 286 // Clamp reference results. 287 for (float& output_value : output_ref) { 288 output_value = std::max(std::min(output_value, output_max), output_min); 289 } 290 291 // Call optimized micro-kernel. 292 maxpool(output_pixels(), pooling_elements(), channels(), 293 indirect_input.data(), input_offset() * sizeof(float), output.data(), 294 (step() - packed_pooling_elements()) * sizeof(void*), 295 (output_stride() - channels()) * sizeof(float), 296 ¶ms); 297 298 // Verify results. 299 for (size_t x = 0; x < output_pixels(); x++) { 300 for (size_t c = 0; c < channels(); c++) { 301 ASSERT_GE(output[x * output_stride() + c], output_min) 302 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 303 << ", pooling elements = " << pooling_elements() << ", step = " << step() 304 << ", input offset = " << input_offset(); 305 ASSERT_LE(output[x * output_stride() + c], output_max) 306 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 307 << ", pooling elements = " << pooling_elements() << ", step = " << step() 308 << ", input offset = " << input_offset(); 309 ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c]) 310 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 311 << ", pooling elements = " << pooling_elements() << ", step = " << step() 312 << ", input offset = " << input_offset(); 313 } 314 } 315 } 316 } 317 318 private: 319 size_t output_pixels_{1}; 320 size_t pooling_elements_{1}; 321 size_t channels_{1}; 322 size_t input_offset_{0}; 323 size_t step_{1}; 324 size_t primary_pooling_tile_{1}; 325 size_t incremental_pooling_tile_{1}; 326 size_t output_stride_{0}; 327 uint8_t qmin_{0}; 328 uint8_t qmax_{255}; 329 size_t iterations_{3}; 330 }; 331