1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #pragma once 10 11 #include <gtest/gtest.h> 12 13 #include <cstddef> 14 #include <cstdlib> 15 #include <algorithm> 16 #include <cmath> 17 #include <functional> 18 #include <limits> 19 #include <random> 20 #include <vector> 21 22 #include <fp16.h> 23 24 #include <xnnpack.h> 25 26 27 class GlobalAveragePoolingOperatorTester { 28 public: channels(size_t channels)29 inline GlobalAveragePoolingOperatorTester& channels(size_t channels) { 30 assert(channels != 0); 31 this->channels_ = channels; 32 return *this; 33 } 34 channels()35 inline size_t channels() const { 36 return this->channels_; 37 } 38 width(size_t width)39 inline GlobalAveragePoolingOperatorTester& width(size_t width) { 40 assert(width != 0); 41 this->width_ = width; 42 return *this; 43 } 44 width()45 inline size_t width() const { 46 return this->width_; 47 } 48 input_stride(size_t input_stride)49 inline GlobalAveragePoolingOperatorTester& input_stride(size_t input_stride) { 50 assert(input_stride != 0); 51 this->input_stride_ = input_stride; 52 return *this; 53 } 54 input_stride()55 inline size_t input_stride() const { 56 if (this->input_stride_ == 0) { 57 return channels(); 58 } else { 59 assert(this->input_stride_ >= channels()); 60 return this->input_stride_; 61 } 62 } 63 output_stride(size_t output_stride)64 inline GlobalAveragePoolingOperatorTester& output_stride(size_t output_stride) { 65 assert(output_stride != 0); 66 this->output_stride_ = output_stride; 67 return *this; 68 } 69 output_stride()70 inline size_t output_stride() const { 71 if (this->output_stride_ == 0) { 72 return channels(); 73 } else { 74 assert(this->output_stride_ >= channels()); 75 return this->output_stride_; 76 } 77 } 78 batch_size(size_t batch_size)79 inline GlobalAveragePoolingOperatorTester& batch_size(size_t batch_size) { 80 assert(batch_size != 0); 81 this->batch_size_ = batch_size; 82 return *this; 83 } 84 batch_size()85 inline size_t batch_size() const { 86 return this->batch_size_; 87 } 88 input_scale(float input_scale)89 inline GlobalAveragePoolingOperatorTester& input_scale(float input_scale) { 90 assert(input_scale > 0.0f); 91 assert(std::isnormal(input_scale)); 92 this->input_scale_ = input_scale; 93 return *this; 94 } 95 input_scale()96 inline float input_scale() const { 97 return this->input_scale_; 98 } 99 input_zero_point(uint8_t input_zero_point)100 inline GlobalAveragePoolingOperatorTester& input_zero_point(uint8_t input_zero_point) { 101 this->input_zero_point_ = input_zero_point; 102 return *this; 103 } 104 input_zero_point()105 inline uint8_t input_zero_point() const { 106 return this->input_zero_point_; 107 } 108 output_scale(float output_scale)109 inline GlobalAveragePoolingOperatorTester& output_scale(float output_scale) { 110 assert(output_scale > 0.0f); 111 assert(std::isnormal(output_scale)); 112 this->output_scale_ = output_scale; 113 return *this; 114 } 115 output_scale()116 inline float output_scale() const { 117 return this->output_scale_; 118 } 119 output_zero_point(uint8_t output_zero_point)120 inline GlobalAveragePoolingOperatorTester& output_zero_point(uint8_t output_zero_point) { 121 this->output_zero_point_ = output_zero_point; 122 return *this; 123 } 124 output_zero_point()125 inline uint8_t output_zero_point() const { 126 return this->output_zero_point_; 127 } 128 qmin(uint8_t qmin)129 inline GlobalAveragePoolingOperatorTester& qmin(uint8_t qmin) { 130 this->qmin_ = qmin; 131 return *this; 132 } 133 qmin()134 inline uint8_t qmin() const { 135 return this->qmin_; 136 } 137 qmax(uint8_t qmax)138 inline GlobalAveragePoolingOperatorTester& qmax(uint8_t qmax) { 139 this->qmax_ = qmax; 140 return *this; 141 } 142 qmax()143 inline uint8_t qmax() const { 144 return this->qmax_; 145 } 146 iterations(size_t iterations)147 inline GlobalAveragePoolingOperatorTester& iterations(size_t iterations) { 148 this->iterations_ = iterations; 149 return *this; 150 } 151 iterations()152 inline size_t iterations() const { 153 return this->iterations_; 154 } 155 TestNWCxQU8()156 void TestNWCxQU8() const { 157 std::random_device random_device; 158 auto rng = std::mt19937(random_device()); 159 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng); 160 161 std::vector<uint8_t> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint8_t)); 162 std::vector<uint8_t> output(batch_size() * output_stride()); 163 std::vector<float> output_ref(batch_size() * channels()); 164 for (size_t iteration = 0; iteration < iterations(); iteration++) { 165 std::generate(input.begin(), input.end(), std::ref(u8rng)); 166 std::fill(output.begin(), output.end(), 0xA5); 167 168 // Compute reference results. 169 const double scale = double(input_scale()) / (double(width()) * double(output_scale())); 170 for (size_t i = 0; i < batch_size(); i++) { 171 for (size_t j = 0; j < channels(); j++) { 172 double acc = 0.0f; 173 for (size_t k = 0; k < width(); k++) { 174 acc += double(int32_t(input[(i * width() + k) * input_stride() + j]) - int32_t(input_zero_point())); 175 } 176 output_ref[i * channels() + j] = float(acc * scale + double(output_zero_point())); 177 output_ref[i * channels() + j] = std::min<float>(output_ref[i * channels() + j], float(qmax())); 178 output_ref[i * channels() + j] = std::max<float>(output_ref[i * channels() + j], float(qmin())); 179 } 180 } 181 182 // Create, setup, run, and destroy Global Average Pooling operator. 183 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 184 xnn_operator_t global_average_pooling_op = nullptr; 185 186 xnn_status status = xnn_create_global_average_pooling_nwc_qu8( 187 channels(), input_stride(), output_stride(), 188 input_zero_point(), input_scale(), 189 output_zero_point(), output_scale(), 190 qmin(), qmax(), 191 0, &global_average_pooling_op); 192 if (status == xnn_status_unsupported_hardware) { 193 GTEST_SKIP(); 194 } 195 ASSERT_EQ(xnn_status_success, status); 196 ASSERT_NE(nullptr, global_average_pooling_op); 197 198 // Smart pointer to automatically delete global_average_pooling_op. 199 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator); 200 201 ASSERT_EQ(xnn_status_success, 202 xnn_setup_global_average_pooling_nwc_qu8( 203 global_average_pooling_op, 204 batch_size(), width(), 205 input.data(), output.data(), 206 nullptr /* thread pool */)); 207 208 ASSERT_EQ(xnn_status_success, 209 xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */)); 210 211 // Verify results. 212 for (size_t i = 0; i < batch_size(); i++) { 213 for (size_t c = 0; c < channels(); c++) { 214 ASSERT_LE(uint32_t(output[i * output_stride() + c]), uint32_t(qmax())); 215 ASSERT_GE(uint32_t(output[i * output_stride() + c]), uint32_t(qmin())); 216 ASSERT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.80f) 217 << "at batch index " << i << " / " << batch_size() 218 << ", channel " << c << " / " << channels(); 219 } 220 } 221 } 222 } 223 TestNWCxQS8()224 void TestNWCxQS8() const { 225 std::random_device random_device; 226 auto rng = std::mt19937(random_device()); 227 auto i8rng = std::bind( 228 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng); 229 230 std::vector<int8_t> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(int8_t)); 231 std::vector<int8_t> output(batch_size() * output_stride()); 232 std::vector<float> output_ref(batch_size() * channels()); 233 for (size_t iteration = 0; iteration < iterations(); iteration++) { 234 std::generate(input.begin(), input.end(), std::ref(i8rng)); 235 std::fill(output.begin(), output.end(), 0xA5); 236 237 // Compute reference results. 238 const double scale = double(input_scale()) / (double(width()) * double(output_scale())); 239 for (size_t i = 0; i < batch_size(); i++) { 240 for (size_t j = 0; j < channels(); j++) { 241 double acc = 0.0f; 242 for (size_t k = 0; k < width(); k++) { 243 acc += double(int32_t(input[(i * width() + k) * input_stride() + j]) - int32_t(input_zero_point() - 0x80)); 244 } 245 output_ref[i * channels() + j] = float(acc * scale + double(output_zero_point() - 0x80)); 246 output_ref[i * channels() + j] = std::min<float>(output_ref[i * channels() + j], float(qmax() - 0x80)); 247 output_ref[i * channels() + j] = std::max<float>(output_ref[i * channels() + j], float(qmin() - 0x80)); 248 } 249 } 250 251 // Create, setup, run, and destroy Global Average Pooling operator. 252 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 253 xnn_operator_t global_average_pooling_op = nullptr; 254 255 xnn_status status = xnn_create_global_average_pooling_nwc_qs8( 256 channels(), input_stride(), output_stride(), 257 int8_t(input_zero_point() - 0x80), input_scale(), 258 int8_t(output_zero_point() - 0x80), output_scale(), 259 int8_t(qmin() - 0x80), int8_t(qmax() - 0x80), 260 0, &global_average_pooling_op); 261 if (status == xnn_status_unsupported_hardware) { 262 GTEST_SKIP(); 263 } 264 ASSERT_EQ(xnn_status_success, status); 265 ASSERT_NE(nullptr, global_average_pooling_op); 266 267 // Smart pointer to automatically delete global_average_pooling_op. 268 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator); 269 270 ASSERT_EQ(xnn_status_success, 271 xnn_setup_global_average_pooling_nwc_qs8( 272 global_average_pooling_op, 273 batch_size(), width(), 274 input.data(), output.data(), 275 nullptr /* thread pool */)); 276 277 ASSERT_EQ(xnn_status_success, 278 xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */)); 279 280 // Verify results. 281 for (size_t i = 0; i < batch_size(); i++) { 282 for (size_t c = 0; c < channels(); c++) { 283 ASSERT_LE(int32_t(output[i * output_stride() + c]), int32_t(qmax() - 0x80)); 284 ASSERT_GE(int32_t(output[i * output_stride() + c]), int32_t(qmin() - 0x80)); 285 ASSERT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.80f) 286 << "at batch index " << i << " / " << batch_size() 287 << ", channel " << c << " / " << channels(); 288 } 289 } 290 } 291 } 292 TestNWCxF16()293 void TestNWCxF16() const { 294 std::random_device random_device; 295 auto rng = std::mt19937(random_device()); 296 auto f32rng = std::bind(std::uniform_real_distribution<float>(1.0e-3f, 1.0f), rng); 297 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng); 298 299 std::vector<uint16_t> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 300 std::vector<uint16_t> output(batch_size() * output_stride()); 301 std::vector<float> output_ref(batch_size() * channels()); 302 for (size_t iteration = 0; iteration < iterations(); iteration++) { 303 std::generate(input.begin(), input.end(), std::ref(f16rng)); 304 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */); 305 306 // Compute reference results, without clamping. 307 for (size_t i = 0; i < batch_size(); i++) { 308 for (size_t j = 0; j < channels(); j++) { 309 float acc = 0.0f; 310 for (size_t k = 0; k < width(); k++) { 311 acc += fp16_ieee_to_fp32_value(input[(i * width() + k) * input_stride() + j]); 312 } 313 output_ref[i * channels() + j] = acc / float(width()); 314 } 315 } 316 317 // Compute clamping parameters. 318 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); 319 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); 320 const float accumulated_range = accumulated_max - accumulated_min; 321 const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin()))); 322 const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax()))); 323 const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min; 324 const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max; 325 326 // Clamp reference results. 327 for (float& value : output_ref) { 328 value = std::max(std::min(value, output_max), output_min); 329 } 330 331 // Create, setup, run, and destroy Global Average Pooling operator. 332 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 333 xnn_operator_t global_average_pooling_op = nullptr; 334 335 xnn_status status = xnn_create_global_average_pooling_nwc_f16( 336 channels(), input_stride(), output_stride(), 337 output_min, output_max, 338 0, &global_average_pooling_op); 339 if (status == xnn_status_unsupported_hardware) { 340 GTEST_SKIP(); 341 } 342 ASSERT_EQ(xnn_status_success, status); 343 ASSERT_NE(nullptr, global_average_pooling_op); 344 345 // Smart pointer to automatically delete global_average_pooling_op. 346 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator); 347 348 ASSERT_EQ(xnn_status_success, 349 xnn_setup_global_average_pooling_nwc_f16( 350 global_average_pooling_op, 351 batch_size(), width(), 352 input.data(), output.data(), 353 nullptr /* thread pool */)); 354 355 ASSERT_EQ(xnn_status_success, 356 xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */)); 357 358 // Verify results. 359 for (size_t i = 0; i < batch_size(); i++) { 360 for (size_t c = 0; c < channels(); c++) { 361 ASSERT_LE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_max); 362 ASSERT_GE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_min); 363 ASSERT_NEAR(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_ref[i * channels() + c], std::max(1.0e-4f, std::abs(output_ref[i * channels() + c]) * 1.0e-2f)) 364 << "at batch index " << i << " / " << batch_size() 365 << ", channel " << c << " / " << channels(); 366 } 367 } 368 } 369 } 370 TestNWCxF32()371 void TestNWCxF32() const { 372 std::random_device random_device; 373 auto rng = std::mt19937(random_device()); 374 auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng); 375 376 std::vector<float> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float)); 377 std::vector<float> output(batch_size() * output_stride()); 378 std::vector<float> output_ref(batch_size() * channels()); 379 for (size_t iteration = 0; iteration < iterations(); iteration++) { 380 std::generate(input.begin(), input.end(), std::ref(f32rng)); 381 std::fill(output.begin(), output.end(), std::nanf("")); 382 383 // Compute reference results, without clamping. 384 for (size_t i = 0; i < batch_size(); i++) { 385 for (size_t j = 0; j < channels(); j++) { 386 float acc = 0.0f; 387 for (size_t k = 0; k < width(); k++) { 388 acc += input[(i * width() + k) * input_stride() + j]; 389 } 390 output_ref[i * channels() + j] = acc / float(width()); 391 } 392 } 393 394 // Compute clamping parameters. 395 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); 396 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); 397 const float accumulated_range = accumulated_max - accumulated_min; 398 const float output_min = accumulated_range == 0.0f ? 399 -std::numeric_limits<float>::infinity() : 400 accumulated_min + accumulated_range / 255.0f * float(qmin()); 401 const float output_max = accumulated_range == 0.0f ? 402 +std::numeric_limits<float>::infinity() : 403 accumulated_max - accumulated_range / 255.0f * float(255 - qmax()); 404 405 // Clamp reference results. 406 for (float& value : output_ref) { 407 value = std::max(std::min(value, output_max), output_min); 408 } 409 410 // Create, setup, run, and destroy Global Average Pooling operator. 411 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 412 xnn_operator_t global_average_pooling_op = nullptr; 413 414 xnn_status status = xnn_create_global_average_pooling_nwc_f32( 415 channels(), input_stride(), output_stride(), 416 output_min, output_max, 417 0, &global_average_pooling_op); 418 if (status == xnn_status_unsupported_hardware) { 419 GTEST_SKIP(); 420 } 421 ASSERT_EQ(xnn_status_success, status); 422 ASSERT_NE(nullptr, global_average_pooling_op); 423 424 // Smart pointer to automatically delete global_average_pooling_op. 425 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator); 426 427 ASSERT_EQ(xnn_status_success, 428 xnn_setup_global_average_pooling_nwc_f32( 429 global_average_pooling_op, 430 batch_size(), width(), 431 input.data(), output.data(), 432 nullptr /* thread pool */)); 433 434 ASSERT_EQ(xnn_status_success, 435 xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */)); 436 437 // Verify results. 438 for (size_t i = 0; i < batch_size(); i++) { 439 for (size_t c = 0; c < channels(); c++) { 440 ASSERT_LE(output[i * output_stride() + c], output_max); 441 ASSERT_GE(output[i * output_stride() + c], output_min); 442 ASSERT_NEAR(output[i * output_stride() + c], output_ref[i * channels() + c], std::abs(output_ref[i * channels() + c]) * 1.0e-6f) 443 << "at batch index " << i << " / " << batch_size() 444 << ", channel " << c << " / " << channels(); 445 } 446 } 447 } 448 } 449 TestNCWxF32()450 void TestNCWxF32() const { 451 std::random_device random_device; 452 auto rng = std::mt19937(random_device()); 453 auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng); 454 455 std::vector<float> input(batch_size() * channels() * width() + XNN_EXTRA_BYTES / sizeof(float)); 456 std::vector<float> output(batch_size() * channels()); 457 std::vector<float> output_ref(batch_size() * channels()); 458 for (size_t iteration = 0; iteration < iterations(); iteration++) { 459 std::generate(input.begin(), input.end(), std::ref(f32rng)); 460 std::fill(output.begin(), output.end(), std::nanf("")); 461 462 // Compute reference results, without clamping. 463 for (size_t i = 0; i < batch_size(); i++) { 464 for (size_t j = 0; j < channels(); j++) { 465 float acc = 0.0f; 466 for (size_t k = 0; k < width(); k++) { 467 acc += input[(i * channels() + j) * width() + k]; 468 } 469 output_ref[i * channels() + j] = acc / float(width()); 470 } 471 } 472 473 // Compute clamping parameters. 474 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); 475 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); 476 const float accumulated_range = accumulated_max - accumulated_min; 477 const float output_min = accumulated_range == 0.0f ? 478 -std::numeric_limits<float>::infinity() : 479 accumulated_min + accumulated_range / 255.0f * float(qmin()); 480 const float output_max = accumulated_range == 0.0f ? 481 +std::numeric_limits<float>::infinity() : 482 accumulated_max - accumulated_range / 255.0f * float(255 - qmax()); 483 484 // Clamp reference results. 485 for (float& value : output_ref) { 486 value = std::max(std::min(value, output_max), output_min); 487 } 488 489 // Create, setup, run, and destroy Global Average Pooling operator. 490 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 491 xnn_operator_t global_average_pooling_op = nullptr; 492 493 xnn_status status = xnn_create_global_average_pooling_ncw_f32( 494 channels(), output_min, output_max, 495 0, &global_average_pooling_op); 496 if (status == xnn_status_unsupported_parameter) { 497 GTEST_SKIP(); 498 } 499 ASSERT_EQ(xnn_status_success, status); 500 501 // Smart pointer to automatically delete global_average_pooling_op. 502 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator); 503 504 ASSERT_EQ(xnn_status_success, 505 xnn_setup_global_average_pooling_ncw_f32( 506 global_average_pooling_op, 507 batch_size(), width(), 508 input.data(), output.data(), 509 nullptr /* thread pool */)); 510 511 ASSERT_EQ(xnn_status_success, 512 xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */)); 513 514 // Verify results. 515 for (size_t i = 0; i < batch_size(); i++) { 516 for (size_t c = 0; c < channels(); c++) { 517 ASSERT_LE(output[i * channels() + c], output_max); 518 ASSERT_GE(output[i * channels() + c], output_min); 519 ASSERT_NEAR(output[i * channels() + c], output_ref[i * channels() + c], std::abs(output_ref[i * channels() + c]) * 1.0e-5f) 520 << "at batch index " << i << " / " << batch_size() 521 << ", channel " << c << " / " << channels(); 522 } 523 } 524 } 525 } 526 527 private: 528 size_t batch_size_{1}; 529 size_t width_{1}; 530 size_t channels_{1}; 531 size_t input_stride_{0}; 532 size_t output_stride_{0}; 533 float input_scale_{1.0f}; 534 float output_scale_{1.0f}; 535 uint8_t input_zero_point_{121}; 536 uint8_t output_zero_point_{133}; 537 uint8_t qmin_{0}; 538 uint8_t qmax_{255}; 539 size_t iterations_{1}; 540 }; 541