1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2015 5 // Mehdi Goli Codeplay Software Ltd. 6 // Ralph Potter Codeplay Software Ltd. 7 // Luke Iwanski Codeplay Software Ltd. 8 // Contact: <eigen@codeplay.com> 9 // 10 // This Source Code Form is subject to the terms of the Mozilla 11 // Public License v. 2.0. If a copy of the MPL was not distributed 12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 13 14 #define EIGEN_TEST_NO_LONGDOUBLE 15 #define EIGEN_TEST_NO_COMPLEX 16 #define EIGEN_TEST_FUNC cxx11_tensor_reduction_sycl 17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int 18 #define EIGEN_USE_SYCL 19 20 #include "main.h" 21 #include <unsupported/Eigen/CXX11/Tensor> 22 23 24 25 static void test_full_reductions_sycl(const Eigen::SyclDevice& sycl_device) { 26 27 const int num_rows = 452; 28 const int num_cols = 765; 29 array<int, 2> tensorRange = {{num_rows, num_cols}}; 30 31 Tensor<float, 2> in(tensorRange); 32 Tensor<float, 0> full_redux; 33 Tensor<float, 0> full_redux_gpu; 34 35 in.setRandom(); 36 37 full_redux = in.sum(); 38 39 float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); 40 float* gpu_out_data =(float*)sycl_device.allocate(sizeof(float)); 41 42 TensorMap<Tensor<float, 2> > in_gpu(gpu_in_data, tensorRange); 43 TensorMap<Tensor<float, 0> > out_gpu(gpu_out_data); 44 45 sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); 46 out_gpu.device(sycl_device) = in_gpu.sum(); 47 sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(float)); 48 // Check that the CPU and GPU reductions return the same result. 49 VERIFY_IS_APPROX(full_redux_gpu(), full_redux()); 50 51 sycl_device.deallocate(gpu_in_data); 52 sycl_device.deallocate(gpu_out_data); 53 } 54 55 static void test_first_dim_reductions_sycl(const Eigen::SyclDevice& sycl_device) { 56 57 int dim_x = 145; 58 int dim_y = 1; 59 int dim_z = 67; 60 61 array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}}; 62 Eigen::array<int, 1> red_axis; 63 red_axis[0] = 0; 64 array<int, 2> reduced_tensorRange = {{dim_y, dim_z}}; 65 66 Tensor<float, 3> in(tensorRange); 67 Tensor<float, 2> redux(reduced_tensorRange); 68 Tensor<float, 2> redux_gpu(reduced_tensorRange); 69 70 in.setRandom(); 71 72 redux= in.sum(red_axis); 73 74 float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); 75 float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float))); 76 77 TensorMap<Tensor<float, 3> > in_gpu(gpu_in_data, tensorRange); 78 TensorMap<Tensor<float, 2> > out_gpu(gpu_out_data, reduced_tensorRange); 79 80 sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); 81 out_gpu.device(sycl_device) = in_gpu.sum(red_axis); 82 sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float)); 83 84 // Check that the CPU and GPU reductions return the same result. 85 for(int j=0; j<reduced_tensorRange[0]; j++ ) 86 for(int k=0; k<reduced_tensorRange[1]; k++ ) 87 VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k)); 88 89 sycl_device.deallocate(gpu_in_data); 90 sycl_device.deallocate(gpu_out_data); 91 } 92 93 static void test_last_dim_reductions_sycl(const Eigen::SyclDevice &sycl_device) { 94 95 int dim_x = 567; 96 int dim_y = 1; 97 int dim_z = 47; 98 99 array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}}; 100 Eigen::array<int, 1> red_axis; 101 red_axis[0] = 2; 102 array<int, 2> reduced_tensorRange = {{dim_x, dim_y}}; 103 104 Tensor<float, 3> in(tensorRange); 105 Tensor<float, 2> redux(reduced_tensorRange); 106 Tensor<float, 2> redux_gpu(reduced_tensorRange); 107 108 in.setRandom(); 109 110 redux= in.sum(red_axis); 111 112 float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); 113 float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float))); 114 115 TensorMap<Tensor<float, 3> > in_gpu(gpu_in_data, tensorRange); 116 TensorMap<Tensor<float, 2> > out_gpu(gpu_out_data, reduced_tensorRange); 117 118 sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); 119 out_gpu.device(sycl_device) = in_gpu.sum(red_axis); 120 sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float)); 121 // Check that the CPU and GPU reductions return the same result. 122 for(int j=0; j<reduced_tensorRange[0]; j++ ) 123 for(int k=0; k<reduced_tensorRange[1]; k++ ) 124 VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k)); 125 126 sycl_device.deallocate(gpu_in_data); 127 sycl_device.deallocate(gpu_out_data); 128 129 } 130 131 void test_cxx11_tensor_reduction_sycl() { 132 cl::sycl::gpu_selector s; 133 Eigen::SyclDevice sycl_device(s); 134 CALL_SUBTEST((test_full_reductions_sycl(sycl_device))); 135 CALL_SUBTEST((test_first_dim_reductions_sycl(sycl_device))); 136 CALL_SUBTEST((test_last_dim_reductions_sycl(sycl_device))); 137 138 } 139