// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_USE_THREADS


#include "main.h"
#include <iostream>
#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;

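// Checks that an elementwise tensor expression evaluated on a ThreadPoolDevice
// matches the equivalent per-coefficient scalar computation.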
void test_multithread_elementwise()
{
  Tensor<float, 3> in1(2,3,7);
  Tensor<float, 3> in2(2,3,7);
  Tensor<float, 3> out(2,3,7);

  in1.setRandom();
  in2.setRandom();

  Eigen::ThreadPool tp(internal::random<int>(3, 11));
  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
  out.device(thread_pool_device) = in1 + in2 * 3.14f;

  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      for (int k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f);
      }
    }
  }
}

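// Checks that compound assignment (operator+=) through a ThreadPoolDevice
// accumulates into the destination tensor correctly.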
void test_multithread_compound_assignment()
{
  Tensor<float, 3> in1(2,3,7);
  Tensor<float, 3> in2(2,3,7);
  Tensor<float, 3> out(2,3,7);

  in1.setRandom();
  in2.setRandom();

  Eigen::ThreadPool tp(internal::random<int>(3, 11));
  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
  out.device(thread_pool_device) = in1;
  out.device(thread_pool_device) += in2 * 3.14f;

  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      for (int k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f);
      }
    }
  }
}

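// Contracts a 4D tensor with a 5D tensor over two dimensions on a thread pool
// and compares the result against the equivalent flattened matrix product.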
template<int DataLayout>
void test_multithread_contraction()
{
  Tensor<float, 4, DataLayout> t_left(30, 50, 37, 31);
  Tensor<float, 5, DataLayout> t_right(37, 31, 70, 2, 10);
  Tensor<float, 5, DataLayout> t_result(30, 50, 70, 2, 10);

  t_left.setRandom();
  t_right.setRandom();

  // this contraction should be equivalent to a single matrix multiplication
  typedef Tensor<float, 1>::DimensionPair DimPair;
  Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});

  typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
  MapXf m_left(t_left.data(), 1500, 1147);
  MapXf m_right(t_right.data(), 1147, 1400);
  Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);

  Eigen::ThreadPool tp(4);
  Eigen::ThreadPoolDevice thread_pool_device(&tp, 4);

  // compute results by separate methods
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  m_result = m_left * m_right;

  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    VERIFY(&t_result.data()[i] != &m_result.data()[i]);
    if (fabsf(t_result(i) - m_result(i)) < 1e-4f) {
      continue;
    }
    if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) {
      continue;
    }
    std::cout << "mismatch detected at index " << i << ": " << t_result(i)
              << " vs " <<  m_result(i) << std::endl;
    assert(false);
  }
}

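// Exercises contraction shapes with small and degenerate output sizes, checking
// each threaded result against a matrix-product reference.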
template<int DataLayout>
void test_contraction_corner_cases()
{
  Tensor<float, 2, DataLayout> t_left(32, 500);
  Tensor<float, 2, DataLayout> t_right(32, 28*28);
  Tensor<float, 2, DataLayout> t_result(500, 28*28);

  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
  t_result = t_result.constant(NAN);

  // this contraction should be equivalent to a single matrix multiplication
  typedef Tensor<float, 1>::DimensionPair DimPair;
  Eigen::array<DimPair, 1> dims{{DimPair(0, 0)}};

  typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
  MapXf m_left(t_left.data(), 32, 500);
  MapXf m_right(t_right.data(), 32, 28*28);
  Matrix<float, Dynamic, Dynamic, DataLayout> m_result(500, 28*28);

  Eigen::ThreadPool tp(12);
  Eigen::ThreadPoolDevice thread_pool_device(&tp, 12);

  // compute results by separate methods
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  m_result = m_left.transpose() * m_right;

  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!(numext::isnan)(t_result.data()[i]));
    if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
      std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " <<  m_result.data()[i] << std::endl;
      assert(false);
    }
  }

  t_left.resize(32, 1);
  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_result.resize(1, 28*28);
  t_result = t_result.constant(NAN);
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  new(&m_left) MapXf(t_left.data(), 32, 1);
  m_result = m_left.transpose() * m_right;
  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!(numext::isnan)(t_result.data()[i]));
    if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
      std::cout << "mismatch detected: " << t_result.data()[i] << " vs " <<  m_result.data()[i] << std::endl;
      assert(false);
    }
  }

  t_left.resize(32, 500);
  t_right.resize(32, 4);
  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
  t_result.resize(500, 4);
  t_result = t_result.constant(NAN);
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  new(&m_left) MapXf(t_left.data(), 32, 500);
  new(&m_right) MapXf(t_right.data(), 32, 4);
  m_result = m_left.transpose() * m_right;
  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!(numext::isnan)(t_result.data()[i]));
    if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
      std::cout << "mismatch detected: " << t_result.data()[i] << " vs " <<  m_result.data()[i] << std::endl;
      assert(false);
    }
  }

  t_left.resize(32, 1);
  t_right.resize(32, 4);
  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
  t_result.resize(1, 4);
  t_result = t_result.constant(NAN);
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  new(&m_left) MapXf(t_left.data(), 32, 1);
  new(&m_right) MapXf(t_right.data(), 32, 4);
  m_result = m_left.transpose() * m_right;
  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!(numext::isnan)(t_result.data()[i]));
    if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
      std::cout << "mismatch detected: " << t_result.data()[i] << " vs " <<  m_result.data()[i] << std::endl;
      assert(false);
    }
  }
}

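// Verifies that a threaded contraction of randomly sized tensors matches the
// single-threaded evaluation of the same expression.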
template<int DataLayout>
void test_multithread_contraction_agrees_with_singlethread() {
  int contract_size = internal::random<int>(1, 5000);

  Tensor<float, 3, DataLayout> left(internal::random<int>(1, 80),
                                    contract_size,
                                    internal::random<int>(1, 100));

  Tensor<float, 4, DataLayout> right(internal::random<int>(1, 25),
                                     internal::random<int>(1, 37),
                                     contract_size,
                                     internal::random<int>(1, 51));

  left.setRandom();
  right.setRandom();

  // add constants to shift values away from 0 for more precision
  left += left.constant(1.5f);
  right += right.constant(1.5f);

  typedef Tensor<float, 1>::DimensionPair DimPair;
  Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}});

  Eigen::ThreadPool tp(internal::random<int>(2, 11));
  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));

  Tensor<float, 5, DataLayout> st_result;
  st_result = left.contract(right, dims);

  Tensor<float, 5, DataLayout> tp_result(st_result.dimensions());
  tp_result.device(thread_pool_device) = left.contract(right, dims);

  VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
  for (ptrdiff_t i = 0; i < st_result.size(); i++) {
    // Only run the relative-error check when the absolute difference is not tiny:
    // the approximate comparison can spuriously fail for values close to zero.
    if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
      VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
    }
  }
}

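// Contracts two matrices over both of their dimensions, producing a rank-0
// tensor, and compares the threaded result against the single-threaded one.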
template<int DataLayout>
void test_full_contraction() {
  int contract_size1 = internal::random<int>(1, 500);
  int contract_size2 = internal::random<int>(1, 500);

  Tensor<float, 2, DataLayout> left(contract_size1,
                                    contract_size2);
  Tensor<float, 2, DataLayout> right(contract_size1,
                                     contract_size2);
  left.setRandom();
  right.setRandom();

  // add constants to shift values away from 0 for more precision
  left += left.constant(1.5f);
  right += right.constant(1.5f);

  typedef Tensor<float, 2>::DimensionPair DimPair;
  Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}});

  Eigen::ThreadPool tp(internal::random<int>(2, 11));
  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));

  Tensor<float, 0, DataLayout> st_result;
  st_result = left.contract(right, dims);

  Tensor<float, 0, DataLayout> tp_result;
  tp_result.device(thread_pool_device) = left.contract(right, dims);

  VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
  // Only run the relative-error check when the absolute difference is not tiny:
  // the approximate comparison can spuriously fail for values close to zero.
  if (numext::abs(st_result() - tp_result()) >= 1e-4f) {
    VERIFY_IS_APPROX(st_result(), tp_result());
  }
}

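// Checks that a full sum reduction computed on the thread pool matches the
// single-threaded reduction of the same tensor.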
template<int DataLayout>
void test_multithreaded_reductions() {
  const int num_threads = internal::random<int>(3, 11);
  ThreadPool thread_pool(num_threads);
  Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);

  const int num_rows = internal::random<int>(13, 732);
  const int num_cols = internal::random<int>(13, 732);
  Tensor<float, 2, DataLayout> t1(num_rows, num_cols);
  t1.setRandom();

  Tensor<float, 0, DataLayout> full_redux;
  full_redux = t1.sum();

  Tensor<float, 0, DataLayout> full_redux_tp;
  full_redux_tp.device(thread_pool_device) = t1.sum();

  // Check that the single threaded and the multi threaded reductions return
  // the same result.
  VERIFY_IS_APPROX(full_redux(), full_redux_tp());
}

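// Copies tensor data into a plain std::vector through the device's memcpy and
// verifies the contents arrive intact.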
void test_memcpy() {

  for (int i = 0; i < 5; ++i) {
    const int num_threads = internal::random<int>(3, 11);
    Eigen::ThreadPool tp(num_threads);
    Eigen::ThreadPoolDevice thread_pool_device(&tp, num_threads);

    const int size = internal::random<int>(13, 7632);
    Tensor<float, 1> t1(size);
    t1.setRandom();
    std::vector<float> result(size);
    thread_pool_device.memcpy(&result[0], t1.data(), size*sizeof(float));
    for (int j = 0; j < size; j++) {
      VERIFY_IS_EQUAL(t1(j), result[j]);
    }
  }
}

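// Fills a large tensor with normally distributed random values on the thread
// pool; mainly a smoke test that the generator works across threads.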
void test_multithread_random()
{
  Eigen::ThreadPool tp(2);
  Eigen::ThreadPoolDevice device(&tp, 2);
  Tensor<float, 1> t(1 << 20);
  t.device(device) = t.random<Eigen::internal::NormalRandomGenerator<float>>();
}

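// Shuffles the dimensions of a 4D tensor on the thread pool and checks every
// coefficient against the expected permuted indexing.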
template<int DataLayout>
void test_multithread_shuffle()
{
  Tensor<float, 4, DataLayout> tensor(17,5,7,11);
  tensor.setRandom();

  const int num_threads = internal::random<int>(2, 11);
  ThreadPool threads(num_threads);
  Eigen::ThreadPoolDevice device(&threads, num_threads);

  Tensor<float, 4, DataLayout> shuffle(7,5,11,17);
  array<ptrdiff_t, 4> shuffles = {{2,1,3,0}};
  shuffle.device(device) = tensor.shuffle(shuffles);

  for (int i = 0; i < 17; ++i) {
    for (int j = 0; j < 5; ++j) {
      for (int k = 0; k < 7; ++k) {
        for (int l = 0; l < 11; ++l) {
          VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,j,l,i));
        }
      }
    }
  }
}

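// Test entry point: runs every thread-pool test, in both column-major and
// row-major layouts where a layout template parameter is available.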
void test_cxx11_tensor_thread_pool()
{
  CALL_SUBTEST_1(test_multithread_elementwise());
  CALL_SUBTEST_1(test_multithread_compound_assignment());

  CALL_SUBTEST_2(test_multithread_contraction<ColMajor>());
  CALL_SUBTEST_2(test_multithread_contraction<RowMajor>());

  CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<ColMajor>());
  CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>());

  // Exercise various cases that have been problematic in the past.
  CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>());
  CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>());

  CALL_SUBTEST_4(test_full_contraction<ColMajor>());
  CALL_SUBTEST_4(test_full_contraction<RowMajor>());

  CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>());
  CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>());

  CALL_SUBTEST_6(test_memcpy());
  CALL_SUBTEST_6(test_multithread_random());
  CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>());
  CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>());
}