// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
#define EIGEN_USE_THREADS


#include "main.h"
#include <cassert>
#include <cmath>
#include <iostream>
#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;
18
19
test_multithread_elementwise()20 void test_multithread_elementwise()
21 {
22 Tensor<float, 3> in1(2,3,7);
23 Tensor<float, 3> in2(2,3,7);
24 Tensor<float, 3> out(2,3,7);
25
26 in1.setRandom();
27 in2.setRandom();
28
29 Eigen::ThreadPool tp(internal::random<int>(3, 11));
30 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
31 out.device(thread_pool_device) = in1 + in2 * 3.14f;
32
33 for (int i = 0; i < 2; ++i) {
34 for (int j = 0; j < 3; ++j) {
35 for (int k = 0; k < 7; ++k) {
36 VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f);
37 }
38 }
39 }
40 }
41
42
test_multithread_compound_assignment()43 void test_multithread_compound_assignment()
44 {
45 Tensor<float, 3> in1(2,3,7);
46 Tensor<float, 3> in2(2,3,7);
47 Tensor<float, 3> out(2,3,7);
48
49 in1.setRandom();
50 in2.setRandom();
51
52 Eigen::ThreadPool tp(internal::random<int>(3, 11));
53 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
54 out.device(thread_pool_device) = in1;
55 out.device(thread_pool_device) += in2 * 3.14f;
56
57 for (int i = 0; i < 2; ++i) {
58 for (int j = 0; j < 3; ++j) {
59 for (int k = 0; k < 7; ++k) {
60 VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f);
61 }
62 }
63 }
64 }
65
66 template<int DataLayout>
test_multithread_contraction()67 void test_multithread_contraction()
68 {
69 Tensor<float, 4, DataLayout> t_left(30, 50, 37, 31);
70 Tensor<float, 5, DataLayout> t_right(37, 31, 70, 2, 10);
71 Tensor<float, 5, DataLayout> t_result(30, 50, 70, 2, 10);
72
73 t_left.setRandom();
74 t_right.setRandom();
75
76 // this contraction should be equivalent to a single matrix multiplication
77 typedef Tensor<float, 1>::DimensionPair DimPair;
78 Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
79
80 typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
81 MapXf m_left(t_left.data(), 1500, 1147);
82 MapXf m_right(t_right.data(), 1147, 1400);
83 Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);
84
85 Eigen::ThreadPool tp(4);
86 Eigen::ThreadPoolDevice thread_pool_device(&tp, 4);
87
88 // compute results by separate methods
89 t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
90 m_result = m_left * m_right;
91
92 for (ptrdiff_t i = 0; i < t_result.size(); i++) {
93 VERIFY(&t_result.data()[i] != &m_result.data()[i]);
94 if (fabsf(t_result(i) - m_result(i)) < 1e-4f) {
95 continue;
96 }
97 if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) {
98 continue;
99 }
100 std::cout << "mismatch detected at index " << i << ": " << t_result(i)
101 << " vs " << m_result(i) << std::endl;
102 assert(false);
103 }
104 }
105
106 template<int DataLayout>
test_contraction_corner_cases()107 void test_contraction_corner_cases()
108 {
109 Tensor<float, 2, DataLayout> t_left(32, 500);
110 Tensor<float, 2, DataLayout> t_right(32, 28*28);
111 Tensor<float, 2, DataLayout> t_result(500, 28*28);
112
113 t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
114 t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
115 t_result = t_result.constant(NAN);
116
117 // this contraction should be equivalent to a single matrix multiplication
118 typedef Tensor<float, 1>::DimensionPair DimPair;
119 Eigen::array<DimPair, 1> dims{{DimPair(0, 0)}};
120
121 typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
122 MapXf m_left(t_left.data(), 32, 500);
123 MapXf m_right(t_right.data(), 32, 28*28);
124 Matrix<float, Dynamic, Dynamic, DataLayout> m_result(500, 28*28);
125
126 Eigen::ThreadPool tp(12);
127 Eigen::ThreadPoolDevice thread_pool_device(&tp, 12);
128
129 // compute results by separate methods
130 t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
131 m_result = m_left.transpose() * m_right;
132
133 for (ptrdiff_t i = 0; i < t_result.size(); i++) {
134 assert(!(numext::isnan)(t_result.data()[i]));
135 if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
136 std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
137 assert(false);
138 }
139 }
140
141 t_left.resize(32, 1);
142 t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
143 t_result.resize (1, 28*28);
144 t_result = t_result.constant(NAN);
145 t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
146 new(&m_left) MapXf(t_left.data(), 32, 1);
147 m_result = m_left.transpose() * m_right;
148 for (ptrdiff_t i = 0; i < t_result.size(); i++) {
149 assert(!(numext::isnan)(t_result.data()[i]));
150 if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
151 std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
152 assert(false);
153 }
154 }
155
156 t_left.resize(32, 500);
157 t_right.resize(32, 4);
158 t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
159 t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
160 t_result.resize (500, 4);
161 t_result = t_result.constant(NAN);
162 t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
163 new(&m_left) MapXf(t_left.data(), 32, 500);
164 new(&m_right) MapXf(t_right.data(), 32, 4);
165 m_result = m_left.transpose() * m_right;
166 for (ptrdiff_t i = 0; i < t_result.size(); i++) {
167 assert(!(numext::isnan)(t_result.data()[i]));
168 if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
169 std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
170 assert(false);
171 }
172 }
173
174 t_left.resize(32, 1);
175 t_right.resize(32, 4);
176 t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
177 t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
178 t_result.resize (1, 4);
179 t_result = t_result.constant(NAN);
180 t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
181 new(&m_left) MapXf(t_left.data(), 32, 1);
182 new(&m_right) MapXf(t_right.data(), 32, 4);
183 m_result = m_left.transpose() * m_right;
184 for (ptrdiff_t i = 0; i < t_result.size(); i++) {
185 assert(!(numext::isnan)(t_result.data()[i]));
186 if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
187 std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
188 assert(false);
189 }
190 }
191 }
192
193 template<int DataLayout>
test_multithread_contraction_agrees_with_singlethread()194 void test_multithread_contraction_agrees_with_singlethread() {
195 int contract_size = internal::random<int>(1, 5000);
196
197 Tensor<float, 3, DataLayout> left(internal::random<int>(1, 80),
198 contract_size,
199 internal::random<int>(1, 100));
200
201 Tensor<float, 4, DataLayout> right(internal::random<int>(1, 25),
202 internal::random<int>(1, 37),
203 contract_size,
204 internal::random<int>(1, 51));
205
206 left.setRandom();
207 right.setRandom();
208
209 // add constants to shift values away from 0 for more precision
210 left += left.constant(1.5f);
211 right += right.constant(1.5f);
212
213 typedef Tensor<float, 1>::DimensionPair DimPair;
214 Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}});
215
216 Eigen::ThreadPool tp(internal::random<int>(2, 11));
217 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
218
219 Tensor<float, 5, DataLayout> st_result;
220 st_result = left.contract(right, dims);
221
222 Tensor<float, 5, DataLayout> tp_result(st_result.dimensions());
223 tp_result.device(thread_pool_device) = left.contract(right, dims);
224
225 VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
226 for (ptrdiff_t i = 0; i < st_result.size(); i++) {
227 // if both of the values are very small, then do nothing (because the test will fail
228 // due to numerical precision issues when values are small)
229 if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
230 VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
231 }
232 }
233 }
234
235
236 template<int DataLayout>
test_full_contraction()237 void test_full_contraction() {
238 int contract_size1 = internal::random<int>(1, 500);
239 int contract_size2 = internal::random<int>(1, 500);
240
241 Tensor<float, 2, DataLayout> left(contract_size1,
242 contract_size2);
243 Tensor<float, 2, DataLayout> right(contract_size1,
244 contract_size2);
245 left.setRandom();
246 right.setRandom();
247
248 // add constants to shift values away from 0 for more precision
249 left += left.constant(1.5f);
250 right += right.constant(1.5f);
251
252 typedef Tensor<float, 2>::DimensionPair DimPair;
253 Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}});
254
255 Eigen::ThreadPool tp(internal::random<int>(2, 11));
256 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
257
258 Tensor<float, 0, DataLayout> st_result;
259 st_result = left.contract(right, dims);
260
261 Tensor<float, 0, DataLayout> tp_result;
262 tp_result.device(thread_pool_device) = left.contract(right, dims);
263
264 VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
265 // if both of the values are very small, then do nothing (because the test will fail
266 // due to numerical precision issues when values are small)
267 if (numext::abs(st_result() - tp_result()) >= 1e-4f) {
268 VERIFY_IS_APPROX(st_result(), tp_result());
269 }
270 }
271
272 template<int DataLayout>
test_multithreaded_reductions()273 void test_multithreaded_reductions() {
274 const int num_threads = internal::random<int>(3, 11);
275 ThreadPool thread_pool(num_threads);
276 Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);
277
278 const int num_rows = internal::random<int>(13, 732);
279 const int num_cols = internal::random<int>(13, 732);
280 Tensor<float, 2, DataLayout> t1(num_rows, num_cols);
281 t1.setRandom();
282
283 Tensor<float, 0, DataLayout> full_redux;
284 full_redux = t1.sum();
285
286 Tensor<float, 0, DataLayout> full_redux_tp;
287 full_redux_tp.device(thread_pool_device) = t1.sum();
288
289 // Check that the single threaded and the multi threaded reductions return
290 // the same result.
291 VERIFY_IS_APPROX(full_redux(), full_redux_tp());
292 }
293
294
test_memcpy()295 void test_memcpy() {
296
297 for (int i = 0; i < 5; ++i) {
298 const int num_threads = internal::random<int>(3, 11);
299 Eigen::ThreadPool tp(num_threads);
300 Eigen::ThreadPoolDevice thread_pool_device(&tp, num_threads);
301
302 const int size = internal::random<int>(13, 7632);
303 Tensor<float, 1> t1(size);
304 t1.setRandom();
305 std::vector<float> result(size);
306 thread_pool_device.memcpy(&result[0], t1.data(), size*sizeof(float));
307 for (int j = 0; j < size; j++) {
308 VERIFY_IS_EQUAL(t1(j), result[j]);
309 }
310 }
311 }
312
313
test_multithread_random()314 void test_multithread_random()
315 {
316 Eigen::ThreadPool tp(2);
317 Eigen::ThreadPoolDevice device(&tp, 2);
318 Tensor<float, 1> t(1 << 20);
319 t.device(device) = t.random<Eigen::internal::NormalRandomGenerator<float>>();
320 }
321
322 template<int DataLayout>
test_multithread_shuffle()323 void test_multithread_shuffle()
324 {
325 Tensor<float, 4, DataLayout> tensor(17,5,7,11);
326 tensor.setRandom();
327
328 const int num_threads = internal::random<int>(2, 11);
329 ThreadPool threads(num_threads);
330 Eigen::ThreadPoolDevice device(&threads, num_threads);
331
332 Tensor<float, 4, DataLayout> shuffle(7,5,11,17);
333 array<ptrdiff_t, 4> shuffles = {{2,1,3,0}};
334 shuffle.device(device) = tensor.shuffle(shuffles);
335
336 for (int i = 0; i < 17; ++i) {
337 for (int j = 0; j < 5; ++j) {
338 for (int k = 0; k < 7; ++k) {
339 for (int l = 0; l < 11; ++l) {
340 VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,j,l,i));
341 }
342 }
343 }
344 }
345 }
346
347
test_cxx11_tensor_thread_pool()348 void test_cxx11_tensor_thread_pool()
349 {
350 CALL_SUBTEST_1(test_multithread_elementwise());
351 CALL_SUBTEST_1(test_multithread_compound_assignment());
352
353 CALL_SUBTEST_2(test_multithread_contraction<ColMajor>());
354 CALL_SUBTEST_2(test_multithread_contraction<RowMajor>());
355
356 CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<ColMajor>());
357 CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>());
358
359 // Exercise various cases that have been problematic in the past.
360 CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>());
361 CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>());
362
363 CALL_SUBTEST_4(test_full_contraction<ColMajor>());
364 CALL_SUBTEST_4(test_full_contraction<RowMajor>());
365
366 CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>());
367 CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>());
368
369 CALL_SUBTEST_6(test_memcpy());
370 CALL_SUBTEST_6(test_multithread_random());
371 CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>());
372 CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>());
373 }
374