1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/kernels/eigen_spatial_convolutions.h"
17 
18 #include "absl/strings/str_cat.h"
19 #include "tensorflow/core/kernels/eigen_cuboid_convolution.h"
20 #include "tensorflow/core/platform/test.h"
21 #include "tensorflow/core/platform/test_benchmark.h"
22 
23 namespace Eigen {
24 
// Asserts that `a` and `b` agree to within a relative tolerance of 1e-3,
// measured against the smaller of the two magnitudes. The difference is formed
// from parenthesized arguments so that compound expressions (e.g. `x + y`)
// expand with the intended precedence. Note that each argument is evaluated
// more than once.
#define EigenApprox(a, b)                                   \
  {                                                         \
    ASSERT_TRUE(std::abs((a) - (b)) <=                      \
                std::min(std::abs(a), std::abs(b)) * 1e-3); \
  }
// Integer division that biases the numerator by `b - 1` before truncating.
// For a >= 0 and b > 0 this equals a / b rounded up to the next integer.
static int ceil_div(int a, int b) {
  const int biased_numerator = a + b - 1;
  return biased_numerator / b;
}
28 
// Convolves a single column-major image (no batch dimension) using the default
// SpatialConvolution arguments. The output is expected to keep the input's
// spatial extent, and every element is compared with a direct summation in
// which taps that fall outside the input contribute nothing.
TEST(EigenSpatialConvolutionsTest, Simple) {
  const int input_depth = 7;
  const int input_rows = 4;
  const int input_cols = 5;
  const int output_depth = 10;
  const int patch_rows = 3;
  const int patch_cols = 4;
  const int output_rows = input_rows;
  const int output_cols = input_cols;

  // Distance from an output coordinate back to the first tap of its window.
  // Both expressions evaluate to 1 for the 3x4 patch used here.
  const int row_offset = (patch_rows - 1) / 2;
  const int col_offset = (patch_cols - 1) / 2;

  Tensor<float, 3> input(input_depth, input_rows, input_cols);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 3> result(output_depth, output_rows, output_cols);

  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  // Fill the output with random values before the convolution overwrites it.
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);

  for (int od = 0; od < output_depth; ++od) {
    for (int i = 0; i < output_rows; ++i) {
      for (int j = 0; j < output_cols; ++j) {
        float expected = 0.0f;
        for (int c = 0; c < patch_cols; ++c) {
          const int in_col = c - col_offset + j;
          for (int r = 0; r < patch_rows; ++r) {
            const int in_row = r - row_offset + i;
            // Skip taps that lie outside the image. The limits are the input
            // extents because `input` is the tensor being indexed below.
            if (in_row < 0 || in_col < 0 || in_row >= input_rows ||
                in_col >= input_cols) {
              continue;
            }
            for (int id = 0; id < input_depth; ++id) {
              expected += input(id, in_row, in_col) * kernel(od, id, r, c);
            }
          }
        }
        EigenApprox(result(od, i, j), expected);
      }
    }
  }
}
73 
// Row-major counterpart of the single-image test: every tensor lists its
// dimensions in the reverse order (cols, rows, depth). The convolution is run
// with the default SpatialConvolution arguments and each output element is
// compared with a direct summation that ignores taps outside the input.
TEST(EigenSpatialConvolutionsTest, SimpleRowMajor) {
  const int input_depth = 7;
  const int input_rows = 4;
  const int input_cols = 5;
  const int output_depth = 10;
  const int patch_rows = 3;
  const int patch_cols = 4;
  const int output_rows = input_rows;
  const int output_cols = input_cols;

  // Distance from an output coordinate back to the first tap of its window.
  // Both expressions evaluate to 1 for the 3x4 patch used here.
  const int row_offset = (patch_rows - 1) / 2;
  const int col_offset = (patch_cols - 1) / 2;

  Tensor<float, 3, RowMajor> input(input_cols, input_rows, input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 3, RowMajor> result(output_cols, output_rows, output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  // Fill the output with random values before the convolution overwrites it.
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), output_cols);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_depth);

  for (int od = 0; od < output_depth; ++od) {
    for (int i = 0; i < output_rows; ++i) {
      for (int j = 0; j < output_cols; ++j) {
        float expected = 0.0f;
        for (int c = 0; c < patch_cols; ++c) {
          const int in_col = c - col_offset + j;
          for (int r = 0; r < patch_rows; ++r) {
            const int in_row = r - row_offset + i;
            // Skip taps that lie outside the image. The limits are the input
            // extents because `input` is the tensor being indexed below.
            if (in_row < 0 || in_col < 0 || in_row >= input_rows ||
                in_col >= input_cols) {
              continue;
            }
            for (int id = 0; id < input_depth; ++id) {
              expected += input(in_col, in_row, id) * kernel(c, r, id, od);
            }
          }
        }
        EigenApprox(result(j, i, od), expected);
      }
    }
  }
}
118 
// Column-major convolution over a batch of images using the default
// SpatialConvolution arguments. All four output dimensions are checked
// (including the batch dimension), then every element is compared with a
// direct summation that ignores taps outside the input.
TEST(EigenSpatialConvolutionsTest, BatchedSpatialConvolution) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = input_rows;
  const int output_cols = input_cols;

  // Distance from an output coordinate back to the first tap of its window;
  // evaluates to 1 for the 3x3 patch used here.
  const int row_offset = (patch_rows - 1) / 2;
  const int col_offset = (patch_cols - 1) / 2;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  // Fill the output with random values before the convolution overwrites it.
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            const int in_col = c - col_offset + j;
            for (int r = 0; r < patch_rows; ++r) {
              const int in_row = r - row_offset + i;
              // Taps outside the image contribute nothing to the sum.
              if (in_row < 0 || in_col < 0 || in_row >= input_rows ||
                  in_col >= input_cols) {
                continue;
              }
              for (int id = 0; id < input_depth; ++id) {
                expected +=
                    input(id, in_row, in_col, b) * kernel(od, id, r, c);
              }
            }
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}
155 
// Row-major convolution over a batch of images using the default
// SpatialConvolution arguments. Tensors are laid out as (batch, cols, rows,
// depth). All four output dimensions are checked (including the batch
// dimension), then every element is compared with a direct summation that
// ignores taps outside the input.
TEST(EigenSpatialConvolutionsTest, BatchedSpatialConvolutionRowMajor) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = input_rows;
  const int output_cols = input_cols;

  // Distance from an output coordinate back to the first tap of its window;
  // evaluates to 1 for the 3x3 patch used here.
  const int row_offset = (patch_rows - 1) / 2;
  const int col_offset = (patch_cols - 1) / 2;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  // Fill the output with random values before the convolution overwrites it.
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            const int in_col = c - col_offset + j;
            for (int r = 0; r < patch_rows; ++r) {
              const int in_row = r - row_offset + i;
              // Taps outside the image contribute nothing to the sum.
              if (in_row < 0 || in_col < 0 || in_row >= input_rows ||
                  in_col >= input_cols) {
                continue;
              }
              for (int id = 0; id < input_depth; ++id) {
                expected +=
                    input(b, in_col, in_row, id) * kernel(c, r, id, od);
              }
            }
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}
192 
// Column-major batched convolution with a 4x4 kernel, PADDING_VALID and unit
// stride. The output shrinks to (input - patch + 1) in each spatial dimension;
// every element is compared with a direct summation, and any element that is
// not bit-identical to the reference is also printed to stdout.
TEST(EigenSpatialConvolutionsTest, ValidSpatialConvolution) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 4;
  const int patch_cols = 4;
  const int output_rows = input_rows - patch_rows + 1;
  const int output_cols = input_cols - patch_cols + 1;
  const int stride = 1;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int batch = 0; batch < num_batches; ++batch) {
    for (int filter = 0; filter < output_depth; ++filter) {
      for (int out_row = 0; out_row < output_rows; ++out_row) {
        for (int out_col = 0; out_col < output_cols; ++out_col) {
          // Direct reference: accumulate over patch columns, then patch rows,
          // then input channels.
          float expected = 0.0f;
          for (int pc = 0; pc < patch_cols; ++pc) {
            const int in_col = pc + out_col;
            for (int pr = 0; pr < patch_rows; ++pr) {
              const int in_row = pr + out_row;
              for (int ch = 0; ch < input_depth; ++ch) {
                expected += input(ch, in_row, in_col, batch) *
                            kernel(filter, ch, pr, pc);
              }
            }
          }
          const float actual = result(filter, out_row, out_col, batch);
          // Diagnostic only: report any value that differs from the reference
          // at all; the tolerance-based assertion follows.
          if (actual != expected) {
            std::cout << "at od=" << filter << " b=" << batch
                      << " i=" << out_row << " j=" << out_col << " " << actual
                      << " vs " << expected << std::endl;
          }
          EigenApprox(actual, expected);
        }
      }
    }
  }
}
244 
// Column-major batched convolution with a 4x4 kernel, PADDING_VALID, a row
// stride of 1 and a column stride of 2. Checks the output shape and then
// verifies every element against a direct summation. The value check used to
// be skipped by an unconditional early return, which left the loop below as
// dead code.
TEST(EigenSpatialConvolutionsTest, ValidSpatialConvolutionUnequalStrides) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 4;
  const int patch_cols = 4;

  const int row_stride = 1;
  const int col_stride = 2;
  // Expected output extents for a 5x5 input and 4x4 patch with the strides
  // above.
  const int output_rows = 2;
  const int output_cols = 1;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 4x4 kernel, valid padding, a row
  // stride of 1 and a column stride of 2.
  result =
      SpatialConvolution(input, kernel, row_stride, col_stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected +=
                    input(id, r + row_stride * i, c + col_stride * j, b) *
                    kernel(od, id, r, c);
              }
            }
          }
          // Diagnostic only: report any value that differs from the reference
          // at all; the tolerance-based assertion follows.
          if (result(od, i, j, b) != expected) {
            std::cout << "at od=" << od << " b=" << b << " i=" << i
                      << " j=" << j << " " << result(od, i, j, b) << " vs "
                      << expected << std::endl;
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}
302 
// Row-major batched convolution with a 4x4 kernel, PADDING_VALID and unit
// stride. Tensors are laid out as (batch, cols, rows, depth). Every output
// element is compared with a direct summation, and any element that is not
// bit-identical to the reference is also printed to stdout.
TEST(EigenSpatialConvolutionsTest, ValidSpatialConvolutionRowMajor) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 4;
  const int patch_cols = 4;
  const int output_rows = input_rows - patch_rows + 1;
  const int output_cols = input_cols - patch_cols + 1;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);

  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 4x4 kernel, valid padding, and a stride
  // of 1.
  const int stride = 1;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          // `c` indexes the kernel's column dimension and `r` its row
          // dimension, so each loop is bounded by the matching patch extent.
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(b, c + j, r + i, id) * kernel(c, r, id, od);
              }
            }
          }
          // Diagnostic only: report any value that differs from the reference
          // at all; the tolerance-based assertion follows.
          if (result(b, j, i, od) != expected) {
            std::cout << "at od=" << od << " b=" << b << " i=" << i
                      << " j=" << j << " " << result(b, j, i, od) << " vs "
                      << expected << std::endl;
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}
358 
// Column-major batched convolution with a 3x3 kernel, PADDING_VALID and a
// stride of 2 in both spatial dimensions. Checks the 2x2 output shape and
// compares every element with a direct summation.
TEST(EigenSpatialConvolutionsTest, StridedSpatialConvolution) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 2;
  const int output_cols = 2;
  const int stride = 2;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  // Direct reference for one output element: sums over patch columns, then
  // patch rows, then input channels.
  const auto reference_value = [&](int filter, int out_row, int out_col,
                                   int batch) {
    float sum = 0.0f;
    for (int pc = 0; pc < patch_cols; ++pc) {
      for (int pr = 0; pr < patch_rows; ++pr) {
        for (int ch = 0; ch < input_depth; ++ch) {
          sum += input(ch, pr + stride * out_row, pc + stride * out_col,
                       batch) *
                 kernel(filter, ch, pr, pc);
        }
      }
    }
    return sum;
  };

  for (int batch = 0; batch < num_batches; ++batch) {
    for (int filter = 0; filter < output_depth; ++filter) {
      for (int out_row = 0; out_row < output_rows; ++out_row) {
        for (int out_col = 0; out_col < output_cols; ++out_col) {
          // Computed once up front: the comparison macro evaluates its
          // arguments several times.
          const float expected =
              reference_value(filter, out_row, out_col, batch);
          EigenApprox(result(filter, out_row, out_col, batch), expected);
        }
      }
    }
  }
}
406 
// Column-major batched convolution where the 1x1 kernel is smaller than the
// stride of 2, so some input positions are never read. Uses PADDING_VALID on a
// 3x3 input, checks the 2x2 output shape and compares every element with a
// direct summation.
TEST(EigenSpatialConvolutionsTest, KernelSmallerThanStride) {
  const int input_depth = 2;
  const int input_rows = 3;
  const int input_cols = 3;
  const int num_batches = 5;
  const int output_depth = 6;
  const int patch_rows = 1;
  const int patch_cols = 1;
  const int output_rows = 2;
  const int output_cols = 2;
  const int stride = 2;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int batch = 0; batch < num_batches; ++batch) {
    for (int filter = 0; filter < output_depth; ++filter) {
      for (int out_row = 0; out_row < output_rows; ++out_row) {
        // First input row covered by this output row's window.
        const int row_base = stride * out_row;
        for (int out_col = 0; out_col < output_cols; ++out_col) {
          // First input column covered by this output column's window.
          const int col_base = stride * out_col;
          float expected = 0.0f;
          for (int pc = 0; pc < patch_cols; ++pc) {
            for (int pr = 0; pr < patch_rows; ++pr) {
              for (int ch = 0; ch < input_depth; ++ch) {
                expected += input(ch, pr + row_base, pc + col_base, batch) *
                            kernel(filter, ch, pr, pc);
              }
            }
          }
          EigenApprox(result(filter, out_row, out_col, batch), expected);
        }
      }
    }
  }
}
454 
// Row-major batched convolution with a 3x3 kernel, PADDING_VALID and a stride
// of 2 in both spatial dimensions. Tensors are laid out as (batch, cols, rows,
// depth). Checks the output shape and compares every element with a direct
// summation.
TEST(EigenSpatialConvolutionsTest, StridedSpatialConvolutionRowMajor) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 2;
  const int output_cols = 2;
  const int stride = 2;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int batch = 0; batch < num_batches; ++batch) {
    for (int filter = 0; filter < output_depth; ++filter) {
      for (int out_row = 0; out_row < output_rows; ++out_row) {
        for (int out_col = 0; out_col < output_cols; ++out_col) {
          float expected = 0.0f;
          for (int pc = 0; pc < patch_cols; ++pc) {
            const int in_col = pc + stride * out_col;
            for (int pr = 0; pr < patch_rows; ++pr) {
              const int in_row = pr + stride * out_row;
              for (int ch = 0; ch < input_depth; ++ch) {
                expected += input(batch, in_col, in_row, ch) *
                            kernel(pc, pr, ch, filter);
              }
            }
          }
          EigenApprox(result(batch, out_col, out_row, filter), expected);
        }
      }
    }
  }
}
505 
// Column-major batched atrous convolution: 3x3 kernel, PADDING_VALID, output
// stride 1 and input (atrous) stride 2 in both spatial dimensions. Checks the
// 3x3 output shape for a 7x7 input and compares every element with a direct
// summation whose kernel taps are spaced `in_stride` apart.
TEST(EigenSpatialConvolutionsTest, AtrousSpatial) {
  const int input_depth = 10;
  const int input_rows = 7;
  const int input_cols = 7;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 3;
  const int output_cols = 3;
  const int stride = 1;
  const int in_stride = 2;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID,
                              in_stride, in_stride);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int batch = 0; batch < num_batches; ++batch) {
    for (int filter = 0; filter < output_depth; ++filter) {
      for (int out_row = 0; out_row < output_rows; ++out_row) {
        for (int out_col = 0; out_col < output_cols; ++out_col) {
          float expected = 0.0f;
          for (int pc = 0; pc < patch_cols; ++pc) {
            const int in_col = in_stride * pc + stride * out_col;
            for (int pr = 0; pr < patch_rows; ++pr) {
              const int in_row = in_stride * pr + stride * out_row;
              for (int ch = 0; ch < input_depth; ++ch) {
                expected += input(ch, in_row, in_col, batch) *
                            kernel(filter, ch, pr, pc);
              }
            }
          }
          EigenApprox(result(filter, out_row, out_col, batch), expected);
        }
      }
    }
  }
}
556 
// Row-major batched atrous convolution: 3x3 kernel, PADDING_VALID, output
// stride 1 and input (atrous) stride 2 in both spatial dimensions. Tensors are
// laid out as (batch, cols, rows, depth). Checks the output shape and compares
// every element with a direct summation.
TEST(EigenSpatialConvolutionsTest, AtrousSpatialRowMajor) {
  const int input_depth = 10;
  const int input_rows = 7;
  const int input_cols = 7;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 3;
  const int output_cols = 3;
  const int stride = 1;
  const int in_stride = 2;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID,
                              in_stride, in_stride);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  // Direct reference for one output element: sums over patch columns, then
  // patch rows, then input channels, with taps spaced `in_stride` apart.
  const auto reference_value = [&](int batch, int out_col, int out_row,
                                   int filter) {
    float sum = 0.0f;
    for (int pc = 0; pc < patch_cols; ++pc) {
      for (int pr = 0; pr < patch_rows; ++pr) {
        for (int ch = 0; ch < input_depth; ++ch) {
          sum += input(batch, in_stride * pc + stride * out_col,
                       in_stride * pr + stride * out_row, ch) *
                 kernel(pc, pr, ch, filter);
        }
      }
    }
    return sum;
  };

  for (int batch = 0; batch < num_batches; ++batch) {
    for (int filter = 0; filter < output_depth; ++filter) {
      for (int out_row = 0; out_row < output_rows; ++out_row) {
        for (int out_col = 0; out_col < output_cols; ++out_col) {
          // Computed once up front: the comparison macro evaluates its
          // arguments several times.
          const float expected =
              reference_value(batch, out_col, out_row, filter);
          EigenApprox(result(batch, out_col, out_row, filter), expected);
        }
      }
    }
  }
}
610 
// Row-major batched atrous convolution in which rows and columns use different
// settings: output strides are 1 (rows) and 2 (columns), input (atrous)
// strides are 3 (rows) and 1 (columns). Uses a 3x3 kernel with PADDING_VALID
// on a 7x7 input, checks the 1-row by 3-column output shape and compares every
// element with a direct summation.
TEST(EigenSpatialConvolutionsTest, AtrousSpatialRowMajorUnequalStrides) {
  const int input_depth = 10;
  const int input_rows = 7;
  const int input_cols = 7;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 1;
  const int output_cols = 3;

  const int row_stride = 1;
  const int col_stride = 2;
  const int row_in_stride = 3;
  const int col_in_stride = 1;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel, row_stride, col_stride,
                              PADDING_VALID, row_in_stride, col_in_stride);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int batch = 0; batch < num_batches; ++batch) {
    for (int filter = 0; filter < output_depth; ++filter) {
      for (int out_row = 0; out_row < output_rows; ++out_row) {
        for (int out_col = 0; out_col < output_cols; ++out_col) {
          float expected = 0.0f;
          for (int pc = 0; pc < patch_cols; ++pc) {
            const int in_col = col_in_stride * pc + col_stride * out_col;
            for (int pr = 0; pr < patch_rows; ++pr) {
              const int in_row = row_in_stride * pr + row_stride * out_row;
              for (int ch = 0; ch < input_depth; ++ch) {
                expected += input(batch, in_col, in_row, ch) *
                            kernel(pc, pr, ch, filter);
              }
            }
          }
          EigenApprox(result(batch, out_col, out_row, filter), expected);
        }
      }
    }
  }
}
666 
TEST(EigenSpatialConvolutionsTest,Cuboid)667 TEST(EigenSpatialConvolutionsTest, Cuboid) {
668   const int in_channels = 10;
669   const int in_depth = 5;
670   const int in_rows = 8;
671   const int in_cols = 7;
672 
673   const int kern_filters = 7;
674   const int kern_depth = 3;
675   const int kern_width = 4;
676   const int kern_height = 4;
677 
678   const int out_depth = in_depth;
679   const int out_height = in_rows;
680   const int out_width = in_cols;
681 
682   Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
683   Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
684                           kern_width);
685   Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
686   input = input.constant(11.0f) + input.random();
687   kernel = kernel.constant(2.0f) + kernel.random();
688   result.setRandom();
689 
690   result = CuboidConvolution(input, kernel);
691 
692   EXPECT_EQ(result.dimension(0), kern_filters);
693   EXPECT_EQ(result.dimension(1), out_depth);
694   EXPECT_EQ(result.dimension(2), out_height);
695   EXPECT_EQ(result.dimension(3), out_width);
696 
697   const int off_p = (kern_depth - 1) / 2;
698   const int off_r = (kern_height - 1) / 2;
699   const int off_c = (kern_width - 1) / 2;
700 
701   for (int od = 0; od < kern_filters; ++od) {
702     for (int i = 0; i < out_depth; ++i) {
703       for (int j = 0; j < out_height; ++j) {
704         for (int k = 0; k < out_width; ++k) {
705           float expected = 0.0f;
706           for (int c = 0; c < kern_width; ++c) {
707             for (int r = 0; r < kern_height; ++r) {
708               for (int p = 0; p < kern_depth; ++p) {
709                 for (int id = 0; id < in_channels; ++id) {
710                   if (p - off_p + i >= 0 && r - off_r + j >= 0 &&
711                       c - off_c + k >= 0 && p - off_p + i < in_depth &&
712                       r - off_r + j < in_rows && c - off_c + k < in_cols) {
713                     expected +=
714                         input(id, p - off_p + i, r - off_r + j, c - off_c + k) *
715                         kernel(od, id, p, r, c);
716                   }
717                 }
718               }
719             }
720           }
721           EigenApprox(result(od, i, j, k), expected);
722         }
723       }
724     }
725   }
726 }
727 
TEST(EigenSpatialConvolutionsTest,CuboidRowMajor)728 TEST(EigenSpatialConvolutionsTest, CuboidRowMajor) {
729   const int in_channels = 10;
730   const int in_depth = 5;
731   const int in_rows = 8;
732   const int in_cols = 7;
733 
734   const int kern_filters = 7;
735   const int kern_depth = 3;
736   const int kern_width = 4;
737   const int kern_height = 4;
738 
739   const int out_depth = in_depth;
740   const int out_height = in_rows;
741   const int out_width = in_cols;
742 
743   Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
744   Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
745                                     in_channels, kern_filters);
746   Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
747                                     kern_filters);
748   input = input.constant(11.0f) + input.random();
749   kernel = kernel.constant(2.0f) + kernel.random();
750   result.setRandom();
751 
752   result = CuboidConvolution(input, kernel);
753 
754   EXPECT_EQ(result.dimension(3), kern_filters);
755   EXPECT_EQ(result.dimension(2), out_depth);
756   EXPECT_EQ(result.dimension(1), out_height);
757   EXPECT_EQ(result.dimension(0), out_width);
758 
759   const int off_p = (kern_depth - 1) / 2;
760   const int off_r = (kern_height - 1) / 2;
761   const int off_c = (kern_width - 1) / 2;
762 
763   for (int od = 0; od < kern_filters; ++od) {
764     for (int i = 0; i < out_depth; ++i) {
765       for (int j = 0; j < out_height; ++j) {
766         for (int k = 0; k < out_width; ++k) {
767           float expected = 0.0f;
768           for (int c = 0; c < kern_width; ++c) {
769             for (int r = 0; r < kern_height; ++r) {
770               for (int p = 0; p < kern_depth; ++p) {
771                 for (int id = 0; id < in_channels; ++id) {
772                   if (p - off_p + i >= 0 && r - off_r + j >= 0 &&
773                       c - off_c + k >= 0 && p - off_p + i < in_depth &&
774                       r - off_r + j < in_rows && c - off_c + k < in_cols) {
775                     expected +=
776                         input(c - off_c + k, r - off_r + j, p - off_p + i, id) *
777                         kernel(c, r, p, id, od);
778                   }
779                 }
780               }
781             }
782           }
783           EigenApprox(result(k, j, i, od), expected);
784         }
785       }
786     }
787   }
788 }
789 
// Column-major 3D (cuboid) convolution with a 3x3x3 kernel, unit strides and
// PADDING_VALID on a 5x5x5 volume. Checks the 3x3x3 output shape and compares
// every element with a direct summation.
TEST(EigenSpatialConvolutionsTest, ValidCuboid) {
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 5;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 3;

  Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel, 1, 1, 1, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);

  for (int filter = 0; filter < kern_filters; ++filter) {
    for (int out_p = 0; out_p < out_depth; ++out_p) {
      for (int out_r = 0; out_r < out_height; ++out_r) {
        for (int out_c = 0; out_c < out_width; ++out_c) {
          float expected = 0.0f;
          for (int kc = 0; kc < kern_width; ++kc) {
            const int in_c = kc + out_c;
            for (int kr = 0; kr < kern_height; ++kr) {
              const int in_r = kr + out_r;
              for (int kp = 0; kp < kern_depth; ++kp) {
                const int in_p = kp + out_p;
                for (int ch = 0; ch < in_channels; ++ch) {
                  expected += input(ch, in_p, in_r, in_c) *
                              kernel(filter, ch, kp, kr, kc);
                }
              }
            }
          }
          EigenApprox(result(filter, out_p, out_r, out_c), expected);
        }
      }
    }
  }
}
841 
// Row-major counterpart of the VALID-padding cuboid test: tensor dimensions
// are listed in reverse order and the result is compared against a
// brute-force reference sum.
TEST(EigenSpatialConvolutionsTest, ValidCuboidRowMajor) {
  // Input extents.
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 5;
  const int in_cols = 5;

  // Kernel extents.
  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  // Expected output extents; each equals in - kern + 1 (5 - 3 + 1).
  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 3;

  Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  // Start from random output contents; the convolution overwrites them.
  result.setRandom();

  result = CuboidConvolution(input, kernel, 1, 1, 1, PADDING_VALID);

  EXPECT_EQ(result.dimension(3), kern_filters);
  EXPECT_EQ(result.dimension(2), out_depth);
  EXPECT_EQ(result.dimension(1), out_height);
  EXPECT_EQ(result.dimension(0), out_width);

  for (int filter = 0; filter < kern_filters; ++filter) {
    for (int out_p = 0; out_p < out_depth; ++out_p) {
      for (int out_r = 0; out_r < out_height; ++out_r) {
        for (int out_c = 0; out_c < out_width; ++out_c) {
          // Reference value: sum over the whole kernel window and all
          // input channels.
          float expected = 0.0f;
          for (int kc = 0; kc < kern_width; ++kc) {
            const int in_c = kc + out_c;
            for (int kr = 0; kr < kern_height; ++kr) {
              const int in_r = kr + out_r;
              for (int kp = 0; kp < kern_depth; ++kp) {
                const int in_p = kp + out_p;
                for (int ch = 0; ch < in_channels; ++ch) {
                  expected += input(in_c, in_r, in_p, ch) *
                              kernel(kc, kr, kp, ch, filter);
                }
              }
            }
          }
          EigenApprox(result(out_c, out_r, out_p, filter), expected);
        }
      }
    }
  }
}
894 
TEST(EigenSpatialConvolutionsTest,BatchedCuboid)895 TEST(EigenSpatialConvolutionsTest, BatchedCuboid) {
896   const int batches = 2;
897   const int in_channels = 10;
898   const int in_depth = 5;
899   const int in_rows = 8;
900   const int in_cols = 7;
901 
902   const int kern_filters = 7;
903   const int kern_depth = 3;
904   const int kern_width = 4;
905   const int kern_height = 4;
906 
907   const int out_depth = in_depth;
908   const int out_height = in_rows;
909   const int out_width = in_cols;
910 
911   Tensor<float, 5> input(in_channels, in_depth, in_rows, in_cols, batches);
912   Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
913                           kern_width);
914   Tensor<float, 5> result(kern_filters, out_depth, out_height, out_width,
915                           batches);
916   input = input.constant(11.0f) + input.random();
917   kernel = kernel.constant(2.0f) + kernel.random();
918   result.setRandom();
919 
920   result = CuboidConvolution(input, kernel);
921 
922   EXPECT_EQ(result.dimension(0), kern_filters);
923   EXPECT_EQ(result.dimension(1), out_depth);
924   EXPECT_EQ(result.dimension(2), out_height);
925   EXPECT_EQ(result.dimension(3), out_width);
926   EXPECT_EQ(result.dimension(4), batches);
927 
928   const int off_p = (kern_depth - 1) / 2;
929   const int off_r = (kern_height - 1) / 2;
930   const int off_c = (kern_width - 1) / 2;
931 
932   for (int b = 0; b < batches; b++) {
933     for (int od = 0; od < kern_filters; ++od) {
934       for (int i = 0; i < out_depth; ++i) {
935         for (int j = 0; j < out_height; ++j) {
936           for (int k = 0; k < out_width; ++k) {
937             float expected = 0.0f;
938             for (int c = 0; c < kern_width; ++c) {
939               for (int r = 0; r < kern_height; ++r) {
940                 for (int p = 0; p < kern_depth; ++p) {
941                   for (int id = 0; id < in_channels; ++id) {
942                     if (p - off_p + i >= 0 && r - off_r + j >= 0 &&
943                         c - off_c + k >= 0 && p - off_p + i < in_depth &&
944                         r - off_r + j < in_rows && c - off_c + k < in_cols) {
945                       expected += input(id, p - off_p + i, r - off_r + j,
946                                         c - off_c + k, b) *
947                                   kernel(od, id, p, r, c);
948                     }
949                   }
950                 }
951               }
952             }
953             EigenApprox(result(od, i, j, k, b), expected);
954           }
955         }
956       }
957     }
958   }
959 }
960 
TEST(EigenSpatialConvolutionsTest,BatchedCuboidRowMajor)961 TEST(EigenSpatialConvolutionsTest, BatchedCuboidRowMajor) {
962   const int batches = 2;
963   const int in_channels = 10;
964   const int in_depth = 5;
965   const int in_rows = 8;
966   const int in_cols = 7;
967 
968   const int kern_filters = 7;
969   const int kern_depth = 3;
970   const int kern_width = 4;
971   const int kern_height = 4;
972 
973   const int out_depth = in_depth;
974   const int out_height = in_rows;
975   const int out_width = in_cols;
976 
977   Tensor<float, 5, RowMajor> input(batches, in_cols, in_rows, in_depth,
978                                    in_channels);
979   Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
980                                     in_channels, kern_filters);
981   Tensor<float, 5, RowMajor> result(batches, out_width, out_height, out_depth,
982                                     kern_filters);
983   input = input.constant(11.0f) + input.random();
984   kernel = kernel.constant(2.0f) + kernel.random();
985   result.setRandom();
986 
987   result = CuboidConvolution(input, kernel);
988 
989   EXPECT_EQ(result.dimension(4), kern_filters);
990   EXPECT_EQ(result.dimension(3), out_depth);
991   EXPECT_EQ(result.dimension(2), out_height);
992   EXPECT_EQ(result.dimension(1), out_width);
993   EXPECT_EQ(result.dimension(0), batches);
994 
995   const int off_p = (kern_depth - 1) / 2;
996   const int off_r = (kern_height - 1) / 2;
997   const int off_c = (kern_width - 1) / 2;
998 
999   for (int b = 0; b < batches; b++) {
1000     for (int od = 0; od < kern_filters; ++od) {
1001       for (int i = 0; i < out_depth; ++i) {
1002         for (int j = 0; j < out_height; ++j) {
1003           for (int k = 0; k < out_width; ++k) {
1004             float expected = 0.0f;
1005             for (int c = 0; c < kern_width; ++c) {
1006               for (int r = 0; r < kern_height; ++r) {
1007                 for (int p = 0; p < kern_depth; ++p) {
1008                   for (int id = 0; id < in_channels; ++id) {
1009                     if (p - off_p + i >= 0 && r - off_r + j >= 0 &&
1010                         c - off_c + k >= 0 && p - off_p + i < in_depth &&
1011                         r - off_r + j < in_rows && c - off_c + k < in_cols) {
1012                       expected += input(b, c - off_c + k, r - off_r + j,
1013                                         p - off_p + i, id) *
1014                                   kernel(c, r, p, id, od);
1015                     }
1016                   }
1017                 }
1018               }
1019             }
1020             EigenApprox(result(b, k, j, i, od), expected);
1021           }
1022         }
1023       }
1024     }
1025   }
1026 }
1027 
// Compares a column-major CuboidConvolution using PADDING_VALID and a stride
// of 2 along every spatial axis against a brute-force reference sum.
TEST(EigenSpatialConvolutionsTest, StridedValidCuboid) {
  // Input extents.
  const int in_channels = 10;
  const int in_depth = 8;
  const int in_rows = 7;
  const int in_cols = 5;

  // Kernel extents.
  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  // Expected output extents; each equals (in - kern) / stride + 1 for the
  // stride of 2 used below.
  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 2;

  Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  // Start from random output contents; the convolution overwrites them.
  result.setRandom();

  const int stride = 2;
  result =
      CuboidConvolution(input, kernel, stride, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);

  for (int filter = 0; filter < kern_filters; ++filter) {
    for (int out_p = 0; out_p < out_depth; ++out_p) {
      for (int out_r = 0; out_r < out_height; ++out_r) {
        for (int out_c = 0; out_c < out_width; ++out_c) {
          // Reference value: the kernel window starts at stride * output
          // coordinate along every spatial axis.
          float expected = 0.0f;
          for (int kc = 0; kc < kern_width; ++kc) {
            const int in_c = kc + stride * out_c;
            for (int kr = 0; kr < kern_height; ++kr) {
              const int in_r = kr + stride * out_r;
              for (int kp = 0; kp < kern_depth; ++kp) {
                const int in_p = kp + stride * out_p;
                for (int ch = 0; ch < in_channels; ++ch) {
                  expected += input(ch, in_p, in_r, in_c) *
                              kernel(filter, ch, kp, kr, kc);
                }
              }
            }
          }
          EigenApprox(result(filter, out_p, out_r, out_c), expected);
        }
      }
    }
  }
}
1082 
// Row-major counterpart of the strided VALID-padding cuboid test: tensor
// dimensions are listed in reverse order and the result is compared against
// a brute-force reference sum.
TEST(EigenSpatialConvolutionsTest, StridedValidCuboidRowMajor) {
  // Input extents.
  const int in_channels = 10;
  const int in_depth = 8;
  const int in_rows = 7;
  const int in_cols = 5;

  // Kernel extents.
  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  // Expected output extents; each equals (in - kern) / stride + 1 for the
  // stride of 2 used below.
  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 2;

  Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  // Start from random output contents; the convolution overwrites them.
  result.setRandom();

  const int stride = 2;
  result =
      CuboidConvolution(input, kernel, stride, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(3), kern_filters);
  EXPECT_EQ(result.dimension(2), out_depth);
  EXPECT_EQ(result.dimension(1), out_height);
  EXPECT_EQ(result.dimension(0), out_width);

  for (int filter = 0; filter < kern_filters; ++filter) {
    for (int out_p = 0; out_p < out_depth; ++out_p) {
      for (int out_r = 0; out_r < out_height; ++out_r) {
        for (int out_c = 0; out_c < out_width; ++out_c) {
          // Reference value: the kernel window starts at stride * output
          // coordinate along every spatial axis.
          float expected = 0.0f;
          for (int kc = 0; kc < kern_width; ++kc) {
            const int in_c = kc + stride * out_c;
            for (int kr = 0; kr < kern_height; ++kr) {
              const int in_r = kr + stride * out_r;
              for (int kp = 0; kp < kern_depth; ++kp) {
                const int in_p = kp + stride * out_p;
                for (int ch = 0; ch < in_channels; ++ch) {
                  expected += input(in_c, in_r, in_p, ch) *
                              kernel(kc, kr, kp, ch, filter);
                }
              }
            }
          }
          EigenApprox(result(out_c, out_r, out_p, filter), expected);
        }
      }
    }
  }
}
1138 
TEST(EigenSpatialConvolutionsTest,StridedSameCuboid)1139 TEST(EigenSpatialConvolutionsTest, StridedSameCuboid) {
1140   const int in_channels = 10;
1141   const int in_depth = 8;
1142   const int in_rows = 7;
1143   const int in_cols = 5;
1144 
1145   const int kern_filters = 7;
1146   const int kern_depth = 3;
1147   const int kern_width = 3;
1148   const int kern_height = 3;
1149 
1150   const int stride = 2;
1151   const int out_depth = ceil_div(in_depth, stride);
1152   const int out_height = ceil_div(in_rows, stride);
1153   const int out_width = ceil_div(in_cols, stride);
1154 
1155   Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
1156   Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
1157                           kern_width);
1158   Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
1159   input = input.constant(11.0f) + input.random();
1160   kernel = kernel.constant(2.0f) + kernel.random();
1161   result.setRandom();
1162 
1163   result =
1164       CuboidConvolution(input, kernel, stride, stride, stride, PADDING_SAME);
1165 
1166   EXPECT_EQ(result.dimension(0), kern_filters);
1167   EXPECT_EQ(result.dimension(1), out_depth);
1168   EXPECT_EQ(result.dimension(2), out_height);
1169   EXPECT_EQ(result.dimension(3), out_width);
1170 
1171   const int pad_p = (out_depth - 1) * stride - in_depth + kern_depth;
1172   const int pad_r = (out_height - 1) * stride - in_rows + kern_height;
1173   const int pad_c = (out_width - 1) * stride - in_cols + kern_width;
1174 
1175   // Number of pixels the input is extended with at the lower end in every
1176   // dimension.
1177   const int dp = pad_p / 2;
1178   const int dr = pad_r / 2;
1179   const int dc = pad_c / 2;
1180 
1181   for (int od = 0; od < kern_filters; ++od) {
1182     for (int i = 0; i < out_depth; ++i) {
1183       for (int j = 0; j < out_height; ++j) {
1184         for (int k = 0; k < out_width; ++k) {
1185           float expected = 0.0f;
1186           for (int c = 0; c < kern_width; ++c) {
1187             for (int r = 0; r < kern_height; ++r) {
1188               for (int p = 0; p < kern_depth; ++p) {
1189                 for (int id = 0; id < in_channels; ++id) {
1190                   const int in_p = p - dp + i * stride;
1191                   const int in_r = r - dr + j * stride;
1192                   const int in_c = c - dc + k * stride;
1193                   if (in_p >= 0 && in_r >= 0 && in_c >= 0 && in_p < in_depth &&
1194                       in_r < in_rows && in_c < in_cols) {
1195                     expected +=
1196                         input(id, in_p, in_r, in_c) * kernel(od, id, p, r, c);
1197                   }
1198                 }
1199               }
1200             }
1201           }
1202           EigenApprox(result(od, i, j, k), expected);
1203         }
1204       }
1205     }
1206   }
1207 }
1208 
TEST(EigenSpatialConvolutionsTest,StridedSameCuboidRowMajor)1209 TEST(EigenSpatialConvolutionsTest, StridedSameCuboidRowMajor) {
1210   const int in_channels = 10;
1211   const int in_depth = 8;
1212   const int in_rows = 7;
1213   const int in_cols = 5;
1214 
1215   const int kern_filters = 7;
1216   const int kern_depth = 3;
1217   const int kern_width = 3;
1218   const int kern_height = 3;
1219 
1220   const int stride = 2;
1221   const int out_depth = ceil_div(in_depth, stride);
1222   const int out_height = ceil_div(in_rows, stride);
1223   const int out_width = ceil_div(in_cols, stride);
1224 
1225   Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
1226   Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
1227                                     in_channels, kern_filters);
1228   Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
1229                                     kern_filters);
1230   input = input.constant(11.0f) + input.random();
1231   kernel = kernel.constant(2.0f) + kernel.random();
1232   result.setRandom();
1233 
1234   result =
1235       CuboidConvolution(input, kernel, stride, stride, stride, PADDING_SAME);
1236 
1237   EXPECT_EQ(result.dimension(3), kern_filters);
1238   EXPECT_EQ(result.dimension(2), out_depth);
1239   EXPECT_EQ(result.dimension(1), out_height);
1240   EXPECT_EQ(result.dimension(0), out_width);
1241 
1242   const int pad_p = (out_depth - 1) * stride - in_depth + kern_depth;
1243   const int pad_r = (out_height - 1) * stride - in_rows + kern_height;
1244   const int pad_c = (out_width - 1) * stride - in_cols + kern_width;
1245 
1246   // Number of pixels the input is extended with at the lower end in every
1247   // dimension.
1248   const int dp = pad_p / 2;
1249   const int dr = pad_r / 2;
1250   const int dc = pad_c / 2;
1251 
1252   for (int od = 0; od < kern_filters; ++od) {
1253     for (int i = 0; i < out_depth; ++i) {
1254       for (int j = 0; j < out_height; ++j) {
1255         for (int k = 0; k < out_width; ++k) {
1256           float expected = 0.0f;
1257           for (int c = 0; c < kern_width; ++c) {
1258             for (int r = 0; r < kern_height; ++r) {
1259               for (int p = 0; p < kern_depth; ++p) {
1260                 for (int id = 0; id < in_channels; ++id) {
1261                   const int in_p = p - dp + i * stride;
1262                   const int in_r = r - dr + j * stride;
1263                   const int in_c = c - dc + k * stride;
1264                   if (in_p >= 0 && in_r >= 0 && in_c >= 0 && in_p < in_depth &&
1265                       in_r < in_rows && in_c < in_cols) {
1266                     expected +=
1267                         input(in_c, in_r, in_p, id) * kernel(c, r, p, id, od);
1268                   }
1269                 }
1270               }
1271             }
1272           }
1273           EigenApprox(result(k, j, i, od), expected);
1274         }
1275       }
1276     }
1277   }
1278 }
1279 
1280 // A test case discovered when testing backward spatial convolution where the
1281 // special tensor contraction mapper for spatial convolution contains a bug.
TEST(EigenSpatialConvolutionsTest,SpatialConvContractionMapper)1282 TEST(EigenSpatialConvolutionsTest, SpatialConvContractionMapper) {
1283   // We have a 3x4 input image with 2x2 patch and stride of 2.
1284   // The output has size 1x2.
1285   typedef Tensor<float, 1>::DimensionPair DimPair;
1286   Tensor<float, 4> out(1, 1, 2, 1);
1287   Tensor<float, 4> kern(1, 1, 2, 2);
1288   for (int i = 0; i < kern.size(); ++i) {
1289     kern.coeffRef(i) = static_cast<float>(i) + 1;
1290   }
1291   for (int i = 0; i < out.size(); ++i) {
1292     out.coeffRef(i) = static_cast<float>(i) + 1;
1293   }
1294 
1295   DSizes<ptrdiff_t, 4> strides;
1296   strides[0] = 1;
1297   strides[1] = 2;
1298   strides[2] = 2;
1299   strides[3] = 1;
1300 
1301   array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings;
1302   paddings[0] = std::make_pair(0, 0);
1303   paddings[1] = std::make_pair(1, 2);
1304   paddings[2] = std::make_pair(1, 1);
1305   paddings[3] = std::make_pair(0, 0);
1306 
1307   DSizes<ptrdiff_t, 3> out_dim;
1308   out_dim[0] = 1;
1309   out_dim[1] = 4;
1310   out_dim[2] = 12;
1311 
1312   array<bool, 4> kernel_reverse;
1313   kernel_reverse[0] = false;
1314   kernel_reverse[1] = false;
1315   kernel_reverse[2] = true;
1316   kernel_reverse[3] = true;
1317 
1318   DSizes<ptrdiff_t, 3> k_dims;
1319   k_dims[0] = 1;
1320   k_dims[1] = 1;
1321   k_dims[2] = 4;
1322 
1323   array<DimPair, 2> contract_dims;
1324   contract_dims[0] = DimPair(0, 0);
1325   contract_dims[1] = DimPair(2, 1);
1326 
1327   DSizes<ptrdiff_t, 4> in_dim;
1328   in_dim[0] = 1;
1329   in_dim[1] = 3;
1330   in_dim[2] = 4;
1331   in_dim[3] = 1;
1332 
1333   DSizes<ptrdiff_t, 2> in_dbg_dim;
1334   in_dbg_dim[0] = 3;
1335   in_dbg_dim[1] = 4;
1336 
1337   DSizes<ptrdiff_t, 2> out_dbg_dim;
1338   out_dbg_dim[0] = 4;
1339   out_dbg_dim[1] = 12;
1340 
1341   // This is the formula for computing the backward prop for input with a
1342   // spatial convolution.
1343   Tensor<float, 4> direct =
1344       kern.reverse(kernel_reverse)
1345           .reshape(k_dims)
1346           .contract(
1347               out.extract_image_patches(2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 0)
1348                   .reshape(out_dim),
1349               contract_dims)
1350           .reshape(in_dim);
1351 
1352   Tensor<float, 4> indirect =
1353       kern.reverse(kernel_reverse)
1354           .reshape(k_dims)
1355           .contract(
1356               out.inflate(strides)
1357                   .pad(paddings)
1358                   .extract_image_patches(2, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0)
1359                   .reshape(out_dim),
1360               contract_dims)
1361           .reshape(in_dim);
1362 
1363   eigen_assert(dimensions_match(direct.dimensions(), indirect.dimensions()));
1364   for (size_t i = 0; i < direct.dimensions().TotalSize(); ++i) {
1365     EigenApprox(direct.data()[i], indirect.data()[i]);
1366   }
1367   EigenApprox(1.0f, direct(0, 0, 0, 0));
1368   EigenApprox(3.0f, direct(0, 0, 1, 0));
1369   EigenApprox(2.0f, direct(0, 0, 2, 0));
1370   EigenApprox(6.0f, direct(0, 0, 3, 0));
1371 
1372   EigenApprox(2.0f, direct(0, 1, 0, 0));
1373   EigenApprox(4.0f, direct(0, 1, 1, 0));
1374   EigenApprox(4.0f, direct(0, 1, 2, 0));
1375   EigenApprox(8.0f, direct(0, 1, 3, 0));
1376 }
1377 
PackRhsHelper(int iters,int input_batches,int input_cols,int input_rows,int input_depth,int filter_count,int filter_cols,int filter_rows,int col_strides,int row_strides,Index block_rows,Index block_cols)1378 static void PackRhsHelper(int iters,
1379                           /* Input dimensions: */
1380                           int input_batches, int input_cols, int input_rows,
1381                           int input_depth,
1382                           /* Filter (kernel) dimensions: */
1383                           int filter_count, int filter_cols, int filter_rows,
1384                           /* Input strides: */
1385                           int col_strides, int row_strides,
1386                           /* Block dimensions: */
1387                           Index block_rows, Index block_cols) {
1388   // Set random seed for benchmark repeatability.
1389   srand(12345);
1390 
1391   tensorflow::testing::UseRealTime();
1392   tensorflow::testing::StopTiming();
1393 
1394   using Dimensions = Eigen::DSizes<Eigen::Index, 4>;
1395 
1396   // Default Eigen::Tensor layout is column major, so we configure dimensions
1397   // starting from the inner most (channels aka depth in this case).
1398   Dimensions input_dims(input_depth, input_rows, input_cols, input_batches);
1399 
1400   using Traits = typename Eigen::internal::gebp_traits<float, float>;
1401   static const int packet_size = Eigen::internal::packet_traits<float>::size;
1402 
1403   // Reshape dimensions.
1404   using NewDimension = Eigen::DSizes<Index, 2>;
1405 
1406   // Contraction dimensions.
1407   using nocontract_t = Eigen::array<Eigen::Index, 1>;
1408   using contract_t = Eigen::array<Eigen::Index, 1>;
1409 
1410   // Input to the TensorImagePatchOp. It is the tensorflow TTypes<float>::Tensor
1411   // with ColMajor layout, instead of RowMajor. But that doesn't make any
1412   // difference, because TensorContraction swaps LHS with RHS for row major
1413   // inputs, and contraction mapper always works with column major data.
1414   using ArgType = TensorMap<Tensor<float, 4>, Eigen::Aligned>;
1415 
1416   using Evaluator = TensorEvaluator<
1417       const TensorReshapingOp<
1418           NewDimension, const TensorImagePatchOp<Dynamic, Dynamic, ArgType>>,
1419       Eigen::DefaultDevice>;
1420 
1421   using InputMapper = Eigen::internal::TensorContractionInputMapper<
1422       float, Index, Eigen::internal::Rhs, Evaluator,  //
1423       nocontract_t, contract_t,                       //
1424       packet_size,                                    //
1425       /*inner_dim_contiguous*/ true,                  //
1426       /*inner_dim_reordered*/ false,                  //
1427       /*Alignment*/ 0>;
1428 
1429   using SubMapper = Eigen::internal::TensorContractionSubMapper<
1430       float, Index, Eigen::internal::Rhs, Evaluator,  //
1431       nocontract_t, contract_t,                       //
1432       packet_size,                                    //
1433       /*inner_dim_contiguous*/ true,                  //
1434       /*inner_dim_reordered*/ false,                  //
1435       /*Alignment*/ 0>;
1436 
1437 #if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
1438   using PackRhsImpl =
1439       Eigen::internal::gemm_pack_colmajor_block<float, Eigen::Index, SubMapper,
1440                                                 ColMajor>;
1441 #else
1442   using PackRhsImpl =
1443       Eigen::internal::gemm_pack_rhs<float, Eigen::Index, SubMapper,  //
1444                                      Traits::nr,                      //
1445                                      ColMajor,                        //
1446                                      /*Conjugate*/ false,             //
1447                                      /*PanelMode*/ false>;
1448 #endif
1449 
1450   Eigen::DefaultDevice device;
1451 
1452   // Actual contract dimensions are not important.
1453   const Eigen::Index not_important = -1234;
1454   nocontract_t nocontract_dim = {not_important};
1455   contract_t contract_dim = {not_important};
1456 
1457   // We use tensor of the same dimensions to store packed data.
1458   Tensor<float, 4> packed(input_dims);
1459 
1460   // We generate multiple input tensors, around 512mb in total size to measure
1461   // realistic workload when input data in not in L1-L3 cache.
1462   size_t input_bytes = input_dims.TotalSize() * sizeof(float);
1463   size_t mem_size_bytes = 1024 * 1024 * 512;
1464   size_t num_inputs =
1465       std::max(static_cast<size_t>(1), mem_size_bytes / input_bytes);
1466 
1467   std::vector<Tensor<float, 4>> inputs;
1468   std::vector<Evaluator> evaluators;
1469   std::vector<InputMapper> input_mappers;
1470 
1471   for (int i = 0; i < num_inputs; ++i) {
1472     inputs.emplace_back(input_dims);
1473     inputs[i].setRandom();
1474 
1475     ArgType tensor_map(inputs[i].data(), input_dims);
1476 
1477     // 1. Extract image patches from input tensor. All strides are `1`.
1478     const auto image_patch_op = TensorImagePatchOp<Dynamic, Dynamic, ArgType>(
1479         tensor_map,                                            //
1480         filter_rows, filter_cols,                              //
1481         row_strides, col_strides,                              //
1482         /*in_row_strides=*/1, /*in_col_strides=*/1,            //
1483         /*row_inflate_strides=*/1, /*col_inflate_strides=*/1,  //
1484         Eigen::PADDING_SAME, /*padding_value=*/0.0);
1485 
1486     // 2. Reshape extracted patches into "virtual" 2d tensor.
1487     // NOTE: This is valid for PADDING_SAME only.
1488     Index output_rows = input_rows / row_strides;
1489     Index output_cols = input_cols / col_strides;
1490     NewDimension reshape_dims;
1491     reshape_dims[0] = input_depth * filter_rows * filter_cols;    // patch size
1492     reshape_dims[1] = output_rows * output_cols * input_batches;  // num_patches
1493 
1494     const auto reshape_op =
1495         TensorReshapingOp<NewDimension, decltype(image_patch_op)>(
1496             image_patch_op, reshape_dims);
1497 
1498     evaluators.emplace_back(reshape_op, device);
1499 
1500     input_mappers.emplace_back(evaluators[i], nocontract_dim, nocontract_dim,
1501                                contract_dim, contract_dim);
1502   }
1503 
1504   // We read properties of extracted image patches directly from evaluator.
1505   const Index patch_depth = evaluators[0].impl().dimensions()[0];
1506   const Index patch_rows = evaluators[0].impl().dimensions()[1];
1507   const Index patch_cols = evaluators[0].impl().dimensions()[2];
1508 
1509   // Number of patches is the same as the maximum column available through the
1510   // InputMapper (SubMapper).
1511   const Index num_patches = evaluators[0].impl().dimensions()[3];
1512 
1513   // The size of a single patch, it's the same as the maximum depth available
1514   // through the InputMapper (SubMapper).
1515   const Index patch_size = patch_depth * patch_rows * patch_cols;
1516 
1517   PackRhsImpl pack_rhs;
1518 
1519   const Index packed_total_size = input_dims.TotalSize();
1520 
1521   tensorflow::testing::StartTiming();
1522   for (int i = 0; i < iters; ++i) {
1523     int input_idx =
1524         num_inputs == 1 ? 1 : internal::random<int>(0, num_inputs - 1);
1525 
1526     // Depth offset must be a multiple of 8 (float packet size with AVX2).
1527     Index depth_offset =
1528         (patch_size > block_rows)
1529             ? (internal::random<Index>(0, patch_size - 10) / 8) * 8
1530             : 0;
1531     Index col_offset = internal::random<Index>(0, num_patches - 10);
1532 
1533     Index depth = std::min(block_rows, patch_size - depth_offset);
1534     Index cols = std::min(block_cols, num_patches - col_offset);
1535 
1536     // Write packed data to random memory location to emulate cold caches.
1537     Index packed_size = depth * cols;
1538     Index packed_offset =
1539         internal::random<Index>(0, packed_total_size - packed_size - 1);
1540 
1541     SubMapper sub_mapper =
1542         input_mappers[input_idx].getSubMapper(depth_offset, col_offset);
1543     pack_rhs(packed.data() + packed_offset, sub_mapper, depth, cols);
1544   }
1545   tensorflow::testing::StopTiming();
1546   tensorflow::testing::SetLabel(
1547       absl::StrCat("patch: ", patch_rows, "x", patch_cols, " D", patch_depth,
1548                    "; num_patches=", num_patches, " patch_size=", patch_size,
1549                    " num_inputs=", num_inputs));
1550 }
1551 
PackLhsHelper(int iters,int input_depth,int filter_count,int filter_cols,int filter_rows,Index block_rows,Index block_cols)1552 static void PackLhsHelper(int iters,
1553                           /* Input dimensions: */
1554                           int input_depth,
1555                           /* Filter (kernel) dimensions: */
1556                           int filter_count, int filter_cols, int filter_rows,
1557                           /* Block dimensions: */
1558                           Index block_rows, Index block_cols) {
1559   // Set random seed for benchmark repeatability.
1560   srand(12345);
1561 
1562   eigen_assert(block_rows <= filter_count);
1563   eigen_assert(block_cols <= input_depth * filter_rows * filter_cols);
1564 
1565   tensorflow::testing::UseRealTime();
1566   tensorflow::testing::StopTiming();
1567 
1568   using Dimensions = Eigen::DSizes<Eigen::Index, 4>;
1569 
1570   // Default Eigen::Tensor layout is column major, so we configure dimensions
1571   // starting from the inner most (`filter count` aka `kernel filers`).
1572   Dimensions filter_dims(filter_count, filter_rows, filter_cols, input_depth);
1573 
1574   static const int packet_size = Eigen::internal::packet_traits<float>::size;
1575 
1576   // We are going to reshape filter into 2D tensor.
1577   using NewDimension = Eigen::DSizes<Index, 2>;
1578 
1579   // Contraction dimensions.
1580   using nocontract_t = Eigen::array<Eigen::Index, 1>;
1581   using contract_t = Eigen::array<Eigen::Index, 1>;
1582 
1583   // Input to the ReshapeOp. It is the tensorflow TTypes<float>::Tensor
1584   // with ColMajor layout, instead of RowMajor. But that doesn't make any
1585   // difference, because TensorContraction swaps LHS with RHS for row major
1586   // inputs, and contraction mapper always works with column major data.
1587   using ArgType = TensorMap<Tensor<float, 4>, Eigen::Aligned>;
1588 
1589   using Evaluator =
1590       TensorEvaluator<const TensorReshapingOp<NewDimension, ArgType>,
1591                       Eigen::DefaultDevice>;
1592 
1593   using InputMapper = Eigen::internal::TensorContractionInputMapper<
1594       float, Index, Eigen::internal::Lhs, Evaluator,  //
1595       nocontract_t, contract_t,                       //
1596       packet_size,                                    //
1597       /*inner_dim_contiguous*/ true,                  //
1598       /*inner_dim_reordered*/ false,                  //
1599       /*Alignment*/ 0>;
1600 
1601   using SubMapper = Eigen::internal::TensorContractionSubMapper<
1602       float, Index, Eigen::internal::Lhs, Evaluator,  //
1603       nocontract_t, contract_t,                       //
1604       packet_size,                                    //
1605       /*inner_dim_contiguous*/ true,                  //
1606       /*inner_dim_reordered*/ false,                  //
1607       /*Alignment*/ 0>;
1608 
1609 #if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
1610   using PackLhsImpl =
1611       Eigen::internal::gemm_pack_colmajor_block<float, Eigen::Index, SubMapper,
1612                                                 ColMajor>;
1613 #else
1614   using Traits = typename Eigen::internal::gebp_traits<float, float>;
1615   using PackLhsImpl =
1616       Eigen::internal::gemm_pack_lhs<float, Eigen::Index, SubMapper,      //
1617                                      Traits::mr,                          //
1618                                      Traits::LhsProgress,                 //
1619                                      typename Traits::LhsPacket4Packing,  //
1620                                      ColMajor>;
1621 #endif
1622 
1623   Eigen::DefaultDevice device;
1624 
1625   // We will reshape kernel into 2D tensor.
1626   NewDimension reshape_dims;
1627   reshape_dims[0] = filter_count;
1628   reshape_dims[1] = input_depth * filter_rows * filter_cols;
1629 
1630   // We are going to contract along the 'in_depth * filter_rows * filter_cols`.
1631   nocontract_t nocontract_dim = {0};
1632   contract_t contract_dim = {1};
1633 
1634   // These values computed using the algorithm in TensorContraction.h, with
1635   // 'nocontract_dim' and 'contract_dim' values specified above.
1636   nocontract_t nocontract_strides = {1};
1637   contract_t contract_strides = {filter_count};
1638   nocontract_t i_strides = {1};
1639   contract_t k_strides = {1};
1640 
1641   // We use tensor of the same dimensions to store packed data.
1642   Tensor<float, 4> packed(filter_dims);
1643 
1644   // We generate multiple filter tensors, around 512mb in total size to measure
1645   // realistic workload when input data in not in L1-L3 cache.
1646   size_t input_bytes = filter_dims.TotalSize() * sizeof(float);
1647   size_t mem_size_bytes = 1024 * 1024 * 512;
1648   size_t num_filters =
1649       std::max(static_cast<size_t>(1), mem_size_bytes / input_bytes);
1650 
1651   std::vector<Tensor<float, 4>> filters;
1652   std::vector<Evaluator> evaluators;
1653   std::vector<InputMapper> input_mappers;
1654 
1655   for (int i = 0; i < num_filters; ++i) {
1656     filters.emplace_back(filter_dims);
1657     filters[i].setRandom();
1658 
1659     ArgType tensor_map(filters[i].data(), filter_dims);
1660 
1661     const auto reshape_op =
1662         TensorReshapingOp<NewDimension, ArgType>(tensor_map, reshape_dims);
1663 
1664     evaluators.emplace_back(reshape_op, device);
1665 
1666     input_mappers.emplace_back(evaluators[i], nocontract_strides, i_strides,
1667                                contract_strides, k_strides);
1668   }
1669 
1670   PackLhsImpl pack_lhs;
1671 
1672   const Index packed_total_size = filter_dims.TotalSize();
1673 
1674   // Round up row/col/memory offsets to make them multiple of packet size.
1675   const auto round_up = [](const Index idx) {
1676     return (idx / packet_size) * packet_size;
1677   };
1678 
1679   // Block rows is in the [0, filter_count) range.
1680   // Block cols is in the [0, filter_rows * filter_cols * input_depth) range.
1681 
1682   const Index max_row = filter_count;
1683   const Index max_col = filter_rows * filter_cols * input_depth;
1684 
1685   tensorflow::testing::StartTiming();
1686   for (int i = 0; i < iters; ++i) {
1687     int filter_idx =
1688         num_filters == 1 ? 1 : internal::random<int>(0, num_filters - 1);
1689 
1690     Index row_offset = round_up(internal::random<Index>(0, max_row - 10));
1691     Index col_offset = round_up(internal::random<Index>(0, max_col - 10));
1692 
1693     Index rows = std::min(block_rows, max_row - row_offset);
1694     Index cols = std::min(block_cols, max_col - col_offset);
1695 
1696     // Write packed data to random memory location to emulate cold caches.
1697     Index packed_offset = round_up(
1698         internal::random<Index>(0, packed_total_size - rows * cols - 1));
1699 
1700     SubMapper sub_mapper =
1701         input_mappers[filter_idx].getSubMapper(row_offset, col_offset);
1702 
1703 // NOTE: Eigen gemm_pack_lhs accepts contraction depth (k-th dimension) as a
1704 // first argument (aka block cols). MKL-DNN pack is generic for lhs and rhs
1705 // and accepts block rows and cols in the same order for lhs and rhs.
1706 #if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
1707     pack_lhs(packed.data() + packed_offset, sub_mapper, rows, cols);
1708 #else
1709     pack_lhs(packed.data() + packed_offset, sub_mapper, cols, rows);
1710 #endif
1711   }
1712   tensorflow::testing::StopTiming();
1713   tensorflow::testing::SetLabel(absl::StrCat(
1714       "filter: count=", filter_count, " dims=", filter_rows, "x", filter_cols,
1715       "; input: depth=", input_depth, "; num_filers=", num_filters));
1716 }
1717 
1718 // -------------------------------------------------------------------------- //
1719 // Pack RHS
1720 //
1721 // Macro argument names:
1722 //    N: batch size
1723 //    H: height
1724 //    W: width
1725 //    C: input channels
1726 //   FC: filter channels
1727 //   FH: filter height
1728 //   FW: filter width
1729 //   SH: stride in height dimensions
1730 //   SW: stride in width dimensions
1731 //   BR: block rows
1732 //   BC: block cols
1733 
// Pastes two tokens into one. The benchmark name macros use it to assemble a
// long identifier out of two separately built halves.
#define BM_CONCAT(a, b) a##b
1735 
// Builds the identifier of a Pack RHS benchmark function from its parameters.
// For example (PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) becomes
// BM_PackRhs_32_64x64_IC32_FC64_5x5_s1x1_B256x56.
#define BM_RHS_NAME(prefix, N, H, W, C, FC, FH, FW, SH, SW, BR, BC)       \
  BM_CONCAT(BM_##prefix##_##N##_##H##x##W##_IC##C##_FC##FC##_##FH##x##FW, \
            _s##SH##x##SW##_B##BR##x##BC)
1739 
// Defines a benchmark function, named by BM_RHS_NAME, that forwards `iters`
// and the macro arguments (in the order given) to PackRhsHelper, and then
// hands that function to BENCHMARK. The trailing `;` is supplied at the point
// of use.
#define BM_PackRhs(N, H, W, C, FC, FH, FW, SH, SW, BR, BC)             \
  static void BM_RHS_NAME(PackRhs, N, H, W, C, FC, FH, FW, SH, SW, BR, \
                          BC)(int iters) {                             \
    PackRhsHelper(iters, N, H, W, C, FC, FH, FW, SH, SW, BR, BC);      \
  }                                                                    \
  BENCHMARK(BM_RHS_NAME(PackRhs, N, H, W, C, FC, FH, FW, SH, SW, BR, BC))
1746 
// The number of input channels (input depth) is equal to the number of patch
// channels (patch depth).
1749 
1750 // NOTE: This is the most common case in Tensorflow models.
1751 // Fast path: input channel dimension is the multiple of the packet size.
BM_PackRhs(/*batch*/ 32,        //
           /*image*/ 64, 64,    //
           /*channels*/ 32,     //
           /*num_filters*/ 64,  //
           /*filter*/ 5, 5,     //
           /*stride*/ 1, 1,     //
           /*block*/ 256, 56);

// Same configuration as above, with a 2x2 stride.
BM_PackRhs(/*batch*/ 32,        //
           /*image*/ 64, 64,    //
           /*channels*/ 32,     //
           /*num_filters*/ 64,  //
           /*filter*/ 5, 5,     //
           /*stride*/ 2, 2,     //
           /*block*/ 256, 56);

// Slow path: input channel dimension is not a multiple of the packet size.
BM_PackRhs(/*batch*/ 32,        //
           /*image*/ 64, 64,    //
           /*channels*/ 30,     //
           /*num_filters*/ 64,  //
           /*filter*/ 5, 5,     //
           /*stride*/ 1, 1,     //
           /*block*/ 256, 56);

// Same configuration as above, with a 2x2 stride.
BM_PackRhs(/*batch*/ 32,        //
           /*image*/ 64, 64,    //
           /*channels*/ 30,     //
           /*num_filters*/ 64,  //
           /*filter*/ 5, 5,     //
           /*stride*/ 2, 2,     //
           /*block*/ 256, 56);

// Slow path with input channel dimension smaller than the packet size.
BM_PackRhs(/*batch*/ 32,        //
           /*image*/ 256, 256,  //
           /*channels*/ 4,      //
           /*num_filters*/ 16,  //
           /*filter*/ 8, 8,     //
           /*stride*/ 1, 1,     //
           /*block*/ 256, 56);

// Same configuration as above, with a non-uniform 2x4 stride.
BM_PackRhs(/*batch*/ 32,        //
           /*image*/ 256, 256,  //
           /*channels*/ 4,      //
           /*num_filters*/ 16,  //
           /*filter*/ 8, 8,     //
           /*stride*/ 2, 4,     //
           /*block*/ 256, 56);

// Short and wide block with small input channel dimension.
BM_PackRhs(/*batch*/ 32,        //
           /*image*/ 64, 64,    //
           /*channels*/ 4,      //
           /*num_filters*/ 16,  //
           /*filter*/ 3, 3,     //
           /*stride*/ 1, 1,     //
           /*block*/ 36, 432);

// Same configuration as above, with a 2x2 stride.
BM_PackRhs(/*batch*/ 32,        //
           /*image*/ 64, 64,    //
           /*channels*/ 4,      //
           /*num_filters*/ 16,  //
           /*filter*/ 3, 3,     //
           /*stride*/ 2, 2,     //
           /*block*/ 36, 432);
1818 
1819 // -------------------------------------------------------------------------- //
1820 // Pack LHS
1821 //
1822 // Macro argument names:
1823 //    C: input channels
1824 //   FC: filter channels
1825 //   FH: filter height
1826 //   FW: filter width
1827 //   BR: block rows
1828 //   BC: block cols
1829 
// Builds the identifier of a Pack LHS benchmark function from its parameters.
// For example (PackLhs, 128, 1024, 3, 3, 256, 56) becomes
// BM_PackLhs_128_FC1024_3x3_B256x56.
#define BM_LHS_NAME(prefix, C, FC, FH, FW, BR, BC) \
  BM_CONCAT(BM_##prefix##_##C##_FC##FC##_##FH##x##FW, _B##BR##x##BC)
1832 
// Defines a benchmark function, named by BM_LHS_NAME, that runs PackLhsHelper
// with the given parameters, and then hands that function to BENCHMARK. The
// trailing `;` is supplied at the point of use.
//
// PackLhsHelper takes the filter dimensions as (filter_cols, filter_rows), so
// the width (FW) is passed before the height (FH). This keeps the helper's
// "dims=<rows>x<cols>" label in agreement with the FHxFW part of the benchmark
// name when the filter is not square.
#define BM_PackLhs(C, FC, FH, FW, BR, BC)                              \
  static void BM_LHS_NAME(PackLhs, C, FC, FH, FW, BR, BC)(int iters) { \
    PackLhsHelper(iters, C, FC, FW, FH, BR, BC);                       \
  }                                                                    \
  BENCHMARK(BM_LHS_NAME(PackLhs, C, FC, FH, FW, BR, BC))
1838 
// The number of input channels (input depth) is equal to the number of patch
// channels (patch depth).
1841 
// Block of 256 rows by 56 cols.
BM_PackLhs(/*input channels*/ 128,    //
           /*filter channels*/ 1024,  //
           /*filter dims*/ 3, 3,      //
           /*block*/ 256, 56);

// Same filter as above, with the block shape transposed: 56 rows by 256 cols.
BM_PackLhs(/*input channels*/ 128,    //
           /*filter channels*/ 1024,  //
           /*filter dims*/ 3, 3,      //
           /*block*/ 56, 256);
1851 }  // namespace Eigen
1852