1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_REFERENCE_UTIL_H_
17 #define TENSORFLOW_COMPILER_XLA_REFERENCE_UTIL_H_
18 
19 #include <array>
20 #include <functional>
21 #include <memory>
22 #include <utility>
23 #include <vector>
24 
25 #include "absl/memory/memory.h"
26 #include "absl/types/span.h"
27 #include "tensorflow/compiler/xla/array2d.h"
28 #include "tensorflow/compiler/xla/array3d.h"
29 #include "tensorflow/compiler/xla/array4d.h"
30 #include "tensorflow/compiler/xla/client/padding.h"
31 #include "tensorflow/compiler/xla/service/hlo_evaluator.h"
32 #include "tensorflow/compiler/xla/util.h"
33 #include "tensorflow/compiler/xla/xla_data.pb.h"
34 #include "tensorflow/core/platform/macros.h"
35 #include "tensorflow/core/platform/types.h"
36 
37 namespace xla {
38 
39 // Utility class for reference implementations of linear algebra routines.
40 class ReferenceUtil {
41  public:
42   // Returns the result of a transpose operation on the input matrix.
43   template <typename T>
TransposeArray2D(const Array2D<T> & operand)44   static std::unique_ptr<Array2D<T>> TransposeArray2D(
45       const Array2D<T>& operand) {
46     auto result =
47         absl::make_unique<Array2D<T>>(operand.width(), operand.height());
48     for (int64 w = 0; w < operand.width(); ++w) {
49       for (int64 h = 0; h < operand.height(); ++h) {
50         (*result)(w, h) = operand(h, w);
51       }
52     }
53 
54     return result;
55   }
56 
57   // Returns the result of a matrix multiply `lhs x rhs`.
58   template <typename T>
MatmulArray2D(const Array2D<T> & lhs,const Array2D<T> & rhs)59   static std::unique_ptr<Array2D<T>> MatmulArray2D(const Array2D<T>& lhs,
60                                                    const Array2D<T>& rhs) {
61     return HloEvaluator::MatmulArray2D(lhs, rhs);
62   }
63 
64   // Converts the input operand to use f64 values instead of f32 values.
65   static std::unique_ptr<Array2D<double>> Array2DF32ToF64(
66       const Array2D<float>& input);
67 
68   // Returns the result of a convolution `lhs <conv> rhs`, with the default
69   // convolution dimension numbers returned from
70   // ComputationBuilder::CreateDefaultConvDimensionNumbers().
71   static std::unique_ptr<Array4D<float>> ConvArray4D(
72       const Array4D<float>& lhs, const Array4D<float>& rhs,
73       std::pair<int64, int64> kernel_stride, Padding padding);
74 
75   // Returns the result of a convolution `lhs <conv> rhs`, with the given
76   // convolution dimension numbers.
77   static std::unique_ptr<Array4D<float>> ConvArray4DGeneralDimensions(
78       const Array4D<float>& lhs, const Array4D<float>& rhs,
79       std::pair<int64, int64> kernel_stride, Padding padding,
80       ConvolutionDimensionNumbers dimension_numbers);
81 
82   // Returns the result of a convolution `lhs <conv> rhs`, with the given
83   // dilation factors.
84   static std::unique_ptr<Array4D<float>> ConvArray4DGeneralDimensionsDilated(
85       const Array4D<float>& lhs, const Array4D<float>& rhs,
86       std::pair<int64, int64> kernel_stride, Padding padding,
87       std::pair<int64, int64> lhs_dilation,
88       std::pair<int64, int64> rhs_dilation, ConvolutionDimensionNumbers dnums);
89 
90   // Returns the result of a convolution `lhs <conv> rhs`, with the default
91   // convolution dimension numbers returned from
92   // ComputationBuilder::CreateDefaultConvDimensionNumbers().
93   static std::unique_ptr<Array3D<float>> ConvArray3D(const Array3D<float>& lhs,
94                                                      const Array3D<float>& rhs,
95                                                      int64 kernel_stride,
96                                                      Padding padding);
97 
98   // Returns the result of a convolution `lhs <conv> rhs`.
99   static std::unique_ptr<Array3D<float>> ConvArray3DGeneralDimensionsDilated(
100       const Array3D<float>& lhs, const Array3D<float>& rhs, int64 kernel_stride,
101       Padding padding, int64 lhs_dilation, int64 rhs_dilation,
102       const ConvolutionDimensionNumbers& dnums);
103 
104   // Returns the result of a separable  convolution with the given parameters.
105   // kernel_stride and padding applies to the depthwise convolution during
106   // the separable convolution. pointwise_weights.depth() must be equal to
107   // input.depth() * depthwise_weights.planes().
108   static std::unique_ptr<Array4D<float>> SeparableConvArray4D(
109       const Array4D<float>& input, const Array4D<float>& depthwise_weights,
110       const Array4D<float>& pointwise_weights,
111       std::pair<int64, int64> kernel_stride, Padding padding);
112 
113   // Returns the result of reducing a matrix to a column vector. init is the
114   // initial value for the reduce operation, and reduce_function is the function
115   // to apply for each reduction step.
116   static std::unique_ptr<std::vector<float>> ReduceToColArray2D(
117       const Array2D<float>& matrix, float init,
118       const std::function<float(float, float)>& reduce_function);
119 
120   // Returns the result of reducing a matrix to a row vector. init is the
121   // initial value for the reduce operation, and reduce_function is the function
122   // to apply for each reduction step.
123   static std::unique_ptr<std::vector<float>> ReduceToRowArray2D(
124       const Array2D<float>& matrix, float init,
125       const std::function<float(float, float)>& reduce_function);
126 
127   // Performs a R2=>R1 reduction by reducing away the dimension specified in
128   // 'dimension_to_reduce'.
129   template <typename T>
ReduceR2ToR1(const Array2D<T> & input,int dimension_to_reduce,T init,const std::function<T (T,T)> & freduce)130   static std::vector<T> ReduceR2ToR1(const Array2D<T>& input,
131                                      int dimension_to_reduce, T init,
132                                      const std::function<T(T, T)>& freduce) {
133     std::vector<T> result(dimension_to_reduce == 0 ? input.n2() : input.n1(),
134                           init);
135     for (int i0 = 0; i0 < input.n1(); ++i0) {
136       for (int i1 = 0; i1 < input.n2(); ++i1) {
137         int output = dimension_to_reduce == 0 ? i1 : i0;
138         result[output] = freduce(result[output], input(i0, i1));
139       }
140     }
141     return result;
142   }
143 
144   // Returns the result of reducing the 4D array to a vector, reducing away
145   // the dimensions specified in dims.
146   static std::vector<float> Reduce4DTo1D(
147       const Array4D<float>& array, float init, absl::Span<const int64> dims,
148       const std::function<float(float, float)>& reduce_function);
149 
150   // Broadcast 1D dimension to 4D, from the dimension `broadcast_from_dim`.
151   static std::unique_ptr<Array4D<float>> Broadcast1DTo4D(
152       const std::vector<float>& array, const std::vector<int64>& bounds,
153       int64 broadcast_from_dim);
154 
155   // Returns the result of reducing the 3D array to a 2D array, reducing away
156   // the dimensions specified in dims.
157   static std::unique_ptr<Array2D<float>> Reduce3DTo2D(
158       const Array3D<float>& array, float init, absl::Span<const int64> dims,
159       const std::function<float(float, float)>& reduce_function);
160 
161   // Applies map_function to each element in the input (2D array) and returns
162   // the result.
163   static std::unique_ptr<Array2D<float>> MapArray2D(
164       const Array2D<float>& matrix,
165       const std::function<float(float)>& map_function);
166 
167   // Applies map_function to each pair of corresponding elements in the two
168   // inputs arrays and returns the result.
169   static std::unique_ptr<Array2D<float>> MapArray2D(
170       const Array2D<float>& lhs, const Array2D<float>& rhs,
171       const std::function<float(float, float)>& map_function);
172 
173   // Number of windows in a given dimension. Calculation taken from
174   // xla::MakePadding().
175   static int64 WindowCount(int64 unpadded_width, int64 window_len, int64 stride,
176                            Padding padding);
177 
178   // Windowed reductions with Add as the function to apply.
179   static std::unique_ptr<std::vector<float>> ReduceWindow1DAdd(
180       absl::Span<const float> operand, float init,
181       absl::Span<const int64> window, absl::Span<const int64> stride,
182       Padding padding);
183   static std::unique_ptr<Array3D<float>> ReduceWindow3DAdd(
184       const Array3D<float>& operand, float init, absl::Span<const int64> window,
185       absl::Span<const int64> stride, Padding padding);
186   static std::unique_ptr<Array4D<float>> ReduceWindow4DAdd(
187       const Array4D<float>& operand, float init, absl::Span<const int64> window,
188       absl::Span<const int64> stride, Padding padding);
189 
190   // Windowed reductions with a generic reduce function.
191   static std::unique_ptr<std::vector<float>> ReduceWindow1DGeneric(
192       absl::Span<const float> operand, float init,
193       const std::function<float(float, float)>& reduce_func,
194       absl::Span<const int64> window, absl::Span<const int64> stride,
195       absl::Span<const std::pair<int64, int64>> padding);
196   static std::unique_ptr<Array4D<float>> ReduceWindow4DGeneric(
197       const Array4D<float>& operand, float init,
198       const std::function<float(float, float)>& reduce_func,
199       absl::Span<const int64> window, absl::Span<const int64> stride,
200       Padding padding);
201   // With arbitrary padding.
202   static std::unique_ptr<Array4D<float>> ReduceWindow4DGeneric(
203       const Array4D<float>& operand, float init,
204       const std::function<float(float, float)>& reduce_func,
205       absl::Span<const int64> window, absl::Span<const int64> stride,
206       absl::Span<const std::pair<int64, int64>> padding);
207 
208   // Batch normalize data.
209   static std::unique_ptr<Array4D<float>> BatchNorm4D(
210       const Array4D<float>& input, const Array4D<float>& mean,
211       const Array4D<float>& var, const Array4D<float>& scale,
212       const Array4D<float>& offset, float epsilon);
213 
214   // Performs select and scatter with Greater Than or equal as the select, plus
215   // as the scatter, and Same Padding.
216   // TODO(b/74533103) Switch tests to evaluator and remove this implementation.
217   static std::unique_ptr<Array4D<float>> SelectAndScatter4DGePlus(
218       const Array4D<float>& operand, const Array4D<float>& source, float init,
219       absl::Span<const int64> window, absl::Span<const int64> stride,
220       bool same_padding);
221 
222   // Concatenates the lhs and rhs arrays along the concatenate_dimension.
223   // E.g. if concatenate_dimension is 0, the "n1"/height dimension is
224   // concatenated, so the arrays are stacked on top of each other.
225   template <typename T>
Concat2D(const Array2D<T> & lhs,const Array2D<T> & rhs,int concatenate_dimension)226   static std::unique_ptr<Array2D<T>> Concat2D(const Array2D<T>& lhs,
227                                               const Array2D<T>& rhs,
228                                               int concatenate_dimension) {
229     CHECK(0 <= concatenate_dimension && concatenate_dimension < 2);
230     auto result = absl::make_unique<Array2D<T>>(
231         concatenate_dimension == 0 ? lhs.n1() + rhs.n1() : lhs.n1(),
232         concatenate_dimension == 1 ? lhs.n2() + rhs.n2() : lhs.n2());
233     for (int64 i0 = 0; i0 < result->n1(); ++i0) {
234       for (int64 i1 = 0; i1 < result->n2(); ++i1) {
235         // If we exceed the bounds of the LHS, draw from the RHS, where the
236         // result index is adjusted by the number of values present in the LHS.
237         (*result)(i0, i1) = i0 < lhs.n1() && i1 < lhs.n2()
238                                 ? lhs(i0, i1)
239                                 : rhs(i0 >= lhs.n1() ? i0 - lhs.n1() : i0,
240                                       i1 >= lhs.n2() ? i1 - lhs.n2() : i1);
241       }
242     }
243     return result;
244   }
245 
246   // Concatenates the lhs and rhs 3D arrays along the concatenate_dimension. lhs
247   // and rhs must have the same dimensions except for the concatenate dimension.
248   template <typename T>
Concat3D(const Array3D<T> & lhs,const Array3D<T> & rhs,int concatenate_dimension)249   static std::unique_ptr<Array3D<T>> Concat3D(const Array3D<T>& lhs,
250                                               const Array3D<T>& rhs,
251                                               int concatenate_dimension) {
252     CHECK(0 <= concatenate_dimension && concatenate_dimension < 3);
253     const int64 lhs_dims[] = {lhs.n1(), lhs.n2(), lhs.n3()};
254     const int64 rhs_dims[] = {rhs.n1(), rhs.n2(), rhs.n3()};
255     int64 out_dims[] = {rhs.n1(), rhs.n2(), rhs.n3()};
256     for (int i = 0; i < 3; ++i) {
257       if (i != concatenate_dimension) {
258         out_dims[i] = lhs_dims[i];
259         CHECK_EQ(lhs_dims[i], rhs_dims[i]);
260       } else {
261         out_dims[i] = lhs_dims[i] + rhs_dims[i];
262       }
263     }
264     auto result =
265         absl::make_unique<Array3D<T>>(out_dims[0], out_dims[1], out_dims[2]);
266     for (int64 i0 = 0; i0 < result->n1(); ++i0) {
267       for (int64 i1 = 0; i1 < result->n2(); ++i1) {
268         for (int64 i2 = 0; i2 < result->n3(); ++i2) {
269           (*result)(i0, i1, i2) =
270               i0 < lhs.n1() && i1 < lhs.n2() && i2 < lhs.n3()
271                   ? lhs(i0, i1, i2)
272                   : rhs(i0 >= lhs.n1() ? i0 - lhs.n1() : i0,
273                         i1 >= lhs.n2() ? i1 - lhs.n2() : i1,
274                         i2 >= lhs.n3() ? i2 - lhs.n3() : i2);
275         }
276       }
277     }
278     return result;
279   }
280 
281   // Concatenates the lhs and rhs 4D arrays along the concatenate_dimension. lhs
282   // and rhs must have the same dimensions except for the concatenate dimension.
283   template <typename T>
Concat4D(const Array4D<T> & lhs,const Array4D<T> & rhs,int concatenate_dimension)284   static std::unique_ptr<Array4D<T>> Concat4D(const Array4D<T>& lhs,
285                                               const Array4D<T>& rhs,
286                                               int concatenate_dimension) {
287     CHECK(0 <= concatenate_dimension && concatenate_dimension < 4);
288     const int64 lhs_dims[] = {lhs.n1(), lhs.n2(), lhs.n3(), lhs.n4()};
289     const int64 rhs_dims[] = {rhs.n1(), rhs.n2(), rhs.n3(), rhs.n4()};
290     int64 out_dims[] = {rhs.n1(), rhs.n2(), rhs.n3(), rhs.n4()};
291     for (int i = 0; i < 4; ++i) {
292       if (i != concatenate_dimension) {
293         out_dims[i] = lhs_dims[i];
294         CHECK_EQ(lhs_dims[i], rhs_dims[i]);
295       } else {
296         out_dims[i] = lhs_dims[i] + rhs_dims[i];
297       }
298     }
299     auto result = absl::make_unique<Array4D<T>>(out_dims[0], out_dims[1],
300                                                 out_dims[2], out_dims[3]);
301     for (int64 i0 = 0; i0 < result->n1(); ++i0) {
302       for (int64 i1 = 0; i1 < result->n2(); ++i1) {
303         for (int64 i2 = 0; i2 < result->n3(); ++i2) {
304           for (int64 i3 = 0; i3 < result->n4(); ++i3) {
305             (*result)(i0, i1, i2, i3) =
306                 i0 < lhs.n1() && i1 < lhs.n2() && i2 < lhs.n3() && i3 < lhs.n4()
307                     ? lhs(i0, i1, i2, i3)
308                     : rhs(i0 >= lhs.n1() ? i0 - lhs.n1() : i0,
309                           i1 >= lhs.n2() ? i1 - lhs.n2() : i1,
310                           i2 >= lhs.n3() ? i2 - lhs.n3() : i2,
311                           i3 >= lhs.n4() ? i3 - lhs.n4() : i3);
312           }
313         }
314       }
315     }
316     return result;
317   }
318 
319   // Slices with index clamping
320   template <typename T>
ClampSlice1D(absl::Span<const T> input,int64 start,int64 size)321   static std::vector<T> ClampSlice1D(absl::Span<const T> input, int64 start,
322                                      int64 size) {
323     start = std::min<int64>(std::max<int64>(0, start), input.size() - size);
324     std::vector<T> result;
325     for (int64 i = 0; i < size; ++i) {
326       result.push_back(input[(start + i)]);
327     }
328     return result;
329   }
330 
331   // Slices the input array given starting indices, limit indices, and strides
332   // in each dimension.
333   template <typename T>
Slice2D(const Array2D<T> & input,std::array<int64,2> starts,std::array<int64,2> limits,std::array<int64,2> strides)334   static std::unique_ptr<Array2D<T>> Slice2D(const Array2D<T>& input,
335                                              std::array<int64, 2> starts,
336                                              std::array<int64, 2> limits,
337                                              std::array<int64, 2> strides) {
338     CHECK_LE(starts[0], input.n1());
339     CHECK_LE(starts[1], input.n2());
340     CHECK_LE(limits[0], input.n1());
341     CHECK_LE(limits[1], input.n2());
342     CHECK_GE(strides[0], 1);
343     CHECK_GE(strides[1], 1);
344     auto result = absl::make_unique<Array2D<T>>(
345         CeilOfRatio(limits[0] - starts[0], strides[0]),
346         CeilOfRatio(limits[1] - starts[1], strides[1]));
347     for (int64 i0 = 0; i0 < result->n1(); ++i0) {
348       for (int64 i1 = 0; i1 < result->n2(); ++i1) {
349         (*result)(i0, i1) =
350             input(starts[0] + i0 * strides[0], starts[1] + i1 * strides[1]);
351       }
352     }
353     return result;
354   }
355 
356   template <typename T>
Slice3D(const Array3D<T> & input,std::array<int64,3> starts,std::array<int64,3> limits,std::array<int64,3> strides)357   static std::unique_ptr<Array3D<T>> Slice3D(const Array3D<T>& input,
358                                              std::array<int64, 3> starts,
359                                              std::array<int64, 3> limits,
360                                              std::array<int64, 3> strides) {
361     CHECK_LE(starts[0], input.n1());
362     CHECK_LE(starts[1], input.n2());
363     CHECK_LE(starts[2], input.n3());
364     CHECK_LE(limits[0], input.n1());
365     CHECK_LE(limits[1], input.n2());
366     CHECK_LE(limits[2], input.n3());
367     CHECK_GE(strides[0], 1);
368     CHECK_GE(strides[1], 1);
369     CHECK_GE(strides[2], 1);
370     auto result = absl::make_unique<Array3D<T>>(
371         CeilOfRatio(limits[0] - starts[0], strides[0]),
372         CeilOfRatio(limits[1] - starts[1], strides[1]),
373         CeilOfRatio(limits[2] - starts[2], strides[2]));
374 
375     for (int64 i0 = 0; i0 < result->n1(); ++i0) {
376       for (int64 i1 = 0; i1 < result->n2(); ++i1) {
377         for (int64 i2 = 0; i2 < result->n3(); ++i2) {
378           (*result)(i0, i1, i2) =
379               input(starts[0] + i0 * strides[0], starts[1] + i1 * strides[1],
380                     starts[2] + i2 * strides[2]);
381         }
382       }
383     }
384     return result;
385   }
386 
387   template <typename T>
Slice4D(const Array4D<T> & input,std::array<int64,4> starts,std::array<int64,4> limits,std::array<int64,4> strides)388   static std::unique_ptr<Array4D<T>> Slice4D(const Array4D<T>& input,
389                                              std::array<int64, 4> starts,
390                                              std::array<int64, 4> limits,
391                                              std::array<int64, 4> strides) {
392     CHECK_LE(starts[0], input.n1());
393     CHECK_LE(starts[1], input.n2());
394     CHECK_LE(starts[2], input.n3());
395     CHECK_LE(starts[3], input.n4());
396     CHECK_LE(limits[0], input.n1());
397     CHECK_LE(limits[1], input.n2());
398     CHECK_LE(limits[2], input.n3());
399     CHECK_LE(limits[3], input.n4());
400     CHECK_GE(strides[0], 1);
401     CHECK_GE(strides[1], 1);
402     CHECK_GE(strides[2], 1);
403     CHECK_GE(strides[3], 1);
404     auto result = absl::make_unique<Array4D<T>>(
405         CeilOfRatio(limits[0] - starts[0], strides[0]),
406         CeilOfRatio(limits[1] - starts[1], strides[1]),
407         CeilOfRatio(limits[2] - starts[2], strides[2]),
408         CeilOfRatio(limits[3] - starts[3], strides[3]));
409     for (int64 i0 = 0; i0 < result->n1(); ++i0) {
410       for (int64 i1 = 0; i1 < result->n2(); ++i1) {
411         for (int64 i2 = 0; i2 < result->n3(); ++i2) {
412           for (int64 i3 = 0; i3 < result->n4(); ++i3) {
413             (*result)(i0, i1, i2, i3) =
414                 input(starts[0] + i0 * strides[0], starts[1] + i1 * strides[1],
415                       starts[2] + i2 * strides[2], starts[3] + i3 * strides[3]);
416           }
417         }
418       }
419     }
420     return result;
421   }
422 
423   // Applies map_function to each element in the input (2D array) and returns
424   // the result.
425   // (row, column) index of each element is also provided as arguments to
426   // map_function.
427   static std::unique_ptr<Array2D<float>> MapWithIndexArray2D(
428       const Array2D<float>& matrix,
429       const std::function<float(float, int64, int64)>& map_function);
430 
431   // Applies map_function to each element in the input (4D array) and returns
432   // the result.
433   template <typename F>
MapArray4D(const Array4D<float> & input,F && map_function)434   static std::unique_ptr<Array4D<float>> MapArray4D(const Array4D<float>& input,
435                                                     F&& map_function) {
436     return MapWithIndexArray4D(input,
437                                [&](float value, int64, int64, int64, int64) {
438                                  return map_function(value);
439                                });
440   }
441 
442   // Applies map_function to each element in the input (4D array) and returns
443   // the result.
444   // (plane, depth, height, width) index of each element is also provided as
445   // arguments to map_function.
446   template <typename F>
MapWithIndexArray4D(const Array4D<float> & input,F && map_function)447   static std::unique_ptr<Array4D<float>> MapWithIndexArray4D(
448       const Array4D<float>& input, F&& map_function) {
449     auto result = absl::make_unique<Array4D<float>>(
450         input.planes(), input.depth(), input.height(), input.width());
451     for (int64 plane = 0; plane < input.planes(); ++plane) {
452       for (int64 depth = 0; depth < input.depth(); ++depth) {
453         for (int64 height = 0; height < input.height(); ++height) {
454           for (int64 width = 0; width < input.width(); ++width) {
455             (*result)(plane, depth, height, width) =
456                 map_function(input(plane, depth, height, width), plane, depth,
457                              height, width);
458           }
459         }
460       }
461     }
462     return result;
463   }
464 
465   // Applies map_function to each pair of elements in the input lhs and rhs
466   // (4D array) and returns the result.
467   template <typename F>
MapArray4D(const Array4D<float> & lhs,const Array4D<float> & rhs,F && map_function)468   static std::unique_ptr<Array4D<float>> MapArray4D(const Array4D<float>& lhs,
469                                                     const Array4D<float>& rhs,
470                                                     F&& map_function) {
471     return MapWithIndexArray4D(
472         lhs, rhs, [&](float lhs, float rhs, int64, int64, int64, int64) {
473           return map_function(lhs, rhs);
474         });
475   }
476 
477   // Applies map_function to each pair of element in lhs and rhs (4D array) and
478   // returns the result.
479   // (plane, depth, height, width) index of each element is also provided as
480   // arguments to map_function.
481   template <typename F>
MapWithIndexArray4D(const Array4D<float> & lhs,const Array4D<float> & rhs,F && map_function)482   static std::unique_ptr<Array4D<float>> MapWithIndexArray4D(
483       const Array4D<float>& lhs, const Array4D<float>& rhs, F&& map_function) {
484     auto result = absl::make_unique<Array4D<float>>(lhs.planes(), lhs.depth(),
485                                                     lhs.height(), lhs.width());
486     for (int64 plane = 0; plane < lhs.planes(); ++plane) {
487       for (int64 depth = 0; depth < lhs.depth(); ++depth) {
488         for (int64 height = 0; height < lhs.height(); ++height) {
489           for (int64 width = 0; width < lhs.width(); ++width) {
490             (*result)(plane, depth, height, width) = map_function(
491                 lhs(plane, depth, height, width),
492                 rhs(plane, depth, height, width), plane, depth, height, width);
493           }
494         }
495       }
496     }
497     return result;
498   }
499 
500   // Returns the result of a 2D pad on an input matrix.
501   template <typename NativeT>
PadArray2D(const Array2D<NativeT> & operand,const PaddingConfig & padding,const NativeT pad)502   static std::unique_ptr<Array2D<NativeT>> PadArray2D(
503       const Array2D<NativeT>& operand, const PaddingConfig& padding,
504       const NativeT pad) {
505     int64 in0 = operand.n1();
506     int64 high_padding0 = padding.dimensions(0).edge_padding_high();
507     int64 low_padding0 = padding.dimensions(0).edge_padding_low();
508     int64 interior_padding0 = padding.dimensions(0).interior_padding();
509     int64 out0 =
510         in0 + low_padding0 + high_padding0 + (in0 - 1) * interior_padding0;
511 
512     int64 in1 = operand.n2();
513     int64 high_padding1 = padding.dimensions(1).edge_padding_high();
514     int64 low_padding1 = padding.dimensions(1).edge_padding_low();
515     int64 interior_padding1 = padding.dimensions(1).interior_padding();
516     int64 out1 =
517         in1 + low_padding1 + high_padding1 + (in1 - 1) * interior_padding1;
518 
519     auto result = absl::make_unique<Array2D<NativeT>>(out0, out1);
520     result->Fill(pad);
521     int64 o0 = low_padding0;
522     for (int64 i0 = 0; i0 < in0; ++i0) {
523       int64 o1 = low_padding1;
524       for (int64 i1 = 0; i1 < in1; ++i1) {
525         if (o0 >= 0 && o1 >= 0 && o0 < out0 && o1 < out1) {
526           (*result)(o0, o1) = operand(i0, i1);
527         }
528         o1 += interior_padding1 + 1;
529       }
530       o0 += interior_padding0 + 1;
531     }
532     return result;
533   }
534 
535   // Returns the result of a 3D pad on an input matrix.
536   template <typename NativeT>
PadArray3D(const Array3D<NativeT> & operand,const PaddingConfig & padding,const NativeT pad)537   static Array3D<NativeT> PadArray3D(const Array3D<NativeT>& operand,
538                                      const PaddingConfig& padding,
539                                      const NativeT pad) {
540     CHECK_EQ(padding.dimensions_size(), 3);
541 
542     const int64 input_bounds[] = {operand.n1(), operand.n2(), operand.n3()};
543     int64 pad_low[3];
544     int64 pad_high[3];
545     int64 pad_interior[3];
546     int64 output_bounds[3];
547     for (int64 i = 0; i < 3; ++i) {
548       pad_low[i] = padding.dimensions(i).edge_padding_low();
549       pad_high[i] = padding.dimensions(i).edge_padding_high();
550       CHECK_LE(0, pad_low[i]);
551       CHECK_LE(0, pad_high[i]);
552       CHECK_LE(0, padding.dimensions(i).interior_padding())
553           << "not implemented";
554       pad_interior[i] = padding.dimensions(i).interior_padding();
555 
556       output_bounds[i] = pad_low[i] + input_bounds[i] + pad_high[i] +
557                          (input_bounds[i] - 1) * pad_interior[i];
558     }
559 
560     Array3D<NativeT> result(output_bounds[0], output_bounds[1],
561                             output_bounds[2]);
562     int indices[] = {0, 0, 0};
563     for (indices[0] = 0; indices[0] < output_bounds[0]; ++indices[0]) {
564       for (indices[1] = 0; indices[1] < output_bounds[1]; ++indices[1]) {
565         for (indices[2] = 0; indices[2] < output_bounds[2]; ++indices[2]) {
566           NativeT* value = &result(indices[0], indices[1], indices[2]);
567           bool value_padded = false;
568           for (int i = 0; i < 3; ++i) {
569             bool in_low_padding = indices[i] < pad_low[i];
570             bool in_high_padding = indices[i] >= output_bounds[i] - pad_high[i];
571             if (in_low_padding || in_high_padding) {
572               *value = pad;
573               value_padded = true;
574             }
575             if (pad_interior[i] &&
576                 (indices[i] - pad_low[i]) % (pad_interior[i] + 1)) {
577               *value = pad;
578               value_padded = true;
579             }
580           }
581           if (value_padded) {
582             continue;
583           }
584           *value = operand((indices[0] - pad_low[0]) / (pad_interior[0] + 1),
585                            (indices[1] - pad_low[1]) / (pad_interior[1] + 1),
586                            (indices[2] - pad_low[2]) / (pad_interior[2] + 1));
587         }
588       }
589     }
590     return result;
591   }
592 
593   // Returns the result of a 4D pad on an input array.
594   template <typename NativeT>
PadArray4D(const Array4D<NativeT> & operand,const PaddingConfig & padding,const NativeT pad)595   static Array4D<NativeT> PadArray4D(const Array4D<NativeT>& operand,
596                                      const PaddingConfig& padding,
597                                      const NativeT pad) {
598     CHECK_EQ(padding.dimensions_size(), 4);
599 
600     const int64 input_bounds[] = {operand.n1(), operand.n2(), operand.n3(),
601                                   operand.n4()};
602     int64 pad_low[4];
603     int64 pad_high[4];
604     int64 pad_interior[4];
605     int64 output_bounds[4];
606     for (int64 i = 0; i < 4; ++i) {
607       pad_low[i] = padding.dimensions(i).edge_padding_low();
608       pad_high[i] = padding.dimensions(i).edge_padding_high();
609       CHECK_LE(0, padding.dimensions(i).interior_padding())
610           << "not implemented";
611       pad_interior[i] = padding.dimensions(i).interior_padding();
612 
613       output_bounds[i] = pad_low[i] + input_bounds[i] + pad_high[i] +
614                          (input_bounds[i] - 1) * pad_interior[i];
615     }
616 
617     Array4D<NativeT> result(output_bounds[0], output_bounds[1],
618                             output_bounds[2], output_bounds[3]);
619     result.Each(
620         [&](absl::Span<const int64> indices, NativeT* value) {
621           for (int i = 0; i < 4; ++i) {
622             bool in_low_padding = indices[i] < pad_low[i];
623             bool in_high_padding = indices[i] >= output_bounds[i] - pad_high[i];
624             if (in_low_padding || in_high_padding) {
625               *value = pad;
626               return;
627             }
628             if (pad_interior[i] &&
629                 (indices[i] - pad_low[i]) % (pad_interior[i] + 1)) {
630               *value = pad;
631               return;
632             }
633           }
634           *value = operand((indices[0] - pad_low[0]) / (pad_interior[0] + 1),
635                            (indices[1] - pad_low[1]) / (pad_interior[1] + 1),
636                            (indices[2] - pad_low[2]) / (pad_interior[2] + 1),
637                            (indices[3] - pad_low[3]) / (pad_interior[3] + 1));
638         });
639     return result;
640   }
641 
642   // ApplyElementwise2D(f, x, y, ...) returns the Array2D formed by running
643   // f(x[i], y[i], ...) for each array element in the Array2Ds x, y, ....
644   //
645   // The given arrays must have the same size and element type, and the return
646   // type of f must be implicitly convertible to the arrays' element type.
647   //
648   // Example usage:
649   //
650   //   Array2D<float> x, y, z = ...;
651   //   std::unique_ptr<Array2D> result = ReferenceUtil::ApplyElementwise2D(
652   //     [](float a, float b, float c) { return a * b + c; }, x, y, z);
653   //
654   template <typename F, typename T1, typename... Ts>
ApplyElementwise2D(F && f,const Array2D<T1> & array1,const Array2D<Ts> &...arrays)655   static std::unique_ptr<Array2D<T1>> ApplyElementwise2D(
656       F&& f, const Array2D<T1>& array1, const Array2D<Ts>&... arrays) {
657     AssertSameSize2D(array1, arrays...);
658     auto result = absl::make_unique<Array2D<T1>>(array1.n1(), array1.n2());
659     for (int64 i = 0; i < array1.n1(); ++i) {
660       for (int64 j = 0; j < array1.n2(); ++j) {
661         (*result)(i, j) = f(array1(i, j), arrays(i, j)...);
662       }
663     }
664     return result;
665   }
666 
667  private:
668   template <typename T1, typename T2, typename... Ts>
AssertSameSize2D(const Array2D<T1> & array1,const Array2D<T2> & array2,const Array2D<Ts> &...arrays)669   static void AssertSameSize2D(const Array2D<T1>& array1,
670                                const Array2D<T2>& array2,
671                                const Array2D<Ts>&... arrays) {
672     static_assert(std::is_same<T1, T2>::value, "Args must be same type.");
673     CHECK_EQ(array1.n1(), array2.n1());
674     CHECK_EQ(array1.n2(), array2.n2());
675     AssertSameSize2D(array2, arrays...);
676   }
677 
678   // Recursive base case for AssertSameSize2D.
679   template <typename Array1>
AssertSameSize2D(const Array1 & array1)680   static void AssertSameSize2D(const Array1& array1) {}
681 
682   TF_DISALLOW_COPY_AND_ASSIGN(ReferenceUtil);
683 };
684 
685 }  // namespace xla
686 
687 #endif  // TENSORFLOW_COMPILER_XLA_REFERENCE_UTIL_H_
688