1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
12 
13 namespace Eigen {
14 
/** \class TensorReshaping
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor reshaping class.
  *
  * Reinterprets the coefficients of the input expression using a new set of
  * dimensions. The total number of coefficients must stay unchanged.
  */
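// A minimal usage sketch (the tensor names and sizes below are illustrative,
// not part of this header):
//
//   Eigen::Tensor<float, 2> input(3, 4);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 3> new_dims{{2, 2, 3}};
//   Eigen::Tensor<float, 3> reshaped = input.reshape(new_dims);  // 12 coefficients either way
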
22 namespace internal {
23 template<typename NewDimensions, typename XprType>
24 struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
25 {
26   typedef typename XprType::Scalar Scalar;
27   typedef traits<XprType> XprTraits;
28   typedef typename XprTraits::StorageKind StorageKind;
29   typedef typename XprTraits::Index Index;
30   typedef typename XprType::Nested Nested;
31   typedef typename remove_reference<Nested>::type _Nested;
32   static const int NumDimensions = array_size<NewDimensions>::value;
33   static const int Layout = XprTraits::Layout;
34 };
35 
36 template<typename NewDimensions, typename XprType>
37 struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
38 {
39   typedef const TensorReshapingOp<NewDimensions, XprType>& type;
40 };
41 
42 template<typename NewDimensions, typename XprType>
43 struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
44 {
45   typedef TensorReshapingOp<NewDimensions, XprType> type;
46 };
47 
48 }  // end namespace internal
49 
50 
51 
52 template<typename NewDimensions, typename XprType>
53 class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
54 {
55   public:
56   typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
57   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
58   typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
59   typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
60   typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;
61 
62   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
63       : m_xpr(expr), m_dims(dims) {}
64 
65     EIGEN_DEVICE_FUNC
66     const NewDimensions& dimensions() const { return m_dims; }
67 
68     EIGEN_DEVICE_FUNC
69     const typename internal::remove_all<typename XprType::Nested>::type&
70     expression() const { return m_xpr; }
71 
72     EIGEN_DEVICE_FUNC
73     EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other)
74     {
75       typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
76       Assign assign(*this, other);
77       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
78       return *this;
79     }
80 
81     template<typename OtherDerived>
82     EIGEN_DEVICE_FUNC
83     EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other)
84     {
85       typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
86       Assign assign(*this, other);
87       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
88       return *this;
89     }
90 
91   protected:
92     typename XprType::Nested m_xpr;
93     const NewDimensions m_dims;
94 };
95 
96 
97 // Eval as rvalue
98 template<typename NewDimensions, typename ArgType, typename Device>
99 struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
100 {
101   typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
102   typedef NewDimensions Dimensions;
103 
104   enum {
105     IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
106     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
107     Layout = TensorEvaluator<ArgType, Device>::Layout,
108     CoordAccess = false,  // to be implemented
109     RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
110   };
111 
112   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
113       : m_impl(op.expression(), device), m_dimensions(op.dimensions())
114   {
115     // The total size of the reshaped tensor must be equal to the total size
116     // of the input tensor.
117     eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
118   }
119 
120   typedef typename XprType::Index Index;
121   typedef typename XprType::Scalar Scalar;
122   typedef typename XprType::CoeffReturnType CoeffReturnType;
123   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
124 
125   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
126 
127   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
128     return m_impl.evalSubExprsIfNeeded(data);
129   }
130   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
131     m_impl.cleanup();
132   }
133 
134   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
135   {
136     return m_impl.coeff(index);
137   }
138 
139   template<int LoadMode>
140   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
141   {
142     return m_impl.template packet<LoadMode>(index);
143   }
144 
145   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
146     return m_impl.costPerCoeff(vectorized);
147   }
148 
149   EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }
150 
151   EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
152 
153  protected:
154   TensorEvaluator<ArgType, Device> m_impl;
155   NewDimensions m_dimensions;
156 };
157 
158 
159 // Eval as lvalue
160 template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
165   typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
166   typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
167   typedef NewDimensions Dimensions;
168 
169   enum {
170     IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
171     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
172     Layout = TensorEvaluator<ArgType, Device>::Layout,
173     CoordAccess = false,  // to be implemented
174     RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
175   };
176 
177   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
178     : Base(op, device)
179   { }
180 
181   typedef typename XprType::Index Index;
182   typedef typename XprType::Scalar Scalar;
183   typedef typename XprType::CoeffReturnType CoeffReturnType;
184   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
185 
186   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
187   {
188     return this->m_impl.coeffRef(index);
189   }
190   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
191   void writePacket(Index index, const PacketReturnType& x)
192   {
193     this->m_impl.template writePacket<StoreMode>(index, x);
194   }
195 };
196 
197 
/** \class TensorSlicing
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor slicing class.
  *
  * Extracts a rectangular block of the input expression, described by a start
  * index and a size along each dimension.
  */
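// A minimal usage sketch (the tensor names and sizes below are illustrative,
// not part of this header):
//
//   Eigen::Tensor<float, 2> input(4, 5);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::Index, 2> extents{{2, 3}};
//   Eigen::Tensor<float, 2> block = input.slice(offsets, extents);  // 2x3 block starting at (1, 2)
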
205 namespace internal {
206 template<typename StartIndices, typename Sizes, typename XprType>
207 struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
208 {
209   typedef typename XprType::Scalar Scalar;
210   typedef traits<XprType> XprTraits;
211   typedef typename XprTraits::StorageKind StorageKind;
212   typedef typename XprTraits::Index Index;
213   typedef typename XprType::Nested Nested;
214   typedef typename remove_reference<Nested>::type _Nested;
215   static const int NumDimensions = array_size<StartIndices>::value;
216   static const int Layout = XprTraits::Layout;
217 };
218 
219 template<typename StartIndices, typename Sizes, typename XprType>
220 struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
221 {
222   typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
223 };
224 
225 template<typename StartIndices, typename Sizes, typename XprType>
226 struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
227 {
228   typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
229 };
230 
231 }  // end namespace internal
232 
233 
234 
235 template<typename StartIndices, typename Sizes, typename XprType>
236 class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
237 {
238   public:
239   typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
240   typedef typename XprType::CoeffReturnType CoeffReturnType;
241   typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
242   typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
243   typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;
244 
245   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
246       : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}
247 
248     EIGEN_DEVICE_FUNC
249     const StartIndices& startIndices() const { return m_indices; }
250     EIGEN_DEVICE_FUNC
251     const Sizes& sizes() const { return m_sizes; }
252 
253     EIGEN_DEVICE_FUNC
254     const typename internal::remove_all<typename XprType::Nested>::type&
255     expression() const { return m_xpr; }
256 
257     template<typename OtherDerived>
258     EIGEN_DEVICE_FUNC
259     EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other)
260     {
261       typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
262       Assign assign(*this, other);
263       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
264       return *this;
265     }
266 
267     EIGEN_DEVICE_FUNC
268     EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other)
269     {
270       typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
271       Assign assign(*this, other);
272       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
273       return *this;
274     }
275 
276 
277   protected:
278     typename XprType::Nested m_xpr;
279     const StartIndices m_indices;
280     const Sizes m_sizes;
281 };
282 
283 
284 // Fixme: figure out the exact threshold
285 namespace {
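// Default (CPU / thread-pool) heuristic: only resort to memcpy when the
// contiguous run exceeds roughly two coefficients per available thread.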
286 template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
287   EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
288   EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; }
289 
290  private:
291   Index threshold_;
292 };
293 
294 // It is very expensive to start the memcpy kernel on GPU: we therefore only
295 // use it for large copies.
296 #ifdef EIGEN_USE_GPU
297 template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice>  {
298   EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
299   EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
300 };
301 #endif
}  // anonymous namespace
303 
304 // Eval as rvalue
305 template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
306 struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
307 {
308   typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
309   static const int NumDims = internal::array_size<Sizes>::value;
310 
311   enum {
312     // Alignment can't be guaranteed at compile time since it depends on the
313     // slice offsets and sizes.
314     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
315     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
316     Layout = TensorEvaluator<ArgType, Device>::Layout,
317     CoordAccess = false,
318     RawAccess = false
319   };
320 
321   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
322       : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
323   {
324     for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
325       eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
326     }
327 
328     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
329     const Sizes& output_dims = op.sizes();
330     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
331       m_inputStrides[0] = 1;
332       for (int i = 1; i < NumDims; ++i) {
333         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
334       }
335 
      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
337       m_outputStrides[0] = 1;
338       for (int i = 1; i < NumDims; ++i) {
339         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
340         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
341       }
342     } else {
343       m_inputStrides[NumDims-1] = 1;
344       for (int i = NumDims - 2; i >= 0; --i) {
345         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
346       }
347 
      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
349       m_outputStrides[NumDims-1] = 1;
350       for (int i = NumDims - 2; i >= 0; --i) {
351         m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
352         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
353       }
354     }
355   }
356 
357   typedef typename XprType::Index Index;
358   typedef typename XprType::Scalar Scalar;
359   typedef typename XprType::CoeffReturnType CoeffReturnType;
360   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
361   typedef Sizes Dimensions;
362 
363   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
364 
365 
366   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
367     m_impl.evalSubExprsIfNeeded(NULL);
368     if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) {
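      // Count how many consecutive output coefficients are also consecutive in
      // the input: the product of every fully preserved inner dimension times
      // the size of the first dimension that is actually narrowed.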
369       Index contiguous_values = 1;
370       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
371         for (int i = 0; i < NumDims; ++i) {
372           contiguous_values *= dimensions()[i];
373           if (dimensions()[i] != m_impl.dimensions()[i]) {
374             break;
375           }
376         }
377       } else {
378         for (int i = NumDims-1; i >= 0; --i) {
379           contiguous_values *= dimensions()[i];
380           if (dimensions()[i] != m_impl.dimensions()[i]) {
381             break;
382           }
383         }
384       }
385       // Use memcpy if it's going to be faster than using the regular evaluation.
386       const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
387       if (trigger(contiguous_values)) {
388         Scalar* src = (Scalar*)m_impl.data();
        for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
390           Index offset = srcCoeff(i);
391           m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
392         }
393         return false;
394       }
395     }
396     return true;
397   }
398 
399   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
400     m_impl.cleanup();
401   }
402 
403   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
404   {
405     return m_impl.coeff(srcCoeff(index));
406   }
407 
408   template<int LoadMode>
409   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
410   {
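    // Map the first and last coefficient of the packet back into the input. If
    // the two input positions are exactly packetSize-1 apart, the packet is
    // contiguous in the input and can be loaded in one shot; otherwise gather
    // the coefficients one by one.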
411     const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
412     EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
413     eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));
414 
415     Index inputIndices[] = {0, 0};
416     Index indices[] = {index, index + packetSize - 1};
417     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
418       for (int i = NumDims - 1; i > 0; --i) {
419         const Index idx0 = indices[0] / m_fastOutputStrides[i];
420         const Index idx1 = indices[1] / m_fastOutputStrides[i];
421         inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
422         inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
423         indices[0] -= idx0 * m_outputStrides[i];
424         indices[1] -= idx1 * m_outputStrides[i];
425       }
426       inputIndices[0] += (indices[0] + m_offsets[0]);
427       inputIndices[1] += (indices[1] + m_offsets[0]);
428     } else {
429       for (int i = 0; i < NumDims - 1; ++i) {
430         const Index idx0 = indices[0] / m_fastOutputStrides[i];
431         const Index idx1 = indices[1] / m_fastOutputStrides[i];
432         inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
433         inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
434         indices[0] -= idx0 * m_outputStrides[i];
435         indices[1] -= idx1 * m_outputStrides[i];
436       }
437       inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
438       inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
439     }
440     if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
441       PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
442       return rslt;
443     }
444     else {
445       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
446       values[0] = m_impl.coeff(inputIndices[0]);
447       values[packetSize-1] = m_impl.coeff(inputIndices[1]);
448       for (int i = 1; i < packetSize-1; ++i) {
449         values[i] = coeff(index+i);
450       }
451       PacketReturnType rslt = internal::pload<PacketReturnType>(values);
452       return rslt;
453     }
454   }
455 
456   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
457     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
458   }
459 
460 
461   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
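    // A raw pointer into the input can only be exposed when the slice is one
    // contiguous block: every dimension outside the first narrowed one (walking
    // from the innermost dimension outward) must have size 1; otherwise NULL is
    // returned.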
462     Scalar* result = m_impl.data();
463     if (result) {
464       Index offset = 0;
465       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
466         for (int i = 0; i < NumDims; ++i) {
467           if (m_dimensions[i] != m_impl.dimensions()[i]) {
468             offset += m_offsets[i] * m_inputStrides[i];
469             for (int j = i+1; j < NumDims; ++j) {
470               if (m_dimensions[j] > 1) {
471                 return NULL;
472               }
473               offset += m_offsets[j] * m_inputStrides[j];
474             }
475             break;
476           }
477         }
478       } else {
479         for (int i = NumDims - 1; i >= 0; --i) {
480           if (m_dimensions[i] != m_impl.dimensions()[i]) {
481             offset += m_offsets[i] * m_inputStrides[i];
482             for (int j = i-1; j >= 0; --j) {
483               if (m_dimensions[j] > 1) {
484                 return NULL;
485               }
486               offset += m_offsets[j] * m_inputStrides[j];
487             }
488             break;
489           }
490         }
491       }
492       return result + offset;
493     }
494     return NULL;
495   }
496 
497  protected:
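  // Translate a linear index within the slice into the corresponding linear
  // index of the input tensor, adding the per-dimension slice offsets.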
498   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
499   {
500     Index inputIndex = 0;
501     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
502       for (int i = NumDims - 1; i > 0; --i) {
503         const Index idx = index / m_fastOutputStrides[i];
504         inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
505         index -= idx * m_outputStrides[i];
506       }
507       inputIndex += (index + m_offsets[0]);
508     } else {
509       for (int i = 0; i < NumDims - 1; ++i) {
510         const Index idx = index / m_fastOutputStrides[i];
511         inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
512         index -= idx * m_outputStrides[i];
513       }
514       inputIndex += (index + m_offsets[NumDims-1]);
515     }
516     return inputIndex;
517   }
518 
519   array<Index, NumDims> m_outputStrides;
520   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
521   array<Index, NumDims> m_inputStrides;
522   TensorEvaluator<ArgType, Device> m_impl;
523   const Device& m_device;
524   Dimensions m_dimensions;
525   const StartIndices m_offsets;
526 };
527 
528 
529 // Eval as lvalue
530 template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
531 struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
532   : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
533 {
534   typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
535   typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
536   static const int NumDims = internal::array_size<Sizes>::value;
537 
538   enum {
539     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
540     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
541     Layout = TensorEvaluator<ArgType, Device>::Layout,
542     CoordAccess = false,
543     RawAccess = false
544   };
545 
546   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
547     : Base(op, device)
548     { }
549 
550   typedef typename XprType::Index Index;
551   typedef typename XprType::Scalar Scalar;
552   typedef typename XprType::CoeffReturnType CoeffReturnType;
553   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
554   typedef Sizes Dimensions;
555 
556   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
557   {
558     return this->m_impl.coeffRef(this->srcCoeff(index));
559   }
560 
561   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
562   void writePacket(Index index, const PacketReturnType& x)
563   {
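    // Mirror of packet() in the rvalue evaluator: compute the input positions
    // of the first and last packet coefficients, store the packet in one shot
    // when they are contiguous in the input, and scatter coefficient by
    // coefficient otherwise.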
564     const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
565     Index inputIndices[] = {0, 0};
566     Index indices[] = {index, index + packetSize - 1};
567     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
568       for (int i = NumDims - 1; i > 0; --i) {
569         const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
570         const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
571         inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
572         inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
573         indices[0] -= idx0 * this->m_outputStrides[i];
574         indices[1] -= idx1 * this->m_outputStrides[i];
575       }
576       inputIndices[0] += (indices[0] + this->m_offsets[0]);
577       inputIndices[1] += (indices[1] + this->m_offsets[0]);
578     } else {
579       for (int i = 0; i < NumDims - 1; ++i) {
580         const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
581         const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
582         inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
583         inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
584         indices[0] -= idx0 * this->m_outputStrides[i];
585         indices[1] -= idx1 * this->m_outputStrides[i];
586       }
587       inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
588       inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
589     }
590     if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
591       this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
592     }
593     else {
594       EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
595       internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
596       this->m_impl.coeffRef(inputIndices[0]) = values[0];
597       this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
598       for (int i = 1; i < packetSize-1; ++i) {
599         this->coeffRef(index+i) = values[i];
600       }
601     }
602   }
603 };
604 
605 
606 
607 namespace internal {
608 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
609 struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
610 {
611   typedef typename XprType::Scalar Scalar;
612   typedef traits<XprType> XprTraits;
613   typedef typename XprTraits::StorageKind StorageKind;
614   typedef typename XprTraits::Index Index;
615   typedef typename XprType::Nested Nested;
616   typedef typename remove_reference<Nested>::type _Nested;
617   static const int NumDimensions = array_size<StartIndices>::value;
618   static const int Layout = XprTraits::Layout;
619 };
620 
621 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
622 struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
623 {
624   typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type;
625 };
626 
627 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
628 struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
629 {
630   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
631 };
632 
633 }  // end namespace internal
634 
635 
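/** \class TensorStridingSlicing
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor strided slicing class.
  *
  * Generalizes TensorSlicingOp: along each dimension it keeps the elements
  * between a start and a stop index, stepping by a (possibly negative) stride,
  * similar to numpy's extended slicing.
  */
// A minimal usage sketch, assuming the stridedSlice() helper exposed by
// TensorBase (the names and sizes below are illustrative):
//
//   Eigen::Tensor<float, 1> v(10);
//   v.setRandom();
//   Eigen::array<Eigen::Index, 1> start{{7}}, stop{{1}}, stride{{-2}};
//   Eigen::Tensor<float, 1> picked = v.stridedSlice(start, stop, stride);  // elements 7, 5, 3
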
636 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
637 class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
638 {
639   public:
640   typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
641   typedef typename XprType::CoeffReturnType CoeffReturnType;
642   typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
643   typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
644   typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;
645 
646   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
647     const XprType& expr, const StartIndices& startIndices,
648     const StopIndices& stopIndices, const Strides& strides)
649       : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
650         m_strides(strides) {}
651 
652     EIGEN_DEVICE_FUNC
653     const StartIndices& startIndices() const { return m_startIndices; }
654     EIGEN_DEVICE_FUNC
    const StopIndices& stopIndices() const { return m_stopIndices; }
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }
658 
659     EIGEN_DEVICE_FUNC
660     const typename internal::remove_all<typename XprType::Nested>::type&
661     expression() const { return m_xpr; }
662 
663     EIGEN_DEVICE_FUNC
664     EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other)
665     {
666       typedef TensorAssignOp<TensorStridingSlicingOp, const TensorStridingSlicingOp> Assign;
667       Assign assign(*this, other);
668       internal::TensorExecutor<const Assign, DefaultDevice>::run(
669           assign, DefaultDevice());
670       return *this;
671     }
672 
673     template<typename OtherDerived>
674     EIGEN_DEVICE_FUNC
675     EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other)
676     {
677       typedef TensorAssignOp<TensorStridingSlicingOp, const OtherDerived> Assign;
678       Assign assign(*this, other);
679       internal::TensorExecutor<const Assign, DefaultDevice>::run(
680           assign, DefaultDevice());
681       return *this;
682     }
683 
684   protected:
685     typename XprType::Nested m_xpr;
686     const StartIndices m_startIndices;
687     const StopIndices m_stopIndices;
688     const Strides m_strides;
689 };
690 
691 // Eval as rvalue
692 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
693 struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
694 {
695   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
696   static const int NumDims = internal::array_size<Strides>::value;
697 
698   enum {
699     // Alignment can't be guaranteed at compile time since it depends on the
700     // slice offsets and sizes.
701     IsAligned = false,
702     PacketAccess = false,
703     BlockAccess = false,
704     Layout = TensorEvaluator<ArgType, Device>::Layout,
705     RawAccess = false
706   };
707 
708   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
709       : m_impl(op.expression(), device), m_device(device), m_strides(op.strides())
710   {
711     // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
712     DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped;
713     for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
714       eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if (m_strides[i] > 0) {
        startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      } else {
        // m_strides[i] < 0 here (a zero stride is ruled out by the assert above):
        // the slice walks backwards, so -1 is accepted as the position just
        // before element 0.
        startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
723       m_startIndices[i] = startIndicesClamped[i];
724     }
725 
726     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
727 
    // Check for degenerate intervals and compute the output tensor shape. An
    // interval is degenerate when it is empty or when its sign disagrees with
    // the stride (e.g. start=5, stop=3, stride=+1); the corresponding output
    // dimension is then 0.
    bool degenerate = false;
    for (int i = 0; i < NumDims; ++i) {
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
        m_dimensions[i] = 0;
        degenerate = true;
      } else {
        // Round up: a partial last step still yields one more element.
        m_dimensions[i] = interval / m_strides[i] +
                          (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
    }
741     Strides output_dims = m_dimensions;
742 
743     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
744       m_inputStrides[0] = m_strides[0];
745       m_offsets[0] = startIndicesClamped[0];
746       Index previousDimProduct = 1;
747       for (int i = 1; i < NumDims; ++i) {
748         previousDimProduct *= input_dims[i-1];
749         m_inputStrides[i] = previousDimProduct * m_strides[i];
750         m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
751       }
752 
      // Unlike the plain slicing evaluator, srcCoeff() below divides by the
      // fast stride of every dimension, so give m_fastOutputStrides[0] an
      // explicit divide-by-1 value.
      m_outputStrides[0] = 1;
      m_fastOutputStrides[0] = internal::TensorIntDivisor<Index>(1);
755       for (int i = 1; i < NumDims; ++i) {
756         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
757         // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
758         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
759       }
760     } else {
761       m_inputStrides[NumDims-1] = m_strides[NumDims-1];
762       m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
763       Index previousDimProduct = 1;
764       for (int i = NumDims - 2; i >= 0; --i) {
765         previousDimProduct *= input_dims[i+1];
766         m_inputStrides[i] = previousDimProduct * m_strides[i];
767         m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
768       }
769 
      // As above, srcCoeff() divides by the fast stride of every dimension, so
      // the innermost one needs an explicit divide-by-1 value as well.
      m_outputStrides[NumDims-1] = 1;
      m_fastOutputStrides[NumDims-1] = internal::TensorIntDivisor<Index>(1);
771       for (int i = NumDims - 2; i >= 0; --i) {
772         m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
773         // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
774         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
775       }
776     }
777     m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
778                                           device.lastLevelCacheSize() /
779                                           sizeof(Scalar));
780   }
781 
782   typedef typename XprType::Index Index;
783   typedef typename XprType::Scalar Scalar;
784   typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
785   typedef typename XprType::CoeffReturnType CoeffReturnType;
786   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
787   typedef Strides Dimensions;
788 
789   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
790 
791 
792   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
793     m_impl.evalSubExprsIfNeeded(NULL);
794     return true;
795   }
796 
797   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
798     m_impl.cleanup();
799   }
800 
801   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
802   {
803     return m_impl.coeff(srcCoeff(index));
804   }
805 
806   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
807     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
808   }
809 
810   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
811     return NULL;
812   }
813 
814  protected:
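  // Translate a linear index within the strided slice into the corresponding
  // linear index of the input, using the stride-scaled input strides and the
  // per-dimension start offsets.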
815   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
816   {
817     Index inputIndex = 0;
818     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
819       for (int i = NumDims - 1; i >= 0; --i) {
820         const Index idx = index / m_fastOutputStrides[i];
821         inputIndex += idx * m_inputStrides[i] + m_offsets[i];
822         index -= idx * m_outputStrides[i];
823       }
824     } else {
825       for (int i = 0; i < NumDims; ++i) {
826         const Index idx = index / m_fastOutputStrides[i];
827         inputIndex += idx * m_inputStrides[i] + m_offsets[i];
828         index -= idx * m_outputStrides[i];
829       }
830     }
831     return inputIndex;
832   }
833 
834   static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
835     return numext::maxi(min, numext::mini(max,value));
836   }
837 
838   array<Index, NumDims> m_outputStrides;
839   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
840   array<Index, NumDims> m_inputStrides;
841   TensorEvaluator<ArgType, Device> m_impl;
842   const Device& m_device;
843   DSizes<Index, NumDims> m_startIndices; // clamped startIndices
844   DSizes<Index, NumDims> m_dimensions;
845   DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
846   const Strides m_strides;
847   std::size_t m_block_total_size_max;
848 };
849 
850 // Eval as lvalue
851 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
852 struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
853   : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
854 {
855   typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
856   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
857   static const int NumDims = internal::array_size<Strides>::value;
858 
859   enum {
860     IsAligned = false,
861     PacketAccess = false,
862     BlockAccess = false,
863     Layout = TensorEvaluator<ArgType, Device>::Layout,
864     CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
865     RawAccess = false
866   };
867 
868   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
869     : Base(op, device)
870     { }
871 
872   typedef typename XprType::Index Index;
873   typedef typename XprType::Scalar Scalar;
874   typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
875   typedef typename XprType::CoeffReturnType CoeffReturnType;
876   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
877   typedef Strides Dimensions;
878 
879   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
880   {
881     return this->m_impl.coeffRef(this->srcCoeff(index));
882   }
883 };
884 
885 
886 } // end namespace Eigen
887 
888 #endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
889