// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H

namespace Eigen {

/** \class TensorChippingOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief A chip is a thin slice of a tensor: one dimension is fixed at a given
  * offset and removed, generalizing the notion of a column or a row of a 2-d tensor.
  *
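  * As an illustration (a usage sketch only; the tensor name `input` and its
  * sizes are assumptions made for this example), chipping a rank-3 tensor
  * along dimension 1 produces a rank-2 tensor:
  * \code
  * Eigen::Tensor<float, 3> input(4, 3, 5);
  * input.setRandom();
  * // Take the slice at offset 1 along dimension 1: the result has dimensions (4, 5).
  * Eigen::Tensor<float, 2> slice = input.chip(1, 1);
  * // Equivalent form with the chip dimension fixed at compile time.
  * Eigen::Tensor<float, 2> slice2 = input.chip<1>(1);
  * \endcode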
  */

namespace internal {
template<DenseIndex DimId, typename XprType>
struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions - 1;
  static const int Layout = XprTraits::Layout;
};

template<DenseIndex DimId, typename XprType>
struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
{
  typedef const TensorChippingOp<DimId, XprType>& type;
};

template<DenseIndex DimId, typename XprType>
struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type>
{
  typedef TensorChippingOp<DimId, XprType> type;
};

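// Helper that records the dimension being chipped: when DimId is known at
// compile time nothing is stored and the runtime value is only checked in
// debug builds; when DimId is Dynamic the runtime dimension is stored.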
template <DenseIndex DimId>
struct DimensionId
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
    eigen_assert(dim == DimId);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return DimId;
  }
};
template <>
struct DimensionId<Dynamic>
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) {
    eigen_assert(dim >= 0);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return actual_dim;
  }
 private:
  const DenseIndex actual_dim;
};


}  // end namespace internal



template<DenseIndex DimId, typename XprType>
class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
{
  public:
  typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
      : m_xpr(expr), m_offset(offset), m_dim(dim) {
  }

  EIGEN_DEVICE_FUNC
  const Index offset() const { return m_offset; }
  EIGEN_DEVICE_FUNC
  const Index dim() const { return m_dim.actualDim(); }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

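  // Assigning to a chip does not copy the expression node; it evaluates the
  // right-hand side and writes the coefficients back into the chipped region
  // of the underlying expression through TensorAssignOp.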
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE TensorChippingOp& operator = (const TensorChippingOp& other)
  {
    typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign;
    Assign assign(*this, other);
    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
    return *this;
  }

  template<typename OtherDerived>
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other)
  {
    typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign;
    Assign assign(*this, other);
    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
    return *this;
  }

  protected:
    typename XprType::Nested m_xpr;
    const Index m_offset;
    const internal::DimensionId<DimId> m_dim;
};


// Eval as rvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims-1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;


  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets.
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
  {
    EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(NumInputDims > m_dim.actualDim());

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);

    int j = 0;
    for (int i = 0; i < NumInputDims; ++i) {
      if (i != m_dim.actualDim()) {
        m_dimensions[j] = input_dims[i];
        ++j;
      }
    }

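    // m_stride is the stride of the chipped dimension in the input tensor
    // (the product of the dimensions that precede it in storage order);
    // m_inputStride is the stride of the dimension that follows it. The chip
    // offset then contributes a fixed displacement of m_stride * offset.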
    m_stride = 1;
    m_inputStride = 1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < m_dim.actualDim(); ++i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    } else {
      for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    }
    m_inputStride *= input_dims[m_dim.actualDim()];
    m_inputOffset = m_stride * op.offset();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

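  // Vectorized read. Three addressing cases are distinguished: the chipped
  // dimension is the innermost one (stride 1, coefficients are gathered one
  // by one), the outermost one (the chip is contiguous, a single packet load
  // suffices), or an interior one (a packet load is possible only when it
  // does not cross a stride boundary).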
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      Index inputIndex = index * m_inputStride + m_inputOffset;
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = m_impl.coeff(inputIndex);
        inputIndex += m_inputStride;
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(m_stride > index);
      return m_impl.template packet<LoadMode>(index + m_inputOffset);
    } else {
      const Index idx = index / m_stride;
      const Index rem = index - idx * m_stride;
      if (rem + PacketSize <= m_stride) {
        Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
        return m_impl.template packet<LoadMode>(inputIndex);
      } else {
        // The packet crosses a stride boundary. Fall back to the slow path.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        for (int i = 0; i < PacketSize; ++i) {
          values[i] = coeff(index);
          ++index;
        }
        PacketReturnType rslt = internal::pload<PacketReturnType>(values);
        return rslt;
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double cost = 0;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
         m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
         m_dim.actualDim() == NumInputDims - 1)) {
      cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
                m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
                m_dim.actualDim() == 0)) {
      cost += TensorOpCost::AddCost<Index>();
    } else {
      cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
              3 * TensorOpCost::AddCost<Index>();
    }

    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, cost, vectorized, PacketSize);
  }

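  // Expose a raw pointer only when the chip is a contiguous block of the
  // underlying data, i.e. when chipping along the outermost dimension of the
  // storage order; otherwise return NULL so callers fall back to coefficient
  // access.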
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const {
    CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data());
    if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) ||
         (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) &&
        result) {
      return result + m_inputOffset;
    } else {
      return NULL;
    }
  }

 protected:
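  // Map a coefficient index in the chipped (output) tensor to the
  // corresponding linear index in the input tensor, using the same three
  // addressing cases as packet().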
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      inputIndex = index * m_inputStride + m_inputOffset;
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims-1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(m_stride > index);
      inputIndex = index + m_inputOffset;
    } else {
      const Index idx = index / m_stride;
      inputIndex = idx * m_inputStride + m_inputOffset;
      index -= idx * m_stride;
      inputIndex += index;
    }
    return inputIndex;
  }

  Dimensions m_dimensions;
  Index m_stride;
  Index m_inputOffset;
  Index m_inputStride;
  TensorEvaluator<ArgType, Device> m_impl;
  const internal::DimensionId<DimId> m_dim;
  const Device& m_device;
};


// Eval as lvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
  : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims-1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
    { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

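  // Vectorized write. Mirrors the three addressing cases of the rvalue
  // evaluator's packet(): scatter the packet element by element when the
  // chipped dimension is innermost, store it directly when the chip is
  // contiguous, and fall back to scalar writes when a packet would cross a
  // stride boundary.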
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)

    if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == 0) ||
        (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(this->m_stride == 1);
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
      for (int i = 0; i < PacketSize; ++i) {
        this->m_impl.coeffRef(inputIndex) = values[i];
        inputIndex += this->m_inputStride;
      }
    } else if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == NumInputDims-1) ||
               (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == 0)) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(this->m_stride > index);
      this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
    } else {
      const Index idx = index / this->m_stride;
      const Index rem = index - idx * this->m_stride;
      if (rem + PacketSize <= this->m_stride) {
        const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
        this->m_impl.template writePacket<StoreMode>(inputIndex, x);
      } else {
        // The packet crosses a stride boundary. Fall back to the slow path.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
        for (int i = 0; i < PacketSize; ++i) {
          this->coeffRef(index) = values[i];
          ++index;
        }
      }
    }
  }
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H