1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
12 
13 namespace Eigen {
14 namespace internal {
15 
16 
17 template <uint64_t n>
18 struct static_val {
19   static const uint64_t value = n;
uint64_tstatic_val20   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; }
21 
static_valstatic_val22   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { }
23 
24   template <typename T>
static_valstatic_val25   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) {
26     eigen_assert(v == n);
27   }
28 };
29 
30 
31 template <typename HIGH = uint64_t, typename LOW = uint64_t>
32 struct TensorUInt128
33 {
34   HIGH high;
35   LOW low;
36 
37   template<typename OTHER_HIGH, typename OTHER_LOW>
38   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128TensorUInt12839   TensorUInt128(const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) : high(other.high), low(other.low) {
40     EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE);
41     EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE);
42   }
43 
44   template<typename OTHER_HIGH, typename OTHER_LOW>
45   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
46   TensorUInt128& operator = (const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) {
47     EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE);
48     EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE);
49     high = other.high;
50     low = other.low;
51     return *this;
52   }
53 
54   template<typename T>
55   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128TensorUInt12856   explicit TensorUInt128(const T& x) : high(0), low(x) {
57     eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest()));
58     eigen_assert(x >= 0);
59   }
60 
61   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128TensorUInt12862   TensorUInt128(HIGH y, LOW x) : high(y), low(x) { }
63 
LOWTensorUInt12864   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const {
65     return low;
66   }
lowerTensorUInt12867   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const {
68     return low;
69   }
upperTensorUInt12870   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const {
71     return high;
72   }
73 };
74 
75 
76 template <typename HL, typename LL, typename HR, typename LR>
77 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
78 bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
79 {
80   return (lhs.high == rhs.high) & (lhs.low == rhs.low);
81 }
82 
83 template <typename HL, typename LL, typename HR, typename LR>
84 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
85 bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
86 {
87   return (lhs.high != rhs.high) | (lhs.low != rhs.low);
88 }
89 
90 template <typename HL, typename LL, typename HR, typename LR>
91 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
92 bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
93 {
94   if (lhs.high != rhs.high) {
95     return lhs.high > rhs.high;
96   }
97   return lhs.low >= rhs.low;
98 }
99 
100 template <typename HL, typename LL, typename HR, typename LR>
101 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
102 bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
103 {
104   if (lhs.high != rhs.high) {
105     return lhs.high < rhs.high;
106   }
107   return lhs.low < rhs.low;
108 }
109 
110 template <typename HL, typename LL, typename HR, typename LR>
111 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
112 TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
113 {
114   TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low);
115   if (result.low < rhs.low) {
116     result.high += 1;
117   }
118   return result;
119 }
120 
121 template <typename HL, typename LL, typename HR, typename LR>
122 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
123 TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
124 {
125   TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low);
126   if (result.low > lhs.low) {
127     result.high -= 1;
128   }
129   return result;
130 }
131 
132 
133 template <typename HL, typename LL, typename HR, typename LR>
134 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
135 TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
136 {
137   // Split each 128-bit integer into 4 32-bit integers, and then do the
138   // multiplications by hand as follow:
139   //   lhs      a  b  c  d
140   //   rhs      e  f  g  h
141   //           -----------
142   //           ah bh ch dh
143   //           bg cg dg
144   //           cf df
145   //           de
146   // The result is stored in 2 64bit integers, high and low.
147 
148   const uint64_t LOW = 0x00000000FFFFFFFFLL;
149   const uint64_t HIGH = 0xFFFFFFFF00000000LL;
150 
151   uint64_t d = lhs.low & LOW;
152   uint64_t c = (lhs.low & HIGH) >> 32LL;
153   uint64_t b = lhs.high & LOW;
154   uint64_t a = (lhs.high & HIGH) >> 32LL;
155 
156   uint64_t h = rhs.low & LOW;
157   uint64_t g = (rhs.low & HIGH) >> 32LL;
158   uint64_t f = rhs.high & LOW;
159   uint64_t e = (rhs.high & HIGH) >> 32LL;
160 
161   // Compute the low 32 bits of low
162   uint64_t acc = d * h;
163   uint64_t low = acc & LOW;
164   //  Compute the high 32 bits of low. Add a carry every time we wrap around
165   acc >>= 32LL;
166   uint64_t carry = 0;
167   uint64_t acc2 = acc + c * h;
168   if (acc2 < acc) {
169     carry++;
170   }
171   acc = acc2 + d * g;
172   if (acc < acc2) {
173     carry++;
174   }
175   low |= (acc << 32LL);
176 
177   // Carry forward the high bits of acc to initiate the computation of the
178   // low 32 bits of high
179   acc2 = (acc >> 32LL) | (carry << 32LL);
180   carry = 0;
181 
182   acc = acc2 + b * h;
183   if (acc < acc2) {
184     carry++;
185   }
186   acc2 = acc + c * g;
187   if (acc2 < acc) {
188     carry++;
189   }
190   acc = acc2 + d * f;
191   if (acc < acc2) {
192     carry++;
193   }
194   uint64_t high = acc & LOW;
195 
196   // Start to compute the high 32 bits of high.
197   acc2 = (acc >> 32LL) | (carry << 32LL);
198 
199   acc = acc2 + a * h;
200   acc2 = acc + b * g;
201   acc = acc2 + c * f;
202   acc2 = acc + d * e;
203   high |= (acc2 << 32LL);
204 
205   return TensorUInt128<uint64_t, uint64_t>(high, low);
206 }
207 
208 template <typename HL, typename LL, typename HR, typename LR>
209 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
210 TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
211 {
212   if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) {
213     return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
214   } else if (lhs < rhs) {
215     return TensorUInt128<uint64_t, uint64_t>(0);
216   } else {
217     // calculate the biggest power of 2 times rhs that's less than or equal to lhs
218     TensorUInt128<uint64_t, uint64_t> power2(1);
219     TensorUInt128<uint64_t, uint64_t> d(rhs);
220     TensorUInt128<uint64_t, uint64_t> tmp(lhs - d);
221     while (lhs >= d) {
222       tmp = tmp - d;
223       d = d + d;
224       power2 = power2 + power2;
225     }
226 
227     tmp = TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
228     TensorUInt128<uint64_t, uint64_t> result(0);
229     while (power2 != TensorUInt128<static_val<0>, static_val<0> >(0)) {
230       if (tmp >= d) {
231         tmp = tmp - d;
232         result = result + power2;
233       }
234       // Shift right
235       power2 = TensorUInt128<uint64_t, uint64_t>(power2.high >> 1, (power2.low >> 1) | (power2.high << 63));
236       d = TensorUInt128<uint64_t, uint64_t>(d.high >> 1, (d.low >> 1) | (d.high << 63));
237     }
238 
239     return result;
240   }
241 }
242 
243 
244 }  // namespace internal
245 }  // namespace Eigen
246 
247 
248 #endif  // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
249