/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 
8 #ifndef SkNx_DEFINED
9 #define SkNx_DEFINED
10 
11 
12 #define SKNX_NO_SIMDx  // Remove the x to disable SIMD for all SkNx types.
13 
14 
15 #include "SkScalar.h"
16 #include "SkTypes.h"
17 #include <math.h>
18 #define REQUIRE(x) static_assert(x, #x)
19 
// The default implementations just fall back on a pair of size N/2.
21 
22 // SkNb is a _very_ minimal class representing a vector of bools returned by comparison operators.
23 // We pass along the byte size of the compared types (Bytes) to help platform specializations.
24 template <int N, int Bytes>
25 class SkNb {
26 public:
SkNb()27     SkNb() {}
SkNb(const SkNb<N/2,Bytes> & lo,const SkNb<N/2,Bytes> & hi)28     SkNb(const SkNb<N/2, Bytes>& lo, const SkNb<N/2, Bytes>& hi) : fLo(lo), fHi(hi) {}
29 
allTrue()30     bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); }
anyTrue()31     bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); }
32 
33 protected:
34     REQUIRE(0 == (N & (N-1)));
35     SkNb<N/2, Bytes> fLo, fHi;
36 };
37 
38 template <int N, typename T>
39 class SkNi {
40 public:
SkNi()41     SkNi() {}
SkNi(const SkNi<N/2,T> & lo,const SkNi<N/2,T> & hi)42     SkNi(const SkNi<N/2, T>& lo, const SkNi<N/2, T>& hi) : fLo(lo), fHi(hi) {}
SkNi(T val)43     explicit SkNi(T val) : fLo(val), fHi(val) {}
Load(const T vals[N])44     static SkNi Load(const T vals[N]) {
45         return SkNi(SkNi<N/2,T>::Load(vals), SkNi<N/2,T>::Load(vals+N/2));
46     }
47 
SkNi(T a,T b)48     SkNi(T a, T b)                                : fLo(a),       fHi(b)       { REQUIRE(N==2); }
SkNi(T a,T b,T c,T d)49     SkNi(T a, T b, T c, T d)                      : fLo(a,b),     fHi(c,d)     { REQUIRE(N==4); }
SkNi(T a,T b,T c,T d,T e,T f,T g,T h)50     SkNi(T a, T b, T c, T d,  T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) { REQUIRE(N==8); }
SkNi(T a,T b,T c,T d,T e,T f,T g,T h,T i,T j,T k,T l,T m,T n,T o,T p)51     SkNi(T a, T b, T c, T d,  T e, T f, T g, T h,
52          T i, T j, T k, T l,  T m, T n, T o, T p)
53         : fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { REQUIRE(N==16); }
54 
store(T vals[N])55     void store(T vals[N]) const {
56         fLo.store(vals);
57         fHi.store(vals+N/2);
58     }
59 
saturatedAdd(const SkNi & o)60     SkNi saturatedAdd(const SkNi& o) const {
61         return SkNi(fLo.saturatedAdd(o.fLo), fHi.saturatedAdd(o.fHi));
62     }
63 
64     SkNi operator + (const SkNi& o) const { return SkNi(fLo + o.fLo, fHi + o.fHi); }
65     SkNi operator - (const SkNi& o) const { return SkNi(fLo - o.fLo, fHi - o.fHi); }
66     SkNi operator * (const SkNi& o) const { return SkNi(fLo * o.fLo, fHi * o.fHi); }
67 
68     SkNi operator << (int bits) const { return SkNi(fLo << bits, fHi << bits); }
69     SkNi operator >> (int bits) const { return SkNi(fLo >> bits, fHi >> bits); }
70 
Min(const SkNi & a,const SkNi & b)71     static SkNi Min(const SkNi& a, const SkNi& b) {
72         return SkNi(SkNi<N/2, T>::Min(a.fLo, b.fLo), SkNi<N/2, T>::Min(a.fHi, b.fHi));
73     }
74 
75     // TODO: comparisons, max?
76 
kth()77     template <int k> T kth() const {
78         SkASSERT(0 <= k && k < N);
79         return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>();
80     }
81 
82 protected:
83     REQUIRE(0 == (N & (N-1)));
84 
85     SkNi<N/2, T> fLo, fHi;
86 };
87 
88 template <int N, typename T>
89 class SkNf {
90     typedef SkNb<N, sizeof(T)> Nb;
91 
92     static int32_t MyNi(float);
93     static int64_t MyNi(double);
94     typedef SkNi<N, decltype(MyNi(T()))> Ni;
95 public:
SkNf()96     SkNf() {}
SkNf(T val)97     explicit SkNf(T val) : fLo(val),  fHi(val) {}
Load(const T vals[N])98     static SkNf Load(const T vals[N]) {
99         return SkNf(SkNf<N/2,T>::Load(vals), SkNf<N/2,T>::Load(vals+N/2));
100     }
101 
SkNf(T a,T b)102     SkNf(T a, T b)                               : fLo(a),       fHi(b)       { REQUIRE(N==2); }
SkNf(T a,T b,T c,T d)103     SkNf(T a, T b, T c, T d)                     : fLo(a,b),     fHi(c,d)     { REQUIRE(N==4); }
SkNf(T a,T b,T c,T d,T e,T f,T g,T h)104     SkNf(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) { REQUIRE(N==8); }
105 
store(T vals[N])106     void store(T vals[N]) const {
107         fLo.store(vals);
108         fHi.store(vals+N/2);
109     }
110 
castTrunc()111     Ni castTrunc() const { return Ni(fLo.castTrunc(), fHi.castTrunc()); }
112 
113     SkNf operator + (const SkNf& o) const { return SkNf(fLo + o.fLo, fHi + o.fHi); }
114     SkNf operator - (const SkNf& o) const { return SkNf(fLo - o.fLo, fHi - o.fHi); }
115     SkNf operator * (const SkNf& o) const { return SkNf(fLo * o.fLo, fHi * o.fHi); }
116     SkNf operator / (const SkNf& o) const { return SkNf(fLo / o.fLo, fHi / o.fHi); }
117 
118     Nb operator == (const SkNf& o) const { return Nb(fLo == o.fLo, fHi == o.fHi); }
119     Nb operator != (const SkNf& o) const { return Nb(fLo != o.fLo, fHi != o.fHi); }
120     Nb operator  < (const SkNf& o) const { return Nb(fLo  < o.fLo, fHi  < o.fHi); }
121     Nb operator  > (const SkNf& o) const { return Nb(fLo  > o.fLo, fHi  > o.fHi); }
122     Nb operator <= (const SkNf& o) const { return Nb(fLo <= o.fLo, fHi <= o.fHi); }
123     Nb operator >= (const SkNf& o) const { return Nb(fLo >= o.fLo, fHi >= o.fHi); }
124 
Min(const SkNf & l,const SkNf & r)125     static SkNf Min(const SkNf& l, const SkNf& r) {
126         return SkNf(SkNf<N/2,T>::Min(l.fLo, r.fLo), SkNf<N/2,T>::Min(l.fHi, r.fHi));
127     }
Max(const SkNf & l,const SkNf & r)128     static SkNf Max(const SkNf& l, const SkNf& r) {
129         return SkNf(SkNf<N/2,T>::Max(l.fLo, r.fLo), SkNf<N/2,T>::Max(l.fHi, r.fHi));
130     }
131 
sqrt()132     SkNf  sqrt() const { return SkNf(fLo. sqrt(), fHi. sqrt()); }
133 
134     // Generally, increasing precision, increasing cost.
rsqrt0()135     SkNf rsqrt0() const { return SkNf(fLo.rsqrt0(), fHi.rsqrt0()); }
rsqrt1()136     SkNf rsqrt1() const { return SkNf(fLo.rsqrt1(), fHi.rsqrt1()); }
rsqrt2()137     SkNf rsqrt2() const { return SkNf(fLo.rsqrt2(), fHi.rsqrt2()); }
138 
invert()139     SkNf       invert() const { return SkNf(fLo.      invert(), fHi.      invert()); }
approxInvert()140     SkNf approxInvert() const { return SkNf(fLo.approxInvert(), fHi.approxInvert()); }
141 
kth()142     template <int k> T kth() const {
143         SkASSERT(0 <= k && k < N);
144         return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>();
145     }
146 
147 protected:
148     REQUIRE(0 == (N & (N-1)));
SkNf(const SkNf<N/2,T> & lo,const SkNf<N/2,T> & hi)149     SkNf(const SkNf<N/2, T>& lo, const SkNf<N/2, T>& hi) : fLo(lo), fHi(hi) {}
150 
151     SkNf<N/2, T> fLo, fHi;
152 };
153 
154 
// Bottom out the default implementations with scalars when nothing's been specialized.
156 
157 template <int Bytes>
158 class SkNb<1, Bytes> {
159 public:
SkNb()160     SkNb() {}
SkNb(bool val)161     explicit SkNb(bool val) : fVal(val) {}
allTrue()162     bool allTrue() const { return fVal; }
anyTrue()163     bool anyTrue() const { return fVal; }
164 protected:
165     bool fVal;
166 };
167 
168 template <typename T>
169 class SkNi<1,T> {
170 public:
SkNi()171     SkNi() {}
SkNi(T val)172     explicit SkNi(T val) : fVal(val) {}
Load(const T vals[1])173     static SkNi Load(const T vals[1]) { return SkNi(vals[0]); }
174 
store(T vals[1])175     void store(T vals[1]) const { vals[0] = fVal; }
176 
saturatedAdd(const SkNi & o)177     SkNi saturatedAdd(const SkNi& o) const {
178         SkASSERT((T)(~0) > 0); // TODO: support signed T
179         T sum = fVal + o.fVal;
180         return SkNi(sum > fVal ? sum : (T)(~0));
181     }
182 
183     SkNi operator + (const SkNi& o) const { return SkNi(fVal + o.fVal); }
184     SkNi operator - (const SkNi& o) const { return SkNi(fVal - o.fVal); }
185     SkNi operator * (const SkNi& o) const { return SkNi(fVal * o.fVal); }
186 
187     SkNi operator << (int bits) const { return SkNi(fVal << bits); }
188     SkNi operator >> (int bits) const { return SkNi(fVal >> bits); }
189 
Min(const SkNi & a,const SkNi & b)190     static SkNi Min(const SkNi& a, const SkNi& b) { return SkNi(SkTMin(a.fVal, b.fVal)); }
191 
kth()192     template <int k> T kth() const {
193         SkASSERT(0 == k);
194         return fVal;
195     }
196 
197 protected:
198     T fVal;
199 };
200 
201 template <typename T>
202 class SkNf<1,T> {
203     typedef SkNb<1, sizeof(T)> Nb;
204 
205     static int32_t MyNi(float);
206     static int64_t MyNi(double);
207     typedef SkNi<1, decltype(MyNi(T()))> Ni;
208 public:
SkNf()209     SkNf() {}
SkNf(T val)210     explicit SkNf(T val) : fVal(val) {}
Load(const T vals[1])211     static SkNf Load(const T vals[1]) { return SkNf(vals[0]); }
212 
store(T vals[1])213     void store(T vals[1]) const { vals[0] = fVal; }
214 
castTrunc()215     Ni castTrunc() const { return Ni(fVal); }
216 
217     SkNf operator + (const SkNf& o) const { return SkNf(fVal + o.fVal); }
218     SkNf operator - (const SkNf& o) const { return SkNf(fVal - o.fVal); }
219     SkNf operator * (const SkNf& o) const { return SkNf(fVal * o.fVal); }
220     SkNf operator / (const SkNf& o) const { return SkNf(fVal / o.fVal); }
221 
222     Nb operator == (const SkNf& o) const { return Nb(fVal == o.fVal); }
223     Nb operator != (const SkNf& o) const { return Nb(fVal != o.fVal); }
224     Nb operator  < (const SkNf& o) const { return Nb(fVal  < o.fVal); }
225     Nb operator  > (const SkNf& o) const { return Nb(fVal  > o.fVal); }
226     Nb operator <= (const SkNf& o) const { return Nb(fVal <= o.fVal); }
227     Nb operator >= (const SkNf& o) const { return Nb(fVal >= o.fVal); }
228 
Min(const SkNf & l,const SkNf & r)229     static SkNf Min(const SkNf& l, const SkNf& r) { return SkNf(SkTMin(l.fVal, r.fVal)); }
Max(const SkNf & l,const SkNf & r)230     static SkNf Max(const SkNf& l, const SkNf& r) { return SkNf(SkTMax(l.fVal, r.fVal)); }
231 
sqrt()232     SkNf  sqrt() const { return SkNf(Sqrt(fVal));        }
rsqrt0()233     SkNf rsqrt0() const { return SkNf((T)1 / Sqrt(fVal)); }
rsqrt1()234     SkNf rsqrt1() const { return this->rsqrt0(); }
rsqrt2()235     SkNf rsqrt2() const { return this->rsqrt1(); }
236 
invert()237     SkNf       invert() const { return SkNf((T)1 / fVal); }
approxInvert()238     SkNf approxInvert() const { return this->invert();    }
239 
kth()240     template <int k> T kth() const {
241         SkASSERT(k == 0);
242         return fVal;
243     }
244 
245 protected:
246     // We do double sqrts natively, or via floats for any other type.
247     template <typename U>
Sqrt(U val)248     static U      Sqrt(U      val) { return (U) ::sqrtf((float)val); }
Sqrt(double val)249     static double Sqrt(double val) { return     ::sqrt (       val); }
250 
251     T fVal;
252 };
253 
254 
// Generic syntax sugar that should work equally well for all implementations.

// Unary minus, computed as T(0) - l.  T must be constructible from 0 and support binary minus.
template <typename T> T operator - (const T& l) { return T(0) - l; }

// Compound assignment built from the matching binary operator: each evaluates l op r, assigns
// the result back to l, and yields the result of that assignment.
template <typename L, typename R> L& operator += (L& l, const R& r) { return (l = l + r); }
template <typename L, typename R> L& operator -= (L& l, const R& r) { return (l = l - r); }
template <typename L, typename R> L& operator *= (L& l, const R& r) { return (l = l * r); }
template <typename L, typename R> L& operator /= (L& l, const R& r) { return (l = l / r); }

// Compound shifts, built the same way from << and >> with an int bit count.
template <typename L> L& operator <<= (L& l, int bits) { return (l = l << bits); }
template <typename L> L& operator >>= (L& l, int bits) { return (l = l >> bits); }
265 
266 // Include platform specific specializations if available.
267 #ifndef SKNX_NO_SIMD
268     #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
269         #include "../opts/SkNx_sse.h"
270     #elif defined(SK_ARM_HAS_NEON)
271         #include "../opts/SkNx_neon.h"
272     #endif
273 #endif
274 
275 #undef REQUIRE
276 
277 typedef SkNf<2,    float> Sk2f;
278 typedef SkNf<2,   double> Sk2d;
279 typedef SkNf<2, SkScalar> Sk2s;
280 
281 typedef SkNf<4,    float> Sk4f;
282 typedef SkNf<4,   double> Sk4d;
283 typedef SkNf<4, SkScalar> Sk4s;
284 
285 typedef SkNi<4,  uint16_t> Sk4h;
286 typedef SkNi<8,  uint16_t> Sk8h;
287 typedef SkNi<16, uint16_t> Sk16h;
288 
289 typedef SkNi<16, uint8_t> Sk16b;
290 
291 typedef SkNi<4,  int32_t> Sk4i;
292 typedef SkNi<4, uint32_t> Sk4u;
293 
294 #endif//SkNx_DEFINED
295