1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2020 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /*
19  * This module implements a variety of mathematical data types and library
20  * functions used by the codec.
21  */
22 
23 #ifndef ASTC_MATHLIB_H_INCLUDED
24 #define ASTC_MATHLIB_H_INCLUDED
25 
26 #include <cmath>
27 #include <cstdint>
28 
// Fallback definition of pi for toolchains whose headers have not already
// provided the M_PI macro.
#if !defined(M_PI)
	#define M_PI 3.14159265358979323846
#endif // !defined(M_PI)
32 
33 /* ============================================================================
34   Fast math library; note that many of the higher-order functions in this set
35   use approximations which are less accurate, but faster, than <cmath> standard
36   library equivalents.
37 
38   Note: Many of these are not necessarily faster than simple C versions when
39   used on a single scalar value, but are included for testing purposes as most
40   have an option based on SSE intrinsics and therefore provide an obvious route
41   to future vectorization.
42 ============================================================================ */
43 
// We support scalar versions of many maths functions which use SSE intrinsics
// as an "optimized" path, using just one lane from the SIMD hardware. In
// reality these are often slower than standard C due to setup and scheduling
// overheads, and the fact that we're not offsetting that cost with any actual
// vectorization.
//
// These variants are only included as a means to test that the accuracy of an
// SSE implementation would be acceptable before refactoring code paths to use
// an actual vectorized implementation which gets some advantage from SSE. It
// is therefore expected that the code will go *slower* with this macro
// set to 1 ...
//
// The value defined here is 0; per the note above, 1 is the setting that opts
// in to the scalar SSE variants.
#define USE_SCALAR_SSE 0
56 
57 // These are namespaced to avoid colliding with C standard library functions.
58 namespace astc
59 {
60 
/**
 * @brief Check whether a single-precision value is a NaN.
 *
 * The check relies on a NaN being the only float value that does not compare
 * equal to itself.
 *
 * @param val The value to test.
 *
 * @return Zero if @c val is not a NaN, non-zero (one) if it is.
 */
static inline int isnan(float val)
{
	// Self-equality holds for every value except a NaN.
	return (val == val) ? 0 : 1;
}
72 
/**
 * @brief Initialize the seed structure for a random number generator.
 *
 * Important note: For the purposes of ASTC we want sets of random numbers to
 * use in the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values ...
 *
 * @param state The state structure to initialize (two 64-bit words).
 */
void rand_init(uint64_t state[2]);
85 
/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * @param state The state structure to use/update (two 64-bit words).
 *
 * @return The next 64-bit random number from the generator.
 */
uint64_t rand(uint64_t state[2]);
96 
97 }
98 
99 /* ============================================================================
100   Utility vector template classes with basic operations
101 ============================================================================ */
102 
/**
 * @brief A minimal four-component vector holding values of type @c T.
 *
 * The components are public and are named x, y, z and w.
 *
 * Copy construction and copy assignment are deliberately left to the
 * compiler: the implicitly generated versions perform the same member-wise
 * copy a hand-written version would, while keeping the type trivially
 * copyable whenever @c T is.
 *
 * @tparam T The type of each component.
 */
template <typename T> class vtype4
{
public:
	T x, y, z, w;

	/**
	 * @brief Construct a vector without assigning any component values.
	 *
	 * For a fundamental @c T, default-initialization ("vtype4<T> v;") leaves
	 * the components indeterminate, while value-initialization
	 * ("vtype4<T> v{}") zero-initializes them.
	 */
	vtype4() = default;

	/**
	 * @brief Construct a vector from four component values.
	 *
	 * @param p The value stored in x.
	 * @param q The value stored in y.
	 * @param r The value stored in z.
	 * @param s The value stored in w.
	 */
	vtype4(T p, T q, T r, T s) : x(p), y(q), z(r), w(s) {}
};
118 
119 typedef vtype4<int>          int4;
120 typedef vtype4<unsigned int> uint4;
121 
122 static inline int4    operator+(int4 p,    int4 q)     { return int4(    p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); }
123 static inline uint4   operator+(uint4 p,   uint4 q)    { return uint4(   p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); }
124 
125 static inline int4    operator-(int4 p,    int4 q)     { return int4(    p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); }
126 static inline uint4   operator-(uint4 p,   uint4 q)    { return uint4(   p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); }
127 
128 static inline int4    operator*(int4 p,    int4 q)     { return int4(    p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); }
129 static inline uint4   operator*(uint4 p,   uint4 q)    { return uint4(   p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); }
130 
131 static inline int4    operator*(int4 p,    int q)      { return int4(    p.x * q, p.y * q, p.z * q, p.w * q ); }
132 static inline uint4   operator*(uint4 p,   uint32_t q) { return uint4(   p.x * q, p.y * q, p.z * q, p.w * q ); }
133 
134 static inline int4    operator*(int p,      int4 q)    { return q * p; }
135 static inline uint4   operator*(uint32_t p, uint4 q)   { return q * p; }
136 
// MIN(x,y) expands to x when x < y and to y otherwise. It is a function-like
// macro, so the selected argument expression is evaluated twice; avoid
// arguments with side effects. Only defined if no MIN macro exists already.
#if !defined(MIN)
	#define MIN(x,y) ((x)<(y)?(x):(y))
#endif // !defined(MIN)
140 
// MAX(x,y) expands to x when x > y and to y otherwise. It is a function-like
// macro, so the selected argument expression is evaluated twice; avoid
// arguments with side effects. Only defined if no MAX macro exists already.
#if !defined(MAX)
	#define MAX(x,y) ((x)>(y)?(x):(y))
#endif // !defined(MAX)
144 
145 /* ============================================================================
146   Softfloat library with fp32 and fp16 conversion functionality.
147 ============================================================================ */
/**
 * @brief Union overlaying an unsigned 32-bit integer, a signed 32-bit integer
 * and a float on the same storage.
 */
union if32_
{
	uint32_t u; // unsigned integer member
	int32_t s;  // signed integer member
	float f;    // floating-point member
};

typedef union if32_ if32;
154 
/**
 * @brief Declaration of the clz32 helper; only the prototype is visible here.
 *
 * NOTE(review): the name suggests this counts the leading zero bits of @c p -
 * confirm against the definition, including the result for p == 0.
 *
 * @param p The 32-bit unsigned input value.
 *
 * @return A 32-bit unsigned result.
 */
uint32_t clz32(uint32_t p);
156 
/* Sized soft-float types. These are aliases of the sized integer types of
   C99 rather than of C's floating-point types, because the library needs to
   maintain exact, bit-level control over all operations on these data
   types. */
using sf16 = uint16_t;
using sf32 = uint32_t;
163 
/**
 * @brief Widening float->float conversion from a 16-bit soft-float value to a
 * 32-bit soft-float value.
 *
 * Both the argument and the result are soft-float values held in sized
 * integer types (sf16 and sf32), not native floating-point types.
 *
 * NOTE(review): presumably the encodings are IEEE 754 binary16 and binary32 -
 * confirm against the definition.
 */
sf32 sf16_to_sf32(sf16);
166 
/**
 * @brief Convert a 16-bit soft-float value (sf16) to a native float.
 *
 * NOTE(review): presumably the argument is an IEEE 754 binary16 bit pattern -
 * confirm against the definition.
 */
float sf16_to_float(sf16);
168 
169 #endif
170