1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef sw_ShaderCore_hpp
16 #define sw_ShaderCore_hpp
17 
18 #include "Debug.hpp"
19 #include "Shader.hpp"
20 #include "Reactor/Reactor.hpp"
21 
22 namespace sw
23 {
24 	class Vector4s
25 	{
26 	public:
27 		Vector4s();
28 		Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
29 		Vector4s(const Vector4s &rhs);
30 
31 		Short4 &operator[](int i);
32 		Vector4s &operator=(const Vector4s &rhs);
33 
34 		Short4 x;
35 		Short4 y;
36 		Short4 z;
37 		Short4 w;
38 	};
39 
40 	class Vector4i
41 	{
42 	public:
43 		Vector4i();
44 		Vector4i(int x, int y, int z, int w);
45 		Vector4i(const Vector4i &rhs);
46 
47 		Int4 &operator[](int i);
48 		Vector4i &operator=(const Vector4i &rhs);
49 
50 		Int4 x;
51 		Int4 y;
52 		Int4 z;
53 		Int4 w;
54 	};
55 
56 	class Vector4u
57 	{
58 	public:
59 		Vector4u();
60 		Vector4u(unsigned int x, unsigned int y, unsigned int z, unsigned int w);
61 		Vector4u(const Vector4u &rhs);
62 
63 		UInt4 &operator[](int i);
64 		Vector4u &operator=(const Vector4u &rhs);
65 
66 		UInt4 x;
67 		UInt4 y;
68 		UInt4 z;
69 		UInt4 w;
70 	};
71 
72 	class Vector4f
73 	{
74 	public:
75 		Vector4f();
76 		Vector4f(float x, float y, float z, float w);
77 		Vector4f(const Vector4f &rhs);
78 
79 		Float4 &operator[](int i);
80 		Vector4f &operator=(const Vector4f &rhs);
81 
82 		Float4 x;
83 		Float4 y;
84 		Float4 z;
85 		Float4 w;
86 	};
87 
88 	Float4 exponential2(RValue<Float4> x, bool pp = false);
89 	Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false);
90 	Float4 exponential(RValue<Float4> x, bool pp = false);
91 	Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false);
92 	Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false);
93 	Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false);
94 	Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
95 	Float4 modulo(RValue<Float4> x, RValue<Float4> y);
96 	Float4 sine_pi(RValue<Float4> x, bool pp = false);     // limited to [-pi, pi] range
97 	Float4 cosine_pi(RValue<Float4> x, bool pp = false);   // limited to [-pi, pi] range
98 	Float4 sine(RValue<Float4> x, bool pp = false);
99 	Float4 cosine(RValue<Float4> x, bool pp = false);
100 	Float4 tangent(RValue<Float4> x, bool pp = false);
101 	Float4 arccos(RValue<Float4> x, bool pp = false);
102 	Float4 arcsin(RValue<Float4> x, bool pp = false);
103 	Float4 arctan(RValue<Float4> x, bool pp = false);
104 	Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false);
105 	Float4 sineh(RValue<Float4> x, bool pp = false);
106 	Float4 cosineh(RValue<Float4> x, bool pp = false);
107 	Float4 tangenth(RValue<Float4> x, bool pp = false);
108 	Float4 arccosh(RValue<Float4> x, bool pp = false);  // Limited to x >= 1
109 	Float4 arcsinh(RValue<Float4> x, bool pp = false);
110 	Float4 arctanh(RValue<Float4> x, bool pp = false);  // Limited to ]-1, 1[ range
111 
112 	Float4 dot2(const Vector4f &v0, const Vector4f &v1);
113 	Float4 dot3(const Vector4f &v0, const Vector4f &v1);
114 	Float4 dot4(const Vector4f &v0, const Vector4f &v1);
115 
116 	void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
117 	void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
118 	void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
119 	void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
120 	void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
121 	void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
122 	void transpose2x4h(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
123 	void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
124 
125 	class Register
126 	{
127 	public:
Register(const Reference<Float4> & x,const Reference<Float4> & y,const Reference<Float4> & z,const Reference<Float4> & w)128 		Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w) : x(x), y(y), z(z), w(w)
129 		{
130 		}
131 
operator [](int i)132 		Reference<Float4> &operator[](int i)
133 		{
134 			switch(i)
135 			{
136 			default:
137 			case 0: return x;
138 			case 1: return y;
139 			case 2: return z;
140 			case 3: return w;
141 			}
142 		}
143 
operator =(const Register & rhs)144 		Register &operator=(const Register &rhs)
145 		{
146 			x = rhs.x;
147 			y = rhs.y;
148 			z = rhs.z;
149 			w = rhs.w;
150 
151 			return *this;
152 		}
153 
operator =(const Vector4f & rhs)154 		Register &operator=(const Vector4f &rhs)
155 		{
156 			x = rhs.x;
157 			y = rhs.y;
158 			z = rhs.z;
159 			w = rhs.w;
160 
161 			return *this;
162 		}
163 
operator Vector4f()164 		operator Vector4f()
165 		{
166 			Vector4f v;
167 
168 			v.x = x;
169 			v.y = y;
170 			v.z = z;
171 			v.w = w;
172 
173 			return v;
174 		}
175 
176 		Reference<Float4> x;
177 		Reference<Float4> y;
178 		Reference<Float4> z;
179 		Reference<Float4> w;
180 	};
181 
182 	template<int S, bool D = false>
183 	class RegisterArray
184 	{
185 	public:
RegisterArray(bool dynamic=D)186 		RegisterArray(bool dynamic = D) : dynamic(dynamic)
187 		{
188 			if(dynamic)
189 			{
190 				x = new Array<Float4>(S);
191 				y = new Array<Float4>(S);
192 				z = new Array<Float4>(S);
193 				w = new Array<Float4>(S);
194 			}
195 			else
196 			{
197 				x = new Array<Float4>[S];
198 				y = new Array<Float4>[S];
199 				z = new Array<Float4>[S];
200 				w = new Array<Float4>[S];
201 			}
202 		}
203 
~RegisterArray()204 		~RegisterArray()
205 		{
206 			if(dynamic)
207 			{
208 				delete x;
209 				delete y;
210 				delete z;
211 				delete w;
212 			}
213 			else
214 			{
215 				delete[] x;
216 				delete[] y;
217 				delete[] z;
218 				delete[] w;
219 			}
220 		}
221 
operator [](int i)222 		Register operator[](int i)
223 		{
224 			if(dynamic)
225 			{
226 				return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
227 			}
228 			else
229 			{
230 				return Register(x[i][0], y[i][0], z[i][0], w[i][0]);
231 			}
232 		}
233 
operator [](RValue<Int> i)234 		Register operator[](RValue<Int> i)
235 		{
236 			ASSERT(dynamic);
237 
238 			return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
239 		}
240 
241 	private:
242 		const bool dynamic;
243 		Array<Float4> *x;
244 		Array<Float4> *y;
245 		Array<Float4> *z;
246 		Array<Float4> *w;
247 	};
248 
249 	class ShaderCore
250 	{
251 		typedef Shader::Control Control;
252 
253 	public:
254 		void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false);
255 		void neg(Vector4f &dst, const Vector4f &src);
256 		void ineg(Vector4f &dst, const Vector4f &src);
257 		void f2b(Vector4f &dst, const Vector4f &src);
258 		void b2f(Vector4f &dst, const Vector4f &src);
259 		void f2i(Vector4f &dst, const Vector4f &src);
260 		void i2f(Vector4f &dst, const Vector4f &src);
261 		void f2u(Vector4f &dst, const Vector4f &src);
262 		void u2f(Vector4f &dst, const Vector4f &src);
263 		void i2b(Vector4f &dst, const Vector4f &src);
264 		void b2i(Vector4f &dst, const Vector4f &src);
265 		void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
266 		void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
267 		void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
268 		void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
269 		void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
270 		void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
271 		void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
272 		void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
273 		void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false);
274 		void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
275 		void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
276 		void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
277 		void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
278 		void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
279 		void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
280 		void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
281 		void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
282 		void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
283 		void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false);
284 		void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false);
285 		void rsq(Vector4f &dst, const Vector4f &src, bool pp = false);
286 		void len2(Float4 &dst, const Vector4f &src, bool pp = false);
287 		void len3(Float4 &dst, const Vector4f &src, bool pp = false);
288 		void len4(Float4 &dst, const Vector4f &src, bool pp = false);
289 		void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
290 		void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
291 		void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
292 		void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
293 		void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
294 		void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
295 		void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
296 		void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
297 		void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
298 		void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
299 		void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
300 		void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3);
301 		void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
302 		void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
303 		void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
304 		void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
305 		void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
306 		void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
307 		void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
308 		void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
309 		void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false);
310 		void exp2(Vector4f &dst, const Vector4f &src, bool pp = false);
311 		void exp(Vector4f &dst, const Vector4f &src, bool pp = false);
312 		void log2x(Vector4f &dst, const Vector4f &src, bool pp = false);
313 		void log2(Vector4f &dst, const Vector4f &src, bool pp = false);
314 		void log(Vector4f &dst, const Vector4f &src, bool pp = false);
315 		void lit(Vector4f &dst, const Vector4f &src);
316 		void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
317 		void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
318 		void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
319 		void packHalf2x16(Vector4f &dst, const Vector4f &src);
320 		void unpackHalf2x16(Vector4f &dst, const Vector4f &src);
321 		void packSnorm2x16(Vector4f &dst, const Vector4f &src);
322 		void packUnorm2x16(Vector4f &dst, const Vector4f &src);
323 		void unpackSnorm2x16(Vector4f &dst, const Vector4f &src);
324 		void unpackUnorm2x16(Vector4f &dst, const Vector4f &src);
325 		void frc(Vector4f &dst, const Vector4f &src);
326 		void trunc(Vector4f &dst, const Vector4f &src);
327 		void floor(Vector4f &dst, const Vector4f &src);
328 		void round(Vector4f &dst, const Vector4f &src);
329 		void roundEven(Vector4f &dst, const Vector4f &src);
330 		void ceil(Vector4f &dst, const Vector4f &src);
331 		void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
332 		void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
333 		void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
334 		void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
335 		void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
336 		void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
337 		void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
338 		void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
339 		void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
340 		void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
341 		void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
342 		void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
343 		void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
344 		void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
345 		void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
346 		void sgn(Vector4f &dst, const Vector4f &src);
347 		void isgn(Vector4f &dst, const Vector4f &src);
348 		void abs(Vector4f &dst, const Vector4f &src);
349 		void iabs(Vector4f &dst, const Vector4f &src);
350 		void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false);
351 		void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false);
352 		void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false);
353 		void sincos(Vector4f &dst, const Vector4f &src, bool pp = false);
354 		void cos(Vector4f &dst, const Vector4f &src, bool pp = false);
355 		void sin(Vector4f &dst, const Vector4f &src, bool pp = false);
356 		void tan(Vector4f &dst, const Vector4f &src, bool pp = false);
357 		void acos(Vector4f &dst, const Vector4f &src, bool pp = false);
358 		void asin(Vector4f &dst, const Vector4f &src, bool pp = false);
359 		void atan(Vector4f &dst, const Vector4f &src, bool pp = false);
360 		void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
361 		void cosh(Vector4f &dst, const Vector4f &src, bool pp = false);
362 		void sinh(Vector4f &dst, const Vector4f &src, bool pp = false);
363 		void tanh(Vector4f &dst, const Vector4f &src, bool pp = false);
364 		void acosh(Vector4f &dst, const Vector4f &src, bool pp = false);
365 		void asinh(Vector4f &dst, const Vector4f &src, bool pp = false);
366 		void atanh(Vector4f &dst, const Vector4f &src, bool pp = false);
367 		void expp(Vector4f &dst, const Vector4f &src, unsigned short version);
368 		void logp(Vector4f &dst, const Vector4f &src, unsigned short version);
369 		void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
370 		void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
371 		void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
372 		void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
373 		void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
374 		void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1);
375 		void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index);
376 		void all(Float4 &dst, const Vector4f &src);
377 		void any(Float4 &dst, const Vector4f &src);
378 		void not(Vector4f &dst, const Vector4f &src);
379 		void or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
380 		void xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
381 		void and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
382 		void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
383 		void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
384 
385 	private:
386 		void sgn(Float4 &dst, const Float4 &src);
387 		void isgn(Float4 &dst, const Float4 &src);
388 		void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
389 		void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
390 		void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2);
391 		void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits);
392 		void halfToFloatBits(Float4& dst, const Float4& halfBits);
393 	};
394 }
395 
396 #endif   // sw_ShaderCore_hpp
397