1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef sw_ShaderCore_hpp
16 #define sw_ShaderCore_hpp
17 
18 #include "Shader.hpp"
19 #include "Reactor/Reactor.hpp"
20 #include "Reactor/Print.hpp"
21 #include "Common/Debug.hpp"
22 
23 namespace sw
24 {
25 	using namespace rr;
26 
// Vector4s groups four Short4 values as the named components x, y, z and w.
// Only declarations appear in this header; the constructors, operator[] and
// operator= are defined elsewhere.
27 	class Vector4s
28 	{
29 	public:
30 		Vector4s();
		// NOTE(review): presumably each unsigned short initialises the component
		// of the same name - confirm against the definition.
31 		Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
32 		Vector4s(const Vector4s &rhs);
33 
		// Indexed access to a component, returned by mutable reference.
		// NOTE(review): presumably 0..3 map to x..w - confirm against the definition.
34 		Short4 &operator[](int i);
35 		Vector4s &operator=(const Vector4s &rhs);
36 
		// The four components, publicly accessible.
37 		Short4 x;
38 		Short4 y;
39 		Short4 z;
40 		Short4 w;
41 	};
42 
// Vector4f groups four Float4 values as the named components x, y, z and w.
// Only declarations appear in this header; the constructors, operator[] and
// operator= are defined elsewhere.
43 	class Vector4f
44 	{
45 	public:
46 		Vector4f();
		// NOTE(review): presumably each float initialises the component of the
		// same name - confirm against the definition.
47 		Vector4f(float x, float y, float z, float w);
48 		Vector4f(const Vector4f &rhs);
49 
		// Indexed access to a component, returned by mutable reference.
		// NOTE(review): presumably 0..3 map to x..w - confirm against the definition.
50 		Float4 &operator[](int i);
51 		Vector4f &operator=(const Vector4f &rhs);
52 
		// The four components, publicly accessible.
53 		Float4 x;
54 		Float4 y;
55 		Float4 z;
56 		Float4 w;
57 	};
58 
// Float4 math helpers. These are declarations only; the bodies are not in this header.
// Every function receives its operand(s) as RValue<Float4> and returns a Float4.
// NOTE(review): `pp` (default false) presumably selects a partial-precision
// evaluation - confirm against the definitions.
// NOTE(review): the mandatory `abs` flag on logarithm2 / logarithm /
// reciprocalSquareRoot presumably makes the function work on |x| - confirm.
59 	Float4 exponential2(RValue<Float4> x, bool pp = false);
60 	Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false);
61 	Float4 exponential(RValue<Float4> x, bool pp = false);
62 	Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false);
63 	Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false);
	// NOTE(review): the meaning of `finite` and `exactAtPow2` is not visible in this header.
64 	Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false);
65 	Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
66 	Float4 modulo(RValue<Float4> x, RValue<Float4> y);
67 	Float4 sine_pi(RValue<Float4> x, bool pp = false);     // limited to [-pi, pi] range
68 	Float4 cosine_pi(RValue<Float4> x, bool pp = false);   // limited to [-pi, pi] range
69 	Float4 sine(RValue<Float4> x, bool pp = false);
70 	Float4 cosine(RValue<Float4> x, bool pp = false);
71 	Float4 tangent(RValue<Float4> x, bool pp = false);
72 	Float4 arccos(RValue<Float4> x, bool pp = false);
73 	Float4 arcsin(RValue<Float4> x, bool pp = false);
74 	Float4 arctan(RValue<Float4> x, bool pp = false);
	// Two-operand overload; note the (y, x) parameter order.
75 	Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false);
76 	Float4 sineh(RValue<Float4> x, bool pp = false);
77 	Float4 cosineh(RValue<Float4> x, bool pp = false);
78 	Float4 tangenth(RValue<Float4> x, bool pp = false);
79 	Float4 arccosh(RValue<Float4> x, bool pp = false);  // Limited to x >= 1
80 	Float4 arcsinh(RValue<Float4> x, bool pp = false);
81 	Float4 arctanh(RValue<Float4> x, bool pp = false);  // Limited to ]-1, 1[ range
82 
// Dot-product helpers: each takes two Vector4f operands by const reference and
// returns a Float4. Declarations only; the bodies are not in this header.
// NOTE(review): the numeric suffix presumably is the number of leading
// components (x, y, ...) that take part in the product - confirm.
83 	Float4 dot2(const Vector4f &v0, const Vector4f &v1);
84 	Float4 dot3(const Vector4f &v0, const Vector4f &v1);
85 	Float4 dot4(const Vector4f &v0, const Vector4f &v1);
86 
// Transpose helpers. All four rows are passed by non-const reference and nothing
// is returned, so any result is delivered through the row arguments themselves.
// Declarations only; the bodies are not in this header.
// NOTE(review): the exact meaning of the AxB suffixes (which rows/columns are
// valid on input and on output) is not visible here - confirm against the definitions.
87 	void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
88 	void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
89 	void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
90 	void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
91 	void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
92 	void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
93 	void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
	// Variant taking the second dimension as the run-time argument N.
	// NOTE(review): presumably dispatches to one of the fixed-size variants above - confirm.
94 	void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
95 
96 	class Register
97 	{
98 	public:
Register(const Reference<Float4> & x,const Reference<Float4> & y,const Reference<Float4> & z,const Reference<Float4> & w)99 		Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w) : x(x), y(y), z(z), w(w)
100 		{
101 		}
102 
operator [](int i)103 		Reference<Float4> &operator[](int i)
104 		{
105 			switch(i)
106 			{
107 			default:
108 			case 0: return x;
109 			case 1: return y;
110 			case 2: return z;
111 			case 3: return w;
112 			}
113 		}
114 
operator =(const Register & rhs)115 		Register &operator=(const Register &rhs)
116 		{
117 			x = rhs.x;
118 			y = rhs.y;
119 			z = rhs.z;
120 			w = rhs.w;
121 
122 			return *this;
123 		}
124 
operator =(const Vector4f & rhs)125 		Register &operator=(const Vector4f &rhs)
126 		{
127 			x = rhs.x;
128 			y = rhs.y;
129 			z = rhs.z;
130 			w = rhs.w;
131 
132 			return *this;
133 		}
134 
operator Vector4f()135 		operator Vector4f()
136 		{
137 			Vector4f v;
138 
139 			v.x = x;
140 			v.y = y;
141 			v.z = z;
142 			v.w = w;
143 
144 			return v;
145 		}
146 
147 		Reference<Float4> x;
148 		Reference<Float4> y;
149 		Reference<Float4> z;
150 		Reference<Float4> w;
151 	};
152 
153 	class RegisterFile
154 	{
155 	public:
RegisterFile(int size,bool indirectAddressable)156 		RegisterFile(int size, bool indirectAddressable) : size(size), indirectAddressable(indirectAddressable)
157 		{
158 			if(indirectAddressable)
159 			{
160 				x = new Array<Float4>(size);
161 				y = new Array<Float4>(size);
162 				z = new Array<Float4>(size);
163 				w = new Array<Float4>(size);
164 			}
165 			else
166 			{
167 				x = new Array<Float4>[size];
168 				y = new Array<Float4>[size];
169 				z = new Array<Float4>[size];
170 				w = new Array<Float4>[size];
171 			}
172 		}
173 
~RegisterFile()174 		~RegisterFile()
175 		{
176 			if(indirectAddressable)
177 			{
178 				delete x;
179 				delete y;
180 				delete z;
181 				delete w;
182 			}
183 			else
184 			{
185 				delete[] x;
186 				delete[] y;
187 				delete[] z;
188 				delete[] w;
189 			}
190 		}
191 
operator [](int i)192 		Register operator[](int i)
193 		{
194 			ASSERT(i < size);
195 			if(indirectAddressable)
196 			{
197 				return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
198 			}
199 			else
200 			{
201 				return Register(x[i][0], y[i][0], z[i][0], w[i][0]);
202 			}
203 		}
204 
operator [](RValue<Int> i)205 		Register operator[](RValue<Int> i)
206 		{
207 			ASSERT(indirectAddressable);
208 
209 			return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
210 		}
211 
212 		const Vector4f operator[](RValue<Int4> i);   // Gather operation (read only).
213 
214 		void scatter_x(Int4 i, RValue<Float4> r);
215 		void scatter_y(Int4 i, RValue<Float4> r);
216 		void scatter_z(Int4 i, RValue<Float4> r);
217 		void scatter_w(Int4 i, RValue<Float4> r);
218 
219 	protected:
220 		const int size;
221 		const bool indirectAddressable;
222 		Array<Float4> *x;
223 		Array<Float4> *y;
224 		Array<Float4> *z;
225 		Array<Float4> *w;
226 	};
227 
228 	template<int S, bool I = false>
229 	class RegisterArray : public RegisterFile
230 	{
231 	public:
RegisterArray(bool indirectAddressable=I)232 		RegisterArray(bool indirectAddressable = I) : RegisterFile(S, indirectAddressable)
233 		{
234 		}
235 	};
236 
// ShaderCore declares the shader operations as member functions.
// Every operation returns void and takes its output operand `dst` as the first
// parameter by non-const reference (Vector4f& or Float4&); source operands are
// passed by const reference, plus a few by-value flags/selectors (pp, Control,
// shaderModel). The class declares no data members. Only declarations are
// visible here; the bodies are not in this header.
// NOTE(review): `pp` presumably selects a partial-precision evaluation, and the
// i/u name prefixes presumably mark signed/unsigned integer variants - confirm
// against the definitions.
237 	class ShaderCore
238 	{
		// Local shorthand for Shader::Control, used by cmp / icmp / ucmp below.
239 		typedef Shader::Control Control;
240 
241 	public:
242 		void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false);
243 		void neg(Vector4f &dst, const Vector4f &src);
244 		void ineg(Vector4f &dst, const Vector4f &src);
245 		void f2b(Vector4f &dst, const Vector4f &src);
246 		void b2f(Vector4f &dst, const Vector4f &src);
247 		void f2i(Vector4f &dst, const Vector4f &src);
248 		void i2f(Vector4f &dst, const Vector4f &src);
249 		void f2u(Vector4f &dst, const Vector4f &src);
250 		void u2f(Vector4f &dst, const Vector4f &src);
251 		void i2b(Vector4f &dst, const Vector4f &src);
252 		void b2i(Vector4f &dst, const Vector4f &src);
253 		void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
254 		void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
255 		void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
256 		void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
257 		void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
258 		void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
259 		void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
260 		void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
261 		void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false);
262 		void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
263 		void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
264 		void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
265 		void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
266 		void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
267 		void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
268 		void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
269 		void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
270 		void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
271 		void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false);
272 		void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false);
273 		void rsq(Vector4f &dst, const Vector4f &src, bool pp = false);
		// len* / dist* write a single Float4 rather than a Vector4f.
274 		void len2(Float4 &dst, const Vector4f &src, bool pp = false);
275 		void len3(Float4 &dst, const Vector4f &src, bool pp = false);
276 		void len4(Float4 &dst, const Vector4f &src, bool pp = false);
277 		void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
278 		void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
279 		void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
280 		void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
281 		void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
282 		void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
283 		void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
284 		void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
285 		void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
286 		void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
287 		void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
288 		void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3);
289 		void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
290 		void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
291 		void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
292 		void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
293 		void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
294 		void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
295 		void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
296 		void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
297 		void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false);
298 		void exp2(Vector4f &dst, const Vector4f &src, bool pp = false);
299 		void exp(Vector4f &dst, const Vector4f &src, bool pp = false);
300 		void log2x(Vector4f &dst, const Vector4f &src, bool pp = false);
301 		void log2(Vector4f &dst, const Vector4f &src, bool pp = false);
302 		void log(Vector4f &dst, const Vector4f &src, bool pp = false);
303 		void lit(Vector4f &dst, const Vector4f &src);
304 		void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
305 		void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
306 		void isinf(Vector4f &dst, const Vector4f &src);
307 		void isnan(Vector4f &dst, const Vector4f &src);
308 		void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
309 		void packHalf2x16(Vector4f &dst, const Vector4f &src);
310 		void unpackHalf2x16(Vector4f &dst, const Vector4f &src);
311 		void packSnorm2x16(Vector4f &dst, const Vector4f &src);
312 		void packUnorm2x16(Vector4f &dst, const Vector4f &src);
313 		void unpackSnorm2x16(Vector4f &dst, const Vector4f &src);
314 		void unpackUnorm2x16(Vector4f &dst, const Vector4f &src);
315 		void frc(Vector4f &dst, const Vector4f &src);
316 		void trunc(Vector4f &dst, const Vector4f &src);
317 		void floor(Vector4f &dst, const Vector4f &src);
318 		void round(Vector4f &dst, const Vector4f &src);
319 		void roundEven(Vector4f &dst, const Vector4f &src);
320 		void ceil(Vector4f &dst, const Vector4f &src);
321 		void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
322 		void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
323 		void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
324 		void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
325 		void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
326 		void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
327 		void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
328 		void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
329 		void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
330 		void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
331 		void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
		// refract* take their third source operand as a single Float4.
332 		void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
333 		void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
334 		void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
335 		void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
336 		void sgn(Vector4f &dst, const Vector4f &src);
337 		void isgn(Vector4f &dst, const Vector4f &src);
338 		void abs(Vector4f &dst, const Vector4f &src);
339 		void iabs(Vector4f &dst, const Vector4f &src);
340 		void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false);
341 		void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false);
342 		void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false);
343 		void sincos(Vector4f &dst, const Vector4f &src, bool pp = false);
344 		void cos(Vector4f &dst, const Vector4f &src, bool pp = false);
345 		void sin(Vector4f &dst, const Vector4f &src, bool pp = false);
346 		void tan(Vector4f &dst, const Vector4f &src, bool pp = false);
347 		void acos(Vector4f &dst, const Vector4f &src, bool pp = false);
348 		void asin(Vector4f &dst, const Vector4f &src, bool pp = false);
349 		void atan(Vector4f &dst, const Vector4f &src, bool pp = false);
350 		void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
351 		void cosh(Vector4f &dst, const Vector4f &src, bool pp = false);
352 		void sinh(Vector4f &dst, const Vector4f &src, bool pp = false);
353 		void tanh(Vector4f &dst, const Vector4f &src, bool pp = false);
354 		void acosh(Vector4f &dst, const Vector4f &src, bool pp = false);
355 		void asinh(Vector4f &dst, const Vector4f &src, bool pp = false);
356 		void atanh(Vector4f &dst, const Vector4f &src, bool pp = false);
		// expp / logp additionally receive the shader model as an unsigned short.
357 		void expp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel);
358 		void logp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel);
359 		void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
		// Comparisons parameterised by a Control value passed by value.
360 		void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
361 		void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
362 		void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
363 		void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
364 		void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1);
365 		void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index);
366 		void all(Float4 &dst, const Vector4f &src);
367 		void any(Float4 &dst, const Vector4f &src);
368 		void bitwise_not(Vector4f &dst, const Vector4f &src);
369 		void bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
370 		void bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
371 		void bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
372 		void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
373 		void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
374 
375 	private:
		// Float4-level helpers, several sharing a name with a public Vector4f operation.
		// NOTE(review): presumably the per-component workers behind the public
		// versions - confirm against the definitions.
376 		void sgn(Float4 &dst, const Float4 &src);
377 		void isgn(Float4 &dst, const Float4 &src);
378 		void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
379 		void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
380 		void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2);
381 		void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits);
382 		void halfToFloatBits(Float4& dst, const Float4& halfBits);
383 	};
384 }
385 
386 #ifdef ENABLE_RR_PRINT
387 namespace rr {
388 	template <> struct PrintValue::Ty<sw::Vector4f>
389 	{
fmtrr::PrintValue::Ty390 		static std::string fmt(const sw::Vector4f& v)
391 		{
392 			return "[x: " + PrintValue::fmt(v.x) +
393 			       ", y: " + PrintValue::fmt(v.y) +
394 			       ", z: " + PrintValue::fmt(v.z) +
395 			       ", w: " + PrintValue::fmt(v.w) + "]";
396 		}
397 
valrr::PrintValue::Ty398 		static std::vector<rr::Value*> val(const sw::Vector4f& v)
399 		{
400 			return PrintValue::vals(v.x, v.y, v.z, v.w);
401 		}
402 	};
403 	template <> struct PrintValue::Ty<sw::Vector4s>
404 	{
fmtrr::PrintValue::Ty405 		static std::string fmt(const sw::Vector4s& v)
406 		{
407 			return "[x: " + PrintValue::fmt(v.x) +
408 			       ", y: " + PrintValue::fmt(v.y) +
409 			       ", z: " + PrintValue::fmt(v.z) +
410 			       ", w: " + PrintValue::fmt(v.w) + "]";
411 		}
412 
valrr::PrintValue::Ty413 		static std::vector<rr::Value*> val(const sw::Vector4s& v)
414 		{
415 			return PrintValue::vals(v.x, v.y, v.z, v.w);
416 		}
417 	};
418 }
419 #endif // ENABLE_RR_PRINT
420 
421 #endif   // sw_ShaderCore_hpp
422