1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef sw_ShaderCore_hpp 16 #define sw_ShaderCore_hpp 17 18 #include "Debug.hpp" 19 #include "Shader.hpp" 20 #include "Reactor/Reactor.hpp" 21 22 namespace sw 23 { 24 class Vector4s 25 { 26 public: 27 Vector4s(); 28 Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w); 29 Vector4s(const Vector4s &rhs); 30 31 Short4 &operator[](int i); 32 Vector4s &operator=(const Vector4s &rhs); 33 34 Short4 x; 35 Short4 y; 36 Short4 z; 37 Short4 w; 38 }; 39 40 class Vector4i 41 { 42 public: 43 Vector4i(); 44 Vector4i(int x, int y, int z, int w); 45 Vector4i(const Vector4i &rhs); 46 47 Int4 &operator[](int i); 48 Vector4i &operator=(const Vector4i &rhs); 49 50 Int4 x; 51 Int4 y; 52 Int4 z; 53 Int4 w; 54 }; 55 56 class Vector4u 57 { 58 public: 59 Vector4u(); 60 Vector4u(unsigned int x, unsigned int y, unsigned int z, unsigned int w); 61 Vector4u(const Vector4u &rhs); 62 63 UInt4 &operator[](int i); 64 Vector4u &operator=(const Vector4u &rhs); 65 66 UInt4 x; 67 UInt4 y; 68 UInt4 z; 69 UInt4 w; 70 }; 71 72 class Vector4f 73 { 74 public: 75 Vector4f(); 76 Vector4f(float x, float y, float z, float w); 77 Vector4f(const Vector4f &rhs); 78 79 Float4 &operator[](int i); 80 Vector4f &operator=(const Vector4f &rhs); 81 82 Float4 x; 83 Float4 y; 84 Float4 z; 85 Float4 w; 86 }; 87 88 Float4 exponential2(RValue<Float4> x, bool pp = false); 89 Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false); 90 Float4 exponential(RValue<Float4> x, bool pp = false); 91 Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false); 92 Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false); 93 Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false); 94 Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false); 95 Float4 modulo(RValue<Float4> x, RValue<Float4> y); 96 Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range 97 Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range 98 Float4 sine(RValue<Float4> x, bool pp = false); 99 Float4 cosine(RValue<Float4> x, bool pp = false); 100 Float4 tangent(RValue<Float4> x, bool pp = false); 101 Float4 arccos(RValue<Float4> x, bool pp = false); 102 Float4 arcsin(RValue<Float4> x, bool pp = false); 103 Float4 arctan(RValue<Float4> x, bool pp = false); 104 Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false); 105 Float4 sineh(RValue<Float4> x, bool pp = false); 106 Float4 cosineh(RValue<Float4> x, bool pp = false); 107 Float4 tangenth(RValue<Float4> x, bool pp = false); 108 Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1 109 Float4 arcsinh(RValue<Float4> x, bool pp = false); 110 Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range 111 112 Float4 dot2(const Vector4f &v0, const Vector4f &v1); 113 Float4 dot3(const Vector4f &v0, const Vector4f &v1); 114 Float4 dot4(const Vector4f &v0, const Vector4f &v1); 115 116 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); 117 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 118 void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 119 void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 120 void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 121 void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 122 void transpose2x4h(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 123 void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N); 124 125 class Register 126 { 127 public: Register(const Reference<Float4> & x,const Reference<Float4> & y,const Reference<Float4> & z,const Reference<Float4> & w)128 Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w) : x(x), y(y), z(z), w(w) 129 { 130 } 131 operator [](int i)132 Reference<Float4> &operator[](int i) 133 { 134 switch(i) 135 { 136 default: 137 case 0: return x; 138 case 1: return y; 139 case 2: return z; 140 case 3: return w; 141 } 142 } 143 operator =(const Register & rhs)144 Register &operator=(const Register &rhs) 145 { 146 x = rhs.x; 147 y = rhs.y; 148 z = rhs.z; 149 w = rhs.w; 150 151 return *this; 152 } 153 operator =(const Vector4f & rhs)154 Register &operator=(const Vector4f &rhs) 155 { 156 x = rhs.x; 157 y = rhs.y; 158 z = rhs.z; 159 w = rhs.w; 160 161 return *this; 162 } 163 operator Vector4f()164 operator Vector4f() 165 { 166 Vector4f v; 167 168 v.x = x; 169 v.y = y; 170 v.z = z; 171 v.w = w; 172 173 return v; 174 } 175 176 Reference<Float4> x; 177 Reference<Float4> y; 178 Reference<Float4> z; 179 Reference<Float4> w; 180 }; 181 182 template<int S, bool D = false> 183 class RegisterArray 184 { 185 public: RegisterArray(bool dynamic=D)186 RegisterArray(bool dynamic = D) : dynamic(dynamic) 187 { 188 if(dynamic) 189 { 190 x = new Array<Float4>(S); 191 y = new Array<Float4>(S); 192 z = new Array<Float4>(S); 193 w = new Array<Float4>(S); 194 } 195 else 196 { 197 x = new Array<Float4>[S]; 198 y = new Array<Float4>[S]; 199 z = new Array<Float4>[S]; 200 w = new Array<Float4>[S]; 201 } 202 } 203 ~RegisterArray()204 ~RegisterArray() 205 { 206 if(dynamic) 207 { 208 delete x; 209 delete y; 210 delete z; 211 delete w; 212 } 213 else 214 { 215 delete[] x; 216 delete[] y; 217 delete[] z; 218 delete[] w; 219 } 220 } 221 operator [](int i)222 Register operator[](int i) 223 { 224 if(dynamic) 225 { 226 return Register(x[0][i], y[0][i], z[0][i], w[0][i]); 227 } 228 else 229 { 230 return Register(x[i][0], y[i][0], z[i][0], w[i][0]); 231 } 232 } 233 operator [](RValue<Int> i)234 Register operator[](RValue<Int> i) 235 { 236 ASSERT(dynamic); 237 238 return Register(x[0][i], y[0][i], z[0][i], w[0][i]); 239 } 240 241 private: 242 const bool dynamic; 243 Array<Float4> *x; 244 Array<Float4> *y; 245 Array<Float4> *z; 246 Array<Float4> *w; 247 }; 248 249 class ShaderCore 250 { 251 typedef Shader::Control Control; 252 253 public: 254 void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false); 255 void neg(Vector4f &dst, const Vector4f &src); 256 void ineg(Vector4f &dst, const Vector4f &src); 257 void f2b(Vector4f &dst, const Vector4f &src); 258 void b2f(Vector4f &dst, const Vector4f &src); 259 void f2i(Vector4f &dst, const Vector4f &src); 260 void i2f(Vector4f &dst, const Vector4f &src); 261 void f2u(Vector4f &dst, const Vector4f &src); 262 void u2f(Vector4f &dst, const Vector4f &src); 263 void i2b(Vector4f &dst, const Vector4f &src); 264 void b2i(Vector4f &dst, const Vector4f &src); 265 void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 266 void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 267 void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 268 void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 269 void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 270 void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 271 void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 272 void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 273 void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false); 274 void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 275 void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 276 void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 277 void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 278 void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 279 void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 280 void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 281 void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 282 void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 283 void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false); 284 void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false); 285 void rsq(Vector4f &dst, const Vector4f &src, bool pp = false); 286 void len2(Float4 &dst, const Vector4f &src, bool pp = false); 287 void len3(Float4 &dst, const Vector4f &src, bool pp = false); 288 void len4(Float4 &dst, const Vector4f &src, bool pp = false); 289 void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 290 void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 291 void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 292 void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 293 void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 294 void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 295 void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 296 void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 297 void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 298 void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 299 void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 300 void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3); 301 void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 302 void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 303 void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 304 void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 305 void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 306 void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 307 void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 308 void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 309 void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false); 310 void exp2(Vector4f &dst, const Vector4f &src, bool pp = false); 311 void exp(Vector4f &dst, const Vector4f &src, bool pp = false); 312 void log2x(Vector4f &dst, const Vector4f &src, bool pp = false); 313 void log2(Vector4f &dst, const Vector4f &src, bool pp = false); 314 void log(Vector4f &dst, const Vector4f &src, bool pp = false); 315 void lit(Vector4f &dst, const Vector4f &src); 316 void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 317 void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 318 void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 319 void packHalf2x16(Vector4f &dst, const Vector4f &src); 320 void unpackHalf2x16(Vector4f &dst, const Vector4f &src); 321 void packSnorm2x16(Vector4f &dst, const Vector4f &src); 322 void packUnorm2x16(Vector4f &dst, const Vector4f &src); 323 void unpackSnorm2x16(Vector4f &dst, const Vector4f &src); 324 void unpackUnorm2x16(Vector4f &dst, const Vector4f &src); 325 void frc(Vector4f &dst, const Vector4f &src); 326 void trunc(Vector4f &dst, const Vector4f &src); 327 void floor(Vector4f &dst, const Vector4f &src); 328 void round(Vector4f &dst, const Vector4f &src); 329 void roundEven(Vector4f &dst, const Vector4f &src); 330 void ceil(Vector4f &dst, const Vector4f &src); 331 void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 332 void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 333 void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 334 void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 335 void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 336 void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 337 void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 338 void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 339 void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 340 void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 341 void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 342 void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 343 void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 344 void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 345 void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 346 void sgn(Vector4f &dst, const Vector4f &src); 347 void isgn(Vector4f &dst, const Vector4f &src); 348 void abs(Vector4f &dst, const Vector4f &src); 349 void iabs(Vector4f &dst, const Vector4f &src); 350 void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false); 351 void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false); 352 void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false); 353 void sincos(Vector4f &dst, const Vector4f &src, bool pp = false); 354 void cos(Vector4f &dst, const Vector4f &src, bool pp = false); 355 void sin(Vector4f &dst, const Vector4f &src, bool pp = false); 356 void tan(Vector4f &dst, const Vector4f &src, bool pp = false); 357 void acos(Vector4f &dst, const Vector4f &src, bool pp = false); 358 void asin(Vector4f &dst, const Vector4f &src, bool pp = false); 359 void atan(Vector4f &dst, const Vector4f &src, bool pp = false); 360 void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 361 void cosh(Vector4f &dst, const Vector4f &src, bool pp = false); 362 void sinh(Vector4f &dst, const Vector4f &src, bool pp = false); 363 void tanh(Vector4f &dst, const Vector4f &src, bool pp = false); 364 void acosh(Vector4f &dst, const Vector4f &src, bool pp = false); 365 void asinh(Vector4f &dst, const Vector4f &src, bool pp = false); 366 void atanh(Vector4f &dst, const Vector4f &src, bool pp = false); 367 void expp(Vector4f &dst, const Vector4f &src, unsigned short version); 368 void logp(Vector4f &dst, const Vector4f &src, unsigned short version); 369 void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 370 void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); 371 void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); 372 void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); 373 void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 374 void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1); 375 void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index); 376 void all(Float4 &dst, const Vector4f &src); 377 void any(Float4 &dst, const Vector4f &src); 378 void not(Vector4f &dst, const Vector4f &src); 379 void or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 380 void xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 381 void and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 382 void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 383 void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 384 385 private: 386 void sgn(Float4 &dst, const Float4 &src); 387 void isgn(Float4 &dst, const Float4 &src); 388 void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2); 389 void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2); 390 void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2); 391 void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits); 392 void halfToFloatBits(Float4& dst, const Float4& halfBits); 393 }; 394 } 395 396 #endif // sw_ShaderCore_hpp 397