1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef sw_ShaderCore_hpp 16 #define sw_ShaderCore_hpp 17 18 #include "Shader.hpp" 19 #include "Reactor/Reactor.hpp" 20 #include "Reactor/Print.hpp" 21 #include "Common/Debug.hpp" 22 23 namespace sw 24 { 25 using namespace rr; 26 27 class Vector4s 28 { 29 public: 30 Vector4s(); 31 Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w); 32 Vector4s(const Vector4s &rhs); 33 34 Short4 &operator[](int i); 35 Vector4s &operator=(const Vector4s &rhs); 36 37 Short4 x; 38 Short4 y; 39 Short4 z; 40 Short4 w; 41 }; 42 43 class Vector4f 44 { 45 public: 46 Vector4f(); 47 Vector4f(float x, float y, float z, float w); 48 Vector4f(const Vector4f &rhs); 49 50 Float4 &operator[](int i); 51 Vector4f &operator=(const Vector4f &rhs); 52 53 Float4 x; 54 Float4 y; 55 Float4 z; 56 Float4 w; 57 }; 58 59 Float4 exponential2(RValue<Float4> x, bool pp = false); 60 Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false); 61 Float4 exponential(RValue<Float4> x, bool pp = false); 62 Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false); 63 Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false); 64 Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false); 65 Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false); 66 Float4 modulo(RValue<Float4> x, RValue<Float4> y); 67 Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range 68 Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range 69 Float4 sine(RValue<Float4> x, bool pp = false); 70 Float4 cosine(RValue<Float4> x, bool pp = false); 71 Float4 tangent(RValue<Float4> x, bool pp = false); 72 Float4 arccos(RValue<Float4> x, bool pp = false); 73 Float4 arcsin(RValue<Float4> x, bool pp = false); 74 Float4 arctan(RValue<Float4> x, bool pp = false); 75 Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false); 76 Float4 sineh(RValue<Float4> x, bool pp = false); 77 Float4 cosineh(RValue<Float4> x, bool pp = false); 78 Float4 tangenth(RValue<Float4> x, bool pp = false); 79 Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1 80 Float4 arcsinh(RValue<Float4> x, bool pp = false); 81 Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range 82 83 Float4 dot2(const Vector4f &v0, const Vector4f &v1); 84 Float4 dot3(const Vector4f &v0, const Vector4f &v1); 85 Float4 dot4(const Vector4f &v0, const Vector4f &v1); 86 87 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); 88 void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); 89 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 90 void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 91 void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 92 void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 93 void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 94 void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N); 95 96 class Register 97 { 98 public: Register(const Reference<Float4> & x,const Reference<Float4> & y,const Reference<Float4> & z,const Reference<Float4> & w)99 Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w) : x(x), y(y), z(z), w(w) 100 { 101 } 102 operator [](int i)103 Reference<Float4> &operator[](int i) 104 { 105 switch(i) 106 { 107 default: 108 case 0: return x; 109 case 1: return y; 110 case 2: return z; 111 case 3: return w; 112 } 113 } 114 operator =(const Register & rhs)115 Register &operator=(const Register &rhs) 116 { 117 x = rhs.x; 118 y = rhs.y; 119 z = rhs.z; 120 w = rhs.w; 121 122 return *this; 123 } 124 operator =(const Vector4f & rhs)125 Register &operator=(const Vector4f &rhs) 126 { 127 x = rhs.x; 128 y = rhs.y; 129 z = rhs.z; 130 w = rhs.w; 131 132 return *this; 133 } 134 operator Vector4f()135 operator Vector4f() 136 { 137 Vector4f v; 138 139 v.x = x; 140 v.y = y; 141 v.z = z; 142 v.w = w; 143 144 return v; 145 } 146 147 Reference<Float4> x; 148 Reference<Float4> y; 149 Reference<Float4> z; 150 Reference<Float4> w; 151 }; 152 153 class RegisterFile 154 { 155 public: RegisterFile(int size,bool indirectAddressable)156 RegisterFile(int size, bool indirectAddressable) : size(size), indirectAddressable(indirectAddressable) 157 { 158 if(indirectAddressable) 159 { 160 x = new Array<Float4>(size); 161 y = new Array<Float4>(size); 162 z = new Array<Float4>(size); 163 w = new Array<Float4>(size); 164 } 165 else 166 { 167 x = new Array<Float4>[size]; 168 y = new Array<Float4>[size]; 169 z = new Array<Float4>[size]; 170 w = new Array<Float4>[size]; 171 } 172 } 173 ~RegisterFile()174 ~RegisterFile() 175 { 176 if(indirectAddressable) 177 { 178 delete x; 179 delete y; 180 delete z; 181 delete w; 182 } 183 else 184 { 185 delete[] x; 186 delete[] y; 187 delete[] z; 188 delete[] w; 189 } 190 } 191 operator [](int i)192 Register operator[](int i) 193 { 194 ASSERT(i < size); 195 if(indirectAddressable) 196 { 197 return Register(x[0][i], y[0][i], z[0][i], w[0][i]); 198 } 199 else 200 { 201 return Register(x[i][0], y[i][0], z[i][0], w[i][0]); 202 } 203 } 204 operator [](RValue<Int> i)205 Register operator[](RValue<Int> i) 206 { 207 ASSERT(indirectAddressable); 208 209 return Register(x[0][i], y[0][i], z[0][i], w[0][i]); 210 } 211 212 const Vector4f operator[](RValue<Int4> i); // Gather operation (read only). 213 214 void scatter_x(Int4 i, RValue<Float4> r); 215 void scatter_y(Int4 i, RValue<Float4> r); 216 void scatter_z(Int4 i, RValue<Float4> r); 217 void scatter_w(Int4 i, RValue<Float4> r); 218 219 protected: 220 const int size; 221 const bool indirectAddressable; 222 Array<Float4> *x; 223 Array<Float4> *y; 224 Array<Float4> *z; 225 Array<Float4> *w; 226 }; 227 228 template<int S, bool I = false> 229 class RegisterArray : public RegisterFile 230 { 231 public: RegisterArray(bool indirectAddressable=I)232 RegisterArray(bool indirectAddressable = I) : RegisterFile(S, indirectAddressable) 233 { 234 } 235 }; 236 237 class ShaderCore 238 { 239 typedef Shader::Control Control; 240 241 public: 242 void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false); 243 void neg(Vector4f &dst, const Vector4f &src); 244 void ineg(Vector4f &dst, const Vector4f &src); 245 void f2b(Vector4f &dst, const Vector4f &src); 246 void b2f(Vector4f &dst, const Vector4f &src); 247 void f2i(Vector4f &dst, const Vector4f &src); 248 void i2f(Vector4f &dst, const Vector4f &src); 249 void f2u(Vector4f &dst, const Vector4f &src); 250 void u2f(Vector4f &dst, const Vector4f &src); 251 void i2b(Vector4f &dst, const Vector4f &src); 252 void b2i(Vector4f &dst, const Vector4f &src); 253 void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 254 void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 255 void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 256 void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 257 void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 258 void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 259 void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 260 void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 261 void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false); 262 void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 263 void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 264 void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 265 void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 266 void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 267 void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 268 void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 269 void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 270 void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 271 void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false); 272 void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false); 273 void rsq(Vector4f &dst, const Vector4f &src, bool pp = false); 274 void len2(Float4 &dst, const Vector4f &src, bool pp = false); 275 void len3(Float4 &dst, const Vector4f &src, bool pp = false); 276 void len4(Float4 &dst, const Vector4f &src, bool pp = false); 277 void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 278 void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 279 void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 280 void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 281 void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 282 void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 283 void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 284 void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 285 void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 286 void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 287 void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 288 void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3); 289 void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 290 void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 291 void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 292 void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 293 void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 294 void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 295 void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 296 void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 297 void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false); 298 void exp2(Vector4f &dst, const Vector4f &src, bool pp = false); 299 void exp(Vector4f &dst, const Vector4f &src, bool pp = false); 300 void log2x(Vector4f &dst, const Vector4f &src, bool pp = false); 301 void log2(Vector4f &dst, const Vector4f &src, bool pp = false); 302 void log(Vector4f &dst, const Vector4f &src, bool pp = false); 303 void lit(Vector4f &dst, const Vector4f &src); 304 void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 305 void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 306 void isinf(Vector4f &dst, const Vector4f &src); 307 void isnan(Vector4f &dst, const Vector4f &src); 308 void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 309 void packHalf2x16(Vector4f &dst, const Vector4f &src); 310 void unpackHalf2x16(Vector4f &dst, const Vector4f &src); 311 void packSnorm2x16(Vector4f &dst, const Vector4f &src); 312 void packUnorm2x16(Vector4f &dst, const Vector4f &src); 313 void unpackSnorm2x16(Vector4f &dst, const Vector4f &src); 314 void unpackUnorm2x16(Vector4f &dst, const Vector4f &src); 315 void frc(Vector4f &dst, const Vector4f &src); 316 void trunc(Vector4f &dst, const Vector4f &src); 317 void floor(Vector4f &dst, const Vector4f &src); 318 void round(Vector4f &dst, const Vector4f &src); 319 void roundEven(Vector4f &dst, const Vector4f &src); 320 void ceil(Vector4f &dst, const Vector4f &src); 321 void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 322 void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 323 void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 324 void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 325 void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 326 void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 327 void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 328 void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 329 void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 330 void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 331 void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 332 void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 333 void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 334 void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 335 void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 336 void sgn(Vector4f &dst, const Vector4f &src); 337 void isgn(Vector4f &dst, const Vector4f &src); 338 void abs(Vector4f &dst, const Vector4f &src); 339 void iabs(Vector4f &dst, const Vector4f &src); 340 void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false); 341 void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false); 342 void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false); 343 void sincos(Vector4f &dst, const Vector4f &src, bool pp = false); 344 void cos(Vector4f &dst, const Vector4f &src, bool pp = false); 345 void sin(Vector4f &dst, const Vector4f &src, bool pp = false); 346 void tan(Vector4f &dst, const Vector4f &src, bool pp = false); 347 void acos(Vector4f &dst, const Vector4f &src, bool pp = false); 348 void asin(Vector4f &dst, const Vector4f &src, bool pp = false); 349 void atan(Vector4f &dst, const Vector4f &src, bool pp = false); 350 void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 351 void cosh(Vector4f &dst, const Vector4f &src, bool pp = false); 352 void sinh(Vector4f &dst, const Vector4f &src, bool pp = false); 353 void tanh(Vector4f &dst, const Vector4f &src, bool pp = false); 354 void acosh(Vector4f &dst, const Vector4f &src, bool pp = false); 355 void asinh(Vector4f &dst, const Vector4f &src, bool pp = false); 356 void atanh(Vector4f &dst, const Vector4f &src, bool pp = false); 357 void expp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel); 358 void logp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel); 359 void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 360 void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); 361 void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); 362 void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); 363 void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 364 void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1); 365 void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index); 366 void all(Float4 &dst, const Vector4f &src); 367 void any(Float4 &dst, const Vector4f &src); 368 void bitwise_not(Vector4f &dst, const Vector4f &src); 369 void bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 370 void bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 371 void bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 372 void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 373 void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 374 375 private: 376 void sgn(Float4 &dst, const Float4 &src); 377 void isgn(Float4 &dst, const Float4 &src); 378 void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2); 379 void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2); 380 void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2); 381 void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits); 382 void halfToFloatBits(Float4& dst, const Float4& halfBits); 383 }; 384 } 385 386 #ifdef ENABLE_RR_PRINT 387 namespace rr { 388 template <> struct PrintValue::Ty<sw::Vector4f> 389 { fmtrr::PrintValue::Ty390 static std::string fmt(const sw::Vector4f& v) 391 { 392 return "[x: " + PrintValue::fmt(v.x) + 393 ", y: " + PrintValue::fmt(v.y) + 394 ", z: " + PrintValue::fmt(v.z) + 395 ", w: " + PrintValue::fmt(v.w) + "]"; 396 } 397 valrr::PrintValue::Ty398 static std::vector<rr::Value*> val(const sw::Vector4f& v) 399 { 400 return PrintValue::vals(v.x, v.y, v.z, v.w); 401 } 402 }; 403 template <> struct PrintValue::Ty<sw::Vector4s> 404 { fmtrr::PrintValue::Ty405 static std::string fmt(const sw::Vector4s& v) 406 { 407 return "[x: " + PrintValue::fmt(v.x) + 408 ", y: " + PrintValue::fmt(v.y) + 409 ", z: " + PrintValue::fmt(v.z) + 410 ", w: " + PrintValue::fmt(v.w) + "]"; 411 } 412 valrr::PrintValue::Ty413 static std::vector<rr::Value*> val(const sw::Vector4s& v) 414 { 415 return PrintValue::vals(v.x, v.y, v.z, v.w); 416 } 417 }; 418 } 419 #endif // ENABLE_RR_PRINT 420 421 #endif // sw_ShaderCore_hpp 422