1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "VertexRoutine.hpp" 16 17 #include "VertexShader.hpp" 18 #include "Constants.hpp" 19 #include "Device/Vertex.hpp" 20 #include "Device/Renderer.hpp" 21 #include "System/Half.hpp" 22 #include "Vulkan/VkDebug.hpp" 23 24 namespace sw 25 { 26 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 27 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1] 28 VertexRoutine(const VertexProcessor::State & state,const VertexShader * shader)29 VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) 30 : v(shader && shader->indirectAddressableInput), 31 o(shader && shader->indirectAddressableOutput), 32 state(state) 33 { 34 } 35 ~VertexRoutine()36 VertexRoutine::~VertexRoutine() 37 { 38 } 39 generate()40 void VertexRoutine::generate() 41 { 42 const bool textureSampling = state.textureSampling; 43 44 Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache); 45 Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex); 46 Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag); 47 48 UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount)); 49 UInt primitiveNumber = *Pointer<UInt>(task + OFFSET(VertexTask, primitiveStart)); 50 UInt indexInPrimitive = 0; 51 52 constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); 53 54 Do 55 { 56 UInt index = *Pointer<UInt>(batch); 57 UInt tagIndex = index & 0x0000003C; 58 UInt indexQ = !textureSampling ? UInt(index & 0xFFFFFFFC) : index; // FIXME: TEXLDL hack to have independent LODs, hurts performance. 59 60 If(*Pointer<UInt>(tagCache + tagIndex) != indexQ) 61 { 62 *Pointer<UInt>(tagCache + tagIndex) = indexQ; 63 64 readInput(indexQ); 65 program(indexQ); 66 postTransform(); 67 computeClipFlags(); 68 69 Pointer<Byte> cacheLine0 = vertexCache + tagIndex * UInt((int)sizeof(Vertex)); 70 writeCache(cacheLine0); 71 } 72 73 UInt cacheIndex = index & 0x0000003F; 74 Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex)); 75 writeVertex(vertex, cacheLine); 76 77 if(state.transformFeedbackEnabled != 0) 78 { 79 transformFeedback(vertex, primitiveNumber, indexInPrimitive); 80 81 indexInPrimitive++; 82 If(indexInPrimitive == 3) 83 { 84 primitiveNumber++; 85 indexInPrimitive = 0; 86 } 87 } 88 89 vertex += sizeof(Vertex); 90 batch += sizeof(unsigned int); 91 vertexCount--; 92 } 93 Until(vertexCount == 0) 94 95 Return(); 96 } 97 readInput(UInt & index)98 void VertexRoutine::readInput(UInt &index) 99 { 100 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 101 { 102 Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,input) + sizeof(void*) * i); 103 UInt stride = *Pointer<UInt>(data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i); 104 105 v[i] = readStream(input, stride, state.input[i], index); 106 } 107 } 108 computeClipFlags()109 void VertexRoutine::computeClipFlags() 110 { 111 int pos = state.positionRegister; 112 113 Int4 maxX = CmpLT(o[pos].w, o[pos].x); 114 Int4 maxY = CmpLT(o[pos].w, o[pos].y); 115 Int4 maxZ = CmpLT(o[pos].w, o[pos].z); 116 Int4 minX = CmpNLE(-o[pos].w, o[pos].x); 117 Int4 minY = CmpNLE(-o[pos].w, o[pos].y); 118 Int4 minZ = CmpNLE(Float4(0.0f), o[pos].z); 119 120 clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + SignMask(maxX) * 4); // FIXME: Array indexing 121 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + SignMask(maxY) * 4); 122 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + SignMask(maxZ) * 4); 123 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + SignMask(minX) * 4); 124 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + SignMask(minY) * 4); 125 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + SignMask(minZ) * 4); 126 127 Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); 128 Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); 129 Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); 130 131 Int4 finiteXYZ = finiteX & finiteY & finiteZ; 132 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + SignMask(finiteXYZ) * 4); 133 } 134 readStream(Pointer<Byte> & buffer,UInt & stride,const Stream & stream,const UInt & index)135 Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index) 136 { 137 const bool textureSampling = state.textureSampling; 138 139 Vector4f v; 140 141 Pointer<Byte> source0 = buffer + index * stride; 142 Pointer<Byte> source1 = source0 + (!textureSampling ? stride : 0); 143 Pointer<Byte> source2 = source1 + (!textureSampling ? stride : 0); 144 Pointer<Byte> source3 = source2 + (!textureSampling ? stride : 0); 145 146 bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || stream.normalized; 147 148 switch(stream.type) 149 { 150 case STREAMTYPE_FLOAT: 151 { 152 if(stream.count == 0) 153 { 154 // Null stream, all default components 155 } 156 else 157 { 158 if(stream.count == 1) 159 { 160 v.x.x = *Pointer<Float>(source0); 161 v.x.y = *Pointer<Float>(source1); 162 v.x.z = *Pointer<Float>(source2); 163 v.x.w = *Pointer<Float>(source3); 164 } 165 else 166 { 167 v.x = *Pointer<Float4>(source0); 168 v.y = *Pointer<Float4>(source1); 169 v.z = *Pointer<Float4>(source2); 170 v.w = *Pointer<Float4>(source3); 171 172 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 173 } 174 175 switch(stream.attribType) 176 { 177 case SpirvShader::ATTRIBTYPE_INT: 178 if(stream.count >= 1) v.x = As<Float4>(Int4(v.x)); 179 if(stream.count >= 2) v.x = As<Float4>(Int4(v.y)); 180 if(stream.count >= 3) v.x = As<Float4>(Int4(v.z)); 181 if(stream.count >= 4) v.x = As<Float4>(Int4(v.w)); 182 break; 183 case SpirvShader::ATTRIBTYPE_UINT: 184 if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x)); 185 if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y)); 186 if(stream.count >= 3) v.x = As<Float4>(UInt4(v.z)); 187 if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w)); 188 break; 189 default: 190 break; 191 } 192 } 193 } 194 break; 195 case STREAMTYPE_BYTE: 196 if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float 197 { 198 v.x = Float4(*Pointer<Byte4>(source0)); 199 v.y = Float4(*Pointer<Byte4>(source1)); 200 v.z = Float4(*Pointer<Byte4>(source2)); 201 v.w = Float4(*Pointer<Byte4>(source3)); 202 203 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 204 205 if(stream.normalized) 206 { 207 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 208 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 209 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 210 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 211 } 212 } 213 else // Stream: UByte, Shader attrib: Int / UInt 214 { 215 v.x = As<Float4>(Int4(*Pointer<Byte4>(source0))); 216 v.y = As<Float4>(Int4(*Pointer<Byte4>(source1))); 217 v.z = As<Float4>(Int4(*Pointer<Byte4>(source2))); 218 v.w = As<Float4>(Int4(*Pointer<Byte4>(source3))); 219 220 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 221 } 222 break; 223 case STREAMTYPE_SBYTE: 224 if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float 225 { 226 v.x = Float4(*Pointer<SByte4>(source0)); 227 v.y = Float4(*Pointer<SByte4>(source1)); 228 v.z = Float4(*Pointer<SByte4>(source2)); 229 v.w = Float4(*Pointer<SByte4>(source3)); 230 231 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 232 233 if(stream.normalized) 234 { 235 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 236 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 237 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 238 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 239 } 240 } 241 else // Stream: SByte, Shader attrib: Int / UInt 242 { 243 v.x = As<Float4>(Int4(*Pointer<SByte4>(source0))); 244 v.y = As<Float4>(Int4(*Pointer<SByte4>(source1))); 245 v.z = As<Float4>(Int4(*Pointer<SByte4>(source2))); 246 v.w = As<Float4>(Int4(*Pointer<SByte4>(source3))); 247 248 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 249 } 250 break; 251 case STREAMTYPE_COLOR: 252 { 253 v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 254 v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 255 v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 256 v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 257 258 transpose4x4(v.x, v.y, v.z, v.w); 259 260 // Swap red and blue 261 Float4 t = v.x; 262 v.x = v.z; 263 v.z = t; 264 } 265 break; 266 case STREAMTYPE_SHORT: 267 if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float 268 { 269 v.x = Float4(*Pointer<Short4>(source0)); 270 v.y = Float4(*Pointer<Short4>(source1)); 271 v.z = Float4(*Pointer<Short4>(source2)); 272 v.w = Float4(*Pointer<Short4>(source3)); 273 274 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 275 276 if(stream.normalized) 277 { 278 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 279 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 280 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 281 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 282 } 283 } 284 else // Stream: Short, Shader attrib: Int/UInt, no type conversion 285 { 286 v.x = As<Float4>(Int4(*Pointer<Short4>(source0))); 287 v.y = As<Float4>(Int4(*Pointer<Short4>(source1))); 288 v.z = As<Float4>(Int4(*Pointer<Short4>(source2))); 289 v.w = As<Float4>(Int4(*Pointer<Short4>(source3))); 290 291 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 292 } 293 break; 294 case STREAMTYPE_USHORT: 295 if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float 296 { 297 v.x = Float4(*Pointer<UShort4>(source0)); 298 v.y = Float4(*Pointer<UShort4>(source1)); 299 v.z = Float4(*Pointer<UShort4>(source2)); 300 v.w = Float4(*Pointer<UShort4>(source3)); 301 302 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 303 304 if(stream.normalized) 305 { 306 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 307 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 308 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 309 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 310 } 311 } 312 else // Stream: UShort, Shader attrib: Int/UInt, no type conversion 313 { 314 v.x = As<Float4>(Int4(*Pointer<UShort4>(source0))); 315 v.y = As<Float4>(Int4(*Pointer<UShort4>(source1))); 316 v.z = As<Float4>(Int4(*Pointer<UShort4>(source2))); 317 v.w = As<Float4>(Int4(*Pointer<UShort4>(source3))); 318 319 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 320 } 321 break; 322 case STREAMTYPE_INT: 323 if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float 324 { 325 v.x = Float4(*Pointer<Int4>(source0)); 326 v.y = Float4(*Pointer<Int4>(source1)); 327 v.z = Float4(*Pointer<Int4>(source2)); 328 v.w = Float4(*Pointer<Int4>(source3)); 329 330 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 331 332 if(stream.normalized) 333 { 334 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 335 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 336 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 337 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 338 } 339 } 340 else // Stream: Int, Shader attrib: Int/UInt, no type conversion 341 { 342 v.x = *Pointer<Float4>(source0); 343 v.y = *Pointer<Float4>(source1); 344 v.z = *Pointer<Float4>(source2); 345 v.w = *Pointer<Float4>(source3); 346 347 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 348 } 349 break; 350 case STREAMTYPE_UINT: 351 if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float 352 { 353 v.x = Float4(*Pointer<UInt4>(source0)); 354 v.y = Float4(*Pointer<UInt4>(source1)); 355 v.z = Float4(*Pointer<UInt4>(source2)); 356 v.w = Float4(*Pointer<UInt4>(source3)); 357 358 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 359 360 if(stream.normalized) 361 { 362 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 363 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 364 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 365 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 366 } 367 } 368 else // Stream: UInt, Shader attrib: Int/UInt, no type conversion 369 { 370 v.x = *Pointer<Float4>(source0); 371 v.y = *Pointer<Float4>(source1); 372 v.z = *Pointer<Float4>(source2); 373 v.w = *Pointer<Float4>(source3); 374 375 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 376 } 377 break; 378 case STREAMTYPE_UDEC3: 379 { 380 // FIXME: Vectorize 381 { 382 Int x, y, z; 383 384 x = y = z = *Pointer<Int>(source0); 385 386 v.x.x = Float(x & 0x000003FF); 387 v.x.y = Float(y & 0x000FFC00); 388 v.x.z = Float(z & 0x3FF00000); 389 } 390 391 { 392 Int x, y, z; 393 394 x = y = z = *Pointer<Int>(source1); 395 396 v.y.x = Float(x & 0x000003FF); 397 v.y.y = Float(y & 0x000FFC00); 398 v.y.z = Float(z & 0x3FF00000); 399 } 400 401 { 402 Int x, y, z; 403 404 x = y = z = *Pointer<Int>(source2); 405 406 v.z.x = Float(x & 0x000003FF); 407 v.z.y = Float(y & 0x000FFC00); 408 v.z.z = Float(z & 0x3FF00000); 409 } 410 411 { 412 Int x, y, z; 413 414 x = y = z = *Pointer<Int>(source3); 415 416 v.w.x = Float(x & 0x000003FF); 417 v.w.y = Float(y & 0x000FFC00); 418 v.w.z = Float(z & 0x3FF00000); 419 } 420 421 transpose4x3(v.x, v.y, v.z, v.w); 422 423 v.y *= Float4(1.0f / 0x00000400); 424 v.z *= Float4(1.0f / 0x00100000); 425 } 426 break; 427 case STREAMTYPE_DEC3N: 428 { 429 // FIXME: Vectorize 430 { 431 Int x, y, z; 432 433 x = y = z = *Pointer<Int>(source0); 434 435 v.x.x = Float((x << 22) & 0xFFC00000); 436 v.x.y = Float((y << 12) & 0xFFC00000); 437 v.x.z = Float((z << 2) & 0xFFC00000); 438 } 439 440 { 441 Int x, y, z; 442 443 x = y = z = *Pointer<Int>(source1); 444 445 v.y.x = Float((x << 22) & 0xFFC00000); 446 v.y.y = Float((y << 12) & 0xFFC00000); 447 v.y.z = Float((z << 2) & 0xFFC00000); 448 } 449 450 { 451 Int x, y, z; 452 453 x = y = z = *Pointer<Int>(source2); 454 455 v.z.x = Float((x << 22) & 0xFFC00000); 456 v.z.y = Float((y << 12) & 0xFFC00000); 457 v.z.z = Float((z << 2) & 0xFFC00000); 458 } 459 460 { 461 Int x, y, z; 462 463 x = y = z = *Pointer<Int>(source3); 464 465 v.w.x = Float((x << 22) & 0xFFC00000); 466 v.w.y = Float((y << 12) & 0xFFC00000); 467 v.w.z = Float((z << 2) & 0xFFC00000); 468 } 469 470 transpose4x3(v.x, v.y, v.z, v.w); 471 472 v.x *= Float4(1.0f / 0x00400000 / 511.0f); 473 v.y *= Float4(1.0f / 0x00400000 / 511.0f); 474 v.z *= Float4(1.0f / 0x00400000 / 511.0f); 475 } 476 break; 477 case STREAMTYPE_FIXED: 478 { 479 v.x = Float4(*Pointer<Int4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 480 v.y = Float4(*Pointer<Int4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 481 v.z = Float4(*Pointer<Int4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 482 v.w = Float4(*Pointer<Int4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 483 484 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 485 } 486 break; 487 case STREAMTYPE_HALF: 488 { 489 if(stream.count >= 1) 490 { 491 UShort x0 = *Pointer<UShort>(source0 + 0); 492 UShort x1 = *Pointer<UShort>(source1 + 0); 493 UShort x2 = *Pointer<UShort>(source2 + 0); 494 UShort x3 = *Pointer<UShort>(source3 + 0); 495 496 v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4); 497 v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 4); 498 v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4); 499 v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4); 500 } 501 502 if(stream.count >= 2) 503 { 504 UShort y0 = *Pointer<UShort>(source0 + 2); 505 UShort y1 = *Pointer<UShort>(source1 + 2); 506 UShort y2 = *Pointer<UShort>(source2 + 2); 507 UShort y3 = *Pointer<UShort>(source3 + 2); 508 509 v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4); 510 v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4); 511 v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4); 512 v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4); 513 } 514 515 if(stream.count >= 3) 516 { 517 UShort z0 = *Pointer<UShort>(source0 + 4); 518 UShort z1 = *Pointer<UShort>(source1 + 4); 519 UShort z2 = *Pointer<UShort>(source2 + 4); 520 UShort z3 = *Pointer<UShort>(source3 + 4); 521 522 v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4); 523 v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4); 524 v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4); 525 v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4); 526 } 527 528 if(stream.count >= 4) 529 { 530 UShort w0 = *Pointer<UShort>(source0 + 6); 531 UShort w1 = *Pointer<UShort>(source1 + 6); 532 UShort w2 = *Pointer<UShort>(source2 + 6); 533 UShort w3 = *Pointer<UShort>(source3 + 6); 534 535 v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4); 536 v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4); 537 v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4); 538 v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4); 539 } 540 } 541 break; 542 case STREAMTYPE_INDICES: 543 { 544 v.x.x = *Pointer<Float>(source0); 545 v.x.y = *Pointer<Float>(source1); 546 v.x.z = *Pointer<Float>(source2); 547 v.x.w = *Pointer<Float>(source3); 548 } 549 break; 550 case STREAMTYPE_2_10_10_10_INT: 551 { 552 Int4 src; 553 src = Insert(src, *Pointer<Int>(source0), 0); 554 src = Insert(src, *Pointer<Int>(source1), 1); 555 src = Insert(src, *Pointer<Int>(source2), 2); 556 src = Insert(src, *Pointer<Int>(source3), 3); 557 558 v.x = Float4((src << 22) >> 22); 559 v.y = Float4((src << 12) >> 22); 560 v.z = Float4((src << 02) >> 22); 561 v.w = Float4(src >> 30); 562 563 if(stream.normalized) 564 { 565 v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f)); 566 v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f)); 567 v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f)); 568 v.w = Max(v.w, Float4(-1.0f)); 569 } 570 } 571 break; 572 case STREAMTYPE_2_10_10_10_UINT: 573 { 574 Int4 src; 575 src = Insert(src, *Pointer<Int>(source0), 0); 576 src = Insert(src, *Pointer<Int>(source1), 1); 577 src = Insert(src, *Pointer<Int>(source2), 2); 578 src = Insert(src, *Pointer<Int>(source3), 3); 579 580 v.x = Float4(src & Int4(0x3FF)); 581 v.y = Float4((src >> 10) & Int4(0x3FF)); 582 v.z = Float4((src >> 20) & Int4(0x3FF)); 583 v.w = Float4((src >> 30) & Int4(0x3)); 584 585 if(stream.normalized) 586 { 587 v.x *= Float4(1.0f / 0x3FF); 588 v.y *= Float4(1.0f / 0x3FF); 589 v.z *= Float4(1.0f / 0x3FF); 590 v.w *= Float4(1.0f / 0x3); 591 } 592 } 593 break; 594 default: 595 ASSERT(false); 596 } 597 598 if(stream.count < 1) v.x = Float4(0.0f); 599 if(stream.count < 2) v.y = Float4(0.0f); 600 if(stream.count < 3) v.z = Float4(0.0f); 601 if(stream.count < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(0)); 602 603 return v; 604 } 605 postTransform()606 void VertexRoutine::postTransform() 607 { 608 int pos = state.positionRegister; 609 610 if(!halfIntegerCoordinates) 611 { 612 o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelX)) * o[pos].w; 613 o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelY)) * o[pos].w; 614 } 615 } 616 writeCache(Pointer<Byte> & cacheLine)617 void VertexRoutine::writeCache(Pointer<Byte> &cacheLine) 618 { 619 Vector4f v; 620 621 for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++) 622 { 623 if(state.output[i].write) 624 { 625 v.x = o[i].x; 626 v.y = o[i].y; 627 v.z = o[i].z; 628 v.w = o[i].w; 629 630 if(state.output[i].xClamp) 631 { 632 v.x = Max(v.x, Float4(0.0f)); 633 v.x = Min(v.x, Float4(1.0f)); 634 } 635 636 if(state.output[i].yClamp) 637 { 638 v.y = Max(v.y, Float4(0.0f)); 639 v.y = Min(v.y, Float4(1.0f)); 640 } 641 642 if(state.output[i].zClamp) 643 { 644 v.z = Max(v.z, Float4(0.0f)); 645 v.z = Min(v.z, Float4(1.0f)); 646 } 647 648 if(state.output[i].wClamp) 649 { 650 v.w = Max(v.w, Float4(0.0f)); 651 v.w = Min(v.w, Float4(1.0f)); 652 } 653 654 if(state.output[i].write == 0x01) 655 { 656 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0) = v.x.x; 657 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1) = v.x.y; 658 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2) = v.x.z; 659 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3) = v.x.w; 660 } 661 else 662 { 663 if(state.output[i].write == 0x03) 664 { 665 transpose2x4(v.x, v.y, v.z, v.w); 666 } 667 else 668 { 669 transpose4x4(v.x, v.y, v.z, v.w); 670 } 671 672 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0, 16) = v.x; 673 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1, 16) = v.y; 674 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2, 16) = v.z; 675 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3, 16) = v.w; 676 } 677 } 678 } 679 680 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (clipFlags >> 0) & 0x0000000FF; 681 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (clipFlags >> 8) & 0x0000000FF; 682 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF; 683 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF; 684 685 // Viewport transform 686 int pos = state.positionRegister; 687 688 v.x = o[pos].x; 689 v.y = o[pos].y; 690 v.z = o[pos].z; 691 v.w = o[pos].w; 692 693 Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f)))); 694 Float4 rhw = Float4(1.0f) / w; 695 696 v.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Wx16)))); 697 v.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Hx16)))); 698 v.z = v.z * rhw; 699 v.w = rhw; 700 701 transpose4x4(v.x, v.y, v.z, v.w); 702 703 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 0, 16) = v.x; 704 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 1, 16) = v.y; 705 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 2, 16) = v.z; 706 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w; 707 } 708 writeVertex(const Pointer<Byte> & vertex,Pointer<Byte> & cache)709 void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache) 710 { 711 for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++) 712 { 713 if(state.output[i].write) 714 { 715 *Pointer<Int4>(vertex + OFFSET(Vertex,v[i]), 16) = *Pointer<Int4>(cache + OFFSET(Vertex,v[i]), 16); 716 } 717 } 718 719 *Pointer<Int4>(vertex + OFFSET(Vertex,X)) = *Pointer<Int4>(cache + OFFSET(Vertex,X)); 720 *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags)); 721 } 722 transformFeedback(const Pointer<Byte> & vertex,const UInt & primitiveNumber,const UInt & indexInPrimitive)723 void VertexRoutine::transformFeedback(const Pointer<Byte> &vertex, const UInt &primitiveNumber, const UInt &indexInPrimitive) 724 { 725 If(indexInPrimitive < state.verticesPerPrimitive) 726 { 727 UInt tOffset = primitiveNumber * state.verticesPerPrimitive + indexInPrimitive; 728 729 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 730 { 731 if(state.transformFeedbackEnabled & (1ULL << i)) 732 { 733 UInt reg = *Pointer<UInt>(data + OFFSET(DrawData, vs.reg[i])); 734 UInt row = *Pointer<UInt>(data + OFFSET(DrawData, vs.row[i])); 735 UInt col = *Pointer<UInt>(data + OFFSET(DrawData, vs.col[i])); 736 UInt str = *Pointer<UInt>(data + OFFSET(DrawData, vs.str[i])); 737 738 Pointer<Byte> t = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.t[i])) + (tOffset * str * sizeof(float)); 739 Pointer<Byte> v = vertex + OFFSET(Vertex, v) + reg * sizeof(float); 740 741 For(UInt r = 0, r < row, r++) 742 { 743 UInt rOffsetX = r * col * sizeof(float); 744 UInt rOffset4 = r * sizeof(float4); 745 746 For(UInt c = 0, c < col, c++) 747 { 748 UInt cOffset = c * sizeof(float); 749 *Pointer<Float>(t + rOffsetX + cOffset) = *Pointer<Float>(v + rOffset4 + cOffset); 750 } 751 } 752 } 753 } 754 } 755 } 756 } 757