1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "VertexRoutine.hpp" 16 17 #include "VertexShader.hpp" 18 #include "Vertex.hpp" 19 #include "Half.hpp" 20 #include "Renderer.hpp" 21 #include "Constants.hpp" 22 #include "Debug.hpp" 23 24 namespace sw 25 { 26 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 27 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1] 28 VertexRoutine(const VertexProcessor::State & state,const VertexShader * shader)29 VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) 30 : v(shader && shader->dynamicallyIndexedInput), 31 o(shader && shader->dynamicallyIndexedOutput), 32 state(state) 33 { 34 } 35 ~VertexRoutine()36 VertexRoutine::~VertexRoutine() 37 { 38 } 39 generate()40 void VertexRoutine::generate() 41 { 42 const bool textureSampling = state.textureSampling; 43 44 Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache); 45 Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex); 46 Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag); 47 48 UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount)); 49 UInt primitiveNumber = *Pointer<UInt>(task + OFFSET(VertexTask, primitiveStart)); 50 UInt indexInPrimitive = 0; 51 52 constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); 53 54 Do 55 { 56 UInt index = *Pointer<UInt>(batch); 57 UInt tagIndex = index & 0x0000003C; 58 UInt indexQ = !textureSampling ? UInt(index & 0xFFFFFFFC) : index; // FIXME: TEXLDL hack to have independent LODs, hurts performance. 59 60 If(*Pointer<UInt>(tagCache + tagIndex) != indexQ) 61 { 62 *Pointer<UInt>(tagCache + tagIndex) = indexQ; 63 64 readInput(indexQ); 65 pipeline(); 66 postTransform(); 67 computeClipFlags(); 68 69 Pointer<Byte> cacheLine0 = vertexCache + tagIndex * UInt((int)sizeof(Vertex)); 70 writeCache(cacheLine0); 71 } 72 73 UInt cacheIndex = index & 0x0000003F; 74 Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex)); 75 writeVertex(vertex, cacheLine); 76 77 if(state.transformFeedbackEnabled != 0) 78 { 79 transformFeedback(vertex, primitiveNumber, indexInPrimitive); 80 81 indexInPrimitive++; 82 If(indexInPrimitive == 3) 83 { 84 primitiveNumber++; 85 indexInPrimitive = 0; 86 } 87 } 88 89 vertex += sizeof(Vertex); 90 batch += sizeof(unsigned int); 91 vertexCount--; 92 } 93 Until(vertexCount == 0) 94 95 Return(); 96 } 97 readInput(UInt & index)98 void VertexRoutine::readInput(UInt &index) 99 { 100 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 101 { 102 Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,input) + sizeof(void*) * i); 103 UInt stride = *Pointer<UInt>(data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i); 104 105 v[i] = readStream(input, stride, state.input[i], index); 106 } 107 } 108 computeClipFlags()109 void VertexRoutine::computeClipFlags() 110 { 111 int pos = state.positionRegister; 112 113 Int4 maxX = CmpLT(o[pos].w, o[pos].x); 114 Int4 maxY = CmpLT(o[pos].w, o[pos].y); 115 Int4 maxZ = CmpLT(o[pos].w, o[pos].z); 116 Int4 minX = CmpNLE(-o[pos].w, o[pos].x); 117 Int4 minY = CmpNLE(-o[pos].w, o[pos].y); 118 Int4 minZ = symmetricNormalizedDepth ? CmpNLE(-o[pos].w, o[pos].z) : CmpNLE(Float4(0.0f), o[pos].z); 119 120 clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + SignMask(maxX) * 4); // FIXME: Array indexing 121 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + SignMask(maxY) * 4); 122 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + SignMask(maxZ) * 4); 123 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + SignMask(minX) * 4); 124 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + SignMask(minY) * 4); 125 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + SignMask(minZ) * 4); 126 127 Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); 128 Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); 129 Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); 130 131 Int4 finiteXYZ = finiteX & finiteY & finiteZ; 132 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + SignMask(finiteXYZ) * 4); 133 134 if(state.preTransformed) 135 { 136 clipFlags &= 0xFBFBFBFB; // Don't clip against far clip plane 137 } 138 } 139 readStream(Pointer<Byte> & buffer,UInt & stride,const Stream & stream,const UInt & index)140 Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index) 141 { 142 const bool textureSampling = state.textureSampling; 143 144 Vector4f v; 145 146 Pointer<Byte> source0 = buffer + index * stride; 147 Pointer<Byte> source1 = source0 + (!textureSampling ? stride : 0); 148 Pointer<Byte> source2 = source1 + (!textureSampling ? stride : 0); 149 Pointer<Byte> source3 = source2 + (!textureSampling ? stride : 0); 150 151 switch(stream.type) 152 { 153 case STREAMTYPE_FLOAT: 154 { 155 if(stream.count == 0) 156 { 157 // Null stream, all default components 158 } 159 else if(stream.count == 1) 160 { 161 v.x.x = *Pointer<Float>(source0); 162 v.x.y = *Pointer<Float>(source1); 163 v.x.z = *Pointer<Float>(source2); 164 v.x.w = *Pointer<Float>(source3); 165 } 166 else 167 { 168 v.x = *Pointer<Float4>(source0); 169 v.y = *Pointer<Float4>(source1); 170 v.z = *Pointer<Float4>(source2); 171 v.w = *Pointer<Float4>(source3); 172 173 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 174 } 175 } 176 break; 177 case STREAMTYPE_BYTE: 178 { 179 v.x = Float4(*Pointer<Byte4>(source0)); 180 v.y = Float4(*Pointer<Byte4>(source1)); 181 v.z = Float4(*Pointer<Byte4>(source2)); 182 v.w = Float4(*Pointer<Byte4>(source3)); 183 184 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 185 186 if(stream.normalized) 187 { 188 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 189 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 190 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 191 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 192 } 193 } 194 break; 195 case STREAMTYPE_SBYTE: 196 { 197 v.x = Float4(*Pointer<SByte4>(source0)); 198 v.y = Float4(*Pointer<SByte4>(source1)); 199 v.z = Float4(*Pointer<SByte4>(source2)); 200 v.w = Float4(*Pointer<SByte4>(source3)); 201 202 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 203 204 if(stream.normalized) 205 { 206 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 207 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 208 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 209 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 210 } 211 } 212 break; 213 case STREAMTYPE_COLOR: 214 { 215 v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 216 v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 217 v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 218 v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 219 220 transpose4x4(v.x, v.y, v.z, v.w); 221 222 // Swap red and blue 223 Float4 t = v.x; 224 v.x = v.z; 225 v.z = t; 226 } 227 break; 228 case STREAMTYPE_SHORT: 229 { 230 v.x = Float4(*Pointer<Short4>(source0)); 231 v.y = Float4(*Pointer<Short4>(source1)); 232 v.z = Float4(*Pointer<Short4>(source2)); 233 v.w = Float4(*Pointer<Short4>(source3)); 234 235 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 236 237 if(stream.normalized) 238 { 239 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 240 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 241 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 242 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 243 } 244 } 245 break; 246 case STREAMTYPE_USHORT: 247 { 248 v.x = Float4(*Pointer<UShort4>(source0)); 249 v.y = Float4(*Pointer<UShort4>(source1)); 250 v.z = Float4(*Pointer<UShort4>(source2)); 251 v.w = Float4(*Pointer<UShort4>(source3)); 252 253 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 254 255 if(stream.normalized) 256 { 257 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 258 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 259 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 260 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 261 } 262 } 263 break; 264 case STREAMTYPE_INT: 265 { 266 if(stream.normalized) 267 { 268 v.x = Float4(*Pointer<Int4>(source0)); 269 v.y = Float4(*Pointer<Int4>(source1)); 270 v.z = Float4(*Pointer<Int4>(source2)); 271 v.w = Float4(*Pointer<Int4>(source3)); 272 273 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 274 275 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 276 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 277 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 278 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 279 } 280 else 281 { 282 v.x = As<Float4>(*Pointer<Int4>(source0)); 283 v.y = As<Float4>(*Pointer<Int4>(source1)); 284 v.z = As<Float4>(*Pointer<Int4>(source2)); 285 v.w = As<Float4>(*Pointer<Int4>(source3)); 286 287 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 288 } 289 } 290 break; 291 case STREAMTYPE_UINT: 292 { 293 if(stream.normalized) 294 { 295 v.x = Float4(*Pointer<UInt4>(source0)); 296 v.y = Float4(*Pointer<UInt4>(source1)); 297 v.z = Float4(*Pointer<UInt4>(source2)); 298 v.w = Float4(*Pointer<UInt4>(source3)); 299 300 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 301 302 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 303 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 304 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 305 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 306 } 307 else 308 { 309 v.x = As<Float4>(*Pointer<UInt4>(source0)); 310 v.y = As<Float4>(*Pointer<UInt4>(source1)); 311 v.z = As<Float4>(*Pointer<UInt4>(source2)); 312 v.w = As<Float4>(*Pointer<UInt4>(source3)); 313 314 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 315 } 316 } 317 break; 318 case STREAMTYPE_UDEC3: 319 { 320 // FIXME: Vectorize 321 { 322 Int x, y, z; 323 324 x = y = z = *Pointer<Int>(source0); 325 326 v.x.x = Float(x & 0x000003FF); 327 v.x.y = Float(y & 0x000FFC00); 328 v.x.z = Float(z & 0x3FF00000); 329 } 330 331 { 332 Int x, y, z; 333 334 x = y = z = *Pointer<Int>(source1); 335 336 v.y.x = Float(x & 0x000003FF); 337 v.y.y = Float(y & 0x000FFC00); 338 v.y.z = Float(z & 0x3FF00000); 339 } 340 341 { 342 Int x, y, z; 343 344 x = y = z = *Pointer<Int>(source2); 345 346 v.z.x = Float(x & 0x000003FF); 347 v.z.y = Float(y & 0x000FFC00); 348 v.z.z = Float(z & 0x3FF00000); 349 } 350 351 { 352 Int x, y, z; 353 354 x = y = z = *Pointer<Int>(source3); 355 356 v.w.x = Float(x & 0x000003FF); 357 v.w.y = Float(y & 0x000FFC00); 358 v.w.z = Float(z & 0x3FF00000); 359 } 360 361 transpose4x3(v.x, v.y, v.z, v.w); 362 363 v.y *= Float4(1.0f / 0x00000400); 364 v.z *= Float4(1.0f / 0x00100000); 365 } 366 break; 367 case STREAMTYPE_DEC3N: 368 { 369 // FIXME: Vectorize 370 { 371 Int x, y, z; 372 373 x = y = z = *Pointer<Int>(source0); 374 375 v.x.x = Float((x << 22) & 0xFFC00000); 376 v.x.y = Float((y << 12) & 0xFFC00000); 377 v.x.z = Float((z << 2) & 0xFFC00000); 378 } 379 380 { 381 Int x, y, z; 382 383 x = y = z = *Pointer<Int>(source1); 384 385 v.y.x = Float((x << 22) & 0xFFC00000); 386 v.y.y = Float((y << 12) & 0xFFC00000); 387 v.y.z = Float((z << 2) & 0xFFC00000); 388 } 389 390 { 391 Int x, y, z; 392 393 x = y = z = *Pointer<Int>(source2); 394 395 v.z.x = Float((x << 22) & 0xFFC00000); 396 v.z.y = Float((y << 12) & 0xFFC00000); 397 v.z.z = Float((z << 2) & 0xFFC00000); 398 } 399 400 { 401 Int x, y, z; 402 403 x = y = z = *Pointer<Int>(source3); 404 405 v.w.x = Float((x << 22) & 0xFFC00000); 406 v.w.y = Float((y << 12) & 0xFFC00000); 407 v.w.z = Float((z << 2) & 0xFFC00000); 408 } 409 410 transpose4x3(v.x, v.y, v.z, v.w); 411 412 v.x *= Float4(1.0f / 0x00400000 / 511.0f); 413 v.y *= Float4(1.0f / 0x00400000 / 511.0f); 414 v.z *= Float4(1.0f / 0x00400000 / 511.0f); 415 } 416 break; 417 case STREAMTYPE_FIXED: 418 { 419 v.x = Float4(*Pointer<Int4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 420 v.y = Float4(*Pointer<Int4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 421 v.z = Float4(*Pointer<Int4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 422 v.w = Float4(*Pointer<Int4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 423 424 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 425 } 426 break; 427 case STREAMTYPE_HALF: 428 { 429 if(stream.count >= 1) 430 { 431 UShort x0 = *Pointer<UShort>(source0 + 0); 432 UShort x1 = *Pointer<UShort>(source1 + 0); 433 UShort x2 = *Pointer<UShort>(source2 + 0); 434 UShort x3 = *Pointer<UShort>(source3 + 0); 435 436 v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4); 437 v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 4); 438 v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4); 439 v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4); 440 } 441 442 if(stream.count >= 2) 443 { 444 UShort y0 = *Pointer<UShort>(source0 + 2); 445 UShort y1 = *Pointer<UShort>(source1 + 2); 446 UShort y2 = *Pointer<UShort>(source2 + 2); 447 UShort y3 = *Pointer<UShort>(source3 + 2); 448 449 v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4); 450 v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4); 451 v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4); 452 v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4); 453 } 454 455 if(stream.count >= 3) 456 { 457 UShort z0 = *Pointer<UShort>(source0 + 4); 458 UShort z1 = *Pointer<UShort>(source1 + 4); 459 UShort z2 = *Pointer<UShort>(source2 + 4); 460 UShort z3 = *Pointer<UShort>(source3 + 4); 461 462 v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4); 463 v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4); 464 v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4); 465 v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4); 466 } 467 468 if(stream.count >= 4) 469 { 470 UShort w0 = *Pointer<UShort>(source0 + 6); 471 UShort w1 = *Pointer<UShort>(source1 + 6); 472 UShort w2 = *Pointer<UShort>(source2 + 6); 473 UShort w3 = *Pointer<UShort>(source3 + 6); 474 475 v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4); 476 v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4); 477 v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4); 478 v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4); 479 } 480 } 481 break; 482 case STREAMTYPE_INDICES: 483 { 484 v.x.x = *Pointer<Float>(source0); 485 v.x.y = *Pointer<Float>(source1); 486 v.x.z = *Pointer<Float>(source2); 487 v.x.w = *Pointer<Float>(source3); 488 } 489 break; 490 case STREAMTYPE_2_10_10_10_INT: 491 { 492 Int4 src; 493 src = Insert(src, *Pointer<Int>(source0), 0); 494 src = Insert(src, *Pointer<Int>(source1), 1); 495 src = Insert(src, *Pointer<Int>(source2), 2); 496 src = Insert(src, *Pointer<Int>(source3), 3); 497 498 v.x = Float4((src << 22) >> 22); 499 v.y = Float4((src << 12) >> 22); 500 v.z = Float4((src << 02) >> 22); 501 v.w = Float4(src >> 30); 502 503 if(stream.normalized) 504 { 505 v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f)); 506 v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f)); 507 v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f)); 508 v.w = Max(v.w, Float4(-1.0f)); 509 } 510 } 511 break; 512 case STREAMTYPE_2_10_10_10_UINT: 513 { 514 Int4 src; 515 src = Insert(src, *Pointer<Int>(source0), 0); 516 src = Insert(src, *Pointer<Int>(source1), 1); 517 src = Insert(src, *Pointer<Int>(source2), 2); 518 src = Insert(src, *Pointer<Int>(source3), 3); 519 520 v.x = Float4(src & Int4(0x3FF)); 521 v.y = Float4((src >> 10) & Int4(0x3FF)); 522 v.z = Float4((src >> 20) & Int4(0x3FF)); 523 v.w = Float4((src >> 30) & Int4(0x3)); 524 525 if(stream.normalized) 526 { 527 v.x *= Float4(1.0f / 0x3FF); 528 v.y *= Float4(1.0f / 0x3FF); 529 v.z *= Float4(1.0f / 0x3FF); 530 v.w *= Float4(1.0f / 0x3); 531 } 532 } 533 break; 534 default: 535 ASSERT(false); 536 } 537 538 if(stream.count < 1) v.x = Float4(0.0f); 539 if(stream.count < 2) v.y = Float4(0.0f); 540 if(stream.count < 3) v.z = Float4(0.0f); 541 if(stream.count < 4) v.w = Float4(1.0f); 542 543 return v; 544 } 545 postTransform()546 void VertexRoutine::postTransform() 547 { 548 int pos = state.positionRegister; 549 550 // Backtransform 551 if(state.preTransformed) 552 { 553 Float4 rhw = Float4(1.0f) / o[pos].w; 554 555 Float4 W = *Pointer<Float4>(data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f); 556 Float4 H = *Pointer<Float4>(data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f); 557 Float4 L = *Pointer<Float4>(data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f); 558 Float4 T = *Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f); 559 560 o[pos].x = (o[pos].x - L) / W * rhw; 561 o[pos].y = (o[pos].y - T) / H * rhw; 562 o[pos].z = o[pos].z * rhw; 563 o[pos].w = rhw; 564 } 565 566 if(!halfIntegerCoordinates && !state.preTransformed) 567 { 568 o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelX)) * o[pos].w; 569 o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelY)) * o[pos].w; 570 } 571 572 if(state.superSampling) 573 { 574 o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,XXXX)) * o[pos].w; 575 o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,YYYY)) * o[pos].w; 576 } 577 } 578 writeCache(Pointer<Byte> & cacheLine)579 void VertexRoutine::writeCache(Pointer<Byte> &cacheLine) 580 { 581 Vector4f v; 582 583 for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++) 584 { 585 if(state.output[i].write) 586 { 587 v.x = o[i].x; 588 v.y = o[i].y; 589 v.z = o[i].z; 590 v.w = o[i].w; 591 592 if(state.output[i].xClamp) 593 { 594 v.x = Max(v.x, Float4(0.0f)); 595 v.x = Min(v.x, Float4(1.0f)); 596 } 597 598 if(state.output[i].yClamp) 599 { 600 v.y = Max(v.y, Float4(0.0f)); 601 v.y = Min(v.y, Float4(1.0f)); 602 } 603 604 if(state.output[i].zClamp) 605 { 606 v.z = Max(v.z, Float4(0.0f)); 607 v.z = Min(v.z, Float4(1.0f)); 608 } 609 610 if(state.output[i].wClamp) 611 { 612 v.w = Max(v.w, Float4(0.0f)); 613 v.w = Min(v.w, Float4(1.0f)); 614 } 615 616 if(state.output[i].write == 0x01) 617 { 618 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0) = v.x.x; 619 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1) = v.x.y; 620 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2) = v.x.z; 621 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3) = v.x.w; 622 } 623 else 624 { 625 if(state.output[i].write == 0x02) 626 { 627 transpose2x4(v.x, v.y, v.z, v.w); 628 } 629 else 630 { 631 transpose4x4(v.x, v.y, v.z, v.w); 632 } 633 634 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0, 16) = v.x; 635 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1, 16) = v.y; 636 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2, 16) = v.z; 637 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3, 16) = v.w; 638 } 639 } 640 } 641 642 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (clipFlags >> 0) & 0x0000000FF; 643 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (clipFlags >> 8) & 0x0000000FF; 644 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF; 645 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF; 646 647 // Viewport transform 648 int pos = state.positionRegister; 649 650 v.x = o[pos].x; 651 v.y = o[pos].y; 652 v.z = o[pos].z; 653 v.w = o[pos].w; 654 655 if(symmetricNormalizedDepth) 656 { 657 v.z = (v.z + v.w) * Float4(0.5f); // [-1, 1] -> [0, 1] 658 } 659 660 Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f)))); 661 Float4 rhw = Float4(1.0f) / w; 662 663 v.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Wx16)))); 664 v.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Hx16)))); 665 v.z = v.z * rhw; 666 v.w = rhw; 667 668 transpose4x4(v.x, v.y, v.z, v.w); 669 670 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 0, 16) = v.x; 671 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 1, 16) = v.y; 672 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 2, 16) = v.z; 673 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w; 674 } 675 writeVertex(const Pointer<Byte> & vertex,Pointer<Byte> & cache)676 void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache) 677 { 678 for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++) 679 { 680 if(state.output[i].write) 681 { 682 *Pointer<Int4>(vertex + OFFSET(Vertex,v[i]), 16) = *Pointer<Int4>(cache + OFFSET(Vertex,v[i]), 16); 683 } 684 } 685 686 *Pointer<Int4>(vertex + OFFSET(Vertex,X)) = *Pointer<Int4>(cache + OFFSET(Vertex,X)); 687 *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags)); 688 } 689 transformFeedback(const Pointer<Byte> & vertex,const UInt & primitiveNumber,const UInt & indexInPrimitive)690 void VertexRoutine::transformFeedback(const Pointer<Byte> &vertex, const UInt &primitiveNumber, const UInt &indexInPrimitive) 691 { 692 If(indexInPrimitive < state.verticesPerPrimitive) 693 { 694 UInt tOffset = primitiveNumber * state.verticesPerPrimitive + indexInPrimitive; 695 696 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 697 { 698 if(state.transformFeedbackEnabled & (1ULL << i)) 699 { 700 UInt reg = *Pointer<UInt>(data + OFFSET(DrawData, vs.reg[i])); 701 UInt row = *Pointer<UInt>(data + OFFSET(DrawData, vs.row[i])); 702 UInt col = *Pointer<UInt>(data + OFFSET(DrawData, vs.col[i])); 703 UInt str = *Pointer<UInt>(data + OFFSET(DrawData, vs.str[i])); 704 705 Pointer<Byte> t = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.t[i])) + (tOffset * str * sizeof(float)); 706 Pointer<Byte> v = vertex + OFFSET(Vertex, v) + reg * sizeof(float); 707 708 For(UInt r = 0, r < row, r++) 709 { 710 UInt rOffsetX = r * col * sizeof(float); 711 UInt rOffset4 = r * sizeof(float4); 712 713 For(UInt c = 0, c < col, c++) 714 { 715 UInt cOffset = c * sizeof(float); 716 *Pointer<Float>(t + rOffsetX + cOffset) = *Pointer<Float>(v + rOffset4 + cOffset); 717 } 718 } 719 } 720 } 721 } 722 } 723 } 724