1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Blitter.hpp" 16 17 #include "Shader/ShaderCore.hpp" 18 #include "Reactor/Reactor.hpp" 19 #include "Common/Memory.hpp" 20 #include "Common/Debug.hpp" 21 22 namespace sw 23 { 24 using namespace rr; 25 Blitter()26 Blitter::Blitter() 27 { 28 blitCache = new RoutineCache<State>(1024); 29 } 30 ~Blitter()31 Blitter::~Blitter() 32 { 33 delete blitCache; 34 } 35 clear(void * pixel,sw::Format format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)36 void Blitter::clear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask) 37 { 38 if(fastClear(pixel, format, dest, dRect, rgbaMask)) 39 { 40 return; 41 } 42 43 sw::Surface *color = sw::Surface::create(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format)); 44 SliceRectF sRect(0.5f, 0.5f, 0.5f, 0.5f, 0); // Sample from the middle. 45 blit(color, sRect, dest, dRect, {rgbaMask}); 46 delete color; 47 } 48 fastClear(void * pixel,sw::Format format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)49 bool Blitter::fastClear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask) 50 { 51 if(format != FORMAT_A32B32G32R32F) 52 { 53 return false; 54 } 55 56 float *color = (float*)pixel; 57 float r = color[0]; 58 float g = color[1]; 59 float b = color[2]; 60 float a = color[3]; 61 62 uint32_t packed; 63 64 switch(dest->getFormat()) 65 { 66 case FORMAT_R5G6B5: 67 if((rgbaMask & 0x7) != 0x7) return false; 68 packed = ((uint16_t)(31 * b + 0.5f) << 0) | 69 ((uint16_t)(63 * g + 0.5f) << 5) | 70 ((uint16_t)(31 * r + 0.5f) << 11); 71 break; 72 case FORMAT_X8B8G8R8: 73 if((rgbaMask & 0x7) != 0x7) return false; 74 packed = ((uint32_t)(255) << 24) | 75 ((uint32_t)(255 * b + 0.5f) << 16) | 76 ((uint32_t)(255 * g + 0.5f) << 8) | 77 ((uint32_t)(255 * r + 0.5f) << 0); 78 break; 79 case FORMAT_A8B8G8R8: 80 if((rgbaMask & 0xF) != 0xF) return false; 81 packed = ((uint32_t)(255 * a + 0.5f) << 24) | 82 ((uint32_t)(255 * b + 0.5f) << 16) | 83 ((uint32_t)(255 * g + 0.5f) << 8) | 84 ((uint32_t)(255 * r + 0.5f) << 0); 85 break; 86 case FORMAT_X8R8G8B8: 87 if((rgbaMask & 0x7) != 0x7) return false; 88 packed = ((uint32_t)(255) << 24) | 89 ((uint32_t)(255 * r + 0.5f) << 16) | 90 ((uint32_t)(255 * g + 0.5f) << 8) | 91 ((uint32_t)(255 * b + 0.5f) << 0); 92 break; 93 case FORMAT_A8R8G8B8: 94 if((rgbaMask & 0xF) != 0xF) return false; 95 packed = ((uint32_t)(255 * a + 0.5f) << 24) | 96 ((uint32_t)(255 * r + 0.5f) << 16) | 97 ((uint32_t)(255 * g + 0.5f) << 8) | 98 ((uint32_t)(255 * b + 0.5f) << 0); 99 break; 100 default: 101 return false; 102 } 103 104 bool useDestInternal = !dest->isExternalDirty(); 105 uint8_t *slice = (uint8_t*)dest->lock(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC, useDestInternal); 106 107 for(int j = 0; j < dest->getSamples(); j++) 108 { 109 uint8_t *d = slice; 110 111 switch(Surface::bytes(dest->getFormat())) 112 { 113 case 2: 114 for(int i = dRect.y0; i < dRect.y1; i++) 115 { 116 sw::clear((uint16_t*)d, packed, dRect.x1 - dRect.x0); 117 d += dest->getPitchB(useDestInternal); 118 } 119 break; 120 case 4: 121 for(int i = dRect.y0; i < dRect.y1; i++) 122 { 123 sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0); 124 d += dest->getPitchB(useDestInternal); 125 } 126 break; 127 default: 128 assert(false); 129 } 130 131 slice += dest->getSliceB(useDestInternal); 132 } 133 134 dest->unlock(useDestInternal); 135 136 return true; 137 } 138 blit(Surface * source,const SliceRectF & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)139 void Blitter::blit(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options) 140 { 141 if(dest->getInternalFormat() == FORMAT_NULL) 142 { 143 return; 144 } 145 146 if(blitReactor(source, sourceRect, dest, destRect, options)) 147 { 148 return; 149 } 150 151 SliceRectF sRect = sourceRect; 152 SliceRect dRect = destRect; 153 154 bool flipX = destRect.x0 > destRect.x1; 155 bool flipY = destRect.y0 > destRect.y1; 156 157 if(flipX) 158 { 159 swap(dRect.x0, dRect.x1); 160 swap(sRect.x0, sRect.x1); 161 } 162 if(flipY) 163 { 164 swap(dRect.y0, dRect.y1); 165 swap(sRect.y0, sRect.y1); 166 } 167 168 source->lockInternal(0, 0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC); 169 dest->lockInternal(0, 0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC); 170 171 float w = sRect.width() / dRect.width(); 172 float h = sRect.height() / dRect.height(); 173 174 float xStart = sRect.x0 + (0.5f - dRect.x0) * w; 175 float yStart = sRect.y0 + (0.5f - dRect.y0) * h; 176 177 for(int j = dRect.y0; j < dRect.y1; j++) 178 { 179 float y = yStart + j * h; 180 181 for(int i = dRect.x0; i < dRect.x1; i++) 182 { 183 float x = xStart + i * w; 184 185 // FIXME: Support RGBA mask 186 dest->copyInternal(source, i, j, x, y, options.filter); 187 } 188 } 189 190 source->unlockInternal(); 191 dest->unlockInternal(); 192 } 193 blit3D(Surface * source,Surface * dest)194 void Blitter::blit3D(Surface *source, Surface *dest) 195 { 196 source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC); 197 dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC); 198 199 float w = static_cast<float>(source->getWidth()) / static_cast<float>(dest->getWidth()); 200 float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight()); 201 float d = static_cast<float>(source->getDepth()) / static_cast<float>(dest->getDepth()); 202 203 for(int k = 0; k < dest->getDepth(); k++) 204 { 205 float z = (k + 0.5f) * d; 206 207 for(int j = 0; j < dest->getHeight(); j++) 208 { 209 float y = (j + 0.5f) * h; 210 211 for(int i = 0; i < dest->getWidth(); i++) 212 { 213 float x = (i + 0.5f) * w; 214 215 dest->copyInternal(source, i, j, k, x, y, z, true); 216 } 217 } 218 } 219 220 source->unlockInternal(); 221 dest->unlockInternal(); 222 } 223 read(Float4 & c,Pointer<Byte> element,const State & state)224 bool Blitter::read(Float4 &c, Pointer<Byte> element, const State &state) 225 { 226 c = Float4(0.0f, 0.0f, 0.0f, 1.0f); 227 228 switch(state.sourceFormat) 229 { 230 case FORMAT_L8: 231 c.xyz = Float(Int(*Pointer<Byte>(element))); 232 c.w = float(0xFF); 233 break; 234 case FORMAT_A8: 235 c.w = Float(Int(*Pointer<Byte>(element))); 236 break; 237 case FORMAT_R8I: 238 case FORMAT_R8_SNORM: 239 c.x = Float(Int(*Pointer<SByte>(element))); 240 c.w = float(0x7F); 241 break; 242 case FORMAT_R8: 243 case FORMAT_R8UI: 244 c.x = Float(Int(*Pointer<Byte>(element))); 245 c.w = float(0xFF); 246 break; 247 case FORMAT_R16I: 248 c.x = Float(Int(*Pointer<Short>(element))); 249 c.w = float(0x7FFF); 250 break; 251 case FORMAT_R16UI: 252 c.x = Float(Int(*Pointer<UShort>(element))); 253 c.w = float(0xFFFF); 254 break; 255 case FORMAT_R32I: 256 c.x = Float(*Pointer<Int>(element)); 257 c.w = float(0x7FFFFFFF); 258 break; 259 case FORMAT_R32UI: 260 c.x = Float(*Pointer<UInt>(element)); 261 c.w = float(0xFFFFFFFF); 262 break; 263 case FORMAT_A8R8G8B8: 264 c = Float4(*Pointer<Byte4>(element)).zyxw; 265 break; 266 case FORMAT_A8B8G8R8I: 267 case FORMAT_A8B8G8R8_SNORM: 268 c = Float4(*Pointer<SByte4>(element)); 269 break; 270 case FORMAT_A8B8G8R8: 271 case FORMAT_A8B8G8R8UI: 272 case FORMAT_SRGB8_A8: 273 c = Float4(*Pointer<Byte4>(element)); 274 break; 275 case FORMAT_X8R8G8B8: 276 c = Float4(*Pointer<Byte4>(element)).zyxw; 277 c.w = float(0xFF); 278 break; 279 case FORMAT_R8G8B8: 280 c.z = Float(Int(*Pointer<Byte>(element + 0))); 281 c.y = Float(Int(*Pointer<Byte>(element + 1))); 282 c.x = Float(Int(*Pointer<Byte>(element + 2))); 283 c.w = float(0xFF); 284 break; 285 case FORMAT_B8G8R8: 286 c.x = Float(Int(*Pointer<Byte>(element + 0))); 287 c.y = Float(Int(*Pointer<Byte>(element + 1))); 288 c.z = Float(Int(*Pointer<Byte>(element + 2))); 289 c.w = float(0xFF); 290 break; 291 case FORMAT_X8B8G8R8I: 292 case FORMAT_X8B8G8R8_SNORM: 293 c = Float4(*Pointer<SByte4>(element)); 294 c.w = float(0x7F); 295 break; 296 case FORMAT_X8B8G8R8: 297 case FORMAT_X8B8G8R8UI: 298 case FORMAT_SRGB8_X8: 299 c = Float4(*Pointer<Byte4>(element)); 300 c.w = float(0xFF); 301 break; 302 case FORMAT_A16B16G16R16I: 303 c = Float4(*Pointer<Short4>(element)); 304 break; 305 case FORMAT_A16B16G16R16: 306 case FORMAT_A16B16G16R16UI: 307 c = Float4(*Pointer<UShort4>(element)); 308 break; 309 case FORMAT_X16B16G16R16I: 310 c = Float4(*Pointer<Short4>(element)); 311 c.w = float(0x7FFF); 312 break; 313 case FORMAT_X16B16G16R16UI: 314 c = Float4(*Pointer<UShort4>(element)); 315 c.w = float(0xFFFF); 316 break; 317 case FORMAT_A32B32G32R32I: 318 c = Float4(*Pointer<Int4>(element)); 319 break; 320 case FORMAT_A32B32G32R32UI: 321 c = Float4(*Pointer<UInt4>(element)); 322 break; 323 case FORMAT_X32B32G32R32I: 324 c = Float4(*Pointer<Int4>(element)); 325 c.w = float(0x7FFFFFFF); 326 break; 327 case FORMAT_X32B32G32R32UI: 328 c = Float4(*Pointer<UInt4>(element)); 329 c.w = float(0xFFFFFFFF); 330 break; 331 case FORMAT_G8R8I: 332 case FORMAT_G8R8_SNORM: 333 c.x = Float(Int(*Pointer<SByte>(element + 0))); 334 c.y = Float(Int(*Pointer<SByte>(element + 1))); 335 c.w = float(0x7F); 336 break; 337 case FORMAT_G8R8: 338 case FORMAT_G8R8UI: 339 c.x = Float(Int(*Pointer<Byte>(element + 0))); 340 c.y = Float(Int(*Pointer<Byte>(element + 1))); 341 c.w = float(0xFF); 342 break; 343 case FORMAT_G16R16I: 344 c.x = Float(Int(*Pointer<Short>(element + 0))); 345 c.y = Float(Int(*Pointer<Short>(element + 2))); 346 c.w = float(0x7FFF); 347 break; 348 case FORMAT_G16R16: 349 case FORMAT_G16R16UI: 350 c.x = Float(Int(*Pointer<UShort>(element + 0))); 351 c.y = Float(Int(*Pointer<UShort>(element + 2))); 352 c.w = float(0xFFFF); 353 break; 354 case FORMAT_G32R32I: 355 c.x = Float(*Pointer<Int>(element + 0)); 356 c.y = Float(*Pointer<Int>(element + 4)); 357 c.w = float(0x7FFFFFFF); 358 break; 359 case FORMAT_G32R32UI: 360 c.x = Float(*Pointer<UInt>(element + 0)); 361 c.y = Float(*Pointer<UInt>(element + 4)); 362 c.w = float(0xFFFFFFFF); 363 break; 364 case FORMAT_A32B32G32R32F: 365 c = *Pointer<Float4>(element); 366 break; 367 case FORMAT_X32B32G32R32F: 368 case FORMAT_X32B32G32R32F_UNSIGNED: 369 case FORMAT_B32G32R32F: 370 c.z = *Pointer<Float>(element + 8); 371 case FORMAT_G32R32F: 372 c.x = *Pointer<Float>(element + 0); 373 c.y = *Pointer<Float>(element + 4); 374 break; 375 case FORMAT_R32F: 376 c.x = *Pointer<Float>(element); 377 break; 378 case FORMAT_R5G6B5: 379 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11))); 380 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5))); 381 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F))); 382 break; 383 case FORMAT_A2B10G10R10: 384 case FORMAT_A2B10G10R10UI: 385 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF)))); 386 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10)); 387 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20)); 388 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30)); 389 break; 390 case FORMAT_D16: 391 c.x = Float(Int((*Pointer<UShort>(element)))); 392 break; 393 case FORMAT_D24S8: 394 case FORMAT_D24X8: 395 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8)); 396 break; 397 case FORMAT_D32: 398 c.x = Float(Int((*Pointer<UInt>(element)))); 399 break; 400 case FORMAT_D32F_COMPLEMENTARY: 401 case FORMAT_D32FS8_COMPLEMENTARY: 402 c.x = 1.0f - *Pointer<Float>(element); 403 break; 404 case FORMAT_D32F: 405 case FORMAT_D32FS8: 406 case FORMAT_D32F_LOCKABLE: 407 case FORMAT_D32FS8_TEXTURE: 408 case FORMAT_D32F_SHADOW: 409 case FORMAT_D32FS8_SHADOW: 410 c.x = *Pointer<Float>(element); 411 break; 412 case FORMAT_S8: 413 c.x = Float(Int(*Pointer<Byte>(element))); 414 break; 415 default: 416 return false; 417 } 418 419 return true; 420 } 421 write(Float4 & c,Pointer<Byte> element,const State & state)422 bool Blitter::write(Float4 &c, Pointer<Byte> element, const State &state) 423 { 424 bool writeR = state.writeRed; 425 bool writeG = state.writeGreen; 426 bool writeB = state.writeBlue; 427 bool writeA = state.writeAlpha; 428 bool writeRGBA = writeR && writeG && writeB && writeA; 429 430 switch(state.destFormat) 431 { 432 case FORMAT_L8: 433 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); 434 break; 435 case FORMAT_A8: 436 if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); } 437 break; 438 case FORMAT_A8R8G8B8: 439 if(writeRGBA) 440 { 441 Short4 c0 = RoundShort4(c.zyxw); 442 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); 443 } 444 else 445 { 446 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } 447 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 448 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } 449 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } 450 } 451 break; 452 case FORMAT_A8B8G8R8: 453 case FORMAT_SRGB8_A8: 454 if(writeRGBA) 455 { 456 Short4 c0 = RoundShort4(c); 457 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); 458 } 459 else 460 { 461 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); } 462 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 463 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } 464 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } 465 } 466 break; 467 case FORMAT_X8R8G8B8: 468 if(writeRGBA) 469 { 470 Short4 c0 = RoundShort4(c.zyxw) | Short4(0x0000, 0x0000, 0x0000, 0x00FF); 471 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); 472 } 473 else 474 { 475 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } 476 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 477 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } 478 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); } 479 } 480 break; 481 case FORMAT_X8B8G8R8: 482 case FORMAT_SRGB8_X8: 483 if(writeRGBA) 484 { 485 Short4 c0 = RoundShort4(c) | Short4(0x0000, 0x0000, 0x0000, 0x00FF); 486 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); 487 } 488 else 489 { 490 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); } 491 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 492 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } 493 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); } 494 } 495 break; 496 case FORMAT_R8G8B8: 497 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } 498 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 499 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } 500 break; 501 case FORMAT_B8G8R8: 502 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); } 503 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 504 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } 505 break; 506 case FORMAT_A32B32G32R32F: 507 if(writeRGBA) 508 { 509 *Pointer<Float4>(element) = c; 510 } 511 else 512 { 513 if(writeR) { *Pointer<Float>(element) = c.x; } 514 if(writeG) { *Pointer<Float>(element + 4) = c.y; } 515 if(writeB) { *Pointer<Float>(element + 8) = c.z; } 516 if(writeA) { *Pointer<Float>(element + 12) = c.w; } 517 } 518 break; 519 case FORMAT_X32B32G32R32F: 520 case FORMAT_X32B32G32R32F_UNSIGNED: 521 if(writeA) { *Pointer<Float>(element + 12) = 1.0f; } 522 case FORMAT_B32G32R32F: 523 if(writeR) { *Pointer<Float>(element) = c.x; } 524 if(writeG) { *Pointer<Float>(element + 4) = c.y; } 525 if(writeB) { *Pointer<Float>(element + 8) = c.z; } 526 break; 527 case FORMAT_G32R32F: 528 if(writeR && writeG) 529 { 530 *Pointer<Float2>(element) = Float2(c); 531 } 532 else 533 { 534 if(writeR) { *Pointer<Float>(element) = c.x; } 535 if(writeG) { *Pointer<Float>(element + 4) = c.y; } 536 } 537 break; 538 case FORMAT_R32F: 539 if(writeR) { *Pointer<Float>(element) = c.x; } 540 break; 541 case FORMAT_A8B8G8R8I: 542 case FORMAT_A8B8G8R8_SNORM: 543 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); } 544 case FORMAT_X8B8G8R8I: 545 case FORMAT_X8B8G8R8_SNORM: 546 if(writeA && (state.destFormat == FORMAT_X8B8G8R8I || state.destFormat == FORMAT_X8B8G8R8_SNORM)) 547 { 548 *Pointer<SByte>(element + 3) = SByte(0x7F); 549 } 550 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); } 551 case FORMAT_G8R8I: 552 case FORMAT_G8R8_SNORM: 553 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); } 554 case FORMAT_R8I: 555 case FORMAT_R8_SNORM: 556 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); } 557 break; 558 case FORMAT_A8B8G8R8UI: 559 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } 560 case FORMAT_X8B8G8R8UI: 561 if(writeA && (state.destFormat == FORMAT_X8B8G8R8UI)) 562 { 563 *Pointer<Byte>(element + 3) = Byte(0xFF); 564 } 565 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } 566 case FORMAT_G8R8UI: 567 case FORMAT_G8R8: 568 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 569 case FORMAT_R8UI: 570 case FORMAT_R8: 571 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); } 572 break; 573 case FORMAT_A16B16G16R16I: 574 if(writeRGBA) 575 { 576 *Pointer<Short4>(element) = Short4(RoundInt(c)); 577 } 578 else 579 { 580 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 581 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } 582 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); } 583 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); } 584 } 585 break; 586 case FORMAT_X16B16G16R16I: 587 if(writeRGBA) 588 { 589 *Pointer<Short4>(element) = Short4(RoundInt(c)); 590 } 591 else 592 { 593 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 594 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } 595 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); } 596 } 597 if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); } 598 break; 599 case FORMAT_G16R16I: 600 if(writeR && writeG) 601 { 602 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c))); 603 } 604 else 605 { 606 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 607 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } 608 } 609 break; 610 case FORMAT_R16I: 611 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 612 break; 613 case FORMAT_A16B16G16R16UI: 614 case FORMAT_A16B16G16R16: 615 if(writeRGBA) 616 { 617 *Pointer<UShort4>(element) = UShort4(RoundInt(c)); 618 } 619 else 620 { 621 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 622 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } 623 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); } 624 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); } 625 } 626 break; 627 case FORMAT_X16B16G16R16UI: 628 if(writeRGBA) 629 { 630 *Pointer<UShort4>(element) = UShort4(RoundInt(c)); 631 } 632 else 633 { 634 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 635 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } 636 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); } 637 } 638 if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); } 639 break; 640 case FORMAT_G16R16UI: 641 case FORMAT_G16R16: 642 if(writeR && writeG) 643 { 644 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c))); 645 } 646 else 647 { 648 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 649 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } 650 } 651 break; 652 case FORMAT_R16UI: 653 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 654 break; 655 case FORMAT_A32B32G32R32I: 656 if(writeRGBA) 657 { 658 *Pointer<Int4>(element) = RoundInt(c); 659 } 660 else 661 { 662 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } 663 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } 664 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); } 665 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); } 666 } 667 break; 668 case FORMAT_X32B32G32R32I: 669 if(writeRGBA) 670 { 671 *Pointer<Int4>(element) = RoundInt(c); 672 } 673 else 674 { 675 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } 676 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } 677 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); } 678 } 679 if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); } 680 break; 681 case FORMAT_G32R32I: 682 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } 683 case FORMAT_R32I: 684 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } 685 break; 686 case FORMAT_A32B32G32R32UI: 687 if(writeRGBA) 688 { 689 *Pointer<UInt4>(element) = UInt4(RoundInt(c)); 690 } 691 else 692 { 693 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } 694 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } 695 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); } 696 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); } 697 } 698 break; 699 case FORMAT_X32B32G32R32UI: 700 if(writeRGBA) 701 { 702 *Pointer<UInt4>(element) = UInt4(RoundInt(c)); 703 } 704 else 705 { 706 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } 707 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } 708 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); } 709 } 710 if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); } 711 break; 712 case FORMAT_G32R32UI: 713 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } 714 case FORMAT_R32UI: 715 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } 716 break; 717 case FORMAT_R5G6B5: 718 if(writeR && writeG && writeB) 719 { 720 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) | 721 (RoundInt(Float(c.y)) << Int(5)) | 722 (RoundInt(Float(c.x)) << Int(11))); 723 } 724 else 725 { 726 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000); 727 unsigned short unmask = ~mask; 728 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | 729 (UShort(RoundInt(Float(c.z)) | 730 (RoundInt(Float(c.y)) << Int(5)) | 731 (RoundInt(Float(c.x)) << Int(11))) & UShort(mask)); 732 } 733 break; 734 case FORMAT_A2B10G10R10: 735 case FORMAT_A2B10G10R10UI: 736 if(writeRGBA) 737 { 738 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) | 739 (RoundInt(Float(c.y)) << 10) | 740 (RoundInt(Float(c.z)) << 20) | 741 (RoundInt(Float(c.w)) << 30)); 742 } 743 else 744 { 745 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) | 746 (writeB ? 0x3FF00000 : 0x0000) | 747 (writeG ? 0x000FFC00 : 0x0000) | 748 (writeR ? 0x000003FF : 0x0000); 749 unsigned int unmask = ~mask; 750 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) | 751 (UInt(RoundInt(Float(c.x)) | 752 (RoundInt(Float(c.y)) << 10) | 753 (RoundInt(Float(c.z)) << 20) | 754 (RoundInt(Float(c.w)) << 30)) & UInt(mask)); 755 } 756 break; 757 case FORMAT_D16: 758 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); 759 break; 760 case FORMAT_D24S8: 761 case FORMAT_D24X8: 762 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8); 763 break; 764 case FORMAT_D32: 765 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x))); 766 break; 767 case FORMAT_D32F_COMPLEMENTARY: 768 case FORMAT_D32FS8_COMPLEMENTARY: 769 *Pointer<Float>(element) = 1.0f - c.x; 770 break; 771 case FORMAT_D32F: 772 case FORMAT_D32FS8: 773 case FORMAT_D32F_LOCKABLE: 774 case FORMAT_D32FS8_TEXTURE: 775 case FORMAT_D32F_SHADOW: 776 case FORMAT_D32FS8_SHADOW: 777 *Pointer<Float>(element) = c.x; 778 break; 779 case FORMAT_S8: 780 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); 781 break; 782 default: 783 return false; 784 } 785 return true; 786 } 787 read(Int4 & c,Pointer<Byte> element,const State & state)788 bool Blitter::read(Int4 &c, Pointer<Byte> element, const State &state) 789 { 790 c = Int4(0, 0, 0, 1); 791 792 switch(state.sourceFormat) 793 { 794 case FORMAT_A8B8G8R8I: 795 c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3); 796 case FORMAT_X8B8G8R8I: 797 c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2); 798 case FORMAT_G8R8I: 799 c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1); 800 case FORMAT_R8I: 801 c = Insert(c, Int(*Pointer<SByte>(element)), 0); 802 break; 803 case FORMAT_A8B8G8R8UI: 804 c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3); 805 case FORMAT_X8B8G8R8UI: 806 c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2); 807 case FORMAT_G8R8UI: 808 c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1); 809 case FORMAT_R8UI: 810 c = Insert(c, Int(*Pointer<Byte>(element)), 0); 811 break; 812 case FORMAT_A16B16G16R16I: 813 c = Insert(c, Int(*Pointer<Short>(element + 6)), 3); 814 case FORMAT_X16B16G16R16I: 815 c = Insert(c, Int(*Pointer<Short>(element + 4)), 2); 816 case FORMAT_G16R16I: 817 c = Insert(c, Int(*Pointer<Short>(element + 2)), 1); 818 case FORMAT_R16I: 819 c = Insert(c, Int(*Pointer<Short>(element)), 0); 820 break; 821 case FORMAT_A16B16G16R16UI: 822 c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3); 823 case FORMAT_X16B16G16R16UI: 824 c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2); 825 case FORMAT_G16R16UI: 826 c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1); 827 case FORMAT_R16UI: 828 c = Insert(c, Int(*Pointer<UShort>(element)), 0); 829 break; 830 case FORMAT_A32B32G32R32I: 831 case FORMAT_A32B32G32R32UI: 832 c = *Pointer<Int4>(element); 833 break; 834 case FORMAT_X32B32G32R32I: 835 case FORMAT_X32B32G32R32UI: 836 c = Insert(c, *Pointer<Int>(element + 8), 2); 837 case FORMAT_G32R32I: 838 case FORMAT_G32R32UI: 839 c = Insert(c, *Pointer<Int>(element + 4), 1); 840 case FORMAT_R32I: 841 case FORMAT_R32UI: 842 c = Insert(c, *Pointer<Int>(element), 0); 843 break; 844 default: 845 return false; 846 } 847 848 return true; 849 } 850 write(Int4 & c,Pointer<Byte> element,const State & state)851 bool Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) 852 { 853 bool writeR = state.writeRed; 854 bool writeG = state.writeGreen; 855 bool writeB = state.writeBlue; 856 bool writeA = state.writeAlpha; 857 bool writeRGBA = writeR && writeG && writeB && writeA; 858 859 switch(state.destFormat) 860 { 861 case FORMAT_A8B8G8R8I: 862 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); } 863 case FORMAT_X8B8G8R8I: 864 if(writeA && (state.destFormat != FORMAT_A8B8G8R8I)) 865 { 866 *Pointer<SByte>(element + 3) = SByte(0x7F); 867 } 868 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); } 869 case FORMAT_G8R8I: 870 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); } 871 case FORMAT_R8I: 872 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); } 873 break; 874 case FORMAT_A8B8G8R8UI: 875 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); } 876 case FORMAT_X8B8G8R8UI: 877 if(writeA && (state.destFormat != FORMAT_A8B8G8R8UI)) 878 { 879 *Pointer<Byte>(element + 3) = Byte(0xFF); 880 } 881 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); } 882 case FORMAT_G8R8UI: 883 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); } 884 case FORMAT_R8UI: 885 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); } 886 break; 887 case FORMAT_A16B16G16R16I: 888 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); } 889 case FORMAT_X16B16G16R16I: 890 if(writeA && (state.destFormat != FORMAT_A16B16G16R16I)) 891 { 892 *Pointer<Short>(element + 6) = Short(0x7FFF); 893 } 894 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); } 895 case FORMAT_G16R16I: 896 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); } 897 case FORMAT_R16I: 898 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); } 899 break; 900 case FORMAT_A16B16G16R16UI: 901 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); } 902 case FORMAT_X16B16G16R16UI: 903 if(writeA && (state.destFormat != FORMAT_A16B16G16R16UI)) 904 { 905 *Pointer<UShort>(element + 6) = UShort(0xFFFF); 906 } 907 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); } 908 case FORMAT_G16R16UI: 909 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); } 910 case FORMAT_R16UI: 911 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); } 912 break; 913 case FORMAT_A32B32G32R32I: 914 if(writeRGBA) 915 { 916 *Pointer<Int4>(element) = c; 917 } 918 else 919 { 920 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 921 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } 922 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); } 923 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); } 924 } 925 break; 926 case FORMAT_X32B32G32R32I: 927 if(writeRGBA) 928 { 929 *Pointer<Int4>(element) = c; 930 } 931 else 932 { 933 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 934 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } 935 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); } 936 } 937 if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); } 938 break; 939 case FORMAT_G32R32I: 940 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 941 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } 942 break; 943 case FORMAT_R32I: 944 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 945 break; 946 case FORMAT_A32B32G32R32UI: 947 if(writeRGBA) 948 { 949 *Pointer<UInt4>(element) = As<UInt4>(c); 950 } 951 else 952 { 953 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } 954 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } 955 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); } 956 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); } 957 } 958 break; 959 case FORMAT_X32B32G32R32UI: 960 if(writeRGBA) 961 { 962 *Pointer<UInt4>(element) = As<UInt4>(c); 963 } 964 else 965 { 966 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } 967 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } 968 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); } 969 } 970 if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); } 971 break; 972 case FORMAT_G32R32UI: 973 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } 974 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } 975 break; 976 case FORMAT_R32UI: 977 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } 978 break; 979 default: 980 return false; 981 } 982 983 return true; 984 } 985 GetScale(float4 & scale,Format format)986 bool Blitter::GetScale(float4 &scale, Format format) 987 { 988 switch(format) 989 { 990 case FORMAT_L8: 991 case FORMAT_A8: 992 case FORMAT_A8R8G8B8: 993 case FORMAT_X8R8G8B8: 994 case FORMAT_R8: 995 case FORMAT_G8R8: 996 case FORMAT_R8G8B8: 997 case FORMAT_B8G8R8: 998 case FORMAT_X8B8G8R8: 999 case FORMAT_A8B8G8R8: 1000 case FORMAT_SRGB8_X8: 1001 case FORMAT_SRGB8_A8: 1002 scale = vector(0xFF, 0xFF, 0xFF, 0xFF); 1003 break; 1004 case FORMAT_R8_SNORM: 1005 case FORMAT_G8R8_SNORM: 1006 case FORMAT_X8B8G8R8_SNORM: 1007 case FORMAT_A8B8G8R8_SNORM: 1008 scale = vector(0x7F, 0x7F, 0x7F, 0x7F); 1009 break; 1010 case FORMAT_A16B16G16R16: 1011 scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); 1012 break; 1013 case FORMAT_R8I: 1014 case FORMAT_R8UI: 1015 case FORMAT_G8R8I: 1016 case FORMAT_G8R8UI: 1017 case FORMAT_X8B8G8R8I: 1018 case FORMAT_X8B8G8R8UI: 1019 case FORMAT_A8B8G8R8I: 1020 case FORMAT_A8B8G8R8UI: 1021 case FORMAT_R16I: 1022 case FORMAT_R16UI: 1023 case FORMAT_G16R16: 1024 case FORMAT_G16R16I: 1025 case FORMAT_G16R16UI: 1026 case FORMAT_X16B16G16R16I: 1027 case FORMAT_X16B16G16R16UI: 1028 case FORMAT_A16B16G16R16I: 1029 case FORMAT_A16B16G16R16UI: 1030 case FORMAT_R32I: 1031 case FORMAT_R32UI: 1032 case FORMAT_G32R32I: 1033 case FORMAT_G32R32UI: 1034 case FORMAT_X32B32G32R32I: 1035 case FORMAT_X32B32G32R32UI: 1036 case FORMAT_A32B32G32R32I: 1037 case FORMAT_A32B32G32R32UI: 1038 case FORMAT_A32B32G32R32F: 1039 case FORMAT_X32B32G32R32F: 1040 case FORMAT_X32B32G32R32F_UNSIGNED: 1041 case FORMAT_B32G32R32F: 1042 case FORMAT_G32R32F: 1043 case FORMAT_R32F: 1044 case FORMAT_A2B10G10R10UI: 1045 scale = vector(1.0f, 1.0f, 1.0f, 1.0f); 1046 break; 1047 case FORMAT_R5G6B5: 1048 scale = vector(0x1F, 0x3F, 0x1F, 1.0f); 1049 break; 1050 case FORMAT_A2B10G10R10: 1051 scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03); 1052 break; 1053 case FORMAT_D16: 1054 scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f); 1055 break; 1056 case FORMAT_D24S8: 1057 case FORMAT_D24X8: 1058 scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f); 1059 break; 1060 case FORMAT_D32: 1061 scale = vector(static_cast<float>(0xFFFFFFFF), 0.0f, 0.0f, 0.0f); 1062 break; 1063 case FORMAT_D32F: 1064 case FORMAT_D32FS8: 1065 case FORMAT_D32F_COMPLEMENTARY: 1066 case FORMAT_D32FS8_COMPLEMENTARY: 1067 case FORMAT_D32F_LOCKABLE: 1068 case FORMAT_D32FS8_TEXTURE: 1069 case FORMAT_D32F_SHADOW: 1070 case FORMAT_D32FS8_SHADOW: 1071 case FORMAT_S8: 1072 scale = vector(1.0f, 1.0f, 1.0f, 1.0f); 1073 break; 1074 default: 1075 return false; 1076 } 1077 1078 return true; 1079 } 1080 ApplyScaleAndClamp(Float4 & value,const State & state,bool preScaled)1081 bool Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled) 1082 { 1083 float4 scale, unscale; 1084 if(state.clearOperation && 1085 Surface::isNonNormalizedInteger(state.sourceFormat) && 1086 !Surface::isNonNormalizedInteger(state.destFormat)) 1087 { 1088 // If we're clearing a buffer from an int or uint color into a normalized color, 1089 // then the whole range of the int or uint color must be scaled between 0 and 1. 1090 switch(state.sourceFormat) 1091 { 1092 case FORMAT_A32B32G32R32I: 1093 unscale = replicate(static_cast<float>(0x7FFFFFFF)); 1094 break; 1095 case FORMAT_A32B32G32R32UI: 1096 unscale = replicate(static_cast<float>(0xFFFFFFFF)); 1097 break; 1098 default: 1099 return false; 1100 } 1101 } 1102 else if(!GetScale(unscale, state.sourceFormat)) 1103 { 1104 return false; 1105 } 1106 1107 if(!GetScale(scale, state.destFormat)) 1108 { 1109 return false; 1110 } 1111 1112 bool srcSRGB = Surface::isSRGBformat(state.sourceFormat); 1113 bool dstSRGB = Surface::isSRGBformat(state.destFormat); 1114 1115 if(state.convertSRGB && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded. 1116 { 1117 value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale 1118 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale 1119 value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value); 1120 value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale 1121 } 1122 else if(unscale != scale) 1123 { 1124 value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w); 1125 } 1126 1127 if(state.destFormat == FORMAT_X32B32G32R32F_UNSIGNED) 1128 { 1129 value = Max(value, Float4(0.0f)); // TODO: Only necessary if source is signed. 1130 } 1131 else if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat)) 1132 { 1133 value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w)); 1134 1135 value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x, 1136 Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y, 1137 Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z, 1138 Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w)); 1139 } 1140 1141 return true; 1142 } 1143 ComputeOffset(Int & x,Int & y,Int & pitchB,int bytes,bool quadLayout)1144 Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout) 1145 { 1146 if(!quadLayout) 1147 { 1148 return y * pitchB + x * bytes; 1149 } 1150 else 1151 { 1152 // (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1) 1153 return (y & Int(~1)) * pitchB + 1154 ((y & Int(1)) * 2 + x * 2 - (x & Int(1))) * bytes; 1155 } 1156 } 1157 LinearToSRGB(Float4 & c)1158 Float4 Blitter::LinearToSRGB(Float4 &c) 1159 { 1160 Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f); 1161 Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f); 1162 1163 Float4 s = c; 1164 s.xyz = Max(lc, ec); 1165 1166 return s; 1167 } 1168 sRGBtoLinear(Float4 & c)1169 Float4 Blitter::sRGBtoLinear(Float4 &c) 1170 { 1171 Float4 lc = c * Float4(1.0f / 12.92f); 1172 Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f)); 1173 1174 Int4 linear = CmpLT(c, Float4(0.04045f)); 1175 1176 Float4 s = c; 1177 s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // FIXME: IfThenElse() 1178 1179 return s; 1180 } 1181 generate(const State & state)1182 Routine *Blitter::generate(const State &state) 1183 { 1184 Function<Void(Pointer<Byte>)> function; 1185 { 1186 Pointer<Byte> blit(function.Arg<0>()); 1187 1188 Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source)); 1189 Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest)); 1190 Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB)); 1191 Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB)); 1192 1193 Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0)); 1194 Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0)); 1195 Float w = *Pointer<Float>(blit + OFFSET(BlitData,w)); 1196 Float h = *Pointer<Float>(blit + OFFSET(BlitData,h)); 1197 1198 Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d)); 1199 Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d)); 1200 Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d)); 1201 Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d)); 1202 1203 Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth)); 1204 Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight)); 1205 1206 bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat); 1207 bool intDst = Surface::isNonNormalizedInteger(state.destFormat); 1208 bool intBoth = intSrc && intDst; 1209 bool srcQuadLayout = Surface::hasQuadLayout(state.sourceFormat); 1210 bool dstQuadLayout = Surface::hasQuadLayout(state.destFormat); 1211 int srcBytes = Surface::bytes(state.sourceFormat); 1212 int dstBytes = Surface::bytes(state.destFormat); 1213 1214 bool hasConstantColorI = false; 1215 Int4 constantColorI; 1216 bool hasConstantColorF = false; 1217 Float4 constantColorF; 1218 if(state.clearOperation) 1219 { 1220 if(intBoth) // Integer types 1221 { 1222 if(!read(constantColorI, source, state)) 1223 { 1224 return nullptr; 1225 } 1226 hasConstantColorI = true; 1227 } 1228 else 1229 { 1230 if(!read(constantColorF, source, state)) 1231 { 1232 return nullptr; 1233 } 1234 hasConstantColorF = true; 1235 1236 if(!ApplyScaleAndClamp(constantColorF, state)) 1237 { 1238 return nullptr; 1239 } 1240 } 1241 } 1242 1243 For(Int j = y0d, j < y1d, j++) 1244 { 1245 Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h; 1246 Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB; 1247 1248 For(Int i = x0d, i < x1d, i++) 1249 { 1250 Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w; 1251 Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes; 1252 1253 if(hasConstantColorI) 1254 { 1255 if(!write(constantColorI, d, state)) 1256 { 1257 return nullptr; 1258 } 1259 } 1260 else if(hasConstantColorF) 1261 { 1262 for(int s = 0; s < state.destSamples; s++) 1263 { 1264 if(!write(constantColorF, d, state)) 1265 { 1266 return nullptr; 1267 } 1268 1269 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB)); 1270 } 1271 } 1272 else if(intBoth) // Integer types do not support filtering 1273 { 1274 Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision 1275 Int X = Int(x); 1276 Int Y = Int(y); 1277 1278 if(state.clampToEdge) 1279 { 1280 X = Clamp(X, 0, sWidth - 1); 1281 Y = Clamp(Y, 0, sHeight - 1); 1282 } 1283 1284 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout); 1285 1286 if(!read(color, s, state)) 1287 { 1288 return nullptr; 1289 } 1290 1291 if(!write(color, d, state)) 1292 { 1293 return nullptr; 1294 } 1295 } 1296 else 1297 { 1298 Float4 color; 1299 1300 bool preScaled = false; 1301 if(!state.filter || intSrc) 1302 { 1303 Int X = Int(x); 1304 Int Y = Int(y); 1305 1306 if(state.clampToEdge) 1307 { 1308 X = Clamp(X, 0, sWidth - 1); 1309 Y = Clamp(Y, 0, sHeight - 1); 1310 } 1311 1312 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout); 1313 1314 if(!read(color, s, state)) 1315 { 1316 return nullptr; 1317 } 1318 } 1319 else // Bilinear filtering 1320 { 1321 Float X = x; 1322 Float Y = y; 1323 1324 if(state.clampToEdge) 1325 { 1326 X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f); 1327 Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f); 1328 } 1329 1330 Float x0 = X - 0.5f; 1331 Float y0 = Y - 0.5f; 1332 1333 Int X0 = Max(Int(x0), 0); 1334 Int Y0 = Max(Int(y0), 0); 1335 1336 Int X1 = X0 + 1; 1337 Int Y1 = Y0 + 1; 1338 X1 = IfThenElse(X1 >= sWidth, X0, X1); 1339 Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1); 1340 1341 Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout); 1342 Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout); 1343 Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout); 1344 Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout); 1345 1346 Float4 c00; if(!read(c00, s00, state)) return nullptr; 1347 Float4 c01; if(!read(c01, s01, state)) return nullptr; 1348 Float4 c10; if(!read(c10, s10, state)) return nullptr; 1349 Float4 c11; if(!read(c11, s11, state)) return nullptr; 1350 1351 if(state.convertSRGB && Surface::isSRGBformat(state.sourceFormat)) // sRGB -> RGB 1352 { 1353 if(!ApplyScaleAndClamp(c00, state)) return nullptr; 1354 if(!ApplyScaleAndClamp(c01, state)) return nullptr; 1355 if(!ApplyScaleAndClamp(c10, state)) return nullptr; 1356 if(!ApplyScaleAndClamp(c11, state)) return nullptr; 1357 preScaled = true; 1358 } 1359 1360 Float4 fx = Float4(x0 - Float(X0)); 1361 Float4 fy = Float4(y0 - Float(Y0)); 1362 Float4 ix = Float4(1.0f) - fx; 1363 Float4 iy = Float4(1.0f) - fy; 1364 1365 color = (c00 * ix + c01 * fx) * iy + 1366 (c10 * ix + c11 * fx) * fy; 1367 } 1368 1369 if(!ApplyScaleAndClamp(color, state, preScaled)) 1370 { 1371 return nullptr; 1372 } 1373 1374 for(int s = 0; s < state.destSamples; s++) 1375 { 1376 if(!write(color, d, state)) 1377 { 1378 return nullptr; 1379 } 1380 1381 d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB)); 1382 } 1383 } 1384 } 1385 } 1386 } 1387 1388 return function("BlitRoutine"); 1389 } 1390 blitReactor(Surface * source,const SliceRectF & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)1391 bool Blitter::blitReactor(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options &options) 1392 { 1393 ASSERT(!options.clearOperation || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1))); 1394 1395 Rect dRect = destRect; 1396 RectF sRect = sourceRect; 1397 if(destRect.x0 > destRect.x1) 1398 { 1399 swap(dRect.x0, dRect.x1); 1400 swap(sRect.x0, sRect.x1); 1401 } 1402 if(destRect.y0 > destRect.y1) 1403 { 1404 swap(dRect.y0, dRect.y1); 1405 swap(sRect.y0, sRect.y1); 1406 } 1407 1408 State state(options); 1409 state.clampToEdge = (sourceRect.x0 < 0.0f) || 1410 (sourceRect.y0 < 0.0f) || 1411 (sourceRect.x1 > (float)source->getWidth()) || 1412 (sourceRect.y1 > (float)source->getHeight()); 1413 1414 bool useSourceInternal = !source->isExternalDirty(); 1415 bool useDestInternal = !dest->isExternalDirty(); 1416 bool isStencil = options.useStencil; 1417 1418 state.sourceFormat = isStencil ? source->getStencilFormat() : source->getFormat(useSourceInternal); 1419 state.destFormat = isStencil ? dest->getStencilFormat() : dest->getFormat(useDestInternal); 1420 state.destSamples = dest->getSamples(); 1421 1422 criticalSection.lock(); 1423 Routine *blitRoutine = blitCache->query(state); 1424 1425 if(!blitRoutine) 1426 { 1427 blitRoutine = generate(state); 1428 1429 if(!blitRoutine) 1430 { 1431 criticalSection.unlock(); 1432 return false; 1433 } 1434 1435 blitCache->add(state, blitRoutine); 1436 } 1437 1438 criticalSection.unlock(); 1439 1440 void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry(); 1441 1442 BlitData data; 1443 1444 bool isRGBA = options.writeMask == 0xF; 1445 bool isEntireDest = dest->isEntire(destRect); 1446 1447 data.source = isStencil ? source->lockStencil(0, 0, 0, sw::PUBLIC) : 1448 source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal); 1449 data.dest = isStencil ? dest->lockStencil(0, 0, 0, sw::PUBLIC) : 1450 dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal); 1451 data.sPitchB = isStencil ? source->getStencilPitchB() : source->getPitchB(useSourceInternal); 1452 data.dPitchB = isStencil ? dest->getStencilPitchB() : dest->getPitchB(useDestInternal); 1453 data.dSliceB = isStencil ? dest->getStencilSliceB() : dest->getSliceB(useDestInternal); 1454 1455 data.w = sRect.width() / dRect.width(); 1456 data.h = sRect.height() / dRect.height(); 1457 data.x0 = sRect.x0 + (0.5f - dRect.x0) * data.w; 1458 data.y0 = sRect.y0 + (0.5f - dRect.y0) * data.h; 1459 1460 data.x0d = dRect.x0; 1461 data.x1d = dRect.x1; 1462 data.y0d = dRect.y0; 1463 data.y1d = dRect.y1; 1464 1465 data.sWidth = source->getWidth(); 1466 data.sHeight = source->getHeight(); 1467 1468 blitFunction(&data); 1469 1470 if(isStencil) 1471 { 1472 source->unlockStencil(); 1473 dest->unlockStencil(); 1474 } 1475 else 1476 { 1477 source->unlock(useSourceInternal); 1478 dest->unlock(useDestInternal); 1479 } 1480 1481 return true; 1482 } 1483 } 1484