1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Blitter.hpp" 16 17 #include "Pipeline/ShaderCore.hpp" 18 #include "Reactor/Reactor.hpp" 19 #include "System/Memory.hpp" 20 #include "Vulkan/VkDebug.hpp" 21 22 namespace sw 23 { Blitter()24 Blitter::Blitter() 25 { 26 blitCache = new RoutineCache<State>(1024); 27 } 28 ~Blitter()29 Blitter::~Blitter() 30 { 31 delete blitCache; 32 } 33 clear(void * pixel,VkFormat format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)34 void Blitter::clear(void *pixel, VkFormat format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask) 35 { 36 if(fastClear(pixel, format, dest, dRect, rgbaMask)) 37 { 38 return; 39 } 40 41 sw::Surface *color = sw::Surface::create(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format)); 42 SliceRectF sRect(0.5f, 0.5f, 0.5f, 0.5f, 0); // Sample from the middle. 43 blit(color, sRect, dest, dRect, {rgbaMask}); 44 delete color; 45 } 46 fastClear(void * pixel,VkFormat format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)47 bool Blitter::fastClear(void *pixel, VkFormat format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask) 48 { 49 if(format != VK_FORMAT_R32G32B32A32_SFLOAT) 50 { 51 return false; 52 } 53 54 float *color = (float*)pixel; 55 float r = color[0]; 56 float g = color[1]; 57 float b = color[2]; 58 float a = color[3]; 59 60 uint32_t packed; 61 62 switch(dest->getFormat()) 63 { 64 case VK_FORMAT_R5G6B5_UNORM_PACK16: 65 if((rgbaMask & 0x7) != 0x7) return false; 66 packed = ((uint16_t)(31 * b + 0.5f) << 0) | 67 ((uint16_t)(63 * g + 0.5f) << 5) | 68 ((uint16_t)(31 * r + 0.5f) << 11); 69 break; 70 case VK_FORMAT_B5G6R5_UNORM_PACK16: 71 if((rgbaMask & 0x7) != 0x7) return false; 72 packed = ((uint16_t)(31 * r + 0.5f) << 0) | 73 ((uint16_t)(63 * g + 0.5f) << 5) | 74 ((uint16_t)(31 * b + 0.5f) << 11); 75 break; 76 case VK_FORMAT_A8B8G8R8_UINT_PACK32: 77 case VK_FORMAT_A8B8G8R8_UNORM_PACK32: 78 case VK_FORMAT_R8G8B8A8_UNORM: 79 if((rgbaMask & 0xF) != 0xF) return false; 80 packed = ((uint32_t)(255 * a + 0.5f) << 24) | 81 ((uint32_t)(255 * b + 0.5f) << 16) | 82 ((uint32_t)(255 * g + 0.5f) << 8) | 83 ((uint32_t)(255 * r + 0.5f) << 0); 84 break; 85 case VK_FORMAT_B8G8R8A8_UNORM: 86 if((rgbaMask & 0xF) != 0xF) return false; 87 packed = ((uint32_t)(255 * a + 0.5f) << 24) | 88 ((uint32_t)(255 * r + 0.5f) << 16) | 89 ((uint32_t)(255 * g + 0.5f) << 8) | 90 ((uint32_t)(255 * b + 0.5f) << 0); 91 break; 92 case VK_FORMAT_B10G11R11_UFLOAT_PACK32: 93 if((rgbaMask & 0x7) != 0x7) return false; 94 packed = R11G11B10F(color); 95 break; 96 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: 97 if((rgbaMask & 0x7) != 0x7) return false; 98 packed = RGB9E5(color); 99 break; 100 default: 101 return false; 102 } 103 104 bool useDestInternal = !dest->isExternalDirty(); 105 uint8_t *slice = (uint8_t*)dest->lock(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC, useDestInternal); 106 107 for(int j = 0; j < dest->getSamples(); j++) 108 { 109 uint8_t *d = slice; 110 111 switch(Surface::bytes(dest->getFormat())) 112 { 113 case 2: 114 for(int i = dRect.y0; i < dRect.y1; i++) 115 { 116 sw::clear((uint16_t*)d, packed, dRect.x1 - dRect.x0); 117 d += dest->getPitchB(useDestInternal); 118 } 119 break; 120 case 4: 121 for(int i = dRect.y0; i < dRect.y1; i++) 122 { 123 sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0); 124 d += dest->getPitchB(useDestInternal); 125 } 126 break; 127 default: 128 assert(false); 129 } 130 131 slice += dest->getSliceB(useDestInternal); 132 } 133 134 dest->unlock(useDestInternal); 135 136 return true; 137 } 138 blit(Surface * source,const SliceRectF & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)139 void Blitter::blit(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options) 140 { 141 if(dest->getInternalFormat() == VK_FORMAT_UNDEFINED) 142 { 143 return; 144 } 145 146 if(blitReactor(source, sourceRect, dest, destRect, options)) 147 { 148 return; 149 } 150 151 SliceRectF sRect = sourceRect; 152 SliceRect dRect = destRect; 153 154 bool flipX = destRect.x0 > destRect.x1; 155 bool flipY = destRect.y0 > destRect.y1; 156 157 if(flipX) 158 { 159 swap(dRect.x0, dRect.x1); 160 swap(sRect.x0, sRect.x1); 161 } 162 if(flipY) 163 { 164 swap(dRect.y0, dRect.y1); 165 swap(sRect.y0, sRect.y1); 166 } 167 168 source->lockInternal(0, 0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC); 169 dest->lockInternal(0, 0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC); 170 171 float w = sRect.width() / dRect.width(); 172 float h = sRect.height() / dRect.height(); 173 174 float xStart = sRect.x0 + (0.5f - dRect.x0) * w; 175 float yStart = sRect.y0 + (0.5f - dRect.y0) * h; 176 177 for(int j = dRect.y0; j < dRect.y1; j++) 178 { 179 float y = yStart + j * h; 180 181 for(int i = dRect.x0; i < dRect.x1; i++) 182 { 183 float x = xStart + i * w; 184 185 // FIXME: Support RGBA mask 186 dest->copyInternal(source, i, j, x, y, options.filter); 187 } 188 } 189 190 source->unlockInternal(); 191 dest->unlockInternal(); 192 } 193 blit3D(Surface * source,Surface * dest)194 void Blitter::blit3D(Surface *source, Surface *dest) 195 { 196 source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC); 197 dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC); 198 199 float w = static_cast<float>(source->getWidth()) / static_cast<float>(dest->getWidth()); 200 float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight()); 201 float d = static_cast<float>(source->getDepth()) / static_cast<float>(dest->getDepth()); 202 203 for(int k = 0; k < dest->getDepth(); k++) 204 { 205 float z = (k + 0.5f) * d; 206 207 for(int j = 0; j < dest->getHeight(); j++) 208 { 209 float y = (j + 0.5f) * h; 210 211 for(int i = 0; i < dest->getWidth(); i++) 212 { 213 float x = (i + 0.5f) * w; 214 215 dest->copyInternal(source, i, j, k, x, y, z, true); 216 } 217 } 218 } 219 220 source->unlockInternal(); 221 dest->unlockInternal(); 222 } 223 read(Float4 & c,Pointer<Byte> element,const State & state)224 bool Blitter::read(Float4 &c, Pointer<Byte> element, const State &state) 225 { 226 c = Float4(0.0f, 0.0f, 0.0f, 1.0f); 227 228 switch(state.sourceFormat) 229 { 230 case VK_FORMAT_B4G4R4A4_UNORM_PACK16: 231 c.w = Float(Int(*Pointer<Byte>(element)) & Int(0xF)); 232 c.x = Float((Int(*Pointer<Byte>(element)) >> 4) & Int(0xF)); 233 c.y = Float(Int(*Pointer<Byte>(element + 1)) & Int(0xF)); 234 c.z = Float((Int(*Pointer<Byte>(element + 1)) >> 4) & Int(0xF)); 235 break; 236 case VK_FORMAT_R8_SINT: 237 case VK_FORMAT_R8_SNORM: 238 c.x = Float(Int(*Pointer<SByte>(element))); 239 c.w = float(0x7F); 240 break; 241 case VK_FORMAT_R8_UNORM: 242 case VK_FORMAT_R8_UINT: 243 c.x = Float(Int(*Pointer<Byte>(element))); 244 c.w = float(0xFF); 245 break; 246 case VK_FORMAT_R16_SINT: 247 c.x = Float(Int(*Pointer<Short>(element))); 248 c.w = float(0x7FFF); 249 break; 250 case VK_FORMAT_R16_UINT: 251 c.x = Float(Int(*Pointer<UShort>(element))); 252 c.w = float(0xFFFF); 253 break; 254 case VK_FORMAT_R32_SINT: 255 c.x = Float(*Pointer<Int>(element)); 256 c.w = float(0x7FFFFFFF); 257 break; 258 case VK_FORMAT_R32_UINT: 259 c.x = Float(*Pointer<UInt>(element)); 260 c.w = float(0xFFFFFFFF); 261 break; 262 case VK_FORMAT_B8G8R8A8_SRGB: 263 case VK_FORMAT_B8G8R8A8_UNORM: 264 c = Float4(*Pointer<Byte4>(element)).zyxw; 265 break; 266 case VK_FORMAT_A8B8G8R8_SINT_PACK32: 267 case VK_FORMAT_R8G8B8A8_SINT: 268 case VK_FORMAT_A8B8G8R8_SNORM_PACK32: 269 case VK_FORMAT_R8G8B8A8_SNORM: 270 c = Float4(*Pointer<SByte4>(element)); 271 break; 272 case VK_FORMAT_A8B8G8R8_UINT_PACK32: 273 case VK_FORMAT_A8B8G8R8_UNORM_PACK32: 274 case VK_FORMAT_R8G8B8A8_UNORM: 275 case VK_FORMAT_R8G8B8A8_UINT: 276 case VK_FORMAT_A8B8G8R8_SRGB_PACK32: 277 case VK_FORMAT_R8G8B8A8_SRGB: 278 c = Float4(*Pointer<Byte4>(element)); 279 break; 280 case VK_FORMAT_R16G16B16A16_SINT: 281 c = Float4(*Pointer<Short4>(element)); 282 break; 283 case VK_FORMAT_R16G16B16A16_UNORM: 284 case VK_FORMAT_R16G16B16A16_UINT: 285 c = Float4(*Pointer<UShort4>(element)); 286 break; 287 case VK_FORMAT_R32G32B32A32_SINT: 288 c = Float4(*Pointer<Int4>(element)); 289 break; 290 case VK_FORMAT_R32G32B32A32_UINT: 291 c = Float4(*Pointer<UInt4>(element)); 292 break; 293 case VK_FORMAT_R8G8_SINT: 294 case VK_FORMAT_R8G8_SNORM: 295 c.x = Float(Int(*Pointer<SByte>(element + 0))); 296 c.y = Float(Int(*Pointer<SByte>(element + 1))); 297 c.w = float(0x7F); 298 break; 299 case VK_FORMAT_R8G8_UNORM: 300 case VK_FORMAT_R8G8_UINT: 301 c.x = Float(Int(*Pointer<Byte>(element + 0))); 302 c.y = Float(Int(*Pointer<Byte>(element + 1))); 303 c.w = float(0xFF); 304 break; 305 case VK_FORMAT_R16G16_SINT: 306 c.x = Float(Int(*Pointer<Short>(element + 0))); 307 c.y = Float(Int(*Pointer<Short>(element + 2))); 308 c.w = float(0x7FFF); 309 break; 310 case VK_FORMAT_R16G16_UNORM: 311 case VK_FORMAT_R16G16_UINT: 312 c.x = Float(Int(*Pointer<UShort>(element + 0))); 313 c.y = Float(Int(*Pointer<UShort>(element + 2))); 314 c.w = float(0xFFFF); 315 break; 316 case VK_FORMAT_R32G32_SINT: 317 c.x = Float(*Pointer<Int>(element + 0)); 318 c.y = Float(*Pointer<Int>(element + 4)); 319 c.w = float(0x7FFFFFFF); 320 break; 321 case VK_FORMAT_R32G32_UINT: 322 c.x = Float(*Pointer<UInt>(element + 0)); 323 c.y = Float(*Pointer<UInt>(element + 4)); 324 c.w = float(0xFFFFFFFF); 325 break; 326 case VK_FORMAT_R32G32B32A32_SFLOAT: 327 c = *Pointer<Float4>(element); 328 break; 329 case VK_FORMAT_R32G32_SFLOAT: 330 c.x = *Pointer<Float>(element + 0); 331 c.y = *Pointer<Float>(element + 4); 332 break; 333 case VK_FORMAT_R32_SFLOAT: 334 c.x = *Pointer<Float>(element); 335 break; 336 case VK_FORMAT_R16G16B16A16_SFLOAT: 337 c.w = Float(*Pointer<Half>(element + 6)); 338 case VK_FORMAT_R16G16B16_SFLOAT: 339 c.z = Float(*Pointer<Half>(element + 4)); 340 case VK_FORMAT_R16G16_SFLOAT: 341 c.y = Float(*Pointer<Half>(element + 2)); 342 case VK_FORMAT_R16_SFLOAT: 343 c.x = Float(*Pointer<Half>(element)); 344 break; 345 case VK_FORMAT_B10G11R11_UFLOAT_PACK32: 346 // 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa. 347 // Since the Half float format also has a 5 bit exponent, we can convert these formats to half by 348 // copy/pasting the bits so the the exponent bits and top mantissa bits are aligned to the half format. 349 // In this case, we have: 350 // B B B B B B B B B B G G G G G G G G G G G R R R R R R R R R R R 351 // 1st Short: |xxxxxxxxxx---------------------| 352 // 2nd Short: |xxxx---------------------xxxxxx| 353 // 3rd Short: |--------------------xxxxxxxxxxxx| 354 // These memory reads overlap, but each of them contains an entire channel, so we can read this without 355 // any int -> short conversion. 356 c.x = Float(As<Half>((*Pointer<UShort>(element + 0) & UShort(0x07FF)) << UShort(4))); 357 c.y = Float(As<Half>((*Pointer<UShort>(element + 1) & UShort(0x3FF8)) << UShort(1))); 358 c.z = Float(As<Half>((*Pointer<UShort>(element + 2) & UShort(0xFFC0)) >> UShort(1))); 359 break; 360 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: 361 // This type contains a common 5 bit exponent (E) and a 9 bit the mantissa for R, G and B. 362 c.x = Float(*Pointer<UInt>(element) & UInt(0x000001FF)); // R's mantissa (bits 0-8) 363 c.y = Float((*Pointer<UInt>(element) & UInt(0x0003FE00)) >> 9); // G's mantissa (bits 9-17) 364 c.z = Float((*Pointer<UInt>(element) & UInt(0x07FC0000)) >> 18); // B's mantissa (bits 18-26) 365 c *= Float4( 366 // 2^E, using the exponent (bits 27-31) and treating it as an unsigned integer value 367 Float(UInt(1) << ((*Pointer<UInt>(element) & UInt(0xF8000000)) >> 27)) * 368 // Since the 9 bit mantissa values currently stored in RGB were converted straight 369 // from int to float (in the [0, 1<<9] range instead of the [0, 1] range), they 370 // are (1 << 9) times too high. 371 // Also, the exponent has 5 bits and we compute the exponent bias of floating point 372 // formats using "2^(k-1) - 1", so, in this case, the exponent bias is 2^(5-1)-1 = 15 373 // Exponent bias (15) + number of mantissa bits per component (9) = 24 374 Float(1.0f / (1 << 24))); 375 c.w = 1.0f; 376 break; 377 case VK_FORMAT_R5G6B5_UNORM_PACK16: 378 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11))); 379 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5))); 380 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F))); 381 break; 382 case VK_FORMAT_A1R5G5B5_UNORM_PACK16: 383 c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15))); 384 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10))); 385 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x03E0)) >> UShort(5))); 386 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F))); 387 break; 388 case VK_FORMAT_A2B10G10R10_UNORM_PACK32: 389 case VK_FORMAT_A2B10G10R10_UINT_PACK32: 390 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF)))); 391 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10)); 392 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20)); 393 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30)); 394 break; 395 case VK_FORMAT_D16_UNORM: 396 c.x = Float(Int((*Pointer<UShort>(element)))); 397 break; 398 case VK_FORMAT_D24_UNORM_S8_UINT: 399 case VK_FORMAT_X8_D24_UNORM_PACK32: 400 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8)); 401 break; 402 case VK_FORMAT_D32_SFLOAT: 403 case VK_FORMAT_D32_SFLOAT_S8_UINT: 404 c.x = *Pointer<Float>(element); 405 break; 406 case VK_FORMAT_S8_UINT: 407 c.x = Float(Int(*Pointer<Byte>(element))); 408 break; 409 default: 410 return false; 411 } 412 413 return true; 414 } 415 write(Float4 & c,Pointer<Byte> element,const State & state)416 bool Blitter::write(Float4 &c, Pointer<Byte> element, const State &state) 417 { 418 bool writeR = state.writeRed; 419 bool writeG = state.writeGreen; 420 bool writeB = state.writeBlue; 421 bool writeA = state.writeAlpha; 422 bool writeRGBA = writeR && writeG && writeB && writeA; 423 424 switch(state.destFormat) 425 { 426 case VK_FORMAT_R4G4_UNORM_PACK8: 427 if(writeR | writeG) 428 { 429 if(!writeR) 430 { 431 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) | 432 (*Pointer<Byte>(element) & Byte(0xF0)); 433 } 434 else if(!writeG) 435 { 436 *Pointer<Byte>(element) = (*Pointer<Byte>(element) & Byte(0xF)) | 437 (Byte(RoundInt(Float(c.x))) << Byte(4)); 438 } 439 else 440 { 441 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) | 442 (Byte(RoundInt(Float(c.x))) << Byte(4)); 443 } 444 } 445 break; 446 case VK_FORMAT_R4G4B4A4_UNORM_PACK16: 447 if(writeR || writeG || writeB || writeA) 448 { 449 *Pointer<UShort>(element) = (writeR ? ((UShort(RoundInt(Float(c.x))) & UShort(0xF)) << UShort(12)) : 450 (*Pointer<UShort>(element) & UShort(0x000F))) | 451 (writeG ? ((UShort(RoundInt(Float(c.y))) & UShort(0xF)) << UShort(8)) : 452 (*Pointer<UShort>(element) & UShort(0x00F0))) | 453 (writeB ? ((UShort(RoundInt(Float(c.z))) & UShort(0xF)) << UShort(4)) : 454 (*Pointer<UShort>(element) & UShort(0x0F00))) | 455 (writeA ? (UShort(RoundInt(Float(c.w))) & UShort(0xF)) : 456 (*Pointer<UShort>(element) & UShort(0xF000))); 457 } 458 break; 459 case VK_FORMAT_B4G4R4A4_UNORM_PACK16: 460 if(writeRGBA) 461 { 462 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) & Int(0xF)) | 463 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) | 464 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) | 465 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12); 466 } 467 else 468 { 469 unsigned short mask = (writeA ? 0x000F : 0x0000) | 470 (writeR ? 0x00F0 : 0x0000) | 471 (writeG ? 0x0F00 : 0x0000) | 472 (writeB ? 0xF000 : 0x0000); 473 unsigned short unmask = ~mask; 474 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | 475 ((UShort(RoundInt(Float(c.w)) & Int(0xF)) | 476 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) | 477 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) | 478 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12)) & UShort(mask)); 479 } 480 break; 481 case VK_FORMAT_B8G8R8A8_SRGB: 482 case VK_FORMAT_B8G8R8A8_UNORM: 483 if(writeRGBA) 484 { 485 Short4 c0 = RoundShort4(c.zyxw); 486 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); 487 } 488 else 489 { 490 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } 491 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 492 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } 493 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } 494 } 495 break; 496 case VK_FORMAT_B8G8R8_SNORM: 497 if(writeB) { *Pointer<SByte>(element + 0) = SByte(RoundInt(Float(c.z))); } 498 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); } 499 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); } 500 break; 501 case VK_FORMAT_B8G8R8_UNORM: 502 case VK_FORMAT_B8G8R8_SRGB: 503 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } 504 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 505 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } 506 break; 507 case VK_FORMAT_A8B8G8R8_UNORM_PACK32: 508 case VK_FORMAT_R8G8B8A8_UNORM: 509 case VK_FORMAT_A8B8G8R8_SRGB_PACK32: 510 case VK_FORMAT_R8G8B8A8_SRGB: 511 case VK_FORMAT_A8B8G8R8_UINT_PACK32: 512 case VK_FORMAT_R8G8B8A8_UINT: 513 case VK_FORMAT_R8G8B8A8_USCALED: 514 case VK_FORMAT_A8B8G8R8_USCALED_PACK32: 515 if(writeRGBA) 516 { 517 Short4 c0 = RoundShort4(c); 518 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); 519 } 520 else 521 { 522 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); } 523 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 524 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } 525 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } 526 } 527 break; 528 case VK_FORMAT_R32G32B32A32_SFLOAT: 529 if(writeRGBA) 530 { 531 *Pointer<Float4>(element) = c; 532 } 533 else 534 { 535 if(writeR) { *Pointer<Float>(element) = c.x; } 536 if(writeG) { *Pointer<Float>(element + 4) = c.y; } 537 if(writeB) { *Pointer<Float>(element + 8) = c.z; } 538 if(writeA) { *Pointer<Float>(element + 12) = c.w; } 539 } 540 break; 541 case VK_FORMAT_R32G32B32_SFLOAT: 542 if(writeR) { *Pointer<Float>(element) = c.x; } 543 if(writeG) { *Pointer<Float>(element + 4) = c.y; } 544 if(writeB) { *Pointer<Float>(element + 8) = c.z; } 545 break; 546 case VK_FORMAT_R32G32_SFLOAT: 547 if(writeR && writeG) 548 { 549 *Pointer<Float2>(element) = Float2(c); 550 } 551 else 552 { 553 if(writeR) { *Pointer<Float>(element) = c.x; } 554 if(writeG) { *Pointer<Float>(element + 4) = c.y; } 555 } 556 break; 557 case VK_FORMAT_R32_SFLOAT: 558 if(writeR) { *Pointer<Float>(element) = c.x; } 559 break; 560 case VK_FORMAT_R16G16B16A16_SFLOAT: 561 if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); } 562 case VK_FORMAT_R16G16B16_SFLOAT: 563 if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); } 564 case VK_FORMAT_R16G16_SFLOAT: 565 if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); } 566 case VK_FORMAT_R16_SFLOAT: 567 if(writeR) { *Pointer<Half>(element) = Half(c.x); } 568 break; 569 case VK_FORMAT_B8G8R8A8_SNORM: 570 if(writeB) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.z))); } 571 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); } 572 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); } 573 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); } 574 break; 575 case VK_FORMAT_A8B8G8R8_SINT_PACK32: 576 case VK_FORMAT_R8G8B8A8_SINT: 577 case VK_FORMAT_A8B8G8R8_SNORM_PACK32: 578 case VK_FORMAT_R8G8B8A8_SNORM: 579 case VK_FORMAT_R8G8B8A8_SSCALED: 580 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32: 581 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); } 582 case VK_FORMAT_R8G8B8_SINT: 583 case VK_FORMAT_R8G8B8_SNORM: 584 case VK_FORMAT_R8G8B8_SSCALED: 585 case VK_FORMAT_R8G8B8_SRGB: 586 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); } 587 case VK_FORMAT_R8G8_SINT: 588 case VK_FORMAT_R8G8_SNORM: 589 case VK_FORMAT_R8G8_SSCALED: 590 case VK_FORMAT_R8G8_SRGB: 591 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); } 592 case VK_FORMAT_R8_SINT: 593 case VK_FORMAT_R8_SNORM: 594 case VK_FORMAT_R8_SSCALED: 595 case VK_FORMAT_R8_SRGB: 596 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); } 597 break; 598 case VK_FORMAT_R8G8B8_UINT: 599 case VK_FORMAT_R8G8B8_UNORM: 600 case VK_FORMAT_R8G8B8_USCALED: 601 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } 602 case VK_FORMAT_R8G8_UINT: 603 case VK_FORMAT_R8G8_UNORM: 604 case VK_FORMAT_R8G8_USCALED: 605 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 606 case VK_FORMAT_R8_UINT: 607 case VK_FORMAT_R8_UNORM: 608 case VK_FORMAT_R8_USCALED: 609 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); } 610 break; 611 case VK_FORMAT_R16G16B16A16_SINT: 612 case VK_FORMAT_R16G16B16A16_SNORM: 613 case VK_FORMAT_R16G16B16A16_SSCALED: 614 if(writeRGBA) 615 { 616 *Pointer<Short4>(element) = Short4(RoundInt(c)); 617 } 618 else 619 { 620 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 621 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } 622 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); } 623 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); } 624 } 625 break; 626 case VK_FORMAT_R16G16B16_SINT: 627 case VK_FORMAT_R16G16B16_SNORM: 628 case VK_FORMAT_R16G16B16_SSCALED: 629 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 630 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } 631 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); } 632 break; 633 case VK_FORMAT_R16G16_SINT: 634 case VK_FORMAT_R16G16_SNORM: 635 case VK_FORMAT_R16G16_SSCALED: 636 if(writeR && writeG) 637 { 638 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c))); 639 } 640 else 641 { 642 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 643 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } 644 } 645 break; 646 case VK_FORMAT_R16_SINT: 647 case VK_FORMAT_R16_SNORM: 648 case VK_FORMAT_R16_SSCALED: 649 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 650 break; 651 case VK_FORMAT_R16G16B16A16_UINT: 652 case VK_FORMAT_R16G16B16A16_UNORM: 653 case VK_FORMAT_R16G16B16A16_USCALED: 654 if(writeRGBA) 655 { 656 *Pointer<UShort4>(element) = UShort4(RoundInt(c)); 657 } 658 else 659 { 660 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 661 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } 662 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); } 663 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); } 664 } 665 break; 666 case VK_FORMAT_R16G16B16_UINT: 667 case VK_FORMAT_R16G16B16_UNORM: 668 case VK_FORMAT_R16G16B16_USCALED: 669 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 670 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } 671 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); } 672 break; 673 case VK_FORMAT_R16G16_UINT: 674 case VK_FORMAT_R16G16_UNORM: 675 case VK_FORMAT_R16G16_USCALED: 676 if(writeR && writeG) 677 { 678 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c))); 679 } 680 else 681 { 682 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 683 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } 684 } 685 break; 686 case VK_FORMAT_R16_UINT: 687 case VK_FORMAT_R16_UNORM: 688 case VK_FORMAT_R16_USCALED: 689 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 690 break; 691 case VK_FORMAT_R32G32B32A32_SINT: 692 if(writeRGBA) 693 { 694 *Pointer<Int4>(element) = RoundInt(c); 695 } 696 else 697 { 698 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } 699 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } 700 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); } 701 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); } 702 } 703 break; 704 case VK_FORMAT_R32G32B32_SINT: 705 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); } 706 case VK_FORMAT_R32G32_SINT: 707 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } 708 case VK_FORMAT_R32_SINT: 709 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } 710 break; 711 case VK_FORMAT_R32G32B32A32_UINT: 712 if(writeRGBA) 713 { 714 *Pointer<UInt4>(element) = UInt4(RoundInt(c)); 715 } 716 else 717 { 718 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } 719 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } 720 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); } 721 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); } 722 } 723 break; 724 case VK_FORMAT_R32G32B32_UINT: 725 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); } 726 case VK_FORMAT_R32G32_UINT: 727 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } 728 case VK_FORMAT_R32_UINT: 729 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } 730 break; 731 case VK_FORMAT_R5G6B5_UNORM_PACK16: 732 if(writeR && writeG && writeB) 733 { 734 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) | 735 (RoundInt(Float(c.y)) << Int(5)) | 736 (RoundInt(Float(c.x)) << Int(11))); 737 } 738 else 739 { 740 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000); 741 unsigned short unmask = ~mask; 742 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | 743 (UShort(RoundInt(Float(c.z)) | 744 (RoundInt(Float(c.y)) << Int(5)) | 745 (RoundInt(Float(c.x)) << Int(11))) & UShort(mask)); 746 } 747 break; 748 case VK_FORMAT_R5G5B5A1_UNORM_PACK16: 749 if(writeRGBA) 750 { 751 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) | 752 (RoundInt(Float(c.z)) << Int(1)) | 753 (RoundInt(Float(c.y)) << Int(6)) | 754 (RoundInt(Float(c.x)) << Int(11))); 755 } 756 else 757 { 758 unsigned short mask = (writeA ? 0x8000 : 0x0000) | 759 (writeR ? 0x7C00 : 0x0000) | 760 (writeG ? 0x03E0 : 0x0000) | 761 (writeB ? 0x001F : 0x0000); 762 unsigned short unmask = ~mask; 763 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | 764 (UShort(RoundInt(Float(c.w)) | 765 (RoundInt(Float(c.z)) << Int(1)) | 766 (RoundInt(Float(c.y)) << Int(6)) | 767 (RoundInt(Float(c.x)) << Int(11))) & UShort(mask)); 768 } 769 break; 770 case VK_FORMAT_B5G5R5A1_UNORM_PACK16: 771 if(writeRGBA) 772 { 773 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) | 774 (RoundInt(Float(c.x)) << Int(1)) | 775 (RoundInt(Float(c.y)) << Int(6)) | 776 (RoundInt(Float(c.z)) << Int(11))); 777 } 778 else 779 { 780 unsigned short mask = (writeA ? 0x8000 : 0x0000) | 781 (writeR ? 0x7C00 : 0x0000) | 782 (writeG ? 0x03E0 : 0x0000) | 783 (writeB ? 0x001F : 0x0000); 784 unsigned short unmask = ~mask; 785 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | 786 (UShort(RoundInt(Float(c.w)) | 787 (RoundInt(Float(c.x)) << Int(1)) | 788 (RoundInt(Float(c.y)) << Int(6)) | 789 (RoundInt(Float(c.z)) << Int(11))) & UShort(mask)); 790 } 791 break; 792 case VK_FORMAT_A1R5G5B5_UNORM_PACK16: 793 if(writeRGBA) 794 { 795 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) | 796 (RoundInt(Float(c.y)) << Int(5)) | 797 (RoundInt(Float(c.x)) << Int(10)) | 798 (RoundInt(Float(c.w)) << Int(15))); 799 } 800 else 801 { 802 unsigned short mask = (writeA ? 0x8000 : 0x0000) | 803 (writeR ? 0x7C00 : 0x0000) | 804 (writeG ? 0x03E0 : 0x0000) | 805 (writeB ? 0x001F : 0x0000); 806 unsigned short unmask = ~mask; 807 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | 808 (UShort(RoundInt(Float(c.z)) | 809 (RoundInt(Float(c.y)) << Int(5)) | 810 (RoundInt(Float(c.x)) << Int(10)) | 811 (RoundInt(Float(c.w)) << Int(15))) & UShort(mask)); 812 } 813 break; 814 case VK_FORMAT_A2B10G10R10_UNORM_PACK32: 815 case VK_FORMAT_A2B10G10R10_UINT_PACK32: 816 case VK_FORMAT_A2B10G10R10_SNORM_PACK32: 817 if(writeRGBA) 818 { 819 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) | 820 (RoundInt(Float(c.y)) << 10) | 821 (RoundInt(Float(c.z)) << 20) | 822 (RoundInt(Float(c.w)) << 30)); 823 } 824 else 825 { 826 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) | 827 (writeB ? 0x3FF00000 : 0x0000) | 828 (writeG ? 0x000FFC00 : 0x0000) | 829 (writeR ? 0x000003FF : 0x0000); 830 unsigned int unmask = ~mask; 831 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) | 832 (UInt(RoundInt(Float(c.x)) | 833 (RoundInt(Float(c.y)) << 10) | 834 (RoundInt(Float(c.z)) << 20) | 835 (RoundInt(Float(c.w)) << 30)) & UInt(mask)); 836 } 837 break; 838 case VK_FORMAT_A2R10G10B10_UNORM_PACK32: 839 case VK_FORMAT_A2R10G10B10_UINT_PACK32: 840 case VK_FORMAT_A2R10G10B10_SNORM_PACK32: 841 if(writeRGBA) 842 { 843 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.z)) | 844 (RoundInt(Float(c.y)) << 10) | 845 (RoundInt(Float(c.x)) << 20) | 846 (RoundInt(Float(c.w)) << 30)); 847 } 848 else 849 { 850 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) | 851 (writeR ? 0x3FF00000 : 0x0000) | 852 (writeG ? 0x000FFC00 : 0x0000) | 853 (writeB ? 0x000003FF : 0x0000); 854 unsigned int unmask = ~mask; 855 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) | 856 (UInt(RoundInt(Float(c.z)) | 857 (RoundInt(Float(c.y)) << 10) | 858 (RoundInt(Float(c.x)) << 20) | 859 (RoundInt(Float(c.w)) << 30)) & UInt(mask)); 860 } 861 break; 862 case VK_FORMAT_D16_UNORM: 863 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); 864 break; 865 case VK_FORMAT_D24_UNORM_S8_UINT: 866 case VK_FORMAT_X8_D24_UNORM_PACK32: 867 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8); 868 break; 869 case VK_FORMAT_D32_SFLOAT: 870 case VK_FORMAT_D32_SFLOAT_S8_UINT: 871 *Pointer<Float>(element) = c.x; 872 break; 873 case VK_FORMAT_S8_UINT: 874 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); 875 break; 876 default: 877 return false; 878 } 879 return true; 880 } 881 read(Int4 & c,Pointer<Byte> element,const State & state)882 bool Blitter::read(Int4 &c, Pointer<Byte> element, const State &state) 883 { 884 c = Int4(0, 0, 0, 1); 885 886 switch(state.sourceFormat) 887 { 888 case VK_FORMAT_A8B8G8R8_SINT_PACK32: 889 case VK_FORMAT_R8G8B8A8_SINT: 890 c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3); 891 c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2); 892 case VK_FORMAT_R8G8_SINT: 893 c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1); 894 case VK_FORMAT_R8_SINT: 895 c = Insert(c, Int(*Pointer<SByte>(element)), 0); 896 break; 897 case VK_FORMAT_A2B10G10R10_UINT_PACK32: 898 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 0); 899 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1); 900 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 2); 901 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3); 902 break; 903 case VK_FORMAT_A8B8G8R8_UINT_PACK32: 904 case VK_FORMAT_R8G8B8A8_UINT: 905 c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3); 906 c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2); 907 case VK_FORMAT_R8G8_UINT: 908 c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1); 909 case VK_FORMAT_R8_UINT: 910 c = Insert(c, Int(*Pointer<Byte>(element)), 0); 911 break; 912 case VK_FORMAT_R16G16B16A16_SINT: 913 c = Insert(c, Int(*Pointer<Short>(element + 6)), 3); 914 c = Insert(c, Int(*Pointer<Short>(element + 4)), 2); 915 case VK_FORMAT_R16G16_SINT: 916 c = Insert(c, Int(*Pointer<Short>(element + 2)), 1); 917 case VK_FORMAT_R16_SINT: 918 c = Insert(c, Int(*Pointer<Short>(element)), 0); 919 break; 920 case VK_FORMAT_R16G16B16A16_UINT: 921 c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3); 922 c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2); 923 case VK_FORMAT_R16G16_UINT: 924 c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1); 925 case VK_FORMAT_R16_UINT: 926 c = Insert(c, Int(*Pointer<UShort>(element)), 0); 927 break; 928 case VK_FORMAT_R32G32B32A32_SINT: 929 case VK_FORMAT_R32G32B32A32_UINT: 930 c = *Pointer<Int4>(element); 931 break; 932 case VK_FORMAT_R32G32_SINT: 933 case VK_FORMAT_R32G32_UINT: 934 c = Insert(c, *Pointer<Int>(element + 4), 1); 935 case VK_FORMAT_R32_SINT: 936 case VK_FORMAT_R32_UINT: 937 c = Insert(c, *Pointer<Int>(element), 0); 938 break; 939 default: 940 return false; 941 } 942 943 return true; 944 } 945 write(Int4 & c,Pointer<Byte> element,const State & state)946 bool Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) 947 { 948 bool writeR = state.writeRed; 949 bool writeG = state.writeGreen; 950 bool writeB = state.writeBlue; 951 bool writeA = state.writeAlpha; 952 bool writeRGBA = writeR && writeG && writeB && writeA; 953 954 switch(state.destFormat) 955 { 956 case VK_FORMAT_A2B10G10R10_UINT_PACK32: 957 c = Min(As<UInt4>(c), UInt4(0x03FF, 0x03FF, 0x03FF, 0x0003)); 958 break; 959 case VK_FORMAT_A8B8G8R8_UINT_PACK32: 960 case VK_FORMAT_R8G8B8A8_UINT: 961 case VK_FORMAT_R8G8B8_UINT: 962 case VK_FORMAT_R8G8_UINT: 963 case VK_FORMAT_R8_UINT: 964 case VK_FORMAT_R8G8B8A8_USCALED: 965 case VK_FORMAT_R8G8B8_USCALED: 966 case VK_FORMAT_R8G8_USCALED: 967 case VK_FORMAT_R8_USCALED: 968 c = Min(As<UInt4>(c), UInt4(0xFF)); 969 break; 970 case VK_FORMAT_R16G16B16A16_UINT: 971 case VK_FORMAT_R16G16B16_UINT: 972 case VK_FORMAT_R16G16_UINT: 973 case VK_FORMAT_R16_UINT: 974 case VK_FORMAT_R16G16B16A16_USCALED: 975 case VK_FORMAT_R16G16B16_USCALED: 976 case VK_FORMAT_R16G16_USCALED: 977 case VK_FORMAT_R16_USCALED: 978 c = Min(As<UInt4>(c), UInt4(0xFFFF)); 979 break; 980 case VK_FORMAT_A8B8G8R8_SINT_PACK32: 981 case VK_FORMAT_R8G8B8A8_SINT: 982 case VK_FORMAT_R8G8_SINT: 983 case VK_FORMAT_R8_SINT: 984 case VK_FORMAT_R8G8B8A8_SSCALED: 985 case VK_FORMAT_R8G8B8_SSCALED: 986 case VK_FORMAT_R8G8_SSCALED: 987 case VK_FORMAT_R8_SSCALED: 988 c = Min(Max(c, Int4(-0x80)), Int4(0x7F)); 989 break; 990 case VK_FORMAT_R16G16B16A16_SINT: 991 case VK_FORMAT_R16G16B16_SINT: 992 case VK_FORMAT_R16G16_SINT: 993 case VK_FORMAT_R16_SINT: 994 case VK_FORMAT_R16G16B16A16_SSCALED: 995 case VK_FORMAT_R16G16B16_SSCALED: 996 case VK_FORMAT_R16G16_SSCALED: 997 case VK_FORMAT_R16_SSCALED: 998 c = Min(Max(c, Int4(-0x8000)), Int4(0x7FFF)); 999 break; 1000 default: 1001 break; 1002 } 1003 1004 switch(state.destFormat) 1005 { 1006 case VK_FORMAT_B8G8R8A8_SINT: 1007 case VK_FORMAT_B8G8R8A8_SSCALED: 1008 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); } 1009 case VK_FORMAT_B8G8R8_SINT: 1010 case VK_FORMAT_B8G8R8_SRGB: 1011 case VK_FORMAT_B8G8R8_SSCALED: 1012 if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); } 1013 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); } 1014 if(writeR) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 0)); } 1015 break; 1016 case VK_FORMAT_A8B8G8R8_SINT_PACK32: 1017 case VK_FORMAT_R8G8B8A8_SINT: 1018 case VK_FORMAT_R8G8B8A8_SSCALED: 1019 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32: 1020 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); } 1021 case VK_FORMAT_R8G8B8_SINT: 1022 case VK_FORMAT_R8G8B8_SSCALED: 1023 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); } 1024 case VK_FORMAT_R8G8_SINT: 1025 case VK_FORMAT_R8G8_SSCALED: 1026 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); } 1027 case VK_FORMAT_R8_SINT: 1028 case VK_FORMAT_R8_SSCALED: 1029 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); } 1030 break; 1031 case VK_FORMAT_A2B10G10R10_UINT_PACK32: 1032 case VK_FORMAT_A2B10G10R10_SINT_PACK32: 1033 case VK_FORMAT_A2B10G10R10_USCALED_PACK32: 1034 case VK_FORMAT_A2B10G10R10_SSCALED_PACK32: 1035 if(writeRGBA) 1036 { 1037 *Pointer<UInt>(element) = 1038 UInt((Extract(c, 0)) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30)); 1039 } 1040 else 1041 { 1042 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) | 1043 (writeB ? 0x3FF00000 : 0x0000) | 1044 (writeG ? 0x000FFC00 : 0x0000) | 1045 (writeR ? 0x000003FF : 0x0000); 1046 unsigned int unmask = ~mask; 1047 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) | 1048 (UInt(Extract(c, 0) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30)) & UInt(mask)); 1049 } 1050 break; 1051 case VK_FORMAT_A2R10G10B10_UINT_PACK32: 1052 case VK_FORMAT_A2R10G10B10_SINT_PACK32: 1053 case VK_FORMAT_A2R10G10B10_USCALED_PACK32: 1054 case VK_FORMAT_A2R10G10B10_SSCALED_PACK32: 1055 if(writeRGBA) 1056 { 1057 *Pointer<UInt>(element) = 1058 UInt((Extract(c, 2)) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30)); 1059 } 1060 else 1061 { 1062 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) | 1063 (writeR ? 0x3FF00000 : 0x0000) | 1064 (writeG ? 0x000FFC00 : 0x0000) | 1065 (writeB ? 0x000003FF : 0x0000); 1066 unsigned int unmask = ~mask; 1067 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) | 1068 (UInt(Extract(c, 2) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30)) & UInt(mask)); 1069 } 1070 break; 1071 case VK_FORMAT_B8G8R8A8_UINT: 1072 case VK_FORMAT_B8G8R8A8_USCALED: 1073 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); } 1074 case VK_FORMAT_B8G8R8_UINT: 1075 case VK_FORMAT_B8G8R8_USCALED: 1076 if(writeB) { *Pointer<Byte>(element) = Byte(Extract(c, 2)); } 1077 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); } 1078 if(writeR) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 0)); } 1079 break; 1080 case VK_FORMAT_A8B8G8R8_UINT_PACK32: 1081 case VK_FORMAT_R8G8B8A8_UINT: 1082 case VK_FORMAT_R8G8B8A8_USCALED: 1083 case VK_FORMAT_A8B8G8R8_USCALED_PACK32: 1084 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); } 1085 case VK_FORMAT_R8G8B8_UINT: 1086 case VK_FORMAT_R8G8B8_USCALED: 1087 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); } 1088 case VK_FORMAT_R8G8_UINT: 1089 case VK_FORMAT_R8G8_USCALED: 1090 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); } 1091 case VK_FORMAT_R8_UINT: 1092 case VK_FORMAT_R8_USCALED: 1093 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); } 1094 break; 1095 case VK_FORMAT_R16G16B16A16_SINT: 1096 case VK_FORMAT_R16G16B16A16_SSCALED: 1097 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); } 1098 case VK_FORMAT_R16G16B16_SINT: 1099 case VK_FORMAT_R16G16B16_SSCALED: 1100 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); } 1101 case VK_FORMAT_R16G16_SINT: 1102 case VK_FORMAT_R16G16_SSCALED: 1103 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); } 1104 case VK_FORMAT_R16_SINT: 1105 case VK_FORMAT_R16_SSCALED: 1106 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); } 1107 break; 1108 case VK_FORMAT_R16G16B16A16_UINT: 1109 case VK_FORMAT_R16G16B16A16_USCALED: 1110 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); } 1111 case VK_FORMAT_R16G16B16_UINT: 1112 case VK_FORMAT_R16G16B16_USCALED: 1113 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); } 1114 case VK_FORMAT_R16G16_UINT: 1115 case VK_FORMAT_R16G16_USCALED: 1116 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); } 1117 case VK_FORMAT_R16_UINT: 1118 case VK_FORMAT_R16_USCALED: 1119 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); } 1120 break; 1121 case VK_FORMAT_R32G32B32A32_SINT: 1122 if(writeRGBA) 1123 { 1124 *Pointer<Int4>(element) = c; 1125 } 1126 else 1127 { 1128 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 1129 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } 1130 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); } 1131 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); } 1132 } 1133 break; 1134 case VK_FORMAT_R32G32B32_SINT: 1135 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 1136 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } 1137 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); } 1138 break; 1139 case VK_FORMAT_R32G32_SINT: 1140 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 1141 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } 1142 break; 1143 case VK_FORMAT_R32_SINT: 1144 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 1145 break; 1146 case VK_FORMAT_R32G32B32A32_UINT: 1147 if(writeRGBA) 1148 { 1149 *Pointer<UInt4>(element) = As<UInt4>(c); 1150 } 1151 else 1152 { 1153 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } 1154 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } 1155 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); } 1156 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); } 1157 } 1158 break; 1159 case VK_FORMAT_R32G32B32_UINT: 1160 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); } 1161 case VK_FORMAT_R32G32_UINT: 1162 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } 1163 case VK_FORMAT_R32_UINT: 1164 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } 1165 break; 1166 default: 1167 return false; 1168 } 1169 1170 return true; 1171 } 1172 GetScale(float4 & scale,VkFormat format)1173 bool Blitter::GetScale(float4 &scale, VkFormat format) 1174 { 1175 switch(format) 1176 { 1177 case VK_FORMAT_R4G4_UNORM_PACK8: 1178 case VK_FORMAT_R4G4B4A4_UNORM_PACK16: 1179 case VK_FORMAT_B4G4R4A4_UNORM_PACK16: 1180 scale = vector(0xF, 0xF, 0xF, 0xF); 1181 break; 1182 case VK_FORMAT_R8_UNORM: 1183 case VK_FORMAT_R8G8_UNORM: 1184 case VK_FORMAT_R8G8B8_UNORM: 1185 case VK_FORMAT_B8G8R8_UNORM: 1186 case VK_FORMAT_A8B8G8R8_UNORM_PACK32: 1187 case VK_FORMAT_R8G8B8A8_UNORM: 1188 case VK_FORMAT_B8G8R8A8_UNORM: 1189 case VK_FORMAT_R8_SRGB: 1190 case VK_FORMAT_R8G8_SRGB: 1191 case VK_FORMAT_R8G8B8_SRGB: 1192 case VK_FORMAT_B8G8R8_SRGB: 1193 case VK_FORMAT_A8B8G8R8_SRGB_PACK32: 1194 case VK_FORMAT_R8G8B8A8_SRGB: 1195 case VK_FORMAT_B8G8R8A8_SRGB: 1196 scale = vector(0xFF, 0xFF, 0xFF, 0xFF); 1197 break; 1198 case VK_FORMAT_R8_SNORM: 1199 case VK_FORMAT_R8G8_SNORM: 1200 case VK_FORMAT_R8G8B8_SNORM: 1201 case VK_FORMAT_B8G8R8_SNORM: 1202 case VK_FORMAT_A8B8G8R8_SNORM_PACK32: 1203 case VK_FORMAT_R8G8B8A8_SNORM: 1204 case VK_FORMAT_B8G8R8A8_SNORM: 1205 scale = vector(0x7F, 0x7F, 0x7F, 0x7F); 1206 break; 1207 case VK_FORMAT_R16_UNORM: 1208 case VK_FORMAT_R16G16_UNORM: 1209 case VK_FORMAT_R16G16B16_UNORM: 1210 case VK_FORMAT_R16G16B16A16_UNORM: 1211 scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); 1212 break; 1213 case VK_FORMAT_R16_SNORM: 1214 case VK_FORMAT_R16G16_SNORM: 1215 case VK_FORMAT_R16G16B16_SNORM: 1216 case VK_FORMAT_R16G16B16A16_SNORM: 1217 scale = vector(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF); 1218 break; 1219 case VK_FORMAT_R8_SINT: 1220 case VK_FORMAT_R8_UINT: 1221 case VK_FORMAT_R8G8_SINT: 1222 case VK_FORMAT_R8G8_UINT: 1223 case VK_FORMAT_R8G8B8_SINT: 1224 case VK_FORMAT_R8G8B8_UINT: 1225 case VK_FORMAT_B8G8R8_SINT: 1226 case VK_FORMAT_B8G8R8_UINT: 1227 case VK_FORMAT_R8G8B8A8_SINT: 1228 case VK_FORMAT_R8G8B8A8_UINT: 1229 case VK_FORMAT_A8B8G8R8_SINT_PACK32: 1230 case VK_FORMAT_A8B8G8R8_UINT_PACK32: 1231 case VK_FORMAT_B8G8R8A8_SINT: 1232 case VK_FORMAT_B8G8R8A8_UINT: 1233 case VK_FORMAT_R8_USCALED: 1234 case VK_FORMAT_R8G8_USCALED: 1235 case VK_FORMAT_R8G8B8_USCALED: 1236 case VK_FORMAT_B8G8R8_USCALED: 1237 case VK_FORMAT_R8G8B8A8_USCALED: 1238 case VK_FORMAT_B8G8R8A8_USCALED: 1239 case VK_FORMAT_A8B8G8R8_USCALED_PACK32: 1240 case VK_FORMAT_R8_SSCALED: 1241 case VK_FORMAT_R8G8_SSCALED: 1242 case VK_FORMAT_R8G8B8_SSCALED: 1243 case VK_FORMAT_B8G8R8_SSCALED: 1244 case VK_FORMAT_R8G8B8A8_SSCALED: 1245 case VK_FORMAT_B8G8R8A8_SSCALED: 1246 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32: 1247 case VK_FORMAT_R16_SINT: 1248 case VK_FORMAT_R16_UINT: 1249 case VK_FORMAT_R16G16_SINT: 1250 case VK_FORMAT_R16G16_UINT: 1251 case VK_FORMAT_R16G16B16A16_SINT: 1252 case VK_FORMAT_R16G16B16A16_UINT: 1253 case VK_FORMAT_R16_SSCALED: 1254 case VK_FORMAT_R16G16_SSCALED: 1255 case VK_FORMAT_R16G16B16_SSCALED: 1256 case VK_FORMAT_R16G16B16A16_SSCALED: 1257 case VK_FORMAT_R16_USCALED: 1258 case VK_FORMAT_R16G16_USCALED: 1259 case VK_FORMAT_R16G16B16_USCALED: 1260 case VK_FORMAT_R16G16B16A16_USCALED: 1261 case VK_FORMAT_R32_SINT: 1262 case VK_FORMAT_R32_UINT: 1263 case VK_FORMAT_R32G32_SINT: 1264 case VK_FORMAT_R32G32_UINT: 1265 case VK_FORMAT_R32G32B32_SINT: 1266 case VK_FORMAT_R32G32B32_UINT: 1267 case VK_FORMAT_R32G32B32A32_SINT: 1268 case VK_FORMAT_R32G32B32A32_UINT: 1269 case VK_FORMAT_R32G32B32A32_SFLOAT: 1270 case VK_FORMAT_R32G32B32_SFLOAT: 1271 case VK_FORMAT_R32G32_SFLOAT: 1272 case VK_FORMAT_R32_SFLOAT: 1273 case VK_FORMAT_R16G16B16A16_SFLOAT: 1274 case VK_FORMAT_R16G16B16_SFLOAT: 1275 case VK_FORMAT_R16G16_SFLOAT: 1276 case VK_FORMAT_R16_SFLOAT: 1277 case VK_FORMAT_B10G11R11_UFLOAT_PACK32: 1278 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: 1279 case VK_FORMAT_A2R10G10B10_USCALED_PACK32: 1280 case VK_FORMAT_A2R10G10B10_SSCALED_PACK32: 1281 case VK_FORMAT_A2R10G10B10_UINT_PACK32: 1282 case VK_FORMAT_A2R10G10B10_SINT_PACK32: 1283 case VK_FORMAT_A2B10G10R10_USCALED_PACK32: 1284 case VK_FORMAT_A2B10G10R10_SSCALED_PACK32: 1285 case VK_FORMAT_A2B10G10R10_UINT_PACK32: 1286 case VK_FORMAT_A2B10G10R10_SINT_PACK32: 1287 scale = vector(1.0f, 1.0f, 1.0f, 1.0f); 1288 break; 1289 case VK_FORMAT_R5G5B5A1_UNORM_PACK16: 1290 case VK_FORMAT_B5G5R5A1_UNORM_PACK16: 1291 case VK_FORMAT_A1R5G5B5_UNORM_PACK16: 1292 scale = vector(0x1F, 0x1F, 0x1F, 0x01); 1293 break; 1294 case VK_FORMAT_R5G6B5_UNORM_PACK16: 1295 case VK_FORMAT_B5G6R5_UNORM_PACK16: 1296 scale = vector(0x1F, 0x3F, 0x1F, 1.0f); 1297 break; 1298 case VK_FORMAT_A2R10G10B10_UNORM_PACK32: 1299 case VK_FORMAT_A2B10G10R10_UNORM_PACK32: 1300 scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03); 1301 break; 1302 case VK_FORMAT_A2R10G10B10_SNORM_PACK32: 1303 case VK_FORMAT_A2B10G10R10_SNORM_PACK32: 1304 scale = vector(0x1FF, 0x1FF, 0x1FF, 0x01); 1305 break; 1306 case VK_FORMAT_D16_UNORM: 1307 scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f); 1308 break; 1309 case VK_FORMAT_D24_UNORM_S8_UINT: 1310 case VK_FORMAT_X8_D24_UNORM_PACK32: 1311 scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f); 1312 break; 1313 case VK_FORMAT_D32_SFLOAT: 1314 case VK_FORMAT_D32_SFLOAT_S8_UINT: 1315 case VK_FORMAT_S8_UINT: 1316 scale = vector(1.0f, 1.0f, 1.0f, 1.0f); 1317 break; 1318 default: 1319 return false; 1320 } 1321 1322 return true; 1323 } 1324 ApplyScaleAndClamp(Float4 & value,const State & state,bool preScaled)1325 bool Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled) 1326 { 1327 float4 scale, unscale; 1328 if(state.clearOperation && 1329 Surface::isNonNormalizedInteger(state.sourceFormat) && 1330 !Surface::isNonNormalizedInteger(state.destFormat)) 1331 { 1332 // If we're clearing a buffer from an int or uint color into a normalized color, 1333 // then the whole range of the int or uint color must be scaled between 0 and 1. 1334 switch(state.sourceFormat) 1335 { 1336 case VK_FORMAT_R32G32B32A32_SINT: 1337 unscale = replicate(static_cast<float>(0x7FFFFFFF)); 1338 break; 1339 case VK_FORMAT_R32G32B32A32_UINT: 1340 unscale = replicate(static_cast<float>(0xFFFFFFFF)); 1341 break; 1342 default: 1343 return false; 1344 } 1345 } 1346 else if(!GetScale(unscale, state.sourceFormat)) 1347 { 1348 return false; 1349 } 1350 1351 if(!GetScale(scale, state.destFormat)) 1352 { 1353 return false; 1354 } 1355 1356 bool srcSRGB = Surface::isSRGBformat(state.sourceFormat); 1357 bool dstSRGB = Surface::isSRGBformat(state.destFormat); 1358 1359 if(state.convertSRGB && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded. 1360 { 1361 value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale 1362 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale 1363 value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value); 1364 value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale 1365 } 1366 else if(unscale != scale) 1367 { 1368 value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w); 1369 } 1370 1371 if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat)) 1372 { 1373 value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w)); 1374 1375 value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x, 1376 Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y, 1377 Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z, 1378 Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w)); 1379 } 1380 1381 return true; 1382 } 1383 ComputeOffset(Int & x,Int & y,Int & pitchB,int bytes,bool quadLayout)1384 Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout) 1385 { 1386 if(!quadLayout) 1387 { 1388 return y * pitchB + x * bytes; 1389 } 1390 else 1391 { 1392 // (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1) 1393 return (y & Int(~1)) * pitchB + 1394 ((y & Int(1)) * 2 + x * 2 - (x & Int(1))) * bytes; 1395 } 1396 } 1397 LinearToSRGB(Float4 & c)1398 Float4 Blitter::LinearToSRGB(Float4 &c) 1399 { 1400 Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f); 1401 Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f); 1402 1403 Float4 s = c; 1404 s.xyz = Max(lc, ec); 1405 1406 return s; 1407 } 1408 sRGBtoLinear(Float4 & c)1409 Float4 Blitter::sRGBtoLinear(Float4 &c) 1410 { 1411 Float4 lc = c * Float4(1.0f / 12.92f); 1412 Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f)); 1413 1414 Int4 linear = CmpLT(c, Float4(0.04045f)); 1415 1416 Float4 s = c; 1417 s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // FIXME: IfThenElse() 1418 1419 return s; 1420 } 1421 generate(const State & state)1422 Routine *Blitter::generate(const State &state) 1423 { 1424 Function<Void(Pointer<Byte>)> function; 1425 { 1426 Pointer<Byte> blit(function.Arg<0>()); 1427 1428 Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source)); 1429 Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest)); 1430 Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB)); 1431 Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB)); 1432 1433 Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0)); 1434 Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0)); 1435 Float w = *Pointer<Float>(blit + OFFSET(BlitData,w)); 1436 Float h = *Pointer<Float>(blit + OFFSET(BlitData,h)); 1437 1438 Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d)); 1439 Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d)); 1440 Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d)); 1441 Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d)); 1442 1443 Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth)); 1444 Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight)); 1445 1446 bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat); 1447 bool intDst = Surface::isNonNormalizedInteger(state.destFormat); 1448 bool intBoth = intSrc && intDst; 1449 bool srcQuadLayout = Surface::hasQuadLayout(state.sourceFormat); 1450 bool dstQuadLayout = Surface::hasQuadLayout(state.destFormat); 1451 int srcBytes = Surface::bytes(state.sourceFormat); 1452 int dstBytes = Surface::bytes(state.destFormat); 1453 1454 bool hasConstantColorI = false; 1455 Int4 constantColorI; 1456 bool hasConstantColorF = false; 1457 Float4 constantColorF; 1458 if(state.clearOperation) 1459 { 1460 if(intBoth) // Integer types 1461 { 1462 if(!read(constantColorI, source, state)) 1463 { 1464 return nullptr; 1465 } 1466 hasConstantColorI = true; 1467 } 1468 else 1469 { 1470 if(!read(constantColorF, source, state)) 1471 { 1472 return nullptr; 1473 } 1474 hasConstantColorF = true; 1475 1476 if(!ApplyScaleAndClamp(constantColorF, state)) 1477 { 1478 return nullptr; 1479 } 1480 } 1481 } 1482 1483 For(Int j = y0d, j < y1d, j++) 1484 { 1485 Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h; 1486 Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB; 1487 1488 For(Int i = x0d, i < x1d, i++) 1489 { 1490 Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w; 1491 Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes; 1492 1493 if(hasConstantColorI) 1494 { 1495 if(!write(constantColorI, d, state)) 1496 { 1497 return nullptr; 1498 } 1499 } 1500 else if(hasConstantColorF) 1501 { 1502 for(int s = 0; s < state.destSamples; s++) 1503 { 1504 if(!write(constantColorF, d, state)) 1505 { 1506 return nullptr; 1507 } 1508 1509 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB)); 1510 } 1511 } 1512 else if(intBoth) // Integer types do not support filtering 1513 { 1514 Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision 1515 Int X = Int(x); 1516 Int Y = Int(y); 1517 1518 if(state.clampToEdge) 1519 { 1520 X = Clamp(X, 0, sWidth - 1); 1521 Y = Clamp(Y, 0, sHeight - 1); 1522 } 1523 1524 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout); 1525 1526 if(!read(color, s, state)) 1527 { 1528 return nullptr; 1529 } 1530 1531 if(!write(color, d, state)) 1532 { 1533 return nullptr; 1534 } 1535 } 1536 else 1537 { 1538 Float4 color; 1539 1540 bool preScaled = false; 1541 if(!state.filter || intSrc) 1542 { 1543 Int X = Int(x); 1544 Int Y = Int(y); 1545 1546 if(state.clampToEdge) 1547 { 1548 X = Clamp(X, 0, sWidth - 1); 1549 Y = Clamp(Y, 0, sHeight - 1); 1550 } 1551 1552 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout); 1553 1554 if(!read(color, s, state)) 1555 { 1556 return nullptr; 1557 } 1558 } 1559 else // Bilinear filtering 1560 { 1561 Float X = x; 1562 Float Y = y; 1563 1564 if(state.clampToEdge) 1565 { 1566 X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f); 1567 Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f); 1568 } 1569 1570 Float x0 = X - 0.5f; 1571 Float y0 = Y - 0.5f; 1572 1573 Int X0 = Max(Int(x0), 0); 1574 Int Y0 = Max(Int(y0), 0); 1575 1576 Int X1 = X0 + 1; 1577 Int Y1 = Y0 + 1; 1578 X1 = IfThenElse(X1 >= sWidth, X0, X1); 1579 Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1); 1580 1581 Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout); 1582 Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout); 1583 Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout); 1584 Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout); 1585 1586 Float4 c00; if(!read(c00, s00, state)) return nullptr; 1587 Float4 c01; if(!read(c01, s01, state)) return nullptr; 1588 Float4 c10; if(!read(c10, s10, state)) return nullptr; 1589 Float4 c11; if(!read(c11, s11, state)) return nullptr; 1590 1591 if(state.convertSRGB && Surface::isSRGBformat(state.sourceFormat)) // sRGB -> RGB 1592 { 1593 if(!ApplyScaleAndClamp(c00, state)) return nullptr; 1594 if(!ApplyScaleAndClamp(c01, state)) return nullptr; 1595 if(!ApplyScaleAndClamp(c10, state)) return nullptr; 1596 if(!ApplyScaleAndClamp(c11, state)) return nullptr; 1597 preScaled = true; 1598 } 1599 1600 Float4 fx = Float4(x0 - Float(X0)); 1601 Float4 fy = Float4(y0 - Float(Y0)); 1602 Float4 ix = Float4(1.0f) - fx; 1603 Float4 iy = Float4(1.0f) - fy; 1604 1605 color = (c00 * ix + c01 * fx) * iy + 1606 (c10 * ix + c11 * fx) * fy; 1607 } 1608 1609 if(!ApplyScaleAndClamp(color, state, preScaled)) 1610 { 1611 return nullptr; 1612 } 1613 1614 for(int s = 0; s < state.destSamples; s++) 1615 { 1616 if(!write(color, d, state)) 1617 { 1618 return nullptr; 1619 } 1620 1621 d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB)); 1622 } 1623 } 1624 } 1625 } 1626 } 1627 1628 return function("BlitRoutine"); 1629 } 1630 blitReactor(Surface * source,const SliceRectF & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)1631 bool Blitter::blitReactor(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options &options) 1632 { 1633 ASSERT(!options.clearOperation || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1))); 1634 1635 Rect dRect = destRect; 1636 RectF sRect = sourceRect; 1637 if(destRect.x0 > destRect.x1) 1638 { 1639 swap(dRect.x0, dRect.x1); 1640 swap(sRect.x0, sRect.x1); 1641 } 1642 if(destRect.y0 > destRect.y1) 1643 { 1644 swap(dRect.y0, dRect.y1); 1645 swap(sRect.y0, sRect.y1); 1646 } 1647 1648 State state(options); 1649 state.clampToEdge = (sourceRect.x0 < 0.0f) || 1650 (sourceRect.y0 < 0.0f) || 1651 (sourceRect.x1 > (float)source->getWidth()) || 1652 (sourceRect.y1 > (float)source->getHeight()); 1653 1654 bool useSourceInternal = !source->isExternalDirty(); 1655 bool useDestInternal = !dest->isExternalDirty(); 1656 bool isStencil = options.useStencil; 1657 1658 state.sourceFormat = isStencil ? source->getStencilFormat() : source->getFormat(useSourceInternal); 1659 state.destFormat = isStencil ? dest->getStencilFormat() : dest->getFormat(useDestInternal); 1660 state.destSamples = dest->getSamples(); 1661 1662 criticalSection.lock(); 1663 Routine *blitRoutine = blitCache->query(state); 1664 1665 if(!blitRoutine) 1666 { 1667 blitRoutine = generate(state); 1668 1669 if(!blitRoutine) 1670 { 1671 criticalSection.unlock(); 1672 return false; 1673 } 1674 1675 blitCache->add(state, blitRoutine); 1676 } 1677 1678 criticalSection.unlock(); 1679 1680 void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry(); 1681 1682 BlitData data; 1683 1684 bool isRGBA = options.writeMask == 0xF; 1685 bool isEntireDest = dest->isEntire(destRect); 1686 1687 data.source = isStencil ? source->lockStencil(0, 0, 0, sw::PUBLIC) : 1688 source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal); 1689 data.dest = isStencil ? dest->lockStencil(0, 0, 0, sw::PUBLIC) : 1690 dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal); 1691 data.sPitchB = isStencil ? source->getStencilPitchB() : source->getPitchB(useSourceInternal); 1692 data.dPitchB = isStencil ? dest->getStencilPitchB() : dest->getPitchB(useDestInternal); 1693 data.dSliceB = isStencil ? dest->getStencilSliceB() : dest->getSliceB(useDestInternal); 1694 1695 data.w = sRect.width() / dRect.width(); 1696 data.h = sRect.height() / dRect.height(); 1697 data.x0 = sRect.x0 + (0.5f - dRect.x0) * data.w; 1698 data.y0 = sRect.y0 + (0.5f - dRect.y0) * data.h; 1699 1700 data.x0d = dRect.x0; 1701 data.x1d = dRect.x1; 1702 data.y0d = dRect.y0; 1703 data.y1d = dRect.y1; 1704 1705 data.sWidth = source->getWidth(); 1706 data.sHeight = source->getHeight(); 1707 1708 blitFunction(&data); 1709 1710 if(isStencil) 1711 { 1712 source->unlockStencil(); 1713 dest->unlockStencil(); 1714 } 1715 else 1716 { 1717 source->unlock(useSourceInternal); 1718 dest->unlock(useDestInternal); 1719 } 1720 1721 return true; 1722 } 1723 } 1724