1// GENERATED FILE - DO NOT EDIT. 2// Generated by gen_mtl_internal_shaders.py 3// 4// Copyright 2020 The ANGLE Project Authors. All rights reserved. 5// Use of this source code is governed by a BSD-style license that can be 6// found in the LICENSE file. 7// 8 9// C++ string version of combined Metal default shaders. 10 11 12 13static char gDefaultMetallibSrc[] = R"( 14# 1 "temp_master_source.metal" 15# 1 "<built-in>" 1 16# 1 "<built-in>" 3 17# 386 "<built-in>" 3 18# 1 "<command line>" 1 19# 1 "<built-in>" 2 20# 1 "temp_master_source.metal" 2 21# 1 "./blit.metal" 1 22 23 24 25 26 27 28 29# 1 "./common.h" 1 30# 13 "./common.h" 31# include <simd/simd.h> 32# include <metal_stdlib> 33 34 35 36# 1 "./constants.h" 1 37# 11 "./constants.h" 38namespace rx 39{ 40namespace mtl_shader 41{ 42 43enum 44{ 45 kTextureType2D = 0, 46 kTextureType2DMultisample = 1, 47 kTextureType2DArray = 2, 48 kTextureTypeCube = 3, 49 kTextureType3D = 4, 50 kTextureTypeCount = 5, 51}; 52 53 54 55 56} 57} 58# 18 "./common.h" 2 59 60 61 62 63 64 65 66using namespace metal; 67 68 69 70constant uint32_t kNumColorOutputs [[function_constant(0)]]; 71constant bool kColorOutputAvailable0 = kNumColorOutputs > 0; 72constant bool kColorOutputAvailable1 = kNumColorOutputs > 1; 73constant bool kColorOutputAvailable2 = kNumColorOutputs > 2; 74constant bool kColorOutputAvailable3 = kNumColorOutputs > 3; 75 76namespace rx 77{ 78namespace mtl_shader 79{ 80 81 82constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)}; 83 84template <typename T> 85struct MultipleColorOutputs 86{ 87 vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]]; 88 vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]]; 89 vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]]; 90 vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]]; 91}; 92# 61 "./common.h" 93template <typename T> 94static inline MultipleColorOutputs<T> 
toMultipleColorOutputs(vec<T, 4> color) 95{ 96 MultipleColorOutputs<T> re; 97 98 do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0); 99 do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0); 100 do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0); 101 do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0); 102 103 return re; 104} 105 106static inline float3 cubeTexcoords(float2 texcoords, int face) 107{ 108 texcoords = 2.0 * texcoords - 1.0; 109 switch (face) 110 { 111 case 0: 112 return float3(1.0, -texcoords.y, -texcoords.x); 113 case 1: 114 return float3(-1.0, -texcoords.y, texcoords.x); 115 case 2: 116 return float3(texcoords.x, 1.0, texcoords.y); 117 case 3: 118 return float3(texcoords.x, -1.0, -texcoords.y); 119 case 4: 120 return float3(texcoords.x, -texcoords.y, 1.0); 121 case 5: 122 return float3(-texcoords.x, -texcoords.y, -1.0); 123 } 124 return float3(texcoords, 0); 125} 126 127template <typename T> 128static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords) 129{ 130 uint samples = srcTexture.get_num_samples(); 131 132 vec<T, 4> output(0); 133 134 for (uint sample = 0; sample < samples; ++sample) 135 { 136 output += srcTexture.read(coords, sample); 137 } 138 139 output = output / samples; 140 141 return output; 142} 143 144static inline float4 sRGBtoLinear(float4 color) 145{ 146 float3 linear1 = color.rgb / 12.92; 147 float3 linear2 = pow((color.rgb + float3(0.055)) / 1.055, 2.4); 148 float3 factor = float3(color.rgb <= float3(0.04045)); 149 float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a); 150 151 return linear; 152} 153 154static inline float linearToSRGB(float color) 155{ 156 if (color <= 0.0f) 157 return 0.0f; 158 else if (color < 0.0031308f) 159 return 12.92f * color; 160 else if (color < 1.0f) 161 return 1.055f * pow(color, 0.41666f) - 0.055f; 162 else 163 return 1.0f; 164} 165 166static inline float4 linearToSRGB(float4 color) 
167{ 168 return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a); 169} 170 171template <typename Short> 172static inline Short bytesToShort(constant uchar *input, uint offset) 173{ 174 Short inputLo = input[offset]; 175 Short inputHi = input[offset + 1]; 176 177 return inputLo | (inputHi << 8); 178} 179 180template <typename Int> 181static inline Int bytesToInt(constant uchar *input, uint offset) 182{ 183 Int input0 = input[offset]; 184 Int input1 = input[offset + 1]; 185 Int input2 = input[offset + 2]; 186 Int input3 = input[offset + 3]; 187 188 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 189} 190 191template <typename Short> 192static inline void shortToBytes(Short val, uint offset, device uchar *output) 193{ 194 ushort valUnsigned = as_type<ushort>(val); 195 output[offset] = valUnsigned & 0xff; 196 output[offset + 1] = (valUnsigned >> 8) & 0xff; 197} 198 199template <typename Int> 200static inline void intToBytes(Int val, uint offset, device uchar *output) 201{ 202 uint valUnsigned = as_type<uint>(val); 203 output[offset] = valUnsigned & 0xff; 204 output[offset + 1] = (valUnsigned >> 8) & 0xff; 205 output[offset + 2] = (valUnsigned >> 16) & 0xff; 206 output[offset + 3] = (valUnsigned >> 24) & 0xff; 207} 208 209static inline void floatToBytes(float val, uint offset, device uchar *output) 210{ 211 intToBytes(as_type<uint>(val), offset, output); 212} 213 214static inline void int24bitToBytes(uint val, uint offset, device uchar *output) 215{ 216 output[offset] = val & 0xff; 217 output[offset + 1] = (val >> 8) & 0xff; 218 output[offset + 2] = (val >> 16) & 0xff; 219} 220 221template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 222static inline T getShiftedData(T input) 223{ 224 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 225 "T must have at least as many bits as inputBitCount + inputBitStart."); 226 const T mask = (1 << inputBitCount) - 1; 227 return (input >> 
inputBitStart) & mask; 228} 229 230template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 231static inline T shiftData(T input) 232{ 233 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 234 "T must have at least as many bits as inputBitCount + inputBitStart."); 235 const T mask = (1 << inputBitCount) - 1; 236 return (input & mask) << inputBitStart; 237} 238 239template <unsigned int inputBitCount, typename T> 240static inline float normalizedToFloat(T input) 241{ 242 static_assert(inputBitCount <= (sizeof(T) * 8), 243 "T must have more bits than or same bits as inputBitCount."); 244 static_assert(inputBitCount <= 23, "Only single precision is supported"); 245 246 constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1); 247 return input * inverseMax; 248} 249 250template <typename T> 251static inline float normalizedToFloat(T input) 252{ 253 return normalizedToFloat<sizeof(T) * 8, T>(input); 254} 255 256template <> 257inline float normalizedToFloat(short input) 258{ 259 constexpr float inverseMax = 1.0f / 0x7fff; 260 return static_cast<float>(input) * inverseMax; 261} 262 263template <> 264inline float normalizedToFloat(int input) 265{ 266 constexpr float inverseMax = 1.0f / 0x7fffffff; 267 return static_cast<float>(input) * inverseMax; 268} 269 270template <> 271inline float normalizedToFloat(uint input) 272{ 273 constexpr float inverseMax = 1.0f / 0xffffffff; 274 return static_cast<float>(input) * inverseMax; 275} 276 277template <unsigned int outputBitCount, typename T> 278static inline T floatToNormalized(float input) 279{ 280 static_assert(outputBitCount <= (sizeof(T) * 8), 281 "T must have more bits than or same bits as inputBitCount."); 282 static_assert(outputBitCount <= 23, "Only single precision is supported"); 283 284 return static_cast<T>(((1 << outputBitCount) - 1) * input + 0.5f); 285} 286 287template <typename T> 288static inline T floatToNormalized(float input) 289{ 290 return floatToNormalized<sizeof(T) * 
8, T>(input); 291} 292 293} 294} 295# 9 "./blit.metal" 2 296 297using namespace rx::mtl_shader; 298 299 300constant bool kPremultiplyAlpha [[function_constant(1)]]; 301constant bool kUnmultiplyAlpha [[function_constant(2)]]; 302constant int kSourceTextureType [[function_constant(3)]]; 303constant int kSourceTexture2Type [[function_constant(4)]]; 304 305constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D; 306constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray; 307constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample; 308constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube; 309constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D; 310 311constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D; 312constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray; 313constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample; 314constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube; 315 316struct BlitParams 317{ 318 319 float2 srcTexCoords[3]; 320 int srcLevel; 321 int srcLayer; 322 bool dstFlipViewportX; 323 bool dstFlipViewportY; 324 bool dstLuminance; 325 uint8_t padding[13]; 326}; 327 328struct BlitVSOut 329{ 330 float4 position [[position]]; 331 float2 texCoords [[user(locn1)]]; 332}; 333 334vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]]) 335{ 336 BlitVSOut output; 337 output.position = float4(gCorners[vid], 0.0, 1.0); 338 output.texCoords = options.srcTexCoords[vid]; 339 340 if (options.dstFlipViewportX) 341 { 342 output.position.x = -output.position.x; 343 } 344 if (!options.dstFlipViewportY) 345 { 346 347 348 output.position.y = -output.position.y; 349 } 350 351 return output; 352} 353 354template <typename SrcTexture2d> 355static uint2 getImageCoords(SrcTexture2d srcTexture, float2 
texCoords) 356{ 357 uint2 dimens(srcTexture.get_width(), srcTexture.get_height()); 358 uint2 coords = uint2(texCoords * float2(dimens)); 359 360 return coords; 361} 362 363template <typename T> 364static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords) 365{ 366 uint2 coords = getImageCoords(srcTexture, texCoords); 367 return resolveTextureMS(srcTexture, coords); 368} 369 370template <typename T> 371static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture, 372 sampler textureSampler, 373 float2 texCoords, 374 constant BlitParams &options) 375{ 376 uint depth = srcTexture.get_depth(options.srcLevel); 377 float zCoord = (float(options.srcLayer) + 0.5) / float(depth); 378 379 return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel)); 380} 381# 112 "./blit.metal" 382template <typename T> 383static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 384{ 385 vec<T, 4> output; 386 387 switch (kSourceTextureType) 388 { 389 case kTextureType2D: 390 output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel)); 391 break; 392 case kTextureType2DArray: 393 output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer, 394 level(options.srcLevel)); 395 break; 396 case kTextureType2DMultisample: 397 output = blitSampleTextureMS(srcTexture2dMS, input.texCoords); 398 break; 399 case kTextureTypeCube: 400 output = 
srcTextureCube.sample(textureSampler, 401 cubeTexcoords(input.texCoords, options.srcLayer), 402 level(options.srcLevel)); 403 break; 404 case kTextureType3D: 405 output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options); 406 break; 407 } 408 409 if (kPremultiplyAlpha) 410 { 411 output.xyz *= output.a; 412 } 413 else if (kUnmultiplyAlpha) 414 { 415 if (output.a != 0.0) 416 { 417 output.xyz /= output.a; 418 } 419 } 420 421 if (options.dstLuminance) 422 { 423 output.g = output.b = output.r; 424 } 425 426 return output; 427} 428 429template <typename T> 430static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 431{ 432 vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 433 434 return toMultipleColorOutputs(output); 435} 436 437fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant 
BlitParams &options [[buffer(0)]]) 438{ 439 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 440} 441fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 442{ 443 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 444} 445fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 446{ 447 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 448} 449 450fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> 
srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 451{ 452 float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 453 uint4 output = uint4(inputColor * float4(255.0)); 454 455 return toMultipleColorOutputs(output); 456} 457 458 459struct FragmentDepthOut 460{ 461 float depth [[depth(any)]]; 462}; 463 464static inline float sampleDepth( 465 texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]], 466 texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]], 467 texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]], 468 texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]], 469 float2 texCoords, 470 constant BlitParams &options) 471{ 472 float4 output; 473 474 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 475 476 switch (kSourceTextureType) 477 { 478 case kTextureType2D: 479 output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel)); 480 break; 481 case kTextureType2DArray: 482 output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer, 483 level(options.srcLevel)); 484 break; 485 case kTextureType2DMultisample: 486 487 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 488 break; 489 case kTextureTypeCube: 490 output = 491 srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer), 492 level(options.srcLevel)); 493 break; 494 } 495 496 return output.r; 497} 498 499fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]], 500 texture2d<float> srcTexture2d 501 
[[texture(0), function_constant(kSourceTextureType2D)]], 502 texture2d_array<float> srcTexture2dArray 503 [[texture(0), function_constant(kSourceTextureType2DArray)]], 504 texture2d_ms<float> srcTexture2dMS 505 [[texture(0), function_constant(kSourceTextureType2DMS)]], 506 texturecube<float> srcTextureCube 507 [[texture(0), function_constant(kSourceTextureTypeCube)]], 508 constant BlitParams &options [[buffer(0)]]) 509{ 510 FragmentDepthOut re; 511 512 re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 513 input.texCoords, options); 514 515 return re; 516} 517 518static inline uint32_t sampleStencil( 519 texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]], 520 texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]], 521 texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]], 522 texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]], 523 float2 texCoords, 524 int srcLevel, 525 int srcLayer) 526{ 527 uint4 output; 528 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 529 530 switch (kSourceTexture2Type) 531 { 532 case kTextureType2D: 533 output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel)); 534 break; 535 case kTextureType2DArray: 536 output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel)); 537 break; 538 case kTextureType2DMultisample: 539 540 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 541 break; 542 case kTextureTypeCube: 543 output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer), 544 level(srcLevel)); 545 break; 546 } 547 548 return output.r; 549} 550 551 552struct BlitStencilToBufferParams 553{ 554 float2 srcStartTexCoords; 555 float2 srcTexCoordSteps; 556 int srcLevel; 557 int srcLayer; 558 559 uint2 dstSize; 560 uint dstBufferRowPitch; 561 562 bool resolveMS; 563}; 

// Copies stencil values from the texture bound at texture(1) into a byte buffer,
// one thread per destination texel. Threads outside options.dstSize do nothing.
// For a multisample source with resolveMS false, every sample of the texel is
// written to consecutive bytes; otherwise one byte per texel is written.
kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]],
                                  texture2d<uint32_t> srcTexture2d
                                  [[texture(1), function_constant(kSourceTexture2Type2D)]],
                                  texture2d_array<uint32_t> srcTexture2dArray
                                  [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
                                  texture2d_ms<uint32_t> srcTexture2dMS
                                  [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
                                  texturecube<uint32_t> srcTextureCube
                                  [[texture(1), function_constant(kSourceTexture2TypeCube)]],
                                  constant BlitStencilToBufferParams &options [[buffer(0)]],
                                  device uchar *buffer [[buffer(1)]])
{
    if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y)
    {
        return;
    }

    // Source coordinate for this destination texel: start plus index times step.
    float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps;

    if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS)
    {
        uint samples = srcTexture2dMS.get_num_samples();
        uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords);
        // Each texel occupies one byte per sample within its row.
        uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x;

        for (uint sample = 0; sample < samples; ++sample)
        {
            uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r;
            buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample);
        }
    }
    else
    {
        uint32_t stencil =
            sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                          srcTexCoords, options.srcLevel, options.srcLayer);

        buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil);
    }
}


// The fragment functions below write a [[stencil]] output and are compiled only
// when __METAL_VERSION__ is at least 210.
#if __METAL_VERSION__ >= 210

struct FragmentStencilOut
{
    uint32_t stencil [[stencil]];
};

struct FragmentDepthStencilOut
{
    float depth [[depth(any)]];
    uint32_t stencil [[stencil]];
};

// Stencil-only blit: writes the stencil value sampled from the texture(1) source.
fragment FragmentStencilOut blitStencilFS(
    BlitVSOut input [[stage_in]],
    texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]],
    texture2d_array<uint32_t> srcTexture2dArray
    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
    texture2d_ms<uint32_t> srcTexture2dMS
    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
    texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]],
    constant BlitParams &options [[buffer(0)]])
{
    FragmentStencilOut re;

    re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                               input.texCoords, options.srcLevel, options.srcLayer);

    return re;
}

// Combined blit: depth comes from the texture(0) source and stencil from the
// texture(1) source, both at the same texture coordinates.
fragment FragmentDepthStencilOut blitDepthStencilFS(
    BlitVSOut input [[stage_in]],

    texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
    texture2d_array<float> srcDepthTexture2dArray
    [[texture(0), function_constant(kSourceTextureType2DArray)]],
    texture2d_ms<float> srcDepthTexture2dMS
    [[texture(0), function_constant(kSourceTextureType2DMS)]],
    texturecube<float> srcDepthTextureCube
    [[texture(0), function_constant(kSourceTextureTypeCube)]],


    texture2d<uint32_t> srcStencilTexture2d
    [[texture(1), function_constant(kSourceTexture2Type2D)]],
    texture2d_array<uint32_t> srcStencilTexture2dArray
    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
    texture2d_ms<uint32_t> srcStencilTexture2dMS
    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
    texturecube<uint32_t> srcStencilTextureCube
    [[texture(1), function_constant(kSourceTexture2TypeCube)]],

    constant BlitParams &options [[buffer(0)]])
{
    FragmentDepthStencilOut re;

    re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS,
                           srcDepthTextureCube, input.texCoords, options);
    re.stencil =
        sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS,
                      srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer);
    return re;
}
#endif
# 2 "temp_master_source.metal" 2
# 1 "./clear.metal" 1
# 10 "./clear.metal"
using namespace rx::mtl_shader;

// Parameters read from buffer(0) by the clear vertex and fragment functions.
// NOTE(review): presumably mirrors a host-side struct - confirm before changing.
struct ClearParams
{
    float4 clearColor;
    float clearDepth;
};

// Emits one gCorners vertex with clearDepth as its z coordinate.
vertex float4 clearVS(unsigned int vid [[ vertex_id ]],
                      constant ClearParams &clearParams [[buffer(0)]])
{
    return float4(gCorners[vid], clearParams.clearDepth, 1.0);
}

// Writes clearColor to every enabled float color output.
fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]])
{
    return toMultipleColorOutputs(clearParams.clearColor);
}

// Reinterprets the bits of clearColor as int4 (no numeric conversion) and writes
// the result to every enabled output.
fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]])
{
    return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor));
}

// Reinterprets the bits of clearColor as uint4 (no numeric conversion) and writes
// the result to every enabled output.
fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]])
{
    return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor));
}
# 3 "temp_master_source.metal" 2
# 1 "./gen_indices.metal" 1








using namespace rx::mtl_shader;


// Function constants for the index kernels: whether the source buffer is aligned
// and which index width it holds. The kUseSourceBuffer* constants derived from
// them decide which typed view of buffer(1) is declared; an unaligned source is
// always read through the uchar view.
constant bool kSourceBufferAligned[[function_constant(100)]];
constant bool kSourceIndexIsU8[[function_constant(200)]];
constant bool kSourceIndexIsU16[[function_constant(300)]];
constant bool kSourceIndexIsU32[[function_constant(400)]];
constant bool kSourceBufferUnaligned = !kSourceBufferAligned;
constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned;
constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned;
constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned;

// Parameters read from buffer(0) by the index conversion kernels. srcOffset is
// used as a byte offset by the getIndex* helpers.
struct IndexConversionParams
{
    uint32_t srcOffset;
    uint32_t indexCount;
    bool primitiveRestartEnabled;
};



// Reads the idx-th 16-bit index; offset is in bytes and is divided by 2 to get
// the starting element.
inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx)
{
    return inputAligned[offset / 2 + idx];
}
737inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx) 738{ 739 return inputAligned[offset / 4 + idx]; 740} 741inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx) 742{ 743 return input[offset + idx]; 744} 745inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx) 746{ 747 ushort inputLo = input[offset + 2 * idx]; 748 ushort inputHi = input[offset + 2 * idx + 1]; 749 750 return inputLo | (inputHi << 8); 751} 752inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx) 753{ 754 uint input0 = input[offset + 4 * idx]; 755 uint input1 = input[offset + 4 * idx + 1]; 756 uint input2 = input[offset + 4 * idx + 2]; 757 uint input3 = input[offset + 4 * idx + 3]; 758 759 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 760} 761 762kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]], 763 constant IndexConversionParams &options [[buffer(0)]], 764 constant uchar *input [[buffer(1)]], 765 device ushort *output [[buffer(2)]]) 766{ 767 if (idx >= options.indexCount) { return; }; 768 769 uchar value = getIndexAligned(input, options.srcOffset, idx); 770 771 if (options.primitiveRestartEnabled && value == 0xff) 772 { 773 output[idx] = 0xffff; 774 } 775 else 776 { 777 output[idx] = value; 778 } 779} 780 781kernel void convertIndexU16(uint idx [[thread_position_in_grid]], 782 constant IndexConversionParams &options [[buffer(0)]], 783 constant uchar *input 784 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 785 constant ushort *inputAligned 786 [[buffer(1), function_constant(kSourceBufferAligned)]], 787 device ushort *output [[buffer(2)]]) 788{ 789 if (idx >= options.indexCount) { return; }; 790 791 ushort value; 792 if (kSourceBufferAligned) 793 { 794 value = getIndexAligned(inputAligned, options.srcOffset, idx); 795 } 796 else 797 { 798 value = getIndexUnalignedU16(input, options.srcOffset, idx); 799 } 800 output[idx] = value; 801} 802 803kernel 
void convertIndexU32(uint idx [[thread_position_in_grid]],
                     constant IndexConversionParams &options [[buffer(0)]],
                     constant uchar *input
                     [[buffer(1), function_constant(kSourceBufferUnaligned)]],
                     constant uint *inputAligned
                     [[buffer(1), function_constant(kSourceBufferAligned)]],
                     device uint *output [[buffer(2)]])
{
    // Threads past the end of the index range have nothing to do.
    if (idx >= options.indexCount) { return; };

    uint value;
    // Each branch touches only the argument that exists for this specialisation.
    if (kSourceBufferAligned)
    {
        value = getIndexAligned(inputAligned, options.srcOffset, idx);
    }
    else
    {
        value = getIndexUnalignedU32(input, options.srcOffset, idx);
    }
    output[idx] = value;
}

// Parameters of the kernels that synthesise indices for non-indexed draws.
struct IndexFromArrayParams
{
    // Index of the first vertex of the draw.
    uint firstVertex;

    // Element count used for the thread guard of each kernel (see the note on
    // genTriFanIndicesFromArray for how it is interpreted there).
    uint vertexCount;
};


// Expands a triangle fan into a triangle list: thread `idx` writes the triple
// (firstVertex, firstVertex + idx + 1, firstVertex + idx + 2).
// NOTE(review): the guard admits idx up to vertexCount - 1, which references
// vertex firstVertex + vertexCount + 1, so the caller presumably passes the
// triangle count (vertices - 2) in vertexCount -- confirm against the host code.
kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]],
                                      constant IndexFromArrayParams &options [[buffer(0)]],
                                      device uint *output [[buffer(2)]])
{
    if (idx >= options.vertexCount) { return; };

    uint vertexIdx = options.firstVertex + 2 + idx;

    output[3 * idx] = options.firstVertex;
    output[3 * idx + 1] = vertexIdx - 1;
    output[3 * idx + 2] = vertexIdx;
}

// Fetches source index `idx` as a uint regardless of the source index width and
// alignment. Exactly one of the three pointers exists per specialisation; the
// function constants pick the matching reader. `offset` is a byte offset.
// Returns 0 when no source-buffer selector is enabled.
inline uint getIndexU32(uint offset,
                        uint idx,
                        constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]],
                        constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]],
                        constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]])
{
    if (kUseSourceBufferU8)
    {
        // The byte view serves both true 8-bit indices and unaligned 16/32-bit
        // indices, which are reassembled byte by byte.
        if (kSourceIndexIsU16)
        {
            return getIndexUnalignedU16(inputU8, offset, idx);
        }
        else if (kSourceIndexIsU32)
        {
            return getIndexUnalignedU32(inputU8, offset, idx);
        }
        return getIndexAligned(inputU8, offset, idx);
    }
    else if (kUseSourceBufferU16)
    {
        return getIndexAligned(inputU16, offset, idx);
    }
    else if (kUseSourceBufferU32)
    {
        return getIndexAligned(inputU32, offset, idx);
    }
    return 0;
}




// Expands an indexed triangle fan into a triangle list: thread `idx` writes the
// source indices at positions 0, idx + 1 and idx + 2.
kernel void genTriFanIndicesFromElements(uint idx
[[thread_position_in_grid]],
                                         constant IndexConversionParams &options [[buffer(0)]],
                                         constant uchar *inputU8
                                         [[buffer(1), function_constant(kUseSourceBufferU8)]],
                                         constant ushort *inputU16
                                         [[buffer(1), function_constant(kUseSourceBufferU16)]],
                                         constant uint *inputU32
                                         [[buffer(1), function_constant(kUseSourceBufferU32)]],
                                         device uint *output [[buffer(2)]])
{
    // NOTE(review): as with the array variant, the guard implies indexCount is the
    // number of triangles to emit rather than the number of source indices -- confirm.
    if (idx >= options.indexCount) { return; };

    uint elemIdx = 2 + idx;

    // Triangle idx of the fan: (pivot, previous element, current element).
    output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32);
    output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32);
    output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32);
}


// Turns a non-indexed line loop into a line strip: writes vertexCount + 1
// indices, the last one wrapping back to firstVertex to close the loop.
kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]],
                                        constant IndexFromArrayParams &options [[buffer(0)]],
                                        device uint *output [[buffer(2)]])
{
    // An empty loop has nothing to emit. Without this check thread 0 would pass
    // the guard below (0 < 0 + 1) and evaluate idx % 0.
    // This file is generated: mirror the change in ./gen_indices.metal.
    if (options.vertexCount == 0) { return; }

    uint totalIndices = options.vertexCount + 1;
    if (idx >= totalIndices) { return; };

    // idx == vertexCount wraps to 0, which closes the loop.
    output[idx] = options.firstVertex + idx % options.vertexCount;
}



// Indexed counterpart of genLineLoopIndicesFromArray: writes indexCount + 1
// indices read from the source buffer, the last one repeating source index 0.
kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]],
                                           constant IndexConversionParams &options [[buffer(0)]],
                                           constant uchar *inputU8
                                           [[buffer(1), function_constant(kUseSourceBufferU8)]],
                                           constant ushort *inputU16
                                           [[buffer(1), function_constant(kUseSourceBufferU16)]],
                                           constant uint *inputU32
                                           [[buffer(1), function_constant(kUseSourceBufferU32)]],
                                           device uint *output [[buffer(2)]])
{
    // Same empty-input guard as above: avoids idx % 0 and a read from an empty
    // source range.
    if (options.indexCount == 0) { return; }

    uint totalTargetIndices = options.indexCount + 1;
    if (idx >= totalTargetIndices) { return; };

    output[idx] =
        getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32);
}
# 4 "temp_master_source.metal" 2
# 1 "./gen_mipmap.metal" 1








using namespace rx::mtl_shader;
# 31 "./gen_mipmap.metal"
// Parameters shared by all mipmap generation kernels (buffer(0)).
struct GenMipParams
{
uint srcLevel;
    // Number of levels this dispatch writes; the kernels return early when it
    // equals 1, 2 or 3 and otherwise produce all four destination levels.
    uint numMipLevelsToGen;
    // Selects the sRGB <-> linear conversions performed around the averaging.
    bool sRGB;
};



// Generates up to four mip levels of a 3D texture in one dispatch.
// Every thread produces one texel of dstMip1 by sampling srcTexture at level
// options.srcLevel. The following levels are reduced through threadgroup memory
// by progressively fewer threads, each averaging a 2x2x2 neighbourhood.
// NOTE(review): the 8*8*8 arrays and the index masks (0x49, 0xdb, 0x1ff) assume
// an 8x8x8 threadgroup with lIndex = x + 8*y + 64*z -- confirm against the host
// dispatch.
kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                              ushort3 gIndices [[thread_position_in_grid]],
                              texture3d<float> srcTexture [[texture(0)]],
                              texture3d<float, access::write> dstMip1 [[texture(1)]],
                              texture3d<float, access::write> dstMip2 [[texture(2)]],
                              texture3d<float, access::write> dstMip3 [[texture(3)]],
                              texture3d<float, access::write> dstMip4 [[texture(4)]],
                              constant GenMipParams &options [[buffer(0)]])
{
    ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth());
    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z;

    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);

    // One channel per array; each thread owns the slot at its local index.
    threadgroup float sR[(8 * 8 * 8)];
    threadgroup float sG[(8 * 8 * 8)];
    threadgroup float sB[(8 * 8 * 8)];
    threadgroup float sA[(8 * 8 * 8)];

    // Zero-initialised so threads outside the mip never carry an indeterminate value.
    float4 texel1 = float4(0.0f);
    if (validThread)
    {
        // Sampling at the centre of the destination texel with a linear filter
        // averages the covered source texels.
        float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize);
        texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));

        dstMip1.write(texel1, gIndices);
    }
    else
    {
        // Sentinel with every bit set: it fails all (lIndex & mask) == 0 tests
        // below, so this thread skips every reduction stage.
        lIndex = 0xffffffff;
    }

    // Uniform across the threadgroup, so returning before the barrier is safe.
    if (options.numMipLevelsToGen == 1)
    {
        return;
    }

    // Only threads inside the mip publish their texel. Threads outside it carry
    // the 0xffffffff sentinel, which would index far beyond the 512-element
    // arrays; that store used to be executed unconditionally.
    // This file is generated: mirror the change in ./gen_mipmap.metal.
    if (validThread)
    {
        if (options.sRGB)
        {
            texel1 = linearToSRGB(texel1);
        }
        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
    }

    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Second level: threads whose x, y and z are all even (bits 0, 3 and 6 clear).
    if ((lIndex & 0x49) == 0)
    {
        // At the last texel of an axis there is no neighbour to read; the
        // thread's own value is reused instead.
        bool3 atEdge = gIndices == (mipSize - ushort3(1));

        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));

        float4 texel3 = (atEdge.y) ?
(texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1011 1012 float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)])); 1013 1014 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1015 1016 1017 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)])); 1018 1019 1020 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)])); 1021 1022 1023 float4 texel8 = 1024 (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)])); 1025 1026 1027 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1028 1029 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1030 1031 1032 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1033 } 1034 1035 if (options.numMipLevelsToGen == 2) 1036 { 1037 return; 1038 } 1039 1040 1041 threadgroup_barrier(mem_flags::mem_threadgroup); 1042 1043 1044 if ((lIndex & 0xdb) == 0) 1045 { 1046 mipSize = max(mipSize >> 1, ushort3(1)); 1047 bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); 1048 1049 1050 1051 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1052 1053 float4 texel3 = 1054 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)])); 1055 1056 float4 texel4 = 1057 (atEdge.z) ? 
(texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))])); 1058 1059 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1060 1061 1062 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)])); 1063 1064 1065 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)])); 1066 1067 1068 1069 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)])); 1070 1071 1072 1073 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1074 1075 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1076 1077 1078 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1079 } 1080 1081 if (options.numMipLevelsToGen == 3) 1082 { 1083 return; 1084 } 1085 1086 1087 threadgroup_barrier(mem_flags::mem_threadgroup); 1088 1089 1090 if ((lIndex & 0x1ff) == 0) 1091 { 1092 mipSize = max(mipSize >> 1, ushort3(1)); 1093 bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); 1094 1095 1096 1097 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1098 1099 float4 texel3 = 1100 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)])); 1101 1102 float4 texel4 = 1103 (atEdge.z) ? 
(texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))])); 1104 1105 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1106 1107 1108 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)])); 1109 1110 1111 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)])); 1112 1113 1114 1115 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)])); 1116 1117 1118 1119 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1120 1121 dstMip4.write((options.sRGB ? 
sRGBtoLinear(texel1) : texel1), gIndices >> 3);
    }
}

// Generates up to four mip levels of a 2D texture in one dispatch.
// Every thread produces one texel of dstMip1 by sampling srcTexture at level
// options.srcLevel. The following levels are reduced through threadgroup memory
// by progressively fewer threads, each averaging a 2x2 neighbourhood.
// NOTE(review): the 8*8 arrays and the index masks (0x09, 0x1b, 0x3f) assume an
// 8x8 threadgroup with lIndex = x + 8*y -- confirm against the host dispatch.
kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                              ushort2 gIndices [[thread_position_in_grid]],
                              texture2d<float> srcTexture [[texture(0)]],
                              texture2d<float, access::write> dstMip1 [[texture(1)]],
                              texture2d<float, access::write> dstMip2 [[texture(2)]],
                              texture2d<float, access::write> dstMip3 [[texture(3)]],
                              texture2d<float, access::write> dstMip4 [[texture(4)]],
                              constant GenMipParams &options [[buffer(0)]])
{
    uint firstMipLevel = options.srcLevel + 1;
    ushort2 mipSize =
        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y;

    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);

    // One channel per array; each thread owns the slot at its local index.
    threadgroup float sR[(8 * 8)];
    threadgroup float sG[(8 * 8)];
    threadgroup float sB[(8 * 8)];
    threadgroup float sA[(8 * 8)];

    // Zero-initialised so threads outside the mip never carry an indeterminate value.
    float4 texel1 = float4(0.0f);
    if (validThread)
    {
        // Sampling at the centre of the destination texel with a linear filter
        // averages the covered source texels.
        float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize);
        texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));

        dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices);
    }
    else
    {
        // Sentinel with every bit set: it fails all (lIndex & mask) == 0 tests
        // below, so this thread skips every reduction stage.
        lIndex = 0xffffffff;
    }

    // Uniform across the threadgroup, so returning before the barrier is safe.
    if (options.numMipLevelsToGen == 1)
    {
        return;
    }

    // Only threads inside the mip publish their texel. Threads outside it carry
    // the 0xffffffff sentinel, which would index far beyond the 64-element
    // arrays; that store used to be executed unconditionally.
    // This file is generated: mirror the change in ./gen_mipmap.metal.
    if (validThread)
    {
        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
    }

    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Second level: threads whose x and y are both even (bits 0 and 3 clear).
    if ((lIndex & 0x09) == 0)
    {
        // At the last texel of an axis there is no neighbour to read; the
        // thread's own value is reused instead.
        bool2 atEdge = gIndices == (mipSize - ushort2(1));

        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));

        float4 texel3 = (atEdge.y) ?
(texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1187 1188 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1189 1190 1191 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1192 1193 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1194 1195 1196 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1197 } 1198 1199 if (options.numMipLevelsToGen == 2) 1200 { 1201 return; 1202 } 1203 1204 1205 threadgroup_barrier(mem_flags::mem_threadgroup); 1206 1207 1208 if ((lIndex & 0x1b) == 0) 1209 { 1210 mipSize = max(mipSize >> 1, ushort2(1)); 1211 bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); 1212 1213 1214 1215 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1216 1217 float4 texel3 = 1218 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1219 1220 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1221 1222 1223 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1224 1225 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1226 1227 1228 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1229 } 1230 1231 if (options.numMipLevelsToGen == 3) 1232 { 1233 return; 1234 } 1235 1236 1237 threadgroup_barrier(mem_flags::mem_threadgroup); 1238 1239 1240 if ((lIndex & 0x3f) == 0) 1241 { 1242 mipSize = max(mipSize >> 1, ushort2(1)); 1243 bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); 1244 1245 1246 1247 float4 texel2 = (atEdge.x) ? 
(texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));

        float4 texel3 =
            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));

        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));


        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;

        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
    }
}

// Shared tail of generateCubeMipmaps and generate2DArrayMipmaps: produces mip
// levels 2..4 of one face / array slice (gIndices.z) from the first-level texel
// each thread already computed.
//   lIndex      local thread index, or 0xffffffff for threads outside the mip
//   gIndices    xy = texel position in the first generated level, z = face / slice
//   dstMip2..4  destination levels
//   mip1Size    size of the first generated level
//   mip1Texel   this thread's texel of the first generated level
//   sR..sA      per-channel threadgroup scratch arrays owned by the calling kernel
//   options     numMipLevelsToGen and sRGB are read here
// srcTexture is accepted but not used by this function.
template <typename TextureTypeR, typename TextureTypeW>
static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps(
    uint lIndex,
    ushort3 gIndices,
    TextureTypeR srcTexture,
    TextureTypeW dstMip2,
    TextureTypeW dstMip3,
    TextureTypeW dstMip4,
    ushort2 mip1Size,
    float4 mip1Texel,
    threadgroup float *sR,
    threadgroup float *sG,
    threadgroup float *sB,
    threadgroup float *sA,
    constant GenMipParams &options)
{
    ushort2 mipSize = mip1Size;
    float4 texel1 = mip1Texel;

    // Callers mark threads outside the mip with lIndex == 0xffffffff. Such a thread
    // must not store: the index lies far beyond the scratch arrays, and the store
    // used to be executed unconditionally.
    // This file is generated: mirror the change in ./gen_mipmap.metal.
    if (lIndex != 0xffffffff)
    {
        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
    }

    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Second level: threads whose x and y are both even (bits 0 and 3 clear).
    // The sentinel has every bit set and therefore never passes this test.
    if ((lIndex & 0x09) == 0)
    {
        // At the last texel of an axis there is no neighbour to read; the
        // thread's own value is reused instead.
        bool2 atEdge = gIndices.xy == (mipSize - ushort2(1));

        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));

        float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));

        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));


        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;

        dstMip2.write((options.sRGB ?
sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z); 1304 1305 1306 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1307 } 1308 1309 if (options.numMipLevelsToGen == 2) 1310 { 1311 return; 1312 } 1313 1314 1315 threadgroup_barrier(mem_flags::mem_threadgroup); 1316 1317 1318 if ((lIndex & 0x1b) == 0) 1319 { 1320 mipSize = max(mipSize >> 1, ushort2(1)); 1321 bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); 1322 1323 1324 1325 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1326 1327 float4 texel3 = 1328 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1329 1330 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1331 1332 1333 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1334 1335 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z); 1336 1337 1338 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1339 } 1340 1341 if (options.numMipLevelsToGen == 3) 1342 { 1343 return; 1344 } 1345 1346 1347 threadgroup_barrier(mem_flags::mem_threadgroup); 1348 1349 1350 if ((lIndex & 0x3f) == 0) 1351 { 1352 mipSize = max(mipSize >> 1, ushort2(1)); 1353 bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); 1354 1355 1356 1357 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1358 1359 float4 texel3 = 1360 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 1361 1362 float4 texel4 = (atEdge.x | atEdge.y) ? 
(texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));


        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;

        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z);
    }
}

// Generates up to four mip levels of a cube texture in one dispatch.
// gIndices.xy addresses a texel of the first generated level and gIndices.z the
// cube face. The first level is produced here by sampling srcTexture at level
// options.srcLevel; the remaining levels are delegated to
// generateCubeOr2DArray2ndAndMoreMipmaps.
kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                                ushort3 gIndices [[thread_position_in_grid]],
                                texturecube<float> srcTexture [[texture(0)]],
                                texturecube<float, access::write> dstMip1 [[texture(1)]],
                                texturecube<float, access::write> dstMip2 [[texture(2)]],
                                texturecube<float, access::write> dstMip3 [[texture(3)]],
                                texturecube<float, access::write> dstMip4 [[texture(4)]],
                                constant GenMipParams &options [[buffer(0)]])
{
    uint firstMipLevel = options.srcLevel + 1;
    ushort2 mip1Size =
        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
    bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;

    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);

    // Zero-initialised: threads outside the mip skip the sampling below but still
    // pass this value (by copy) to the shared helper, so it must not be
    // indeterminate. This file is generated: mirror the change in
    // ./gen_mipmap.metal.
    float4 mip1Texel = float4(0.0f);
    if (validThread)
    {
        // Face-local coordinates in [0, 1] are converted to a cube direction for
        // face gIndices.z by cubeTexcoords.
        float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
        mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)),
                                      level(options.srcLevel));


        dstMip1.write((options.sRGB ?
sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
    }
    else
    {
        // Sentinel with every bit set: it fails all (lIndex & mask) == 0 tests in
        // the shared helper, so this thread skips every reduction stage.
        lIndex = 0xffffffff;
    }

    // Uniform across the threadgroup, so returning before the helper's barriers is safe.
    if (options.numMipLevelsToGen == 1)
    {
        return;
    }

    // Per-channel scratch space handed to the shared helper.
    threadgroup float sR[(8 * 8)];
    threadgroup float sG[(8 * 8)];
    threadgroup float sB[(8 * 8)];
    threadgroup float sA[(8 * 8)];

    generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
                                           mip1Size, mip1Texel, sR, sG, sB, sA, options);
}

// Generates up to four mip levels of a 2D array texture in one dispatch.
// gIndices.xy addresses a texel of the first generated level and gIndices.z the
// array slice. The first level is produced here by sampling srcTexture at level
// options.srcLevel; the remaining levels are delegated to
// generateCubeOr2DArray2ndAndMoreMipmaps.
kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                                   ushort3 gIndices [[thread_position_in_grid]],
                                   texture2d_array<float> srcTexture [[texture(0)]],
                                   texture2d_array<float, access::write> dstMip1 [[texture(1)]],
                                   texture2d_array<float, access::write> dstMip2 [[texture(2)]],
                                   texture2d_array<float, access::write> dstMip3 [[texture(3)]],
                                   texture2d_array<float, access::write> dstMip4 [[texture(4)]],
                                   constant GenMipParams &options [[buffer(0)]])
{
    uint firstMipLevel = options.srcLevel + 1;
    ushort2 mip1Size =
        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
    bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;

    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);

    // Zero-initialised: threads outside the mip skip the sampling below but still
    // pass this value (by copy) to the shared helper, so it must not be
    // indeterminate. This file is generated: mirror the change in
    // ./gen_mipmap.metal.
    float4 mip1Texel = float4(0.0f);
    if (validThread)
    {
        float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
        mip1Texel =
            srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel));


        dstMip1.write((options.sRGB ?
sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
    }
    else
    {
        // Sentinel with every bit set: it fails all (lIndex & mask) == 0 tests in
        // the shared helper, so this thread skips every reduction stage.
        lIndex = 0xffffffff;
    }

    // Uniform across the threadgroup, so returning before the helper's barriers is safe.
    if (options.numMipLevelsToGen == 1)
    {
        return;
    }

    // Per-channel scratch space handed to the shared helper.
    threadgroup float sR[(8 * 8)];
    threadgroup float sG[(8 * 8)];
    threadgroup float sB[(8 * 8)];
    threadgroup float sA[(8 * 8)];

    generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
                                           mip1Size, mip1Texel, sR, sG, sB, sA, options);
}
# 5 "temp_master_source.metal" 2
# 1 "./copy_buffer.metal" 1
# 12 "./copy_buffer.metal"
#include <metal_pack>



# 1 "./format_autogen.h" 1
# 11 "./format_autogen.h"
namespace rx
{
namespace mtl_shader
{

// Pixel / vertex format identifiers available to the shaders.
// The enumerators have no explicit values, so they are numbered consecutively
// from NONE = 0 and their order is significant.
// NOTE(review): presumably this list must stay in step with the host-side format
// enum it was generated from -- confirm before reordering or inserting entries.
namespace FormatID
{
enum
{
    NONE,
    D16_UNORM,
    D24_UNORM_S8_UINT,
    D24_UNORM_X8_UINT,
    D32_FLOAT,
    D32_FLOAT_S8X24_UINT,
    D32_UNORM,
    S8_UINT,
    A16_FLOAT,
    A1R5G5B5_UNORM,
    A2R10G10B10_SINT_VERTEX,
    A2R10G10B10_SNORM_VERTEX,
    A2R10G10B10_SSCALED_VERTEX,
    A2R10G10B10_UINT_VERTEX,
    A2R10G10B10_UNORM_VERTEX,
    A2R10G10B10_USCALED_VERTEX,
    A32_FLOAT,
    A8_UNORM,
    ASTC_10x10_SRGB_BLOCK,
    ASTC_10x10_UNORM_BLOCK,
    ASTC_10x5_SRGB_BLOCK,
    ASTC_10x5_UNORM_BLOCK,
    ASTC_10x6_SRGB_BLOCK,
    ASTC_10x6_UNORM_BLOCK,
    ASTC_10x8_SRGB_BLOCK,
    ASTC_10x8_UNORM_BLOCK,
    ASTC_12x10_SRGB_BLOCK,
    ASTC_12x10_UNORM_BLOCK,
    ASTC_12x12_SRGB_BLOCK,
    ASTC_12x12_UNORM_BLOCK,
    ASTC_3x3x3_UNORM_BLOCK,
    ASTC_3x3x3_UNORM_SRGB_BLOCK,
    ASTC_4x3x3_UNORM_BLOCK,
    ASTC_4x3x3_UNORM_SRGB_BLOCK,
    ASTC_4x4_SRGB_BLOCK,
    ASTC_4x4_UNORM_BLOCK,
    ASTC_4x4x3_UNORM_BLOCK,
    ASTC_4x4x3_UNORM_SRGB_BLOCK,
    ASTC_4x4x4_UNORM_BLOCK,
    ASTC_4x4x4_UNORM_SRGB_BLOCK,
    ASTC_5x4_SRGB_BLOCK,
    ASTC_5x4_UNORM_BLOCK,
    ASTC_5x4x4_UNORM_BLOCK,
    ASTC_5x4x4_UNORM_SRGB_BLOCK,
ASTC_5x5_SRGB_BLOCK, 1529 ASTC_5x5_UNORM_BLOCK, 1530 ASTC_5x5x4_UNORM_BLOCK, 1531 ASTC_5x5x4_UNORM_SRGB_BLOCK, 1532 ASTC_5x5x5_UNORM_BLOCK, 1533 ASTC_5x5x5_UNORM_SRGB_BLOCK, 1534 ASTC_6x5_SRGB_BLOCK, 1535 ASTC_6x5_UNORM_BLOCK, 1536 ASTC_6x5x5_UNORM_BLOCK, 1537 ASTC_6x5x5_UNORM_SRGB_BLOCK, 1538 ASTC_6x6_SRGB_BLOCK, 1539 ASTC_6x6_UNORM_BLOCK, 1540 ASTC_6x6x5_UNORM_BLOCK, 1541 ASTC_6x6x5_UNORM_SRGB_BLOCK, 1542 ASTC_6x6x6_UNORM_BLOCK, 1543 ASTC_6x6x6_UNORM_SRGB_BLOCK, 1544 ASTC_8x5_SRGB_BLOCK, 1545 ASTC_8x5_UNORM_BLOCK, 1546 ASTC_8x6_SRGB_BLOCK, 1547 ASTC_8x6_UNORM_BLOCK, 1548 ASTC_8x8_SRGB_BLOCK, 1549 ASTC_8x8_UNORM_BLOCK, 1550 B10G10R10A2_UNORM, 1551 B4G4R4A4_UNORM, 1552 B5G5R5A1_UNORM, 1553 B5G6R5_UNORM, 1554 B8G8R8A8_TYPELESS, 1555 B8G8R8A8_TYPELESS_SRGB, 1556 B8G8R8A8_UNORM, 1557 B8G8R8A8_UNORM_SRGB, 1558 B8G8R8X8_UNORM, 1559 BC1_RGBA_UNORM_BLOCK, 1560 BC1_RGBA_UNORM_SRGB_BLOCK, 1561 BC1_RGB_UNORM_BLOCK, 1562 BC1_RGB_UNORM_SRGB_BLOCK, 1563 BC2_RGBA_UNORM_BLOCK, 1564 BC2_RGBA_UNORM_SRGB_BLOCK, 1565 BC3_RGBA_UNORM_BLOCK, 1566 BC3_RGBA_UNORM_SRGB_BLOCK, 1567 BC4_RED_SNORM_BLOCK, 1568 BC4_RED_UNORM_BLOCK, 1569 BC5_RG_SNORM_BLOCK, 1570 BC5_RG_UNORM_BLOCK, 1571 BC6H_RGB_SFLOAT_BLOCK, 1572 BC6H_RGB_UFLOAT_BLOCK, 1573 BC7_RGBA_UNORM_BLOCK, 1574 BC7_RGBA_UNORM_SRGB_BLOCK, 1575 EAC_R11G11_SNORM_BLOCK, 1576 EAC_R11G11_UNORM_BLOCK, 1577 EAC_R11_SNORM_BLOCK, 1578 EAC_R11_UNORM_BLOCK, 1579 ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK, 1580 ETC1_R8G8B8_UNORM_BLOCK, 1581 ETC2_R8G8B8A1_SRGB_BLOCK, 1582 ETC2_R8G8B8A1_UNORM_BLOCK, 1583 ETC2_R8G8B8A8_SRGB_BLOCK, 1584 ETC2_R8G8B8A8_UNORM_BLOCK, 1585 ETC2_R8G8B8_SRGB_BLOCK, 1586 ETC2_R8G8B8_UNORM_BLOCK, 1587 G8_B8R8_2PLANE_420_UNORM, 1588 G8_B8_R8_3PLANE_420_UNORM, 1589 L16A16_FLOAT, 1590 L16_FLOAT, 1591 L32A32_FLOAT, 1592 L32_FLOAT, 1593 L8A8_UNORM, 1594 L8_UNORM, 1595 PVRTC1_RGBA_2BPP_UNORM_BLOCK, 1596 PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK, 1597 PVRTC1_RGBA_4BPP_UNORM_BLOCK, 1598 PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK, 1599 
PVRTC1_RGB_2BPP_UNORM_BLOCK, 1600 PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK, 1601 PVRTC1_RGB_4BPP_UNORM_BLOCK, 1602 PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK, 1603 R10G10B10A2_SINT, 1604 R10G10B10A2_SNORM, 1605 R10G10B10A2_SSCALED, 1606 R10G10B10A2_UINT, 1607 R10G10B10A2_UNORM, 1608 R10G10B10A2_USCALED, 1609 R10G10B10X2_UNORM, 1610 R11G11B10_FLOAT, 1611 R16G16B16A16_FLOAT, 1612 R16G16B16A16_SINT, 1613 R16G16B16A16_SNORM, 1614 R16G16B16A16_SSCALED, 1615 R16G16B16A16_UINT, 1616 R16G16B16A16_UNORM, 1617 R16G16B16A16_USCALED, 1618 R16G16B16_FLOAT, 1619 R16G16B16_SINT, 1620 R16G16B16_SNORM, 1621 R16G16B16_SSCALED, 1622 R16G16B16_UINT, 1623 R16G16B16_UNORM, 1624 R16G16B16_USCALED, 1625 R16G16_FLOAT, 1626 R16G16_SINT, 1627 R16G16_SNORM, 1628 R16G16_SSCALED, 1629 R16G16_UINT, 1630 R16G16_UNORM, 1631 R16G16_USCALED, 1632 R16_FLOAT, 1633 R16_SINT, 1634 R16_SNORM, 1635 R16_SSCALED, 1636 R16_UINT, 1637 R16_UNORM, 1638 R16_USCALED, 1639 R32G32B32A32_FIXED, 1640 R32G32B32A32_FLOAT, 1641 R32G32B32A32_SINT, 1642 R32G32B32A32_SNORM, 1643 R32G32B32A32_SSCALED, 1644 R32G32B32A32_UINT, 1645 R32G32B32A32_UNORM, 1646 R32G32B32A32_USCALED, 1647 R32G32B32_FIXED, 1648 R32G32B32_FLOAT, 1649 R32G32B32_SINT, 1650 R32G32B32_SNORM, 1651 R32G32B32_SSCALED, 1652 R32G32B32_UINT, 1653 R32G32B32_UNORM, 1654 R32G32B32_USCALED, 1655 R32G32_FIXED, 1656 R32G32_FLOAT, 1657 R32G32_SINT, 1658 R32G32_SNORM, 1659 R32G32_SSCALED, 1660 R32G32_UINT, 1661 R32G32_UNORM, 1662 R32G32_USCALED, 1663 R32_FIXED, 1664 R32_FLOAT, 1665 R32_SINT, 1666 R32_SNORM, 1667 R32_SSCALED, 1668 R32_UINT, 1669 R32_UNORM, 1670 R32_USCALED, 1671 R4G4B4A4_UNORM, 1672 R5G5B5A1_UNORM, 1673 R5G6B5_UNORM, 1674 R8G8B8A8_SINT, 1675 R8G8B8A8_SNORM, 1676 R8G8B8A8_SSCALED, 1677 R8G8B8A8_TYPELESS, 1678 R8G8B8A8_TYPELESS_SRGB, 1679 R8G8B8A8_UINT, 1680 R8G8B8A8_UNORM, 1681 R8G8B8A8_UNORM_SRGB, 1682 R8G8B8A8_USCALED, 1683 R8G8B8_SINT, 1684 R8G8B8_SNORM, 1685 R8G8B8_SSCALED, 1686 R8G8B8_UINT, 1687 R8G8B8_UNORM, 1688 R8G8B8_UNORM_SRGB, 1689 R8G8B8_USCALED, 1690 
R8G8_SINT,
    R8G8_SNORM,
    R8G8_SSCALED,
    R8G8_UINT,
    R8G8_UNORM,
    R8G8_UNORM_SRGB,
    R8G8_USCALED,
    R8_SINT,
    R8_SNORM,
    R8_SSCALED,
    R8_UINT,
    R8_UNORM,
    R8_UNORM_SRGB,
    R8_USCALED,
    R9G9B9E5_SHAREDEXP,
    X2R10G10B10_SINT_VERTEX,
    X2R10G10B10_SNORM_VERTEX,
    X2R10G10B10_SSCALED_VERTEX,
    X2R10G10B10_UINT_VERTEX,
    X2R10G10B10_UNORM_VERTEX,
    X2R10G10B10_USCALED_VERTEX
};

}

}
}
# 16 "./copy_buffer.metal" 2

using namespace rx::mtl_shader;

// Format selector supplied at pipeline specialisation.
// NOTE(review): presumably holds a FormatID value -- confirm against the kernels
// that switch on it.
constant int kCopyFormatType [[function_constant(10)]];


// Texture type selector supplied at pipeline specialisation (a kTextureType*
// value). The derived booleans below gate which texture argument each helper
// actually declares.
constant int kCopyTextureType [[function_constant(20)]];
constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D;
constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray;
constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample;
constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube;
constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D;

// Parameters consumed by textureWrite (buffer-to-texture direction).
struct CopyPixelParams
{
    uint3 copySize;
    // Added to the thread position to obtain the destination texel.
    uint3 textureOffset;

    // NOTE(review): the four fields below are not read in the visible part of
    // this file; by name they describe the buffer layout -- confirm units.
    uint bufferStartOffset;
    uint pixelSize;
    uint bufferRowPitch;
    uint bufferDepthPitch;
};

// Parameters consumed by textureRead (texture-to-buffer direction).
struct WritePixelParams
{
    // copySize.y is used to mirror rows when reverseTextureRowOrder is set.
    uint2 copySize;
    // Added to the (possibly mirrored) thread position to obtain the source texel.
    uint2 textureOffset;

    uint bufferStartOffset;

    uint pixelSize;
    uint bufferRowPitch;

    // Mip level read from the source texture.
    uint textureLevel;
    // Array slice, cube face or 3D depth coordinate, depending on the texture type.
    uint textureLayer;

    // When set, row y of the copy region is read from row copySize.y - 1 - y.
    bool reverseTextureRowOrder;
};
# 120 "./copy_buffer.metal"
// Writes `color` to the destination texture at options.textureOffset + gIndices.
// Only the texture argument matching kCopyTextureType exists for a given
// specialisation; all of them are bound at texture(0).
template <typename T>
static inline void textureWrite(ushort3 gIndices,
                                constant CopyPixelParams &options,
                                vec<T, 4> color,
                                texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d
[[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
{
    uint3 writeIndices = options.textureOffset + uint3(gIndices);
    // kCopyTextureType is a function constant, so each case only touches the
    // texture argument that exists for that specialisation. Types without a case
    // (for example multisample) write nothing.
    switch (kCopyTextureType)
    {
        case kTextureType2D:
            dstTexture2d.write(color, writeIndices.xy);
            break;
        case kTextureType2DArray:
            // z selects the array slice.
            dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z);
            break;
        case kTextureType3D:
            dstTexture3d.write(color, writeIndices);
            break;
        case kTextureTypeCube:
            // z selects the cube face.
            dstTextureCube.write(color, writeIndices.xy, writeIndices.z);
            break;
    }
}


// Reads one texel from the source texture for thread position gIndices.
// The row is mirrored inside the copy region when options.reverseTextureRowOrder
// is set, then options.textureOffset is added. Multisample sources are resolved
// with resolveTextureMS. Only the texture argument matching kCopyTextureType
// exists for a given specialisation; all of them are bound at texture(0).
// If kCopyTextureType matches no case, `color` is returned uninitialised.
template <typename T>
static inline vec<T, 4> textureRead(ushort2 gIndices,
                                    constant WritePixelParams &options,
                                    texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]])
{
    vec<T, 4> color;
    uint2 coords = uint2(gIndices);
    if (options.reverseTextureRowOrder)
    {
        // Mirror within the copy region before the region offset is applied.
        coords.y = options.copySize.y - 1 - gIndices.y;
    }
    coords += options.textureOffset;
    switch (kCopyTextureType)
    {
        case kTextureType2D:
            color = srcTexture2d.read(coords.xy, options.textureLevel);
            break;
        case kTextureType2DArray:
            color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel);
            break;
        case kTextureType2DMultisample:
            // No level argument: the resolve reads the samples at coords directly.
            color = resolveTextureMS(srcTexture2dMS, coords.xy);
            break;
        case kTextureType3D:
            // textureLayer doubles as the depth coordinate for 3D sources.
            color = srcTexture3d.read(uint3(coords,
options.textureLayer), options.textureLevel);
            break;
        case kTextureTypeCube:
            // textureLayer selects the cube face.
            color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel);
            break;
    }
    return color;
}
# 215 "./copy_buffer.metal"
// Decodes one 16-bit R5G6B5 pixel stored at `bufferOffset` in `buffer`.
// As used here, getShiftedData<bits, shift> selects a field of the packed word:
// red is the 5-bit field at shift 11, green 6 bits at shift 5, blue 5 bits at
// shift 0. Alpha is fixed at 1.0.
static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer)
{
    float4 color;
    ushort src = bytesToShort<ushort>(buffer, bufferOffset);

    color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
    color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src));
    color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src));
    color.a = 1.0;
    return color;
}
// Encodes `color` as a 16-bit R5G6B5 pixel and stores it at `bufferOffset`.
// color.a is dropped; gIndices and options are not used by this format.
static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
                 shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) |
                 shiftData<5, 0>(floatToNormalized<5, ushort>(color.b));

    shortToBytes(dst, bufferOffset, buffer);
}


// Decodes one 16-bit R4G4B4A4 pixel: four 4-bit fields at shifts 12 (red),
// 8 (green), 4 (blue) and 0 (alpha).
static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer)
{
    float4 color;
    ushort src = bytesToShort<ushort>(buffer, bufferOffset);

    color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src));
    color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src));
    color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src));
    color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src));
    return color;
}
// Encodes `color` as a 16-bit R4G4B4A4 pixel and stores it at `bufferOffset`.
// gIndices and options are not used by this format.
static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) |
                 shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) |
                 shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) |
                 shiftData<4, 0>(floatToNormalized<4, ushort>(color.a));
    ;

shortToBytes(dst, bufferOffset, buffer);
}


// R5G5B5A1_UNORM: three 5-bit colour channels (shifts 11, 6, 1) and a 1-bit
// alpha (shift 0) in one 16-bit word.
static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer)
{
    ushort rawBits = bytesToShort<ushort>(buffer, bufferOffset);

    float4 texel;
    texel.r = normalizedToFloat<5>(getShiftedData<5, 11>(rawBits));
    texel.g = normalizedToFloat<5>(getShiftedData<5, 6>(rawBits));
    texel.b = normalizedToFloat<5>(getShiftedData<5, 1>(rawBits));
    texel.a = normalizedToFloat<1>(getShiftedData<1, 0>(rawBits));
    return texel;
}
// Quantizes and packs r, g, b, a into one 16-bit word and stores it.
// gIndices and options are not used.
static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    ushort packedBits = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
                        shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) |
                        shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) |
                        shiftData<1, 0>(floatToNormalized<1, ushort>(color.a));

    shortToBytes(packedBits, bufferOffset, buffer);
}


// R10G10B10A2_SINT: reads one 32-bit word and returns the three 10-bit colour
// fields (shifts 0, 10, 20) and the 2-bit alpha field (shift 30) as sign-extended
// integers.
//
// Sign extension: each field is first extracted as a small non-negative value.
// Its top bit (0x200 for 10-bit, 0x2 for 2-bit) is moved down to bit 0 to form a
// 0/1 flag; multiplying the flag by the all-ones-above-the-field mask and OR-ing
// it in fills the upper bits only when the field is negative.
//
// Fix: the alpha flag used to be written as color.a & alphaSignMask >> 1.
// The shift binds tighter than the bitwise AND, so that tested bit 0 rather than
// the sign bit, and alpha bit patterns 01 and 10 decoded as -3 and 2 instead of
// 1 and -2. The expression is now parenthesized like the RGB path.
// NOTE(review): this file is generated; the same change presumably has to be
// made in ./copy_buffer.metal so that regeneration does not revert it.
static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 color;
    int src = bytesToInt<int>(buffer, bufferOffset);

    constexpr int3 rgbSignMask(0x200);
    constexpr int3 negativeMask(0xfffffc00);
    constexpr int alphaSignMask = 0x2;
    constexpr int alphaNegMask  = 0xfffffffc;

    color.r = getShiftedData<10, 0>(src);
    color.g = getShiftedData<10, 10>(src);
    color.b = getShiftedData<10, 20>(src);

    int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9;
    color.rgb          = (isRgbNegative * negativeMask) | color.rgb;

    color.a             = getShiftedData<2, 30>(src);
    int isAlphaNegative = (color.a & alphaSignMask) >> 1;
    color.a             = (isAlphaNegative * alphaNegMask) | color.a;
    return color;
}

static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 color;
    uint src = bytesToInt<uint>(buffer, bufferOffset);

    color.r
= getShiftedData<10, 0>(src);
    color.g = getShiftedData<10, 10>(src);
    color.b = getShiftedData<10, 20>(src);
    color.a = getShiftedData<2, 30>(src);
    return color;
}


// Reads four bytes as one 32-bit word and unpacks it to float4, using the sRGB
// unpack variant when isSRGB is set.
static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
{
    uint rawBits = bytesToInt<uint>(buffer, bufferOffset);

    return isSRGB ? unpack_unorm4x8_srgb_to_float(rawBits) : unpack_unorm4x8_to_float(rawBits);
}
// Packs color to 4x8 unorm (sRGB pack variant when isSRGB is set) and stores all
// four bytes. gIndices and options are not used.
static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
{
    uint packedBits = isSRGB ? pack_float_to_srgb_unorm4x8(color) : pack_float_to_unorm4x8(color);

    intToBytes(packedBits, bufferOffset, buffer);
}

// Reads three consecutive bytes as r, g, b, sets alpha to 1, and passes the
// result through sRGBtoLinear when isSRGB is set.
static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
{
    float4 texel;
    texel.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
    texel.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
    texel.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]);
    texel.a = 1.0;

    return isSRGB ? sRGBtoLinear(texel) : texel;
}
// Forces alpha to 1, packs like writeR8G8B8A8, then stores via int24bitToBytes.
static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
{
    color.a = 1.0;

    uint packedBits = isSRGB ? pack_float_to_srgb_unorm4x8(color) : pack_float_to_unorm4x8(color);

    int24bitToBytes(packedBits, bufferOffset, buffer);
}


static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer)
{
    float4 color;
    uint src = bytesToInt<uint>(buffer, bufferOffset);

    color =
unpack_snorm4x8_to_float(src);

    return color;
}
// Packs color to 4x8 snorm and stores all four bytes.
static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    uint packedBits = pack_float_to_snorm4x8(color);

    intToBytes(packedBits, bufferOffset, buffer);
}


// Three signed-normalized bytes for r, g, b; alpha is returned as 1.
static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
    texel.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
    texel.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]);
    texel.a = 1.0;

    return texel;
}
// Packs color to 4x8 snorm and stores it via int24bitToBytes.
static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    uint packedBits = pack_float_to_snorm4x8(color);

    int24bitToBytes(packedBits, bufferOffset, buffer);
}


// Thin format-named wrappers that pin the isSRGB argument of readR8G8B8A8 and
// writeR8G8B8A8.
static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8B8A8(bufferOffset, buffer, false);
}
static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
}

static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8B8A8(bufferOffset, buffer, true);
}
static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
}


// Same bytes as the RGBA reader, returned with the bgra swizzle applied.
static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    float4 rgba = readR8G8B8A8(bufferOffset, buffer, false);
    return rgba.bgra;
}
static inline void
writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    color.rgba = color.bgra;
    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
}

// sRGB flavour of the BGRA pair: same swizzle, isSRGB pinned to true.
static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
{
    float4 rgba = readR8G8B8A8(bufferOffset, buffer, true);
    return rgba.bgra;
}
static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    color.rgba = color.bgra;
    writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
}


// Format-named wrappers that pin the isSRGB argument of readR8G8B8 / writeR8G8B8.
static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8B8(bufferOffset, buffer, false);
}
static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false);
}

static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8B8(bufferOffset, buffer, true);
}
static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true);
}


// L8_UNORM: one byte replicated into r, g and b; alpha is returned as 1.
static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
    texel.a   = 1.0;
    return texel;
}
// Stores only the red channel as the luminance byte.
static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
}


// A8_UNORM: stores only the alpha channel as a single byte.
static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    buffer[bufferOffset] = floatToNormalized<uchar>(color.a);
}


// L8A8_UNORM: first byte is replicated into r, g, b; second byte is alpha.
static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
    texel.a   = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
    return texel;
}
// Stores red as the first byte and alpha as the second byte.
static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    buffer[bufferOffset]     = floatToNormalized<uchar>(color.r);
    buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a);
}


// R8_UNORM: one byte for red; green and blue are returned as 0, alpha as 1.
static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
    texel.g = texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}
static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
}

// R8_SNORM: same layout as R8_UNORM, converted with the <7, char> helpers.
static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
    texel.g = texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}
// The quantized signed value is bit-cast to uchar before the byte store.
static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r));
}


static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 color;
    color.r = as_type<char>(buffer[bufferOffset]);
    color.g = color.b = 0;
color.a = 1;
    return color;
}
// Stores the red channel converted to a single byte.
static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    buffer[bufferOffset] = static_cast<uchar>(color.r);
}


// R8_UINT: one byte for red; green and blue are returned as 0, alpha as 1.
static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    texel.r = as_type<uchar>(buffer[bufferOffset]);
    texel.g = texel.b = 0;
    texel.a = 1;
    return texel;
}
static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    buffer[bufferOffset] = static_cast<uchar>(color.r);
}


// R8G8_UNORM: two consecutive bytes for red and green; blue is 0, alpha is 1.
static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
    texel.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
    texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}
static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    buffer[bufferOffset]     = floatToNormalized<uchar>(color.r);
    buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g);
}

// R8G8_SNORM: same layout, converted with the <7, char> helpers.
static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
    texel.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
    texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}
static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    buffer[bufferOffset]     = as_type<uchar>(floatToNormalized<7, char>(color.r));
    buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<7, char>(color.g));
}


static inline
int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 color;
    color.r = as_type<char>(buffer[bufferOffset]);
    color.g = as_type<char>(buffer[bufferOffset + 1]);
    color.b = 0;
    color.a = 1;
    return color;
}
// Stores red and green, each converted to one byte.
static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    buffer[bufferOffset]     = static_cast<uchar>(color.r);
    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
}


// R8G8_UINT: two consecutive bytes for red and green; blue is 0, alpha is 1.
static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    texel.r = as_type<uchar>(buffer[bufferOffset]);
    texel.g = as_type<uchar>(buffer[bufferOffset + 1]);
    texel.b = 0;
    texel.a = 1;
    return texel;
}
static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    buffer[bufferOffset]     = static_cast<uchar>(color.r);
    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
}


// R8G8B8_SINT: three bytes reinterpreted as signed char; alpha is returned as 1.
static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 texel;
    texel.r = as_type<char>(buffer[bufferOffset]);
    texel.g = as_type<char>(buffer[bufferOffset + 1]);
    texel.b = as_type<char>(buffer[bufferOffset + 2]);
    texel.a = 1;
    return texel;
}


// R8G8B8_UINT: three unsigned bytes; alpha is returned as 1.
static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    texel.r = as_type<uchar>(buffer[bufferOffset]);
    texel.g = as_type<uchar>(buffer[bufferOffset + 1]);
    texel.b = as_type<uchar>(buffer[bufferOffset + 2]);
    texel.a = 1;
    return texel;
}


static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 color;
    color.r = as_type<char>(buffer[bufferOffset]);
    color.g = as_type<char>(buffer[bufferOffset + 1]);
    color.b =
as_type<char>(buffer[bufferOffset + 2]);
    color.a = as_type<char>(buffer[bufferOffset + 3]);
    return color;
}
// Stores r, g, b, a as four consecutive bytes.
static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    buffer[bufferOffset]     = static_cast<uchar>(color.r);
    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
    buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
    buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
}


// R8G8B8A8_UINT: four consecutive unsigned bytes.
static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    texel.r = as_type<uchar>(buffer[bufferOffset]);
    texel.g = as_type<uchar>(buffer[bufferOffset + 1]);
    texel.b = as_type<uchar>(buffer[bufferOffset + 2]);
    texel.a = as_type<uchar>(buffer[bufferOffset + 3]);
    return texel;
}
static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    buffer[bufferOffset]     = static_cast<uchar>(color.r);
    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
    buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
    buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
}


// R16_FLOAT: one 16-bit word bit-cast to half for red; green and blue are 0,
// alpha is 1.
static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    texel.g = texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}
// Converts red to half, bit-casts it to ushort and stores the two bytes.
static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
}

template <typename ShortType>
static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer)
{
    float4 color;
    color.r =
normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    color.g = color.b = 0.0;
    color.a = 1.0;
    return color;
}




// R16_SINT: one signed 16-bit word for red; green and blue are 0, alpha is 1.
static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 texel;
    texel.r = bytesToShort<short>(buffer, bufferOffset);
    texel.g = texel.b = 0;
    texel.a = 1;
    return texel;
}
static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
}


// R16_UINT: unsigned counterpart of R16_SINT.
static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    texel.r = bytesToShort<ushort>(buffer, bufferOffset);
    texel.g = texel.b = 0;
    texel.a = 1;
    return texel;
}
static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
}


// A16_FLOAT: the half value goes to alpha; r, g, b are returned as 0.
static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.a   = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    texel.rgb = 0.0;
    return texel;
}
static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer);
}


// L16_FLOAT: the half value is assigned to r, g and b; alpha is returned as 1.
static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    texel.a   = 1.0;
    return texel;
}
static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
}


// L16A16_FLOAT: first half value goes to r, g, b; second half value (offset + 2)
// goes to alpha.
static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    texel.a   = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    return texel;
}
// Stores red at the base offset and alpha two bytes later, both as half.
static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer);
}


// R16G16_FLOAT: two half values for red and green; blue is 0, alpha is 1.
static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    texel.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}
static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
    shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
}


// Two 16-bit words of type ShortType converted with normalizedToFloat; blue is 0
// and alpha is 1.
template <typename ShortType>
static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    texel.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
    texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}




static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 color;
    color.r = bytesToShort<short>(buffer,
bufferOffset);
    color.g = bytesToShort<short>(buffer, bufferOffset + 2);
    color.b = 0;
    color.a = 1;
    return color;
}
// Stores red and green as two consecutive signed 16-bit words.
static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
    shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
}


// R16G16_UINT: two unsigned 16-bit words; blue is 0, alpha is 1.
static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    texel.r = bytesToShort<ushort>(buffer, bufferOffset);
    texel.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
    texel.b = 0;
    texel.a = 1;
    return texel;
}
static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
    shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
}


// R16G16B16_FLOAT: three half values at offsets 0, 2, 4; alpha is returned as 1.
static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    texel.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    texel.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
    texel.a = 1.0;
    return texel;
}


// Three 16-bit words of type ShortType converted with normalizedToFloat; alpha
// is returned as 1.
template <typename ShortType>
static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    texel.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
    texel.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
    texel.a = 1.0;
    return texel;
}



static inline int4
readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 color;
    color.r = bytesToShort<short>(buffer, bufferOffset);
    color.g = bytesToShort<short>(buffer, bufferOffset + 2);
    color.b = bytesToShort<short>(buffer, bufferOffset + 4);
    color.a = 1;
    return color;
}


// R16G16B16_UINT: three unsigned 16-bit words at offsets 0, 2, 4; alpha is 1.
static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    texel.r = bytesToShort<ushort>(buffer, bufferOffset);
    texel.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
    texel.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
    texel.a = 1;
    return texel;
}


// R16G16B16A16_FLOAT: four half values at offsets 0, 2, 4, 6.
static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    texel.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    texel.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
    texel.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6));
    return texel;
}
// Converts each channel to half and stores the four 16-bit words in r, g, b, a
// order.
static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
    shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
    shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer);
    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer);
}


template <typename ShortType>
static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer)
{
    float4 color;
    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
    color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6));
    return color;
}




// R16G16B16A16_SINT: four signed 16-bit words at offsets 0, 2, 4, 6.
static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 texel;
    texel.r = bytesToShort<short>(buffer, bufferOffset);
    texel.g = bytesToShort<short>(buffer, bufferOffset + 2);
    texel.b = bytesToShort<short>(buffer, bufferOffset + 4);
    texel.a = bytesToShort<short>(buffer, bufferOffset + 6);
    return texel;
}
static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
    shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
    shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer);
    shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer);
}


// R16G16B16A16_UINT: unsigned counterpart of the SINT pair above.
static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    texel.r = bytesToShort<ushort>(buffer, bufferOffset);
    texel.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
    texel.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
    texel.a = bytesToShort<ushort>(buffer, bufferOffset + 6);
    return texel;
}
static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
    shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
    shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer);
    shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer);
}


static inline float4 readR32_FLOAT(uint
bufferOffset, constant uchar *buffer)
{
    float4 color;
    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    color.g = color.b = 0.0;
    color.a = 1.0;
    return color;
}
// Stores the bit pattern of the red channel as one 32-bit word.
static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
}


// One 32-bit word of type IntType converted with normalizedToFloat for red;
// green and blue are 0, alpha is 1.
template <typename IntType>
static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
    texel.g = texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}




// A32_FLOAT: the float value goes to alpha; r, g, b are returned as 0.
static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.a   = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    texel.rgb = 0.0;
    return texel;
}
static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    intToBytes(as_type<uint>(color.a), bufferOffset, buffer);
}


// L32_FLOAT: the float value is assigned to r, g and b; alpha is returned as 1.
static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    texel.a   = 1.0;
    return texel;
}
static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
}


// R32_SINT: one signed 32-bit word for red; green and blue are 0, alpha is 1.
static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 texel;
    texel.r = bytesToInt<int>(buffer, bufferOffset);
    texel.g = texel.b = 0;
    texel.a = 1;
    return texel;
}
static inline void writeR32_SINT(ushort2 gIndices, constant
WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    intToBytes(color.r, bufferOffset, buffer);
}


// R32_FIXED: one signed 32-bit word scaled by 1 / (1 << 16) for red; green and
// blue are 0, alpha is 1.
static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer)
{
    constexpr float kDivisor = 1.0f / (1 << 16);

    float4 texel;
    texel.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
    texel.g = texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}


// R32_UINT: one unsigned 32-bit word for red; green and blue are 0, alpha is 1.
static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    texel.r = bytesToInt<uint>(buffer, bufferOffset);
    texel.g = texel.b = 0;
    texel.a = 1;
    return texel;
}
static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    intToBytes(color.r, bufferOffset, buffer);
}


// L32A32_FLOAT: first float goes to r, g, b; second float (offset + 4) is alpha.
static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    texel.a   = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
    return texel;
}
static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
    intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer);
}


// R32G32_FLOAT: two floats for red and green; blue is 0, alpha is 1.
static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    texel.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
    texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}
static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
    intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
}


// Two 32-bit words of type IntType converted with normalizedToFloat; blue is 0
// and alpha is 1.
template <typename IntType>
static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
    texel.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
    texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}




// R32G32_SINT: two signed 32-bit words; blue is 0, alpha is 1.
static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 texel;
    texel.r = bytesToInt<int>(buffer, bufferOffset);
    texel.g = bytesToInt<int>(buffer, bufferOffset + 4);
    texel.b = 0;
    texel.a = 1;
    return texel;
}
static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    intToBytes(color.r, bufferOffset, buffer);
    intToBytes(color.g, bufferOffset + 4, buffer);
}


// R32G32_FIXED: two signed 32-bit words, each scaled by 1 / (1 << 16); blue is 0
// and alpha is 1.
static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer)
{
    constexpr float kDivisor = 1.0f / (1 << 16);

    float4 texel;
    texel.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
    texel.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
    texel.b = 0.0;
    texel.a = 1.0;
    return texel;
}


// R32G32_UINT: two unsigned 32-bit words; blue is 0, alpha is 1.
static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    texel.r = bytesToInt<uint>(buffer, bufferOffset);
    texel.g = bytesToInt<uint>(buffer, bufferOffset + 4);
    texel.b = 0;
    texel.a = 1;
    return texel;
}
static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    intToBytes(color.r, bufferOffset, buffer);
    intToBytes(color.g, bufferOffset + 4,
buffer);
}


// R32G32B32_FLOAT: three floats at offsets 0, 4, 8; alpha is returned as 1.
static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    texel.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
    texel.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
    texel.a = 1.0;
    return texel;
}


// Three 32-bit words of type IntType converted with normalizedToFloat; alpha is
// returned as 1.
template <typename IntType>
static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    texel.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
    texel.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
    texel.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
    texel.a = 1.0;
    return texel;
}




// R32G32B32_SINT: three signed 32-bit words; alpha is returned as 1.
static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 texel;
    texel.r = bytesToInt<int>(buffer, bufferOffset);
    texel.g = bytesToInt<int>(buffer, bufferOffset + 4);
    texel.b = bytesToInt<int>(buffer, bufferOffset + 8);
    texel.a = 1;
    return texel;
}


// R32G32B32_FIXED: three signed 32-bit words, each scaled by 1 / (1 << 16);
// alpha is returned as 1.
static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer)
{
    constexpr float kDivisor = 1.0f / (1 << 16);

    float4 texel;
    texel.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
    texel.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
    texel.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
    texel.a = 1.0;
    return texel;
}


// R32G32B32_UINT: three unsigned 32-bit words; alpha is returned as 1.
static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    texel.r = bytesToInt<uint>(buffer, bufferOffset);
    texel.g = bytesToInt<uint>(buffer, bufferOffset + 4);
    texel.b = bytesToInt<uint>(buffer, bufferOffset + 8);
    texel.a = 1;
    return texel;
}


static inline
float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 color;
    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
    color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12));
    return color;
}
// Stores the bit patterns of all four float channels of color as consecutive
// 4-byte words starting at bufferOffset (red first, alpha last).
// gIndices and options are not used by this writer.
static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        intToBytes(as_type<uint>(color[channel]), bufferOffset + 4 * channel, buffer);
    }
}


// Reads four IntType values at bufferOffset, +4, +8 and +12 and converts each
// with normalizedToFloat (red, green, blue, alpha).
template <typename IntType>
static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    for (uint channel = 0; channel < 4; ++channel)
    {
        texel[channel] = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4 * channel));
    }
    return texel;
}




// Reads four signed integers at bufferOffset, +4, +8 and +12
// (red, green, blue, alpha).
static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 texel;
    for (uint channel = 0; channel < 4; ++channel)
    {
        texel[channel] = bytesToInt<int>(buffer, bufferOffset + 4 * channel);
    }
    return texel;
}
static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    intToBytes(color.r, bufferOffset, buffer);
intToBytes(color.g, bufferOffset + 4, buffer);
    intToBytes(color.b, bufferOffset + 8, buffer);
    intToBytes(color.a, bufferOffset + 12, buffer);
}

// Reads four signed integers and scales each by 1 / 65536 (16 fractional
// bits) to obtain red, green, blue and alpha.
static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer)
{
    constexpr float kFixedToFloat = 1.0f / (1 << 16);
    float4 texel;
    for (uint channel = 0; channel < 4; ++channel)
    {
        texel[channel] = bytesToInt<int>(buffer, bufferOffset + 4 * channel) * kFixedToFloat;
    }
    return texel;
}


// Reads four unsigned integers at bufferOffset, +4, +8 and +12
// (red, green, blue, alpha).
static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    for (uint channel = 0; channel < 4; ++channel)
    {
        texel[channel] = bytesToInt<uint>(buffer, bufferOffset + 4 * channel);
    }
    return texel;
}
// Stores all four channels of color as consecutive 4-byte words starting at
// bufferOffset (red first, alpha last). gIndices and options are not used by
// this writer.
static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        intToBytes(color[channel], bufferOffset + 4 * channel, buffer);
    }
}
# 1292 "./copy_buffer.metal"
// SSCALED / USCALED readers: each forwards to the SINT / UINT reader of the
// same channel layout and returns that integer vector unchanged.
static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8_SINT(bufferOffset, buffer);
}
static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8_UINT(bufferOffset, buffer);
}
static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8_SINT(bufferOffset, buffer);
}
static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8_UINT(bufferOffset, buffer);
}
static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant
uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); }
// Remaining 8-bit SSCALED / USCALED readers: each forwards to the SINT / UINT
// reader of the same channel layout and returns that integer vector unchanged.
static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8B8_UINT(bufferOffset, buffer);
}
static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8B8A8_SINT(bufferOffset, buffer);
}
static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8B8A8_UINT(bufferOffset, buffer);
}
// 16-bit SSCALED / USCALED readers, forwarding in the same way.
static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16_SINT(bufferOffset, buffer);
}
static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16_UINT(bufferOffset, buffer);
}
static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16_SINT(bufferOffset, buffer);
}
static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16_UINT(bufferOffset, buffer);
}
static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16B16_SINT(bufferOffset, buffer);
}
static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16B16_UINT(bufferOffset, buffer);
}
static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16B16A16_SINT(bufferOffset, buffer);
}
static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16B16A16_UINT(bufferOffset, buffer);
}
// 32-bit SSCALED / USCALED readers, forwarding in the same way.
static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32_SINT(bufferOffset, buffer);
}
static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32_UINT(bufferOffset, buffer);
}
static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32G32_SINT(bufferOffset, buffer);
}
static inline uint4 readR32G32_USCALED(uint bufferOffset,
constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); }
// Remaining 32-bit SSCALED / USCALED readers: each forwards to the SINT / UINT
// reader of the same channel layout and returns that integer vector unchanged.
static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32G32B32_SINT(bufferOffset, buffer);
}
static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32G32B32_UINT(bufferOffset, buffer);
}
static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32G32B32A32_SINT(bufferOffset, buffer);
}
static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32G32B32A32_UINT(bufferOffset, buffer);
}

// Packed 10/10/10/2 SSCALED / USCALED readers, forwarding in the same way.
static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR10G10B10A2_SINT(bufferOffset, buffer);
}
static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR10G10B10A2_UINT(bufferOffset, buffer);
}


// Compute kernel: one thread per destination texel. Threads outside
// options.copySize return immediately; the others locate their pixel in buffer
// from the start offset, depth pitch, row pitch and pixel size, then decode it
// according to kCopyFormatType.
kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]],
                                         constant CopyPixelParams &options[[buffer(0)]],
                                         constant uchar *buffer [[buffer(1)]],
                                         texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]],
                                         texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]],
                                         texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]],
                                         texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
{
    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
# 1336 "./copy_buffer.metal"
    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);

    switch (kCopyFormatType)
    {
        case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer);
textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); 
textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, 
dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, 
dstTextureCube); } break; 2887 } 2888 2889 2890} 2891 2892kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2893{ 2894 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2895# 1364 "./copy_buffer.metal" 2896 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2897 2898 switch (kCopyFormatType) 2899 { 2900 case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case 
FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2901 } 2902 2903 2904} 2905 2906kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), 
function_constant(kCopyTextureTypeCube)]]) 2907{ 2908 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2909# 1392 "./copy_buffer.metal" 2910 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2911 2912 switch (kCopyFormatType) 2913 { 2914 case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case 
FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2915 } 2916 2917 2918} 2919 2920 2921kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2922{ 2923 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2924# 1439 "./copy_buffer.metal" 2925 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2926 2927 switch (kCopyFormatType) 2928 { 2929 case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); 
writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: 
{ auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, 
buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); 
writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; 2930 } 2931 2932 2933} 2934 2935kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2936{ 2937 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2938# 1464 "./copy_buffer.metal" 2939 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2940 2941 switch (kCopyFormatType) 2942 { 2943 case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, 
buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break; 2944 } 2945 2946 2947} 2948 2949kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant 
WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2950{ 2951 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2952# 1489 "./copy_buffer.metal" 2953 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2954 2955 switch (kCopyFormatType) 2956 { 2957 case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, 
bufferOffset, color, buffer); } break;
case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break;
case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break;
case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break;
case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break;
2958 }
2959
2960
2961}
2962
2963
/* CopyVertexParams: parameter block shared by the vertex conversion
 * (convertToFloatVertexFormat*) and component expansion
 * (expandVertexFormatComponents*) functions in this file.
 */
2964struct CopyVertexParams
2965{
/* Source vertex `index` starts at srcBufferStartOffset + srcStride * index. */
2966 uint srcBufferStartOffset;
2967 uint srcStride;
/* Bytes per component; expandVertexFormatComponents uses it both as the
 * per-component copy width and as the step for source and destination offsets. */
2968 uint srcComponentBytes;
/* Number of components expandVertexFormatComponents copies from the source. */
2969 uint srcComponents;
2970
2971
2972
/* Bytes expandVertexFormatComponents writes into the one destination component
 * left over after the copied and zero-filled components. */
2973 uchar4 srcDefaultAlphaData;
2974
/* Destination vertex `index` starts at dstBufferStartOffset + dstStride * index. */
2975 uint dstBufferStartOffset;
2976 uint dstStride;
/* Number of components written per destination vertex. */
2977 uint dstComponents;
2978
/* Bound used by the *CS kernels to discard threads with index >= vertexCount. */
2979 uint vertexCount;
2980};
2981# 1539 "./copy_buffer.metal"
/* writeFloatVertex (generic): writes the first options.dstComponents lanes of
 * `data` for vertex `idx`. Each lane is cast to float and passed to
 * floatToBytes together with the running destination offset, which starts at
 * idx * dstStride + dstBufferStartOffset and advances by 4 per component.
 * NOTE(review): `data` has 4 lanes, so dstComponents is presumably never
 * greater than 4 - confirm against the caller.
 */
2982template <typename IntType>
2983static inline void writeFloatVertex(constant CopyVertexParams &options,
2984                                    uint idx,
2985                                    vec<IntType, 4> data,
2986                                    device uchar *dst)
2987{
2988     uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
2989
2990     for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
2991     {
2992         floatToBytes(static_cast<float>(data[component]), dstOffset, dst);
2993     }
2994}
2995
/* writeFloatVertex (float specialization): same offset arithmetic and loop as
 * the generic version, but the lanes are already float and are passed to
 * floatToBytes without a cast.
 */
2996template <>
2997inline void writeFloatVertex(constant CopyVertexParams &options,
2998                             uint idx,
2999                             vec<float, 4> data,
3000                             device uchar *dst)
3001{
3002     uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
3003
3004     for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
3005     {
3006         floatToBytes(data[component], dstOffset, dst);
3007     }
3008}
3009
3010
/* convertToFloatVertexFormat: converts one vertex to float components.
 * The source vertex is located at srcBufferStartOffset + srcStride * index.
 * The switch on kCopyFormatType picks the read* helper for that format and
 * forwards its result to writeFloatVertex. The switch has no default label,
 * so a format that is not listed results in no write.
 */
3011static inline void convertToFloatVertexFormat(uint index,
3012                                              constant CopyVertexParams &options,
3013                                              constant uchar *srcBuffer,
3014                                              device uchar *dstBuffer)
3015{
3016# 1585 "./copy_buffer.metal"
3017     uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;
3018# 1594 "./copy_buffer.metal"
3019     switch (kCopyFormatType)
3020     {
3021 case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case
FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, 
srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case 
FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, 
dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = 
readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: 
{ auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
3022 }
3023
3024
3025}
3026
3027
/* convertToFloatVertexFormatCS: compute-kernel entry point for
 * convertToFloatVertexFormat. `index` is the thread position in the grid;
 * threads with index >= options.vertexCount return without doing anything.
 */
3028kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
3029                                         constant CopyVertexParams &options [[buffer(0)]],
3030                                         constant uchar *srcBuffer [[buffer(1)]],
3031                                         device uchar *dstBuffer [[buffer(2)]])
3032{
3033     if (index >= options.vertexCount) { return; };
3034     convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3035}
3036
3037
/* convertToFloatVertexFormatVS: vertex-function entry point for
 * convertToFloatVertexFormat. `index` comes from vertex_id. Unlike the CS
 * variant there is no options.vertexCount check here.
 */
3038vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
3039                                         constant CopyVertexParams &options [[buffer(0)]],
3040                                         constant uchar *srcBuffer [[buffer(1)]],
3041                                         device uchar *dstBuffer [[buffer(2)]])
3042{
3043     convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3044}
3045
3046
/* expandVertexFormatComponents: copies one vertex from srcBuffer to dstBuffer
 * byte by byte, widening it from options.srcComponents components to
 * options.dstComponents components:
 *   1. the first srcComponents components are copied unchanged;
 *   2. further components below min(dstComponents, 3) are filled with zero;
 *   3. if a destination component is still left after that, it receives the
 *      bytes of options.srcDefaultAlphaData.
 * Destination components have the same width as source components: dstOffset
 * always advances by options.srcComponentBytes.
 * NOTE(review): srcDefaultAlphaData is a uchar4 indexed by `byte`, so step 3
 * presumably relies on srcComponentBytes <= 4, and step 1 presumably relies on
 * srcComponents <= dstComponents - confirm against the caller.
 */
3047static inline void expandVertexFormatComponents(uint index,
3048                                                constant CopyVertexParams &options,
3049                                                constant uchar *srcBuffer,
3050                                                device uchar *dstBuffer)
3051{
3052     uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;
3053     uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;
3054
3055     uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);
3056     uint component;
/* Step 1: copy the components present in the source. */
3057     for (component = 0; component < options.srcComponents; ++component,
3058         srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes)
3059     {
3060         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3061         {
3062             dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
3063         }
3064     }
3065
/* Step 2: zero-fill the missing components that come before the last slot;
 * `component` carries over from the loop above. */
3066     for (; component < dstComponentsBeforeAlpha;
3067          ++component, dstOffset += options.srcComponentBytes)
3068     {
3069         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3070         {
3071             dstBuffer[dstOffset + byte] = 0;
3072         }
3073     }
3074
/* Step 3: write the default data into the one remaining destination component. */
3075     if (component < options.dstComponents)
3076     {
3077
3078         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3079         {
3080             dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
3081         }
3082     }
3083}
3084
3085
/* expandVertexFormatComponentsCS: compute-kernel entry point for
 * expandVertexFormatComponents. Threads with index >= options.vertexCount
 * return without doing anything.
 */
3086kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
3087                                           constant CopyVertexParams &options [[buffer(0)]],
3088                                           constant uchar *srcBuffer [[buffer(1)]],
3089                                           device uchar *dstBuffer [[buffer(2)]])
3090{
3091     if (index >= options.vertexCount) { return; };
3092
3093     expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3094}
3095
3096
/* expandVertexFormatComponentsVS: vertex-function entry point for
 * expandVertexFormatComponents. `index` comes from vertex_id; there is no
 * options.vertexCount check here.
 */
3097vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
3098                                           constant CopyVertexParams &options [[buffer(0)]],
3099                                           constant uchar *srcBuffer [[buffer(1)]],
3100                                           device uchar *dstBuffer [[buffer(2)]])
3101{
3102     expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3103}
3104# 6 "temp_master_source.metal" 2
3105# 1 "./visibility.metal" 1
3106
3107
3108
3109
3110
3111
3112
3113
/* Function constant 1000: when true, combineVisibilityResult starts from the
 * value already stored in finalResults[0] instead of from zero. */
3114constant bool kCombineWithExistingResult [[function_constant(1000)]];
3115
3116
3117
/* CombineVisibilityResultOptions: selects which entries of
 * renderpassVisibilityResult are combined - numOffsets consecutive entries
 * starting at index startOffset. */
3118struct CombineVisibilityResultOptions
3119{
3120
3121     uint startOffset;
3122
3123     uint numOffsets;
3124};
3125
/* combineVisibilityResult: ORs a range of renderpassVisibilityResult entries
 * into finalResults[0].
 * Only the thread with idx == 0 does any work; every other thread returns
 * immediately. The accumulator starts from finalResults[0] when
 * kCombineWithExistingResult is set and from zero otherwise, then each of the
 * options.numOffsets entries starting at options.startOffset is OR-ed in, and
 * the result is stored back to finalResults[0].
 */
3126kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]],
3127                                    constant CombineVisibilityResultOptions &options [[buffer(0)]],
3128                                    constant ushort4 *renderpassVisibilityResult [[buffer(1)]],
3129                                    device ushort4 *finalResults [[buffer(2)]])
3130{
3131     if (idx > 0)
3132     {
3133
3134
3135
3136         return;
3137     }
3138     ushort4 finalResult16x4;
3139
3140     if (kCombineWithExistingResult)
3141     {
3142         finalResult16x4 = finalResults[0];
3143     }
3144     else
3145     {
3146         finalResult16x4 = ushort4(0, 0, 0, 0);
3147     }
3148
3149     for (uint i = 0; i < options.numOffsets; ++i)
3150     {
3151         uint offset = options.startOffset + i;
3152         ushort4 renderpassResult = renderpassVisibilityResult[offset];
3153
3154
3155         finalResult16x4 = finalResult16x4 | renderpassResult;
3156     }
3157     finalResults[0] = finalResult16x4;
3158}
3159# 7 "temp_master_source.metal" 2
3160
3161
3162)"; 3163