• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// GENERATED FILE - DO NOT EDIT.
// Generated by gen_mtl_internal_shaders.py
//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//

// C++ string version of combined Metal default shaders.
10
11
12
13static char gDefaultMetallibSrc[] = R"(
14# 1 "temp_master_source.metal"
15# 1 "<built-in>" 1
16# 1 "<built-in>" 3
17# 386 "<built-in>" 3
18# 1 "<command line>" 1
19# 1 "<built-in>" 2
20# 1 "temp_master_source.metal" 2
21# 1 "./blit.metal" 1
22
23
24
25
26
27
28
29# 1 "./common.h" 1
30# 13 "./common.h"
31# include <simd/simd.h>
32# include <metal_stdlib>
33
34
35
36# 1 "./constants.h" 1
37# 11 "./constants.h"
namespace rx
{
namespace mtl_shader
{

// Identifiers for the kinds of source texture the internal shaders read.
// The values are written out explicitly because they are compared against
// integer function constants supplied from outside the shader.
enum
{
    kTextureType2D            = 0,
    kTextureType2DMultisample = 1,
    kTextureType2DArray       = 2,
    kTextureTypeCube          = 3,
    kTextureType3D            = 4,
    kTextureTypeCount         = 5,  // Number of texture kinds listed above.
};

}  // namespace mtl_shader
}  // namespace rx
58# 18 "./common.h" 2
59
60
61
62
63
64
65
66using namespace metal;
67
68
69
// Number of color outputs the fragment functions write. Provided as function
// constant 0 when a function is specialized.
constant uint32_t kNumColorOutputs [[function_constant(0)]];

// Derived switches: color output N is enabled only when more than N outputs
// were requested.
constant bool kColorOutputAvailable0 = (kNumColorOutputs > 0);
constant bool kColorOutputAvailable1 = (kNumColorOutputs > 1);
constant bool kColorOutputAvailable2 = (kNumColorOutputs > 2);
constant bool kColorOutputAvailable3 = (kNumColorOutputs > 3);
75
76namespace rx
77{
78namespace mtl_shader
79{
80
81
// Positions of the three vertices used by the vertex functions, indexed by
// vertex id.
// NOTE(review): (-1,-1), (3,-1), (-1,3) looks like a single oversized
// triangle covering the whole clip-space square - confirm.
constant float2 gCorners[3] = {
    float2(-1.0f, -1.0f),
    float2(3.0f, -1.0f),
    float2(-1.0f, 3.0f),
};
83
// Fragment return value carrying up to four color outputs whose components
// have type T. Each member is tied to color index N and only exists when the
// matching kColorOutputAvailableN function constant is true.
template <typename T>
struct MultipleColorOutputs
{
    vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]];
    vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]];
    vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]];
    vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]];
};
92# 61 "./common.h"
// Broadcasts one color to every enabled color output.
// Outputs whose kColorOutputAvailableN constant is false are never touched.
template <typename T>
static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color)
{
    MultipleColorOutputs<T> outputs;

    if (kColorOutputAvailable0)
    {
        outputs.color0 = color;
    }
    if (kColorOutputAvailable1)
    {
        outputs.color1 = color;
    }
    if (kColorOutputAvailable2)
    {
        outputs.color2 = color;
    }
    if (kColorOutputAvailable3)
    {
        outputs.color3 = color;
    }

    return outputs;
}
105
// Converts 2D texture coordinates plus a cube face index into a 3D direction
// for sampling a cube texture.
//
// The coordinates are first scaled and biased with 2 * t - 1 (so [0, 1] maps
// onto [-1, 1]). Faces 0..5 fix x = +1, x = -1, y = +1, y = -1, z = +1 and
// z = -1 respectively. Any other face value yields (u, v, 0).
static inline float3 cubeTexcoords(float2 texcoords, int face)
{
    const float2 uv = 2.0 * texcoords - 1.0;
    const float u   = uv.x;
    const float v   = uv.y;

    switch (face)
    {
        case 0:
            return float3(1.0, -v, -u);
        case 1:
            return float3(-1.0, -v, u);
        case 2:
            return float3(u, 1.0, v);
        case 3:
            return float3(u, -1.0, -v);
        case 4:
            return float3(u, -v, 1.0);
        case 5:
            return float3(-u, -v, -1.0);
        default:
            break;
    }
    return float3(uv, 0);
}
126
// Reads every sample of a multisample texel and returns their average.
//   srcTexture: multisample texture to read.
//   coords:     integer texel coordinates.
template <typename T>
static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords)
{
    uint sampleCount = srcTexture.get_num_samples();

    // Accumulate all samples starting from zero.
    vec<T, 4> sum(0);
    for (uint i = 0; i < sampleCount; ++i)
    {
        sum += srcTexture.read(coords, i);
    }

    sum = sum / sampleCount;
    return sum;
}
143
// Converts the rgb channels of an sRGB color to linear; alpha is passed
// through unchanged.
//
// Channels at or below 0.04045 use c / 12.92, the others use
// ((c + 0.055) / 1.055) ^ 2.4. The choice is made per channel by blending
// with a 0/1 selector instead of branching.
static inline float4 sRGBtoLinear(float4 color)
{
    const float3 srgb = color.rgb;

    // 1.0 for channels on the low segment, 0.0 otherwise.
    const float3 lowSelector = float3(srgb <= float3(0.04045));
    const float3 lowSegment  = srgb / 12.92;
    const float3 highSegment = pow((srgb + float3(0.055)) / 1.055, 2.4);

    return float4(lowSelector * lowSegment + float3(1.0 - lowSelector) * highSegment, color.a);
}
153
// Converts one linear channel value to sRGB.
//   color <= 0           -> 0
//   color <  0.0031308   -> 12.92 * color
//   color <  1           -> 1.055 * color ^ 0.41666 - 0.055
//   anything else        -> 1
static inline float linearToSRGB(float color)
{
    if (color <= 0.0f)
    {
        return 0.0f;
    }
    if (color < 0.0031308f)
    {
        return 12.92f * color;
    }
    if (color < 1.0f)
    {
        return 1.055f * pow(color, 0.41666f) - 0.055f;
    }
    return 1.0f;
}
165
// Vector overload: converts r, g and b with the scalar linearToSRGB and keeps
// alpha as is.
static inline float4 linearToSRGB(float4 color)
{
    const float r = linearToSRGB(color.r);
    const float g = linearToSRGB(color.g);
    const float b = linearToSRGB(color.b);

    return float4(r, g, b, color.a);
}
170
// Assembles a 16-bit value of type Short from two consecutive bytes, with the
// byte at `offset` as the low byte (little-endian order).
template <typename Short>
static inline Short bytesToShort(constant uchar *input, uint offset)
{
    Short low  = input[offset];
    Short high = input[offset + 1];

    return low | (high << 8);
}
179
// Assembles a 32-bit value of type Int from four consecutive bytes, with the
// byte at `offset` as the least significant byte (little-endian order).
template <typename Int>
static inline Int bytesToInt(constant uchar *input, uint offset)
{
    Int byte0 = input[offset];
    Int byte1 = input[offset + 1];
    Int byte2 = input[offset + 2];
    Int byte3 = input[offset + 3];

    return byte0 | (byte1 << 8) | (byte2 << 16) | (byte3 << 24);
}
190
// Stores a 16-bit value into two consecutive bytes of `output`, low byte
// first. The value's bits are reinterpreted as ushort before splitting.
template <typename Short>
static inline void shortToBytes(Short val, uint offset, device uchar *output)
{
    ushort bits = as_type<ushort>(val);

    output[offset]     = bits & 0xff;
    output[offset + 1] = (bits >> 8) & 0xff;
}
198
// Stores a 32-bit value into four consecutive bytes of `output`, least
// significant byte first. The value's bits are reinterpreted as uint before
// splitting.
template <typename Int>
static inline void intToBytes(Int val, uint offset, device uchar *output)
{
    uint bits = as_type<uint>(val);

    output[offset]     = bits & 0xff;
    output[offset + 1] = (bits >> 8) & 0xff;
    output[offset + 2] = (bits >> 16) & 0xff;
    output[offset + 3] = (bits >> 24) & 0xff;
}
208
// Stores the raw 32-bit pattern of a float into four bytes of `output`,
// delegating the byte split to intToBytes.
static inline void floatToBytes(float val, uint offset, device uchar *output)
{
    uint bits = as_type<uint>(val);
    intToBytes(bits, offset, output);
}
213
// Stores the low 24 bits of `val` into three consecutive bytes of `output`,
// least significant byte first. Bits 24..31 of `val` are ignored.
static inline void int24bitToBytes(uint val, uint offset, device uchar *output)
{
    output[offset]     = val & 0xff;
    output[offset + 1] = (val >> 8) & 0xff;
    output[offset + 2] = (val >> 16) & 0xff;
}
220
// Extracts an unsigned bit field from `input`.
//   inputBitCount: width of the field in bits.
//   inputBitStart: index of the field's lowest bit.
// Returns the field shifted down to bit 0.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
static inline T getShiftedData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");

    // The mask is built without ever shifting by the full width of T: a plain
    // (1 << inputBitCount) - 1 is evaluated in int and is undefined once
    // inputBitCount reaches 32, which the static_assert above allows.
    // Here the top bit of the field is placed first, everything below it is
    // filled in, and the result is widened by one bit. This is well defined
    // for unsigned T of any field width up to the width of T.
    constexpr unsigned int topBit = inputBitCount == 0 ? 0 : inputBitCount - 1;
    const T belowTop = static_cast<T>((static_cast<T>(1) << topBit) - 1);
    const T mask =
        inputBitCount == 0 ? static_cast<T>(0) : static_cast<T>((belowTop << 1) | 1);

    return (input >> inputBitStart) & mask;
}
229
// Places the low `inputBitCount` bits of `input` into a bit field starting at
// bit `inputBitStart`. Bits of `input` above the field width are discarded.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
static inline T shiftData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");

    // The mask is built without ever shifting by the full width of T: a plain
    // (1 << inputBitCount) - 1 is evaluated in int and is undefined once
    // inputBitCount reaches 32, which the static_assert above allows.
    // Here the top bit of the field is placed first, everything below it is
    // filled in, and the result is widened by one bit. This is well defined
    // for unsigned T of any field width up to the width of T.
    constexpr unsigned int topBit = inputBitCount == 0 ? 0 : inputBitCount - 1;
    const T belowTop = static_cast<T>((static_cast<T>(1) << topBit) - 1);
    const T mask =
        inputBitCount == 0 ? static_cast<T>(0) : static_cast<T>((belowTop << 1) | 1);

    return (input & mask) << inputBitStart;
}
238
// Converts an unsigned normalized integer that uses `inputBitCount` bits to a
// float by dividing by the largest value representable in that many bits.
template <unsigned int inputBitCount, typename T>
static inline float normalizedToFloat(T input)
{
    static_assert(inputBitCount <= (sizeof(T) * 8),
                  "T must have more bits than or same bits as inputBitCount.");
    static_assert(inputBitCount <= 23, "Only single precision is supported");

    constexpr int maxValue = (1 << inputBitCount) - 1;
    constexpr float scale  = 1.0f / maxValue;

    return input * scale;
}
249
// Convenience overload: treats every bit of T as part of the normalized
// value and forwards to the bit-count version.
template <typename T>
static inline float normalizedToFloat(T input)
{
    constexpr unsigned int bitCount = sizeof(T) * 8;
    return normalizedToFloat<bitCount, T>(input);
}
255
// Specialization for 16-bit signed input: scales by 1 / 0x7fff.
template <>
inline float normalizedToFloat(short input)
{
    constexpr float scale = 1.0f / 0x7fff;
    return static_cast<float>(input) * scale;
}
262
// Specialization for 32-bit signed input: scales by 1 / 0x7fffffff.
template <>
inline float normalizedToFloat(int input)
{
    constexpr float scale = 1.0f / 0x7fffffff;
    return static_cast<float>(input) * scale;
}
269
// Specialization for 32-bit unsigned input: scales by 1 / 0xffffffff.
template <>
inline float normalizedToFloat(uint input)
{
    constexpr float scale = 1.0f / 0xffffffff;
    return static_cast<float>(input) * scale;
}
276
// Converts a float to an unsigned normalized integer of `outputBitCount`
// bits: multiplies by the largest representable value, adds 0.5 and converts
// to T. The input is not clamped here.
template <unsigned int outputBitCount, typename T>
static inline T floatToNormalized(float input)
{
    static_assert(outputBitCount <= (sizeof(T) * 8),
                  "T must have more bits than or same bits as inputBitCount.");
    static_assert(outputBitCount <= 23, "Only single precision is supported");

    constexpr int maxValue = (1 << outputBitCount) - 1;

    return static_cast<T>(maxValue * input + 0.5f);
}
286
// Convenience overload: uses every bit of T for the normalized value and
// forwards to the bit-count version.
template <typename T>
static inline T floatToNormalized(float input)
{
    constexpr unsigned int bitCount = sizeof(T) * 8;
    return floatToNormalized<bitCount, T>(input);
}
292
293}
294}
295# 9 "./blit.metal" 2
296
297using namespace rx::mtl_shader;
298
299
// Function constants selecting how the blit functions post-process colors
// and which kind of texture is bound as the source.
constant bool kPremultiplyAlpha [[function_constant(1)]];
constant bool kUnmultiplyAlpha [[function_constant(2)]];
// Kind of the texture bound at texture index 0 (one of the kTextureType* values).
constant int kSourceTextureType [[function_constant(3)]];
// Kind of the texture bound at texture index 1, used by the stencil functions.
constant int kSourceTexture2Type [[function_constant(4)]];

// One boolean per possible kind of the first source texture.
constant bool kSourceTextureType2D      = (kSourceTextureType == kTextureType2D);
constant bool kSourceTextureType2DArray = (kSourceTextureType == kTextureType2DArray);
constant bool kSourceTextureType2DMS    = (kSourceTextureType == kTextureType2DMultisample);
constant bool kSourceTextureTypeCube    = (kSourceTextureType == kTextureTypeCube);
constant bool kSourceTextureType3D      = (kSourceTextureType == kTextureType3D);

// One boolean per possible kind of the second source texture (no 3D variant).
constant bool kSourceTexture2Type2D      = (kSourceTexture2Type == kTextureType2D);
constant bool kSourceTexture2Type2DArray = (kSourceTexture2Type == kTextureType2DArray);
constant bool kSourceTexture2Type2DMS    = (kSourceTexture2Type == kTextureType2DMultisample);
constant bool kSourceTexture2TypeCube    = (kSourceTexture2Type == kTextureTypeCube);
315
// Parameters for the blit vertex and fragment functions, read from buffer 0.
struct BlitParams
{
    // Texture coordinates for each of the three vertices emitted by blitVS.
    float2 srcTexCoords[3];
    // Mip level to sample from.
    int srcLevel;
    // Array slice, cube face or 3D slice, depending on the source texture kind.
    int srcLayer;
    // When set, blitVS negates the x position.
    bool dstFlipViewportX;
    // When NOT set, blitVS negates the y position.
    bool dstFlipViewportY;
    // When set, the red channel is copied into green and blue.
    bool dstLuminance;
    // NOTE(review): presumably pads the struct to match the host-side layout - confirm.
    uint8_t padding[13];
};
327
// Output of blitVS and stage-in input of the blit fragment functions.
struct BlitVSOut
{
    float4 position [[position]];      // Clip-space vertex position.
    float2 texCoords [[user(locn1)]];  // Source texture coordinates.
};
333
// Vertex function for blits: emits vertex `vid` of the gCorners triangle with
// the matching source texture coordinates from `options`.
vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]])
{
    float2 corner = gCorners[vid];

    if (options.dstFlipViewportX)
    {
        corner.x = -corner.x;
    }
    // Note the inversion: y is negated when the flip flag is NOT set.
    // NOTE(review): presumably compensates for differing y conventions - confirm.
    if (!options.dstFlipViewportY)
    {
        corner.y = -corner.y;
    }

    BlitVSOut result;
    result.position  = float4(corner, 0.0, 1.0);
    result.texCoords = options.srcTexCoords[vid];
    return result;
}
353
// Converts texture coordinates to integer texel coordinates by scaling with
// the texture's width and height and truncating to uint.
template <typename SrcTexture2d>
static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords)
{
    const uint2 size(srcTexture.get_width(), srcTexture.get_height());
    return uint2(texCoords * float2(size));
}
362
// Reads a multisample texture for a blit: converts the coordinates to a texel
// position and returns the average of that texel's samples.
template <typename T>
static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords)
{
    return resolveTextureMS(srcTexture, getImageCoords(srcTexture, texCoords));
}
369
// Samples one slice of a 3D texture for a blit. The z coordinate is the
// center of slice `options.srcLayer` at mip level `options.srcLevel`:
// (layer + 0.5) / depth.
template <typename T>
static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture,
                                            sampler textureSampler,
                                            float2 texCoords,
                                            constant BlitParams &options)
{
    const uint sliceCount   = srcTexture.get_depth(options.srcLevel);
    const float sliceCenter = (float(options.srcLayer) + 0.5) / float(sliceCount);
    const float3 coords     = float3(texCoords, sliceCenter);

    return srcTexture.sample(textureSampler, coords, level(options.srcLevel));
}
381# 112 "./blit.metal"
// Reads the source color for a blit and applies the requested conversions.
//
// Exactly one of the texture arguments is expected to be live, chosen by the
// kSourceTextureType function constant. After the read:
//   - kPremultiplyAlpha multiplies rgb by alpha;
//   - otherwise kUnmultiplyAlpha divides rgb by alpha when alpha is non-zero;
//   - options.dstLuminance copies red into green and blue.
template <typename T>
static inline vec<T, 4> blitReadTexture(
    BlitVSOut input [[stage_in]],
    texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
    texture2d_array<T> srcTexture2dArray
    [[texture(0), function_constant(kSourceTextureType2DArray)]],
    texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]],
    texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]],
    texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]],
    sampler textureSampler [[sampler(0)]],
    constant BlitParams &options [[buffer(0)]])
{
    // Start from zero so an unrecognized kSourceTextureType yields a defined
    // value instead of reading an uninitialized vector.
    vec<T, 4> output(0);

    switch (kSourceTextureType)
    {
        case kTextureType2D:
            output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel));
            break;
        case kTextureType2DArray:
            output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer,
                                              level(options.srcLevel));
            break;
        case kTextureType2DMultisample:
            output = blitSampleTextureMS(srcTexture2dMS, input.texCoords);
            break;
        case kTextureTypeCube:
            output = srcTextureCube.sample(textureSampler,
                                           cubeTexcoords(input.texCoords, options.srcLayer),
                                           level(options.srcLevel));
            break;
        case kTextureType3D:
            output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options);
            break;
    }

    if (kPremultiplyAlpha)
    {
        output.xyz *= output.a;
    }
    else if (kUnmultiplyAlpha)
    {
        // Skip the division for zero alpha.
        if (output.a != 0.0)
        {
            output.xyz /= output.a;
        }
    }

    if (options.dstLuminance)
    {
        output.g = output.b = output.r;
    }

    return output;
}
428
// Shared body of the typed blit fragment functions: reads the source color
// with blitReadTexture and writes it to every enabled color output.
template <typename T>
static inline MultipleColorOutputs<T> blitFS(
    BlitVSOut input [[stage_in]],
    texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
    texture2d_array<T> srcTexture2dArray
    [[texture(0), function_constant(kSourceTextureType2DArray)]],
    texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]],
    texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]],
    texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]],
    sampler textureSampler [[sampler(0)]],
    constant BlitParams &options [[buffer(0)]])
{
    vec<T, 4> color = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS,
                                      srcTextureCube, srcTexture3d, textureSampler, options);

    return toMultipleColorOutputs(color);
}
436
// Blit fragment function for float textures; forwards to blitFS.
fragment MultipleColorOutputs<float> blitFloatFS(
    BlitVSOut input [[stage_in]],
    texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
    texture2d_array<float> srcTexture2dArray
    [[texture(0), function_constant(kSourceTextureType2DArray)]],
    texture2d_ms<float> srcTexture2dMS
    [[texture(0), function_constant(kSourceTextureType2DMS)]],
    texturecube<float> srcTextureCube
    [[texture(0), function_constant(kSourceTextureTypeCube)]],
    texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]],
    sampler textureSampler [[sampler(0)]],
    constant BlitParams &options [[buffer(0)]])
{
    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                  srcTexture3d, textureSampler, options);
}
// Blit fragment function for signed integer textures; forwards to blitFS.
fragment MultipleColorOutputs<int> blitIntFS(
    BlitVSOut input [[stage_in]],
    texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
    texture2d_array<int> srcTexture2dArray
    [[texture(0), function_constant(kSourceTextureType2DArray)]],
    texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]],
    texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]],
    texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]],
    sampler textureSampler [[sampler(0)]],
    constant BlitParams &options [[buffer(0)]])
{
    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                  srcTexture3d, textureSampler, options);
}
// Blit fragment function for unsigned integer textures; forwards to blitFS.
fragment MultipleColorOutputs<uint> blitUIntFS(
    BlitVSOut input [[stage_in]],
    texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
    texture2d_array<uint> srcTexture2dArray
    [[texture(0), function_constant(kSourceTextureType2DArray)]],
    texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]],
    texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]],
    texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]],
    sampler textureSampler [[sampler(0)]],
    constant BlitParams &options [[buffer(0)]])
{
    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                  srcTexture3d, textureSampler, options);
}
449
// Reads a float texture like the blit functions do, scales every channel by
// 255 and writes the result, converted to uint, to every enabled color output.
fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(
    BlitVSOut input [[stage_in]],
    texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
    texture2d_array<float> srcTexture2dArray
    [[texture(0), function_constant(kSourceTextureType2DArray)]],
    texture2d_ms<float> srcTexture2dMS
    [[texture(0), function_constant(kSourceTextureType2DMS)]],
    texturecube<float> srcTextureCube
    [[texture(0), function_constant(kSourceTextureTypeCube)]],
    texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]],
    sampler textureSampler [[sampler(0)]],
    constant BlitParams &options [[buffer(0)]])
{
    float4 sampled = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS,
                                       srcTextureCube, srcTexture3d, textureSampler, options);
    uint4 scaled = uint4(sampled * float4(255.0));

    return toMultipleColorOutputs(scaled);
}
457
458
// Fragment return value that writes only a depth value.
struct FragmentDepthOut
{
    float depth [[depth(any)]];
};
463
// Reads a depth value from whichever source texture kSourceTextureType
// selects and returns its red channel.
//
// Sampling uses nearest filtering. A multisample source is read at sample 0
// only. 3D sources are not handled here.
static inline float sampleDepth(
    texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]],
    texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]],
    texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]],
    texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]],
    float2 texCoords,
    constant BlitParams &options)
{
    // Start from zero so an unhandled kSourceTextureType yields a defined
    // value instead of reading an uninitialized vector.
    float4 output = float4(0.0);

    constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);

    switch (kSourceTextureType)
    {
        case kTextureType2D:
            output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel));
            break;
        case kTextureType2DArray:
            output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer,
                                              level(options.srcLevel));
            break;
        case kTextureType2DMultisample:
            // Only the first sample is read; no averaging takes place.
            output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
            break;
        case kTextureTypeCube:
            output =
                srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer),
                                      level(options.srcLevel));
            break;
    }

    return output.r;
}
498
// Fragment function that blits depth: the output depth is whatever
// sampleDepth reads at the interpolated texture coordinates.
fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]],
                                      texture2d<float> srcTexture2d
                                      [[texture(0), function_constant(kSourceTextureType2D)]],
                                      texture2d_array<float> srcTexture2dArray
                                      [[texture(0), function_constant(kSourceTextureType2DArray)]],
                                      texture2d_ms<float> srcTexture2dMS
                                      [[texture(0), function_constant(kSourceTextureType2DMS)]],
                                      texturecube<float> srcTextureCube
                                      [[texture(0), function_constant(kSourceTextureTypeCube)]],
                                      constant BlitParams &options [[buffer(0)]])
{
    FragmentDepthOut result;
    result.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                               input.texCoords, options);
    return result;
}
517
// Reads a stencil value from whichever source texture kSourceTexture2Type
// selects and returns its red channel.
//
// Sampling uses nearest filtering. A multisample source is read at sample 0
// only.
static inline uint32_t sampleStencil(
    texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]],
    texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]],
    texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]],
    texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]],
    float2 texCoords,
    int srcLevel,
    int srcLayer)
{
    // Start from zero so an unhandled kSourceTexture2Type yields a defined
    // value instead of reading an uninitialized vector.
    uint4 output = uint4(0);
    constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);

    switch (kSourceTexture2Type)
    {
        case kTextureType2D:
            output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel));
            break;
        case kTextureType2DArray:
            output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel));
            break;
        case kTextureType2DMultisample:
            // Only the first sample is read.
            output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
            break;
        case kTextureTypeCube:
            output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer),
                                           level(srcLevel));
            break;
    }

    return output.r;
}
550
551
// Parameters for blitStencilToBufferCS, read from buffer 0.
struct BlitStencilToBufferParams
{
    // Texture coordinates of thread (0, 0) and the increment per thread.
    float2 srcStartTexCoords;
    float2 srcTexCoordSteps;
    int srcLevel;
    int srcLayer;

    // Threads at or beyond this size in x or y do nothing.
    uint2 dstSize;
    // Distance, in bytes, between the starts of consecutive destination rows.
    uint dstBufferRowPitch;

    // When false and the source is multisample, every sample is written out
    // separately instead of a single value per texel.
    bool resolveMS;
};
564
// Compute kernel copying stencil values from a texture into a byte buffer,
// one thread per destination element.
//
// For a multisample source with options.resolveMS unset, each thread writes
// all samples of its texel contiguously. Otherwise it writes one byte
// obtained through sampleStencil.
kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]],
                                  texture2d<uint32_t> srcTexture2d
                                  [[texture(1), function_constant(kSourceTexture2Type2D)]],
                                  texture2d_array<uint32_t> srcTexture2dArray
                                  [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
                                  texture2d_ms<uint32_t> srcTexture2dMS
                                  [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
                                  texturecube<uint32_t> srcTextureCube
                                  [[texture(1), function_constant(kSourceTexture2TypeCube)]],
                                  constant BlitStencilToBufferParams &options [[buffer(0)]],
                                  device uchar *buffer [[buffer(1)]])
{
    // Threads outside the destination rectangle have nothing to write.
    if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y)
    {
        return;
    }

    const float2 srcTexCoords =
        options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps;
    const uint rowStart = options.dstBufferRowPitch * gIndices.y;

    if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS)
    {
        const uint sampleCount = srcTexture2dMS.get_num_samples();
        const uint2 texel      = getImageCoords(srcTexture2dMS, srcTexCoords);
        const uint firstByte   = rowStart + sampleCount * gIndices.x;

        for (uint i = 0; i < sampleCount; ++i)
        {
            uint stencilOfSample  = srcTexture2dMS.read(texel, i).r;
            buffer[firstByte + i] = static_cast<uchar>(stencilOfSample);
        }
        return;
    }

    const uint32_t stencil =
        sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                      srcTexCoords, options.srcLevel, options.srcLayer);

    buffer[rowStart + gIndices.x] = static_cast<uchar>(stencil);
}
605
606
607#if __METAL_VERSION__ >= 210
608
// Fragment return value that writes only a stencil value.
struct FragmentStencilOut
{
    uint32_t stencil [[stencil]];
};
613
// Fragment return value that writes both a depth and a stencil value.
struct FragmentDepthStencilOut
{
    float depth [[depth(any)]];
    uint32_t stencil [[stencil]];
};
619
// Fragment function that blits stencil: the output stencil is whatever
// sampleStencil reads from the texture bound at index 1.
fragment FragmentStencilOut blitStencilFS(
    BlitVSOut input [[stage_in]],
    texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]],
    texture2d_array<uint32_t> srcTexture2dArray
    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
    texture2d_ms<uint32_t> srcTexture2dMS
    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
    texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]],
    constant BlitParams &options [[buffer(0)]])
{
    FragmentStencilOut result;
    result.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                                   input.texCoords, options.srcLevel, options.srcLayer);
    return result;
}
637
// Fragment function that blits depth and stencil together. Depth comes from
// the texture at index 0, stencil from the texture at index 1.
fragment FragmentDepthStencilOut blitDepthStencilFS(
    BlitVSOut input [[stage_in]],

    // Depth source (texture index 0).
    texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
    texture2d_array<float> srcDepthTexture2dArray
    [[texture(0), function_constant(kSourceTextureType2DArray)]],
    texture2d_ms<float> srcDepthTexture2dMS
    [[texture(0), function_constant(kSourceTextureType2DMS)]],
    texturecube<float> srcDepthTextureCube
    [[texture(0), function_constant(kSourceTextureTypeCube)]],

    // Stencil source (texture index 1).
    texture2d<uint32_t> srcStencilTexture2d
    [[texture(1), function_constant(kSourceTexture2Type2D)]],
    texture2d_array<uint32_t> srcStencilTexture2dArray
    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
    texture2d_ms<uint32_t> srcStencilTexture2dMS
    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
    texturecube<uint32_t> srcStencilTextureCube
    [[texture(1), function_constant(kSourceTexture2TypeCube)]],

    constant BlitParams &options [[buffer(0)]])
{
    FragmentDepthStencilOut result;

    result.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS,
                               srcDepthTextureCube, input.texCoords, options);
    result.stencil =
        sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS,
                      srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer);

    return result;
}
670#endif
671# 2 "temp_master_source.metal" 2
672# 1 "./clear.metal" 1
673# 10 "./clear.metal"
674using namespace rx::mtl_shader;
675
// Parameters for the clear functions, read from buffer 0.
struct ClearParams
{
    // Color written by the clear fragment functions. The integer variants
    // reinterpret these bits rather than converting the values.
    float4 clearColor;
    // z coordinate given to every vertex by clearVS.
    float clearDepth;
};
681
// Vertex function for clears: emits vertex `vid` of the gCorners triangle
// with z set to the requested clear depth.
vertex float4 clearVS(unsigned int vid [[ vertex_id ]],
                      constant ClearParams &clearParams [[buffer(0)]])
{
    const float2 corner = gCorners[vid];
    return float4(corner, clearParams.clearDepth, 1.0);
}
687
// Clear fragment function for float attachments: writes the clear color to
// every enabled color output.
fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]])
{
    const float4 color = clearParams.clearColor;
    return toMultipleColorOutputs(color);
}
692
// Clear fragment function for signed integer attachments: the clear color's
// bits are reinterpreted as int4 (no numeric conversion) before being written.
fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]])
{
    const int4 color = as_type<int4>(clearParams.clearColor);
    return toMultipleColorOutputs(color);
}
697
// Clear fragment function for unsigned integer attachments: the clear color's
// bits are reinterpreted as uint4 (no numeric conversion) before being written.
fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]])
{
    const uint4 color = as_type<uint4>(clearParams.clearColor);
    return toMultipleColorOutputs(color);
}
702# 3 "temp_master_source.metal" 2
703# 1 "./gen_indices.metal" 1
704
705
706
707
708
709
710
711
712using namespace rx::mtl_shader;
713
714
// Function constants describing the source index buffer.
// NOTE(review): "aligned" presumably means the source offset is a multiple of
// the index size, so the buffer can be read through a typed pointer - confirm.
constant bool kSourceBufferAligned[[function_constant(100)]];
constant bool kSourceIndexIsU8[[function_constant(200)]];
constant bool kSourceIndexIsU16[[function_constant(300)]];
constant bool kSourceIndexIsU32[[function_constant(400)]];

// Derived switches choosing which pointer type the source is bound as: a byte
// pointer for 8-bit indices or any unaligned source, otherwise a pointer of
// the index type.
constant bool kSourceBufferUnaligned = !kSourceBufferAligned;
constant bool kUseSourceBufferU8     = kSourceIndexIsU8 || kSourceBufferUnaligned;
constant bool kUseSourceBufferU16    = kSourceIndexIsU16 && kSourceBufferAligned;
constant bool kUseSourceBufferU32    = kSourceIndexIsU32 && kSourceBufferAligned;
723
// Parameters for the index conversion kernels, read from buffer 0.
struct IndexConversionParams
{
    // Byte offset of the first index in the source buffer.
    uint32_t srcOffset;
    // Number of indices to process; threads at or beyond it do nothing.
    uint32_t indexCount;
    // When set, convertIndexU8ToU16 widens the 0xff restart marker to 0xffff.
    bool primitiveRestartEnabled;
};
730
731
732
// Reads 16-bit index number `idx` through a typed pointer. `offset` is in
// bytes and is converted to an element index by dividing by 2.
inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx)
{
    const uint element = offset / 2 + idx;
    return inputAligned[element];
}
// Reads 32-bit index number `idx` through a typed pointer. `offset` is in
// bytes and is converted to an element index by dividing by 4.
inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx)
{
    const uint element = offset / 4 + idx;
    return inputAligned[element];
}
// Reads 8-bit index number `idx`, starting `offset` bytes into the buffer.
inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx)
{
    const uint element = offset + idx;
    return input[element];
}
// Reads 16-bit index number `idx` byte by byte (low byte first), so the
// source does not need to be 2-byte aligned.
inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx)
{
    const uint firstByte = offset + 2 * idx;

    ushort low  = input[firstByte];
    ushort high = input[firstByte + 1];

    return low | (high << 8);
}
// Reads 32-bit index number `idx` byte by byte (least significant byte
// first), so the source does not need to be 4-byte aligned.
inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx)
{
    const uint firstByte = offset + 4 * idx;

    uint byte0 = input[firstByte];
    uint byte1 = input[firstByte + 1];
    uint byte2 = input[firstByte + 2];
    uint byte3 = input[firstByte + 3];

    return byte0 | (byte1 << 8) | (byte2 << 16) | (byte3 << 24);
}
761
// Compute kernel widening 8-bit indices to 16-bit, one index per thread.
// With primitive restart enabled, the 8-bit marker 0xff becomes 0xffff.
kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]],
                                constant IndexConversionParams &options [[buffer(0)]],
                                constant uchar *input [[buffer(1)]],
                                device ushort *output [[buffer(2)]])
{
    if (idx >= options.indexCount)
    {
        return;
    }

    const uchar value    = getIndexAligned(input, options.srcOffset, idx);
    const bool isRestart = options.primitiveRestartEnabled && value == 0xff;

    if (isRestart)
    {
        output[idx] = 0xffff;
    }
    else
    {
        output[idx] = value;
    }
}
780
// Compute kernel copying 16-bit indices into `output`, one index per thread.
// The source is bound either as a ushort pointer (aligned) or as a byte
// pointer (unaligned), selected by kSourceBufferAligned.
kernel void convertIndexU16(uint idx [[thread_position_in_grid]],
                            constant IndexConversionParams &options [[buffer(0)]],
                            constant uchar *input
                            [[buffer(1), function_constant(kSourceBufferUnaligned)]],
                            constant ushort *inputAligned
                            [[buffer(1), function_constant(kSourceBufferAligned)]],
                            device ushort *output [[buffer(2)]])
{
    if (idx >= options.indexCount)
    {
        return;
    }

    if (kSourceBufferAligned)
    {
        output[idx] = getIndexAligned(inputAligned, options.srcOffset, idx);
    }
    else
    {
        output[idx] = getIndexUnalignedU16(input, options.srcOffset, idx);
    }
}
802
// Compute kernel copying 32-bit indices into `output`, one index per thread.
// The source is bound either as a uint pointer (aligned) or as a byte
// pointer (unaligned), selected by kSourceBufferAligned.
kernel void convertIndexU32(uint idx [[thread_position_in_grid]],
                            constant IndexConversionParams &options [[buffer(0)]],
                            constant uchar *input
                            [[buffer(1), function_constant(kSourceBufferUnaligned)]],
                            constant uint *inputAligned
                            [[buffer(1), function_constant(kSourceBufferAligned)]],
                            device uint *output [[buffer(2)]])
{
    if (idx >= options.indexCount)
    {
        return;
    }

    if (kSourceBufferAligned)
    {
        output[idx] = getIndexAligned(inputAligned, options.srcOffset, idx);
    }
    else
    {
        output[idx] = getIndexUnalignedU32(input, options.srcOffset, idx);
    }
}
824
// Parameters for genTriFanIndicesFromArray, read from buffer 0.
struct IndexFromArrayParams
{
    // Index of the fan's first vertex.
    uint firstVertex;

    // Upper bound on the thread index; each thread below it emits one triangle.
    // NOTE(review): presumably the vertex count excluding the first two
    // vertices of the fan - confirm against the host code.
    uint vertexCount;
};
831
832
// Compute kernel producing triangle-list indices for a triangle fan drawn
// from a vertex array. Thread `idx` writes triangle `idx` as
// (firstVertex, firstVertex + idx + 1, firstVertex + idx + 2).
kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]],
                                      constant IndexFromArrayParams &options [[buffer(0)]],
                                      device uint *output [[buffer(2)]])
{
    if (idx >= options.vertexCount)
    {
        return;
    }

    const uint outBase    = 3 * idx;
    const uint lastVertex = options.firstVertex + 2 + idx;

    output[outBase]     = options.firstVertex;
    output[outBase + 1] = lastVertex - 1;
    output[outBase + 2] = lastVertex;
}
845
// Fetches element `idx` of the source index buffer as a 32-bit value.
// Exactly which pointer is consulted depends on the kUseSourceBuffer* function constants; when
// the byte pointer is in use, kSourceIndexIsU16 / kSourceIndexIsU32 select the helper that
// assembles a wider index from bytes. Returns 0 when no source buffer constant is set.
inline uint getIndexU32(uint offset,
                        uint idx,
                        constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]],
                        constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]],
                        constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]])
{
    if (kUseSourceBufferU8)
    {
        if (kSourceIndexIsU16)
        {
            return getIndexUnalignedU16(inputU8, offset, idx);
        }
        if (kSourceIndexIsU32)
        {
            return getIndexUnalignedU32(inputU8, offset, idx);
        }
        // Plain 8-bit indices.
        return getIndexAligned(inputU8, offset, idx);
    }

    if (kUseSourceBufferU16)
    {
        return getIndexAligned(inputU16, offset, idx);
    }

    if (kUseSourceBufferU32)
    {
        return getIndexAligned(inputU32, offset, idx);
    }

    return 0;
}
874
875
876
877
// Expands an indexed triangle fan into a triangle list: thread `idx` writes the three indices
// of triangle `idx`, built from source elements 0, idx + 1 and idx + 2.
// NOTE(review): threads are bounded by options.indexCount while reading element idx + 2, so
// the count presumably excludes the first two fan indices - confirm with the host code.
kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]],
                                         constant IndexConversionParams &options [[buffer(0)]],
                                         constant uchar *inputU8
                                         [[buffer(1), function_constant(kUseSourceBufferU8)]],
                                         constant ushort *inputU16
                                         [[buffer(1), function_constant(kUseSourceBufferU16)]],
                                         constant uint *inputU32
                                         [[buffer(1), function_constant(kUseSourceBufferU32)]],
                                         device uint *output [[buffer(2)]])
{
    if (idx >= options.indexCount)
    {
        return;
    }

    const uint lastElem = 2 + idx;
    const uint slot     = 3 * idx;

    // Fan center is always the first source element.
    output[slot] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32);
    output[slot + 1] =
        getIndexU32(options.srcOffset, lastElem - 1, inputU8, inputU16, inputU32);
    output[slot + 2] = getIndexU32(options.srcOffset, lastElem, inputU8, inputU16, inputU32);
}
896
897
// Builds the index list of a non-indexed line loop: vertexCount + 1 indices, where the final
// index wraps back to firstVertex to close the loop.
kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]],
                                        constant IndexFromArrayParams &options [[buffer(0)]],
                                        device uint *output [[buffer(2)]])
{
    const uint loopIndexCount = options.vertexCount + 1;
    if (idx < loopIndexCount)
    {
        // idx == vertexCount maps back to offset 0.
        const uint vertexOffset = idx % options.vertexCount;
        output[idx]             = options.firstVertex + vertexOffset;
    }
}
907
908
909
// Builds the index list of an indexed line loop: indexCount + 1 indices copied from the source
// index buffer, where the final one repeats source element 0 to close the loop.
kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]],
                                           constant IndexConversionParams &options [[buffer(0)]],
                                           constant uchar *inputU8
                                           [[buffer(1), function_constant(kUseSourceBufferU8)]],
                                           constant ushort *inputU16
                                           [[buffer(1), function_constant(kUseSourceBufferU16)]],
                                           constant uint *inputU32
                                           [[buffer(1), function_constant(kUseSourceBufferU32)]],
                                           device uint *output [[buffer(2)]])
{
    const uint loopIndexCount = options.indexCount + 1;
    if (idx < loopIndexCount)
    {
        // idx == indexCount maps back to source element 0.
        const uint srcElem = idx % options.indexCount;
        output[idx] = getIndexU32(options.srcOffset, srcElem, inputU8, inputU16, inputU32);
    }
}
926# 4 "temp_master_source.metal" 2
927# 1 "./gen_mipmap.metal" 1
928
929
930
931
932
933
934
935
936using namespace rx::mtl_shader;
937# 31 "./gen_mipmap.metal"
// Parameters shared by all mipmap generation kernels in this file.
struct GenMipParams
{
    // Mip level of the source texture that the kernels sample from.
    uint srcLevel;
    // Number of destination levels to produce; the kernels stop after writing dstMip1,
    // dstMip2 or dstMip3 when this is 1, 2 or 3 respectively.
    uint numMipLevelsToGen;
    // Enables the sRGB <-> linear conversions performed around the texel averaging.
    bool sRGB;
};
944
945
946
// Stores one texel into slot `index` of the per-channel threadgroup caches.
static inline void genMip3DStoreTexel(threadgroup float *sR,
                                      threadgroup float *sG,
                                      threadgroup float *sB,
                                      threadgroup float *sA,
                                      uint index,
                                      float4 texel)
{
    sR[index] = texel.r;
    sG[index] = texel.g;
    sB[index] = texel.b;
    sA[index] = texel.a;
}

// Averages `texel1` with its seven cached neighbours at +spacing along x, y and z.
// The caches are addressed as x + 8 * y + 64 * z. A neighbour lying beyond a texture edge
// (per `atEdge`) is not read from the cache; an already selected texel is reused instead.
static inline float4 genMip3DAverageBlock(float4 texel1,
                                          bool3 atEdge,
                                          uint lIndex,
                                          uint spacing,
                                          threadgroup float *sR,
                                          threadgroup float *sG,
                                          threadgroup float *sB,
                                          threadgroup float *sA)
{
    const uint x   = lIndex + spacing;
    const uint y   = lIndex + spacing * 8;
    const uint z   = lIndex + spacing * (8 * 8);
    const uint xy  = lIndex + spacing * (8 + 1);
    const uint xz  = lIndex + spacing * ((8 * 8) + 1);
    const uint yz  = lIndex + spacing * ((8 * 8) + 8);
    const uint xyz = lIndex + spacing * ((8 * 8) + 8 + 1);

    float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[x], sG[x], sB[x], sA[x]));
    float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[y], sG[y], sB[y], sA[y]));
    float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[z], sG[z], sB[z], sA[z]));
    float4 texel5 =
        (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[xy], sG[xy], sB[xy], sA[xy]));
    float4 texel6 =
        (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[xz], sG[xz], sB[xz], sA[xz]));
    float4 texel7 =
        (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[yz], sG[yz], sB[yz], sA[yz]));
    float4 texel8 = (atEdge.x | atEdge.y | atEdge.z)
                        ? (texel5)
                        : (float4(sR[xyz], sG[xyz], sB[xyz], sA[xyz]));

    return (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
}

// Generates up to four consecutive mip levels of a 3D texture in one dispatch.
//
// Each thread produces one texel of dstMip1 by sampling `srcTexture` at options.srcLevel with a
// linear filter. The remaining levels are reduced inside the threadgroup: texels are cached in
// threadgroup memory and, per level, a shrinking subset of threads averages a 2x2x2 block.
// The cache size and neighbour offsets assume an 8x8x8 threadgroup with
// lIndex = x + 8 * y + 64 * z.
//
// lIndex    - thread index in the threadgroup; overwritten with 0xffffffff for threads outside
//             dstMip1 so that every later level mask rejects them.
// gIndices  - texel position in dstMip1 handled by this thread.
// options   - source level, number of levels to generate and sRGB flag.
kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                              ushort3 gIndices [[thread_position_in_grid]],
                              texture3d<float> srcTexture [[texture(0)]],
                              texture3d<float, access::write> dstMip1 [[texture(1)]],
                              texture3d<float, access::write> dstMip2 [[texture(2)]],
                              texture3d<float, access::write> dstMip3 [[texture(3)]],
                              texture3d<float, access::write> dstMip4 [[texture(4)]],
                              constant GenMipParams &options [[buffer(0)]])
{
    ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth());
    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z;

    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);

    // One array per channel, one slot per thread of the threadgroup.
    threadgroup float sR[(8 * 8 * 8)];
    threadgroup float sG[(8 * 8 * 8)];
    threadgroup float sB[(8 * 8 * 8)];
    threadgroup float sA[(8 * 8 * 8)];

    // ----- First mip level -----
    float4 texel1;
    if (validThread)
    {
        float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize);
        texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));

        dstMip1.write(texel1, gIndices);
    }
    else
    {
        // Sentinel: has every mask bit set, so all level checks below fail for this thread.
        lIndex = 0xffffffff;
    }

    if (options.numMipLevelsToGen == 1)
    {
        return;
    }

    // ----- Second mip level -----
    // Only threads that actually sampled a texel may publish it. Out-of-range threads carry the
    // 0xffffffff sentinel and an uninitialized texel; storing through that index would write
    // far outside the threadgroup arrays.
    if (validThread)
    {
        if (options.sRGB)
        {
            texel1 = linearToSRGB(texel1);
        }
        genMip3DStoreTexel(sR, sG, sB, sA, lIndex, texel1);
    }

    // Reached by every thread of the threadgroup, valid or not.
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // 0x49 has bits 0, 3 and 6 set: keeps threads with even x, y and z.
    if ((lIndex & 0x49) == 0)
    {
        bool3 atEdge = gIndices == (mipSize - ushort3(1));

        texel1 = genMip3DAverageBlock(texel1, atEdge, lIndex, 1, sR, sG, sB, sA);

        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);

        // Publish the reduced texel for the next level.
        genMip3DStoreTexel(sR, sG, sB, sA, lIndex, texel1);
    }

    if (options.numMipLevelsToGen == 2)
    {
        return;
    }

    // ----- Third mip level -----
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // 0xdb has bits 0-1, 3-4 and 6-7 set: keeps threads whose x, y and z are multiples of 4.
    if ((lIndex & 0xdb) == 0)
    {
        mipSize = max(mipSize >> 1, ushort3(1));
        bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1));

        texel1 = genMip3DAverageBlock(texel1, atEdge, lIndex, 2, sR, sG, sB, sA);

        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);

        genMip3DStoreTexel(sR, sG, sB, sA, lIndex, texel1);
    }

    if (options.numMipLevelsToGen == 3)
    {
        return;
    }

    // ----- Fourth mip level -----
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // 0x1ff covers all nine index bits: only thread 0 of the threadgroup remains.
    if ((lIndex & 0x1ff) == 0)
    {
        mipSize = max(mipSize >> 1, ushort3(1));
        bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1));

        texel1 = genMip3DAverageBlock(texel1, atEdge, lIndex, 4, sR, sG, sB, sA);

        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
    }
}
1124
// Stores one texel into slot `index` of the per-channel threadgroup caches.
static inline void genMip2DStoreTexel(threadgroup float *sR,
                                      threadgroup float *sG,
                                      threadgroup float *sB,
                                      threadgroup float *sA,
                                      uint index,
                                      float4 texel)
{
    sR[index] = texel.r;
    sG[index] = texel.g;
    sB[index] = texel.b;
    sA[index] = texel.a;
}

// Averages `texel1` with its three cached neighbours at +spacing along x and y.
// The caches are addressed as x + 8 * y. A neighbour lying beyond a texture edge (per `atEdge`)
// is not read from the cache; an already selected texel is reused instead.
static inline float4 genMip2DAverageQuad(float4 texel1,
                                         bool2 atEdge,
                                         uint lIndex,
                                         uint spacing,
                                         threadgroup float *sR,
                                         threadgroup float *sG,
                                         threadgroup float *sB,
                                         threadgroup float *sA)
{
    const uint x  = lIndex + spacing;
    const uint y  = lIndex + spacing * 8;
    const uint xy = lIndex + spacing * (8 + 1);

    float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[x], sG[x], sB[x], sA[x]));
    float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[y], sG[y], sB[y], sA[y]));
    float4 texel4 =
        (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[xy], sG[xy], sB[xy], sA[xy]));

    return (texel1 + texel2 + texel3 + texel4) / 4.0;
}

// Generates up to four consecutive mip levels of a 2D texture in one dispatch.
//
// Each thread produces one texel of dstMip1 by sampling `srcTexture` at options.srcLevel with a
// linear filter. The remaining levels are reduced inside the threadgroup: texels are cached in
// threadgroup memory and, per level, a shrinking subset of threads averages a 2x2 block.
// The cache size and neighbour offsets assume an 8x8 threadgroup with lIndex = x + 8 * y.
//
// lIndex    - thread index in the threadgroup; overwritten with 0xffffffff for threads outside
//             the first generated level so that every later level mask rejects them.
// gIndices  - texel position in dstMip1 handled by this thread.
// options   - source level, number of levels to generate and sRGB flag.
kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                              ushort2 gIndices [[thread_position_in_grid]],
                              texture2d<float> srcTexture [[texture(0)]],
                              texture2d<float, access::write> dstMip1 [[texture(1)]],
                              texture2d<float, access::write> dstMip2 [[texture(2)]],
                              texture2d<float, access::write> dstMip3 [[texture(3)]],
                              texture2d<float, access::write> dstMip4 [[texture(4)]],
                              constant GenMipParams &options [[buffer(0)]])
{
    uint firstMipLevel = options.srcLevel + 1;
    ushort2 mipSize =
        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y;

    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);

    // One array per channel, one slot per thread of the threadgroup.
    threadgroup float sR[(8 * 8)];
    threadgroup float sG[(8 * 8)];
    threadgroup float sB[(8 * 8)];
    threadgroup float sA[(8 * 8)];

    // ----- First mip level -----
    float4 texel1;
    if (validThread)
    {
        float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize);
        texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));

        dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices);
    }
    else
    {
        // Sentinel: has every mask bit set, so all level checks below fail for this thread.
        lIndex = 0xffffffff;
    }

    if (options.numMipLevelsToGen == 1)
    {
        return;
    }

    // ----- Second mip level -----
    // Only threads that actually sampled a texel may publish it. Out-of-range threads carry the
    // 0xffffffff sentinel and an uninitialized texel; storing through that index would write
    // far outside the threadgroup arrays.
    if (validThread)
    {
        genMip2DStoreTexel(sR, sG, sB, sA, lIndex, texel1);
    }

    // Reached by every thread of the threadgroup, valid or not.
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // 0x09 has bits 0 and 3 set: keeps threads with even x and y.
    if ((lIndex & 0x09) == 0)
    {
        bool2 atEdge = gIndices == (mipSize - ushort2(1));

        texel1 = genMip2DAverageQuad(texel1, atEdge, lIndex, 1, sR, sG, sB, sA);

        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);

        // Publish the reduced texel for the next level.
        genMip2DStoreTexel(sR, sG, sB, sA, lIndex, texel1);
    }

    if (options.numMipLevelsToGen == 2)
    {
        return;
    }

    // ----- Third mip level -----
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // 0x1b has bits 0-1 and 3-4 set: keeps threads whose x and y are multiples of 4.
    if ((lIndex & 0x1b) == 0)
    {
        mipSize = max(mipSize >> 1, ushort2(1));
        bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1));

        texel1 = genMip2DAverageQuad(texel1, atEdge, lIndex, 2, sR, sG, sB, sA);

        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);

        genMip2DStoreTexel(sR, sG, sB, sA, lIndex, texel1);
    }

    if (options.numMipLevelsToGen == 3)
    {
        return;
    }

    // ----- Fourth mip level -----
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // 0x3f covers all six index bits: only thread 0 of the threadgroup remains.
    if ((lIndex & 0x3f) == 0)
    {
        mipSize = max(mipSize >> 1, ushort2(1));
        bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1));

        texel1 = genMip2DAverageQuad(texel1, atEdge, lIndex, 4, sR, sG, sB, sA);

        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
    }
}
1260
// Stores one texel into slot `index` of the per-channel threadgroup caches.
static inline void genMipLayerStoreTexel(threadgroup float *sR,
                                         threadgroup float *sG,
                                         threadgroup float *sB,
                                         threadgroup float *sA,
                                         uint index,
                                         float4 texel)
{
    sR[index] = texel.r;
    sG[index] = texel.g;
    sB[index] = texel.b;
    sA[index] = texel.a;
}

// Averages `texel1` with its three cached neighbours at +spacing along x and y.
// The caches are addressed as x + 8 * y. A neighbour lying beyond a texture edge (per `atEdge`)
// is not read from the cache; an already selected texel is reused instead.
static inline float4 genMipLayerAverageQuad(float4 texel1,
                                            bool2 atEdge,
                                            uint lIndex,
                                            uint spacing,
                                            threadgroup float *sR,
                                            threadgroup float *sG,
                                            threadgroup float *sB,
                                            threadgroup float *sA)
{
    const uint x  = lIndex + spacing;
    const uint y  = lIndex + spacing * 8;
    const uint xy = lIndex + spacing * (8 + 1);

    float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[x], sG[x], sB[x], sA[x]));
    float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[y], sG[y], sB[y], sA[y]));
    float4 texel4 =
        (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[xy], sG[xy], sB[xy], sA[xy]));

    return (texel1 + texel2 + texel3 + texel4) / 4.0;
}

// Shared tail of the cube and 2D-array mipmap kernels: produces the second, third and fourth
// generated levels of one face/slice from texels cached in threadgroup memory.
//
// lIndex      - thread index in the threadgroup, or 0xffffffff when the calling kernel found
//               the thread to be outside the first generated level.
// gIndices    - xy is the texel position in the first generated level, z the face/slice.
// srcTexture  - not used here; kept so the call sites stay unchanged.
// dstMip2..4  - destination levels.
// mip1Size    - size of the first generated level.
// mip1Texel   - texel the calling kernel sampled for this thread (unset for the sentinel).
// sR/sG/sB/sA - per-channel threadgroup caches, addressed as x + 8 * y.
// options     - number of levels to generate and sRGB flag.
template <typename TextureTypeR, typename TextureTypeW>
static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps(
    uint lIndex,
    ushort3 gIndices,
    TextureTypeR srcTexture,
    TextureTypeW dstMip2,
    TextureTypeW dstMip3,
    TextureTypeW dstMip4,
    ushort2 mip1Size,
    float4 mip1Texel,
    threadgroup float *sR,
    threadgroup float *sG,
    threadgroup float *sB,
    threadgroup float *sA,
    constant GenMipParams &options)
{
    ushort2 mipSize = mip1Size;
    float4 texel1 = mip1Texel;

    // ----- Second mip level -----
    // Threads tagged with the 0xffffffff sentinel have no texel to publish, and storing through
    // that index would write far outside the threadgroup arrays.
    if (lIndex != 0xffffffff)
    {
        genMipLayerStoreTexel(sR, sG, sB, sA, lIndex, texel1);
    }

    // Reached by every thread of the threadgroup, valid or not.
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // 0x09 has bits 0 and 3 set: keeps threads with even x and y.
    if ((lIndex & 0x09) == 0)
    {
        bool2 atEdge = gIndices.xy == (mipSize - ushort2(1));

        texel1 = genMipLayerAverageQuad(texel1, atEdge, lIndex, 1, sR, sG, sB, sA);

        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z);

        // Publish the reduced texel for the next level.
        genMipLayerStoreTexel(sR, sG, sB, sA, lIndex, texel1);
    }

    if (options.numMipLevelsToGen == 2)
    {
        return;
    }

    // ----- Third mip level -----
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // 0x1b has bits 0-1 and 3-4 set: keeps threads whose x and y are multiples of 4.
    if ((lIndex & 0x1b) == 0)
    {
        mipSize = max(mipSize >> 1, ushort2(1));
        bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1));

        texel1 = genMipLayerAverageQuad(texel1, atEdge, lIndex, 2, sR, sG, sB, sA);

        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z);

        genMipLayerStoreTexel(sR, sG, sB, sA, lIndex, texel1);
    }

    if (options.numMipLevelsToGen == 3)
    {
        return;
    }

    // ----- Fourth mip level -----
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // 0x3f covers all six index bits: only thread 0 of the threadgroup remains.
    if ((lIndex & 0x3f) == 0)
    {
        mipSize = max(mipSize >> 1, ushort2(1));
        bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1));

        texel1 = genMipLayerAverageQuad(texel1, atEdge, lIndex, 4, sR, sG, sB, sA);

        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z);
    }
}
1370
// Generates up to four consecutive mip levels of a cube texture in one dispatch.
// gIndices.xy addresses a texel of the first generated level and gIndices.z the cube face.
// This kernel samples and writes the first level; the remaining levels are produced by
// generateCubeOr2DArray2ndAndMoreMipmaps using the threadgroup caches declared here.
kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                                ushort3 gIndices [[thread_position_in_grid]],
                                texturecube<float> srcTexture [[texture(0)]],
                                texturecube<float, access::write> dstMip1 [[texture(1)]],
                                texturecube<float, access::write> dstMip2 [[texture(2)]],
                                texturecube<float, access::write> dstMip3 [[texture(3)]],
                                texturecube<float, access::write> dstMip4 [[texture(4)]],
                                constant GenMipParams &options [[buffer(0)]])
{
    // Per-channel texel caches handed to the shared reduction helper.
    threadgroup float sR[(8 * 8)];
    threadgroup float sG[(8 * 8)];
    threadgroup float sB[(8 * 8)];
    threadgroup float sA[(8 * 8)];

    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);

    // Size of the first generated level, queried from the source texture.
    const uint dstLevel = options.srcLevel + 1;
    ushort2 mip1Size = ushort2(srcTexture.get_width(dstLevel), srcTexture.get_height(dstLevel));

    float4 mip1Texel;
    if (!(gIndices.x < mip1Size.x && gIndices.y < mip1Size.y))
    {
        // Outside the first generated level: tag the thread with the sentinel index.
        lIndex = 0xffffffff;
    }
    else
    {
        float2 uv = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
        mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(uv, int(gIndices.z)),
                                      level(options.srcLevel));

        dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
    }

    if (options.numMipLevelsToGen != 1)
    {
        generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3,
                                               dstMip4, mip1Size, mip1Texel, sR, sG, sB, sA,
                                               options);
    }
}
1418
// Generates up to four consecutive mip levels of a 2D array texture in one dispatch.
// gIndices.xy addresses a texel of the first generated level and gIndices.z the array slice.
// This kernel samples and writes the first level; the remaining levels are produced by
// generateCubeOr2DArray2ndAndMoreMipmaps using the threadgroup caches declared here.
kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                                   ushort3 gIndices [[thread_position_in_grid]],
                                   texture2d_array<float> srcTexture [[texture(0)]],
                                   texture2d_array<float, access::write> dstMip1 [[texture(1)]],
                                   texture2d_array<float, access::write> dstMip2 [[texture(2)]],
                                   texture2d_array<float, access::write> dstMip3 [[texture(3)]],
                                   texture2d_array<float, access::write> dstMip4 [[texture(4)]],
                                   constant GenMipParams &options [[buffer(0)]])
{
    // Per-channel texel caches handed to the shared reduction helper.
    threadgroup float sR[(8 * 8)];
    threadgroup float sG[(8 * 8)];
    threadgroup float sB[(8 * 8)];
    threadgroup float sA[(8 * 8)];

    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);

    // Size of the first generated level, queried from the source texture.
    const uint dstLevel = options.srcLevel + 1;
    ushort2 mip1Size = ushort2(srcTexture.get_width(dstLevel), srcTexture.get_height(dstLevel));

    float4 mip1Texel;
    if (!(gIndices.x < mip1Size.x && gIndices.y < mip1Size.y))
    {
        // Outside the first generated level: tag the thread with the sentinel index.
        lIndex = 0xffffffff;
    }
    else
    {
        float2 uv = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
        mip1Texel = srcTexture.sample(textureSampler, uv, gIndices.z, level(options.srcLevel));

        dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
    }

    if (options.numMipLevelsToGen != 1)
    {
        generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3,
                                               dstMip4, mip1Size, mip1Texel, sR, sG, sB, sA,
                                               options);
    }
}
1466# 5 "temp_master_source.metal" 2
1467# 1 "./copy_buffer.metal" 1
1468# 12 "./copy_buffer.metal"
1469#include <metal_pack>
1470
1471
1472
1473# 1 "./format_autogen.h" 1
1474# 11 "./format_autogen.h"
namespace rx
{
namespace mtl_shader
{

// Numeric identifiers for pixel/vertex formats as seen by the shaders.
// Values are implicit and sequential starting at NONE = 0, so the order of the enumerators is
// significant and must not be changed by hand.
// NOTE(review): presumably these must match the format IDs used by the host code - confirm.
namespace FormatID
{
enum
{
    NONE = 0,

    // Depth / stencil.
    D16_UNORM,
    D24_UNORM_S8_UINT,
    D24_UNORM_X8_UINT,
    D32_FLOAT,
    D32_FLOAT_S8X24_UINT,
    D32_UNORM,
    S8_UINT,

    // Alpha-first and alpha-only.
    A16_FLOAT,
    A1R5G5B5_UNORM,
    A2R10G10B10_SINT_VERTEX,
    A2R10G10B10_SNORM_VERTEX,
    A2R10G10B10_SSCALED_VERTEX,
    A2R10G10B10_UINT_VERTEX,
    A2R10G10B10_UNORM_VERTEX,
    A2R10G10B10_USCALED_VERTEX,
    A32_FLOAT,
    A8_UNORM,

    // ASTC blocks.
    ASTC_10x10_SRGB_BLOCK,
    ASTC_10x10_UNORM_BLOCK,
    ASTC_10x5_SRGB_BLOCK,
    ASTC_10x5_UNORM_BLOCK,
    ASTC_10x6_SRGB_BLOCK,
    ASTC_10x6_UNORM_BLOCK,
    ASTC_10x8_SRGB_BLOCK,
    ASTC_10x8_UNORM_BLOCK,
    ASTC_12x10_SRGB_BLOCK,
    ASTC_12x10_UNORM_BLOCK,
    ASTC_12x12_SRGB_BLOCK,
    ASTC_12x12_UNORM_BLOCK,
    ASTC_3x3x3_UNORM_BLOCK,
    ASTC_3x3x3_UNORM_SRGB_BLOCK,
    ASTC_4x3x3_UNORM_BLOCK,
    ASTC_4x3x3_UNORM_SRGB_BLOCK,
    ASTC_4x4_SRGB_BLOCK,
    ASTC_4x4_UNORM_BLOCK,
    ASTC_4x4x3_UNORM_BLOCK,
    ASTC_4x4x3_UNORM_SRGB_BLOCK,
    ASTC_4x4x4_UNORM_BLOCK,
    ASTC_4x4x4_UNORM_SRGB_BLOCK,
    ASTC_5x4_SRGB_BLOCK,
    ASTC_5x4_UNORM_BLOCK,
    ASTC_5x4x4_UNORM_BLOCK,
    ASTC_5x4x4_UNORM_SRGB_BLOCK,
    ASTC_5x5_SRGB_BLOCK,
    ASTC_5x5_UNORM_BLOCK,
    ASTC_5x5x4_UNORM_BLOCK,
    ASTC_5x5x4_UNORM_SRGB_BLOCK,
    ASTC_5x5x5_UNORM_BLOCK,
    ASTC_5x5x5_UNORM_SRGB_BLOCK,
    ASTC_6x5_SRGB_BLOCK,
    ASTC_6x5_UNORM_BLOCK,
    ASTC_6x5x5_UNORM_BLOCK,
    ASTC_6x5x5_UNORM_SRGB_BLOCK,
    ASTC_6x6_SRGB_BLOCK,
    ASTC_6x6_UNORM_BLOCK,
    ASTC_6x6x5_UNORM_BLOCK,
    ASTC_6x6x5_UNORM_SRGB_BLOCK,
    ASTC_6x6x6_UNORM_BLOCK,
    ASTC_6x6x6_UNORM_SRGB_BLOCK,
    ASTC_8x5_SRGB_BLOCK,
    ASTC_8x5_UNORM_BLOCK,
    ASTC_8x6_SRGB_BLOCK,
    ASTC_8x6_UNORM_BLOCK,
    ASTC_8x8_SRGB_BLOCK,
    ASTC_8x8_UNORM_BLOCK,

    // Blue-first.
    B10G10R10A2_UNORM,
    B4G4R4A4_UNORM,
    B5G5R5A1_UNORM,
    B5G6R5_UNORM,
    B8G8R8A8_TYPELESS,
    B8G8R8A8_TYPELESS_SRGB,
    B8G8R8A8_UNORM,
    B8G8R8A8_UNORM_SRGB,
    B8G8R8X8_UNORM,

    // BC blocks.
    BC1_RGBA_UNORM_BLOCK,
    BC1_RGBA_UNORM_SRGB_BLOCK,
    BC1_RGB_UNORM_BLOCK,
    BC1_RGB_UNORM_SRGB_BLOCK,
    BC2_RGBA_UNORM_BLOCK,
    BC2_RGBA_UNORM_SRGB_BLOCK,
    BC3_RGBA_UNORM_BLOCK,
    BC3_RGBA_UNORM_SRGB_BLOCK,
    BC4_RED_SNORM_BLOCK,
    BC4_RED_UNORM_BLOCK,
    BC5_RG_SNORM_BLOCK,
    BC5_RG_UNORM_BLOCK,
    BC6H_RGB_SFLOAT_BLOCK,
    BC6H_RGB_UFLOAT_BLOCK,
    BC7_RGBA_UNORM_BLOCK,
    BC7_RGBA_UNORM_SRGB_BLOCK,

    // EAC / ETC blocks.
    EAC_R11G11_SNORM_BLOCK,
    EAC_R11G11_UNORM_BLOCK,
    EAC_R11_SNORM_BLOCK,
    EAC_R11_UNORM_BLOCK,
    ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK,
    ETC1_R8G8B8_UNORM_BLOCK,
    ETC2_R8G8B8A1_SRGB_BLOCK,
    ETC2_R8G8B8A1_UNORM_BLOCK,
    ETC2_R8G8B8A8_SRGB_BLOCK,
    ETC2_R8G8B8A8_UNORM_BLOCK,
    ETC2_R8G8B8_SRGB_BLOCK,
    ETC2_R8G8B8_UNORM_BLOCK,

    // Multi-planar.
    G8_B8R8_2PLANE_420_UNORM,
    G8_B8_R8_3PLANE_420_UNORM,

    // Luminance.
    L16A16_FLOAT,
    L16_FLOAT,
    L32A32_FLOAT,
    L32_FLOAT,
    L8A8_UNORM,
    L8_UNORM,

    // PVRTC1 blocks.
    PVRTC1_RGBA_2BPP_UNORM_BLOCK,
    PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK,
    PVRTC1_RGBA_4BPP_UNORM_BLOCK,
    PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK,
    PVRTC1_RGB_2BPP_UNORM_BLOCK,
    PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK,
    PVRTC1_RGB_4BPP_UNORM_BLOCK,
    PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK,

    // Red-first, packed 10/11-bit.
    R10G10B10A2_SINT,
    R10G10B10A2_SNORM,
    R10G10B10A2_SSCALED,
    R10G10B10A2_UINT,
    R10G10B10A2_UNORM,
    R10G10B10A2_USCALED,
    R10G10B10X2_UNORM,
    R11G11B10_FLOAT,

    // Red-first, 16 bits per channel.
    R16G16B16A16_FLOAT,
    R16G16B16A16_SINT,
    R16G16B16A16_SNORM,
    R16G16B16A16_SSCALED,
    R16G16B16A16_UINT,
    R16G16B16A16_UNORM,
    R16G16B16A16_USCALED,
    R16G16B16_FLOAT,
    R16G16B16_SINT,
    R16G16B16_SNORM,
    R16G16B16_SSCALED,
    R16G16B16_UINT,
    R16G16B16_UNORM,
    R16G16B16_USCALED,
    R16G16_FLOAT,
    R16G16_SINT,
    R16G16_SNORM,
    R16G16_SSCALED,
    R16G16_UINT,
    R16G16_UNORM,
    R16G16_USCALED,
    R16_FLOAT,
    R16_SINT,
    R16_SNORM,
    R16_SSCALED,
    R16_UINT,
    R16_UNORM,
    R16_USCALED,

    // Red-first, 32 bits per channel.
    R32G32B32A32_FIXED,
    R32G32B32A32_FLOAT,
    R32G32B32A32_SINT,
    R32G32B32A32_SNORM,
    R32G32B32A32_SSCALED,
    R32G32B32A32_UINT,
    R32G32B32A32_UNORM,
    R32G32B32A32_USCALED,
    R32G32B32_FIXED,
    R32G32B32_FLOAT,
    R32G32B32_SINT,
    R32G32B32_SNORM,
    R32G32B32_SSCALED,
    R32G32B32_UINT,
    R32G32B32_UNORM,
    R32G32B32_USCALED,
    R32G32_FIXED,
    R32G32_FLOAT,
    R32G32_SINT,
    R32G32_SNORM,
    R32G32_SSCALED,
    R32G32_UINT,
    R32G32_UNORM,
    R32G32_USCALED,
    R32_FIXED,
    R32_FLOAT,
    R32_SINT,
    R32_SNORM,
    R32_SSCALED,
    R32_UINT,
    R32_UNORM,
    R32_USCALED,

    // Red-first, packed 4/5/6-bit.
    R4G4B4A4_UNORM,
    R5G5B5A1_UNORM,
    R5G6B5_UNORM,

    // Red-first, 8 bits per channel.
    R8G8B8A8_SINT,
    R8G8B8A8_SNORM,
    R8G8B8A8_SSCALED,
    R8G8B8A8_TYPELESS,
    R8G8B8A8_TYPELESS_SRGB,
    R8G8B8A8_UINT,
    R8G8B8A8_UNORM,
    R8G8B8A8_UNORM_SRGB,
    R8G8B8A8_USCALED,
    R8G8B8_SINT,
    R8G8B8_SNORM,
    R8G8B8_SSCALED,
    R8G8B8_UINT,
    R8G8B8_UNORM,
    R8G8B8_UNORM_SRGB,
    R8G8B8_USCALED,
    R8G8_SINT,
    R8G8_SNORM,
    R8G8_SSCALED,
    R8G8_UINT,
    R8G8_UNORM,
    R8G8_UNORM_SRGB,
    R8G8_USCALED,
    R8_SINT,
    R8_SNORM,
    R8_SSCALED,
    R8_UINT,
    R8_UNORM,
    R8_UNORM_SRGB,
    R8_USCALED,

    // Shared exponent.
    R9G9B9E5_SHAREDEXP,

    // X2-prefixed 10-bit vertex formats.
    X2R10G10B10_SINT_VERTEX,
    X2R10G10B10_SNORM_VERTEX,
    X2R10G10B10_SSCALED_VERTEX,
    X2R10G10B10_UINT_VERTEX,
    X2R10G10B10_UNORM_VERTEX,
    X2R10G10B10_USCALED_VERTEX
};

}  // namespace FormatID

}  // namespace mtl_shader
}  // namespace rx
1717# 16 "./copy_buffer.metal" 2
1718
1719using namespace rx::mtl_shader;
1720
// Function constant 10: format selector for the copy kernels.
// NOTE(review): presumably one of the FormatID values - confirm against the kernels using it.
constant int kCopyFormatType [[function_constant(10)]];

// Function constant 20: kind of texture bound to the copy kernels, one of the kTextureType*
// values. The derived booleans below gate the matching optional texture arguments.
constant int kCopyTextureType [[function_constant(20)]];
constant bool kCopyTextureType2D      = (kCopyTextureType == kTextureType2D);
constant bool kCopyTextureType2DArray = (kCopyTextureType == kTextureType2DArray);
constant bool kCopyTextureType2DMS    = (kCopyTextureType == kTextureType2DMultisample);
constant bool kCopyTextureTypeCube    = (kCopyTextureType == kTextureTypeCube);
constant bool kCopyTextureType3D      = (kCopyTextureType == kTextureType3D);
1730
// Parameters for writing pixels into a texture (consumed by textureWrite).
// Field order and types define the layout of the bound constant data; do not reorder.
struct CopyPixelParams
{
    // Extent of the copied region. NOTE(review): presumably in texels - confirm.
    uint3 copySize;
    // Added to the thread position to obtain the destination texel coordinates.
    uint3 textureOffset;

    // Location and layout of the pixel data in the buffer.
    // NOTE(review): presumably all four are byte quantities - confirm with the host code.
    uint bufferStartOffset;
    uint pixelSize;
    uint bufferRowPitch;
    uint bufferDepthPitch;
};
1741
1742struct WritePixelParams  // Parameter block received as `options` by textureRead and by every write* routine.
1743{
1744    uint2 copySize;  // copySize.y is used by textureRead to mirror the row index when the row order is reversed.
1745    uint2 textureOffset;  // Added to the (possibly mirrored) indices to form the source texel coordinate.
1746
1747    uint bufferStartOffset;  // Not read in this section; presumably the byte offset of the first texel in the buffer - confirm.
1748
1749    uint pixelSize;  // Not read in this section; presumably bytes per texel - confirm.
1750    uint bufferRowPitch;  // Not read in this section; presumably bytes between consecutive rows - confirm.
1751
1752    uint textureLevel;  // Passed as the last argument of the non-multisample texture read() calls.
1753    uint textureLayer;  // Array slice / cube face argument of read(), or the z coordinate for 3D textures.
1754
1755    bool reverseTextureRowOrder;  // When true, textureRead samples row (copySize.y - 1 - y) instead of row y.
1756};
1757# 120 "./copy_buffer.metal"
// Stores one texel into the destination texture selected by kCopyTextureType.
//   gIndices - texel indices relative to options.textureOffset.
//   options  - copy parameters; only textureOffset is read here.
//   color    - value to store.
// For 2D-array and cube destinations the z index is forwarded as the third
// argument of write(). Nothing is written when kCopyTextureType matches none
// of the four handled texture types.
template <typename T>
static inline void textureWrite(ushort3 gIndices,
                                constant CopyPixelParams &options,
                                vec<T, 4> color,
                                texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
{
    const uint3 texel = options.textureOffset + uint3(gIndices);

    if (kCopyTextureType == kTextureType2D)
    {
        dstTexture2d.write(color, texel.xy);
    }
    else if (kCopyTextureType == kTextureType2DArray)
    {
        dstTexture2dArray.write(color, texel.xy, texel.z);
    }
    else if (kCopyTextureType == kTextureType3D)
    {
        dstTexture3d.write(color, texel);
    }
    else if (kCopyTextureType == kTextureTypeCube)
    {
        dstTextureCube.write(color, texel.xy, texel.z);
    }
}
1781
1782
// Fetches one texel from the source texture selected by kCopyTextureType.
//   gIndices - texel indices relative to options.textureOffset.
//   options  - supplies textureOffset, textureLevel, textureLayer, and the
//              row-flip controls (reverseTextureRowOrder / copySize.y).
// Multisample sources go through resolveTextureMS. The returned value is left
// uninitialized when kCopyTextureType matches none of the handled types.
template <typename T>
static inline vec<T, 4> textureRead(ushort2 gIndices,
                                    constant WritePixelParams &options,
                                    texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureType2DMS)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]])
{
    // Mirror the row inside the copy region first, then shift into texture space.
    uint2 texel = uint2(gIndices);
    texel.y     = options.reverseTextureRowOrder ? (options.copySize.y - 1 - gIndices.y) : texel.y;
    texel += options.textureOffset;

    vec<T, 4> result;
    if (kCopyTextureType == kTextureType2D)
    {
        result = srcTexture2d.read(texel.xy, options.textureLevel);
    }
    else if (kCopyTextureType == kTextureType2DArray)
    {
        result = srcTexture2dArray.read(texel.xy, options.textureLayer, options.textureLevel);
    }
    else if (kCopyTextureType == kTextureType2DMultisample)
    {
        result = resolveTextureMS(srcTexture2dMS, texel.xy);
    }
    else if (kCopyTextureType == kTextureType3D)
    {
        result = srcTexture3d.read(uint3(texel, options.textureLayer), options.textureLevel);
    }
    else if (kCopyTextureType == kTextureTypeCube)
    {
        result = srcTextureCube.read(texel.xy, options.textureLayer, options.textureLevel);
    }
    return result;
}
1815# 215 "./copy_buffer.metal"
// Decodes a 16-bit R5G6B5 texel: red from the 5-bit field at shift 11, green
// from the 6-bit field at shift 5, blue from the 5-bit field at shift 0.
// Alpha is fixed to 1.
static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer)
{
    ushort packed = bytesToShort<ushort>(buffer, bufferOffset);

    const float red   = normalizedToFloat<5>(getShiftedData<5, 11>(packed));
    const float green = normalizedToFloat<6>(getShiftedData<6, 5>(packed));
    const float blue  = normalizedToFloat<5>(getShiftedData<5, 0>(packed));
    return float4(red, green, blue, 1.0f);
}
// Encodes color.rgb as a 16-bit R5G6B5 value and stores it with shortToBytes.
// Alpha is dropped; gIndices and options are not used by this writer.
static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const ushort redField   = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r));
    const ushort greenField = shiftData<6, 5>(floatToNormalized<6, ushort>(color.g));
    const ushort blueField  = shiftData<5, 0>(floatToNormalized<5, ushort>(color.b));

    ushort packed = redField | greenField | blueField;
    shortToBytes(packed, bufferOffset, buffer);
}
1835
1836
// Decodes a 16-bit R4G4B4A4 texel; the four 4-bit fields sit at shifts
// 12 (red), 8 (green), 4 (blue) and 0 (alpha).
static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer)
{
    ushort packed = bytesToShort<ushort>(buffer, bufferOffset);

    const float red   = normalizedToFloat<4>(getShiftedData<4, 12>(packed));
    const float green = normalizedToFloat<4>(getShiftedData<4, 8>(packed));
    const float blue  = normalizedToFloat<4>(getShiftedData<4, 4>(packed));
    const float alpha = normalizedToFloat<4>(getShiftedData<4, 0>(packed));
    return float4(red, green, blue, alpha);
}
// Encodes color as a 16-bit R4G4B4A4 value (fields at shifts 12/8/4/0) and
// stores it with shortToBytes. gIndices and options are not used here.
static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const ushort redField   = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r));
    const ushort greenField = shiftData<4, 8>(floatToNormalized<4, ushort>(color.g));
    const ushort blueField  = shiftData<4, 4>(floatToNormalized<4, ushort>(color.b));
    const ushort alphaField = shiftData<4, 0>(floatToNormalized<4, ushort>(color.a));

    ushort packed = redField | greenField | blueField | alphaField;
    shortToBytes(packed, bufferOffset, buffer);
}
1858
1859
// Decodes a 16-bit R5G5B5A1 texel: 5-bit red/green/blue at shifts 11/6/1 and
// a 1-bit alpha at shift 0.
static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer)
{
    ushort packed = bytesToShort<ushort>(buffer, bufferOffset);

    const float red   = normalizedToFloat<5>(getShiftedData<5, 11>(packed));
    const float green = normalizedToFloat<5>(getShiftedData<5, 6>(packed));
    const float blue  = normalizedToFloat<5>(getShiftedData<5, 1>(packed));
    const float alpha = normalizedToFloat<1>(getShiftedData<1, 0>(packed));
    return float4(red, green, blue, alpha);
}
// Encodes color as a 16-bit R5G5B5A1 value (5-bit fields at shifts 11/6/1,
// 1-bit alpha at shift 0) and stores it with shortToBytes. gIndices and
// options are not used here.
static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const ushort redField   = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r));
    const ushort greenField = shiftData<5, 6>(floatToNormalized<5, ushort>(color.g));
    const ushort blueField  = shiftData<5, 1>(floatToNormalized<5, ushort>(color.b));
    const ushort alphaField = shiftData<1, 0>(floatToNormalized<1, ushort>(color.a));

    ushort packed = redField | greenField | blueField | alphaField;
    shortToBytes(packed, bufferOffset, buffer);
}
1881
1882
// Decodes a packed 32-bit texel into signed integer channels.
// Red, green and blue come from getShiftedData<10, 0>, <10, 10> and <10, 20>;
// alpha comes from getShiftedData<2, 30>. Each field is then sign-extended to
// a full int by OR-ing in the high bits when the field's top bit is set.
//   bufferOffset - offset handed to bytesToInt to locate the texel.
//   buffer       - source bytes.
// Returns the sign-extended (r, g, b, a) values.
static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 color;
    int src = bytesToInt<int>(buffer, bufferOffset);

    constexpr int3 rgbSignMask(0x200);        // bit 9: sign bit of a 10-bit field
    constexpr int3 negativeMask(0xfffffc00);  // bits 10..31: extension for a negative 10-bit field
    constexpr int alphaSignMask = 0x2;        // bit 1: sign bit of the 2-bit field
    constexpr int alphaNegMask = 0xfffffffc;  // bits 2..31: extension for a negative 2-bit field

    color.r = getShiftedData<10, 0>(src);
    color.g = getShiftedData<10, 10>(src);
    color.b = getShiftedData<10, 20>(src);

    int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9;
    color.rgb = (isRgbNegative * negativeMask) | color.rgb;

    color.a = getShiftedData<2, 30>(src);
    // `>>` binds tighter than `&`, so the mask has to be parenthesized. Without
    // the parentheses the test becomes `color.a & 1`, which decodes 1 as -3 and
    // the bit pattern 0b10 (-2) as +2.
    int isAlphaNegative = (color.a & alphaSignMask) >> 1;
    color.a = (isAlphaNegative * alphaNegMask) | color.a;
    return color;
}
1905
// Decodes a packed 32-bit texel into unsigned integer channels: 10-bit
// red/green/blue at shifts 0/10/20 and a 2-bit alpha at shift 30.
static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint packed = bytesToInt<uint>(buffer, bufferOffset);

    const uint red   = getShiftedData<10, 0>(packed);
    const uint green = getShiftedData<10, 10>(packed);
    const uint blue  = getShiftedData<10, 20>(packed);
    const uint alpha = getShiftedData<2, 30>(packed);
    return uint4(red, green, blue, alpha);
}
1917
1918
// Unpacks a 32-bit texel into four normalized floats.
//   isSRGB - when true the sRGB-aware unpack (unpack_unorm4x8_srgb_to_float)
//            is used, otherwise the plain unpack_unorm4x8_to_float.
static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
{
    const uint packed = bytesToInt<uint>(buffer, bufferOffset);

    if (isSRGB)
    {
        return unpack_unorm4x8_srgb_to_float(packed);
    }
    return unpack_unorm4x8_to_float(packed);
}
// Packs color into 32 bits and stores it with intToBytes.
//   isSRGB - selects pack_float_to_srgb_unorm4x8 over pack_float_to_unorm4x8.
// gIndices and options are not used by this writer.
static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
{
    uint packed = isSRGB ? pack_float_to_srgb_unorm4x8(color) : pack_float_to_unorm4x8(color);

    intToBytes(packed, bufferOffset, buffer);
}
1949
// Reads three consecutive normalized bytes as red, green, blue with alpha
// fixed to 1.
//   isSRGB - when true the assembled color (alpha included) is passed through
//            sRGBtoLinear before being returned.
static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
{
    const float red   = normalizedToFloat<uchar>(buffer[bufferOffset]);
    const float green = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
    const float blue  = normalizedToFloat<uchar>(buffer[bufferOffset + 2]);

    float4 texel = float4(red, green, blue, 1.0f);
    if (!isSRGB)
    {
        return texel;
    }
    texel = sRGBtoLinear(texel);
    return texel;
}
// Packs color (with alpha forced to 1) into 32 bits and stores it through
// int24bitToBytes.
//   isSRGB - selects pack_float_to_srgb_unorm4x8 over pack_float_to_unorm4x8.
// gIndices and options are not used by this writer.
static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
{
    const float4 opaque = float4(color.rgb, 1.0f);

    uint packed = isSRGB ? pack_float_to_srgb_unorm4x8(opaque) : pack_float_to_unorm4x8(opaque);
    int24bitToBytes(packed, bufferOffset, buffer);
}
1979
1980
// Unpacks a 32-bit texel into four signed-normalized floats using
// unpack_snorm4x8_to_float.
static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer)
{
    const uint packed = bytesToInt<uint>(buffer, bufferOffset);
    return unpack_snorm4x8_to_float(packed);
}
// Packs color with pack_float_to_snorm4x8 and stores the 32-bit result with
// intToBytes. gIndices and options are not used by this writer.
static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    uint packed = pack_float_to_snorm4x8(color);
    intToBytes(packed, bufferOffset, buffer);
}
1996
1997
// Reads three consecutive bytes through normalizedToFloat<7, char> as red,
// green, blue; alpha is fixed to 1.
static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer)
{
    const float red   = normalizedToFloat<7, char>(buffer[bufferOffset]);
    const float green = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
    const float blue  = normalizedToFloat<7, char>(buffer[bufferOffset + 2]);
    return float4(red, green, blue, 1.0f);
}
// Packs color with pack_float_to_snorm4x8 and stores the result through
// int24bitToBytes. gIndices and options are not used by this writer.
static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    uint packed = pack_float_to_snorm4x8(color);
    int24bitToBytes(packed, bufferOffset, buffer);
}
2014
2015
// R8G8B8A8 UNORM reader: readR8G8B8A8 with sRGB decoding turned off.
static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    constexpr bool kIsSRGB = false;
    return readR8G8B8A8(bufferOffset, buffer, kIsSRGB);
}
// R8G8B8A8 UNORM writer: writeR8G8B8A8 with sRGB encoding turned off.
static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    constexpr bool kIsSRGB = false;
    writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, kIsSRGB);
}
2024
// R8G8B8A8 sRGB reader: readR8G8B8A8 with sRGB decoding turned on.
static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
{
    constexpr bool kIsSRGB = true;
    return readR8G8B8A8(bufferOffset, buffer, kIsSRGB);
}
// R8G8B8A8 sRGB writer: writeR8G8B8A8 with sRGB encoding turned on.
static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    constexpr bool kIsSRGB = true;
    writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, kIsSRGB);
}
2033
2034
// B8G8R8A8 UNORM reader: decodes as R8G8B8A8 (no sRGB) and swaps the red and
// blue channels of the result.
static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    const float4 decoded = readR8G8B8A8(bufferOffset, buffer, false);
    return float4(decoded.b, decoded.g, decoded.r, decoded.a);
}
// B8G8R8A8 UNORM writer: swaps the red and blue channels, then stores through
// writeR8G8B8A8 without sRGB encoding.
static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const float4 swapped = color.bgra;
    writeR8G8B8A8(gIndices, options, bufferOffset, swapped, buffer, false);
}
2044
// B8G8R8A8 sRGB reader: decodes as R8G8B8A8 with sRGB decoding and swaps the
// red and blue channels of the result.
static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
{
    const float4 decoded = readR8G8B8A8(bufferOffset, buffer, true);
    return float4(decoded.b, decoded.g, decoded.r, decoded.a);
}
// B8G8R8A8 sRGB writer: swaps the red and blue channels, then stores through
// writeR8G8B8A8 with sRGB encoding.
static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const float4 swapped = color.bgra;
    writeR8G8B8A8(gIndices, options, bufferOffset, swapped, buffer, true);
}
2054
2055
// R8G8B8 UNORM reader: readR8G8B8 with sRGB decoding turned off.
static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    constexpr bool kIsSRGB = false;
    return readR8G8B8(bufferOffset, buffer, kIsSRGB);
}
// R8G8B8 UNORM writer: writeR8G8B8 with sRGB encoding turned off.
static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    constexpr bool kIsSRGB = false;
    writeR8G8B8(gIndices, options, bufferOffset, color, buffer, kIsSRGB);
}
2064
// R8G8B8 sRGB reader: readR8G8B8 with sRGB decoding turned on.
static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
{
    constexpr bool kIsSRGB = true;
    return readR8G8B8(bufferOffset, buffer, kIsSRGB);
}
// R8G8B8 sRGB writer: writeR8G8B8 with sRGB encoding turned on.
static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    constexpr bool kIsSRGB = true;
    writeR8G8B8(gIndices, options, bufferOffset, color, buffer, kIsSRGB);
}
2073
2074
// Reads one normalized byte and replicates it into red, green and blue;
// alpha is fixed to 1.
static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    const float luminance = normalizedToFloat<uchar>(buffer[bufferOffset]);
    return float4(float3(luminance), 1.0f);
}
// Stores the red channel as a single normalized byte. The other channels,
// gIndices and options are not used by this writer.
static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
}
2086
2087
// Stores the alpha channel as a single normalized byte. The color channels,
// gIndices and options are not used by this writer.
static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.a);
}
2092
2093
// Reads two normalized bytes: the first is replicated into red, green and
// blue, the second becomes alpha.
static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    const float luminance = normalizedToFloat<uchar>(buffer[bufferOffset]);
    const float alpha     = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
    return float4(float3(luminance), alpha);
}
// Stores red then alpha as two consecutive normalized bytes. gIndices and
// options are not used by this writer.
static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
    dst[1]            = floatToNormalized<uchar>(color.a);
}
2106
2107
// Reads one normalized byte into red; green and blue are 0 and alpha is 1.
static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    const float red = normalizedToFloat<uchar>(buffer[bufferOffset]);
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Stores the red channel as a single normalized byte. gIndices and options
// are not used by this writer.
static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
}
2120
// Reads one byte through normalizedToFloat<7, char> into red; green and blue
// are 0 and alpha is 1.
static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer)
{
    const float red = normalizedToFloat<7, char>(buffer[bufferOffset]);
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Converts red with floatToNormalized<7, char> and stores the resulting byte
// bit-for-bit. gIndices and options are not used by this writer.
static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const char encoded   = floatToNormalized<7, char>(color.r);
    buffer[bufferOffset] = as_type<uchar>(encoded);
}
2133
2134
// Reinterprets one byte as a signed char and widens it into red; green and
// blue are 0 and alpha is 1.
static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer)
{
    const int red = as_type<char>(buffer[bufferOffset]);
    return int4(red, 0, 0, 1);
}
// Stores red converted to a single byte with static_cast<uchar>. gIndices and
// options are not used by this writer.
static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
}
2147
2148
// Widens one unsigned byte into red; green and blue are 0 and alpha is 1.
static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer)
{
    const uint red = as_type<uchar>(buffer[bufferOffset]);
    return uint4(red, 0, 0, 1);
}
// Stores red converted to a single byte with static_cast<uchar>. gIndices and
// options are not used by this writer.
static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
}
2161
2162
// Reads two normalized bytes into red and green; blue is 0 and alpha is 1.
static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer)
{
    const float red   = normalizedToFloat<uchar>(buffer[bufferOffset]);
    const float green = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
    return float4(red, green, 0.0f, 1.0f);
}
// Stores red then green as two consecutive normalized bytes. gIndices and
// options are not used by this writer.
static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
    dst[1]            = floatToNormalized<uchar>(color.g);
}
2177
// Reads two bytes through normalizedToFloat<7, char> into red and green;
// blue is 0 and alpha is 1.
static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer)
{
    const float red   = normalizedToFloat<7, char>(buffer[bufferOffset]);
    const float green = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
    return float4(red, green, 0.0f, 1.0f);
}
// Converts red and green with floatToNormalized<7, char> and stores the two
// resulting bytes bit-for-bit. gIndices and options are not used here.
static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const char encodedRed   = floatToNormalized<7, char>(color.r);
    const char encodedGreen = floatToNormalized<7, char>(color.g);

    buffer[bufferOffset]     = as_type<uchar>(encodedRed);
    buffer[bufferOffset + 1] = as_type<uchar>(encodedGreen);
}
2192
2193
// Reinterprets two bytes as signed chars and widens them into red and green;
// blue is 0 and alpha is 1.
static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer)
{
    const int red   = as_type<char>(buffer[bufferOffset]);
    const int green = as_type<char>(buffer[bufferOffset + 1]);
    return int4(red, green, 0, 1);
}
// Stores red then green, each converted to a byte with static_cast<uchar>.
// gIndices and options are not used by this writer.
static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
    dst[1]            = static_cast<uchar>(color.g);
}
2208
2209
// Widens two unsigned bytes into red and green; blue is 0 and alpha is 1.
static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer)
{
    const uint red   = as_type<uchar>(buffer[bufferOffset]);
    const uint green = as_type<uchar>(buffer[bufferOffset + 1]);
    return uint4(red, green, 0, 1);
}
// Stores red then green, each converted to a byte with static_cast<uchar>.
// gIndices and options are not used by this writer.
static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
    dst[1]            = static_cast<uchar>(color.g);
}
2224
2225
// Reinterprets three bytes as signed chars and widens them into red, green
// and blue; alpha is 1.
static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer)
{
    const int red   = as_type<char>(buffer[bufferOffset]);
    const int green = as_type<char>(buffer[bufferOffset + 1]);
    const int blue  = as_type<char>(buffer[bufferOffset + 2]);
    return int4(red, green, blue, 1);
}
2235
2236
// Widens three unsigned bytes into red, green and blue; alpha is 1.
static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer)
{
    const uint red   = as_type<uchar>(buffer[bufferOffset]);
    const uint green = as_type<uchar>(buffer[bufferOffset + 1]);
    const uint blue  = as_type<uchar>(buffer[bufferOffset + 2]);
    return uint4(red, green, blue, 1);
}
2246
2247
// Reinterprets four consecutive bytes as signed chars and widens them into
// red, green, blue and alpha, in that order.
static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 texel;
    for (uint channel = 0; channel < 4; ++channel)
    {
        texel[channel] = as_type<char>(buffer[bufferOffset + channel]);
    }
    return texel;
}
// Stores red, green, blue, alpha as four consecutive bytes, each converted
// with static_cast<uchar>. gIndices and options are not used here.
static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        buffer[bufferOffset + channel] = static_cast<uchar>(color[channel]);
    }
}
2264
2265
// Widens four consecutive unsigned bytes into red, green, blue and alpha, in
// that order.
static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    for (uint channel = 0; channel < 4; ++channel)
    {
        texel[channel] = as_type<uchar>(buffer[bufferOffset + channel]);
    }
    return texel;
}
// Stores red, green, blue, alpha as four consecutive bytes, each converted
// with static_cast<uchar>. gIndices and options are not used here.
static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        buffer[bufferOffset + channel] = static_cast<uchar>(color[channel]);
    }
}
2282
2283
// Reads 16 bits, reinterprets them as a half and widens it into red; green
// and blue are 0 and alpha is 1.
static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    const float red = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Converts red to half and stores its 16-bit pattern with shortToBytes.
// gIndices and options are not used by this writer.
static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const half red = static_cast<half>(color.r);
    shortToBytes(as_type<ushort>(red), bufferOffset, buffer);
}
2296
// Reads one 16-bit value of type ShortType and normalizes it into red; green
// and blue are 0 and alpha is 1.
template <typename ShortType>
static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer)
{
    const float red = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    return float4(red, 0.0f, 0.0f, 1.0f);
}
2306
2307
2308
2309
// Reads one signed 16-bit value into red; green and blue are 0 and alpha is 1.
static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer)
{
    const int red = bytesToShort<short>(buffer, bufferOffset);
    return int4(red, 0, 0, 1);
}
// Stores red converted to short with shortToBytes. gIndices and options are
// not used by this writer.
static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    const short red = static_cast<short>(color.r);
    shortToBytes(red, bufferOffset, buffer);
}
2322
2323
// Reads one unsigned 16-bit value into red; green and blue are 0 and alpha
// is 1.
static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer)
{
    const uint red = bytesToShort<ushort>(buffer, bufferOffset);
    return uint4(red, 0, 0, 1);
}
// Stores red converted to ushort with shortToBytes. gIndices and options are
// not used by this writer.
static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    const ushort red = static_cast<ushort>(color.r);
    shortToBytes(red, bufferOffset, buffer);
}
2336
2337
// Reads 16 bits, reinterprets them as a half and widens it into alpha; red,
// green and blue are 0.
static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    const float alpha = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    return float4(0.0f, 0.0f, 0.0f, alpha);
}
// Converts alpha to half and stores its 16-bit pattern with shortToBytes.
// gIndices and options are not used by this writer.
static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const half alpha = static_cast<half>(color.a);
    shortToBytes(as_type<ushort>(alpha), bufferOffset, buffer);
}
2349
2350
// Reads 16 bits, reinterprets them as a half and replicates the value into
// red, green and blue; alpha is 1.
static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    const float luminance = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    return float4(float3(luminance), 1.0f);
}
// Converts red to half and stores its 16-bit pattern with shortToBytes. The
// other channels, gIndices and options are not used by this writer.
static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const half luminance = static_cast<half>(color.r);
    shortToBytes(as_type<ushort>(luminance), bufferOffset, buffer);
}
2362
2363
// Reads two halves: the first (at bufferOffset) is replicated into red, green
// and blue, the second (at bufferOffset + 2) becomes alpha.
static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    const float luminance = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    const float alpha     = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    return float4(float3(luminance), alpha);
}
// Stores red and alpha as two consecutive halves (at bufferOffset and
// bufferOffset + 2). gIndices and options are not used by this writer.
static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const half luminance = static_cast<half>(color.r);
    const half alpha     = static_cast<half>(color.a);

    shortToBytes(as_type<ushort>(luminance), bufferOffset, buffer);
    shortToBytes(as_type<ushort>(alpha), bufferOffset + 2, buffer);
}
2376
2377
// Reads two consecutive halves into red and green; blue is 0 and alpha is 1.
static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    const float red   = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    const float green = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    return float4(red, green, 0.0f, 1.0f);
}
// Stores red and green as two consecutive halves (at bufferOffset and
// bufferOffset + 2). gIndices and options are not used by this writer.
static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    const half red   = static_cast<half>(color.r);
    const half green = static_cast<half>(color.g);

    shortToBytes(as_type<ushort>(red), bufferOffset, buffer);
    shortToBytes(as_type<ushort>(green), bufferOffset + 2, buffer);
}
2392
2393
// Reads two consecutive 16-bit values of type ShortType and normalizes them
// into red and green; blue is 0 and alpha is 1.
template <typename ShortType>
static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer)
{
    const float red   = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    const float green = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
    return float4(red, green, 0.0f, 1.0f);
}
2404
2405
2406
2407
// Reads two consecutive signed 16-bit values into red and green; blue is 0
// and alpha is 1.
static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer)
{
    const int red   = bytesToShort<short>(buffer, bufferOffset);
    const int green = bytesToShort<short>(buffer, bufferOffset + 2);
    return int4(red, green, 0, 1);
}
// Stores red and green as two consecutive shorts (at bufferOffset and
// bufferOffset + 2). gIndices and options are not used by this writer.
static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    const short red   = static_cast<short>(color.r);
    const short green = static_cast<short>(color.g);

    shortToBytes(red, bufferOffset, buffer);
    shortToBytes(green, bufferOffset + 2, buffer);
}
2422
2423
// Reads two consecutive unsigned 16-bit values into red and green; blue is 0
// and alpha is 1.
static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer)
{
    const uint red   = bytesToShort<ushort>(buffer, bufferOffset);
    const uint green = bytesToShort<ushort>(buffer, bufferOffset + 2);
    return uint4(red, green, 0, 1);
}
// Stores red and green as two consecutive ushorts (at bufferOffset and
// bufferOffset + 2). gIndices and options are not used by this writer.
static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    const ushort red   = static_cast<ushort>(color.r);
    const ushort green = static_cast<ushort>(color.g);

    shortToBytes(red, bufferOffset, buffer);
    shortToBytes(green, bufferOffset + 2, buffer);
}
2438
2439
// Reads three consecutive halves into red, green and blue; alpha is 1.
static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    const float red   = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    const float green = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    const float blue  = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
    return float4(red, green, blue, 1.0f);
}
2449
2450
// Reads three consecutive 16-bit values of type ShortType and normalizes them
// into red, green and blue; alpha is 1.
template <typename ShortType>
static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer)
{
    const float red   = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    const float green = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
    const float blue  = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
    return float4(red, green, blue, 1.0f);
}
2461
2462
2463
// Reads three consecutive signed 16-bit values into red, green and blue;
// alpha is 1.
static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer)
{
    const int red   = bytesToShort<short>(buffer, bufferOffset);
    const int green = bytesToShort<short>(buffer, bufferOffset + 2);
    const int blue  = bytesToShort<short>(buffer, bufferOffset + 4);
    return int4(red, green, blue, 1);
}
2473
2474
// Reads three consecutive unsigned 16-bit values into red, green and blue;
// alpha is 1.
static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer)
{
    const uint red   = bytesToShort<ushort>(buffer, bufferOffset);
    const uint green = bytesToShort<ushort>(buffer, bufferOffset + 2);
    const uint blue  = bytesToShort<ushort>(buffer, bufferOffset + 4);
    return uint4(red, green, blue, 1);
}
2484
2485
// Reads four consecutive halves (2 bytes apart) into red, green, blue and
// alpha, in that order.
static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    for (uint channel = 0; channel < 4; ++channel)
    {
        texel[channel] = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2 * channel));
    }
    return texel;
}
// Stores red, green, blue, alpha as four consecutive halves (2 bytes apart).
// gIndices and options are not used by this writer.
static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        const half value = static_cast<half>(color[channel]);
        shortToBytes(as_type<ushort>(value), bufferOffset + 2 * channel, buffer);
    }
}
2502
2503
// Reads four consecutive 16-bit values of type ShortType (2 bytes apart) and
// normalizes them into red, green, blue and alpha, in that order.
template <typename ShortType>
static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer)
{
    float4 texel;
    for (uint channel = 0; channel < 4; ++channel)
    {
        texel[channel] = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2 * channel));
    }
    return texel;
}
2514
2515
2516
2517
// Reads four consecutive signed 16-bit values (2 bytes apart) into red,
// green, blue and alpha, in that order.
static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer)
{
    int4 texel;
    for (uint channel = 0; channel < 4; ++channel)
    {
        texel[channel] = bytesToShort<short>(buffer, bufferOffset + 2 * channel);
    }
    return texel;
}
// Stores red, green, blue, alpha as four consecutive shorts (2 bytes apart).
// gIndices and options are not used by this writer.
static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        shortToBytes(static_cast<short>(color[channel]), bufferOffset + 2 * channel, buffer);
    }
}
2534
2535
// Reads four consecutive unsigned 16-bit values (2 bytes apart) into red,
// green, blue and alpha, in that order.
static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer)
{
    uint4 texel;
    for (uint channel = 0; channel < 4; ++channel)
    {
        texel[channel] = bytesToShort<ushort>(buffer, bufferOffset + 2 * channel);
    }
    return texel;
}
// Stores red, green, blue, alpha as four consecutive ushorts (2 bytes apart).
// gIndices and options are not used by this writer.
static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        shortToBytes(static_cast<ushort>(color[channel]), bufferOffset + 2 * channel, buffer);
    }
}
2552
2553
2554static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer)
2555{
2556    float4 color;
2557    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2558    color.g = color.b = 0.0;
2559    color.a = 1.0;
2560    return color;
2561}
2562static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2563{
2564    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2565}
2566
2567
2568template <typename IntType>
2569static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer)
2570{
2571    float4 color;
2572    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2573    color.g = color.b = 0.0;
2574    color.a = 1.0;
2575    return color;
2576}
2577
2578
2579
2580
2581static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer)
2582{
2583    float4 color;
2584    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2585    color.rgb = 0.0;
2586    return color;
2587}
2588static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2589{
2590    intToBytes(as_type<uint>(color.a), bufferOffset, buffer);
2591}
2592
2593
2594static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer)
2595{
2596    float4 color;
2597    color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2598    color.a = 1.0;
2599    return color;
2600}
2601static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2602{
2603    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2604}
2605
2606
2607static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer)
2608{
2609    int4 color;
2610    color.r = bytesToInt<int>(buffer, bufferOffset);
2611    color.g = color.b = 0;
2612    color.a = 1;
2613    return color;
2614}
2615static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2616{
2617    intToBytes(color.r, bufferOffset, buffer);
2618}
2619
2620
2621static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer)
2622{
2623    float4 color;
2624    constexpr float kDivisor = 1.0f / (1 << 16);
2625    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2626    color.g = color.b = 0.0;
2627    color.a = 1.0;
2628    return color;
2629}
2630
2631
2632static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer)
2633{
2634    uint4 color;
2635    color.r = bytesToInt<uint>(buffer, bufferOffset);
2636    color.g = color.b = 0;
2637    color.a = 1;
2638    return color;
2639}
2640static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2641{
2642    intToBytes(color.r, bufferOffset, buffer);
2643}
2644
2645
2646static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2647{
2648    float4 color;
2649    color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2650    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2651    return color;
2652}
2653static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2654{
2655    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2656    intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer);
2657}
2658
2659
2660static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer)
2661{
2662    float4 color;
2663    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2664    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2665    color.b = 0.0;
2666    color.a = 1.0;
2667    return color;
2668}
2669static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2670{
2671    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2672    intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2673}
2674
2675
2676template <typename IntType>
2677static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer)
2678{
2679    float4 color;
2680    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2681    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2682    color.b = 0.0;
2683    color.a = 1.0;
2684    return color;
2685}
2686
2687
2688
2689
2690static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer)
2691{
2692    int4 color;
2693    color.r = bytesToInt<int>(buffer, bufferOffset);
2694    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2695    color.b = 0;
2696    color.a = 1;
2697    return color;
2698}
2699static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2700{
2701    intToBytes(color.r, bufferOffset, buffer);
2702    intToBytes(color.g, bufferOffset + 4, buffer);
2703}
2704
2705
2706static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer)
2707{
2708    float4 color;
2709    constexpr float kDivisor = 1.0f / (1 << 16);
2710    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2711    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2712    color.b = 0.0;
2713    color.a = 1.0;
2714    return color;
2715}
2716
2717
2718static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer)
2719{
2720    uint4 color;
2721    color.r = bytesToInt<uint>(buffer, bufferOffset);
2722    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2723    color.b = 0;
2724    color.a = 1;
2725    return color;
2726}
2727static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2728{
2729    intToBytes(color.r, bufferOffset, buffer);
2730    intToBytes(color.g, bufferOffset + 4, buffer);
2731}
2732
2733
2734static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer)
2735{
2736    float4 color;
2737    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2738    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2739    color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2740    color.a = 1.0;
2741    return color;
2742}
2743
2744
2745template <typename IntType>
2746static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer)
2747{
2748    float4 color;
2749    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2750    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2751    color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2752    color.a = 1.0;
2753    return color;
2754}
2755
2756
2757
2758
2759static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer)
2760{
2761    int4 color;
2762    color.r = bytesToInt<int>(buffer, bufferOffset);
2763    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2764    color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2765    color.a = 1;
2766    return color;
2767}
2768
2769
2770static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer)
2771{
2772    float4 color;
2773    constexpr float kDivisor = 1.0f / (1 << 16);
2774    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2775    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2776    color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2777    color.a = 1.0;
2778    return color;
2779}
2780
2781
2782static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer)
2783{
2784    uint4 color;
2785    color.r = bytesToInt<uint>(buffer, bufferOffset);
2786    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2787    color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2788    color.a = 1;
2789    return color;
2790}
2791
2792
2793static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2794{
2795    float4 color;
2796    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2797    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2798    color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2799    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12));
2800    return color;
2801}
2802static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2803{
2804    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2805    intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2806    intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer);
2807    intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer);
2808}
2809
2810
2811template <typename IntType>
2812static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer)
2813{
2814    float4 color;
2815    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2816    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2817    color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2818    color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12));
2819    return color;
2820}
2821
2822
2823
2824
2825static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer)
2826{
2827    int4 color;
2828    color.r = bytesToInt<int>(buffer, bufferOffset);
2829    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2830    color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2831    color.a = bytesToInt<int>(buffer, bufferOffset + 12);
2832    return color;
2833}
2834static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2835{
2836    intToBytes(color.r, bufferOffset, buffer);
2837    intToBytes(color.g, bufferOffset + 4, buffer);
2838    intToBytes(color.b, bufferOffset + 8, buffer);
2839    intToBytes(color.a, bufferOffset + 12, buffer);
2840}
2841
2842static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer)
2843{
2844    float4 color;
2845    constexpr float kDivisor = 1.0f / (1 << 16);
2846    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2847    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2848    color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2849    color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor;
2850    return color;
2851}
2852
2853
2854static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer)
2855{
2856    uint4 color;
2857    color.r = bytesToInt<uint>(buffer, bufferOffset);
2858    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2859    color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2860    color.a = bytesToInt<uint>(buffer, bufferOffset + 12);
2861    return color;
2862}
2863static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2864{
2865    intToBytes(color.r, bufferOffset, buffer);
2866    intToBytes(color.g, bufferOffset + 4, buffer);
2867    intToBytes(color.b, bufferOffset + 8, buffer);
2868    intToBytes(color.a, bufferOffset + 12, buffer);
2869}
2870# 1292 "./copy_buffer.metal"
// 8-bit *_SSCALED / *_USCALED readers. Each one simply forwards to the *_SINT /
// *_UINT reader for the same channel layout and returns its result.
static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8_SINT(bufferOffset, buffer);
}
static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8_UINT(bufferOffset, buffer);
}
static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8_SINT(bufferOffset, buffer);
}
static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8_UINT(bufferOffset, buffer);
}
static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8B8_SINT(bufferOffset, buffer);
}
static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8B8_UINT(bufferOffset, buffer);
}
static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8B8A8_SINT(bufferOffset, buffer);
}
static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR8G8B8A8_UINT(bufferOffset, buffer);
}
// 16-bit *_SSCALED / *_USCALED readers. Each one simply forwards to the *_SINT /
// *_UINT reader for the same channel layout and returns its result.
static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16_SINT(bufferOffset, buffer);
}
static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16_UINT(bufferOffset, buffer);
}
static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16_SINT(bufferOffset, buffer);
}
static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16_UINT(bufferOffset, buffer);
}
static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16B16_SINT(bufferOffset, buffer);
}
static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16B16_UINT(bufferOffset, buffer);
}
static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16B16A16_SINT(bufferOffset, buffer);
}
static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR16G16B16A16_UINT(bufferOffset, buffer);
}
// 32-bit *_SSCALED / *_USCALED readers. Each one simply forwards to the *_SINT /
// *_UINT reader for the same channel layout and returns its result.
static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32_SINT(bufferOffset, buffer);
}
static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32_UINT(bufferOffset, buffer);
}
static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32G32_SINT(bufferOffset, buffer);
}
static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32G32_UINT(bufferOffset, buffer);
}
static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32G32B32_SINT(bufferOffset, buffer);
}
static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32G32B32_UINT(bufferOffset, buffer);
}
static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32G32B32A32_SINT(bufferOffset, buffer);
}
static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer)
{
    return readR32G32B32A32_UINT(bufferOffset, buffer);
}
2874
2875static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); }
2876
2877
2878kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2879{
2880    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2881# 1336 "./copy_buffer.metal"
2882    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2883
2884    switch (kCopyFormatType)
2885    {
2886        case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, 
dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = 
readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = 
readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2887    }
2888
2889
2890}
2891
2892kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2893{
2894    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2895# 1364 "./copy_buffer.metal"
2896    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2897
2898    switch (kCopyFormatType)
2899    {
2900        case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = 
readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2901    }
2902
2903
2904}
2905
2906kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2907{
2908    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2909# 1392 "./copy_buffer.metal"
2910    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2911
2912    switch (kCopyFormatType)
2913    {
2914        case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = 
readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2915    }
2916
2917
2918}
2919
2920
2921kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2922{
2923    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2924# 1439 "./copy_buffer.metal"
2925    uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2926
2927    switch (kCopyFormatType)
2928    {
2929        case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, 
srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = 
textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, 
buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break;
2930    }
2931
2932
2933}
2934
// Compute kernel: copies one pixel per thread from a signed-integer texture
// into a linear byte buffer.
//
//   gIndices  - thread position in the grid; used as the pixel coordinate.
//   options   - copy parameters (copySize, bufferStartOffset, bufferRowPitch,
//               pixelSize are read here; textureRead receives the whole struct).
//   srcTexture* - the source texture bound at texture(0); which of the five
//               arguments exists is selected by the kCopyTextureType* function
//               constants.
//   buffer    - destination bytes.
//
// The destination layout is chosen by kCopyFormatType; when it matches none of
// the signed-integer formats below the thread writes nothing.
kernel void writeFromIntTextureToBuffer(
    ushort2 gIndices [[thread_position_in_grid]],
    constant WritePixelParams &options[[buffer(0)]],
    texture2d<int, access::read> srcTexture2d
        [[texture(0), function_constant(kCopyTextureType2D)]],
    texture2d_array<int, access::read> srcTexture2dArray
        [[texture(0), function_constant(kCopyTextureType2DArray)]],
    texture3d<int, access::read> srcTexture3d
        [[texture(0), function_constant(kCopyTextureType3D)]],
    texturecube<int, access::read> srcTextureCube
        [[texture(0), function_constant(kCopyTextureTypeCube)]],
    texture2d_ms<int, access::read> srcTexture2dMS
        [[texture(0), function_constant(kCopyTextureType2DMS)]],
    device uchar *buffer [[buffer(1)]])
{
    // Threads that fall outside the requested copy rectangle have nothing to do.
    if (gIndices.x >= options.copySize.x)
    {
        return;
    }
    if (gIndices.y >= options.copySize.y)
    {
        return;
    }
# 1464 "./copy_buffer.metal"
    uint dstByteOffset = options.bufferStartOffset +
                         (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);

    switch (kCopyFormatType)
    {
        case FormatID::R8_SINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR8_SINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R8G8_SINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR8G8_SINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R8G8B8A8_SINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR8G8B8A8_SINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R16_SINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR16_SINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R16G16_SINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR16G16_SINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R16G16B16A16_SINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR16G16B16A16_SINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R32_SINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR32_SINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R32G32_SINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR32G32_SINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R32G32B32A32_SINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR32G32B32A32_SINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
    }
}
2948
// Compute kernel: copies one pixel per thread from an unsigned-integer texture
// into a linear byte buffer.
//
//   gIndices  - thread position in the grid; used as the pixel coordinate.
//   options   - copy parameters (copySize, bufferStartOffset, bufferRowPitch,
//               pixelSize are read here; textureRead receives the whole struct).
//   srcTexture* - the source texture bound at texture(0); which of the five
//               arguments exists is selected by the kCopyTextureType* function
//               constants.
//   buffer    - destination bytes.
//
// The destination layout is chosen by kCopyFormatType; when it matches none of
// the unsigned-integer formats below the thread writes nothing.
kernel void writeFromUIntTextureToBuffer(
    ushort2 gIndices [[thread_position_in_grid]],
    constant WritePixelParams &options[[buffer(0)]],
    texture2d<uint, access::read> srcTexture2d
        [[texture(0), function_constant(kCopyTextureType2D)]],
    texture2d_array<uint, access::read> srcTexture2dArray
        [[texture(0), function_constant(kCopyTextureType2DArray)]],
    texture3d<uint, access::read> srcTexture3d
        [[texture(0), function_constant(kCopyTextureType3D)]],
    texturecube<uint, access::read> srcTextureCube
        [[texture(0), function_constant(kCopyTextureTypeCube)]],
    texture2d_ms<uint, access::read> srcTexture2dMS
        [[texture(0), function_constant(kCopyTextureType2DMS)]],
    device uchar *buffer [[buffer(1)]])
{
    // Threads that fall outside the requested copy rectangle have nothing to do.
    if (gIndices.x >= options.copySize.x)
    {
        return;
    }
    if (gIndices.y >= options.copySize.y)
    {
        return;
    }
# 1489 "./copy_buffer.metal"
    uint dstByteOffset = options.bufferStartOffset +
                         (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);

    switch (kCopyFormatType)
    {
        case FormatID::R8_UINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR8_UINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R8G8_UINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR8G8_UINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R8G8B8A8_UINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR8G8B8A8_UINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R16_UINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR16_UINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R16G16_UINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR16G16_UINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R16G16B16A16_UINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR16G16B16A16_UINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R32_UINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR32_UINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R32G32_UINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR32G32_UINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
        case FormatID::R32G32B32A32_UINT:
        {
            auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray,
                                     srcTexture3d, srcTextureCube, srcTexture2dMS);
            writeR32G32B32A32_UINT(gIndices, options, dstByteOffset, color, buffer);
            break;
        }
    }
}
2962
2963
// Parameters shared by the vertex-conversion entry points below
// (convertToFloatVertexFormat* and expandVertexFormatComponents*), bound as a
// constant buffer at buffer(0).
// NOTE(review): the field order/types presumably have to match the host-side
// struct that fills this buffer - confirm before changing the layout.
2964struct CopyVertexParams
2965{
2966    uint srcBufferStartOffset;  // byte offset of vertex 0 in the source buffer
2967    uint srcStride;             // bytes between consecutive source vertices
2968    uint srcComponentBytes;     // bytes per component; used by the expand path for copy, fill and alpha writes
2969    uint srcComponents;         // number of components copied per vertex by the expand path
2970
2971
2972
    // Bytes written to the trailing destination component by
    // expandVertexFormatComponents; only the first srcComponentBytes bytes are used.
2973    uchar4 srcDefaultAlphaData;
2974
2975    uint dstBufferStartOffset;  // byte offset of vertex 0 in the destination buffer
2976    uint dstStride;             // bytes between consecutive destination vertices
2977    uint dstComponents;         // number of components written per destination vertex
2978
2979    uint vertexCount;           // compute entry points skip threads with index >= vertexCount
2980};
2981# 1539 "./copy_buffer.metal"
// Stores the first options.dstComponents lanes of `data` for vertex `idx`,
// converting each lane to float before handing it to floatToBytes.
//
//   options - supplies dstStride, dstBufferStartOffset and dstComponents.
//   idx     - vertex index; selects the destination vertex slot.
//   data    - four-lane vector of IntType values to convert.
//   dst     - destination buffer.
//
// Consecutive components are written 4 bytes apart.
template <typename IntType>
static inline void writeFloatVertex(constant CopyVertexParams &options,
                                    uint idx,
                                    vec<IntType, 4> data,
                                    device uchar *dst)
{
    uint byteCursor = options.dstBufferStartOffset + idx * options.dstStride;

    uint lane = 0;
    while (lane < options.dstComponents)
    {
        floatToBytes(static_cast<float>(data[lane]), byteCursor, dst);
        byteCursor += 4;
        ++lane;
    }
}
2995
// Specialization of writeFloatVertex for data that is already float: the lanes
// are passed to floatToBytes unchanged, without a static_cast.
//
//   options - supplies dstStride, dstBufferStartOffset and dstComponents.
//   idx     - vertex index; selects the destination vertex slot.
//   data    - four-lane float vector.
//   dst     - destination buffer.
template <>
inline void writeFloatVertex(constant CopyVertexParams &options,
                             uint idx,
                             vec<float, 4> data,
                             device uchar *dst)
{
    uint byteCursor = options.dstBufferStartOffset + idx * options.dstStride;

    // One float per destination component, 4 bytes apart.
    for (uint lane = 0; lane < options.dstComponents; ++lane)
    {
        floatToBytes(data[lane], byteCursor, dst);
        byteCursor += 4;
    }
}
3009
3010
// Converts one vertex from the source vertex format to float components.
//
//   index     - vertex index; the source vertex is read starting at
//               options.srcBufferStartOffset + options.srcStride * index.
//   options   - conversion parameters (see CopyVertexParams).
//   srcBuffer - source vertex bytes.
//   dstBuffer - destination bytes, written through writeFloatVertex.
//
// kCopyFormatType (declared elsewhere in this file) selects which read<Format>
// helper decodes the source bytes; the decoded value is then handed to
// writeFloatVertex. If kCopyFormatType matches none of the cases below,
// nothing is written for this vertex.
// The switch body is macro-expanded output, hence the very long lines.
3011static inline void convertToFloatVertexFormat(uint index,
3012                                              constant CopyVertexParams &options,
3013                                              constant uchar *srcBuffer,
3014                                              device uchar *dstBuffer)
3015{
    // Byte offset of this vertex inside srcBuffer.
3016# 1585 "./copy_buffer.metal"
3017    uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;
    // One case per supported source format: decode, then write as floats.
3018# 1594 "./copy_buffer.metal"
3019    switch (kCopyFormatType)
3020    {
3021        case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case 
FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); 
writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, 
dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case 
FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case 
FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case 
FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
3022    }
3023
3024
3025}
3026
3027
// Compute entry point for the float vertex conversion: one thread per vertex.
//
//   index     - thread position in the grid, used as the vertex index.
//   options   - conversion parameters, bound at buffer(0).
//   srcBuffer - source vertex bytes, bound at buffer(1).
//   dstBuffer - destination bytes, bound at buffer(2).
//
// Threads whose index is not below options.vertexCount do nothing.
kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
                                         constant CopyVertexParams &options [[buffer(0)]],
                                         constant uchar *srcBuffer [[buffer(1)]],
                                         device uchar *dstBuffer [[buffer(2)]])
{
    if (index < options.vertexCount)
    {
        convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
    }
}
3036
3037
// Vertex-stage entry point for the float vertex conversion: the vertex id is
// used as the vertex index and the result is written to dstBuffer; the
// function itself returns nothing.
//
//   index     - vertex id of the invocation.
//   options   - conversion parameters, bound at buffer(0).
//   srcBuffer - source vertex bytes, bound at buffer(1).
//   dstBuffer - destination bytes, bound at buffer(2).
//
// NOTE(review): unlike the compute variant there is no check against
// options.vertexCount here; presumably the draw call's vertex range bounds
// `index` - confirm against the caller.
vertex void convertToFloatVertexFormatVS(
    uint index [[vertex_id]],
    constant CopyVertexParams &options [[buffer(0)]],
    constant uchar *srcBuffer [[buffer(1)]],
    device uchar *dstBuffer [[buffer(2)]])
{
    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
}
3045
3046
// Copies one vertex from srcBuffer to dstBuffer, widening it from
// options.srcComponents to options.dstComponents components. Every component
// occupies options.srcComponentBytes bytes on both sides.
//
//   index     - vertex index; selects the source and destination vertex slots.
//   options   - strides, start offsets, component counts and default alpha.
//   srcBuffer - source vertex bytes.
//   dstBuffer - destination vertex bytes.
//
// Three phases, all advancing the same destination cursor:
//   1. copy the source components byte by byte;
//   2. zero-fill further components up to min(dstComponents, 3);
//   3. if a destination component is still missing, fill it from
//      options.srcDefaultAlphaData.
static inline void expandVertexFormatComponents(uint index,
                                                constant CopyVertexParams &options,
                                                constant uchar *srcBuffer,
                                                device uchar *dstBuffer)
{
    const uint bytesPerComponent = options.srcComponentBytes;

    uint readPos  = options.srcBufferStartOffset + options.srcStride * index;
    uint writePos = options.dstBufferStartOffset + options.dstStride * index;

    // Components below this index are zero-filled rather than given the default alpha.
    const uint zeroFillEnd = min(options.dstComponents, 3u);

    uint comp = 0;

    // Phase 1: straight copy of everything the source provides.
    while (comp < options.srcComponents)
    {
        for (uint b = 0; b < bytesPerComponent; ++b)
        {
            dstBuffer[writePos + b] = srcBuffer[readPos + b];
        }
        readPos += bytesPerComponent;
        writePos += bytesPerComponent;
        ++comp;
    }

    // Phase 2: missing non-alpha components become zero.
    while (comp < zeroFillEnd)
    {
        for (uint b = 0; b < bytesPerComponent; ++b)
        {
            dstBuffer[writePos + b] = 0;
        }
        writePos += bytesPerComponent;
        ++comp;
    }

    // Phase 3: one remaining component receives the default alpha bytes.
    if (comp < options.dstComponents)
    {
        for (uint b = 0; b < bytesPerComponent; ++b)
        {
            dstBuffer[writePos + b] = options.srcDefaultAlphaData[b];
        }
    }
}
3084
3085
// Compute entry point for component expansion: one thread per vertex.
//
//   index     - thread position in the grid, used as the vertex index.
//   options   - expansion parameters, bound at buffer(0).
//   srcBuffer - source vertex bytes, bound at buffer(1).
//   dstBuffer - destination bytes, bound at buffer(2).
//
// Threads whose index is not below options.vertexCount do nothing.
kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
                                           constant CopyVertexParams &options [[buffer(0)]],
                                           constant uchar *srcBuffer [[buffer(1)]],
                                           device uchar *dstBuffer [[buffer(2)]])
{
    if (index < options.vertexCount)
    {
        expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
    }
}
3095
3096
// Vertex-stage entry point for component expansion: the vertex id is used as
// the vertex index and the result is written to dstBuffer; the function
// itself returns nothing.
//
//   index     - vertex id of the invocation.
//   options   - expansion parameters, bound at buffer(0).
//   srcBuffer - source vertex bytes, bound at buffer(1).
//   dstBuffer - destination bytes, bound at buffer(2).
//
// NOTE(review): unlike the compute variant there is no check against
// options.vertexCount here; presumably the draw call's vertex range bounds
// `index` - confirm against the caller.
vertex void expandVertexFormatComponentsVS(
    uint index [[vertex_id]],
    constant CopyVertexParams &options [[buffer(0)]],
    constant uchar *srcBuffer [[buffer(1)]],
    device uchar *dstBuffer [[buffer(2)]])
{
    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
}
3104# 6 "temp_master_source.metal" 2
3105# 1 "./visibility.metal" 1
3106
3107
3108
3109
3110
3111
3112
3113
// Function constant (index 1000) read by combineVisibilityResult: when true the
// kernel starts from the value already stored in finalResults[0]; when false it
// starts from zero.
3114constant bool kCombineWithExistingResult [[function_constant(1000)]];
3115
3116
3117
// Parameters for combineVisibilityResult, bound as a constant buffer at
// buffer(0). Both fields index ushort4 entries of renderpassVisibilityResult.
// NOTE(review): layout presumably has to match the host-side struct - confirm.
3118struct CombineVisibilityResultOptions
3119{
3120
    // Index of the first entry to combine.
3121    uint startOffset;
3122
    // Number of consecutive entries, starting at startOffset, to combine.
3123    uint numOffsets;
3124};
3125
// Folds a range of per-render-pass visibility results into one final value by
// bitwise OR, so any bit set in any input stays set in the output.
//
//   idx     - thread position in the grid; only the thread at position 0 works.
//   options - startOffset / numOffsets selecting the entries to combine.
//   renderpassVisibilityResult - input entries, bound at buffer(1).
//   finalResults - output, bound at buffer(2); only element 0 is read or written.
//
// When kCombineWithExistingResult is set, the current finalResults[0] is
// included in the OR; otherwise the accumulation starts from zero.
// NOTE(review): the four ushort lanes presumably carry one 64-bit query value -
// confirm against the host code that reads finalResults.
kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]],
                                    constant CombineVisibilityResultOptions &options [[buffer(0)]],
                                    constant ushort4 *renderpassVisibilityResult [[buffer(1)]],
                                    device ushort4 *finalResults [[buffer(2)]])
{
    // The reduction is serial; every thread other than the first exits.
    if (idx != 0)
    {
        return;
    }

    ushort4 combined = ushort4(0, 0, 0, 0);
    if (kCombineWithExistingResult)
    {
        combined = finalResults[0];
    }

    uint i = 0;
    while (i < options.numOffsets)
    {
        combined = combined | renderpassVisibilityResult[options.startOffset + i];
        ++i;
    }

    finalResults[0] = combined;
}
3159# 7 "temp_master_source.metal" 2
3160
3161
3162)";
3163