• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//
2// Copyright 2020 The ANGLE Project. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE file.
5//
// copy_buffer.metal: implements compute shaders that copy formatted data from buffer to texture,
// from texture to buffer and from buffer to buffer.
// NOTE(hqle): This file is a bit hard to read, but there is a lot of repetitive work and it would
// be a pain to implement without the use of macros.
10//
11
12@@#include <metal_pack>
13
14#include "common.h"
15#include "format_autogen.h"
16
17using namespace rx::mtl_shader;
18
// Function constant 10.
// NOTE(review): the name suggests it selects the pixel format a pipeline is specialized for;
// its consumer is not in this part of the file -- confirm.
constant int kCopyFormatType [[function_constant(10)]];

/* -------- copy pixel data between buffer and texture ---------*/
// Function constant 20: texture type handled by the specialized pipeline. The booleans below are
// derived from it and gate the per-type texture arguments declared by TEXTURE_PARAMS.
constant int kCopyTextureType [[function_constant(20)]];
constant bool kCopyTextureType2D      = (kCopyTextureType == kTextureType2D);
constant bool kCopyTextureType2DArray = (kCopyTextureType == kTextureType2DArray);
constant bool kCopyTextureType2DMS    = (kCopyTextureType == kTextureType2DMultisample);
constant bool kCopyTextureTypeCube    = (kCopyTextureType == kTextureTypeCube);
constant bool kCopyTextureType3D      = (kCopyTextureType == kTextureType3D);
28
// Options for the buffer -> texture copy path (see COMMON_READ_KERNEL_PARAMS).
// Field order is left untouched.
// NOTE(review): the layout is presumably shared with host code -- confirm.
struct CopyPixelParams
{
    // Extent of the copy; threads whose index is >= copySize exit in READ_KERNEL_GUARD.
    uint3 copySize;
    // Added to the thread index to obtain the destination texel (see textureWrite).
    uint3 textureOffset;

    // Base byte offset into the buffer (see CALC_BUFFER_READ_OFFSET).
    uint bufferStartOffset;
    // NOTE(review): presumably the size in bytes of one pixel in the buffer -- confirm.
    uint pixelSize;
    // Multiplied by the thread's y index in CALC_BUFFER_READ_OFFSET.
    uint bufferRowPitch;
    // Multiplied by the thread's z index in CALC_BUFFER_READ_OFFSET.
    uint bufferDepthPitch;
};
39
// Options for the texture -> buffer copy path (see COMMON_WRITE_KERNEL_PARAMS).
// Field order is left untouched.
// NOTE(review): the layout is presumably shared with host code -- confirm.
struct WritePixelParams
{
    // Extent of the copy; threads whose index is >= copySize exit in WRITE_KERNEL_GUARD.
    uint2 copySize;
    // Added to the (optionally row-reversed) thread index to obtain the source texel.
    uint2 textureOffset;

    // Base byte offset into the buffer (see CALC_BUFFER_WRITE_OFFSET).
    uint bufferStartOffset;

    // NOTE(review): presumably the size in bytes of one pixel in the buffer -- confirm.
    uint pixelSize;
    // Multiplied by the thread's y index in CALC_BUFFER_WRITE_OFFSET.
    uint bufferRowPitch;

    // Level argument passed to the texture read() calls in textureRead.
    uint textureLevel;
    // Array slice / cube face / 3D depth coordinate used by textureRead.
    uint textureLayer;

    // When true, textureRead reads row (copySize.y - 1 - y) instead of row y.
    bool reverseTextureRowOrder;
};
55
56// clang-format off
// Declares one texture argument per texture type, all bound to texture slot 0. Each argument is
// tagged with the function constant of its type (kCopyTextureType2D, ...).
//   TYPE        - texel component type
//   ACCESS      - access qualifier (access::read / access::write)
//   NAME_PREFIX - prefix of the generated argument names (<prefix>Texture2d, ...)
#define TEXTURE_PARAMS(TYPE, ACCESS, NAME_PREFIX)                   \
    texture2d<TYPE, ACCESS>                                         \
        NAME_PREFIX##Texture2d                                      \
        [[texture(0), function_constant(kCopyTextureType2D)]],      \
    texture2d_array<TYPE, ACCESS>                                   \
        NAME_PREFIX##Texture2dArray                                 \
        [[texture(0), function_constant(kCopyTextureType2DArray)]], \
    texture3d<TYPE, ACCESS>                                         \
        NAME_PREFIX##Texture3d                                      \
        [[texture(0), function_constant(kCopyTextureType3D)]],      \
    texturecube<TYPE, ACCESS>                                       \
        NAME_PREFIX##TextureCube                                    \
        [[texture(0), function_constant(kCopyTextureTypeCube)]]

// Passes the arguments declared by TEXTURE_PARAMS(_, _, NAME_PREFIX) on to another function,
// in the same order.
#define FORWARD_TEXTURE_PARAMS(NAME_PREFIX)                  \
    NAME_PREFIX##Texture2d, NAME_PREFIX##Texture2dArray,     \
    NAME_PREFIX##Texture3d, NAME_PREFIX##TextureCube
72
73// Params for reading from buffer to texture
// Write-only destination texture arguments, named dst*.
#define DEST_TEXTURE_PARAMS(TYPE) TEXTURE_PARAMS(TYPE, access::write, dst)
// Forwards the dst* texture arguments.
#define FORWARD_DEST_TEXTURE_PARAMS FORWARD_TEXTURE_PARAMS(dst)

// Argument list of a buffer -> texture kernel: 3D thread index, copy options in buffer slot 0,
// source bytes in buffer slot 1, and the destination textures.
#define COMMON_READ_KERNEL_PARAMS(TEXTURE_TYPE)      \
    ushort3 gIndices [[thread_position_in_grid]],    \
    constant CopyPixelParams &options [[buffer(0)]], \
    constant uchar *buffer [[buffer(1)]],            \
    DEST_TEXTURE_PARAMS(TEXTURE_TYPE)

// Argument list of the per-format read<Format>() helpers.
#define COMMON_READ_FUNC_PARAMS uint bufferOffset, constant uchar *buffer

// Forwards the arguments declared by COMMON_READ_FUNC_PARAMS.
#define FORWARD_COMMON_READ_FUNC_PARAMS bufferOffset, buffer
88
// Params for writing to buffer by copying from texture.
// (NOTE: it has an additional multisample source texture parameter)
// Read-only source texture arguments, named src*, plus a multisample 2D texture that is also
// bound to texture slot 0 and gated by kCopyTextureType2DMS.
#define SRC_TEXTURE_PARAMS(TYPE)                            \
    TEXTURE_PARAMS(TYPE, access::read, src),                \
    texture2d_ms<TYPE, access::read> srcTexture2dMS         \
    [[texture(0), function_constant(kCopyTextureType2DMS)]]

// Forwards the arguments declared by SRC_TEXTURE_PARAMS.
#define FORWARD_SRC_TEXTURE_PARAMS FORWARD_TEXTURE_PARAMS(src), srcTexture2dMS

// Argument list of a texture -> buffer kernel: 2D thread index, copy options in buffer slot 0,
// the source textures, and the destination bytes in buffer slot 1.
#define COMMON_WRITE_KERNEL_PARAMS(TEXTURE_TYPE)      \
    ushort2 gIndices [[thread_position_in_grid]],     \
    constant WritePixelParams &options [[buffer(0)]], \
    SRC_TEXTURE_PARAMS(TEXTURE_TYPE),                 \
    device uchar *buffer [[buffer(1)]]

// Argument list of the per-format write<Format>() helpers; TYPE is the component type of the
// color being stored.
#define COMMON_WRITE_FUNC_PARAMS(TYPE)  \
    ushort2 gIndices,                   \
    constant WritePixelParams &options, \
    uint bufferOffset,                  \
    vec<TYPE, 4> color,                 \
    device uchar *buffer

// Per-component-type shorthands of COMMON_WRITE_FUNC_PARAMS.
#define COMMON_WRITE_FLOAT_FUNC_PARAMS COMMON_WRITE_FUNC_PARAMS(float)
#define COMMON_WRITE_SINT_FUNC_PARAMS  COMMON_WRITE_FUNC_PARAMS(int)
#define COMMON_WRITE_UINT_FUNC_PARAMS  COMMON_WRITE_FUNC_PARAMS(uint)

// Forwards the arguments declared by COMMON_WRITE_FUNC_PARAMS.
#define FORWARD_COMMON_WRITE_FUNC_PARAMS gIndices, options, bufferOffset, color, buffer
116
117// clang-format on
118
119// Write to texture code based on texture type:
// Stores `color` into whichever destination texture matches kCopyTextureType.
// The texel position is options.textureOffset + gIndices. For 2D array and cube textures the z
// component is passed as the extra index argument of write(); for 3D textures it is part of the
// coordinate. Nothing is written when no texture type matches.
template <typename T>
static inline void textureWrite(ushort3 gIndices,
                                constant CopyPixelParams &options,
                                vec<T, 4> color,
                                DEST_TEXTURE_PARAMS(T))
{
    uint3 dstCoords = options.textureOffset + uint3(gIndices);

    if (kCopyTextureType2D)
    {
        dstTexture2d.write(color, dstCoords.xy);
    }
    else if (kCopyTextureType2DArray)
    {
        dstTexture2dArray.write(color, dstCoords.xy, dstCoords.z);
    }
    else if (kCopyTextureType3D)
    {
        dstTexture3d.write(color, dstCoords);
    }
    else if (kCopyTextureTypeCube)
    {
        dstTextureCube.write(color, dstCoords.xy, dstCoords.z);
    }
}
143
144// Read from texture code based on texture type:
// Loads one texel from whichever source texture matches kCopyTextureType.
// The texel position is gIndices (with the row mirrored inside the copy region when
// options.reverseTextureRowOrder is set) plus options.textureOffset. options.textureLayer is used
// as array slice, cube face or 3D depth coordinate; options.textureLevel is the level argument of
// read(). Multisample textures go through resolveTextureMS(). The result is left uninitialized
// when no texture type matches.
template <typename T>
static inline vec<T, 4> textureRead(ushort2 gIndices,
                                    constant WritePixelParams &options,
                                    SRC_TEXTURE_PARAMS(T))
{
    uint2 texel = uint2(gIndices);
    if (options.reverseTextureRowOrder)
    {
        texel.y = options.copySize.y - 1 - gIndices.y;
    }
    texel += options.textureOffset;

    vec<T, 4> color;
    if (kCopyTextureType2D)
    {
        color = srcTexture2d.read(texel.xy, options.textureLevel);
    }
    else if (kCopyTextureType2DArray)
    {
        color = srcTexture2dArray.read(texel.xy, options.textureLayer, options.textureLevel);
    }
    else if (kCopyTextureType2DMS)
    {
        color = resolveTextureMS(srcTexture2dMS, texel.xy);
    }
    else if (kCopyTextureType3D)
    {
        color = srcTexture3d.read(uint3(texel, options.textureLayer), options.textureLevel);
    }
    else if (kCopyTextureTypeCube)
    {
        color = srcTextureCube.read(texel.xy, options.textureLayer, options.textureLevel);
    }
    return color;
}
177
178// Calculate offset into buffer:
// Byte offset of the pixel handled by the current thread. Both macros expect `options` and
// `gIndices` to be in scope at the point of use.
// The whole expansion and the pixelSize argument are parenthesized so the macros stay correct
// when used inside a larger expression or invoked with a compound argument.
#define CALC_BUFFER_READ_OFFSET(pixelSize)                                \
    (options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + \
                                  gIndices.y * options.bufferRowPitch +   \
                                  gIndices.x * (pixelSize)))

#define CALC_BUFFER_WRITE_OFFSET(pixelSize)                             \
    (options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + \
                                  gIndices.x * (pixelSize)))
185
186// Per format handling code:
// Switch case for FormatID::<format> on the buffer -> texture path: decode one pixel with
// read<format>() and store it with textureWrite().
#define READ_FORMAT_SWITCH_CASE(format)                                      \
    case FormatID::format:                                                   \
    {                                                                        \
        auto color = read##format(FORWARD_COMMON_READ_FUNC_PARAMS);          \
        textureWrite(gIndices, options, color, FORWARD_DEST_TEXTURE_PARAMS); \
    }                                                                        \
    break;

// Switch case for FormatID::<format> on the texture -> buffer path: load one texel with
// textureRead() and encode it with write<format>().
#define WRITE_FORMAT_SWITCH_CASE(format)                                         \
    case FormatID::format:                                                       \
    {                                                                            \
        auto color = textureRead(gIndices, options, FORWARD_SRC_TEXTURE_PARAMS); \
        write##format(FORWARD_COMMON_WRITE_FUNC_PARAMS);                         \
    }                                                                            \
    break;
200
// Returns from the enclosing function when the thread index lies outside options.copySize
// (x, y and z are checked).
#define READ_KERNEL_GUARD                     \
    if (gIndices.x >= options.copySize.x ||   \
        gIndices.y >= options.copySize.y ||   \
        gIndices.z >= options.copySize.z)     \
    {                                         \
        return;                               \
    }

// Same as READ_KERNEL_GUARD for the 2D thread index of the write path (x and y only).
#define WRITE_KERNEL_GUARD                    \
    if (gIndices.x >= options.copySize.x ||   \
        gIndices.y >= options.copySize.y)     \
    {                                         \
        return;                               \
    }
213
214// R5G6B5
// Decodes a 16-bit R5G6B5 pixel read from `buffer` at `bufferOffset`.
// Red, green and blue come from getShiftedData<5, 11>, <6, 5> and <5, 0>; alpha is fixed at 1.
static inline float4 readR5G6B5_UNORM(COMMON_READ_FUNC_PARAMS)
{
    ushort packed = bytesToShort<ushort>(buffer, bufferOffset);

    float red   = normalizedToFloat<5>(getShiftedData<5, 11>(packed));
    float green = normalizedToFloat<6>(getShiftedData<6, 5>(packed));
    float blue  = normalizedToFloat<5>(getShiftedData<5, 0>(packed));
    return float4(red, green, blue, 1.0f);
}
// Encodes color.rgb as a 16-bit R5G6B5 value and stores it at `bufferOffset`; alpha is dropped.
static inline void writeR5G6B5_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    ushort redBits   = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r));
    ushort greenBits = shiftData<6, 5>(floatToNormalized<6, ushort>(color.g));
    ushort blueBits  = shiftData<5, 0>(floatToNormalized<5, ushort>(color.b));

    ushort packed = redBits | greenBits | blueBits;
    shortToBytes(packed, bufferOffset, buffer);
}
234
235// R4G4B4A4
// Decodes a 16-bit R4G4B4A4 pixel read from `buffer` at `bufferOffset`.
// The four 4-bit fields are taken with shifts 12 (red), 8 (green), 4 (blue) and 0 (alpha).
static inline float4 readR4G4B4A4_UNORM(COMMON_READ_FUNC_PARAMS)
{
    ushort packed = bytesToShort<ushort>(buffer, bufferOffset);

    float red   = normalizedToFloat<4>(getShiftedData<4, 12>(packed));
    float green = normalizedToFloat<4>(getShiftedData<4, 8>(packed));
    float blue  = normalizedToFloat<4>(getShiftedData<4, 4>(packed));
    float alpha = normalizedToFloat<4>(getShiftedData<4, 0>(packed));
    return float4(red, green, blue, alpha);
}
// Encodes `color` as a 16-bit R4G4B4A4 value and stores it at `bufferOffset`.
static inline void writeR4G4B4A4_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    ushort redBits   = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r));
    ushort greenBits = shiftData<4, 8>(floatToNormalized<4, ushort>(color.g));
    ushort blueBits  = shiftData<4, 4>(floatToNormalized<4, ushort>(color.b));
    ushort alphaBits = shiftData<4, 0>(floatToNormalized<4, ushort>(color.a));

    ushort packed = redBits | greenBits | blueBits | alphaBits;
    shortToBytes(packed, bufferOffset, buffer);
}
257
258// R5G5B5A1
// Decodes a 16-bit R5G5B5A1 pixel read from `buffer` at `bufferOffset`.
// The 5-bit color fields use shifts 11, 6 and 1; the 1-bit alpha uses shift 0.
static inline float4 readR5G5B5A1_UNORM(COMMON_READ_FUNC_PARAMS)
{
    ushort packed = bytesToShort<ushort>(buffer, bufferOffset);

    float red   = normalizedToFloat<5>(getShiftedData<5, 11>(packed));
    float green = normalizedToFloat<5>(getShiftedData<5, 6>(packed));
    float blue  = normalizedToFloat<5>(getShiftedData<5, 1>(packed));
    float alpha = normalizedToFloat<1>(getShiftedData<1, 0>(packed));
    return float4(red, green, blue, alpha);
}
// Encodes `color` as a 16-bit R5G5B5A1 value and stores it at `bufferOffset`.
static inline void writeR5G5B5A1_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    ushort redBits   = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r));
    ushort greenBits = shiftData<5, 6>(floatToNormalized<5, ushort>(color.g));
    ushort blueBits  = shiftData<5, 1>(floatToNormalized<5, ushort>(color.b));
    ushort alphaBits = shiftData<1, 0>(floatToNormalized<1, ushort>(color.a));

    ushort packed = redBits | greenBits | blueBits | alphaBits;
    shortToBytes(packed, bufferOffset, buffer);
}
280
281// R10G10B10A2_SINT
// Decodes a 32-bit R10G10B10A2 signed-integer pixel read from `buffer` at `bufferOffset`.
// Red, green and blue are the 10-bit fields at shifts 0, 10 and 20; alpha is the 2-bit field at
// shift 30. Each field is sign-extended by hand: when its top bit is set, all higher bits of the
// 32-bit result are set as well.
static inline int4 readR10G10B10A2_SINT(COMMON_READ_FUNC_PARAMS)
{
    int4 color;
    int src = bytesToInt<int>(buffer, bufferOffset);

    constexpr int3 rgbSignMask(0x200);        // bit 9: sign bit of a 10-bit field
    constexpr int3 negativeMask(0xfffffc00);  // all bits from 10 to 31 set to 1
    constexpr int alphaSignMask = 0x2;        // bit 1: sign bit of the 2-bit alpha field
    constexpr int alphaNegMask  = 0xfffffffc; // all bits from 2 to 31 set to 1

    color.r = getShiftedData<10, 0>(src);
    color.g = getShiftedData<10, 10>(src);
    color.b = getShiftedData<10, 20>(src);

    // 1 where the channel is negative, 0 otherwise; multiplying by the mask selects it.
    int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9;
    color.rgb          = (isRgbNegative * negativeMask) | color.rgb;

    color.a = getShiftedData<2, 30>(src);
    // The parentheses are required: `>>` binds tighter than `&`, so without them the test would
    // look at bit 0 instead of the sign bit (bit 1).
    int isAlphaNegative = (color.a & alphaSignMask) >> 1;
    color.a             = (isAlphaNegative * alphaNegMask) | color.a;
    return color;
}
304// R10G10B10A2_UINT
// Decodes a 32-bit R10G10B10A2 unsigned-integer pixel read from `buffer` at `bufferOffset`.
// Red, green and blue are the 10-bit fields at shifts 0, 10 and 20; alpha is the 2-bit field at
// shift 30.
static inline uint4 readR10G10B10A2_UINT(COMMON_READ_FUNC_PARAMS)
{
    uint packed = bytesToInt<uint>(buffer, bufferOffset);

    uint red   = getShiftedData<10, 0>(packed);
    uint green = getShiftedData<10, 10>(packed);
    uint blue  = getShiftedData<10, 20>(packed);
    uint alpha = getShiftedData<2, 30>(packed);
    return uint4(red, green, blue, alpha);
}
316
317// R8G8B8A8 generic
// Reads a 32-bit value from `buffer` at `bufferOffset` and unpacks it into four normalized
// floats, using the sRGB-decoding unpack function when `isSRGB` is true.
static inline float4 readR8G8B8A8(COMMON_READ_FUNC_PARAMS, bool isSRGB)
{
    uint packed = bytesToInt<uint>(buffer, bufferOffset);

    if (isSRGB)
    {
        return unpack_unorm4x8_srgb_to_float(packed);
    }
    return unpack_unorm4x8_to_float(packed);
}
// Packs `color` into a 32-bit unorm4x8 value (sRGB-encoding variant when `isSRGB` is true) and
// stores it at `bufferOffset`.
static inline void writeR8G8B8A8(COMMON_WRITE_FLOAT_FUNC_PARAMS, bool isSRGB)
{
    uint packed = isSRGB ? pack_float_to_srgb_unorm4x8(color) : pack_float_to_unorm4x8(color);

    intToBytes(packed, bufferOffset, buffer);
}
348
// Reads three consecutive bytes starting at `bufferOffset` as normalized red, green and blue;
// alpha is fixed at 1. When `isSRGB` is true the result is passed through sRGBtoLinear().
static inline float4 readR8G8B8(COMMON_READ_FUNC_PARAMS, bool isSRGB)
{
    constant uchar *src = buffer + bufferOffset;

    float red   = normalizedToFloat<uchar>(src[0]);
    float green = normalizedToFloat<uchar>(src[1]);
    float blue  = normalizedToFloat<uchar>(src[2]);

    float4 color = float4(red, green, blue, 1.0f);
    if (isSRGB)
    {
        color = sRGBtoLinear(color);
    }
    return color;
}
// Packs `color` (with alpha forced to 1) into a unorm4x8 value, sRGB-encoding it when `isSRGB`
// is true, and stores it with int24bitToBytes() at `bufferOffset`.
static inline void writeR8G8B8(COMMON_WRITE_FLOAT_FUNC_PARAMS, bool isSRGB)
{
    color.a = 1.0f;

    uint packed = isSRGB ? pack_float_to_srgb_unorm4x8(color) : pack_float_to_unorm4x8(color);
    int24bitToBytes(packed, bufferOffset, buffer);
}
378
379// RGBA8_SNORM
// Reads a 32-bit value from `buffer` at `bufferOffset` and unpacks it as four signed normalized
// 8-bit components.
static inline float4 readR8G8B8A8_SNORM(COMMON_READ_FUNC_PARAMS)
{
    uint packed = bytesToInt<uint>(buffer, bufferOffset);
    return unpack_snorm4x8_to_float(packed);
}
// Packs `color` as four signed normalized 8-bit components and stores the 32-bit result at
// `bufferOffset`.
static inline void writeR8G8B8A8_SNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    uint packedSnorm = pack_float_to_snorm4x8(color);
    intToBytes(packedSnorm, bufferOffset, buffer);
}
395
396// RGB8_SNORM
// Reads three consecutive bytes starting at `bufferOffset` as signed normalized red, green and
// blue (normalizedToFloat<7, char>); alpha is fixed at 1.
static inline float4 readR8G8B8_SNORM(COMMON_READ_FUNC_PARAMS)
{
    constant uchar *src = buffer + bufferOffset;

    float red   = normalizedToFloat<7, char>(src[0]);
    float green = normalizedToFloat<7, char>(src[1]);
    float blue  = normalizedToFloat<7, char>(src[2]);
    return float4(red, green, blue, 1.0f);
}
// Packs `color` as signed normalized 8-bit components and stores the result with
// int24bitToBytes() at `bufferOffset`.
static inline void writeR8G8B8_SNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    uint packedSnorm = pack_float_to_snorm4x8(color);
    int24bitToBytes(packedSnorm, bufferOffset, buffer);
}
413
414// RGBA8
// RGBA8 read without sRGB decoding; see readR8G8B8A8.
static inline float4 readR8G8B8A8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    float4 color = readR8G8B8A8(FORWARD_COMMON_READ_FUNC_PARAMS, false);
    return color;
}
// RGBA8 write without sRGB encoding; see writeR8G8B8A8.
static inline void writeR8G8B8A8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    writeR8G8B8A8(FORWARD_COMMON_WRITE_FUNC_PARAMS, false);
}
423
// RGBA8 read with sRGB decoding; see readR8G8B8A8.
static inline float4 readR8G8B8A8_UNORM_SRGB(COMMON_READ_FUNC_PARAMS)
{
    float4 color = readR8G8B8A8(FORWARD_COMMON_READ_FUNC_PARAMS, true);
    return color;
}
// RGBA8 write with sRGB encoding; see writeR8G8B8A8.
static inline void writeR8G8B8A8_UNORM_SRGB(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    writeR8G8B8A8(FORWARD_COMMON_WRITE_FUNC_PARAMS, true);
}
432
433// BGRA8
// BGRA8 read: decodes the pixel as RGBA8 (no sRGB decoding), then swaps red and blue.
static inline float4 readB8G8R8A8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    float4 asRgba = readR8G8B8A8(FORWARD_COMMON_READ_FUNC_PARAMS, false);
    return asRgba.bgra;
}
// BGRA8 write: swaps red and blue, then stores the pixel as RGBA8 (no sRGB encoding).
static inline void writeB8G8R8A8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    color = color.bgra;
    writeR8G8B8A8(FORWARD_COMMON_WRITE_FUNC_PARAMS, false);
}
443
// BGRA8 sRGB read: decodes the pixel as sRGB RGBA8, then swaps red and blue.
static inline float4 readB8G8R8A8_UNORM_SRGB(COMMON_READ_FUNC_PARAMS)
{
    float4 asRgba = readR8G8B8A8(FORWARD_COMMON_READ_FUNC_PARAMS, true);
    return asRgba.bgra;
}
// BGRA8 sRGB write: swaps red and blue, then stores the pixel as sRGB RGBA8.
static inline void writeB8G8R8A8_UNORM_SRGB(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    color = color.bgra;
    writeR8G8B8A8(FORWARD_COMMON_WRITE_FUNC_PARAMS, true);
}
453
454// RGB8
// RGB8 read without sRGB decoding; see readR8G8B8.
static inline float4 readR8G8B8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    float4 color = readR8G8B8(FORWARD_COMMON_READ_FUNC_PARAMS, false);
    return color;
}
// RGB8 write without sRGB encoding; see writeR8G8B8.
static inline void writeR8G8B8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    writeR8G8B8(FORWARD_COMMON_WRITE_FUNC_PARAMS, false);
}
463
// RGB8 read with sRGB decoding; see readR8G8B8.
static inline float4 readR8G8B8_UNORM_SRGB(COMMON_READ_FUNC_PARAMS)
{
    float4 color = readR8G8B8(FORWARD_COMMON_READ_FUNC_PARAMS, true);
    return color;
}
// RGB8 write with sRGB encoding; see writeR8G8B8.
static inline void writeR8G8B8_UNORM_SRGB(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    writeR8G8B8(FORWARD_COMMON_WRITE_FUNC_PARAMS, true);
}
472
473// L8
// Reads one normalized byte at `bufferOffset` and replicates it to red, green and blue; alpha is
// fixed at 1.
static inline float4 readL8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    float3 luminance = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
    return float4(luminance, 1.0f);
}
// Stores the normalized red channel as a single byte at `bufferOffset`.
static inline void writeL8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
}
485
486// A8
// Stores the normalized alpha channel as a single byte at `bufferOffset`.
static inline void writeA8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.a);
}
491
492// L8A8
// Reads two normalized bytes at `bufferOffset`: the first is replicated to red, green and blue,
// the second becomes alpha.
static inline float4 readL8A8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    constant uchar *src = buffer + bufferOffset;

    float3 luminance = float3(normalizedToFloat<uchar>(src[0]));
    float alpha      = normalizedToFloat<uchar>(src[1]);
    return float4(luminance, alpha);
}
// Stores the normalized red channel followed by the normalized alpha channel as two bytes at
// `bufferOffset`.
static inline void writeL8A8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
    dst[1]            = floatToNormalized<uchar>(color.a);
}
505
506// R8
// Reads one normalized byte at `bufferOffset` into red; green and blue are 0, alpha is 1.
static inline float4 readR8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    float red = normalizedToFloat<uchar>(buffer[bufferOffset]);
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Stores the normalized red channel as a single byte at `bufferOffset`.
static inline void writeR8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
}
519
// Reads one signed normalized byte at `bufferOffset` into red; green and blue are 0, alpha is 1.
static inline float4 readR8_SNORM(COMMON_READ_FUNC_PARAMS)
{
    float red = normalizedToFloat<7, char>(buffer[bufferOffset]);
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Converts red to a signed normalized char and stores its bit pattern as one byte at
// `bufferOffset`.
static inline void writeR8_SNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    char red             = floatToNormalized<7, char>(color.r);
    buffer[bufferOffset] = as_type<uchar>(red);
}
532
533// R8_SINT
// Reads the byte at `bufferOffset` reinterpreted as a signed char into red; green and blue are
// 0, alpha is 1.
static inline int4 readR8_SINT(COMMON_READ_FUNC_PARAMS)
{
    int red = as_type<char>(buffer[bufferOffset]);
    return int4(red, 0, 0, 1);
}
// Stores red, converted to uchar, as a single byte at `bufferOffset`.
static inline void writeR8_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
}
546
547// R8_UINT
// Reads the byte at `bufferOffset` into red; green and blue are 0, alpha is 1.
static inline uint4 readR8_UINT(COMMON_READ_FUNC_PARAMS)
{
    uint red = as_type<uchar>(buffer[bufferOffset]);
    return uint4(red, 0u, 0u, 1u);
}
// Stores red, converted to uchar, as a single byte at `bufferOffset`.
static inline void writeR8_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
}
560
561// R8G8
// Reads two normalized bytes at `bufferOffset` into red and green; blue is 0, alpha is 1.
static inline float4 readR8G8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    constant uchar *src = buffer + bufferOffset;

    float red   = normalizedToFloat<uchar>(src[0]);
    float green = normalizedToFloat<uchar>(src[1]);
    return float4(red, green, 0.0f, 1.0f);
}
// Stores the normalized red and green channels as two bytes at `bufferOffset`.
static inline void writeR8G8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
    dst[1]            = floatToNormalized<uchar>(color.g);
}
576
// Reads two signed normalized bytes at `bufferOffset` into red and green; blue is 0, alpha is 1.
static inline float4 readR8G8_SNORM(COMMON_READ_FUNC_PARAMS)
{
    constant uchar *src = buffer + bufferOffset;

    float red   = normalizedToFloat<7, char>(src[0]);
    float green = normalizedToFloat<7, char>(src[1]);
    return float4(red, green, 0.0f, 1.0f);
}
// Converts red and green to signed normalized chars and stores their bit patterns as two bytes
// at `bufferOffset`.
static inline void writeR8G8_SNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    char red   = floatToNormalized<7, char>(color.r);
    char green = floatToNormalized<7, char>(color.g);

    buffer[bufferOffset]     = as_type<uchar>(red);
    buffer[bufferOffset + 1] = as_type<uchar>(green);
}
591
592// RG8_SINT
// Reads two bytes at `bufferOffset`, reinterpreted as signed chars, into red and green; blue is
// 0, alpha is 1.
static inline int4 readR8G8_SINT(COMMON_READ_FUNC_PARAMS)
{
    constant uchar *src = buffer + bufferOffset;

    int red   = as_type<char>(src[0]);
    int green = as_type<char>(src[1]);
    return int4(red, green, 0, 1);
}
// Stores red and green, each converted to uchar, as two bytes at `bufferOffset`.
static inline void writeR8G8_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
    dst[1]            = static_cast<uchar>(color.g);
}
607
608// RG8_UINT
// Reads two bytes at `bufferOffset` into red and green; blue is 0, alpha is 1.
static inline uint4 readR8G8_UINT(COMMON_READ_FUNC_PARAMS)
{
    constant uchar *src = buffer + bufferOffset;

    uint red   = as_type<uchar>(src[0]);
    uint green = as_type<uchar>(src[1]);
    return uint4(red, green, 0u, 1u);
}
// Stores red and green, each converted to uchar, as two bytes at `bufferOffset`.
static inline void writeR8G8_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
    dst[1]            = static_cast<uchar>(color.g);
}
623
624// R8G8B8_SINT
// Reads three bytes at `bufferOffset`, reinterpreted as signed chars, into red, green and blue;
// alpha is 1.
static inline int4 readR8G8B8_SINT(COMMON_READ_FUNC_PARAMS)
{
    constant uchar *src = buffer + bufferOffset;

    int red   = as_type<char>(src[0]);
    int green = as_type<char>(src[1]);
    int blue  = as_type<char>(src[2]);
    return int4(red, green, blue, 1);
}
634
635// R8G8B8_UINT
// Reads three bytes at `bufferOffset` into red, green and blue; alpha is 1.
static inline uint4 readR8G8B8_UINT(COMMON_READ_FUNC_PARAMS)
{
    constant uchar *src = buffer + bufferOffset;

    uint red   = as_type<uchar>(src[0]);
    uint green = as_type<uchar>(src[1]);
    uint blue  = as_type<uchar>(src[2]);
    return uint4(red, green, blue, 1u);
}
645
// R8G8B8A8_SINT
// Reads four bytes at `bufferOffset`, reinterpreted as signed chars, into red, green, blue and
// alpha.
static inline int4 readR8G8B8A8_SINT(COMMON_READ_FUNC_PARAMS)
{
    constant uchar *src = buffer + bufferOffset;

    int red   = as_type<char>(src[0]);
    int green = as_type<char>(src[1]);
    int blue  = as_type<char>(src[2]);
    int alpha = as_type<char>(src[3]);
    return int4(red, green, blue, alpha);
}
// Stores red, green, blue and alpha, each converted to uchar, as four bytes at `bufferOffset`.
static inline void writeR8G8B8A8_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
    dst[1]            = static_cast<uchar>(color.g);
    dst[2]            = static_cast<uchar>(color.b);
    dst[3]            = static_cast<uchar>(color.a);
}
663
// R8G8B8A8_UINT
// Reads four bytes at `bufferOffset` into red, green, blue and alpha.
static inline uint4 readR8G8B8A8_UINT(COMMON_READ_FUNC_PARAMS)
{
    constant uchar *src = buffer + bufferOffset;

    uint red   = as_type<uchar>(src[0]);
    uint green = as_type<uchar>(src[1]);
    uint blue  = as_type<uchar>(src[2]);
    uint alpha = as_type<uchar>(src[3]);
    return uint4(red, green, blue, alpha);
}
// Stores red, green, blue and alpha, each converted to uchar, as four bytes at `bufferOffset`.
static inline void writeR8G8B8A8_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
    dst[1]            = static_cast<uchar>(color.g);
    dst[2]            = static_cast<uchar>(color.b);
    dst[3]            = static_cast<uchar>(color.a);
}
681
682// R16_FLOAT
// Reads a 16-bit value at `bufferOffset`, reinterprets it as a half and returns it in red;
// green and blue are 0, alpha is 1.
static inline float4 readR16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    float red = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Converts red to half and stores its 16-bit pattern at `bufferOffset`.
static inline void writeR16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    half red = static_cast<half>(color.r);
    shortToBytes(as_type<ushort>(red), bufferOffset, buffer);
}
695// R16_NORM
// Reads one 16-bit normalized value of type ShortType at `bufferOffset` into red; green and blue
// are 0, alpha is 1. Instantiated with short (SNORM) and ushort (UNORM) by the macros below.
template <typename ShortType>
static inline float4 readR16_NORM(COMMON_READ_FUNC_PARAMS)
{
    float red = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    return float4(red, 0.0f, 0.0f, 1.0f);
}
#define readR16_SNORM readR16_NORM<short>
#define readR16_UNORM readR16_NORM<ushort>
707
708// R16_SINT
// Reads a signed 16-bit value at `bufferOffset` into red; green and blue are 0, alpha is 1.
static inline int4 readR16_SINT(COMMON_READ_FUNC_PARAMS)
{
    int red = bytesToShort<short>(buffer, bufferOffset);
    return int4(red, 0, 0, 1);
}
// Stores red, converted to short, as two bytes at `bufferOffset`.
static inline void writeR16_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    short red = static_cast<short>(color.r);
    shortToBytes(red, bufferOffset, buffer);
}
721
722// R16_UINT
// Reads an unsigned 16-bit value at `bufferOffset` into red; green and blue are 0, alpha is 1.
static inline uint4 readR16_UINT(COMMON_READ_FUNC_PARAMS)
{
    uint red = bytesToShort<ushort>(buffer, bufferOffset);
    return uint4(red, 0u, 0u, 1u);
}
// Stores red, converted to ushort, as two bytes at `bufferOffset`.
static inline void writeR16_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    ushort red = static_cast<ushort>(color.r);
    shortToBytes(red, bufferOffset, buffer);
}
735
736// A16_FLOAT
// Reads a 16-bit value at `bufferOffset`, reinterprets it as a half and returns it in alpha;
// red, green and blue are 0.
static inline float4 readA16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    float alpha = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    return float4(0.0f, 0.0f, 0.0f, alpha);
}
// Converts alpha to half and stores its 16-bit pattern at `bufferOffset`.
static inline void writeA16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    half alpha = static_cast<half>(color.a);
    shortToBytes(as_type<ushort>(alpha), bufferOffset, buffer);
}
748
749// L16_FLOAT
// Reads a 16-bit value at `bufferOffset`, reinterprets it as a half and replicates it to red,
// green and blue; alpha is 1.
static inline float4 readL16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    float luminance = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    return float4(luminance, luminance, luminance, 1.0f);
}
// Converts red to half and stores its 16-bit pattern at `bufferOffset`.
static inline void writeL16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    half luminance = static_cast<half>(color.r);
    shortToBytes(as_type<ushort>(luminance), bufferOffset, buffer);
}
761
762// L16A16_FLOAT
// Reads two halves at `bufferOffset`: the first is replicated to red, green and blue, the second
// (at +2 bytes) becomes alpha.
static inline float4 readL16A16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    float luminance = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    float alpha     = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    return float4(luminance, luminance, luminance, alpha);
}
// Converts red and alpha to half and stores them at `bufferOffset` and `bufferOffset + 2`.
static inline void writeL16A16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    half luminance = static_cast<half>(color.r);
    half alpha     = static_cast<half>(color.a);

    shortToBytes(as_type<ushort>(luminance), bufferOffset, buffer);
    shortToBytes(as_type<ushort>(alpha), bufferOffset + 2, buffer);
}
775
776// R16G16_FLOAT
// Reads two halves at `bufferOffset` and `bufferOffset + 2` into red and green; blue is 0,
// alpha is 1.
static inline float4 readR16G16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    float red   = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    float green = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    return float4(red, green, 0.0f, 1.0f);
}
// Converts red and green to half and stores them at `bufferOffset` and `bufferOffset + 2`.
static inline void writeR16G16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    half red   = static_cast<half>(color.r);
    half green = static_cast<half>(color.g);

    shortToBytes(as_type<ushort>(red), bufferOffset, buffer);
    shortToBytes(as_type<ushort>(green), bufferOffset + 2, buffer);
}
791
792// R16G16_NORM
// Reads two 16-bit normalized values of type ShortType at `bufferOffset` and `bufferOffset + 2`
// into red and green; blue is 0, alpha is 1. Instantiated with short (SNORM) and ushort (UNORM)
// by the macros below.
template <typename ShortType>
static inline float4 readR16G16_NORM(COMMON_READ_FUNC_PARAMS)
{
    float red   = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    float green = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
    return float4(red, green, 0.0f, 1.0f);
}
#define readR16G16_SNORM readR16G16_NORM<short>
#define readR16G16_UNORM readR16G16_NORM<ushort>
805
806// R16G16_SINT
// Reads two signed 16-bit values at `bufferOffset` and `bufferOffset + 2` into red and green;
// blue is 0, alpha is 1.
static inline int4 readR16G16_SINT(COMMON_READ_FUNC_PARAMS)
{
    int red   = bytesToShort<short>(buffer, bufferOffset);
    int green = bytesToShort<short>(buffer, bufferOffset + 2);
    return int4(red, green, 0, 1);
}
// Stores red and green, converted to short, at `bufferOffset` and `bufferOffset + 2`.
static inline void writeR16G16_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    short red   = static_cast<short>(color.r);
    short green = static_cast<short>(color.g);

    shortToBytes(red, bufferOffset, buffer);
    shortToBytes(green, bufferOffset + 2, buffer);
}
821
822// R16G16_UINT
// Reads two unsigned 16-bit values at `bufferOffset` and `bufferOffset + 2` into red and green;
// blue is 0, alpha is 1.
static inline uint4 readR16G16_UINT(COMMON_READ_FUNC_PARAMS)
{
    uint red   = bytesToShort<ushort>(buffer, bufferOffset);
    uint green = bytesToShort<ushort>(buffer, bufferOffset + 2);
    return uint4(red, green, 0u, 1u);
}
// Stores red and green, converted to ushort, at `bufferOffset` and `bufferOffset + 2`.
static inline void writeR16G16_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    ushort red   = static_cast<ushort>(color.r);
    ushort green = static_cast<ushort>(color.g);

    shortToBytes(red, bufferOffset, buffer);
    shortToBytes(green, bufferOffset + 2, buffer);
}
837
838// R16G16B16_FLOAT
// Reads three halves at `bufferOffset`, +2 and +4 bytes into red, green and blue; alpha is 1.
static inline float4 readR16G16B16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    float red   = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    float green = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    float blue  = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
    return float4(red, green, blue, 1.0f);
}
848
849// R16G16B16_NORM
// Reads three 16-bit normalized values of type ShortType at `bufferOffset`, +2 and +4 bytes into
// red, green and blue; alpha is 1. Instantiated with short (SNORM) and ushort (UNORM) by the
// macros below.
template <typename ShortType>
static inline float4 readR16G16B16_NORM(COMMON_READ_FUNC_PARAMS)
{
    float red   = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    float green = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
    float blue  = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
    return float4(red, green, blue, 1.0f);
}
#define readR16G16B16_SNORM readR16G16B16_NORM<short>
#define readR16G16B16_UNORM readR16G16B16_NORM<ushort>
862// R16G16B16_SINT
// Reads three signed 16-bit values at `bufferOffset`, +2 and +4 bytes into red, green and blue;
// alpha is 1.
static inline int4 readR16G16B16_SINT(COMMON_READ_FUNC_PARAMS)
{
    int red   = bytesToShort<short>(buffer, bufferOffset);
    int green = bytesToShort<short>(buffer, bufferOffset + 2);
    int blue  = bytesToShort<short>(buffer, bufferOffset + 4);
    return int4(red, green, blue, 1);
}
872
873// R16G16B16_UINT
// Reads three unsigned 16-bit values at `bufferOffset`, +2 and +4 bytes into red, green and
// blue; alpha is 1.
static inline uint4 readR16G16B16_UINT(COMMON_READ_FUNC_PARAMS)
{
    uint red   = bytesToShort<ushort>(buffer, bufferOffset);
    uint green = bytesToShort<ushort>(buffer, bufferOffset + 2);
    uint blue  = bytesToShort<ushort>(buffer, bufferOffset + 4);
    return uint4(red, green, blue, 1u);
}
883
884// R16G16B16A16_FLOAT
// Reads four halves at `bufferOffset`, +2, +4 and +6 bytes into red, green, blue and alpha.
static inline float4 readR16G16B16A16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    float red   = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    float green = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    float blue  = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
    float alpha = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6));
    return float4(red, green, blue, alpha);
}
// Converts all four channels to half and stores them at `bufferOffset`, +2, +4 and +6 bytes.
static inline void writeR16G16B16A16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    half red   = static_cast<half>(color.r);
    half green = static_cast<half>(color.g);
    half blue  = static_cast<half>(color.b);
    half alpha = static_cast<half>(color.a);

    shortToBytes(as_type<ushort>(red), bufferOffset, buffer);
    shortToBytes(as_type<ushort>(green), bufferOffset + 2, buffer);
    shortToBytes(as_type<ushort>(blue), bufferOffset + 4, buffer);
    shortToBytes(as_type<ushort>(alpha), bufferOffset + 6, buffer);
}
901
902// R16G16B16A16_NORM
// Reads four 16-bit normalized values of type ShortType at `bufferOffset`, +2, +4 and +6 bytes
// into red, green, blue and alpha. Instantiated with short (SNORM) and ushort (UNORM) by the
// macros below.
template <typename ShortType>
static inline float4 readR16G16B16A16_NORM(COMMON_READ_FUNC_PARAMS)
{
    float red   = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    float green = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
    float blue  = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
    float alpha = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6));
    return float4(red, green, blue, alpha);
}
#define readR16G16B16A16_SNORM readR16G16B16A16_NORM<short>
#define readR16G16B16A16_UNORM readR16G16B16A16_NORM<ushort>
915
// R16G16B16A16_SINT
// Reads four signed 16-bit channels at offsets +0, +2, +4 and +6 from
// bufferOffset and widens them to int.
static inline int4 readR16G16B16A16_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = bytesToShort<short>(buffer, bufferOffset);
    const int green = bytesToShort<short>(buffer, bufferOffset + 2);
    const int blue  = bytesToShort<short>(buffer, bufferOffset + 4);
    const int alpha = bytesToShort<short>(buffer, bufferOffset + 6);
    return int4(red, green, blue, alpha);
}
// Narrows each channel of `color` to short (plain static_cast, no clamping)
// and stores it at offsets +0, +2, +4 and +6 from bufferOffset.
static inline void writeR16G16B16A16_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    const short red   = static_cast<short>(color.r);
    const short green = static_cast<short>(color.g);
    const short blue  = static_cast<short>(color.b);
    const short alpha = static_cast<short>(color.a);

    shortToBytes(red, bufferOffset, buffer);
    shortToBytes(green, bufferOffset + 2, buffer);
    shortToBytes(blue, bufferOffset + 4, buffer);
    shortToBytes(alpha, bufferOffset + 6, buffer);
}
933
// R16G16B16A16_UINT
// Reads four unsigned 16-bit channels at offsets +0, +2, +4 and +6 from
// bufferOffset and widens them to uint.
static inline uint4 readR16G16B16A16_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = bytesToShort<ushort>(buffer, bufferOffset);
    const uint green = bytesToShort<ushort>(buffer, bufferOffset + 2);
    const uint blue  = bytesToShort<ushort>(buffer, bufferOffset + 4);
    const uint alpha = bytesToShort<ushort>(buffer, bufferOffset + 6);
    return uint4(red, green, blue, alpha);
}
// Narrows each channel of `color` to ushort (plain static_cast, no clamping)
// and stores it at offsets +0, +2, +4 and +6 from bufferOffset.
static inline void writeR16G16B16A16_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    const ushort red   = static_cast<ushort>(color.r);
    const ushort green = static_cast<ushort>(color.g);
    const ushort blue  = static_cast<ushort>(color.b);
    const ushort alpha = static_cast<ushort>(color.a);

    shortToBytes(red, bufferOffset, buffer);
    shortToBytes(green, bufferOffset + 2, buffer);
    shortToBytes(blue, bufferOffset + 4, buffer);
    shortToBytes(alpha, bufferOffset + 6, buffer);
}
951
// R32_FLOAT
// Reads one 32-bit value at bufferOffset and reinterprets its bits as float.
// Missing channels default to green = blue = 0 and alpha = 1.
static inline float4 readR32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float red = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Stores the raw 32-bit pattern of the red channel at bufferOffset; the other
// channels of `color` are ignored.
static inline void writeR32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const uint redBits = as_type<uint>(color.r);
    intToBytes(redBits, bufferOffset, buffer);
}
965
// R32_NORM
// Shared reader behind the SNORM/UNORM aliases below. IntType selects the
// 32-bit storage type passed to both bytesToInt and normalizedToFloat.
// Missing channels default to green = blue = 0 and alpha = 1.
template <typename IntType>
static inline float4 readR32_NORM(COMMON_READ_FUNC_PARAMS)
{
    const float red = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Signed and unsigned variants are plain instantiations of the template.
#define readR32_SNORM readR32_NORM<int>
#define readR32_UNORM readR32_NORM<uint>
978
// A32_FLOAT
// Alpha-only format: the 32-bit value at bufferOffset becomes alpha and the
// colour channels are zero.
static inline float4 readA32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float alpha = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    return float4(0.0f, 0.0f, 0.0f, alpha);
}
// Stores the raw 32-bit pattern of the alpha channel at bufferOffset; the
// colour channels of `color` are ignored.
static inline void writeA32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const uint alphaBits = as_type<uint>(color.a);
    intToBytes(alphaBits, bufferOffset, buffer);
}
991
// L32_FLOAT
// Luminance format: the single 32-bit float at bufferOffset is replicated into
// red, green and blue; alpha is 1.
static inline float4 readL32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float luminance = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    return float4(luminance, luminance, luminance, 1.0f);
}
// Luminance is taken from the red channel: its raw 32-bit pattern is stored at
// bufferOffset.
static inline void writeL32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const uint luminanceBits = as_type<uint>(color.r);
    intToBytes(luminanceBits, bufferOffset, buffer);
}
1004
// R32_SINT
// Reads one signed 32-bit channel at bufferOffset.
// Missing channels default to green = blue = 0 and alpha = 1.
static inline int4 readR32_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red = bytesToInt<int>(buffer, bufferOffset);
    return int4(red, 0, 0, 1);
}
// Stores the red channel of `color` at bufferOffset; other channels are ignored.
static inline void writeR32_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    const auto red = color.r;
    intToBytes(red, bufferOffset, buffer);
}
1018
// R32_FIXED
// Reads one signed 32-bit value at bufferOffset and scales it by 1 / 65536,
// i.e. treats it as fixed point with 16 fractional bits.
// Missing channels default to green = blue = 0 and alpha = 1.
static inline float4 readR32_FIXED(COMMON_READ_FUNC_PARAMS)
{
    constexpr float kFixedScale = 1.0f / (1 << 16);
    const float red             = bytesToInt<int>(buffer, bufferOffset) * kFixedScale;
    return float4(red, 0.0f, 0.0f, 1.0f);
}
1029
// R32_UINT
// Reads one unsigned 32-bit channel at bufferOffset.
// Missing channels default to green = blue = 0 and alpha = 1.
static inline uint4 readR32_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red = bytesToInt<uint>(buffer, bufferOffset);
    return uint4(red, 0u, 0u, 1u);
}
// Stores the red channel of `color` at bufferOffset; other channels are ignored.
static inline void writeR32_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    const auto red = color.r;
    intToBytes(red, bufferOffset, buffer);
}
1043
// L32A32_FLOAT
// Luminance + alpha: the float at +0 is replicated into red, green and blue,
// the float at +4 becomes alpha.
static inline float4 readL32A32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float luminance = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    const float alpha     = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
    return float4(luminance, luminance, luminance, alpha);
}
// Stores luminance (taken from the red channel) at +0 and alpha at +4, both as
// raw 32-bit float patterns.
static inline void writeL32A32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const uint luminanceBits = as_type<uint>(color.r);
    const uint alphaBits     = as_type<uint>(color.a);

    intToBytes(luminanceBits, bufferOffset, buffer);
    intToBytes(alphaBits, bufferOffset + 4, buffer);
}
1057
// R32G32_FLOAT
// Reads two 32-bit floats at offsets +0 and +4 from bufferOffset.
// Missing channels default to blue = 0 and alpha = 1.
static inline float4 readR32G32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float red   = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    const float green = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
    return float4(red, green, 0.0f, 1.0f);
}
// Stores the raw 32-bit patterns of red and green at offsets +0 and +4; blue
// and alpha are ignored.
static inline void writeR32G32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const uint redBits   = as_type<uint>(color.r);
    const uint greenBits = as_type<uint>(color.g);

    intToBytes(redBits, bufferOffset, buffer);
    intToBytes(greenBits, bufferOffset + 4, buffer);
}
1073
// R32G32_NORM
// Shared reader behind the SNORM/UNORM aliases below. IntType selects the
// 32-bit storage type passed to both bytesToInt and normalizedToFloat.
// Channels are read at +0 and +4; blue defaults to 0 and alpha to 1.
template <typename IntType>
static inline float4 readR32G32_NORM(COMMON_READ_FUNC_PARAMS)
{
    const float red   = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
    const float green = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
    return float4(red, green, 0.0f, 1.0f);
}
// Signed and unsigned variants are plain instantiations of the template.
#define readR32G32_SNORM readR32G32_NORM<int>
#define readR32G32_UNORM readR32G32_NORM<uint>
1087
// R32G32_SINT
// Reads two signed 32-bit channels at offsets +0 and +4 from bufferOffset.
// Missing channels default to blue = 0 and alpha = 1.
static inline int4 readR32G32_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = bytesToInt<int>(buffer, bufferOffset);
    const int green = bytesToInt<int>(buffer, bufferOffset + 4);
    return int4(red, green, 0, 1);
}
// Stores red at +0 and green at +4; blue and alpha are ignored.
static inline void writeR32G32_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    const auto red   = color.r;
    const auto green = color.g;

    intToBytes(red, bufferOffset, buffer);
    intToBytes(green, bufferOffset + 4, buffer);
}
1103
// R32G32_FIXED
// Reads two signed 32-bit values at +0 and +4 and scales each by 1 / 65536,
// i.e. treats them as fixed point with 16 fractional bits.
// Missing channels default to blue = 0 and alpha = 1.
static inline float4 readR32G32_FIXED(COMMON_READ_FUNC_PARAMS)
{
    constexpr float kFixedScale = 1.0f / (1 << 16);
    const float red             = bytesToInt<int>(buffer, bufferOffset) * kFixedScale;
    const float green           = bytesToInt<int>(buffer, bufferOffset + 4) * kFixedScale;
    return float4(red, green, 0.0f, 1.0f);
}
1115
// R32G32_UINT
// Reads two unsigned 32-bit channels at offsets +0 and +4 from bufferOffset.
// Missing channels default to blue = 0 and alpha = 1.
static inline uint4 readR32G32_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = bytesToInt<uint>(buffer, bufferOffset);
    const uint green = bytesToInt<uint>(buffer, bufferOffset + 4);
    return uint4(red, green, 0u, 1u);
}
// Stores red at +0 and green at +4; blue and alpha are ignored.
static inline void writeR32G32_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    const auto red   = color.r;
    const auto green = color.g;

    intToBytes(red, bufferOffset, buffer);
    intToBytes(green, bufferOffset + 4, buffer);
}
1131
// R32G32B32_FLOAT
// Reads three 32-bit floats at offsets +0, +4 and +8 from bufferOffset.
// The format carries no alpha, so alpha is returned as 1.
static inline float4 readR32G32B32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float red   = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    const float green = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
    const float blue  = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
    return float4(red, green, blue, 1.0f);
}
1142
// R32G32B32_NORM
// Shared reader behind the SNORM/UNORM aliases below. IntType selects the
// 32-bit storage type passed to both bytesToInt and normalizedToFloat.
// Channels are read at +0, +4 and +8; alpha defaults to 1.
template <typename IntType>
static inline float4 readR32G32B32_NORM(COMMON_READ_FUNC_PARAMS)
{
    const float red   = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
    const float green = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
    const float blue  = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
    return float4(red, green, blue, 1.0f);
}
// Signed and unsigned variants are plain instantiations of the template.
#define readR32G32B32_SNORM readR32G32B32_NORM<int>
#define readR32G32B32_UNORM readR32G32B32_NORM<uint>
1156
// R32G32B32_SINT
// Reads three signed 32-bit channels at offsets +0, +4 and +8 from
// bufferOffset. The format carries no alpha, so alpha is returned as 1.
static inline int4 readR32G32B32_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = bytesToInt<int>(buffer, bufferOffset);
    const int green = bytesToInt<int>(buffer, bufferOffset + 4);
    const int blue  = bytesToInt<int>(buffer, bufferOffset + 8);
    return int4(red, green, blue, 1);
}
1167
// R32G32B32_FIXED
// Reads three signed 32-bit values at +0, +4 and +8 and scales each by
// 1 / 65536, i.e. treats them as fixed point with 16 fractional bits.
// The format carries no alpha, so alpha is returned as 1.
static inline float4 readR32G32B32_FIXED(COMMON_READ_FUNC_PARAMS)
{
    constexpr float kFixedScale = 1.0f / (1 << 16);
    const float red             = bytesToInt<int>(buffer, bufferOffset) * kFixedScale;
    const float green           = bytesToInt<int>(buffer, bufferOffset + 4) * kFixedScale;
    const float blue            = bytesToInt<int>(buffer, bufferOffset + 8) * kFixedScale;
    return float4(red, green, blue, 1.0f);
}
1179
// R32G32B32_UINT
// Reads three unsigned 32-bit channels at offsets +0, +4 and +8 from
// bufferOffset. The format carries no alpha, so alpha is returned as 1.
static inline uint4 readR32G32B32_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = bytesToInt<uint>(buffer, bufferOffset);
    const uint green = bytesToInt<uint>(buffer, bufferOffset + 4);
    const uint blue  = bytesToInt<uint>(buffer, bufferOffset + 8);
    return uint4(red, green, blue, 1u);
}
1190
// R32G32B32A32_FLOAT
// Reads four 32-bit floats at offsets +0, +4, +8 and +12 from bufferOffset.
static inline float4 readR32G32B32A32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float red   = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    const float green = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
    const float blue  = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
    const float alpha = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12));
    return float4(red, green, blue, alpha);
}
// Stores the raw 32-bit pattern of every channel of `color` at offsets +0, +4,
// +8 and +12 from bufferOffset.
static inline void writeR32G32B32A32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const uint redBits   = as_type<uint>(color.r);
    const uint greenBits = as_type<uint>(color.g);
    const uint blueBits  = as_type<uint>(color.b);
    const uint alphaBits = as_type<uint>(color.a);

    intToBytes(redBits, bufferOffset, buffer);
    intToBytes(greenBits, bufferOffset + 4, buffer);
    intToBytes(blueBits, bufferOffset + 8, buffer);
    intToBytes(alphaBits, bufferOffset + 12, buffer);
}
1208
// R32G32B32A32_NORM
// Shared reader behind the SNORM/UNORM aliases below. IntType selects the
// 32-bit storage type passed to both bytesToInt and normalizedToFloat.
// Channels are read at offsets +0, +4, +8 and +12 from bufferOffset.
template <typename IntType>
static inline float4 readR32G32B32A32_NORM(COMMON_READ_FUNC_PARAMS)
{
    const float red   = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
    const float green = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
    const float blue  = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
    const float alpha = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12));
    return float4(red, green, blue, alpha);
}
// Signed and unsigned variants are plain instantiations of the template.
#define readR32G32B32A32_SNORM readR32G32B32A32_NORM<int>
#define readR32G32B32A32_UNORM readR32G32B32A32_NORM<uint>
1222
// R32G32B32A32_SINT
// Reads four signed 32-bit channels at offsets +0, +4, +8 and +12 from
// bufferOffset.
static inline int4 readR32G32B32A32_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = bytesToInt<int>(buffer, bufferOffset);
    const int green = bytesToInt<int>(buffer, bufferOffset + 4);
    const int blue  = bytesToInt<int>(buffer, bufferOffset + 8);
    const int alpha = bytesToInt<int>(buffer, bufferOffset + 12);
    return int4(red, green, blue, alpha);
}
// Stores all four channels of `color` at offsets +0, +4, +8 and +12 from
// bufferOffset.
static inline void writeR32G32B32A32_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    const auto red   = color.r;
    const auto green = color.g;
    const auto blue  = color.b;
    const auto alpha = color.a;

    intToBytes(red, bufferOffset, buffer);
    intToBytes(green, bufferOffset + 4, buffer);
    intToBytes(blue, bufferOffset + 8, buffer);
    intToBytes(alpha, bufferOffset + 12, buffer);
}
// R32G32B32A32_FIXED
// Reads four signed 32-bit values at +0, +4, +8 and +12 and scales each by
// 1 / 65536, i.e. treats them as fixed point with 16 fractional bits.
static inline float4 readR32G32B32A32_FIXED(COMMON_READ_FUNC_PARAMS)
{
    constexpr float kFixedScale = 1.0f / (1 << 16);
    const float red             = bytesToInt<int>(buffer, bufferOffset) * kFixedScale;
    const float green           = bytesToInt<int>(buffer, bufferOffset + 4) * kFixedScale;
    const float blue            = bytesToInt<int>(buffer, bufferOffset + 8) * kFixedScale;
    const float alpha           = bytesToInt<int>(buffer, bufferOffset + 12) * kFixedScale;
    return float4(red, green, blue, alpha);
}
1251
// R32G32B32A32_UINT
// Reads four unsigned 32-bit channels at offsets +0, +4, +8 and +12 from
// bufferOffset.
static inline uint4 readR32G32B32A32_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = bytesToInt<uint>(buffer, bufferOffset);
    const uint green = bytesToInt<uint>(buffer, bufferOffset + 4);
    const uint blue  = bytesToInt<uint>(buffer, bufferOffset + 8);
    const uint alpha = bytesToInt<uint>(buffer, bufferOffset + 12);
    return uint4(red, green, blue, alpha);
}
// Stores all four channels of `color` at offsets +0, +4, +8 and +12 from
// bufferOffset.
static inline void writeR32G32B32A32_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    const auto red   = color.r;
    const auto green = color.g;
    const auto blue  = color.b;
    const auto alpha = color.a;

    intToBytes(red, bufferOffset, buffer);
    intToBytes(green, bufferOffset + 4, buffer);
    intToBytes(blue, bufferOffset + 8, buffer);
    intToBytes(alpha, bufferOffset + 12, buffer);
}
1269
/* Defines read<FORMAT>_SSCALED as a thin wrapper that forwards to
 * read<FORMAT>_SINT and returns its int4 result unchanged. */
#define ALIAS_READ_SINT_FUNC(FORMAT)                                   \
    static inline int4 read##FORMAT##_SSCALED(COMMON_READ_FUNC_PARAMS) \
    {                                                                  \
        return read##FORMAT##_SINT(FORWARD_COMMON_READ_FUNC_PARAMS);   \
    }

/* Defines read<FORMAT>_USCALED as a thin wrapper that forwards to
 * read<FORMAT>_UINT and returns its uint4 result unchanged. */
#define ALIAS_READ_UINT_FUNC(FORMAT)                                    \
    static inline uint4 read##FORMAT##_USCALED(COMMON_READ_FUNC_PARAMS) \
    {                                                                   \
        return read##FORMAT##_UINT(FORWARD_COMMON_READ_FUNC_PARAMS);    \
    }

/* Emits both the signed and the unsigned scaled alias for one format. */
#define ALIAS_READ_INT_FUNC(FORMAT) \
    ALIAS_READ_SINT_FUNC(FORMAT)    \
    ALIAS_READ_UINT_FUNC(FORMAT)

/* Emits the scaled aliases for the 1-, 2-, 3- and 4-channel formats of a given
 * per-channel bit width (R, RG, RGB, RGBA). */
#define ALIAS_READ_INT_FUNCS(BITS)                 \
    ALIAS_READ_INT_FUNC(R##BITS)                   \
    ALIAS_READ_INT_FUNC(R##BITS##G##BITS)          \
    ALIAS_READ_INT_FUNC(R##BITS##G##BITS##B##BITS) \
    ALIAS_READ_INT_FUNC(R##BITS##G##BITS##B##BITS##A##BITS)

// Instantiate the aliases for every 8-, 16- and 32-bit per-channel layout.
ALIAS_READ_INT_FUNCS(8)
ALIAS_READ_INT_FUNCS(16)
ALIAS_READ_INT_FUNCS(32)

// The packed 10/10/10/2 layout does not fit the per-bit-width pattern above.
ALIAS_READ_INT_FUNC(R10G10B10A2)
1297
// Copy pixels from buffer to texture
// Float-texture variant. After READ_KERNEL_GUARD, the source offset is computed
// from options.pixelSize and the kCopyFormatType function constant selects the
// matching read<FORMAT> routine through READ_FORMAT_SWITCH_CASE.
// NOTE(review): A8_UNORM appears in writeFromFloatTextureToBuffer's list but
// not here - confirm the omission is intentional.
kernel void readFromBufferToFloatTexture(COMMON_READ_KERNEL_PARAMS(float))
{
    READ_KERNEL_GUARD

// Formats this kernel can decode; anything else falls through the switch.
#define FLOAT_READ_FORMATS(PROC) \
    PROC(R5G6B5_UNORM)           \
    PROC(R8G8B8A8_UNORM)         \
    PROC(R8G8B8A8_UNORM_SRGB)    \
    PROC(R8G8B8A8_SNORM)         \
    PROC(B8G8R8A8_UNORM)         \
    PROC(B8G8R8A8_UNORM_SRGB)    \
    PROC(R8G8B8_UNORM)           \
    PROC(R8G8B8_UNORM_SRGB)      \
    PROC(R8G8B8_SNORM)           \
    PROC(L8_UNORM)               \
    PROC(L8A8_UNORM)             \
    PROC(R5G5B5A1_UNORM)         \
    PROC(R4G4B4A4_UNORM)         \
    PROC(R8_UNORM)               \
    PROC(R8_SNORM)               \
    PROC(R8G8_UNORM)             \
    PROC(R8G8_SNORM)             \
    PROC(R16_FLOAT)              \
    PROC(A16_FLOAT)              \
    PROC(L16_FLOAT)              \
    PROC(L16A16_FLOAT)           \
    PROC(R16G16_FLOAT)           \
    PROC(R16G16B16_FLOAT)        \
    PROC(R16G16B16A16_FLOAT)     \
    PROC(R32_FLOAT)              \
    PROC(A32_FLOAT)              \
    PROC(L32_FLOAT)              \
    PROC(L32A32_FLOAT)           \
    PROC(R32G32_FLOAT)           \
    PROC(R32G32B32_FLOAT)        \
    PROC(R32G32B32A32_FLOAT)

    // The name `bufferOffset` is kept as-is: the switch-case macro is defined
    // elsewhere and presumably refers to it by name.
    uint bufferOffset = CALC_BUFFER_READ_OFFSET(options.pixelSize);

    switch (kCopyFormatType)
    {
        FLOAT_READ_FORMATS(READ_FORMAT_SWITCH_CASE)
    }

#undef FLOAT_READ_FORMATS
}
1345
// Signed-integer-texture variant of the buffer-to-texture copy. After
// READ_KERNEL_GUARD, the source offset is computed from options.pixelSize and
// kCopyFormatType selects the matching read<FORMAT> routine through
// READ_FORMAT_SWITCH_CASE.
kernel void readFromBufferToIntTexture(COMMON_READ_KERNEL_PARAMS(int))
{
    READ_KERNEL_GUARD

// Formats this kernel can decode; anything else falls through the switch.
#define INT_READ_FORMATS(PROC) \
    PROC(R8_SINT)              \
    PROC(R8G8_SINT)            \
    PROC(R8G8B8_SINT)          \
    PROC(R8G8B8A8_SINT)        \
    PROC(R16_SINT)             \
    PROC(R16G16_SINT)          \
    PROC(R16G16B16_SINT)       \
    PROC(R16G16B16A16_SINT)    \
    PROC(R32_SINT)             \
    PROC(R32G32_SINT)          \
    PROC(R32G32B32_SINT)       \
    PROC(R32G32B32A32_SINT)

    // The name `bufferOffset` is kept as-is: the switch-case macro is defined
    // elsewhere and presumably refers to it by name.
    uint bufferOffset = CALC_BUFFER_READ_OFFSET(options.pixelSize);

    switch (kCopyFormatType)
    {
        INT_READ_FORMATS(READ_FORMAT_SWITCH_CASE)
    }

#undef INT_READ_FORMATS
}
1373
// Unsigned-integer-texture variant of the buffer-to-texture copy. After
// READ_KERNEL_GUARD, the source offset is computed from options.pixelSize and
// kCopyFormatType selects the matching read<FORMAT> routine through
// READ_FORMAT_SWITCH_CASE.
kernel void readFromBufferToUIntTexture(COMMON_READ_KERNEL_PARAMS(uint))
{
    READ_KERNEL_GUARD

// Formats this kernel can decode; anything else falls through the switch.
#define UINT_READ_FORMATS(PROC) \
    PROC(R8_UINT)               \
    PROC(R8G8_UINT)             \
    PROC(R8G8B8_UINT)           \
    PROC(R8G8B8A8_UINT)         \
    PROC(R16_UINT)              \
    PROC(R16G16_UINT)           \
    PROC(R16G16B16_UINT)        \
    PROC(R16G16B16A16_UINT)     \
    PROC(R32_UINT)              \
    PROC(R32G32_UINT)           \
    PROC(R32G32B32_UINT)        \
    PROC(R32G32B32A32_UINT)

    // The name `bufferOffset` is kept as-is: the switch-case macro is defined
    // elsewhere and presumably refers to it by name.
    uint bufferOffset = CALC_BUFFER_READ_OFFSET(options.pixelSize);

    switch (kCopyFormatType)
    {
        UINT_READ_FORMATS(READ_FORMAT_SWITCH_CASE)
    }

#undef UINT_READ_FORMATS
}
1401
// Copy pixels from texture to buffer
// Float-texture variant. After WRITE_KERNEL_GUARD, the destination offset is
// computed from options.pixelSize and the kCopyFormatType function constant
// selects the matching write<FORMAT> routine through WRITE_FORMAT_SWITCH_CASE.
kernel void writeFromFloatTextureToBuffer(COMMON_WRITE_KERNEL_PARAMS(float))
{
    WRITE_KERNEL_GUARD

// Formats this kernel can encode; anything else falls through the switch.
#define FLOAT_WRITE_FORMATS(PROC) \
    PROC(R5G6B5_UNORM)            \
    PROC(R8G8B8A8_UNORM)          \
    PROC(R8G8B8A8_UNORM_SRGB)     \
    PROC(R8G8B8A8_SNORM)          \
    PROC(B8G8R8A8_UNORM)          \
    PROC(B8G8R8A8_UNORM_SRGB)     \
    PROC(R8G8B8_UNORM)            \
    PROC(R8G8B8_UNORM_SRGB)       \
    PROC(R8G8B8_SNORM)            \
    PROC(L8_UNORM)                \
    PROC(A8_UNORM)                \
    PROC(L8A8_UNORM)              \
    PROC(R5G5B5A1_UNORM)          \
    PROC(R4G4B4A4_UNORM)          \
    PROC(R8_UNORM)                \
    PROC(R8_SNORM)                \
    PROC(R8G8_UNORM)              \
    PROC(R8G8_SNORM)              \
    PROC(R16_FLOAT)               \
    PROC(A16_FLOAT)               \
    PROC(L16_FLOAT)               \
    PROC(L16A16_FLOAT)            \
    PROC(R16G16_FLOAT)            \
    PROC(R16G16B16A16_FLOAT)      \
    PROC(R32_FLOAT)               \
    PROC(A32_FLOAT)               \
    PROC(L32_FLOAT)               \
    PROC(L32A32_FLOAT)            \
    PROC(R32G32_FLOAT)            \
    PROC(R32G32B32A32_FLOAT)

    // The name `bufferOffset` is kept as-is: the switch-case macro is defined
    // elsewhere and presumably refers to it by name.
    uint bufferOffset = CALC_BUFFER_WRITE_OFFSET(options.pixelSize);

    switch (kCopyFormatType)
    {
        FLOAT_WRITE_FORMATS(WRITE_FORMAT_SWITCH_CASE)
    }

#undef FLOAT_WRITE_FORMATS
}
1448
// Signed-integer-texture variant of the texture-to-buffer copy. After
// WRITE_KERNEL_GUARD, the destination offset is computed from
// options.pixelSize and kCopyFormatType selects the matching write<FORMAT>
// routine through WRITE_FORMAT_SWITCH_CASE.
kernel void writeFromIntTextureToBuffer(COMMON_WRITE_KERNEL_PARAMS(int))
{
    WRITE_KERNEL_GUARD

// Formats this kernel can encode; anything else falls through the switch.
#define INT_WRITE_FORMATS(PROC) \
    PROC(R8_SINT)               \
    PROC(R8G8_SINT)             \
    PROC(R8G8B8A8_SINT)         \
    PROC(R16_SINT)              \
    PROC(R16G16_SINT)           \
    PROC(R16G16B16A16_SINT)     \
    PROC(R32_SINT)              \
    PROC(R32G32_SINT)           \
    PROC(R32G32B32A32_SINT)

    // The name `bufferOffset` is kept as-is: the switch-case macro is defined
    // elsewhere and presumably refers to it by name.
    uint bufferOffset = CALC_BUFFER_WRITE_OFFSET(options.pixelSize);

    switch (kCopyFormatType)
    {
        INT_WRITE_FORMATS(WRITE_FORMAT_SWITCH_CASE)
    }

#undef INT_WRITE_FORMATS
}
1473
// Unsigned-integer-texture variant of the texture-to-buffer copy. After
// WRITE_KERNEL_GUARD, the destination offset is computed from
// options.pixelSize and kCopyFormatType selects the matching write<FORMAT>
// routine through WRITE_FORMAT_SWITCH_CASE.
kernel void writeFromUIntTextureToBuffer(COMMON_WRITE_KERNEL_PARAMS(uint))
{
    WRITE_KERNEL_GUARD

// Formats this kernel can encode; anything else falls through the switch.
#define UINT_WRITE_FORMATS(PROC) \
    PROC(R8_UINT)                \
    PROC(R8G8_UINT)              \
    PROC(R8G8B8A8_UINT)          \
    PROC(R16_UINT)               \
    PROC(R16G16_UINT)            \
    PROC(R16G16B16A16_UINT)      \
    PROC(R32_UINT)               \
    PROC(R32G32_UINT)            \
    PROC(R32G32B32A32_UINT)

    // The name `bufferOffset` is kept as-is: the switch-case macro is defined
    // elsewhere and presumably refers to it by name.
    uint bufferOffset = CALC_BUFFER_WRITE_OFFSET(options.pixelSize);

    switch (kCopyFormatType)
    {
        UINT_WRITE_FORMATS(WRITE_FORMAT_SWITCH_CASE)
    }

#undef UINT_WRITE_FORMATS
}
1498
1499/** -----  vertex format conversion --------*/
// Parameters shared by the vertex conversion and expansion entry points; bound
// at buffer(0). Field order and types must stay as they are.
struct CopyVertexParams
{
    // Source vertex i starts at srcBufferStartOffset + srcStride * i.
    uint srcBufferStartOffset;
    uint srcStride;
    uint srcComponentBytes;  // unused when convert to float
    uint srcComponents;      // unused when convert to float
    // Default alpha bytes written when the component count is expanded. Only
    // the first srcComponentBytes bytes of srcDefaultAlphaData are copied, so
    // sources with fewer than 32 bits per component use just those bytes.
    uchar4 srcDefaultAlphaData;  // unused when convert to float

    // Destination vertex i starts at dstBufferStartOffset + dstStride * i.
    uint dstBufferStartOffset;
    uint dstStride;
    uint dstComponents;

    // Upper bound passed to ANGLE_KERNEL_GUARD by the compute entry points.
    uint vertexCount;
};
1517
/* Applies PROC to every integer-backed flavour of FORMAT: normalized, pure
 * integer and scaled, each in unsigned and signed form. */
#define INT_FORMAT_PROC(FORMAT, PROC) \
    PROC(FORMAT##_UNORM)              \
    PROC(FORMAT##_SNORM)              \
    PROC(FORMAT##_UINT)               \
    PROC(FORMAT##_SINT)               \
    PROC(FORMAT##_USCALED)            \
    PROC(FORMAT##_SSCALED)

/* Applies PROC to the pure-integer flavours of FORMAT only. */
#define PURE_INT_FORMAT_PROC(FORMAT, PROC) \
    PROC(FORMAT##_UINT)                    \
    PROC(FORMAT##_SINT)

/* Applies PROC to the floating-point / fixed-point flavour of FORMAT. */
#define FLOAT_FORMAT_PROC(FORMAT, PROC) PROC(FORMAT##_FLOAT)
#define FIXED_FORMAT_PROC(FORMAT, PROC) PROC(FORMAT##_FIXED)

/* Expands PROC1(<layout>, PROC2) for the R, RG, RGB and RGBA layouts whose
 * channels are all BITS wide. */
#define FORMAT_BITS_PROC(BITS, PROC1, PROC2) \
    PROC1(R##BITS, PROC2)                    \
    PROC1(R##BITS##G##BITS, PROC2)           \
    PROC1(R##BITS##G##BITS##B##BITS, PROC2)  \
    PROC1(R##BITS##G##BITS##B##BITS##A##BITS, PROC2)
1538
// Writes the first options.dstComponents lanes of `data` to vertex `idx` of
// `dst`, converting each lane to float and storing one lane every 4 bytes.
// The vertex starts at idx * dstStride + dstBufferStartOffset.
// `data` has four lanes, so dstComponents is assumed to be at most 4.
template <typename IntType>
static inline void writeFloatVertex(constant CopyVertexParams &options,
                                    uint idx,
                                    vec<IntType, 4> data,
                                    device uchar *dst)
{
    const uint vertexStart = idx * options.dstStride + options.dstBufferStartOffset;

    for (uint lane = 0; lane < options.dstComponents; ++lane)
    {
        const float value = static_cast<float>(data[lane]);
        floatToBytes(value, vertexStart + lane * 4, dst);
    }
}
1552
// Specialization for data that is already float: identical layout to the
// generic version (one lane every 4 bytes, dstComponents lanes in total), but
// each lane is passed to floatToBytes without a cast.
template <>
inline void writeFloatVertex(constant CopyVertexParams &options,
                             uint idx,
                             vec<float, 4> data,
                             device uchar *dst)
{
    const uint vertexStart = idx * options.dstStride + options.dstBufferStartOffset;

    for (uint lane = 0; lane < options.dstComponents; ++lane)
    {
        floatToBytes(data[lane], vertexStart + lane * 4, dst);
    }
}
1566
// Function to convert from any vertex format to float vertex format
//
// index     - vertex to convert; the source vertex starts at
//             options.srcBufferStartOffset + options.srcStride * index.
// options   - layout parameters; only the src offset/stride and the dst fields
//             are read on this path.
// srcBuffer - raw source vertex bytes.
// dstBuffer - receives options.dstComponents floats per vertex (see
//             writeFloatVertex).
//
// The kCopyFormatType function constant selects the read<FORMAT> routine; a
// format that is not in the list below writes nothing.
static inline void convertToFloatVertexFormat(uint index,
                                              constant CopyVertexParams &options,
                                              constant uchar *srcBuffer,
                                              device uchar *dstBuffer)
{
#define SUPPORTED_FORMATS(PROC)                   \
    FORMAT_BITS_PROC(8, INT_FORMAT_PROC, PROC)    \
    FORMAT_BITS_PROC(16, INT_FORMAT_PROC, PROC)   \
    FORMAT_BITS_PROC(32, INT_FORMAT_PROC, PROC)   \
    FORMAT_BITS_PROC(16, FLOAT_FORMAT_PROC, PROC) \
    FORMAT_BITS_PROC(32, FLOAT_FORMAT_PROC, PROC) \
    FORMAT_BITS_PROC(32, FIXED_FORMAT_PROC, PROC) \
    PROC(R10G10B10A2_SINT)                        \
    PROC(R10G10B10A2_UINT)                        \
    PROC(R10G10B10A2_SSCALED)                     \
    PROC(R10G10B10A2_USCALED)

    uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;

// One switch case per format: decode with the matching reader, then let
// writeFloatVertex pick the int/uint/float overload from the reader's result.
#define CONVERT_FLOAT_VERTEX_SWITCH_CASE(FORMAT)           \
    case FormatID::FORMAT: {                               \
        auto data = read##FORMAT(bufferOffset, srcBuffer); \
        writeFloatVertex(options, index, data, dstBuffer); \
    }                                                      \
    break;

    switch (kCopyFormatType)
    {
        SUPPORTED_FORMATS(CONVERT_FLOAT_VERTEX_SWITCH_CASE)
    }

// Both helper macros are local to this function; do not let them leak.
#undef CONVERT_FLOAT_VERTEX_SWITCH_CASE
#undef SUPPORTED_FORMATS
}
1601
// Kernel to convert from any vertex format to float vertex format
// Compute entry point: one thread per vertex, indexed by its position in the
// grid. ANGLE_KERNEL_GUARD receives the index and options.vertexCount -
// presumably it returns early for threads past the last vertex (defined
// elsewhere).
kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
                                         constant CopyVertexParams &options [[buffer(0)]],
                                         constant uchar *srcBuffer [[buffer(1)]],
                                         device uchar *dstBuffer [[buffer(2)]])
{
    ANGLE_KERNEL_GUARD(index, options.vertexCount);

    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
}
1611
// Vertex shader to convert from any vertex format to float vertex format
// Vertex-stage entry point: the vertex id is used directly as the vertex index
// and no vertexCount guard is applied here. Returns nothing; its only effect
// is the write into dstBuffer.
vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
                                         constant CopyVertexParams &options [[buffer(0)]],
                                         constant uchar *srcBuffer [[buffer(1)]],
                                         device uchar *dstBuffer [[buffer(2)]])
{
    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
}
1620
// Function to expand (or just simply copy) the components of the vertex
//
// index     - vertex to process; source and destination vertices start at
//             <start offset> + <stride> * index in their respective buffers.
// options   - srcComponents / dstComponents give the component counts and
//             srcComponentBytes the size of one component (same on both sides).
// srcBuffer - raw source vertex bytes.
// dstBuffer - receives dstComponents components per vertex.
//
// Destination layout per vertex:
//   1. the source components, copied byte for byte;
//   2. zero-filled components up to (but not including) the 4th one;
//   3. if a component is still missing, options.srcDefaultAlphaData.
//
// The copy in step 1 is clamped to dstComponents so a source with more
// components than the destination can never write past the destination
// vertex. Output is unchanged whenever srcComponents <= dstComponents.
static inline void expandVertexFormatComponents(uint index,
                                                constant CopyVertexParams &options,
                                                constant uchar *srcBuffer,
                                                device uchar *dstBuffer)
{
    const uint componentBytes = options.srcComponentBytes;

    uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;
    uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;

    const uint copiedComponents         = min(options.srcComponents, options.dstComponents);
    const uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);

    // Step 1: copy the components the source actually provides.
    uint component = 0;
    for (; component < copiedComponents;
         ++component, srcOffset += componentBytes, dstOffset += componentBytes)
    {
        for (uint byte = 0; byte < componentBytes; ++byte)
        {
            dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
        }
    }

    // Step 2: zero-fill the missing colour components.
    for (; component < dstComponentsBeforeAlpha; ++component, dstOffset += componentBytes)
    {
        for (uint byte = 0; byte < componentBytes; ++byte)
        {
            dstBuffer[dstOffset + byte] = 0;
        }
    }

    // Step 3: last alpha component. srcDefaultAlphaData is a uchar4, so
    // componentBytes is assumed to be at most 4 - TODO confirm with the caller.
    if (component < options.dstComponents)
    {
        for (uint byte = 0; byte < componentBytes; ++byte)
        {
            dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
        }
    }
}
1659
// Kernel to expand (or just simply copy) the components of the vertex
// Compute entry point: one thread per vertex, indexed by its position in the
// grid. ANGLE_KERNEL_GUARD receives the index and options.vertexCount -
// presumably it returns early for threads past the last vertex (defined
// elsewhere).
kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
                                           constant CopyVertexParams &options [[buffer(0)]],
                                           constant uchar *srcBuffer [[buffer(1)]],
                                           device uchar *dstBuffer [[buffer(2)]])
{
    ANGLE_KERNEL_GUARD(index, options.vertexCount);
    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
}
1670
// Vertex shader to expand (or just simply copy) the components of the vertex
// Vertex-stage entry point: the vertex id is used directly as the vertex index
// and no vertexCount guard is applied here. Returns nothing; its only effect
// is the write into dstBuffer.
vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
                                           constant CopyVertexParams &options [[buffer(0)]],
                                           constant uchar *srcBuffer [[buffer(1)]],
                                           device uchar *dstBuffer [[buffer(2)]])
{
    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
}
1679