1//
2// Copyright 2019 The ANGLE Project. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE file.
5//
6// blit.metal: Implements blitting texture content to current frame buffer.
7
8#include "common.h"
9
10using namespace rx::mtl_shader;
11
// Function constants that specialize the blit functions in this file.
// Index 0 is skipped because function_constant(0) is already used by common.h.

// When set, the color path multiplies RGB by alpha after reading the source.
constant bool kPremultiplyAlpha [[function_constant(1)]];
// When set (and kPremultiplyAlpha is not), the color path divides RGB by a non-zero alpha.
constant bool kUnmultiplyAlpha [[function_constant(2)]];
// Source color/depth texture type.
constant int kSourceTextureType [[function_constant(3)]];
// Source stencil texture type.
constant int kSourceTexture2Type [[function_constant(4)]];

// One flag per supported color/depth source type. They gate which texture argument of a
// function is declared through [[function_constant(...)]].
constant bool kSourceTextureType2D      = (kSourceTextureType == kTextureType2D);
constant bool kSourceTextureType2DArray = (kSourceTextureType == kTextureType2DArray);
constant bool kSourceTextureType2DMS    = (kSourceTextureType == kTextureType2DMultisample);
constant bool kSourceTextureTypeCube    = (kSourceTextureType == kTextureTypeCube);
constant bool kSourceTextureType3D      = (kSourceTextureType == kTextureType3D);

// Same idea for the stencil source (there is no 3D variant for stencil).
constant bool kSourceTexture2Type2D      = (kSourceTexture2Type == kTextureType2D);
constant bool kSourceTexture2Type2DArray = (kSourceTexture2Type == kTextureType2DArray);
constant bool kSourceTexture2Type2DMS    = (kSourceTexture2Type == kTextureType2DMultisample);
constant bool kSourceTexture2TypeCube    = (kSourceTexture2Type == kTextureTypeCube);
28
// Uniform block consumed by the blit vertex and fragment functions (bound at buffer(0)).
// The member order and the explicit padding are part of the layout and must not be changed here.
struct BlitParams
{
    // Source texture coordinates of the blit triangle's vertices.
    // 0: lower left, 1: lower right, 2: upper left
    float2 srcTexCoords[3];
    // Source texture level.
    int srcLevel;
    // Source texture layer.
    int srcLayer;
    // Destination viewport is flipped along X.
    bool dstFlipViewportX;
    // Destination viewport is flipped along Y.
    bool dstFlipViewportY;
    // Destination texture is luminance. Unused by depth & stencil blitting.
    bool dstLuminance;
    // Explicit trailing padding.
    // NOTE(review): presumably keeps the size in sync with the host-side struct - confirm.
    uint8_t padding[13];
};
40
// Output of blitVS and stage-in input of the blit fragment functions.
struct BlitVSOut
{
    // Clip-space position of the vertex.
    float4 position [[position]];
    // Source texture coordinates, interpolated across the primitive.
    float2 texCoords [[user(locn1)]];
};
46
// Vertex function shared by all blit pipelines. It picks the corner position for this vertex
// from gCorners, applies the destination viewport flips and forwards the matching source
// texture coordinates.
vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]])
{
    float4 clipPos = float4(gCorners[vid], 0.0, 1.0);

    // Mirror horizontally when the destination viewport is flipped along X.
    clipPos.x = options.dstFlipViewportX ? -clipPos.x : clipPos.x;

    // The Y axis of normalized device coordinates points in the opposite direction to viewport
    // coordinates, so Y has to be negated unless the viewport itself is already flipped.
    clipPos.y = options.dstFlipViewportY ? clipPos.y : -clipPos.y;

    BlitVSOut result;
    result.position  = clipPos;
    result.texCoords = options.srcTexCoords[vid];
    return result;
}
66
// Converts normalized texture coordinates into integer texel coordinates by scaling them with
// the texture's width and height and converting the result to unsigned integers.
template <typename SrcTexture2d>
static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords)
{
    const float2 sizeInTexels = float2(uint2(srcTexture.get_width(), srcTexture.get_height()));

    return uint2(texCoords * sizeInTexels);
}
75
// Reads a multisample texture for blitting: maps the normalized coordinates to a texel and
// hands that texel to resolveTextureMS.
template <typename T>
static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords)
{
    uint2 texel = getImageCoords(srcTexture, texCoords);

    return resolveTextureMS(srcTexture, texel);
}
82
// Samples one slice of a 3D texture. options.srcLayer selects the slice and options.srcLevel
// the mip level; the Z coordinate is placed at the center of the slice.
template <typename T>
static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture,
                                            sampler textureSampler,
                                            float2 texCoords,
                                            constant BlitParams &options)
{
    // Number of slices in the requested mip level.
    const float sliceCount = float(srcTexture.get_depth(options.srcLevel));

    // The +0.5 moves from the slice boundary to the slice center before normalizing.
    const float3 uvw = float3(texCoords, (float(options.srcLayer) + 0.5) / sliceCount);

    return srcTexture.sample(textureSampler, uvw, level(options.srcLevel));
}
94
// clang-format off
// Parameter list shared by every color blit function. All source textures are bound at
// texture(0); the function constants decide which single one is actually declared.
// TYPE is the texture component type (float, int or uint).
#define BLIT_COLOR_FS_PARAMS(TYPE) \
    BlitVSOut input [[stage_in]], \
    texture2d<TYPE> srcTexture2d \
    [[texture(0), function_constant(kSourceTextureType2D)]], \
    texture2d_array<TYPE> srcTexture2dArray \
    [[texture(0), function_constant(kSourceTextureType2DArray)]], \
    texture2d_ms<TYPE> srcTexture2dMS \
    [[texture(0), function_constant(kSourceTextureType2DMS)]], \
    texturecube<TYPE> srcTextureCube \
    [[texture(0), function_constant(kSourceTextureTypeCube)]], \
    texture3d<TYPE> srcTexture3d \
    [[texture(0), function_constant(kSourceTextureType3D)]], \
    sampler textureSampler [[sampler(0)]], \
    constant BlitParams &options [[buffer(0)]]
// clang-format on
107
// Argument list that forwards everything declared by BLIT_COLOR_FS_PARAMS, in the same order.
#define FORWARD_BLIT_COLOR_FS_PARAMS \
    input, \
    srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, \
    textureSampler, \
    options
111
// Reads the source color for the current fragment and applies the color conversions shared by
// all color blit functions.
//
// The texture that is read is selected by the kSourceTextureType function constant. After the
// read, alpha is optionally premultiplied (kPremultiplyAlpha) or unmultiplied
// (kUnmultiplyAlpha), and the red channel is replicated to green and blue when the destination
// is a luminance texture.
//
// Returns the converted color. If kSourceTextureType is not one of the handled types the
// result is derived from a zero color instead of from uninitialized data.
template <typename T>
static inline vec<T, 4> blitReadTexture(BLIT_COLOR_FS_PARAMS(T))
{
    // Zero-initialize so that an unhandled texture type never yields indeterminate values.
    vec<T, 4> output = vec<T, 4>(T(0));

    switch (kSourceTextureType)
    {
        case kTextureType2D:
            output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel));
            break;
        case kTextureType2DArray:
            output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer,
                                              level(options.srcLevel));
            break;
        case kTextureType2DMultisample:
            output = blitSampleTextureMS(srcTexture2dMS, input.texCoords);
            break;
        case kTextureTypeCube:
            output = srcTextureCube.sample(textureSampler,
                                           cubeTexcoords(input.texCoords, options.srcLayer),
                                           level(options.srcLevel));
            break;
        case kTextureType3D:
            output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options);
            break;
        default:
            // Unknown source type: keep the zero color.
            break;
    }

    if (kPremultiplyAlpha)
    {
        output.xyz *= output.a;
    }
    else if (kUnmultiplyAlpha)
    {
        // Skip the division for fully transparent texels to avoid dividing by zero.
        if (output.a != 0.0)
        {
            output.xyz /= output.a;
        }
    }

    if (options.dstLuminance)
    {
        // Luminance destinations take their value from the red channel only.
        output.g = output.b = output.r;
    }

    return output;
}
158
// Common body of the typed color blit fragment functions: reads and converts the source color,
// then packs it with toMultipleColorOutputs.
template <typename T>
static inline MultipleColorOutputs<T> blitFS(BLIT_COLOR_FS_PARAMS(T))
{
    vec<T, 4> color = blitReadTexture<T>(FORWARD_BLIT_COLOR_FS_PARAMS);

    return toMultipleColorOutputs(color);
}
166
// Color blit entry point for sources read through float textures.
fragment MultipleColorOutputs<float> blitFloatFS(BLIT_COLOR_FS_PARAMS(float))
{
    return blitFS<float>(FORWARD_BLIT_COLOR_FS_PARAMS);
}
// Color blit entry point for sources read through signed integer textures.
fragment MultipleColorOutputs<int> blitIntFS(BLIT_COLOR_FS_PARAMS(int))
{
    return blitFS<int>(FORWARD_BLIT_COLOR_FS_PARAMS);
}
// Color blit entry point for sources read through unsigned integer textures.
fragment MultipleColorOutputs<uint> blitUIntFS(BLIT_COLOR_FS_PARAMS(uint))
{
    return blitFS<uint>(FORWARD_BLIT_COLOR_FS_PARAMS);
}
179
// Copies from a float source to an unsigned integer destination. The color is read and
// converted like in the other color blits, scaled by 255 and then converted to uint4.
fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BLIT_COLOR_FS_PARAMS(float))
{
    float4 srcColor = blitReadTexture<float>(FORWARD_BLIT_COLOR_FS_PARAMS);

    uint4 scaledColor = uint4(srcColor * float4(255.0));

    return toMultipleColorOutputs(scaledColor);
}
187
188// Depth & stencil blitting.
// Fragment output of the depth-only blit.
struct FragmentDepthOut
{
    // Depth value written for the fragment.
    float depth [[depth(any)]];
};
193
// Reads the source depth value at the given normalized texture coordinates.
//
// The texture that is read is selected by the kSourceTextureType function constant; the level
// and layer come from options. Nearest filtering is always used. Multisample sources are not
// averaged: sample 0 is returned.
//
// Returns the red channel of the texel. Source types that are not handled here (for example
// kTextureType3D) return 0 instead of an uninitialized value.
static inline float sampleDepth(
    texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]],
    texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]],
    texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]],
    texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]],
    float2 texCoords,
    constant BlitParams &options)
{
    // Zero-initialize so that an unhandled texture type never yields indeterminate values.
    float4 output = float4(0.0);

    constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);

    switch (kSourceTextureType)
    {
        case kTextureType2D:
            output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel));
            break;
        case kTextureType2DArray:
            output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer,
                                              level(options.srcLevel));
            break;
        case kTextureType2DMultisample:
            // Always use sample 0 for depth resolve:
            output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
            break;
        case kTextureTypeCube:
            output =
                srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer),
                                      level(options.srcLevel));
            break;
        default:
            // Unsupported depth source type: keep the zero value.
            break;
    }

    return output.r;
}
228
// Depth-only blit entry point. The source depth texture is bound at texture(0); which of the
// texture arguments exists is decided by the kSourceTextureType* function constants.
fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]],
                                      texture2d<float> srcTexture2d
                                      [[texture(0), function_constant(kSourceTextureType2D)]],
                                      texture2d_array<float> srcTexture2dArray
                                      [[texture(0), function_constant(kSourceTextureType2DArray)]],
                                      texture2d_ms<float> srcTexture2dMS
                                      [[texture(0), function_constant(kSourceTextureType2DMS)]],
                                      texturecube<float> srcTextureCube
                                      [[texture(0), function_constant(kSourceTextureTypeCube)]],
                                      constant BlitParams &options [[buffer(0)]])
{
    FragmentDepthOut result;

    result.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                               input.texCoords, options);

    return result;
}
247
// Reads the source stencil value at the given normalized texture coordinates.
//
// The texture that is read is selected by the kSourceTexture2Type function constant; srcLevel
// and srcLayer select the mip level and the layer. Nearest filtering is always used.
// Multisample sources are not combined: sample 0 is returned.
//
// Returns the red channel of the texel. Source types that are not handled here return 0
// instead of an uninitialized value.
static inline uint32_t sampleStencil(
    texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]],
    texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]],
    texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]],
    texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]],
    float2 texCoords,
    int srcLevel,
    int srcLayer)
{
    // Zero-initialize so that an unhandled texture type never yields indeterminate values.
    uint4 output = uint4(0u);
    constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);

    switch (kSourceTexture2Type)
    {
        case kTextureType2D:
            output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel));
            break;
        case kTextureType2DArray:
            output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel));
            break;
        case kTextureType2DMultisample:
            // Always use sample 0 for stencil resolve:
            output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
            break;
        case kTextureTypeCube:
            output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer),
                                           level(srcLevel));
            break;
        default:
            // Unsupported stencil source type: keep the zero value.
            break;
    }

    return output.r;
}
280
281// Write stencil to a buffer
// Parameters of blitStencilToBufferCS (bound at buffer(0)).
// The member order is part of the layout and must not be changed here.
struct BlitStencilToBufferParams
{
    // Source texture coordinates of the thread at grid position (0, 0).
    float2 srcStartTexCoords;
    // Texture coordinate increment per grid step in X and Y.
    float2 srcTexCoordSteps;
    // Source texture level.
    int srcLevel;
    // Source texture layer.
    int srcLayer;

    // Extent of the destination area; threads outside of it write nothing.
    uint2 dstSize;
    // Distance between the starts of two consecutive rows in the destination buffer.
    uint dstBufferRowPitch;
    // Is multisample resolve needed?
    bool resolveMS;
};
294
// Compute kernel that copies stencil values from a texture (bound at texture(1)) into a byte
// buffer. Each thread handles the destination element at its grid position.
//
// For a multisample source without resolve, every sample of the texel is written, with the
// samples of one element stored in consecutive bytes. In all other cases a single byte
// obtained from sampleStencil is written per element.
kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]],
                                  texture2d<uint32_t> srcTexture2d
                                  [[texture(1), function_constant(kSourceTexture2Type2D)]],
                                  texture2d_array<uint32_t> srcTexture2dArray
                                  [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
                                  texture2d_ms<uint32_t> srcTexture2dMS
                                  [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
                                  texturecube<uint32_t> srcTextureCube
                                  [[texture(1), function_constant(kSourceTexture2TypeCube)]],
                                  constant BlitStencilToBufferParams &options [[buffer(0)]],
                                  device uchar *buffer [[buffer(1)]])
{
    // Threads outside of the destination area have nothing to write.
    if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y)
    {
        return;
    }

    float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps;

    // Offset of this thread's row inside the destination buffer.
    const uint rowStart = options.dstBufferRowPitch * gIndices.y;

    if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS)
    {
        // No resolve requested: copy every sample of the texel, one byte per sample.
        const uint sampleCount = srcTexture2dMS.get_num_samples();
        const uint2 texel      = getImageCoords(srcTexture2dMS, srcTexCoords);
        const uint firstByte   = rowStart + sampleCount * gIndices.x;

        for (uint sampleIdx = 0; sampleIdx < sampleCount; ++sampleIdx)
        {
            const uint sampleStencilValue = srcTexture2dMS.read(texel, sampleIdx).r;
            buffer[firstByte + sampleIdx] = static_cast<uchar>(sampleStencilValue);
        }
        return;
    }

    // Single value per element.
    const uint32_t stencilValue =
        sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                      srcTexCoords, options.srcLevel, options.srcLayer);

    buffer[rowStart + gIndices.x] = static_cast<uchar>(stencilValue);
}
335
336// Fragment's stencil output is only available since Metal 2.1
337@@#if __METAL_VERSION__ >= 210
338
// Fragment output of the stencil-only blit.
struct FragmentStencilOut
{
    // Stencil value written for the fragment.
    uint32_t stencil [[stencil]];
};
343
// Fragment output of the combined depth & stencil blit.
struct FragmentDepthStencilOut
{
    // Depth value written for the fragment.
    float depth [[depth(any)]];
    // Stencil value written for the fragment.
    uint32_t stencil [[stencil]];
};
349
// Stencil-only blit entry point. The source stencil texture is bound at texture(1); which of
// the texture arguments exists is decided by the kSourceTexture2Type* function constants.
fragment FragmentStencilOut blitStencilFS(
    BlitVSOut input [[stage_in]],
    texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]],
    texture2d_array<uint32_t> srcTexture2dArray
    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
    texture2d_ms<uint32_t> srcTexture2dMS
    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
    texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]],
    constant BlitParams &options [[buffer(0)]])
{
    FragmentStencilOut result;

    result.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                                   input.texCoords, options.srcLevel, options.srcLayer);

    return result;
}
367
// Combined depth & stencil blit entry point. Depth is read from the texture bound at
// texture(0) and stencil from the texture bound at texture(1); both use the same texture
// coordinates, level and layer.
fragment FragmentDepthStencilOut blitDepthStencilFS(
    BlitVSOut input [[stage_in]],
    // Source depth texture
    texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
    texture2d_array<float> srcDepthTexture2dArray
    [[texture(0), function_constant(kSourceTextureType2DArray)]],
    texture2d_ms<float> srcDepthTexture2dMS
    [[texture(0), function_constant(kSourceTextureType2DMS)]],
    texturecube<float> srcDepthTextureCube
    [[texture(0), function_constant(kSourceTextureTypeCube)]],

    // Source stencil texture
    texture2d<uint32_t> srcStencilTexture2d
    [[texture(1), function_constant(kSourceTexture2Type2D)]],
    texture2d_array<uint32_t> srcStencilTexture2dArray
    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
    texture2d_ms<uint32_t> srcStencilTexture2dMS
    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
    texturecube<uint32_t> srcStencilTextureCube
    [[texture(1), function_constant(kSourceTexture2TypeCube)]],

    constant BlitParams &options [[buffer(0)]])
{
    FragmentDepthStencilOut result;

    // Depth part.
    result.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS,
                               srcDepthTextureCube, input.texCoords, options);

    // Stencil part.
    result.stencil =
        sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS,
                      srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer);

    return result;
}
400@@#endif  // __METAL_VERSION__ >= 210
401