1//
2// Copyright 2019 The ANGLE Project. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE file.
5//
6
7#include "common.h"
8
9using namespace rx::mtl_shader;
10
// Function constants consumed by the kernels in this file.
// NOTE: function_constant(0) is already used by common.h, so the ids start at 100.
constant bool kSourceBufferAligned [[function_constant(100)]];
constant bool kSourceIndexIsU8 [[function_constant(200)]];
constant bool kSourceIndexIsU16 [[function_constant(300)]];
constant bool kSourceIndexIsU32 [[function_constant(400)]];

// Derived constants. They select which typed view of the source buffer (bound at
// buffer(1)) a kernel declares:
//  - byte view:   8-bit indices, or any index type when the source is not aligned.
//  - ushort view: 16-bit indices read from an aligned source.
//  - uint view:   32-bit indices read from an aligned source.
constant bool kSourceBufferUnaligned = !kSourceBufferAligned;
constant bool kUseSourceBufferU8     = kSourceBufferUnaligned || kSourceIndexIsU8;
constant bool kUseSourceBufferU16    = kSourceBufferAligned && kSourceIndexIsU16;
constant bool kUseSourceBufferU32    = kSourceBufferAligned && kSourceIndexIsU32;
20
// Parameters shared by the kernels that read from a source index buffer.
struct IndexConversionParams
{
    // Offset, in bytes, of the first source index inside the source buffer.
    uint srcOffset;
    // Number of indices; used as the thread bound by ANGLE_IDX_CONVERSION_GUARD.
    uint indexCount;
    // When set, convertIndexU8ToU16 widens the 8-bit value 0xff to 0xffff.
    bool primitiveRestartEnabled;
};

// Applies ANGLE_KERNEL_GUARD with OPTS.indexCount as the bound for thread index IDX.
#define ANGLE_IDX_CONVERSION_GUARD(IDX, OPTS) ANGLE_KERNEL_GUARD(IDX, OPTS.indexCount)
29
// Reads the idx-th 16-bit index through a ushort view of the source buffer.
// `offset` is halved to turn it into a ushort element position, so it is expected
// to be a byte offset that is a multiple of 2.
inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx)
{
    const uint element = (offset / 2) + idx;
    return inputAligned[element];
}
// Reads the idx-th 32-bit index through a uint view of the source buffer.
// `offset` is divided by 4 to turn it into a uint element position, so it is
// expected to be a byte offset that is a multiple of 4.
inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx)
{
    const uint element = (offset / 4) + idx;
    return inputAligned[element];
}
// Reads the idx-th 8-bit index; with one byte per index, `offset` is used directly
// as the position of the first index.
inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx)
{
    const uint element = offset + idx;
    return input[element];
}
// Reads the idx-th 16-bit index from a byte view of the source buffer, so `offset`
// (in bytes) does not have to be a multiple of 2. The two bytes are combined in
// little-endian order: the byte at the lower address is the low half.
inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx)
{
    const uint bytePos = offset + 2 * idx;

    const ushort lowByte  = input[bytePos];
    const ushort highByte = input[bytePos + 1];

    return lowByte | (highByte << 8);
}
// Reads the idx-th 32-bit index from a byte view of the source buffer, so `offset`
// (in bytes) does not have to be a multiple of 4. The four bytes are combined in
// little-endian order: byte k (counting from the lowest address) lands in bits
// [8k, 8k + 7] of the result.
inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx)
{
    const uint bytePos = offset + 4 * idx;

    uint assembled = 0;
    for (uint k = 0; k < 4; ++k)
    {
        const uint byteValue = input[bytePos + k];
        assembled |= byteValue << (8 * k);
    }
    return assembled;
}
58
// Widens 8-bit indices to 16-bit indices, one index per thread.
// Thread `idx` reads source index `idx` (starting at options.srcOffset bytes) and
// writes output[idx]. When primitive restart is enabled, the 8-bit value 0xff is
// written as 0xffff instead of 0x00ff.
kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]],
                                constant IndexConversionParams &options [[buffer(0)]],
                                constant uchar *input [[buffer(1)]],
                                device ushort *output [[buffer(2)]])
{
    ANGLE_IDX_CONVERSION_GUARD(idx, options);

    const uchar srcIndex = getIndexAligned(input, options.srcOffset, idx);
    const bool isRestart = options.primitiveRestartEnabled && srcIndex == 0xff;

    output[idx] = isRestart ? ushort(0xffff) : ushort(srcIndex);
}
77
// Copies 16-bit indices from the source buffer to `output`, one index per thread.
// Only one of the two buffer(1) views exists in a given specialization:
// `input` (bytes) when kSourceBufferUnaligned, `inputAligned` (ushort) when
// kSourceBufferAligned. Each branch below touches only the view that exists.
kernel void convertIndexU16(uint idx [[thread_position_in_grid]],
                            constant IndexConversionParams &options [[buffer(0)]],
                            constant uchar *input
                            [[buffer(1), function_constant(kSourceBufferUnaligned)]],
                            constant ushort *inputAligned
                            [[buffer(1), function_constant(kSourceBufferAligned)]],
                            device ushort *output [[buffer(2)]])
{
    ANGLE_IDX_CONVERSION_GUARD(idx, options);

    if (kSourceBufferUnaligned)
    {
        // Assemble the index byte by byte.
        output[idx] = getIndexUnalignedU16(input, options.srcOffset, idx);
    }
    else
    {
        // Direct typed load.
        output[idx] = getIndexAligned(inputAligned, options.srcOffset, idx);
    }
}
99
// Copies 32-bit indices from the source buffer to `output`, one index per thread.
// Only one of the two buffer(1) views exists in a given specialization:
// `input` (bytes) when kSourceBufferUnaligned, `inputAligned` (uint) when
// kSourceBufferAligned. Each branch below touches only the view that exists.
kernel void convertIndexU32(uint idx [[thread_position_in_grid]],
                            constant IndexConversionParams &options [[buffer(0)]],
                            constant uchar *input
                            [[buffer(1), function_constant(kSourceBufferUnaligned)]],
                            constant uint *inputAligned
                            [[buffer(1), function_constant(kSourceBufferAligned)]],
                            device uint *output [[buffer(2)]])
{
    ANGLE_IDX_CONVERSION_GUARD(idx, options);

    if (kSourceBufferUnaligned)
    {
        // Assemble the index byte by byte.
        output[idx] = getIndexUnalignedU32(input, options.srcOffset, idx);
    }
    else
    {
        // Direct typed load.
        output[idx] = getIndexAligned(inputAligned, options.srcOffset, idx);
    }
}
121
// Parameters for the kernels that generate indices for non-indexed draws.
struct IndexFromArrayParams
{
    uint firstVertex;  // index of the first vertex of the draw
    uint vertexCount;  // for triangle fan: vertex count excluding the 1st & 2nd vertices
};
128
// Generate triangle fan indices for glDrawArray().
// Thread `idx` emits triangle `idx` as three consecutive output indices:
// (firstVertex, firstVertex + 1 + idx, firstVertex + 2 + idx).
// options.vertexCount is the thread bound, i.e. the number of triangles.
kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]],
                                      constant IndexFromArrayParams &options [[buffer(0)]],
                                      device uint *output [[buffer(2)]])
{
    ANGLE_KERNEL_GUARD(idx, options.vertexCount);

    const uint fanCenter   = options.firstVertex;
    const uint lastCorner  = fanCenter + 2 + idx;
    const uint firstOutput = 3 * idx;

    output[firstOutput]     = fanCenter;
    output[firstOutput + 1] = lastCorner - 1;
    output[firstOutput + 2] = lastCorner;
}
142
// Fetches source index `idx` (starting `offset` bytes into the source buffer) and
// returns it widened to 32 bits. The buffer view and the index width are chosen by
// function constants:
//  - kUseSourceBufferU8:  read through the byte view; 16/32-bit indices are
//    assembled from individual bytes, 8-bit indices are read directly.
//  - kUseSourceBufferU16: aligned 16-bit load.
//  - kUseSourceBufferU32: aligned 32-bit load.
// Returns 0 when none of the constants selects a view.
inline uint getIndexU32(uint offset,
                        uint idx,
                        constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]],
                        constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]],
                        constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]])
{
    uint fetched = 0;

    if (kUseSourceBufferU8)
    {
        if (kSourceIndexIsU16)
        {
            fetched = getIndexUnalignedU16(inputU8, offset, idx);
        }
        else if (kSourceIndexIsU32)
        {
            fetched = getIndexUnalignedU32(inputU8, offset, idx);
        }
        else
        {
            fetched = getIndexAligned(inputU8, offset, idx);
        }
    }
    else if (kUseSourceBufferU16)
    {
        fetched = getIndexAligned(inputU16, offset, idx);
    }
    else if (kUseSourceBufferU32)
    {
        fetched = getIndexAligned(inputU32, offset, idx);
    }

    return fetched;
}
171
// NOTE(hqle): triangle fan indices generation doesn't support primitive restart.
// Generate triangle fan indices from an indices buffer. options.indexCount is the
// number of source indices counted from the 3rd one, i.e. the number of triangles.
// Thread `idx` emits triangle `idx` using source indices 0, idx + 1 and idx + 2,
// each widened to 32 bits.
kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]],
                                         constant IndexConversionParams &options [[buffer(0)]],
                                         constant uchar *inputU8
                                         [[buffer(1), function_constant(kUseSourceBufferU8)]],
                                         constant ushort *inputU16
                                         [[buffer(1), function_constant(kUseSourceBufferU16)]],
                                         constant uint *inputU32
                                         [[buffer(1), function_constant(kUseSourceBufferU32)]],
                                         device uint *output [[buffer(2)]])
{
    ANGLE_IDX_CONVERSION_GUARD(idx, options);

    const uint srcOffset   = options.srcOffset;
    const uint lastElem    = 2 + idx;
    const uint firstOutput = 3 * idx;

    const uint fanCenter   = getIndexU32(srcOffset, 0, inputU8, inputU16, inputU32);
    const uint prevCorner  = getIndexU32(srcOffset, lastElem - 1, inputU8, inputU16, inputU32);
    const uint lastCorner  = getIndexU32(srcOffset, lastElem, inputU8, inputU16, inputU32);

    output[firstOutput]     = fanCenter;
    output[firstOutput + 1] = prevCorner;
    output[firstOutput + 2] = lastCorner;
}
193
// Generate line loop indices for glDrawArray().
// Produces options.vertexCount + 1 indices, one per thread: thread `idx` writes
// firstVertex + (idx % vertexCount), so the final index wraps back to firstVertex
// and closes the loop.
kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]],
                                        constant IndexFromArrayParams &options [[buffer(0)]],
                                        device uint *output [[buffer(2)]])
{
    // An empty loop has nothing to emit. Without this check thread 0 would pass the
    // guard below (totalIndices == 1) and evaluate `idx % 0`.
    if (options.vertexCount == 0)
    {
        return;
    }

    uint totalIndices = options.vertexCount + 1;
    ANGLE_KERNEL_GUARD(idx, totalIndices);

    output[idx] = options.firstVertex + idx % options.vertexCount;
}
204
// NOTE(hqle): lineloop indices generation doesn't support primitive restart.
// Generate line loop indices for glDrawElements().
// Produces options.indexCount + 1 indices, one per thread: thread `idx` writes
// source index (idx % indexCount) widened to 32 bits, so the final output repeats
// source index 0 and closes the loop.
kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]],
                                           constant IndexConversionParams &options [[buffer(0)]],
                                           constant uchar *inputU8
                                           [[buffer(1), function_constant(kUseSourceBufferU8)]],
                                           constant ushort *inputU16
                                           [[buffer(1), function_constant(kUseSourceBufferU16)]],
                                           constant uint *inputU32
                                           [[buffer(1), function_constant(kUseSourceBufferU32)]],
                                           device uint *output [[buffer(2)]])
{
    // An empty loop has nothing to emit. Without this check thread 0 would pass the
    // guard below (totalTargetIndices == 1), evaluate `idx % 0`, and use the result
    // as a read position into the source buffer.
    if (options.indexCount == 0)
    {
        return;
    }

    uint totalTargetIndices = options.indexCount + 1;
    ANGLE_KERNEL_GUARD(idx, totalTargetIndices);

    output[idx] =
        getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32);
}