// Element layout of the read/write buffer: one 4-component vector for each
// scalar type that the wave reductions in CSMain are applied to.
struct Types
{
	uint4 u;    // unsigned integer lanes
	int4 i;     // signed integer lanes
	float4 f;   // single-precision floating-point lanes
	double4 d;  // double-precision floating-point lanes
};
8
// Read/write structured buffer of Types elements; CSMain both reads its
// reduction inputs from it and stores the reduction results back into it.
RWStructuredBuffer<Types> data;
10
// Compute entry point that applies each wave reduction intrinsic to the
// buffer element selected by the thread's X dispatch coordinate and writes
// the result back over the operand.
//
// Each arithmetic reduction (sum, product, min, max) is invoked on the uint,
// int, float and double members at four widths: the full 4-component vector,
// a scalar (.x), a 2-component swizzle (.xy) and a 3-component swizzle (.xyz).
// The bitwise reductions (and, or, xor) are invoked on the integer members
// only, at the same four widths.
//
// dti: dispatch thread ID; only its x component is used.
[numthreads(32, 16, 1)]
void CSMain(uint3 dti : SV_DispatchThreadID)
{
	// NOTE(review): only dti.x selects the element, so threads that differ
	// solely in dti.y address the same buffer entry.
	const uint idx = dti.x;

	// ---- WaveActiveSum ----
	data[idx].u = WaveActiveSum(data[idx].u);
	data[idx].u.x = WaveActiveSum(data[idx].u.x);
	data[idx].u.xy = WaveActiveSum(data[idx].u.xy);
	data[idx].u.xyz = WaveActiveSum(data[idx].u.xyz);

	data[idx].i = WaveActiveSum(data[idx].i);
	data[idx].i.x = WaveActiveSum(data[idx].i.x);
	data[idx].i.xy = WaveActiveSum(data[idx].i.xy);
	data[idx].i.xyz = WaveActiveSum(data[idx].i.xyz);

	data[idx].f = WaveActiveSum(data[idx].f);
	data[idx].f.x = WaveActiveSum(data[idx].f.x);
	data[idx].f.xy = WaveActiveSum(data[idx].f.xy);
	data[idx].f.xyz = WaveActiveSum(data[idx].f.xyz);

	data[idx].d = WaveActiveSum(data[idx].d);
	data[idx].d.x = WaveActiveSum(data[idx].d.x);
	data[idx].d.xy = WaveActiveSum(data[idx].d.xy);
	data[idx].d.xyz = WaveActiveSum(data[idx].d.xyz);

	// ---- WaveActiveProduct ----
	data[idx].u = WaveActiveProduct(data[idx].u);
	data[idx].u.x = WaveActiveProduct(data[idx].u.x);
	data[idx].u.xy = WaveActiveProduct(data[idx].u.xy);
	data[idx].u.xyz = WaveActiveProduct(data[idx].u.xyz);

	data[idx].i = WaveActiveProduct(data[idx].i);
	data[idx].i.x = WaveActiveProduct(data[idx].i.x);
	data[idx].i.xy = WaveActiveProduct(data[idx].i.xy);
	data[idx].i.xyz = WaveActiveProduct(data[idx].i.xyz);

	data[idx].f = WaveActiveProduct(data[idx].f);
	data[idx].f.x = WaveActiveProduct(data[idx].f.x);
	data[idx].f.xy = WaveActiveProduct(data[idx].f.xy);
	data[idx].f.xyz = WaveActiveProduct(data[idx].f.xyz);

	data[idx].d = WaveActiveProduct(data[idx].d);
	data[idx].d.x = WaveActiveProduct(data[idx].d.x);
	data[idx].d.xy = WaveActiveProduct(data[idx].d.xy);
	data[idx].d.xyz = WaveActiveProduct(data[idx].d.xyz);

	// ---- WaveActiveMin ----
	data[idx].u = WaveActiveMin(data[idx].u);
	data[idx].u.x = WaveActiveMin(data[idx].u.x);
	data[idx].u.xy = WaveActiveMin(data[idx].u.xy);
	data[idx].u.xyz = WaveActiveMin(data[idx].u.xyz);

	data[idx].i = WaveActiveMin(data[idx].i);
	data[idx].i.x = WaveActiveMin(data[idx].i.x);
	data[idx].i.xy = WaveActiveMin(data[idx].i.xy);
	data[idx].i.xyz = WaveActiveMin(data[idx].i.xyz);

	data[idx].f = WaveActiveMin(data[idx].f);
	data[idx].f.x = WaveActiveMin(data[idx].f.x);
	data[idx].f.xy = WaveActiveMin(data[idx].f.xy);
	data[idx].f.xyz = WaveActiveMin(data[idx].f.xyz);

	data[idx].d = WaveActiveMin(data[idx].d);
	data[idx].d.x = WaveActiveMin(data[idx].d.x);
	data[idx].d.xy = WaveActiveMin(data[idx].d.xy);
	data[idx].d.xyz = WaveActiveMin(data[idx].d.xyz);

	// ---- WaveActiveMax ----
	data[idx].u = WaveActiveMax(data[idx].u);
	data[idx].u.x = WaveActiveMax(data[idx].u.x);
	data[idx].u.xy = WaveActiveMax(data[idx].u.xy);
	data[idx].u.xyz = WaveActiveMax(data[idx].u.xyz);

	data[idx].i = WaveActiveMax(data[idx].i);
	data[idx].i.x = WaveActiveMax(data[idx].i.x);
	data[idx].i.xy = WaveActiveMax(data[idx].i.xy);
	data[idx].i.xyz = WaveActiveMax(data[idx].i.xyz);

	data[idx].f = WaveActiveMax(data[idx].f);
	data[idx].f.x = WaveActiveMax(data[idx].f.x);
	data[idx].f.xy = WaveActiveMax(data[idx].f.xy);
	data[idx].f.xyz = WaveActiveMax(data[idx].f.xyz);

	data[idx].d = WaveActiveMax(data[idx].d);
	data[idx].d.x = WaveActiveMax(data[idx].d.x);
	data[idx].d.xy = WaveActiveMax(data[idx].d.xy);
	data[idx].d.xyz = WaveActiveMax(data[idx].d.xyz);

	// ---- WaveActiveBitAnd (integer members only) ----
	data[idx].u = WaveActiveBitAnd(data[idx].u);
	data[idx].u.x = WaveActiveBitAnd(data[idx].u.x);
	data[idx].u.xy = WaveActiveBitAnd(data[idx].u.xy);
	data[idx].u.xyz = WaveActiveBitAnd(data[idx].u.xyz);

	data[idx].i = WaveActiveBitAnd(data[idx].i);
	data[idx].i.x = WaveActiveBitAnd(data[idx].i.x);
	data[idx].i.xy = WaveActiveBitAnd(data[idx].i.xy);
	data[idx].i.xyz = WaveActiveBitAnd(data[idx].i.xyz);

	// ---- WaveActiveBitOr (integer members only) ----
	data[idx].u = WaveActiveBitOr(data[idx].u);
	data[idx].u.x = WaveActiveBitOr(data[idx].u.x);
	data[idx].u.xy = WaveActiveBitOr(data[idx].u.xy);
	data[idx].u.xyz = WaveActiveBitOr(data[idx].u.xyz);

	data[idx].i = WaveActiveBitOr(data[idx].i);
	data[idx].i.x = WaveActiveBitOr(data[idx].i.x);
	data[idx].i.xy = WaveActiveBitOr(data[idx].i.xy);
	data[idx].i.xyz = WaveActiveBitOr(data[idx].i.xyz);

	// ---- WaveActiveBitXor (integer members only) ----
	data[idx].u = WaveActiveBitXor(data[idx].u);
	data[idx].u.x = WaveActiveBitXor(data[idx].u.x);
	data[idx].u.xy = WaveActiveBitXor(data[idx].u.xy);
	data[idx].u.xyz = WaveActiveBitXor(data[idx].u.xyz);

	data[idx].i = WaveActiveBitXor(data[idx].i);
	data[idx].i.x = WaveActiveBitXor(data[idx].i.x);
	data[idx].i.xy = WaveActiveBitXor(data[idx].i.xy);
	data[idx].i.xyz = WaveActiveBitXor(data[idx].i.xyz);

	// ---- WaveActiveCountBits ----
	// The argument is the per-thread predicate (u.x == 0); the returned count
	// is stored back into u.x.
	data[idx].u.x = WaveActiveCountBits(data[idx].u.x == 0);
}
126