// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 #include "SpirvShader.hpp"
16 
17 #include "ShaderCore.hpp"
18 #include "Device/Primitive.hpp"
19 #include "Pipeline/Constants.hpp"
20 
21 #include <spirv/unified1/GLSL.std.450.h>
22 #include <spirv/unified1/spirv.hpp>
23 
24 namespace {
// Single-precision approximation of pi; used in this file for the
// degrees <-> radians scale factors.
constexpr float PI = 3.141592653589793f;
26 
// Evaluates an interpolant from its plane equation coefficients.
//
//   flat == true                 -> C
//   flat == false                -> A * x + B * y + C
//   flat == false && perspective -> (A * x + B * y + C) * rhw
//
// `perspective` and `rhw` are ignored when `flat` is set.
sw::SIMD::Float Interpolate(const sw::SIMD::Float &x, const sw::SIMD::Float &y, const sw::SIMD::Float &rhw,
                            const sw::SIMD::Float &A, const sw::SIMD::Float &B, const sw::SIMD::Float &C,
                            bool flat, bool perspective)
{
	// Start from the constant term; for flat interpolation this is the result.
	sw::SIMD::Float interpolant = C;

	if(!flat)
	{
		interpolant += x * A + y * B;

		if(perspective)
		{
			// Scale by the caller-supplied rhw factor.
			interpolant *= rhw;
		}
	}

	return interpolant;
}
45 
// TODO(b/179925303): Eliminate when interpolants are tightly packed.
//
// Remaps a component offset that counts `components_per_row` components per
// row into one where every row starts on a multiple of 4.
//
// offset             - component offset, counted with components_per_row per row.
// components_per_row - number of components in each row. Must be non-zero when
//                      useArrayOffset is true, since it is used as a divisor.
// useArrayOffset     - when false, `offset` is returned unchanged.
//
// Returns row * 4 + column, where row = offset / components_per_row and
// column = offset % components_per_row.
uint32_t ComputeInterpolantOffset(uint32_t offset, uint32_t components_per_row, bool useArrayOffset)
{
	if(!useArrayOffset)
	{
		return offset;
	}

	const uint32_t row = offset / components_per_row;
	const uint32_t column = offset % components_per_row;

	return (row * 4) + column;
}
56 
ComputeInterpolantOffset(rr::Int offset,uint32_t components_per_row,bool useArrayOffset)57 rr::Int ComputeInterpolantOffset(rr::Int offset, uint32_t components_per_row, bool useArrayOffset)
58 {
59 	if(useArrayOffset)
60 	{
61 		rr::Int interpolant_offset = offset / rr::Int(components_per_row);
62 		offset = (interpolant_offset << 2) + (offset - interpolant_offset * rr::Int(components_per_row));
63 	}
64 	return offset;
65 }
66 
67 }  // namespace
68 
69 namespace sw {
70 
EmitExtGLSLstd450(InsnIterator insn,EmitState * state) const71 SpirvShader::EmitResult SpirvShader::EmitExtGLSLstd450(InsnIterator insn, EmitState *state) const
72 {
73 	auto &type = getType(insn.resultTypeId());
74 	auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
75 	auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
76 
77 	switch(extInstIndex)
78 	{
79 		case GLSLstd450FAbs:
80 		{
81 			auto src = Operand(this, state, insn.word(5));
82 			for(auto i = 0u; i < type.componentCount; i++)
83 			{
84 				dst.move(i, Abs(src.Float(i)));
85 			}
86 			break;
87 		}
88 		case GLSLstd450SAbs:
89 		{
90 			auto src = Operand(this, state, insn.word(5));
91 			for(auto i = 0u; i < type.componentCount; i++)
92 			{
93 				dst.move(i, Abs(src.Int(i)));
94 			}
95 			break;
96 		}
97 		case GLSLstd450Cross:
98 		{
99 			auto lhs = Operand(this, state, insn.word(5));
100 			auto rhs = Operand(this, state, insn.word(6));
101 			dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
102 			dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
103 			dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
104 			break;
105 		}
106 		case GLSLstd450Floor:
107 		{
108 			auto src = Operand(this, state, insn.word(5));
109 			for(auto i = 0u; i < type.componentCount; i++)
110 			{
111 				dst.move(i, Floor(src.Float(i)));
112 			}
113 			break;
114 		}
115 		case GLSLstd450Trunc:
116 		{
117 			auto src = Operand(this, state, insn.word(5));
118 			for(auto i = 0u; i < type.componentCount; i++)
119 			{
120 				dst.move(i, Trunc(src.Float(i)));
121 			}
122 			break;
123 		}
124 		case GLSLstd450Ceil:
125 		{
126 			auto src = Operand(this, state, insn.word(5));
127 			for(auto i = 0u; i < type.componentCount; i++)
128 			{
129 				dst.move(i, Ceil(src.Float(i)));
130 			}
131 			break;
132 		}
133 		case GLSLstd450Fract:
134 		{
135 			auto src = Operand(this, state, insn.word(5));
136 			for(auto i = 0u; i < type.componentCount; i++)
137 			{
138 				dst.move(i, Frac(src.Float(i)));
139 			}
140 			break;
141 		}
142 		case GLSLstd450Round:
143 		{
144 			auto src = Operand(this, state, insn.word(5));
145 			for(auto i = 0u; i < type.componentCount; i++)
146 			{
147 				dst.move(i, Round(src.Float(i)));
148 			}
149 			break;
150 		}
151 		case GLSLstd450RoundEven:
152 		{
153 			auto src = Operand(this, state, insn.word(5));
154 			for(auto i = 0u; i < type.componentCount; i++)
155 			{
156 				auto x = Round(src.Float(i));
157 				// dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
158 				dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
159 				                    SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
160 			}
161 			break;
162 		}
163 		case GLSLstd450FMin:
164 		{
165 			auto lhs = Operand(this, state, insn.word(5));
166 			auto rhs = Operand(this, state, insn.word(6));
167 			for(auto i = 0u; i < type.componentCount; i++)
168 			{
169 				dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
170 			}
171 			break;
172 		}
173 		case GLSLstd450FMax:
174 		{
175 			auto lhs = Operand(this, state, insn.word(5));
176 			auto rhs = Operand(this, state, insn.word(6));
177 			for(auto i = 0u; i < type.componentCount; i++)
178 			{
179 				dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
180 			}
181 			break;
182 		}
183 		case GLSLstd450SMin:
184 		{
185 			auto lhs = Operand(this, state, insn.word(5));
186 			auto rhs = Operand(this, state, insn.word(6));
187 			for(auto i = 0u; i < type.componentCount; i++)
188 			{
189 				dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
190 			}
191 			break;
192 		}
193 		case GLSLstd450SMax:
194 		{
195 			auto lhs = Operand(this, state, insn.word(5));
196 			auto rhs = Operand(this, state, insn.word(6));
197 			for(auto i = 0u; i < type.componentCount; i++)
198 			{
199 				dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
200 			}
201 			break;
202 		}
203 		case GLSLstd450UMin:
204 		{
205 			auto lhs = Operand(this, state, insn.word(5));
206 			auto rhs = Operand(this, state, insn.word(6));
207 			for(auto i = 0u; i < type.componentCount; i++)
208 			{
209 				dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
210 			}
211 			break;
212 		}
213 		case GLSLstd450UMax:
214 		{
215 			auto lhs = Operand(this, state, insn.word(5));
216 			auto rhs = Operand(this, state, insn.word(6));
217 			for(auto i = 0u; i < type.componentCount; i++)
218 			{
219 				dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
220 			}
221 			break;
222 		}
223 		case GLSLstd450Step:
224 		{
225 			auto edge = Operand(this, state, insn.word(5));
226 			auto x = Operand(this, state, insn.word(6));
227 			for(auto i = 0u; i < type.componentCount; i++)
228 			{
229 				dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
230 			}
231 			break;
232 		}
233 		case GLSLstd450SmoothStep:
234 		{
235 			auto edge0 = Operand(this, state, insn.word(5));
236 			auto edge1 = Operand(this, state, insn.word(6));
237 			auto x = Operand(this, state, insn.word(7));
238 			for(auto i = 0u; i < type.componentCount; i++)
239 			{
240 				auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
241 				                      (edge1.Float(i) - edge0.Float(i)),
242 				                  SIMD::Float(0.0f)),
243 				              SIMD::Float(1.0f));
244 				dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
245 			}
246 			break;
247 		}
248 		case GLSLstd450FMix:
249 		{
250 			auto x = Operand(this, state, insn.word(5));
251 			auto y = Operand(this, state, insn.word(6));
252 			auto a = Operand(this, state, insn.word(7));
253 			for(auto i = 0u; i < type.componentCount; i++)
254 			{
255 				dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
256 			}
257 			break;
258 		}
259 		case GLSLstd450FClamp:
260 		{
261 			auto x = Operand(this, state, insn.word(5));
262 			auto minVal = Operand(this, state, insn.word(6));
263 			auto maxVal = Operand(this, state, insn.word(7));
264 			for(auto i = 0u; i < type.componentCount; i++)
265 			{
266 				dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
267 			}
268 			break;
269 		}
270 		case GLSLstd450SClamp:
271 		{
272 			auto x = Operand(this, state, insn.word(5));
273 			auto minVal = Operand(this, state, insn.word(6));
274 			auto maxVal = Operand(this, state, insn.word(7));
275 			for(auto i = 0u; i < type.componentCount; i++)
276 			{
277 				dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
278 			}
279 			break;
280 		}
281 		case GLSLstd450UClamp:
282 		{
283 			auto x = Operand(this, state, insn.word(5));
284 			auto minVal = Operand(this, state, insn.word(6));
285 			auto maxVal = Operand(this, state, insn.word(7));
286 			for(auto i = 0u; i < type.componentCount; i++)
287 			{
288 				dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
289 			}
290 			break;
291 		}
292 		case GLSLstd450FSign:
293 		{
294 			auto src = Operand(this, state, insn.word(5));
295 			for(auto i = 0u; i < type.componentCount; i++)
296 			{
297 				auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
298 				auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
299 				dst.move(i, neg | pos);
300 			}
301 			break;
302 		}
303 		case GLSLstd450SSign:
304 		{
305 			auto src = Operand(this, state, insn.word(5));
306 			for(auto i = 0u; i < type.componentCount; i++)
307 			{
308 				auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
309 				auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
310 				dst.move(i, neg | pos);
311 			}
312 			break;
313 		}
314 		case GLSLstd450Reflect:
315 		{
316 			auto I = Operand(this, state, insn.word(5));
317 			auto N = Operand(this, state, insn.word(6));
318 
319 			SIMD::Float d = Dot(type.componentCount, I, N);
320 
321 			for(auto i = 0u; i < type.componentCount; i++)
322 			{
323 				dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
324 			}
325 			break;
326 		}
327 		case GLSLstd450Refract:
328 		{
329 			auto I = Operand(this, state, insn.word(5));
330 			auto N = Operand(this, state, insn.word(6));
331 			auto eta = Operand(this, state, insn.word(7));
332 
333 			SIMD::Float d = Dot(type.componentCount, I, N);
334 			SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
335 			SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
336 			SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
337 
338 			for(auto i = 0u; i < type.componentCount; i++)
339 			{
340 				dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
341 			}
342 			break;
343 		}
344 		case GLSLstd450FaceForward:
345 		{
346 			auto N = Operand(this, state, insn.word(5));
347 			auto I = Operand(this, state, insn.word(6));
348 			auto Nref = Operand(this, state, insn.word(7));
349 
350 			SIMD::Float d = Dot(type.componentCount, I, Nref);
351 			SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
352 
353 			for(auto i = 0u; i < type.componentCount; i++)
354 			{
355 				auto n = N.Float(i);
356 				dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
357 			}
358 			break;
359 		}
360 		case GLSLstd450Length:
361 		{
362 			auto x = Operand(this, state, insn.word(5));
363 			SIMD::Float d = Dot(getType(getObject(insn.word(5))).componentCount, x, x);
364 
365 			dst.move(0, Sqrt(d));
366 			break;
367 		}
368 		case GLSLstd450Normalize:
369 		{
370 			auto x = Operand(this, state, insn.word(5));
371 			SIMD::Float d = Dot(getType(getObject(insn.word(5))).componentCount, x, x);
372 			SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
373 
374 			for(auto i = 0u; i < type.componentCount; i++)
375 			{
376 				dst.move(i, invLength * x.Float(i));
377 			}
378 			break;
379 		}
380 		case GLSLstd450Distance:
381 		{
382 			auto p0 = Operand(this, state, insn.word(5));
383 			auto p1 = Operand(this, state, insn.word(6));
384 
385 			// sqrt(dot(p0-p1, p0-p1))
386 			SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
387 
388 			for(auto i = 1u; i < p0.componentCount; i++)
389 			{
390 				d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
391 			}
392 
393 			dst.move(0, Sqrt(d));
394 			break;
395 		}
396 		case GLSLstd450Modf:
397 		{
398 			auto val = Operand(this, state, insn.word(5));
399 			auto ptrId = Object::ID(insn.word(6));
400 
401 			Intermediate whole(type.componentCount);
402 
403 			for(auto i = 0u; i < type.componentCount; i++)
404 			{
405 				auto wholeAndFrac = Modf(val.Float(i));
406 				dst.move(i, wholeAndFrac.second);
407 				whole.move(i, wholeAndFrac.first);
408 			}
409 
410 			Store(ptrId, whole, false, std::memory_order_relaxed, state);
411 			break;
412 		}
413 		case GLSLstd450ModfStruct:
414 		{
415 			auto val = Operand(this, state, insn.word(5));
416 
417 			for(auto i = 0u; i < val.componentCount; i++)
418 			{
419 				auto wholeAndFrac = Modf(val.Float(i));
420 				dst.move(i, wholeAndFrac.second);
421 				dst.move(val.componentCount + i, wholeAndFrac.first);
422 			}
423 			break;
424 		}
425 		case GLSLstd450PackSnorm4x8:
426 		{
427 			auto val = Operand(this, state, insn.word(5));
428 			dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
429 			             SIMD::Int(0xFF)) |
430 			                ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
431 			                  SIMD::Int(0xFF))
432 			                 << 8) |
433 			                ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
434 			                  SIMD::Int(0xFF))
435 			                 << 16) |
436 			                ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
437 			                  SIMD::Int(0xFF))
438 			                 << 24));
439 			break;
440 		}
441 		case GLSLstd450PackUnorm4x8:
442 		{
443 			auto val = Operand(this, state, insn.word(5));
444 			dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
445 			                ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
446 			                ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
447 			                ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
448 			break;
449 		}
450 		case GLSLstd450PackSnorm2x16:
451 		{
452 			auto val = Operand(this, state, insn.word(5));
453 			dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
454 			             SIMD::Int(0xFFFF)) |
455 			                ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
456 			                  SIMD::Int(0xFFFF))
457 			                 << 16));
458 			break;
459 		}
460 		case GLSLstd450PackUnorm2x16:
461 		{
462 			auto val = Operand(this, state, insn.word(5));
463 			dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
464 			             SIMD::UInt(0xFFFF)) |
465 			                ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
466 			                  SIMD::UInt(0xFFFF))
467 			                 << 16));
468 			break;
469 		}
470 		case GLSLstd450PackHalf2x16:
471 		{
472 			auto val = Operand(this, state, insn.word(5));
473 			dst.move(0, floatToHalfBits(val.UInt(0), false) | floatToHalfBits(val.UInt(1), true));
474 			break;
475 		}
476 		case GLSLstd450UnpackSnorm4x8:
477 		{
478 			auto val = Operand(this, state, insn.word(5));
479 			dst.move(0, Min(Max(SIMD::Float(((val.Int(0) << 24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
480 			dst.move(1, Min(Max(SIMD::Float(((val.Int(0) << 16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
481 			dst.move(2, Min(Max(SIMD::Float(((val.Int(0) << 8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
482 			dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
483 			break;
484 		}
485 		case GLSLstd450UnpackUnorm4x8:
486 		{
487 			auto val = Operand(this, state, insn.word(5));
488 			dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
489 			dst.move(1, SIMD::Float(((val.UInt(0) >> 8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
490 			dst.move(2, SIMD::Float(((val.UInt(0) >> 16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
491 			dst.move(3, SIMD::Float(((val.UInt(0) >> 24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
492 			break;
493 		}
494 		case GLSLstd450UnpackSnorm2x16:
495 		{
496 			auto val = Operand(this, state, insn.word(5));
497 			// clamp(f / 32767.0, -1.0, 1.0)
498 			dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
499 			                        SIMD::Float(1.0f / float(0x7FFF0000)),
500 			                    SIMD::Float(-1.0f)),
501 			                SIMD::Float(1.0f)));
502 			dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
503 			                    SIMD::Float(-1.0f)),
504 			                SIMD::Float(1.0f)));
505 			break;
506 		}
507 		case GLSLstd450UnpackUnorm2x16:
508 		{
509 			auto val = Operand(this, state, insn.word(5));
510 			// f / 65535.0
511 			dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
512 			dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
513 			break;
514 		}
515 		case GLSLstd450UnpackHalf2x16:
516 		{
517 			auto val = Operand(this, state, insn.word(5));
518 			dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
519 			dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
520 			break;
521 		}
522 		case GLSLstd450Fma:
523 		{
524 			auto a = Operand(this, state, insn.word(5));
525 			auto b = Operand(this, state, insn.word(6));
526 			auto c = Operand(this, state, insn.word(7));
527 			for(auto i = 0u; i < type.componentCount; i++)
528 			{
529 				dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i)));
530 			}
531 			break;
532 		}
533 		case GLSLstd450Frexp:
534 		{
535 			auto val = Operand(this, state, insn.word(5));
536 			auto ptrId = Object::ID(insn.word(6));
537 
538 			Intermediate exp(type.componentCount);
539 
540 			for(auto i = 0u; i < type.componentCount; i++)
541 			{
542 				auto significandAndExponent = Frexp(val.Float(i));
543 				dst.move(i, significandAndExponent.first);
544 				exp.move(i, significandAndExponent.second);
545 			}
546 
547 			Store(ptrId, exp, false, std::memory_order_relaxed, state);
548 			break;
549 		}
550 		case GLSLstd450FrexpStruct:
551 		{
552 			auto val = Operand(this, state, insn.word(5));
553 
554 			for(auto i = 0u; i < val.componentCount; i++)
555 			{
556 				auto significandAndExponent = Frexp(val.Float(i));
557 				dst.move(i, significandAndExponent.first);
558 				dst.move(val.componentCount + i, significandAndExponent.second);
559 			}
560 			break;
561 		}
562 		case GLSLstd450Ldexp:
563 		{
564 			auto significand = Operand(this, state, insn.word(5));
565 			auto exponent = Operand(this, state, insn.word(6));
566 			for(auto i = 0u; i < type.componentCount; i++)
567 			{
568 				// Assumes IEEE 754
569 				auto in = significand.Float(i);
570 				auto significandExponent = Exponent(in);
571 				auto combinedExponent = exponent.Int(i) + significandExponent;
572 				auto isSignificandZero = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0)));
573 				auto isSignificandInf = SIMD::UInt(IsInf(in));
574 				auto isSignificandNaN = SIMD::UInt(IsNan(in));
575 				auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126)));
576 				auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128)));
577 				auto isExponentInBounds = isExponentNotTooSmall & isExponentNotTooLarge;
578 
579 				SIMD::UInt v;
580 				v = significand.UInt(i) & SIMD::UInt(0x7FFFFF);                          // Add significand.
581 				v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23));  // Add exponent.
582 				v &= isExponentInBounds;                                                 // Clear v if the exponent is OOB.
583 
584 				v |= significand.UInt(i) & SIMD::UInt(0x80000000);     // Add sign bit.
585 				v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000);  // Mark as inf if the exponent is too great.
586 
587 				// If the input significand is zero, inf or nan, just return the
588 				// input significand.
589 				auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN;
590 				v = (v & ~passthrough) | (significand.UInt(i) & passthrough);
591 
592 				dst.move(i, As<SIMD::Float>(v));
593 			}
594 			break;
595 		}
596 		case GLSLstd450Radians:
597 		{
598 			auto degrees = Operand(this, state, insn.word(5));
599 			for(auto i = 0u; i < type.componentCount; i++)
600 			{
601 				dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
602 			}
603 			break;
604 		}
605 		case GLSLstd450Degrees:
606 		{
607 			auto radians = Operand(this, state, insn.word(5));
608 			for(auto i = 0u; i < type.componentCount; i++)
609 			{
610 				dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
611 			}
612 			break;
613 		}
614 		case GLSLstd450Sin:
615 		{
616 			auto radians = Operand(this, state, insn.word(5));
617 			for(auto i = 0u; i < type.componentCount; i++)
618 			{
619 				dst.move(i, Sin(radians.Float(i)));
620 			}
621 			break;
622 		}
623 		case GLSLstd450Cos:
624 		{
625 			auto radians = Operand(this, state, insn.word(5));
626 			for(auto i = 0u; i < type.componentCount; i++)
627 			{
628 				dst.move(i, Cos(radians.Float(i)));
629 			}
630 			break;
631 		}
632 		case GLSLstd450Tan:
633 		{
634 			auto radians = Operand(this, state, insn.word(5));
635 			for(auto i = 0u; i < type.componentCount; i++)
636 			{
637 				dst.move(i, Tan(radians.Float(i)));
638 			}
639 			break;
640 		}
641 		case GLSLstd450Asin:
642 		{
643 			auto val = Operand(this, state, insn.word(5));
644 			Decorations d;
645 			ApplyDecorationsForId(&d, insn.word(5));
646 			for(auto i = 0u; i < type.componentCount; i++)
647 			{
648 				dst.move(i, Asin(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
649 			}
650 			break;
651 		}
652 		case GLSLstd450Acos:
653 		{
654 			auto val = Operand(this, state, insn.word(5));
655 			Decorations d;
656 			ApplyDecorationsForId(&d, insn.word(5));
657 			for(auto i = 0u; i < type.componentCount; i++)
658 			{
659 				dst.move(i, Acos(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
660 			}
661 			break;
662 		}
663 		case GLSLstd450Atan:
664 		{
665 			auto val = Operand(this, state, insn.word(5));
666 			for(auto i = 0u; i < type.componentCount; i++)
667 			{
668 				dst.move(i, Atan(val.Float(i)));
669 			}
670 			break;
671 		}
672 		case GLSLstd450Sinh:
673 		{
674 			auto val = Operand(this, state, insn.word(5));
675 			for(auto i = 0u; i < type.componentCount; i++)
676 			{
677 				dst.move(i, Sinh(val.Float(i)));
678 			}
679 			break;
680 		}
681 		case GLSLstd450Cosh:
682 		{
683 			auto val = Operand(this, state, insn.word(5));
684 			for(auto i = 0u; i < type.componentCount; i++)
685 			{
686 				dst.move(i, Cosh(val.Float(i)));
687 			}
688 			break;
689 		}
690 		case GLSLstd450Tanh:
691 		{
692 			auto val = Operand(this, state, insn.word(5));
693 			for(auto i = 0u; i < type.componentCount; i++)
694 			{
695 				dst.move(i, Tanh(val.Float(i)));
696 			}
697 			break;
698 		}
699 		case GLSLstd450Asinh:
700 		{
701 			auto val = Operand(this, state, insn.word(5));
702 			for(auto i = 0u; i < type.componentCount; i++)
703 			{
704 				dst.move(i, Asinh(val.Float(i)));
705 			}
706 			break;
707 		}
708 		case GLSLstd450Acosh:
709 		{
710 			auto val = Operand(this, state, insn.word(5));
711 			for(auto i = 0u; i < type.componentCount; i++)
712 			{
713 				dst.move(i, Acosh(val.Float(i)));
714 			}
715 			break;
716 		}
717 		case GLSLstd450Atanh:
718 		{
719 			auto val = Operand(this, state, insn.word(5));
720 			for(auto i = 0u; i < type.componentCount; i++)
721 			{
722 				dst.move(i, Atanh(val.Float(i)));
723 			}
724 			break;
725 		}
726 		case GLSLstd450Atan2:
727 		{
728 			auto x = Operand(this, state, insn.word(5));
729 			auto y = Operand(this, state, insn.word(6));
730 			for(auto i = 0u; i < type.componentCount; i++)
731 			{
732 				dst.move(i, Atan2(x.Float(i), y.Float(i)));
733 			}
734 			break;
735 		}
736 		case GLSLstd450Pow:
737 		{
738 			auto x = Operand(this, state, insn.word(5));
739 			auto y = Operand(this, state, insn.word(6));
740 			for(auto i = 0u; i < type.componentCount; i++)
741 			{
742 				dst.move(i, Pow(x.Float(i), y.Float(i)));
743 			}
744 			break;
745 		}
746 		case GLSLstd450Exp:
747 		{
748 			auto val = Operand(this, state, insn.word(5));
749 			for(auto i = 0u; i < type.componentCount; i++)
750 			{
751 				dst.move(i, Exp(val.Float(i)));
752 			}
753 			break;
754 		}
755 		case GLSLstd450Log:
756 		{
757 			auto val = Operand(this, state, insn.word(5));
758 			for(auto i = 0u; i < type.componentCount; i++)
759 			{
760 				dst.move(i, Log(val.Float(i)));
761 			}
762 			break;
763 		}
764 		case GLSLstd450Exp2:
765 		{
766 			auto val = Operand(this, state, insn.word(5));
767 			for(auto i = 0u; i < type.componentCount; i++)
768 			{
769 				dst.move(i, Exp2(val.Float(i)));
770 			}
771 			break;
772 		}
773 		case GLSLstd450Log2:
774 		{
775 			auto val = Operand(this, state, insn.word(5));
776 			for(auto i = 0u; i < type.componentCount; i++)
777 			{
778 				dst.move(i, Log2(val.Float(i)));
779 			}
780 			break;
781 		}
782 		case GLSLstd450Sqrt:
783 		{
784 			auto val = Operand(this, state, insn.word(5));
785 			for(auto i = 0u; i < type.componentCount; i++)
786 			{
787 				dst.move(i, Sqrt(val.Float(i)));
788 			}
789 			break;
790 		}
791 		case GLSLstd450InverseSqrt:
792 		{
793 			auto val = Operand(this, state, insn.word(5));
794 			Decorations d;
795 			ApplyDecorationsForId(&d, insn.word(5));
796 
797 			for(auto i = 0u; i < type.componentCount; i++)
798 			{
799 				dst.move(i, RcpSqrt(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
800 			}
801 			break;
802 		}
803 		case GLSLstd450Determinant:
804 		{
805 			auto mat = Operand(this, state, insn.word(5));
806 
807 			switch(mat.componentCount)
808 			{
809 				case 4:  // 2x2
810 					dst.move(0, Determinant(
811 					                mat.Float(0), mat.Float(1),
812 					                mat.Float(2), mat.Float(3)));
813 					break;
814 				case 9:  // 3x3
815 					dst.move(0, Determinant(
816 					                mat.Float(0), mat.Float(1), mat.Float(2),
817 					                mat.Float(3), mat.Float(4), mat.Float(5),
818 					                mat.Float(6), mat.Float(7), mat.Float(8)));
819 					break;
820 				case 16:  // 4x4
821 					dst.move(0, Determinant(
822 					                mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
823 					                mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
824 					                mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
825 					                mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
826 					break;
827 				default:
828 					UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(mat.componentCount));
829 			}
830 			break;
831 		}
832 		case GLSLstd450MatrixInverse:
833 		{
834 			auto mat = Operand(this, state, insn.word(5));
835 
836 			switch(mat.componentCount)
837 			{
838 				case 4:  // 2x2
839 				{
840 					auto inv = MatrixInverse(
841 					    mat.Float(0), mat.Float(1),
842 					    mat.Float(2), mat.Float(3));
843 					for(uint32_t i = 0; i < inv.size(); i++)
844 					{
845 						dst.move(i, inv[i]);
846 					}
847 					break;
848 				}
849 				case 9:  // 3x3
850 				{
851 					auto inv = MatrixInverse(
852 					    mat.Float(0), mat.Float(1), mat.Float(2),
853 					    mat.Float(3), mat.Float(4), mat.Float(5),
854 					    mat.Float(6), mat.Float(7), mat.Float(8));
855 					for(uint32_t i = 0; i < inv.size(); i++)
856 					{
857 						dst.move(i, inv[i]);
858 					}
859 					break;
860 				}
861 				case 16:  // 4x4
862 				{
863 					auto inv = MatrixInverse(
864 					    mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
865 					    mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
866 					    mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
867 					    mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
868 					for(uint32_t i = 0; i < inv.size(); i++)
869 					{
870 						dst.move(i, inv[i]);
871 					}
872 					break;
873 				}
874 				default:
875 					UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(mat.componentCount));
876 			}
877 			break;
878 		}
879 		case GLSLstd450IMix:
880 		{
881 			UNREACHABLE("GLSLstd450IMix has been removed from the specification");
882 			break;
883 		}
884 		case GLSLstd450PackDouble2x32:
885 		{
886 			UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
887 			break;
888 		}
889 		case GLSLstd450UnpackDouble2x32:
890 		{
891 			UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
892 			break;
893 		}
894 		case GLSLstd450FindILsb:
895 		{
896 			auto val = Operand(this, state, insn.word(5));
897 			for(auto i = 0u; i < type.componentCount; i++)
898 			{
899 				auto v = val.UInt(i);
900 				dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
901 			}
902 			break;
903 		}
904 		case GLSLstd450FindSMsb:
905 		{
906 			auto val = Operand(this, state, insn.word(5));
907 			for(auto i = 0u; i < type.componentCount; i++)
908 			{
909 				auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
910 				dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
911 			}
912 			break;
913 		}
914 		case GLSLstd450FindUMsb:
915 		{
916 			auto val = Operand(this, state, insn.word(5));
917 			for(auto i = 0u; i < type.componentCount; i++)
918 			{
919 				dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
920 			}
921 			break;
922 		}
923 		case GLSLstd450InterpolateAtCentroid:
924 		{
925 			Decorations d;
926 			ApplyDecorationsForId(&d, insn.word(5));
927 			auto ptr = state->getPointer(insn.word(5));
928 			for(auto i = 0u; i < type.componentCount; i++)
929 			{
930 				dst.move(i, Interpolate(ptr, d.Location, 0, i, type.componentCount, state, SpirvShader::Centroid));
931 			}
932 			break;
933 		}
934 		case GLSLstd450InterpolateAtSample:
935 		{
936 			Decorations d;
937 			ApplyDecorationsForId(&d, insn.word(5));
938 			auto ptr = state->getPointer(insn.word(5));
939 			for(auto i = 0u; i < type.componentCount; i++)
940 			{
941 				dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, type.componentCount, state, SpirvShader::AtSample));
942 			}
943 			break;
944 		}
945 		case GLSLstd450InterpolateAtOffset:
946 		{
947 			Decorations d;
948 			ApplyDecorationsForId(&d, insn.word(5));
949 			auto ptr = state->getPointer(insn.word(5));
950 			for(auto i = 0u; i < type.componentCount; i++)
951 			{
952 				dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, type.componentCount, state, SpirvShader::AtOffset));
953 			}
954 			break;
955 		}
956 		case GLSLstd450NMin:
957 		{
958 			auto x = Operand(this, state, insn.word(5));
959 			auto y = Operand(this, state, insn.word(6));
960 			for(auto i = 0u; i < type.componentCount; i++)
961 			{
962 				dst.move(i, NMin(x.Float(i), y.Float(i)));
963 			}
964 			break;
965 		}
966 		case GLSLstd450NMax:
967 		{
968 			auto x = Operand(this, state, insn.word(5));
969 			auto y = Operand(this, state, insn.word(6));
970 			for(auto i = 0u; i < type.componentCount; i++)
971 			{
972 				dst.move(i, NMax(x.Float(i), y.Float(i)));
973 			}
974 			break;
975 		}
976 		case GLSLstd450NClamp:
977 		{
978 			auto x = Operand(this, state, insn.word(5));
979 			auto minVal = Operand(this, state, insn.word(6));
980 			auto maxVal = Operand(this, state, insn.word(7));
981 			for(auto i = 0u; i < type.componentCount; i++)
982 			{
983 				auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
984 				dst.move(i, clamp);
985 			}
986 			break;
987 		}
988 		default:
989 			UNREACHABLE("ExtInst %d", int(extInstIndex));
990 			break;
991 	}
992 
993 	return EmitResult::Continue;
994 }
995 
Interpolate(SIMD::Pointer const & ptr,int32_t location,Object::ID paramId,uint32_t component,uint32_t component_count,EmitState * state,InterpolationType type) const996 SIMD::Float SpirvShader::Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId, uint32_t component,
997                                      uint32_t component_count, EmitState *state, InterpolationType type) const
998 {
999 	uint32_t interpolant = (location * 4);
1000 	uint32_t components_per_row = GetNumInputComponents(location);
1001 	if((location < 0) || (interpolant >= inputs.size()) || (components_per_row == 0))
1002 	{
1003 		return SIMD::Float(0.0f);
1004 	}
1005 
1006 	// Distinguish between the operator[] being used on a vector of on an array
1007 	// If the number of components of the interpolant is 1, then the operator[] automatically means this is an array.
1008 	// Otherwise, if the component_count is 1, than the operator[] can be the result of this operator being called
1009 	// from a vec2, vec3 or vec4, so a component_count greater than 1 means any offset is for an array
1010 	bool useArrayOffset = (components_per_row == 1) || (component_count > 1);
1011 
1012 	const auto &interpolationData = state->routine->interpolationData;
1013 
1014 	SIMD::Float x;
1015 	SIMD::Float y;
1016 	SIMD::Float rhw;
1017 
1018 	switch(type)
1019 	{
1020 		case Centroid:
1021 			x = interpolationData.xCentroid;
1022 			y = interpolationData.yCentroid;
1023 			rhw = interpolationData.rhwCentroid;
1024 			break;
1025 		case AtSample:
1026 			x = SIMD::Float(0.0f);
1027 			y = SIMD::Float(0.0f);
1028 
1029 			if(state->getMultiSampleCount() > 1)
1030 			{
1031 				static constexpr int NUM_SAMPLES = 4;
1032 				ASSERT(state->getMultiSampleCount() == NUM_SAMPLES);
1033 
1034 				Array<Float> sampleX(NUM_SAMPLES);
1035 				Array<Float> sampleY(NUM_SAMPLES);
1036 				for(int i = 0; i < NUM_SAMPLES; ++i)
1037 				{
1038 					sampleX[i] = Constants::SampleLocationsX[i];
1039 					sampleY[i] = Constants::SampleLocationsY[i];
1040 				}
1041 
1042 				auto sampleOperand = Operand(this, state, paramId);
1043 				ASSERT(sampleOperand.componentCount == 1);
1044 
1045 				// If sample does not exist, the position used to interpolate the
1046 				// input variable is undefined, so we just clamp to avoid OOB accesses.
1047 				SIMD::Int samples = sampleOperand.Int(0) & SIMD::Int(NUM_SAMPLES - 1);
1048 
1049 				for(int i = 0; i < SIMD::Width; ++i)
1050 				{
1051 					Int sample = Extract(samples, i);
1052 					x = Insert(x, sampleX[sample], i);
1053 					y = Insert(y, sampleY[sample], i);
1054 				}
1055 			}
1056 
1057 			x += interpolationData.x;
1058 			y += interpolationData.y;
1059 			rhw = interpolationData.rhw;
1060 			break;
1061 		case AtOffset:
1062 		{
1063 			//  An offset of (0, 0) identifies the center of the pixel.
1064 			auto offset = Operand(this, state, paramId);
1065 			ASSERT(offset.componentCount == 2);
1066 
1067 			x = interpolationData.x + offset.Float(0);
1068 			y = interpolationData.y + offset.Float(1);
1069 			rhw = interpolationData.rhw;
1070 		}
1071 		break;
1072 		default:
1073 			UNREACHABLE("Unknown interpolation type: %d", (int)type);
1074 			return SIMD::Float(0.0f);
1075 	}
1076 
1077 	Pointer<Byte> planeEquation = interpolationData.primitive + OFFSET(Primitive, V[interpolant]);
1078 	if(ptr.hasDynamicOffsets)
1079 	{
1080 		// This code assumes all dynamic offsets are equal
1081 		Int offset = ComputeInterpolantOffset(((Extract(ptr.dynamicOffsets, 0) + ptr.staticOffsets[0]) >> 2) + component, components_per_row, useArrayOffset);
1082 		offset = Min(offset, Int(inputs.size() - interpolant - 1));
1083 		planeEquation += (offset * sizeof(PlaneEquation));
1084 	}
1085 	else
1086 	{
1087 		ASSERT(ptr.hasStaticEqualOffsets());
1088 
1089 		uint32_t offset = ComputeInterpolantOffset((ptr.staticOffsets[0] >> 2) + component, components_per_row, useArrayOffset);
1090 		if((interpolant + offset) >= inputs.size())
1091 		{
1092 			return SIMD::Float(0.0f);
1093 		}
1094 		planeEquation += offset * sizeof(PlaneEquation);
1095 	}
1096 
1097 	return SpirvRoutine::interpolateAtXY(x, y, rhw, planeEquation, false, true);
1098 }
1099 
// Loads the coefficients of the plane equation found at `planeEquation` and
// evaluates it at (x, y).
//
// The C coefficient is always loaded. A and B are only loaded when `flat` is
// false; otherwise they are handed on default-constructed, which is fine
// because ::Interpolate() reads them only in the non-flat case. When
// `perspective` is set, ::Interpolate() additionally scales the non-flat
// result by `rhw`.
SIMD::Float SpirvRoutine::interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
{
	SIMD::Float gradientX;
	SIMD::Float gradientY;

	// The literal 16 is passed through as the second Pointer constructor
	// argument (presumably the alignment of the coefficient, in bytes).
	Pointer<SIMD::Float> constantTerm(planeEquation + OFFSET(PlaneEquation, C), 16);
	SIMD::Float constant = *constantTerm;

	const bool needsGradients = !flat;
	if(needsGradients)
	{
		Pointer<SIMD::Float> xTerm(planeEquation + OFFSET(PlaneEquation, A), 16);
		gradientX = *xTerm;

		Pointer<SIMD::Float> yTerm(planeEquation + OFFSET(PlaneEquation, B), 16);
		gradientY = *yTerm;
	}

	return ::Interpolate(x, y, rhw, gradientX, gradientY, constant, flat, perspective);
}
1114 
1115 }  // namespace sw