1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef sw_SpirvShader_hpp
16 #define sw_SpirvShader_hpp
17 
18 #include "SamplerCore.hpp"
19 #include "ShaderCore.hpp"
20 #include "SpirvID.hpp"
21 #include "Device/Config.hpp"
22 #include "Device/Sampler.hpp"
23 #include "System/Debug.hpp"
24 #include "System/Math.hpp"
25 #include "System/Types.hpp"
26 #include "Vulkan/VkConfig.hpp"
27 #include "Vulkan/VkDescriptorSet.hpp"
28 
29 #define SPV_ENABLE_UTILITY_CODE
30 #include <spirv/unified1/spirv.hpp>
31 
32 #include <array>
33 #include <atomic>
34 #include <cstdint>
35 #include <cstring>
36 #include <deque>
37 #include <functional>
38 #include <memory>
39 #include <string>
40 #include <type_traits>
41 #include <unordered_map>
42 #include <unordered_set>
43 #include <vector>
44 
45 #undef Yield  // b/127920555
46 
// Forward declarations of Vulkan-side types, so that this header does not
// need their full definitions.
namespace vk {

class ImageView;
class PipelineLayout;
class RenderPass;
class Sampler;
struct SampledImageDescriptor;

namespace dbg {
class Context;
}  // namespace dbg

}  // namespace vk
60 
61 namespace sw {
62 
63 // Forward declarations.
64 class SpirvRoutine;
65 
66 // Incrementally constructed complex bundle of rvalues
67 // Effectively a restricted vector, supporting only:
68 // - allocation to a (runtime-known) fixed component count
69 // - in-place construction of elements
70 // - const operator[]
71 class Intermediate
72 {
73 public:
Intermediate(uint32_t componentCount)74 	Intermediate(uint32_t componentCount)
75 	    : componentCount(componentCount)
76 	    , scalar(new rr::Value *[componentCount])
77 	{
78 		for(auto i = 0u; i < componentCount; i++) { scalar[i] = nullptr; }
79 	}
80 
~Intermediate()81 	~Intermediate()
82 	{
83 		delete[] scalar;
84 	}
85 
86 	// TypeHint is used as a hint for rr::PrintValue::Ty<sw::Intermediate> to
87 	// decide the format used to print the intermediate data.
88 	enum class TypeHint
89 	{
90 		Float,
91 		Int,
92 		UInt
93 	};
94 
move(uint32_t i,RValue<SIMD::Float> && scalar)95 	void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value(), TypeHint::Float); }
move(uint32_t i,RValue<SIMD::Int> && scalar)96 	void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value(), TypeHint::Int); }
move(uint32_t i,RValue<SIMD::UInt> && scalar)97 	void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value(), TypeHint::UInt); }
98 
move(uint32_t i,const RValue<SIMD::Float> & scalar)99 	void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value(), TypeHint::Float); }
move(uint32_t i,const RValue<SIMD::Int> & scalar)100 	void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value(), TypeHint::Int); }
move(uint32_t i,const RValue<SIMD::UInt> & scalar)101 	void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value(), TypeHint::UInt); }
102 
103 	// Value retrieval functions.
Float(uint32_t i) const104 	RValue<SIMD::Float> Float(uint32_t i) const
105 	{
106 		ASSERT(i < componentCount);
107 		ASSERT(scalar[i] != nullptr);
108 		return As<SIMD::Float>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Float>(scalar)
109 	}
110 
Int(uint32_t i) const111 	RValue<SIMD::Int> Int(uint32_t i) const
112 	{
113 		ASSERT(i < componentCount);
114 		ASSERT(scalar[i] != nullptr);
115 		return As<SIMD::Int>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Int>(scalar)
116 	}
117 
UInt(uint32_t i) const118 	RValue<SIMD::UInt> UInt(uint32_t i) const
119 	{
120 		ASSERT(i < componentCount);
121 		ASSERT(scalar[i] != nullptr);
122 		return As<SIMD::UInt>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::UInt>(scalar)
123 	}
124 
125 	// No copy/move construction or assignment
126 	Intermediate(Intermediate const &) = delete;
127 	Intermediate(Intermediate &&) = delete;
128 	Intermediate &operator=(Intermediate const &) = delete;
129 	Intermediate &operator=(Intermediate &&) = delete;
130 
131 	const uint32_t componentCount;
132 
133 private:
emplace(uint32_t i,rr::Value * value,TypeHint type)134 	void emplace(uint32_t i, rr::Value *value, TypeHint type)
135 	{
136 		ASSERT(i < componentCount);
137 		ASSERT(scalar[i] == nullptr);
138 		scalar[i] = value;
139 		RR_PRINT_ONLY(typeHint = type;)
140 	}
141 
142 	rr::Value **const scalar;
143 
144 #ifdef ENABLE_RR_PRINT
145 	friend struct rr::PrintValue::Ty<sw::Intermediate>;
146 	TypeHint typeHint = TypeHint::Float;
147 #endif  // ENABLE_RR_PRINT
148 };
149 
150 class SpirvShader
151 {
152 public:
153 	using InsnStore = std::vector<uint32_t>;
154 	InsnStore insns;
155 
156 	using ImageSampler = void(void *texture, void *uvsIn, void *texelOut, void *constants);
157 
158 	enum class YieldResult
159 	{
160 		ControlBarrier,
161 	};
162 
163 	class Type;
164 	class Object;
165 
166 	// Pseudo-iterator over SPIRV instructions, designed to support range-based-for.
167 	class InsnIterator
168 	{
169 	public:
170 		InsnIterator(InsnIterator const &other) = default;
171 
172 		InsnIterator() = default;
173 
InsnIterator(InsnStore::const_iterator iter)174 		explicit InsnIterator(InsnStore::const_iterator iter)
175 		    : iter{ iter }
176 		{
177 		}
178 
opcode() const179 		spv::Op opcode() const
180 		{
181 			return static_cast<spv::Op>(*iter & spv::OpCodeMask);
182 		}
183 
wordCount() const184 		uint32_t wordCount() const
185 		{
186 			return *iter >> spv::WordCountShift;
187 		}
188 
word(uint32_t n) const189 		uint32_t word(uint32_t n) const
190 		{
191 			ASSERT(n < wordCount());
192 			return iter[n];
193 		}
194 
wordPointer(uint32_t n) const195 		uint32_t const *wordPointer(uint32_t n) const
196 		{
197 			ASSERT(n < wordCount());
198 			return &iter[n];
199 		}
200 
string(uint32_t n) const201 		const char *string(uint32_t n) const
202 		{
203 			return reinterpret_cast<const char *>(wordPointer(n));
204 		}
205 
hasResultAndType() const206 		bool hasResultAndType() const
207 		{
208 			bool hasResult = false, hasResultType = false;
209 			spv::HasResultAndType(opcode(), &hasResult, &hasResultType);
210 
211 			return hasResultType;
212 		}
213 
resultTypeId() const214 		SpirvID<Type> resultTypeId() const
215 		{
216 			ASSERT(hasResultAndType());
217 			return word(1);
218 		}
219 
resultId() const220 		SpirvID<Object> resultId() const
221 		{
222 			ASSERT(hasResultAndType());
223 			return word(2);
224 		}
225 
operator ==(InsnIterator const & other) const226 		bool operator==(InsnIterator const &other) const
227 		{
228 			return iter == other.iter;
229 		}
230 
operator !=(InsnIterator const & other) const231 		bool operator!=(InsnIterator const &other) const
232 		{
233 			return iter != other.iter;
234 		}
235 
operator *() const236 		InsnIterator operator*() const
237 		{
238 			return *this;
239 		}
240 
operator ++()241 		InsnIterator &operator++()
242 		{
243 			iter += wordCount();
244 			return *this;
245 		}
246 
operator ++(int)247 		InsnIterator const operator++(int)
248 		{
249 			InsnIterator ret{ *this };
250 			iter += wordCount();
251 			return ret;
252 		}
253 
254 	private:
255 		InsnStore::const_iterator iter;
256 	};
257 
258 	/* range-based-for interface */
begin() const259 	InsnIterator begin() const
260 	{
261 		return InsnIterator{ insns.cbegin() + 5 };
262 	}
263 
end() const264 	InsnIterator end() const
265 	{
266 		return InsnIterator{ insns.cend() };
267 	}
268 
269 	class Type
270 	{
271 	public:
272 		using ID = SpirvID<Type>;
273 
opcode() const274 		spv::Op opcode() const { return definition.opcode(); }
275 
276 		InsnIterator definition;
277 		spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1);
278 		uint32_t componentCount = 0;
279 		bool isBuiltInBlock = false;
280 
281 		// Inner element type for pointers, arrays, vectors and matrices.
282 		ID element;
283 	};
284 
285 	class Object
286 	{
287 	public:
288 		using ID = SpirvID<Object>;
289 
opcode() const290 		spv::Op opcode() const { return definition.opcode(); }
typeId() const291 		Type::ID typeId() const { return definition.resultTypeId(); }
id() const292 		Object::ID id() const { return definition.resultId(); }
293 
294 		InsnIterator definition;
295 		std::vector<uint32_t> constantValue;
296 
297 		enum class Kind
298 		{
299 			// Invalid default kind.
300 			// If we get left with an object in this state, the module was
301 			// broken.
302 			Unknown,
303 
304 			// TODO: Better document this kind.
305 			// A shader interface variable pointer.
306 			// Pointer with uniform address across all lanes.
307 			// Pointer held by SpirvRoutine::pointers
308 			InterfaceVariable,
309 
310 			// Constant value held by Object::constantValue.
311 			Constant,
312 
313 			// Value held by SpirvRoutine::intermediates.
314 			Intermediate,
315 
316 			// Pointer held by SpirvRoutine::pointers
317 			Pointer,
318 
319 			// A pointer to a vk::DescriptorSet*.
320 			// Pointer held by SpirvRoutine::pointers.
321 			DescriptorSet,
322 		};
323 
324 		Kind kind = Kind::Unknown;
325 	};
326 
327 	// Block is an interval of SPIR-V instructions, starting with the
328 	// opening OpLabel, and ending with a termination instruction.
329 	class Block
330 	{
331 	public:
332 		using ID = SpirvID<Block>;
333 		using Set = std::unordered_set<ID>;
334 
335 		// Edge represents the graph edge between two blocks.
336 		struct Edge
337 		{
338 			ID from;
339 			ID to;
340 
operator ==sw::SpirvShader::Block::Edge341 			bool operator==(const Edge &other) const { return from == other.from && to == other.to; }
342 
343 			struct Hash
344 			{
operator ()sw::SpirvShader::Block::Edge::Hash345 				std::size_t operator()(const Edge &edge) const noexcept
346 				{
347 					return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value());
348 				}
349 			};
350 		};
351 
352 		Block() = default;
353 		Block(const Block &other) = default;
354 		explicit Block(InsnIterator begin, InsnIterator end);
355 
356 		/* range-based-for interface */
begin() const357 		inline InsnIterator begin() const { return begin_; }
end() const358 		inline InsnIterator end() const { return end_; }
359 
360 		enum Kind
361 		{
362 			Simple,                         // OpBranch or other simple terminator.
363 			StructuredBranchConditional,    // OpSelectionMerge + OpBranchConditional
364 			UnstructuredBranchConditional,  // OpBranchConditional
365 			StructuredSwitch,               // OpSelectionMerge + OpSwitch
366 			UnstructuredSwitch,             // OpSwitch
367 			Loop,                           // OpLoopMerge + [OpBranchConditional | OpBranch]
368 		};
369 
370 		Kind kind = Simple;
371 		InsnIterator mergeInstruction;   // Structured control flow merge instruction.
372 		InsnIterator branchInstruction;  // Branch instruction.
373 		ID mergeBlock;                   // Structured flow merge block.
374 		ID continueTarget;               // Loop continue block.
375 		Set ins;                         // Blocks that branch into this block.
376 		Set outs;                        // Blocks that this block branches to.
377 		bool isLoopMerge = false;
378 
379 	private:
380 		InsnIterator begin_;
381 		InsnIterator end_;
382 	};
383 
384 	class Function
385 	{
386 	public:
387 		using ID = SpirvID<Function>;
388 
389 		// Walks all reachable the blocks starting from id adding them to
390 		// reachable.
391 		void TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const;
392 
393 		// AssignBlockFields() performs the following for all reachable blocks:
394 		// * Assigns Block::ins with the identifiers of all blocks that contain
395 		//   this block in their Block::outs.
396 		// * Sets Block::isLoopMerge to true if the block is the merge of a
397 		//   another loop block.
398 		void AssignBlockFields();
399 
400 		// ForeachBlockDependency calls f with each dependency of the given
401 		// block. A dependency is an incoming block that is not a loop-back
402 		// edge.
403 		void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const;
404 
405 		// ExistsPath returns true if there's a direct or indirect flow from
406 		// the 'from' block to the 'to' block that does not pass through
407 		// notPassingThrough.
408 		bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const;
409 
getBlock(Block::ID id) const410 		Block const &getBlock(Block::ID id) const
411 		{
412 			auto it = blocks.find(id);
413 			ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value());
414 			return it->second;
415 		}
416 
417 		Block::ID entry;          // function entry point block.
418 		HandleMap<Block> blocks;  // blocks belonging to this function.
419 		Type::ID type;            // type of the function.
420 		Type::ID result;          // return type.
421 	};
422 
423 	using String = std::string;
424 	using StringID = SpirvID<std::string>;
425 
426 	class Extension
427 	{
428 	public:
429 		using ID = SpirvID<Extension>;
430 
431 		enum Name
432 		{
433 			Unknown,
434 			GLSLstd450,
435 			OpenCLDebugInfo100
436 		};
437 
438 		Name name;
439 	};
440 
441 	struct TypeOrObject
442 	{};
443 
444 	// TypeOrObjectID is an identifier that represents a Type or an Object,
445 	// and supports implicit casting to and from Type::ID or Object::ID.
446 	class TypeOrObjectID : public SpirvID<TypeOrObject>
447 	{
448 	public:
449 		using Hash = std::hash<SpirvID<TypeOrObject>>;
450 
TypeOrObjectID(uint32_t id)451 		inline TypeOrObjectID(uint32_t id)
452 		    : SpirvID(id)
453 		{}
TypeOrObjectID(Type::ID id)454 		inline TypeOrObjectID(Type::ID id)
455 		    : SpirvID(id.value())
456 		{}
TypeOrObjectID(Object::ID id)457 		inline TypeOrObjectID(Object::ID id)
458 		    : SpirvID(id.value())
459 		{}
operator Type::ID() const460 		inline operator Type::ID() const { return Type::ID(value()); }
operator Object::ID() const461 		inline operator Object::ID() const { return Object::ID(value()); }
462 	};
463 
464 	// OpImageSample variants
465 	enum Variant
466 	{
467 		None,  // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod.
468 		Dref,
469 		Proj,
470 		ProjDref,
471 		VARIANT_LAST = ProjDref
472 	};
473 
474 	// Compact representation of image instruction parameters that is passed to the
475 	// trampoline function for retrieving/generating the corresponding sampling routine.
476 	struct ImageInstruction
477 	{
ImageInstructionsw::SpirvShader::ImageInstruction478 		ImageInstruction(Variant variant, SamplerMethod samplerMethod)
479 		    : parameters(0)
480 		{
481 			this->variant = variant;
482 			this->samplerMethod = samplerMethod;
483 		}
484 
485 		// Unmarshal from raw 32-bit data
ImageInstructionsw::SpirvShader::ImageInstruction486 		ImageInstruction(uint32_t parameters)
487 		    : parameters(parameters)
488 		{}
489 
getSamplerFunctionsw::SpirvShader::ImageInstruction490 		SamplerFunction getSamplerFunction() const
491 		{
492 			return { static_cast<SamplerMethod>(samplerMethod), offset != 0, sample != 0 };
493 		}
494 
isDrefsw::SpirvShader::ImageInstruction495 		bool isDref() const
496 		{
497 			return (variant == Dref) || (variant == ProjDref);
498 		}
499 
isProjsw::SpirvShader::ImageInstruction500 		bool isProj() const
501 		{
502 			return (variant == Proj) || (variant == ProjDref);
503 		}
504 
505 		union
506 		{
507 			struct
508 			{
509 				uint32_t variant : BITS(VARIANT_LAST);
510 				uint32_t samplerMethod : BITS(SAMPLER_METHOD_LAST);
511 				uint32_t gatherComponent : 2;
512 
513 				// Parameters are passed to the sampling routine in this order:
514 				uint32_t coordinates : 3;       // 1-4 (does not contain projection component)
515 				/*	uint32_t dref : 1; */       // Indicated by Variant::ProjDref|Dref
516 				/*	uint32_t lodOrBias : 1; */  // Indicated by SamplerMethod::Lod|Bias|Fetch
517 				uint32_t grad : 2;              // 0-3 components (for each of dx / dy)
518 				uint32_t offset : 2;            // 0-3 components
519 				uint32_t sample : 1;            // 0-1 scalar integer
520 			};
521 
522 			uint32_t parameters;
523 		};
524 	};
525 
526 	static_assert(sizeof(ImageInstruction) == sizeof(uint32_t), "ImageInstruction must be 32-bit");
527 
528 	// This method is for retrieving an ID that uniquely identifies the
529 	// shader entry point represented by this object.
getSerialID() const530 	uint64_t getSerialID() const
531 	{
532 		return ((uint64_t)entryPoint.value() << 32) | codeSerialID;
533 	}
534 
535 	SpirvShader(uint32_t codeSerialID,
536 	            VkShaderStageFlagBits stage,
537 	            const char *entryPointName,
538 	            InsnStore const &insns,
539 	            const vk::RenderPass *renderPass,
540 	            uint32_t subpassIndex,
541 	            bool robustBufferAccess,
542 	            const std::shared_ptr<vk::dbg::Context> &dbgctx);
543 
544 	~SpirvShader();
545 
546 	struct Modes
547 	{
548 		bool EarlyFragmentTests : 1;
549 		bool DepthReplacing : 1;
550 		bool DepthGreater : 1;
551 		bool DepthLess : 1;
552 		bool DepthUnchanged : 1;
553 		bool ContainsKill : 1;
554 		bool ContainsControlBarriers : 1;
555 		bool NeedsCentroid : 1;
556 		bool ContainsSampleQualifier : 1;
557 
558 		// Compute workgroup dimensions
559 		int WorkgroupSizeX = 1;
560 		int WorkgroupSizeY = 1;
561 		int WorkgroupSizeZ = 1;
562 	};
563 
getModes() const564 	Modes const &getModes() const
565 	{
566 		return modes;
567 	}
568 
569 	struct Capabilities
570 	{
571 		bool Matrix : 1;
572 		bool Shader : 1;
573 		bool StorageImageMultisample : 1;
574 		bool ClipDistance : 1;
575 		bool CullDistance : 1;
576 		bool ImageCubeArray : 1;
577 		bool SampleRateShading : 1;
578 		bool InputAttachment : 1;
579 		bool Sampled1D : 1;
580 		bool Image1D : 1;
581 		bool SampledBuffer : 1;
582 		bool SampledCubeArray : 1;
583 		bool ImageBuffer : 1;
584 		bool ImageMSArray : 1;
585 		bool StorageImageExtendedFormats : 1;
586 		bool ImageQuery : 1;
587 		bool DerivativeControl : 1;
588 		bool InterpolationFunction : 1;
589 		bool GroupNonUniform : 1;
590 		bool GroupNonUniformVote : 1;
591 		bool GroupNonUniformBallot : 1;
592 		bool GroupNonUniformShuffle : 1;
593 		bool GroupNonUniformShuffleRelative : 1;
594 		bool GroupNonUniformArithmetic : 1;
595 		bool DeviceGroup : 1;
596 		bool MultiView : 1;
597 		bool StencilExportEXT : 1;
598 	};
599 
getUsedCapabilities() const600 	Capabilities const &getUsedCapabilities() const
601 	{
602 		return capabilities;
603 	}
604 
605 	// getNumOutputClipDistances() returns the number of ClipDistances
606 	// outputted by this shader.
getNumOutputClipDistances() const607 	unsigned int getNumOutputClipDistances() const
608 	{
609 		if(getUsedCapabilities().ClipDistance)
610 		{
611 			auto it = outputBuiltins.find(spv::BuiltInClipDistance);
612 			if(it != outputBuiltins.end())
613 			{
614 				return it->second.SizeInComponents;
615 			}
616 		}
617 		return 0;
618 	}
619 
620 	// getNumOutputCullDistances() returns the number of CullDistances
621 	// outputted by this shader.
getNumOutputCullDistances() const622 	unsigned int getNumOutputCullDistances() const
623 	{
624 		if(getUsedCapabilities().CullDistance)
625 		{
626 			auto it = outputBuiltins.find(spv::BuiltInCullDistance);
627 			if(it != outputBuiltins.end())
628 			{
629 				return it->second.SizeInComponents;
630 			}
631 		}
632 		return 0;
633 	}
634 
635 	enum AttribType : unsigned char
636 	{
637 		ATTRIBTYPE_FLOAT,
638 		ATTRIBTYPE_INT,
639 		ATTRIBTYPE_UINT,
640 		ATTRIBTYPE_UNUSED,
641 
642 		ATTRIBTYPE_LAST = ATTRIBTYPE_UINT
643 	};
644 
hasBuiltinInput(spv::BuiltIn b) const645 	bool hasBuiltinInput(spv::BuiltIn b) const
646 	{
647 		return inputBuiltins.find(b) != inputBuiltins.end();
648 	}
649 
hasBuiltinOutput(spv::BuiltIn b) const650 	bool hasBuiltinOutput(spv::BuiltIn b) const
651 	{
652 		return outputBuiltins.find(b) != outputBuiltins.end();
653 	}
654 
655 	struct Decorations
656 	{
657 		int32_t Location = -1;
658 		int32_t Component = 0;
659 		spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1);
660 		int32_t Offset = -1;
661 		int32_t ArrayStride = -1;
662 		int32_t MatrixStride = 1;
663 
664 		bool HasLocation : 1;
665 		bool HasComponent : 1;
666 		bool HasBuiltIn : 1;
667 		bool HasOffset : 1;
668 		bool HasArrayStride : 1;
669 		bool HasMatrixStride : 1;
670 		bool HasRowMajor : 1;  // whether RowMajor bit is valid.
671 
672 		bool Flat : 1;
673 		bool Centroid : 1;
674 		bool NoPerspective : 1;
675 		bool Block : 1;
676 		bool BufferBlock : 1;
677 		bool RelaxedPrecision : 1;
678 		bool RowMajor : 1;      // RowMajor if true; ColMajor if false
679 		bool InsideMatrix : 1;  // pseudo-decoration for whether we're inside a matrix.
680 
Decorationssw::SpirvShader::Decorations681 		Decorations()
682 		    : Location{ -1 }
683 		    , Component{ 0 }
684 		    , BuiltIn{ static_cast<spv::BuiltIn>(-1) }
685 		    , Offset{ -1 }
686 		    , ArrayStride{ -1 }
687 		    , MatrixStride{ -1 }
688 		    , HasLocation{ false }
689 		    , HasComponent{ false }
690 		    , HasBuiltIn{ false }
691 		    , HasOffset{ false }
692 		    , HasArrayStride{ false }
693 		    , HasMatrixStride{ false }
694 		    , HasRowMajor{ false }
695 		    , Flat{ false }
696 		    , Centroid{ false }
697 		    , NoPerspective{ false }
698 		    , Block{ false }
699 		    , BufferBlock{ false }
700 		    , RelaxedPrecision{ false }
701 		    , RowMajor{ false }
702 		    , InsideMatrix{ false }
703 		{
704 		}
705 
706 		Decorations(Decorations const &) = default;
707 
708 		void Apply(Decorations const &src);
709 
710 		void Apply(spv::Decoration decoration, uint32_t arg);
711 	};
712 
713 	std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations;
714 	std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations;
715 
716 	struct DescriptorDecorations
717 	{
718 		int32_t DescriptorSet = -1;
719 		int32_t Binding = -1;
720 		int32_t InputAttachmentIndex = -1;
721 
722 		void Apply(DescriptorDecorations const &src);
723 	};
724 
725 	std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations;
726 	std::vector<VkFormat> inputAttachmentFormats;
727 
728 	struct InterfaceComponent
729 	{
730 		AttribType Type;
731 
732 		union
733 		{
734 			struct
735 			{
736 				bool Flat : 1;
737 				bool Centroid : 1;
738 				bool NoPerspective : 1;
739 			};
740 
741 			uint8_t DecorationBits;
742 		};
743 
InterfaceComponentsw::SpirvShader::InterfaceComponent744 		InterfaceComponent()
745 		    : Type{ ATTRIBTYPE_UNUSED }
746 		    , DecorationBits{ 0 }
747 		{
748 		}
749 	};
750 
751 	struct BuiltinMapping
752 	{
753 		Object::ID Id;
754 		uint32_t FirstComponent;
755 		uint32_t SizeInComponents;
756 	};
757 
758 	struct WorkgroupMemory
759 	{
760 		// allocates a new variable of size bytes with the given identifier.
allocatesw::SpirvShader::WorkgroupMemory761 		inline void allocate(Object::ID id, uint32_t size)
762 		{
763 			uint32_t offset = totalSize;
764 			auto it = offsets.emplace(id, offset);
765 			ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
766 			totalSize += size;
767 		}
768 		// returns the byte offset of the variable with the given identifier.
offsetOfsw::SpirvShader::WorkgroupMemory769 		inline uint32_t offsetOf(Object::ID id) const
770 		{
771 			auto it = offsets.find(id);
772 			ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
773 			return it->second;
774 		}
775 		// returns the total allocated size in bytes.
sizesw::SpirvShader::WorkgroupMemory776 		inline uint32_t size() const { return totalSize; }
777 
778 	private:
779 		uint32_t totalSize = 0;                            // in bytes
780 		std::unordered_map<Object::ID, uint32_t> offsets;  // in bytes
781 	};
782 
783 	std::vector<InterfaceComponent> inputs;
784 	std::vector<InterfaceComponent> outputs;
785 
786 	void emitProlog(SpirvRoutine *routine) const;
787 	void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, unsigned int multiSampleCount = 0) const;
788 	void emitEpilog(SpirvRoutine *routine) const;
789 	void clearPhis(SpirvRoutine *routine) const;
790 
containsImageWrite() const791 	bool containsImageWrite() const { return imageWriteEmitted; }
792 
793 	using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
794 	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
795 	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
796 	WorkgroupMemory workgroupMemory;
797 
798 private:
799 	const uint32_t codeSerialID;
800 	Modes modes = {};
801 	Capabilities capabilities = {};
802 	HandleMap<Type> types;
803 	HandleMap<Object> defs;
804 	HandleMap<Function> functions;
805 	std::unordered_map<StringID, String> strings;
806 	HandleMap<Extension> extensionsByID;
807 	std::unordered_set<uint32_t> extensionsImported;
808 	Function::ID entryPoint;
809 	mutable bool imageWriteEmitted = false;
810 
811 	const bool robustBufferAccess = true;
812 	spv::ExecutionModel executionModel = spv::ExecutionModelMax;  // Invalid prior to OpEntryPoint parsing.
813 
814 	// DeclareType creates a Type for the given OpTypeX instruction, storing
815 	// it into the types map. It is called from the analysis pass (constructor).
816 	void DeclareType(InsnIterator insn);
817 
818 	void ProcessExecutionMode(InsnIterator it);
819 
820 	uint32_t ComputeTypeSize(InsnIterator insn);
821 	void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const;
822 	void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const;
823 	void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const;
824 
825 	// Creates an Object for the instruction's result in 'defs'.
826 	void DefineResult(const InsnIterator &insn);
827 
828 	// Processes the OpenCL.Debug.100 instruction for the initial definition
829 	// pass of the SPIR-V.
830 	void DefineOpenCLDebugInfo100(const InsnIterator &insn);
831 
832 	// Returns true if data in the given storage class is word-interleaved
	// by each SIMD vector lane, otherwise data is stored linearly.
834 	//
835 	// Each lane addresses a single word, picked by a base pointer and an
836 	// integer offset.
837 	//
838 	// A word is currently 32 bits (single float, int32_t, uint32_t).
839 	// A lane is a single element of a SIMD vector register.
840 	//
841 	// Storage interleaved by lane - (IsStorageInterleavedByLane() == true):
842 	// ---------------------------------------------------------------------
843 	//
844 	// Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex)
845 	//
846 	// Assuming SIMD::Width == 4:
847 	//
848 	//                   Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
849 	//                 ===========+===========+===========+==========
850 	//  LaneOffset=0: |  Word[0]  |  Word[1]  |  Word[2]  |  Word[3]
851 	// ---------------+-----------+-----------+-----------+----------
852 	//  LaneOffset=1: |  Word[4]  |  Word[5]  |  Word[6]  |  Word[7]
853 	// ---------------+-----------+-----------+-----------+----------
854 	//  LaneOffset=2: |  Word[8]  |  Word[9]  |  Word[a]  |  Word[b]
855 	// ---------------+-----------+-----------+-----------+----------
856 	//  LaneOffset=3: |  Word[c]  |  Word[d]  |  Word[e]  |  Word[f]
857 	//
858 	//
859 	// Linear storage - (IsStorageInterleavedByLane() == false):
860 	// ---------------------------------------------------------
861 	//
862 	// Address = PtrBase + sizeof(Word) * LaneOffset
863 	//
864 	//                   Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
865 	//                 ===========+===========+===========+==========
866 	//  LaneOffset=0: |  Word[0]  |  Word[0]  |  Word[0]  |  Word[0]
867 	// ---------------+-----------+-----------+-----------+----------
868 	//  LaneOffset=1: |  Word[1]  |  Word[1]  |  Word[1]  |  Word[1]
869 	// ---------------+-----------+-----------+-----------+----------
870 	//  LaneOffset=2: |  Word[2]  |  Word[2]  |  Word[2]  |  Word[2]
871 	// ---------------+-----------+-----------+-----------+----------
872 	//  LaneOffset=3: |  Word[3]  |  Word[3]  |  Word[3]  |  Word[3]
873 	//
874 	static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
875 	static bool IsExplicitLayout(spv::StorageClass storageClass);
876 
877 	static sw::SIMD::Pointer InterleaveByLane(sw::SIMD::Pointer p);
878 
879 	// Output storage buffers and images should not be affected by helper invocations
880 	static bool StoresInHelperInvocation(spv::StorageClass storageClass);
881 
882 	using InterfaceVisitor = std::function<void(Decorations const, AttribType)>;
883 
884 	void VisitInterface(Object::ID id, const InterfaceVisitor &v) const;
885 
886 	int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &v) const;
887 
888 	// MemoryElement describes a scalar element within a structure, and is
889 	// used by the callback function of VisitMemoryObject().
890 	struct MemoryElement
891 	{
892 		uint32_t index;    // index of the scalar element
893 		uint32_t offset;   // offset (in bytes) from the base of the object
894 		const Type &type;  // element type
895 	};
896 
897 	using MemoryVisitor = std::function<void(const MemoryElement &)>;
898 
	// VisitMemoryObject() walks a type tree in an explicitly laid out
	// storage class, calling the MemoryVisitor for each scalar element
	// within the object.
902 	void VisitMemoryObject(Object::ID id, const MemoryVisitor &v) const;
903 
904 	// VisitMemoryObjectInner() is internally called by VisitMemoryObject()
905 	void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &v) const;
906 
907 	Object &CreateConstant(InsnIterator it);
908 
909 	void ProcessInterfaceVariable(Object &object);
910 
911 	// EmitState holds control-flow state for the emit() pass.
912 	class EmitState
913 	{
914 	public:
EmitState(SpirvRoutine * routine,Function::ID function,RValue<SIMD::Int> activeLaneMask,RValue<SIMD::Int> storesAndAtomicsMask,const vk::DescriptorSet::Bindings & descriptorSets,bool robustBufferAccess,unsigned int multiSampleCount,spv::ExecutionModel executionModel)915 		EmitState(SpirvRoutine *routine,
916 		          Function::ID function,
917 		          RValue<SIMD::Int> activeLaneMask,
918 		          RValue<SIMD::Int> storesAndAtomicsMask,
919 		          const vk::DescriptorSet::Bindings &descriptorSets,
920 		          bool robustBufferAccess,
921 		          unsigned int multiSampleCount,
922 		          spv::ExecutionModel executionModel)
923 		    : routine(routine)
924 		    , function(function)
925 		    , activeLaneMaskValue(activeLaneMask.value())
926 		    , storesAndAtomicsMaskValue(storesAndAtomicsMask.value())
927 		    , descriptorSets(descriptorSets)
928 		    , robustBufferAccess(robustBufferAccess)
929 		    , multiSampleCount(multiSampleCount)
930 		    , executionModel(executionModel)
931 		{
932 			ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0));  // Must parse OpEntryPoint before emitting.
933 		}
934 
935 		// Returns the mask describing the active lanes as updated by dynamic
936 		// control flow. Active lanes include helper invocations, used for
		// calculating fragment derivatives, which must not perform memory
938 		// stores or atomic writes.
939 		//
940 		// Use activeStoresAndAtomicsMask() to consider both control flow and
941 		// lanes which are permitted to perform memory stores and atomic
942 		// operations
activeLaneMask() const943 		RValue<SIMD::Int> activeLaneMask() const
944 		{
945 			ASSERT(activeLaneMaskValue != nullptr);
946 			return RValue<SIMD::Int>(activeLaneMaskValue);
947 		}
948 
949 		// Returns the immutable lane mask that describes which lanes are
950 		// permitted to perform memory stores and atomic operations.
951 		// Note that unlike activeStoresAndAtomicsMask() this mask *does not*
952 		// consider lanes that have been made inactive due to control flow.
storesAndAtomicsMask() const953 		RValue<SIMD::Int> storesAndAtomicsMask() const
954 		{
955 			ASSERT(storesAndAtomicsMaskValue != nullptr);
956 			return RValue<SIMD::Int>(storesAndAtomicsMaskValue);
957 		}
958 
959 		// Returns a lane mask that describes which lanes are permitted to
960 		// perform memory stores and atomic operations, considering lanes that
961 		// may have been made inactive due to control flow.
activeStoresAndAtomicsMask() const962 		RValue<SIMD::Int> activeStoresAndAtomicsMask() const
963 		{
964 			return activeLaneMask() & storesAndAtomicsMask();
965 		}
966 
967 		// Add a new active lane mask edge from the current block to out.
968 		// The edge mask value will be (mask AND activeLaneMaskValue).
969 		// If multiple active lane masks are added for the same edge, then
970 		// they will be ORed together.
971 		void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask);
972 
973 		// Add a new active lane mask for the edge from -> to.
974 		// If multiple active lane masks are added for the same edge, then
975 		// they will be ORed together.
976 		void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask);
977 
978 		SpirvRoutine *routine = nullptr;                 // The current routine being built.
979 		Function::ID function;                           // The current function being built.
980 		Block::ID block;                                 // The current block being built.
981 		rr::Value *activeLaneMaskValue = nullptr;        // The current active lane mask.
982 		rr::Value *storesAndAtomicsMaskValue = nullptr;  // The current atomics mask.
983 		Block::Set visited;                              // Blocks already built.
984 		std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
985 		std::deque<Block::ID> *pending;
986 
987 		const vk::DescriptorSet::Bindings &descriptorSets;
988 
989 		OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;
990 
getMultiSampleCount() const991 		unsigned int getMultiSampleCount() const { return multiSampleCount; }
992 
createIntermediate(Object::ID id,uint32_t componentCount)993 		Intermediate &createIntermediate(Object::ID id, uint32_t componentCount)
994 		{
995 			auto it = intermediates.emplace(std::piecewise_construct,
996 			                                std::forward_as_tuple(id),
997 			                                std::forward_as_tuple(componentCount));
998 			ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
999 			return it.first->second;
1000 		}
1001 
getIntermediate(Object::ID id) const1002 		Intermediate const &getIntermediate(Object::ID id) const
1003 		{
1004 			auto it = intermediates.find(id);
1005 			ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
1006 			return it->second;
1007 		}
1008 
createPointer(Object::ID id,SIMD::Pointer ptr)1009 		void createPointer(Object::ID id, SIMD::Pointer ptr)
1010 		{
1011 			bool added = pointers.emplace(id, ptr).second;
1012 			ASSERT_MSG(added, "Pointer %d created twice", id.value());
1013 		}
1014 
getPointer(Object::ID id) const1015 		SIMD::Pointer const &getPointer(Object::ID id) const
1016 		{
1017 			auto it = pointers.find(id);
1018 			ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
1019 			return it->second;
1020 		}
1021 
1022 	private:
1023 		std::unordered_map<Object::ID, Intermediate> intermediates;
1024 		std::unordered_map<Object::ID, SIMD::Pointer> pointers;
1025 
1026 		const bool robustBufferAccess = true;  // Emit robustBufferAccess safe code.
1027 		const unsigned int multiSampleCount = 0;
1028 		const spv::ExecutionModel executionModel = spv::ExecutionModelMax;
1029 	};
1030 
1031 	// EmitResult is an enumerator of result values from the Emit functions.
1032 	enum class EmitResult
1033 	{
1034 		Continue,    // No termination instructions.
1035 		Terminator,  // Reached a termination instruction.
1036 	};
1037 
1038 	// Generic wrapper over either per-lane intermediate value, or a constant.
1039 	// Constants are transparently widened to per-lane values in operator[].
1040 	// This is appropriate in most cases -- if we're not going to do something
1041 	// significantly different based on whether the value is uniform across lanes.
1042 	class Operand
1043 	{
1044 	public:
1045 		Operand(const SpirvShader *shader, const EmitState *state, SpirvShader::Object::ID objectId);
1046 		Operand(const Intermediate &value);
1047 
Float(uint32_t i) const1048 		RValue<SIMD::Float> Float(uint32_t i) const
1049 		{
1050 			if(intermediate)
1051 			{
1052 				return intermediate->Float(i);
1053 			}
1054 
1055 			// Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact
1056 			// bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant".
1057 			// Thus we must first construct an integer constant, and bitcast to float.
1058 			return As<SIMD::Float>(SIMD::UInt(constant[i]));
1059 		}
1060 
Int(uint32_t i) const1061 		RValue<SIMD::Int> Int(uint32_t i) const
1062 		{
1063 			if(intermediate)
1064 			{
1065 				return intermediate->Int(i);
1066 			}
1067 
1068 			return SIMD::Int(constant[i]);
1069 		}
1070 
UInt(uint32_t i) const1071 		RValue<SIMD::UInt> UInt(uint32_t i) const
1072 		{
1073 			if(intermediate)
1074 			{
1075 				return intermediate->UInt(i);
1076 			}
1077 
1078 			return SIMD::UInt(constant[i]);
1079 		}
1080 
1081 		bool isConstantZero() const;
1082 
1083 	private:
1084 		RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)
1085 
1086 		// Delegate constructor
1087 		Operand(const EmitState *state, const Object &object);
1088 
1089 		const uint32_t *constant;
1090 		const Intermediate *intermediate;
1091 
1092 	public:
1093 		const uint32_t componentCount;
1094 	};
1095 
	// Befriends the rr::PrintValue::Ty specialization for Operand.
	// NOTE(review): RR_PRINT_ONLY is defined outside this file; presumably it
	// only emits its argument when Reactor print support is enabled - confirm.
	RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)
1097 
1098 	Type const &getType(Type::ID id) const
1099 	{
1100 		auto it = types.find(id);
1101 		ASSERT_MSG(it != types.end(), "Unknown type %d", id.value());
1102 		return it->second;
1103 	}
1104 
getType(const Object & object) const1105 	Type const &getType(const Object &object) const
1106 	{
1107 		return getType(object.typeId());
1108 	}
1109 
getObject(Object::ID id) const1110 	Object const &getObject(Object::ID id) const
1111 	{
1112 		auto it = defs.find(id);
1113 		ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value());
1114 		return it->second;
1115 	}
1116 
getFunction(Function::ID id) const1117 	Function const &getFunction(Function::ID id) const
1118 	{
1119 		auto it = functions.find(id);
1120 		ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value());
1121 		return it->second;
1122 	}
1123 
getString(StringID id) const1124 	String const &getString(StringID id) const
1125 	{
1126 		auto it = strings.find(id);
1127 		ASSERT_MSG(it != strings.end(), "Unknown string %d", id.value());
1128 		return it->second;
1129 	}
1130 
getExtension(Extension::ID id) const1131 	Extension const &getExtension(Extension::ID id) const
1132 	{
1133 		auto it = extensionsByID.find(id);
1134 		ASSERT_MSG(it != extensionsByID.end(), "Unknown extension %d", id.value());
1135 		return it->second;
1136 	}
1137 
	// Returns a SIMD::Pointer to the underlying data for the given pointer
	// object.
	// Handles objects of the following kinds:
	//  - DescriptorSet
	//  - Pointer
	//  - InterfaceVariable
	// Calling GetPointerToData with objects of any other kind will assert.
	SIMD::Pointer GetPointerToData(Object::ID id, Int arrayIndex, EmitState const *state) const;

	// Both access chain walkers take a base object ID and an array of
	// numIndexes index IDs, and return a SIMD::Pointer.
	// NOTE(review): judging by the names, the first is the variant for
	// explicitly laid out storage and the second the variant for everything
	// else - confirm against the definitions.
	SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;
	SIMD::Pointer WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;

	// Returns the *component* offset in the literal for the given access chain.
	// Unlike the walkers above, this takes a type ID, takes the indexes as
	// literal values rather than IDs, and needs no EmitState.
	uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const;
1152 
	// Lookup the active lane mask for the edge from -> to.
	// If from is unreachable, then a mask of all zeros is returned.
	// Asserts if from is reachable and the edge does not exist.
	RValue<SIMD::Int> GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const;

	// Updates the current active lane mask.
	void SetActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const;

	// Emit all the unvisited blocks (except for ignore) in DFS order,
	// starting with id.
	void EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore = 0) const;
	// EmitNonLoop() and EmitLoop() take only the emit state.
	// NOTE(review): presumably they emit the block held in state->block,
	// chosen by whether that block is a loop - confirm in the definitions.
	void EmitNonLoop(EmitState *state) const;
	void EmitLoop(EmitState *state) const;

	// EmitInstructions() takes an iterator range, EmitInstruction() a single
	// instruction. Only the latter reports an EmitResult.
	void EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const;
	EmitResult EmitInstruction(InsnIterator insn, EmitState *state) const;
1169 
	// Emit pass instructions:
	// Each function below takes the instruction to emit and the current emit
	// state, and returns an EmitResult. The image sampling variants take an
	// additional Variant or ImageInstruction argument.
	EmitResult EmitVariable(InsnIterator insn, EmitState *state) const;
	EmitResult EmitLoad(InsnIterator insn, EmitState *state) const;
	EmitResult EmitStore(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAccessChain(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeConstruct(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeInsert(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeExtract(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorShuffle(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const;
	EmitResult EmitOuterProduct(InsnIterator insn, EmitState *state) const;
	EmitResult EmitTranspose(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const;
	EmitResult EmitUnaryOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBinaryOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitDot(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSelect(InsnIterator insn, EmitState *state) const;
	EmitResult EmitExtendedInstruction(InsnIterator insn, EmitState *state) const;
	EmitResult EmitExtGLSLstd450(InsnIterator insn, EmitState *state) const;
	EmitResult EmitOpenCLDebugInfo100(InsnIterator insn, EmitState *state) const;
	EmitResult EmitLine(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAny(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAll(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBranch(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBranchConditional(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSwitch(InsnIterator insn, EmitState *state) const;
	EmitResult EmitUnreachable(InsnIterator insn, EmitState *state) const;
	EmitResult EmitReturn(InsnIterator insn, EmitState *state) const;
	EmitResult EmitKill(InsnIterator insn, EmitState *state) const;
	EmitResult EmitFunctionCall(InsnIterator insn, EmitState *state) const;
	EmitResult EmitPhi(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageFetch(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQuerySize(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQueryLod(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQueryLevels(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQuerySamples(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageRead(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageWrite(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageTexelPointer(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCopyObject(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const;
	EmitResult EmitControlBarrier(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const;
	EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const;
	EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const;
1227 
	// Emits code to sample an image, regardless of whether any SIMD lanes are active.
	// The sampled values are written to out.
	void EmitImageSampleUnconditional(Array<SIMD::Float> &out, ImageInstruction instruction, InsnIterator insn, EmitState *state) const;

	// GetImageDimensions() writes its result into dst rather than returning it.
	void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
	// GetTexelAddress() returns a SIMD::Pointer computed from the image base,
	// size, coordinate and descriptor arguments.
	// NOTE(review): units of texelSize and the handling of sampleId are not
	// visible here - confirm in the definition.
	SIMD::Pointer GetTexelAddress(EmitState const *state, Pointer<Byte> imageBase, Int imageSizeInBytes, Operand const &coordinate, Type const &imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect, OutOfBoundsBehavior outOfBoundsBehavior) const;
	// GetConstScalarInt() returns a 32-bit unsigned value for the given object.
	// NOTE(review): presumably requires id to name a constant scalar integer.
	uint32_t GetConstScalarInt(Object::ID id) const;
	// The EvalSpecConstant* functions are non-const: they may modify the
	// shader itself.
	void EvalSpecConstantOp(InsnIterator insn);
	void EvalSpecConstantUnaryOp(InsnIterator insn);
	void EvalSpecConstantBinaryOp(InsnIterator insn);
1237 
	// Fragment input interpolation functions
	uint32_t GetNumInputComponents(int32_t location) const;
	// Selects the kind of interpolation requested from Interpolate().
	// This is an unscoped enum, so the enumerators are visible directly in
	// the enclosing class scope.
	enum InterpolationType
	{
		Centroid,
		AtSample,
		AtOffset,
	};
	// Returns a per-lane float for the input at the given location and
	// component, interpolated according to type.
	// NOTE(review): the role of paramId presumably depends on type (e.g. a
	// sample index or an offset) - confirm in the definition.
	SIMD::Float Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId, uint32_t component,
	                        uint32_t component_count, EmitState *state, InterpolationType type) const;
1248 
	// Helper for implementing OpStore, which doesn't take an InsnIterator so it
	// can also store independent operands.
	// The value is stored through the pointer object pointerId; atomic and
	// memoryOrder are passed through by the caller.
	void Store(Object::ID pointerId, const Operand &value, bool atomic, std::memory_order memoryOrder, EmitState *state) const;

	// LoadPhi loads the phi values from the alloca storage and places the
	// load values into the intermediate with the phi's result id.
	void LoadPhi(InsnIterator insn, EmitState *state) const;

	// StorePhi updates the phi's alloca storage value using the incoming
	// values from blocks that are both in the OpPhi instruction and in
	// filter.
	void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const;

	// Emits a rr::Fence for the given MemorySemanticsMask.
	void Fence(spv::MemorySemanticsMask semantics) const;

	// Helper for calling rr::Yield with res cast to an rr::Int.
	void Yield(YieldResult res) const;
1267 
	// WriteCFGGraphVizDotFile() writes a graphviz dot file of the shader's
	// control flow to the given file path.
	void WriteCFGGraphVizDotFile(const char *path) const;

	// OpcodeName() returns the name of the opcode op.
	static const char *OpcodeName(spv::Op op);
	// MemoryOrder() maps a SPIR-V memory semantics mask to a std::memory_order.
	static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);

	// IsStatement() returns true if the given opcode actually performs
	// work (as opposed to declaring a type, defining a function start / end,
	// etc).
	static bool IsStatement(spv::Op op);

	// HasTypeAndResult() returns true if the given opcode's instruction
	// has a result type ID and result ID, i.e. defines an Object.
	static bool HasTypeAndResult(spv::Op op);

	// Helper as we often need to take dot products as part of doing other things.
	// Takes the number of components to use from each of x and y.
	SIMD::Float Dot(unsigned numComponents, Operand const &x, Operand const &y) const;

	// Splits x into a floating-point significand in the range [0.5, 1.0)
	// and an integral exponent of two, such that:
	//   x = significand * 2^exponent
	// Returns the pair <significand, exponent>
	// (x above refers to the parameter named val.)
	std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const;

	// Static sampler helpers.
	// NOTE(review): ownership of the returned ImageSampler pointer is not
	// visible from this header - confirm before storing or freeing it.
	static ImageSampler *getImageSampler(uint32_t instruction, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler);
	static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState);

	// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
	static sw::FilterType convertFilterMode(const vk::Sampler *sampler, VkImageViewType imageViewType, ImageInstruction instruction);
	static sw::MipmapType convertMipmapMode(const vk::Sampler *sampler);
	static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::Sampler *sampler, VkImageViewType imageViewType);

	// Returns 0 when invalid.
	static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);
1304 
	// Debugger API functions. When ENABLE_VK_DEBUGGER is not defined, these
	// are all no-ops.
	// dbgInit(), dbgTerm() and dbgCreateFile() are non-const; the remaining
	// functions are const and operate on the given EmitState or instruction.

	// dbgInit() initializes the debugger code generation.
	// All other dbgXXX() functions are no-op until this is called.
	void dbgInit(const std::shared_ptr<vk::dbg::Context> &dbgctx);

	// dbgTerm() terminates the debugger code generation.
	void dbgTerm();

	// dbgCreateFile() generates a synthetic file containing the disassembly
	// of the SPIR-V shader. This is the file displayed in the debug
	// session.
	void dbgCreateFile();

	// dbgBeginEmit() sets up the debugging state for the shader.
	void dbgBeginEmit(EmitState *state) const;

	// dbgEndEmit() tears down the debugging state for the shader.
	void dbgEndEmit(EmitState *state) const;

	// dbgBeginEmitInstruction() updates the current debugger location for
	// the given instruction.
	void dbgBeginEmitInstruction(InsnIterator insn, EmitState *state) const;

	// dbgEndEmitInstruction() creates any new debugger variables for the
	// instruction that just completed.
	void dbgEndEmitInstruction(InsnIterator insn, EmitState *state) const;

	// dbgExposeIntermediate() exposes the intermediate with the given ID to
	// the debugger.
	void dbgExposeIntermediate(Object::ID id, EmitState *state) const;

	// dbgUpdateActiveLaneMask() reports the updated active lane mask to the
	// debugger.
	void dbgUpdateActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const;

	// dbgDeclareResult() associates resultId as the result of the given
	// instruction.
	void dbgDeclareResult(const InsnIterator &insn, Object::ID resultId) const;
1345 
1346 	// Impl holds forward declaration structs and pointers to state for the
1347 	// private implementations in the corresponding SpirvShaderXXX.cpp files.
1348 	// This allows access to the private members of the SpirvShader, without
1349 	// littering the header with implementation details.
1350 	struct Impl
1351 	{
1352 		struct Debugger;
1353 		struct Group;
1354 		Debugger *debugger = nullptr;
1355 	};
1356 	Impl impl;
1357 };
1358 
1359 class SpirvRoutine
1360 {
1361 public:
1362 	SpirvRoutine(vk::PipelineLayout const *pipelineLayout);
1363 
1364 	using Variable = Array<SIMD::Float>;
1365 
1366 	struct SamplerCache
1367 	{
1368 		Pointer<Byte> imageDescriptor = nullptr;
1369 		Pointer<Byte> sampler;
1370 		Pointer<Byte> function;
1371 	};
1372 
1373 	struct InterpolationData
1374 	{
1375 		Pointer<Byte> primitive;
1376 		SIMD::Float x;
1377 		SIMD::Float y;
1378 		SIMD::Float rhw;
1379 		SIMD::Float xCentroid;
1380 		SIMD::Float yCentroid;
1381 		SIMD::Float rhwCentroid;
1382 	};
1383 
1384 	vk::PipelineLayout const *const pipelineLayout;
1385 
1386 	std::unordered_map<SpirvShader::Object::ID, Variable> variables;
1387 	std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache;
1388 	Variable inputs = Variable{ MAX_INTERFACE_COMPONENTS };
1389 	Variable outputs = Variable{ MAX_INTERFACE_COMPONENTS };
1390 	InterpolationData interpolationData;
1391 
1392 	Pointer<Byte> workgroupMemory;
1393 	Pointer<Pointer<Byte>> descriptorSets;
1394 	Pointer<Int> descriptorDynamicOffsets;
1395 	Pointer<Byte> pushConstants;
1396 	Pointer<Byte> constants;
1397 	Int killMask = Int{ 0 };
1398 
1399 	// Shader invocation state.
1400 	// Not all of these variables are used for every type of shader, and some
1401 	// are only used when debugging. See b/146486064 for more information.
1402 	// Give careful consideration to the runtime performance loss before adding
1403 	// more state here.
1404 	std::array<SIMD::Int, 2> windowSpacePosition;
1405 	Int viewID;  // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
1406 	Int instanceID;
1407 	SIMD::Int vertexIndex;
1408 	std::array<SIMD::Float, 4> fragCoord;
1409 	std::array<SIMD::Float, 4> pointCoord;
1410 	SIMD::Int helperInvocation;
1411 	Int4 numWorkgroups;
1412 	Int4 workgroupID;
1413 	Int4 workgroupSize;
1414 	Int subgroupsPerWorkgroup;
1415 	Int invocationsPerSubgroup;
1416 	Int subgroupIndex;
1417 	SIMD::Int localInvocationIndex;
1418 	std::array<SIMD::Int, 3> localInvocationID;
1419 	std::array<SIMD::Int, 3> globalInvocationID;
1420 
1421 	Pointer<Byte> dbgState;  // Pointer to a debugger state.
1422 
createVariable(SpirvShader::Object::ID id,uint32_t componentCount)1423 	void createVariable(SpirvShader::Object::ID id, uint32_t componentCount)
1424 	{
1425 		bool added = variables.emplace(id, Variable(componentCount)).second;
1426 		ASSERT_MSG(added, "Variable %d created twice", id.value());
1427 	}
1428 
getVariable(SpirvShader::Object::ID id)1429 	Variable &getVariable(SpirvShader::Object::ID id)
1430 	{
1431 		auto it = variables.find(id);
1432 		ASSERT_MSG(it != variables.end(), "Unknown variables %d", id.value());
1433 		return it->second;
1434 	}
1435 
1436 	// setImmutableInputBuiltins() sets all the immutable input builtins,
1437 	// common for all shader types.
1438 	void setImmutableInputBuiltins(SpirvShader const *shader);
1439 
1440 	static SIMD::Float interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective);
1441 
1442 	// setInputBuiltin() calls f() with the builtin and value if the shader
1443 	// uses the input builtin, otherwise the call is a no-op.
1444 	// F is a function with the signature:
1445 	// void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
1446 	template<typename F>
setInputBuiltin(SpirvShader const * shader,spv::BuiltIn id,F && f)1447 	inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F &&f)
1448 	{
1449 		auto it = shader->inputBuiltins.find(id);
1450 		if(it != shader->inputBuiltins.end())
1451 		{
1452 			const auto &builtin = it->second;
1453 			f(builtin, getVariable(builtin.Id));
1454 		}
1455 	}
1456 
1457 private:
1458 	// The phis are only accessible to SpirvShader as they are only used and
1459 	// exist between calls to SpirvShader::emitProlog() and
1460 	// SpirvShader::emitEpilog().
1461 	friend class SpirvShader;
1462 
1463 	std::unordered_map<SpirvShader::Object::ID, Variable> phis;
1464 };
1465 
1466 }  // namespace sw
1467 
1468 #endif  // sw_SpirvShader_hpp
1469