1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef sw_Shader_hpp
16 #define sw_Shader_hpp
17 
18 #include "Common/Types.hpp"
19 
20 #include <string>
21 #include <vector>
22 
namespace sw
{
	// Holds a list of shader instructions together with per-shader analysis
	// state (nesting limits, sampler usage flags, branching flags).
	// Only declarations live here; the out-of-line members are defined elsewhere.
	class Shader
	{
	public:
		// Kind of shader. SHADER_PIXEL and SHADER_VERTEX equal the high 16 bits
		// of the OPCODE_PS_* and OPCODE_VS_* values in Opcode respectively.
		enum ShaderType
		{
			SHADER_PIXEL = 0xFFFF,
			SHADER_VERTEX = 0xFFFE,
			SHADER_GEOMETRY = 0xFFFD
		};

		// Instruction opcodes. The numeric values are significant: do not
		// reorder or insert enumerators without checking every explicit value.
		enum Opcode
		{
			// Matches order in d3d9types.h
			// See https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/content/d3d9types/ne-d3d9types-_d3dshader_instruction_opcode_type
			OPCODE_NOP = 0,
			OPCODE_MOV,
			OPCODE_ADD,
			OPCODE_SUB,
			OPCODE_MAD,
			OPCODE_MUL,
			OPCODE_RCPX,
			OPCODE_RSQX,
			OPCODE_DP3,
			OPCODE_DP4,
			OPCODE_MIN,
			OPCODE_MAX,
			OPCODE_SLT,
			OPCODE_SGE,
			OPCODE_EXP2X,   // D3DSIO_EXP
			OPCODE_LOG2X,   // D3DSIO_LOG
			OPCODE_LIT,
			OPCODE_ATT,   // D3DSIO_DST
			OPCODE_LRP,
			OPCODE_FRC,
			OPCODE_M4X4,
			OPCODE_M4X3,
			OPCODE_M3X4,
			OPCODE_M3X3,
			OPCODE_M3X2,
			OPCODE_CALL,
			OPCODE_CALLNZ,
			OPCODE_LOOP,
			OPCODE_RET,
			OPCODE_ENDLOOP,
			OPCODE_LABEL,
			OPCODE_DCL,
			OPCODE_POWX,
			OPCODE_CRS,
			OPCODE_SGN,
			OPCODE_ABS,
			OPCODE_NRM3,   // D3DSIO_NRM
			OPCODE_SINCOS,
			OPCODE_REP,
			OPCODE_ENDREP,
			OPCODE_IF,
			OPCODE_IFC,
			OPCODE_ELSE,
			OPCODE_ENDIF,
			OPCODE_BREAK,
			OPCODE_BREAKC,
			OPCODE_MOVA,
			OPCODE_DEFB,
			OPCODE_DEFI,

			// Second sequential range, restarting at 64.
			OPCODE_TEXCOORD = 64,
			OPCODE_TEXKILL,
			OPCODE_TEX,
			OPCODE_TEXBEM,
			OPCODE_TEXBEML,
			OPCODE_TEXREG2AR,
			OPCODE_TEXREG2GB,
			OPCODE_TEXM3X2PAD,
			OPCODE_TEXM3X2TEX,
			OPCODE_TEXM3X3PAD,
			OPCODE_TEXM3X3TEX,
			OPCODE_RESERVED0,
			OPCODE_TEXM3X3SPEC,
			OPCODE_TEXM3X3VSPEC,
			OPCODE_EXPP,
			OPCODE_LOGP,
			OPCODE_CND,
			OPCODE_DEF,
			OPCODE_TEXREG2RGB,
			OPCODE_TEXDP3TEX,
			OPCODE_TEXM3X2DEPTH,
			OPCODE_TEXDP3,
			OPCODE_TEXM3X3,
			OPCODE_TEXDEPTH,
			OPCODE_CMP0,   // D3DSIO_CMP
			OPCODE_BEM,
			OPCODE_DP2ADD,
			OPCODE_DFDX,   // D3DSIO_DSX
			OPCODE_DFDY,   // D3DSIO_DSY
			OPCODE_TEXLDD,
			OPCODE_CMP,   // D3DSIO_SETP
			OPCODE_TEXLDL,
			OPCODE_BREAKP,

			OPCODE_PHASE = 0xFFFD,
			OPCODE_COMMENT = 0xFFFE,
			OPCODE_END = 0xFFFF,

			// Pixel shader versions: high word is SHADER_PIXEL (0xFFFF); the
			// low word follows the x_y suffix (e.g. 0x0104 for PS_1_4).
			OPCODE_PS_1_0 = 0xFFFF0100,
			OPCODE_PS_1_1 = 0xFFFF0101,
			OPCODE_PS_1_2 = 0xFFFF0102,
			OPCODE_PS_1_3 = 0xFFFF0103,
			OPCODE_PS_1_4 = 0xFFFF0104,
			OPCODE_PS_2_0 = 0xFFFF0200,
			OPCODE_PS_2_x = 0xFFFF0201,
			OPCODE_PS_3_0 = 0xFFFF0300,

			// Vertex shader versions: high word is SHADER_VERTEX (0xFFFE).
			// The *_sw variants use 0xFF as the low byte.
			OPCODE_VS_1_0 = 0xFFFE0100,
			OPCODE_VS_1_1 = 0xFFFE0101,
			OPCODE_VS_2_0 = 0xFFFE0200,
			OPCODE_VS_2_x = 0xFFFE0201,
			OPCODE_VS_2_sw = 0xFFFE02FF,
			OPCODE_VS_3_0 = 0xFFFE0300,
			OPCODE_VS_3_sw = 0xFFFE03FF,

			// Everything below is numbered sequentially from 0x10000000.
			OPCODE_NULL = 0x10000000,   // Dead instruction, to be eliminated
			OPCODE_WHILE,
			OPCODE_ENDWHILE,
			OPCODE_COS,
			OPCODE_SIN,
			OPCODE_TAN,
			OPCODE_ACOS,
			OPCODE_ASIN,
			OPCODE_ATAN,
			OPCODE_ATAN2,
			OPCODE_COSH,
			OPCODE_SINH,
			OPCODE_TANH,
			OPCODE_ACOSH,
			OPCODE_ASINH,
			OPCODE_ATANH,
			OPCODE_DP1,
			OPCODE_DP2,
			OPCODE_TRUNC,
			OPCODE_FLOOR,
			OPCODE_ROUND,
			OPCODE_ROUNDEVEN,
			OPCODE_CEIL,
			OPCODE_SQRT,
			OPCODE_RSQ,
			OPCODE_LEN2,
			OPCODE_LEN3,
			OPCODE_LEN4,
			OPCODE_DIST1,
			OPCODE_DIST2,
			OPCODE_DIST3,
			OPCODE_DIST4,
			OPCODE_NRM2,
			OPCODE_NRM4,
			OPCODE_DIV,
			OPCODE_MOD,
			OPCODE_EXP2,
			OPCODE_LOG2,
			OPCODE_EXP,
			OPCODE_LOG,
			OPCODE_POW,
			OPCODE_F2B,   // Float to bool
			OPCODE_B2F,   // Bool to float
			OPCODE_F2I,   // Float to int
			OPCODE_I2F,   // Int to float
			OPCODE_F2U,   // Float to uint
			OPCODE_U2F,   // Uint to float
			OPCODE_I2B,   // Int to bool
			OPCODE_B2I,   // Bool to int
			OPCODE_DET2,
			OPCODE_DET3,
			OPCODE_DET4,
			OPCODE_ALL,
			OPCODE_ANY,
			OPCODE_NEG,
			OPCODE_NOT,
			OPCODE_OR,
			OPCODE_XOR,
			OPCODE_AND,
			OPCODE_EQ,
			OPCODE_NE,
			OPCODE_STEP,
			OPCODE_SMOOTH,
			OPCODE_ISNAN,
			OPCODE_ISINF,
			OPCODE_TEXOFFSET,
			OPCODE_TEXLODOFFSET,
			OPCODE_TEXELFETCH,
			OPCODE_TEXELFETCHOFFSET,
			OPCODE_TEXGRAD,
			OPCODE_TEXGRADOFFSET,
			OPCODE_TEXBIAS,
			OPCODE_TEXLOD,
			OPCODE_TEXOFFSETBIAS,
			OPCODE_TEXSIZE,
			OPCODE_FLOATBITSTOINT,
			OPCODE_FLOATBITSTOUINT,
			OPCODE_INTBITSTOFLOAT,
			OPCODE_UINTBITSTOFLOAT,
			OPCODE_PACKSNORM2x16,
			OPCODE_PACKUNORM2x16,
			OPCODE_PACKHALF2x16,
			OPCODE_UNPACKSNORM2x16,
			OPCODE_UNPACKUNORM2x16,
			OPCODE_UNPACKHALF2x16,
			OPCODE_FORWARD1,
			OPCODE_FORWARD2,
			OPCODE_FORWARD3,
			OPCODE_FORWARD4,
			OPCODE_REFLECT1,
			OPCODE_REFLECT2,
			OPCODE_REFLECT3,
			OPCODE_REFLECT4,
			OPCODE_REFRACT1,
			OPCODE_REFRACT2,
			OPCODE_REFRACT3,
			OPCODE_REFRACT4,
			OPCODE_ICMP,
			OPCODE_UCMP,
			OPCODE_SELECT,
			OPCODE_EXTRACT,
			OPCODE_INSERT,
			OPCODE_DISCARD,
			OPCODE_FWIDTH,
			OPCODE_LEAVE,    // Return before the end of the function
			OPCODE_CONTINUE,
			OPCODE_TEST,     // Marks the end of the code that can be skipped by 'continue'
			OPCODE_SCALAR,   // Marks the start of code not subject to SIMD lane masking. Ends at WHILE and ENDWHILE.
			OPCODE_SWITCH,
			OPCODE_ENDSWITCH,

			// Integer opcodes
			OPCODE_INEG,
			OPCODE_IABS,
			OPCODE_ISGN,
			OPCODE_IADD,
			OPCODE_ISUB,
			OPCODE_IMUL,
			OPCODE_IDIV,
			OPCODE_IMAD,
			OPCODE_IMOD,
			OPCODE_SHL,
			OPCODE_ISHR,
			OPCODE_IMIN,
			OPCODE_IMAX,

			// Unsigned integer opcodes
			OPCODE_UDIV,
			OPCODE_UMOD,
			OPCODE_USHR,
			OPCODE_UMIN,
			OPCODE_UMAX,
		};

		// Opcode lookup helpers taking an int and returning an Opcode.
		// NOTE(review): presumably the argument is a component count selecting
		// the matching numbered variant (e.g. OPCODE_DP1..OPCODE_DP4) - confirm
		// against the definitions, which are not in this header.
		static Opcode OPCODE_DP(int);
		static Opcode OPCODE_LEN(int);
		static Opcode OPCODE_DIST(int);
		static Opcode OPCODE_NRM(int);
		static Opcode OPCODE_FORWARD(int);
		static Opcode OPCODE_REFLECT(int);
		static Opcode OPCODE_REFRACT(int);

		// Comparison selector stored in Instruction::control.
		// Values run 0..7 in declaration order; first and last are reserved.
		enum Control
		{
			CONTROL_RESERVED0,
			CONTROL_GT,
			CONTROL_EQ,
			CONTROL_GE,
			CONTROL_LT,
			CONTROL_NE,
			CONTROL_LE,
			CONTROL_RESERVED1
		};

		// Sampler kind stored in Instruction::samplerType.
		enum SamplerType
		{
			SAMPLER_UNKNOWN,
			SAMPLER_1D,
			SAMPLER_2D,
			SAMPLER_CUBE,
			SAMPLER_VOLUME
		};

		enum Usage   // For vertex input/output declarations
		{
			USAGE_POSITION = 0,
			USAGE_BLENDWEIGHT = 1,
			USAGE_BLENDINDICES = 2,
			USAGE_NORMAL = 3,
			USAGE_PSIZE = 4,
			USAGE_TEXCOORD = 5,
			USAGE_TANGENT = 6,
			USAGE_BINORMAL = 7,
			USAGE_TESSFACTOR = 8,
			USAGE_POSITIONT = 9,
			USAGE_COLOR = 10,
			USAGE_FOG = 11,
			USAGE_DEPTH = 12,
			USAGE_SAMPLE = 13
		};

		// Register file / operand kind of a Parameter.
		// Two pairs deliberately share a value: PARAMETER_TEXTURE and
		// PARAMETER_ADDR are both 3, PARAMETER_TEXCRDOUT and PARAMETER_OUTPUT
		// are both 6. All values must fit the 8-bit 'type' bitfields below.
		enum ParameterType
		{
			PARAMETER_TEMP = 0,
			PARAMETER_INPUT = 1,
			PARAMETER_CONST = 2,
			PARAMETER_TEXTURE = 3,
			PARAMETER_ADDR = 3,
			PARAMETER_RASTOUT = 4,
			PARAMETER_ATTROUT = 5,
			PARAMETER_TEXCRDOUT = 6,
			PARAMETER_OUTPUT = 6,
			PARAMETER_CONSTINT = 7,
			PARAMETER_COLOROUT = 8,
			PARAMETER_DEPTHOUT = 9,
			PARAMETER_SAMPLER = 10,
			PARAMETER_CONST2 = 11,
			PARAMETER_CONST3 = 12,
			PARAMETER_CONST4 = 13,
			PARAMETER_CONSTBOOL = 14,
			PARAMETER_LOOP = 15,
			PARAMETER_TEMPFLOAT16 = 16,
			PARAMETER_MISCTYPE = 17,
			PARAMETER_LABEL = 18,
			PARAMETER_PREDICATE = 19,

			// Literal operand kinds, numbered sequentially after
			// PARAMETER_PREDICATE. The commented-out entries are not defined.
		//	PARAMETER_FLOAT1LITERAL,
		//	PARAMETER_FLOAT2LITERAL,
		//	PARAMETER_FLOAT3LITERAL,
			PARAMETER_FLOAT4LITERAL,
			PARAMETER_BOOL1LITERAL,
		//	PARAMETER_BOOL2LITERAL,
		//	PARAMETER_BOOL3LITERAL,
		//	PARAMETER_BOOL4LITERAL,
		//	PARAMETER_INT1LITERAL,
		//	PARAMETER_INT2LITERAL,
		//	PARAMETER_INT3LITERAL,
			PARAMETER_INT4LITERAL,

			PARAMETER_VOID
		};

		// NOTE(review): presumably register indices used with
		// PARAMETER_MISCTYPE - confirm against the users of this enum.
		enum MiscParameterIndex
		{
			VPosIndex = 0,
			VFaceIndex = 1,
			InstanceIDIndex = 2,
			VertexIDIndex = 3,
		};

		// Source operand modifier, stored in SourceParameter::modifier.
		enum Modifier
		{
			MODIFIER_NONE,
			MODIFIER_NEGATE,
			MODIFIER_BIAS,
			MODIFIER_BIAS_NEGATE,
			MODIFIER_SIGN,
			MODIFIER_SIGN_NEGATE,
			MODIFIER_COMPLEMENT,
			MODIFIER_X2,
			MODIFIER_X2_NEGATE,
			MODIFIER_DZ,
			MODIFIER_DW,
			MODIFIER_ABS,
			MODIFIER_ABS_NEGATE,
			MODIFIER_NOT
		};

		// Bit flags for Instruction::analysis. The Instruction::analysis*
		// bitfields are documented there as needing to stay in sync with these.
		enum Analysis
		{
			// Flags indicating whether an instruction is affected by an execution enable mask
			ANALYSIS_BRANCH   = 0x00000001,
			ANALYSIS_BREAK    = 0x00000002,
			ANALYSIS_CONTINUE = 0x00000004,
			ANALYSIS_LEAVE    = 0x00000008,
		};

		// Relative-addressing descriptor embedded in Parameter.
		// It has no constructor of its own; Parameter() initialises every field.
		struct Relative
		{
			ParameterType type : 8;
			unsigned int index;
			unsigned int swizzle : 8;
			unsigned int scale;
			bool dynamic;   // Varies between concurrent shader instances
		};

		// Common part of an instruction operand.
		// The anonymous union overlays three views of the same storage:
		// register index plus relative addressing, a four-element literal
		// (float/int/bool), or a label/call-site pair.
		// NOTE(review): which view is valid presumably follows from 'type' -
		// confirm against the code that fills in parameters.
		struct Parameter
		{
			union
			{
				struct
				{
					unsigned int index;   // For registers types

					Relative rel;
				};

				float value[4];       // For float constants
				int integer[4];       // For integer constants
				int boolean[4];       // For boolean constants

				struct
				{
					unsigned int label;      // Label index
					unsigned int callSite;   // Call index (per label)
				};
			};

			// Creates a void parameter: index 0, type PARAMETER_VOID, and a
			// relative part of void type, index 0, swizzle 0, scale 1,
			// dynamic = true.
			Parameter() : index(0), type(PARAMETER_VOID)
			{
				rel.type = PARAMETER_VOID;
				rel.index = 0;
				rel.swizzle = 0;
				rel.scale = 1;
				rel.dynamic = true;
			}

			// Textual representations; defined outside this header.
			std::string string(ShaderType shaderType, unsigned short version) const;
			std::string typeString(ShaderType shaderType, unsigned short version) const;
			std::string relativeString() const;

			ParameterType type : 8;
		};

		// Destination operand: a Parameter plus a write mask and result modifiers.
		struct DestinationParameter : Parameter
		{
			// 'mask' shares storage with the four one-bit x/y/z/w flags.
			union
			{
				unsigned char mask;

				struct
				{
					bool x : 1;
					bool y : 1;
					bool z : 1;
					bool w : 1;
				};
			};

			// Defaults: mask 0xF (low four bits set), no saturate, no partial
			// precision, no centroid, shift 0.
			DestinationParameter() : mask(0xF), saturate(false), partialPrecision(false), centroid(false), shift(0)
			{
			}

			// Textual representations; defined outside this header.
			std::string modifierString() const;
			std::string shiftString() const;
			std::string maskString() const;

			bool saturate         : 1;
			bool partialPrecision : 1;
			bool centroid         : 1;
			signed char shift     : 4;   // Signed 4-bit field
		};

		// Source operand: a Parameter plus swizzle, modifier and buffer index.
		struct SourceParameter : Parameter
		{
			// Defaults: swizzle 0xE4, no modifier, bufferIndex -1.
			// NOTE(review): 0xE4 is 0b11100100, presumably the identity (.xyzw)
			// swizzle at two bits per component - confirm against swizzleString().
			SourceParameter() : swizzle(0xE4), modifier(MODIFIER_NONE), bufferIndex(-1)
			{
			}

			// Textual representations; defined outside this header.
			// string() hides Parameter::string() (same signature, non-virtual).
			std::string string(ShaderType shaderType, unsigned short version) const;
			std::string swizzleString() const;
			std::string preModifierString() const;
			std::string postModifierString() const;

			unsigned int swizzle : 8;
			Modifier modifier : 8;
			int bufferIndex : 8;   // Signed 8-bit field; constructed as -1
		};

		// One shader instruction: opcode, control/texture flags, predication,
		// declaration data, one destination and up to five source operands,
		// plus analysis flags.
		struct Instruction
		{
			explicit Instruction(Opcode opcode);
			Instruction(const unsigned long *token, int size, unsigned char majorVersion);

			virtual ~Instruction();

			// Token decoding helpers; defined outside this header.
			void parseOperationToken(unsigned long token, unsigned char majorVersion);
			void parseDeclarationToken(unsigned long token);
			void parseDestinationToken(const unsigned long *token, unsigned char majorVersion);
			void parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion);

			// Textual representations; defined outside this header.
			std::string string(ShaderType shaderType, unsigned short version) const;
			static std::string swizzleString(ParameterType type, unsigned char swizzle);
			std::string operationString(unsigned short version) const;
			std::string controlString() const;

			// Opcode classification queries; defined outside this header.
			bool isBranch() const;
			bool isCall() const;
			bool isBreak() const;
			bool isLoop() const;
			bool isEndLoop() const;

			bool isPredicated() const;

			Opcode opcode;

			// 'control' shares storage with the project/bias flags.
			union
			{
				Control control;

				struct
				{
					unsigned char project : 1;   // D3DSI_TEXLD_PROJECT
					unsigned char bias : 1;      // D3DSI_TEXLD_BIAS
				};
			};

			bool predicate;
			bool predicateNot;   // Negative predicate
			unsigned char predicateSwizzle;

			bool coissue;
			SamplerType samplerType;
			Usage usage;
			unsigned char usageIndex;

			DestinationParameter dst;
			SourceParameter src[5];

			// 'analysis' shares storage with the individual one-bit flags.
			union
			{
				unsigned int analysis;

				struct
				{
					// Keep in sync with Shader::Analysis flags
					unsigned int analysisBranch : 1;
					unsigned int analysisBreak : 1;
					unsigned int analysisContinue : 1;
					unsigned int analysisLeave : 1;
				};
			};
		};

		// Limits holds the maximum nested counts for the shader.
		struct Limits
		{
			uint32_t loops = 0; // maximum nested loop and reps.
			uint32_t ifs = 0; // maximum nested if statements.
			uint32_t stack = 0; // maximum call depth.
			uint32_t maxLabel = 0; // highest label in use.
		};

		Shader();

		virtual ~Shader();

		int getSerialID() const;
		size_t getLength() const;
		ShaderType getShaderType() const;
		unsigned short getShaderModel() const;
		// Returns a reference to the 'limits' member; valid for the lifetime
		// of this Shader.
		inline const Limits& getLimits() const { return limits; }

		// NOTE(review): 'instruction' stores raw Instruction pointers, so
		// append() presumably takes ownership - confirm in the destructor.
		void append(Instruction *instruction);
		void declareSampler(int i);

		const Instruction *getInstruction(size_t i) const;
		int size(unsigned long opcode) const;
		static int size(unsigned long opcode, unsigned short shaderModel);

		void print(const char *fileName, ...) const;
		void printInstruction(int index, const char *fileName) const;

		static bool maskContainsComponent(int mask, int component);
		static bool swizzleContainsComponent(int swizzle, int component);
		static bool swizzleContainsComponentMasked(int swizzle, int component, int mask);

		bool containsDynamicBranching() const;
		bool containsBreakInstruction() const;
		bool containsContinueInstruction() const;
		bool containsLeaveInstruction() const;
		bool containsDefineInstruction() const;
		bool usesSampler(int i) const;

		// Usage/index pair identifying a shader input or output, plus
		// centroid and flat flags.
		struct Semantic
		{
			// Default arguments give usage 0xFF and index 0xFF, which active()
			// reports as inactive. 'centroid' always starts out false.
			Semantic(unsigned char usage = 0xFF, unsigned char index = 0xFF, bool flat = false) : usage(usage), index(index), centroid(false), flat(flat)
			{
			}

			// Compares usage and index only; centroid and flat are ignored.
			bool operator==(const Semantic &semantic) const
			{
				return usage == semantic.usage && index == semantic.index;
			}

			// True unless usage holds the 0xFF sentinel.
			bool active() const
			{
				return usage != 0xFF;
			}

			unsigned char usage;
			unsigned char index;
			bool centroid;
			bool flat;
		};

		void optimize();

		// FIXME: Private
		unsigned int dirtyConstantsF;
		unsigned int dirtyConstantsI;
		unsigned int dirtyConstantsB;

		bool indirectAddressableTemporaries;
		bool indirectAddressableInput;
		bool indirectAddressableOutput;

	protected:
		void parse(const unsigned long *token);

		// Optimisation passes; defined outside this header.
		void optimizeLeave();
		void optimizeCall();
		void removeNull();

		// Analysis passes; defined outside this header.
		void analyzeDirtyConstants();
		void analyzeDynamicBranching();
		void analyzeSamplers();
		void analyzeCallSites();
		void analyzeIndirectAddressing();
		void analyzeLimits();
		void markFunctionAnalysis(unsigned int functionLabel, Analysis flag);

		Limits limits; // Calculated in analyzeLimits().

		ShaderType shaderType;

		// 'shaderModel' shares storage with the minorVersion/majorVersion
		// byte pair.
		union
		{
			unsigned short shaderModel;

			struct
			{
				unsigned char minorVersion;
				unsigned char majorVersion;
			};
		};

		std::vector<Instruction*> instruction;

		unsigned short usedSamplers;   // Bit flags

	private:
		const int serialID;
		// NOTE(review): 'volatile' gives neither atomicity nor ordering;
		// confirm how this counter is updated if shaders can be created
		// from several threads.
		static volatile int serialCounter;

		bool dynamicBranching;
		bool containsBreak;
		bool containsContinue;
		bool containsLeave;
		bool containsDefine;
	};
}
676 
677 #endif   // sw_Shader_hpp
678