1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file state.h
24 *
25 * @brief Definitions for API state.
26 *
27 ******************************************************************************/
28 #pragma once
29 
30 #include "common/formats.h"
31 #include "common/intrin.h"
// Integer type used for address fields such as SWR_SURFACE_STATE::xpBaseAddress.
typedef unsigned long long gfxptr_t;
33 #include <functional>
34 #include <algorithm>
35 
36 //////////////////////////////////////////////////////////////////////////
37 /// PRIMITIVE_TOPOLOGY.
38 //////////////////////////////////////////////////////////////////////////
enum PRIMITIVE_TOPOLOGY
{
    TOP_UNKNOWN                = 0x00,
    TOP_POINT_LIST             = 0x01,
    TOP_LINE_LIST              = 0x02,
    TOP_LINE_STRIP             = 0x03,
    TOP_TRIANGLE_LIST          = 0x04,
    TOP_TRIANGLE_STRIP         = 0x05,
    TOP_TRIANGLE_FAN           = 0x06,
    TOP_QUAD_LIST              = 0x07,
    TOP_QUAD_STRIP             = 0x08,
    TOP_LINE_LIST_ADJ          = 0x09,
    TOP_LISTSTRIP_ADJ          = 0x0A,
    TOP_TRI_LIST_ADJ           = 0x0B,
    TOP_TRI_STRIP_ADJ          = 0x0C,
    TOP_TRI_STRIP_REVERSE      = 0x0D,
    TOP_POLYGON                = 0x0E,
    TOP_RECT_LIST              = 0x0F,
    TOP_LINE_LOOP              = 0x10,
    TOP_POINT_LIST_BF          = 0x11,
    TOP_LINE_STRIP_CONT        = 0x12,
    TOP_LINE_STRIP_BF          = 0x13,
    TOP_LINE_STRIP_CONT_BF     = 0x14,
    // 0x15 is not assigned.
    TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
    TOP_TRIANGLE_DISC          = 0x17,  /// @todo What is this??

    // Patch lists: TOP_PATCHLIST_N is a list of N-vertex patches and is
    // encoded as TOP_PATCHLIST_BASE + N, so the per-patch vertex count can be
    // recovered by subtracting the base. The base itself is not a valid topology.
    TOP_PATCHLIST_BASE = 0x1F,
    TOP_PATCHLIST_1    = TOP_PATCHLIST_BASE + 1,   // 0x20
    TOP_PATCHLIST_2    = TOP_PATCHLIST_BASE + 2,
    TOP_PATCHLIST_3    = TOP_PATCHLIST_BASE + 3,
    TOP_PATCHLIST_4    = TOP_PATCHLIST_BASE + 4,
    TOP_PATCHLIST_5    = TOP_PATCHLIST_BASE + 5,
    TOP_PATCHLIST_6    = TOP_PATCHLIST_BASE + 6,
    TOP_PATCHLIST_7    = TOP_PATCHLIST_BASE + 7,
    TOP_PATCHLIST_8    = TOP_PATCHLIST_BASE + 8,
    TOP_PATCHLIST_9    = TOP_PATCHLIST_BASE + 9,
    TOP_PATCHLIST_10   = TOP_PATCHLIST_BASE + 10,
    TOP_PATCHLIST_11   = TOP_PATCHLIST_BASE + 11,
    TOP_PATCHLIST_12   = TOP_PATCHLIST_BASE + 12,
    TOP_PATCHLIST_13   = TOP_PATCHLIST_BASE + 13,
    TOP_PATCHLIST_14   = TOP_PATCHLIST_BASE + 14,
    TOP_PATCHLIST_15   = TOP_PATCHLIST_BASE + 15,
    TOP_PATCHLIST_16   = TOP_PATCHLIST_BASE + 16,
    TOP_PATCHLIST_17   = TOP_PATCHLIST_BASE + 17,
    TOP_PATCHLIST_18   = TOP_PATCHLIST_BASE + 18,
    TOP_PATCHLIST_19   = TOP_PATCHLIST_BASE + 19,
    TOP_PATCHLIST_20   = TOP_PATCHLIST_BASE + 20,
    TOP_PATCHLIST_21   = TOP_PATCHLIST_BASE + 21,
    TOP_PATCHLIST_22   = TOP_PATCHLIST_BASE + 22,
    TOP_PATCHLIST_23   = TOP_PATCHLIST_BASE + 23,
    TOP_PATCHLIST_24   = TOP_PATCHLIST_BASE + 24,
    TOP_PATCHLIST_25   = TOP_PATCHLIST_BASE + 25,
    TOP_PATCHLIST_26   = TOP_PATCHLIST_BASE + 26,
    TOP_PATCHLIST_27   = TOP_PATCHLIST_BASE + 27,
    TOP_PATCHLIST_28   = TOP_PATCHLIST_BASE + 28,
    TOP_PATCHLIST_29   = TOP_PATCHLIST_BASE + 29,
    TOP_PATCHLIST_30   = TOP_PATCHLIST_BASE + 30,
    TOP_PATCHLIST_31   = TOP_PATCHLIST_BASE + 31,
    TOP_PATCHLIST_32   = TOP_PATCHLIST_BASE + 32,  // 0x3F
};
99 
100 //////////////////////////////////////////////////////////////////////////
101 /// SWR_SHADER_TYPE
102 //////////////////////////////////////////////////////////////////////////
enum SWR_SHADER_TYPE
{
    SHADER_VERTEX    = 0,
    SHADER_GEOMETRY  = 1,
    SHADER_DOMAIN    = 2,
    SHADER_HULL      = 3,
    SHADER_PIXEL     = 4,
    SHADER_COMPUTE   = 5,

    NUM_SHADER_TYPES = 6,   // count of the shader types listed above
};
114 
115 //////////////////////////////////////////////////////////////////////////
116 /// SWR_RENDERTARGET_ATTACHMENT
/// @todo It's not clear what an "attachment" means. It's not a common term.
118 //////////////////////////////////////////////////////////////////////////
enum SWR_RENDERTARGET_ATTACHMENT
{
    // Color attachments
    SWR_ATTACHMENT_COLOR0  = 0,
    SWR_ATTACHMENT_COLOR1  = 1,
    SWR_ATTACHMENT_COLOR2  = 2,
    SWR_ATTACHMENT_COLOR3  = 3,
    SWR_ATTACHMENT_COLOR4  = 4,
    SWR_ATTACHMENT_COLOR5  = 5,
    SWR_ATTACHMENT_COLOR6  = 6,
    SWR_ATTACHMENT_COLOR7  = 7,
    // Depth and stencil attachments
    SWR_ATTACHMENT_DEPTH   = 8,
    SWR_ATTACHMENT_STENCIL = 9,

    SWR_NUM_ATTACHMENTS    = 10     // count of the attachments listed above
};
134 
// Number of color render targets (SWR_ATTACHMENT_COLOR0..COLOR7).
#define SWR_NUM_RENDERTARGETS 8

// One mask bit per attachment. The bit position equals the attachment's
// SWR_RENDERTARGET_ATTACHMENT value (COLOR0 -> bit 0 ... STENCIL -> bit 9).
#define SWR_ATTACHMENT_COLOR0_BIT  (1 << 0)
#define SWR_ATTACHMENT_COLOR1_BIT  (1 << 1)
#define SWR_ATTACHMENT_COLOR2_BIT  (1 << 2)
#define SWR_ATTACHMENT_COLOR3_BIT  (1 << 3)
#define SWR_ATTACHMENT_COLOR4_BIT  (1 << 4)
#define SWR_ATTACHMENT_COLOR5_BIT  (1 << 5)
#define SWR_ATTACHMENT_COLOR6_BIT  (1 << 6)
#define SWR_ATTACHMENT_COLOR7_BIT  (1 << 7)
#define SWR_ATTACHMENT_DEPTH_BIT   (1 << 8)
#define SWR_ATTACHMENT_STENCIL_BIT (1 << 9)

// Combined masks: every attachment bit, and the eight color bits only.
#define SWR_ATTACHMENT_MASK_ALL    0x3ff
#define SWR_ATTACHMENT_MASK_COLOR  0x0ff
149 
150 
151 //////////////////////////////////////////////////////////////////////////
152 /// @brief SWR Inner Tessellation factor ID
153 /// See above GetTessFactorOutputPosition code for documentation
enum SWR_INNER_TESSFACTOR_ID
{
    SWR_QUAD_U_TRI_INSIDE      = 0,
    SWR_QUAD_V_INSIDE          = 1,

    SWR_NUM_INNER_TESS_FACTORS = 2,     // sizes SWR_TESSELLATION_FACTORS::InnerTessFactors
};
161 
162 //////////////////////////////////////////////////////////////////////////
163 /// @brief SWR Outer Tessellation factor ID
164 /// See above GetTessFactorOutputPosition code for documentation
enum SWR_OUTER_TESSFACTOR_ID
{
    SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL  = 0,
    SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY = 1,
    SWR_QUAD_U_EQ1_TRI_W              = 2,
    SWR_QUAD_V_EQ1                    = 3,

    SWR_NUM_OUTER_TESS_FACTORS        = 4,  // sizes SWR_TESSELLATION_FACTORS::OuterTessFactors
};
174 
175 
176 /////////////////////////////////////////////////////////////////////////
177 /// simdvertex
178 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
179 ///        Contains space for position, SGV, and 32 generic attributes
180 /////////////////////////////////////////////////////////////////////////
enum SWR_VTX_SLOTS
{
    // Slot 0 holds the SGV data. The *_COMP values are presumably component
    // indices inside that slot -- TODO confirm against users of these names.
    VERTEX_SGV_SLOT              = 0,
    VERTEX_SGV_RTAI_COMP         = 0,
    VERTEX_SGV_VAI_COMP          = 1,
    VERTEX_SGV_POINT_SIZE_COMP   = 2,

    // Position occupies a single slot (start == end == 1).
    VERTEX_POSITION_SLOT         = 1,
    VERTEX_POSITION_END_SLOT     = 1,

    // Everything after position is laid out relative to VERTEX_POSITION_END_SLOT.
    VERTEX_CLIPCULL_DIST_LO_SLOT = (VERTEX_POSITION_END_SLOT + 1),   // VS writes lower 4 clip/cull dist
    VERTEX_CLIPCULL_DIST_HI_SLOT = (VERTEX_POSITION_END_SLOT + 2),   // VS writes upper 4 clip/cull dist
    VERTEX_ATTRIB_START_SLOT     = (VERTEX_POSITION_END_SLOT + 3),   // first of the 32 generic attribute slots
    VERTEX_ATTRIB_END_SLOT       = (VERTEX_POSITION_END_SLOT + 34),  // last generic attribute slot (inclusive)

    SWR_VTX_NUM_SLOTS            = (VERTEX_ATTRIB_END_SLOT + 1)      // total slot count
};
195 
196 // SoAoSoA
197 struct simdvertex
198 {
199     simdvector      attrib[SWR_VTX_NUM_SLOTS];
200 };
201 
202 #if ENABLE_AVX512_SIMD16
203 struct simd16vertex
204 {
205     simd16vector    attrib[SWR_VTX_NUM_SLOTS];
206 };
207 
208 #endif
209 
210 template<typename SIMD_T>
211 struct SIMDVERTEX_T
212 {
213     typename SIMD_T::Vec4               attrib[SWR_VTX_NUM_SLOTS];
214 };
215 
216 //////////////////////////////////////////////////////////////////////////
217 /// SWR_VS_CONTEXT
218 /// @brief Input to vertex shader
219 /////////////////////////////////////////////////////////////////////////
220 struct SWR_VS_CONTEXT
221 {
222     simdvertex* pVin;           // IN: SIMD input vertex data store
223     simdvertex* pVout;          // OUT: SIMD output vertex data store
224 
225     uint32_t InstanceID;        // IN: Instance ID, constant across all verts of the SIMD
226     simdscalari VertexID;       // IN: Vertex ID
227     simdscalari mask;           // IN: Active mask for shader
228 #if USE_SIMD16_FRONTEND
229     uint32_t AlternateOffset;   // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
230 #if USE_SIMD16_VS
231     simd16scalari mask16;	// IN: Active mask for shader (16-wide)
232     simd16scalari VertexID16;	// IN: Vertex ID (16-wide)
233 #endif
234 #endif
235 };
236 
237 /////////////////////////////////////////////////////////////////////////
238 /// ScalarCPoint
239 /// @brief defines a control point element as passed from the output
240 /// of the hull shader to the input of the domain shader
241 /////////////////////////////////////////////////////////////////////////
// One scalar (non-SIMD) attribute: four float components.
struct ScalarAttrib
{
    float x;    // component 0
    float y;    // component 1
    float z;    // component 2
    float w;    // component 3
};
249 
250 struct ScalarCPoint
251 {
252     ScalarAttrib attrib[SWR_VTX_NUM_SLOTS];
253 };
254 
255 //////////////////////////////////////////////////////////////////////////
256 /// SWR_TESSELLATION_FACTORS
257 /// @brief Tessellation factors structure (non-vector)
258 /////////////////////////////////////////////////////////////////////////
259 struct SWR_TESSELLATION_FACTORS
260 {
261     float  OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
262     float  InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
263 };
264 
265 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
266 struct ScalarPatch
267 {
268     SWR_TESSELLATION_FACTORS tessFactors;
269     ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
270     ScalarCPoint patchData;
271 };
272 
273 //////////////////////////////////////////////////////////////////////////
274 /// SWR_HS_CONTEXT
275 /// @brief Input to hull shader
276 /////////////////////////////////////////////////////////////////////////
277 struct SWR_HS_CONTEXT
278 {
279     simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
280     simdscalari PrimitiveID;    // IN: (SIMD) primitive ID generated from the draw call
281     simdscalari mask;           // IN: Active mask for shader
282     ScalarPatch* pCPout;        // OUT: Output control point patch
283                                 // SIMD-sized-array of SCALAR patches
284 };
285 
286 //////////////////////////////////////////////////////////////////////////
287 /// SWR_DS_CONTEXT
288 /// @brief Input to domain shader
289 /////////////////////////////////////////////////////////////////////////
290 struct SWR_DS_CONTEXT
291 {
292     uint32_t        PrimitiveID;    // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
293     uint32_t        vectorOffset;   // IN: (SCALAR) vector index offset into SIMD data.
294     uint32_t        vectorStride;   // IN: (SCALAR) stride (in vectors) of output data per attribute-component
295     uint32_t        outVertexAttribOffset; // IN: (SCALAR) Offset to the attributes as processed by the next shader stage.
296     ScalarPatch*    pCpIn;          // IN: (SCALAR) Control patch
297     simdscalar*     pDomainU;       // IN: (SIMD) Domain Point U coords
298     simdscalar*     pDomainV;       // IN: (SIMD) Domain Point V coords
299     simdscalari     mask;           // IN: Active mask for shader
300     simdscalar*     pOutputData;    // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
301 };
302 
303 //////////////////////////////////////////////////////////////////////////
304 /// SWR_GS_CONTEXT
305 /// @brief Input to geometry shader.
306 /////////////////////////////////////////////////////////////////////////
307 struct SWR_GS_CONTEXT
308 {
309     simdvector* pVerts;                 // IN: input primitive data for SIMD prims
310     uint32_t inputVertStride;           // IN: input vertex stride, in attributes
311     simdscalari PrimitiveID;            // IN: input primitive ID generated from the draw call
312     uint32_t InstanceID;                // IN: input instance ID
313     simdscalari mask;                   // IN: Active mask for shader
314     uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams)
315 };
316 
317 struct PixelPositions
318 {
319     simdscalar UL;
320     simdscalar center;
321     simdscalar sample;
322     simdscalar centroid;
323 };
324 
// Upper bound on the multisample count.
#define SWR_MAX_NUM_MULTISAMPLES 16
326 
327 //////////////////////////////////////////////////////////////////////////
328 /// SWR_PS_CONTEXT
329 /// @brief Input to pixel shader.
330 /////////////////////////////////////////////////////////////////////////
331 struct SWR_PS_CONTEXT
332 {
333     PixelPositions vX;          // IN: x location(s) of pixels
334     PixelPositions vY;          // IN: x location(s) of pixels
335     simdscalar vZ;              // INOUT: z location of pixels
336     simdscalari activeMask;     // OUT: mask for kill
337     simdscalar  inputMask;      // IN: input coverage mask for all samples
338     simdscalari oMask;          // OUT: mask for output coverage
339 
340     PixelPositions vI;          // barycentric coords evaluated at pixel center, sample position, centroid
341     PixelPositions vJ;
342     PixelPositions vOneOverW;   // IN: 1/w
343 
344     const float* pAttribs;      // IN: pointer to attribute barycentric coefficients
345     const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
346     const float* pRecipW;       // IN: pointer to 1/w coord for each vertex
347     const float *I;             // IN: Barycentric A, B, and C coefs used to compute I
348     const float *J;             // IN: Barycentric A, B, and C coefs used to compute J
349     float recipDet;             // IN: 1/Det, used when barycentric interpolating attributes
350     const float* pSamplePosX;   // IN: array of sample positions
351     const float* pSamplePosY;   // IN: array of sample positions
352     simdvector shaded[SWR_NUM_RENDERTARGETS];
353                                 // OUT: result color per rendertarget
354 
355     uint32_t frontFace;                 // IN: front- 1, back- 0
356     uint32_t sampleIndex;               // IN: sampleIndex
357     uint32_t renderTargetArrayIndex;    // IN: render target array index from GS
358     uint32_t rasterizerSampleCount;     // IN: sample count used by the rasterizer
359 
360     uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
361 };
362 
363 //////////////////////////////////////////////////////////////////////////
364 /// SWR_CS_CONTEXT
365 /// @brief Input to compute shader.
366 /////////////////////////////////////////////////////////////////////////
struct SWR_CS_CONTEXT
{
    // System values accepted by a compute shader:
    //   o ThreadId                 - current thread id relative to all other threads in the dispatch.
    //   o ThreadGroupId            - current thread group id relative to all other groups in the dispatch.
    //   o ThreadIdInGroup          - current thread relative to all threads in its thread group.
    //   o ThreadIdInGroupFlattened - flattened linear id derived from ThreadIdInGroup.
    //
    // ThreadGroupId is the index of the current thread group among all groups
    // of the Dispatch call; ThreadId, ThreadIdInGroup and
    // ThreadIdInGroupFlattened can be derived from it inside the shader.
    //
    // All of them are computed in the shader from the current tile counter,
    // an atomic counter that lives in the draw context and starts at
    //
    //     tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
    //
    // Each CPU worker thread atomically decrements the counter and passes the
    // current count to the shader. Once the count reaches 0, every thread
    // group of the dispatch call has completed.

    // Tile counter value for this thread group.
    uint32_t tileCounter;

    // Dispatch dimensions; the shader combines them with the tile counter to
    // compute the system values.
    uint32_t dispatchDims[3];

    // Thread Group Shared Memory pointer.
    uint8_t* pTGSM;

    // Spill/fill buffer for barrier support.
    uint8_t* pSpillFillBuffer;

    // Scratch space buffer used by the shader. The shader is responsible for
    // subdividing it per instance/simd.
    uint8_t* pScratchSpace;

    // Scratch space per work item x SIMD_WIDTH.
    uint32_t scratchSpacePerSimd;
};
403 
404 // enums
enum SWR_TILE_MODE
{
    SWR_TILE_NONE        = 0x0,  // linear mode (no tiling)
    SWR_TILE_MODE_WMAJOR = 0x1,  // W major tiling
    SWR_TILE_MODE_XMAJOR = 0x2,  // X major tiling
    SWR_TILE_MODE_YMAJOR = 0x3,  // Y major tiling
    SWR_TILE_SWRZ        = 0x4,  // SWR-Z tiling

    SWR_TILE_MODE_COUNT  = 0x5   // count of the tile modes listed above
};
415 
enum SWR_SURFACE_TYPE
{
    SURFACE_1D                = 0x0,
    SURFACE_2D                = 0x1,
    SURFACE_3D                = 0x2,
    SURFACE_CUBE              = 0x3,
    SURFACE_BUFFER            = 0x4,
    SURFACE_STRUCTURED_BUFFER = 0x5,
    // 0x6 is not assigned.
    SURFACE_NULL              = 0x7
};
426 
enum SWR_ZFUNCTION
{
    ZFUNC_ALWAYS = 0,
    ZFUNC_NEVER  = 1,
    ZFUNC_LT     = 2,
    ZFUNC_EQ     = 3,
    ZFUNC_LE     = 4,
    ZFUNC_GT     = 5,
    ZFUNC_NE     = 6,
    ZFUNC_GE     = 7,
    NUM_ZFUNC    = 8    // count of the functions listed above
};
439 
enum SWR_STENCILOP
{
    STENCILOP_KEEP    = 0,
    STENCILOP_ZERO    = 1,
    STENCILOP_REPLACE = 2,
    STENCILOP_INCRSAT = 3,
    STENCILOP_DECRSAT = 4,
    STENCILOP_INCR    = 5,
    STENCILOP_DECR    = 6,
    STENCILOP_INVERT  = 7
};
451 
enum SWR_BLEND_FACTOR
{
    BLENDFACTOR_ONE                = 0,
    BLENDFACTOR_SRC_COLOR          = 1,
    BLENDFACTOR_SRC_ALPHA          = 2,
    BLENDFACTOR_DST_ALPHA          = 3,
    BLENDFACTOR_DST_COLOR          = 4,
    BLENDFACTOR_SRC_ALPHA_SATURATE = 5,
    BLENDFACTOR_CONST_COLOR        = 6,
    BLENDFACTOR_CONST_ALPHA        = 7,
    BLENDFACTOR_SRC1_COLOR         = 8,
    BLENDFACTOR_SRC1_ALPHA         = 9,
    BLENDFACTOR_ZERO               = 10,
    BLENDFACTOR_INV_SRC_COLOR      = 11,
    BLENDFACTOR_INV_SRC_ALPHA      = 12,
    BLENDFACTOR_INV_DST_ALPHA      = 13,
    BLENDFACTOR_INV_DST_COLOR      = 14,
    BLENDFACTOR_INV_CONST_COLOR    = 15,
    BLENDFACTOR_INV_CONST_ALPHA    = 16,
    BLENDFACTOR_INV_SRC1_COLOR     = 17,
    BLENDFACTOR_INV_SRC1_ALPHA     = 18
};
474 
enum SWR_BLEND_OP
{
    BLENDOP_ADD         = 0,
    BLENDOP_SUBTRACT    = 1,
    BLENDOP_REVSUBTRACT = 2,
    BLENDOP_MIN         = 3,
    BLENDOP_MAX         = 4,
};
483 
enum SWR_LOGIC_OP
{
    LOGICOP_CLEAR         = 0x0,
    LOGICOP_NOR           = 0x1,
    LOGICOP_AND_INVERTED  = 0x2,
    LOGICOP_COPY_INVERTED = 0x3,
    LOGICOP_AND_REVERSE   = 0x4,
    LOGICOP_INVERT        = 0x5,
    LOGICOP_XOR           = 0x6,
    LOGICOP_NAND          = 0x7,
    LOGICOP_AND           = 0x8,
    LOGICOP_EQUIV         = 0x9,
    LOGICOP_NOOP          = 0xA,
    LOGICOP_OR_INVERTED   = 0xB,
    LOGICOP_COPY          = 0xC,
    LOGICOP_OR_REVERSE    = 0xD,
    LOGICOP_OR            = 0xE,
    LOGICOP_SET           = 0xF,
};
503 
504 //////////////////////////////////////////////////////////////////////////
505 /// SWR_AUX_MODE
506 /// @brief Specifies how the auxiliary buffer is used by the driver.
507 //////////////////////////////////////////////////////////////////////////
enum SWR_AUX_MODE
{
    AUX_MODE_NONE  = 0,
    AUX_MODE_COLOR = 1,
    AUX_MODE_UAV   = 2,
    AUX_MODE_DEPTH = 3,
};
515 
516 //////////////////////////////////////////////////////////////////////////
517 /// SWR_SURFACE_STATE
518 //////////////////////////////////////////////////////////////////////////
519 struct SWR_SURFACE_STATE
520 {
521     gfxptr_t xpBaseAddress;
522     SWR_SURFACE_TYPE type;  // @llvm_enum
523     SWR_FORMAT format;      // @llvm_enum
524     uint32_t width;
525     uint32_t height;
526     uint32_t depth;
527     uint32_t numSamples;
528     uint32_t samplePattern;
529     uint32_t pitch;
530     uint32_t qpitch;
531     uint32_t minLod;            // for sampled surfaces, the most detailed LOD that can be accessed by sampler
532     uint32_t maxLod;            // for sampled surfaces, the max LOD that can be accessed
533     float resourceMinLod;       // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
534     uint32_t lod;               // for render targets, the lod being rendered to
535     uint32_t arrayIndex;        // for render targets, the array index being rendered to for arrayed surfaces
536     SWR_TILE_MODE tileMode;     // @llvm_enum
537     uint32_t halign;
538     uint32_t valign;
539     uint32_t xOffset;
540     uint32_t yOffset;
541 
542     uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
543 
544     gfxptr_t xpAuxBaseAddress;   // Used for compression, append/consume counter, etc.
545     SWR_AUX_MODE auxMode;      // @llvm_enum
546 
547 
548     bool bInterleavedSamples;   // are MSAA samples stored interleaved or planar
549 };
550 
551 // vertex fetch state
552 // WARNING- any changes to this struct need to be reflected
553 // in the fetch shader jit
struct SWR_VERTEX_BUFFER_STATE
{
    uint32_t       index;
    uint32_t       pitch;
    const uint8_t* pData;
    uint32_t       size;
    uint32_t       numaNode;

    // min vertex (for bounds checking)
    uint32_t       minVertex;

    // Values precalculated for the fetch shader:
    uint32_t       maxVertex;             // size / pitch, used for OOB checks
    uint32_t       partialInboundsSize;   // size % pitch, used for partially OOB vertices
};
565 
566 struct SWR_INDEX_BUFFER_STATE
567 {
568     // Format type for indices (e.g. UINT16, UINT32, etc.)
569     SWR_FORMAT format; // @llvm_enum
570     const void *pIndices;
571     uint32_t size;
572 };
573 
574 
575 //////////////////////////////////////////////////////////////////////////
576 /// SWR_FETCH_CONTEXT
577 /// @brief Input to fetch shader.
578 /// @note WARNING - Changes to this struct need to be reflected in the
579 ///                 fetch shader jit.
580 /////////////////////////////////////////////////////////////////////////
581 struct SWR_FETCH_CONTEXT
582 {
583     const SWR_VERTEX_BUFFER_STATE* pStreams;    // IN: array of bound vertex buffers
584     const int32_t* pIndices;                    // IN: pointer to index buffer for indexed draws
585     const int32_t* pLastIndex;                  // IN: pointer to end of index buffer, used for bounds checking
586     uint32_t CurInstance;                       // IN: current instance
587     uint32_t BaseVertex;                        // IN: base vertex
588     uint32_t StartVertex;                       // IN: start vertex
589     uint32_t StartInstance;                     // IN: start instance
590     simdscalari VertexID;                       // OUT: vector of vertex IDs
591     simdscalari CutMask;                        // OUT: vector mask of indices which have the cut index value
592 #if USE_SIMD16_SHADERS
593 //    simd16scalari VertexID;                     // OUT: vector of vertex IDs
594 //    simd16scalari CutMask;                      // OUT: vector mask of indices which have the cut index value
595     simdscalari VertexID2;                      // OUT: vector of vertex IDs
596     simdscalari CutMask2;                       // OUT: vector mask of indices which have the cut index value
597 #endif
598 };
599 
600 //////////////////////////////////////////////////////////////////////////
601 /// SWR_STATS
602 ///
603 /// @brief All statistics generated by SWR go here. These are public
604 ///        to driver.
605 /////////////////////////////////////////////////////////////////////////
// Statistics generated by SWR and exposed to the driver. The struct is
// declared through the OSALIGNLINE() macro (defined outside this file).
OSALIGNLINE(struct) SWR_STATS
{
    // Occlusion Query
    uint64_t DepthPassCount; // Number of passing depth tests. Not exact.

    // Pipeline Stats
    uint64_t PsInvocations;  // Number of Pixel Shader invocations
    uint64_t CsInvocations;  // Number of Compute Shader invocations

};
616 
617 //////////////////////////////////////////////////////////////////////////
/// SWR_STATS_FE
619 ///
620 /// @brief All statistics generated by FE.
621 /////////////////////////////////////////////////////////////////////////
// Statistics generated by the front end (FE). The struct is declared
// through the OSALIGNLINE() macro (defined outside this file).
OSALIGNLINE(struct) SWR_STATS_FE
{
    uint64_t IaVertices;    // Number of Fetch Shader vertices
    uint64_t IaPrimitives;  // Number of PA primitives.
    uint64_t VsInvocations; // Number of Vertex Shader invocations
    uint64_t HsInvocations; // Number of Hull Shader invocations
    uint64_t DsInvocations; // Number of Domain Shader invocations
    uint64_t GsInvocations; // Number of Geometry Shader invocations
    uint64_t GsPrimitives;  // Number of prims GS outputs.
    uint64_t CInvocations;  // Number of clipper invocations
    uint64_t CPrimitives;   // Number of clipper primitives.

    // Streamout Stats: one entry per stream (the 4 matches MAX_SO_STREAMS,
    // which is defined later in this file).
    uint64_t SoPrimStorageNeeded[4];
    uint64_t SoNumPrimsWritten[4];
};
638 
639 //////////////////////////////////////////////////////////////////////////
640 /// STREAMOUT_BUFFERS
641 /////////////////////////////////////////////////////////////////////////
642 
// Stream output limits.
#define MAX_SO_STREAMS  4
#define MAX_SO_BUFFERS  4
#define MAX_ATTRIBUTES  32
646 
// One stream output buffer binding. Size, pitch and offset are all in dwords.
struct SWR_STREAMOUT_BUFFER
{
    bool      enable;
    bool      soWriteEnable;

    uint32_t* pBuffer;          // pointer to the streamout buffer
    uint32_t  bufferSize;       // size of the buffer in dwords
    uint32_t  pitch;            // vertex pitch of the buffer in dwords
    uint32_t  streamOffset;     // offset into the buffer in dwords; SOS will increment it

    // Offset to the SO write offset. If not null then the offset is updated here.
    uint32_t* pWriteOffset;

};
668 
669 //////////////////////////////////////////////////////////////////////////
670 /// STREAMOUT_STATE
671 /////////////////////////////////////////////////////////////////////////
672 struct SWR_STREAMOUT_STATE
673 {
674     // This disables stream output.
675     bool soEnable;
676 
677     // which streams are enabled for streamout
678     bool streamEnable[MAX_SO_STREAMS];
679 
680     // If set then do not send any streams to the rasterizer.
681     bool rasterizerDisable;
682 
683     // Specifies which stream to send to the rasterizer.
684     uint32_t streamToRasterizer;
685 
686     // The stream masks specify which attributes are sent to which streams.
687     // These masks help the FE to setup the pPrimData buffer that is passed
688     // the Stream Output Shader (SOS) function.
689     uint32_t streamMasks[MAX_SO_STREAMS];
690 
691     // Number of attributes, including position, per vertex that are streamed out.
692     // This should match number of bits in stream mask.
693     uint32_t streamNumEntries[MAX_SO_STREAMS];
694 
695     // Offset to the start of the attributes of the input vertices, in simdvector units
696     uint32_t vertexAttribOffset[MAX_SO_STREAMS];
697 };
698 
699 //////////////////////////////////////////////////////////////////////////
700 /// STREAMOUT_CONTEXT - Passed to SOS
701 /////////////////////////////////////////////////////////////////////////
702 struct SWR_STREAMOUT_CONTEXT
703 {
704     uint32_t* pPrimData;
705     SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
706 
707     // Num prims written for this stream
708     uint32_t numPrimsWritten;
709 
710     // Num prims that should have been written if there were no overflow.
711     uint32_t numPrimStorageNeeded;
712 };
713 
714 //////////////////////////////////////////////////////////////////////////
715 /// SWR_GS_STATE - Geometry shader state
716 /////////////////////////////////////////////////////////////////////////
717 struct SWR_GS_STATE
718 {
719     bool gsEnable;
720 
721     // Number of input attributes per vertex. Used by the frontend to
722     // optimize assembling primitives for GS
723     uint32_t numInputAttribs;
724 
725     // Stride of incoming verts in attributes
726     uint32_t inputVertStride;
727 
728     // Output topology - can be point, tristrip, or linestrip
729     PRIMITIVE_TOPOLOGY outputTopology;      // @llvm_enum
730 
731     // Maximum number of verts that can be emitted by a single instance of the GS
732     uint32_t maxNumVerts;
733 
734     // Instance count
735     uint32_t instanceCount;
736 
737     // If true, geometry shader emits a single stream, with separate cut buffer.
738     // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
739     // to map vertices to streams
740     bool isSingleStream;
741 
742     // When single stream is enabled, singleStreamID dictates which stream is being output.
743     // field ignored if isSingleStream is false
744     uint32_t singleStreamID;
745 
746     // Total amount of memory to allocate for one instance of the shader output in bytes
747     uint32_t allocationSize;
748 
749     // Offset to the start of the attributes of the input vertices, in simdvector units, as read by the GS
750     uint32_t vertexAttribOffset;
751 
752     // Offset to the attributes as stored by the preceding shader stage.
753     uint32_t srcVertexAttribOffset;
754 
755     // Size of the control data section which contains cut or streamID data, in simdscalar units. Should be sized to handle
756     // the maximum number of verts output by the GS. Can be 0 if there are no cuts or streamID bits.
757     uint32_t controlDataSize;
758 
759     // Offset to the control data section, in bytes
760     uint32_t controlDataOffset;
761 
762     // Total size of an output vertex, in simdvector units
763     uint32_t outputVertexSize;
764 
765     // Offset to the start of the vertex section, in bytes
766     uint32_t outputVertexOffset;
767 
768     // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, shader is
769     // expected to store the final vertex count in the first dword of the gs output stream.
770     uint32_t staticVertexCount;
771 };
772 
773 
774 //////////////////////////////////////////////////////////////////////////
775 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
776 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_OUTPUT_TOPOLOGY
{
    SWR_TS_OUTPUT_POINT          = 0,
    SWR_TS_OUTPUT_LINE           = 1,
    SWR_TS_OUTPUT_TRI_CW         = 2,
    SWR_TS_OUTPUT_TRI_CCW        = 3,

    SWR_TS_OUTPUT_TOPOLOGY_COUNT = 4    // count of the topologies listed above
};
786 
787 //////////////////////////////////////////////////////////////////////////
788 /// SWR_TS_PARTITIONING - Defines tessellation algorithm
789 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_PARTITIONING
{
    SWR_TS_INTEGER            = 0,
    SWR_TS_ODD_FRACTIONAL     = 1,
    SWR_TS_EVEN_FRACTIONAL    = 2,

    SWR_TS_PARTITIONING_COUNT = 3   // count of the partitioning modes listed above
};
798 
799 //////////////////////////////////////////////////////////////////////////
800 /// SWR_TS_DOMAIN - Defines Tessellation Domain
801 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_DOMAIN
{
    SWR_TS_QUAD         = 0,
    SWR_TS_TRI          = 1,
    SWR_TS_ISOLINE      = 2,

    SWR_TS_DOMAIN_COUNT = 3     // count of the domains listed above
};
810 
811 //////////////////////////////////////////////////////////////////////////
812 /// SWR_TS_STATE - Tessellation state
813 /////////////////////////////////////////////////////////////////////////
814 struct SWR_TS_STATE
815 {
816     bool                    tsEnable;
817     SWR_TS_OUTPUT_TOPOLOGY  tsOutputTopology;   // @llvm_enum
818     SWR_TS_PARTITIONING     partitioning;       // @llvm_enum
819     SWR_TS_DOMAIN           domain;             // @llvm_enum
820 
821     PRIMITIVE_TOPOLOGY      postDSTopology;     // @llvm_enum
822 
823     uint32_t                numHsInputAttribs;
824     uint32_t                numHsOutputAttribs;
825     uint32_t                numDsOutputAttribs;
826     uint32_t                dsAllocationSize;
827     uint32_t                dsOutVtxAttribOffset;
828 
829     // Offset to the start of the attributes of the input vertices, in simdvector units
830     uint32_t                vertexAttribOffset;
831 };
832 
833 // output merger state
struct SWR_RENDER_TARGET_BLEND_STATE
{
    // One single-bit write-disable flag per color channel. Only four of
    // the eight bits in the underlying byte are named.
    uint8_t writeDisableRed : 1;
    uint8_t writeDisableGreen : 1;
    uint8_t writeDisableBlue : 1;
    uint8_t writeDisableAlpha : 1;
};
// The four flags are required to pack into exactly one byte.
static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
842 
// Multisample modes. Enumerator values are consecutive indices starting
// at 0 (not the sample counts themselves).
enum SWR_MULTISAMPLE_COUNT
{
    SWR_MULTISAMPLE_1X = 0,
    SWR_MULTISAMPLE_2X,
    SWR_MULTISAMPLE_4X,
    SWR_MULTISAMPLE_8X,
    SWR_MULTISAMPLE_16X,
    // Sentinel: number of modes above; must stay last.
    SWR_MULTISAMPLE_TYPE_COUNT
};
852 
GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount)853 INLINE uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount) // @llvm_func_start
854 {
855     static const uint32_t sampleCountLUT[SWR_MULTISAMPLE_TYPE_COUNT] {1, 2, 4, 8, 16};
856     assert(sampleCount < SWR_MULTISAMPLE_TYPE_COUNT);
857     return sampleCountLUT[sampleCount];
858 } // @llvm_func_end
859 
860 struct SWR_BLEND_STATE
861 {
862     // constant blend factor color in RGBA float
863     float constantColor[4];
864 
865     // alpha test reference value in unorm8 or float32
866     uint32_t alphaTestReference;
867     uint32_t sampleMask;
868     // all RT's have the same sample count
869     ///@todo move this to Output Merger state when we refactor
870     SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum
871 
872     SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
873 };
874 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
875 
876 //////////////////////////////////////////////////////////////////////////
877 /// FUNCTION POINTERS FOR SHADERS
878 
879 #if USE_SIMD16_SHADERS
880 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
881 #else
882 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
883 #endif
884 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
885 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
886 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
887 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
888 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
889 typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
890 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
891 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
892 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
893     simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
894     uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
895 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
896 
897 
898 
899 //////////////////////////////////////////////////////////////////////////
900 /// FRONTEND_STATE
901 /////////////////////////////////////////////////////////////////////////
struct SWR_FRONTEND_STATE
{
    // skip clip test, perspective divide, and viewport transform
    // intended for verts in screen space
    bool vpTransformDisable;
    bool bEnableCutIndex;
    // Per-topology provoking vertex selectors packed into bitfields;
    // 'bits' aliases all of them as a single 32-bit word.
    union
    {
        struct
        {
            uint32_t triFan : 2;
            uint32_t lineStripList : 1;
            uint32_t triStripList : 2;
        };
        uint32_t bits;
    } provokingVertex;
    uint32_t topologyProvokingVertex; // provoking vertex for the draw topology

    // Size of a vertex in simdvector units. Should be sized to the
    // maximum of the input/output of the vertex shader.
    uint32_t vsVertexSize;
};
924 
925 //////////////////////////////////////////////////////////////////////////
926 /// VIEWPORT_MATRIX
927 /////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT_MATRIX
{
    // Only six matrix entries are stored: three diagonal terms ...
    float m00;
    float m11;
    float m22;
    // ... and the first three terms of row 3.
    float m30;
    float m31;
    float m32;
};
937 
938 //////////////////////////////////////////////////////////////////////////
/// VIEWPORT_MATRICES
940 /////////////////////////////////////////////////////////////////////////
941 struct SWR_VIEWPORT_MATRICES
942 {
943     float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
944     float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
945     float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
946     float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
947     float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
948     float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
949 };
950 
951 //////////////////////////////////////////////////////////////////////////
952 /// SWR_VIEWPORT
953 /////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT
{
    // Rectangle: origin and extent.
    float x;
    float y;
    float width;
    float height;
    // Depth range bounds.
    float minZ;
    float maxZ;
};
963 
964 //////////////////////////////////////////////////////////////////////////
965 /// SWR_CULLMODE
966 //////////////////////////////////////////////////////////////////////////
enum SWR_CULLMODE
{
    // Implicit consecutive values 0..3. All four fit the 2-bit cullMode
    // field of SWR_RASTSTATE.
    SWR_CULLMODE_BOTH,
    SWR_CULLMODE_NONE,
    SWR_CULLMODE_FRONT,
    SWR_CULLMODE_BACK
};
974 
// Fill modes; implicit consecutive values 0..2.
enum SWR_FILLMODE
{
    SWR_FILLMODE_POINT,
    SWR_FILLMODE_WIREFRAME,
    SWR_FILLMODE_SOLID
};
981 
// Front-face winding selectors; implicit values 0 and 1.
enum SWR_FRONTWINDING
{
    SWR_FRONTWINDING_CW,
    SWR_FRONTWINDING_CCW
};
987 
988 
// Pixel location selectors; implicit values 0 and 1.
enum SWR_PIXEL_LOCATION
{
    SWR_PIXEL_LOCATION_CENTER,
    SWR_PIXEL_LOCATION_UL,
};
994 
995 // fixed point screen space sample locations within a pixel
996 struct SWR_MULTISAMPLE_POS
997 {
998 public:
SetXiSWR_MULTISAMPLE_POS999     INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
SetYiSWR_MULTISAMPLE_POS1000     INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
XiSWR_MULTISAMPLE_POS1001     INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
YiSWR_MULTISAMPLE_POS1002     INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
SetXSWR_MULTISAMPLE_POS1003     INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
SetYSWR_MULTISAMPLE_POS1004     INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
XSWR_MULTISAMPLE_POS1005     INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
YSWR_MULTISAMPLE_POS1006     INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
1007     typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
XSWR_MULTISAMPLE_POS1008     INLINE sampleArrayT X() const { return _x; }; // @llvm_func
YSWR_MULTISAMPLE_POS1009     INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
vXiSWR_MULTISAMPLE_POS1010     INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
vYiSWR_MULTISAMPLE_POS1011     INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
vXSWR_MULTISAMPLE_POS1012     INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
vYSWR_MULTISAMPLE_POS1013     INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
TileSampleOffsetsXSWR_MULTISAMPLE_POS1014     INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
TileSampleOffsetsYSWR_MULTISAMPLE_POS1015     INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
1016 
1017     INLINE void PrecalcSampleData(int numSamples); //@llvm_func
1018 
1019 private:
1020     template <typename MaskT>
1021     INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
1022     INLINE void CalcTileSampleOffsets(int numSamples);   // @llvm_func
1023 
1024     // scalar sample values
1025     uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
1026     uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
1027     float _x[SWR_MAX_NUM_MULTISAMPLES];
1028     float _y[SWR_MAX_NUM_MULTISAMPLES];
1029 
1030     // precalc'd / vectorized samples
1031     __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
1032     __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
1033     simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
1034     simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
1035     __m128i tileSampleOffsetsX;
1036     __m128i tileSampleOffsetsY;
1037 };
1038 
1039 //////////////////////////////////////////////////////////////////////////
1040 /// SWR_RASTSTATE
1041 //////////////////////////////////////////////////////////////////////////
1042 struct SWR_RASTSTATE
1043 {
1044     uint32_t cullMode               : 2;
1045     uint32_t fillMode               : 2;
1046     uint32_t frontWinding           : 1;
1047     uint32_t scissorEnable          : 1;
1048     uint32_t depthClipEnable        : 1;
1049     uint32_t clipHalfZ              : 1;
1050     uint32_t pointParam             : 1;
1051     uint32_t pointSpriteEnable      : 1;
1052     uint32_t pointSpriteTopOrigin   : 1;
1053     uint32_t forcedSampleCount      : 1;
1054     uint32_t pixelOffset            : 1;
1055     uint32_t depthBiasPreAdjusted   : 1;    ///< depth bias constant is in float units, not per-format Z units
1056     uint32_t conservativeRast       : 1;
1057 
1058     float pointSize;
1059     float lineWidth;
1060 
1061     float depthBias;
1062     float slopeScaledDepthBias;
1063     float depthBiasClamp;
1064     SWR_FORMAT depthFormat;     // @llvm_enum
1065 
1066     // sample count the rasterizer is running at
1067     SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum
1068     uint32_t pixelLocation;     // UL or Center
1069     SWR_MULTISAMPLE_POS samplePositions;    // @llvm_struct
1070     bool bIsCenterPattern;   // @llvm_enum
1071 };
1072 
1073 
// Constant sources; implicit consecutive values 0..3, which all fit the
// 2-bit constantSource field of SWR_ATTRIB_SWIZZLE.
enum SWR_CONSTANT_SOURCE
{
    SWR_CONSTANT_SOURCE_CONST_0000,
    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT,
    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT,
    SWR_CONSTANT_SOURCE_PRIM_ID
};
1081 
// One swizzle map entry: 11 bits of bitfields in a 16-bit storage unit.
struct SWR_ATTRIB_SWIZZLE
{
    uint16_t sourceAttrib : 5;          // source attribute
    uint16_t constantSource : 2;        // constant source to apply
    uint16_t componentOverrideMask : 4; // override component with constant source
};
1088 
1089 // backend state
1090 struct SWR_BACKEND_STATE
1091 {
1092     uint32_t constantInterpolationMask;     // bitmask indicating which attributes have constant interpolation
1093     uint32_t pointSpriteTexCoordMask;       // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
1094 
1095     uint8_t numAttributes;                  // total number of attributes to send to backend (up to 32)
1096     uint8_t numComponents[32];              // number of components to setup per attribute, this reduces some calculations for unneeded components
1097 
1098     bool swizzleEnable;                 // when enabled, core will parse the swizzle map when
1099                                         // setting up attributes for the backend, otherwise
1100                                         // all attributes up to numAttributes will be sent
1101     SWR_ATTRIB_SWIZZLE swizzleMap[32];
1102 
1103     bool readRenderTargetArrayIndex;    // Forward render target array index from last FE stage to the backend
1104     bool readViewportArrayIndex;        // Read viewport array index from last FE stage during binning
1105 
1106 	// Offset to the start of the attributes of the input vertices, in simdvector units
1107     uint32_t vertexAttribOffset;
1108 
1109     // User clip/cull distance enables
1110     uint8_t cullDistanceMask;
1111     uint8_t clipDistanceMask;
1112 
1113     // Offset to clip/cull attrib section of the vertex, in simdvector units
1114     uint32_t vertexClipCullOffset;
1115 };
1116 
1117 
// Depth/stencil state, viewable either as named fields or as three raw
// 32-bit words through 'value'.
union SWR_DEPTH_STENCIL_STATE
{
    struct
    {
        // dword 0 (the bitfields below add up to exactly 32 bits)
        uint32_t depthWriteEnable : 1;
        uint32_t depthTestEnable : 1;
        uint32_t stencilWriteEnable : 1;
        uint32_t stencilTestEnable : 1;
        uint32_t doubleSidedStencilTestEnable : 1;

        uint32_t depthTestFunc : 3;
        uint32_t stencilTestFunc : 3;

        uint32_t backfaceStencilPassDepthPassOp : 3;
        uint32_t backfaceStencilPassDepthFailOp : 3;
        uint32_t backfaceStencilFailOp : 3;
        uint32_t backfaceStencilTestFunc : 3;
        uint32_t stencilPassDepthPassOp : 3;
        uint32_t stencilPassDepthFailOp : 3;
        uint32_t stencilFailOp : 3;

        // dword 1
        uint8_t backfaceStencilWriteMask;
        uint8_t backfaceStencilTestMask;
        uint8_t stencilWriteMask;
        uint8_t stencilTestMask;

        // dword 2 (only two of its four bytes are named)
        uint8_t backfaceStencilRefValue;
        uint8_t stencilRefValue;
    };
    uint32_t value[3];
};
1152 
// Shading rates; implicit consecutive values starting at 0.
enum SWR_SHADING_RATE
{
    SWR_SHADING_RATE_PIXEL,
    SWR_SHADING_RATE_SAMPLE,
    // Sentinel: number of rates above; must stay last.
    SWR_SHADING_RATE_COUNT,
};
1159 
// Input coverage modes; implicit consecutive values starting at 0.
enum SWR_INPUT_COVERAGE
{
    SWR_INPUT_COVERAGE_NONE,
    SWR_INPUT_COVERAGE_NORMAL,
    SWR_INPUT_COVERAGE_INNER_CONSERVATIVE,
    // Sentinel: number of modes above; must stay last.
    SWR_INPUT_COVERAGE_COUNT,
};
1167 
// Position offset kinds; implicit consecutive values starting at 0.
enum SWR_PS_POSITION_OFFSET
{
    SWR_PS_POSITION_SAMPLE_NONE,
    SWR_PS_POSITION_SAMPLE_OFFSET,
    SWR_PS_POSITION_CENTROID_OFFSET,
    // Sentinel: number of kinds above; must stay last.
    SWR_PS_POSITION_OFFSET_COUNT,
};
1175 
// Single-bit flags (0x1, 0x2, 0x4) that can be OR-ed together; their
// union fits in three bits.
enum SWR_BARYCENTRICS_MASK
{
    SWR_BARYCENTRIC_PER_PIXEL_MASK = 0x1,
    SWR_BARYCENTRIC_CENTROID_MASK = 0x2,
    SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4,
};
1182 
1183 // pixel shader state
1184 struct SWR_PS_STATE
1185 {
1186     // dword 0-1
1187     PFN_PIXEL_KERNEL pfnPixelShader;  // @llvm_pfn
1188 
1189     // dword 2
1190     uint32_t killsPixel             : 1;    // pixel shader can kill pixels
1191     uint32_t inputCoverage          : 2;    // ps uses input coverage
1192     uint32_t writesODepth           : 1;    // pixel shader writes to depth
1193     uint32_t usesSourceDepth        : 1;    // pixel shader reads depth
1194     uint32_t shadingRate            : 2;    // shading per pixel / sample / coarse pixel
1195     uint32_t posOffset              : 2;    // type of offset (none, sample, centroid) to add to pixel position
1196     uint32_t barycentricsMask       : 3;    // which type(s) of barycentric coords does the PS interpolate attributes with
1197     uint32_t usesUAV                : 1;    // pixel shader accesses UAV
1198     uint32_t forceEarlyZ            : 1;    // force execution of early depth/stencil test
1199 
1200     uint8_t renderTargetMask;               // Mask of render targets written
1201 };
1202 
1203 // depth bounds state
struct SWR_DEPTH_BOUNDS_STATE
{
    // Enable flag plus the minimum and maximum values for the test.
    bool    depthBoundsTestEnable;
    float   depthBoundsTestMinValue;
    float   depthBoundsTestMaxValue;
};
1210 
1211