1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #ifndef SHADER_INFO_H
26 #define SHADER_INFO_H
27 
28 #include "util/bitset.h"
29 #include "shader_enums.h"
30 #include <stdint.h>
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 
/* Capacity of the inlinable_uniform_dw_offsets[] array in shader_info. */
36 #define MAX_INLINABLE_UNIFORMS 4
37 
/**
 * Flat set of boolean flags, each named after a SPIR-V capability or
 * extension feature.
 *
 * NOTE(review): judging by the struct name, a true flag means the
 * corresponding capability is supported; who fills this in and who consults
 * it is not visible in this header -- confirm against the users of this
 * struct.
 *
 * Flags carrying a vendor prefix (amd_*, intel_*) are grouped at the end.
 */
38 struct spirv_supported_capabilities {
39    bool address;
40    bool atomic_storage;
41    bool demote_to_helper_invocation;
42    bool derivative_group;
43    bool descriptor_array_dynamic_indexing;
44    bool descriptor_array_non_uniform_indexing;
45    bool descriptor_indexing;
46    bool device_group;
47    bool draw_parameters;
48    bool float32_atomic_add;
49    bool float64;
50    bool float64_atomic_add;
51    bool fragment_shader_sample_interlock;
52    bool fragment_shader_pixel_interlock;
53    bool generic_pointers;
54    bool geometry_streams;
55    bool image_ms_array;
56    bool image_read_without_format;
57    bool image_write_without_format;
58    bool image_atomic_int64;
59    bool int8;
60    bool int16;
61    bool int64;
62    bool int64_atomics;
63    bool integer_functions2;
64    bool kernel;
65    bool kernel_image;
66    bool literal_sampler;
67    bool min_lod;
68    bool multiview;
69    bool physical_storage_buffer_address;
70    bool post_depth_coverage;
71    bool ray_tracing;
72    bool ray_query;
73    bool ray_traversal_primitive_culling;
74    bool runtime_descriptor_array;
75    bool float_controls;
76    bool shader_clock;
77    bool shader_viewport_index_layer;
78    bool stencil_export;
79    bool storage_8bit;
80    bool storage_16bit;
81    bool storage_image_ms;
82    bool subgroup_arithmetic;
83    bool subgroup_ballot;
84    bool subgroup_basic;
85    bool subgroup_quad;
86    bool subgroup_shuffle;
87    bool subgroup_vote;
88    bool tessellation;
89    bool transform_feedback;
90    bool variable_pointers;
91    bool vk_memory_model;
92    bool vk_memory_model_device_scope;
93    bool float16;
      /* Flags with the amd_ vendor prefix. */
94    bool amd_fragment_mask;
95    bool amd_gcn_shader;
96    bool amd_shader_ballot;
97    bool amd_trinary_minmax;
98    bool amd_image_read_write_lod;
99    bool amd_shader_explicit_vertex_parameter;
100    bool amd_image_gather_bias_lod;
101 
       /* Flags with the intel_ vendor prefix. */
102    bool intel_subgroup_shuffle;
103    bool intel_subgroup_buffer_block_io;
104 };
105 
/**
 * Metadata describing a single shader: identification, pipeline stage,
 * resource counts, input/output usage bitmasks, assorted one-bit flags and
 * an anonymous union holding per-stage data (vs, gs, fs, cs, tess).
 *
 * Many members are bit-fields, so member order and widths affect the struct
 * layout.  The members of the trailing union share storage.
 *
 * NOTE(review): presumably only the union member matching `stage` is
 * meaningful at any time -- confirm against the code that fills this in.
 */
106 typedef struct shader_info {
       /* Shader name.  NOTE(review): unlike `label`, nothing here says whether
        * this may be NULL -- confirm against the producers of this struct.
        */
107    const char *name;
108 
109    /* Descriptive name provided by the client; may be NULL */
110    const char *label;
111 
112    /* Shader is internal, and should be ignored by things like NIR_PRINT */
113    bool internal;
114 
115    /** The shader stage, such as MESA_SHADER_VERTEX. */
116    gl_shader_stage stage:8;
117 
118    /** The shader stage in a non SSO linked program that follows this stage,
119      * such as MESA_SHADER_FRAGMENT.
120      */
121    gl_shader_stage next_stage:8;
122 
123    /* Number of textures used by this shader */
124    uint8_t num_textures;
125    /* Number of uniform buffers used by this shader */
126    uint8_t num_ubos;
127    /* Number of atomic buffers used by this shader */
128    uint8_t num_abos;
129    /* Number of shader storage buffers (max .driver_location + 1) used by this
130     * shader.  In the case of nir_lower_atomics_to_ssbo being used, this will
131     * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs
132     * and atomic counters in nir_shader->info.
133     */
134    uint8_t num_ssbos;
135    /* Number of images used by this shader */
136    uint8_t num_images;
137 
138    /* Which inputs are actually read */
139    uint64_t inputs_read;
140    /* Which outputs are actually written */
141    uint64_t outputs_written;
142    /* Which outputs are actually read */
143    uint64_t outputs_read;
144    /* Which system values are actually read */
145    BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
146 
147    /* Which patch inputs are actually read */
148    uint32_t patch_inputs_read;
149    /* Which patch outputs are actually written */
150    uint32_t patch_outputs_written;
151    /* Which patch outputs are read */
152    uint32_t patch_outputs_read;
153 
154    /* Which inputs are read indirectly (subset of inputs_read) */
155    uint64_t inputs_read_indirectly;
156    /* Which outputs are read or written indirectly */
157    uint64_t outputs_accessed_indirectly;
158    /* Which patch inputs are read indirectly (subset of patch_inputs_read).
        * NOTE(review): declared 64-bit although patch_inputs_read is 32-bit.
        */
159    uint64_t patch_inputs_read_indirectly;
160    /* Which patch outputs are read or written indirectly.
        * NOTE(review): declared 64-bit although patch_outputs_written and
        * patch_outputs_read are 32-bit.
        */
161    uint64_t patch_outputs_accessed_indirectly;
162 
163    /** Bitfield of which textures are used */
164    uint32_t textures_used;
165 
166    /** Bitfield of which textures are used by texelFetch() */
167    uint32_t textures_used_by_txf;
168 
169    /** Bitfield of which images are used */
170    uint32_t images_used;
171    /** Bitfield of which images are buffers. */
172    uint32_t image_buffers;
173    /** Bitfield of which images are MSAA. */
174    uint32_t msaa_images;
175 
176    /* SPV_KHR_float_controls: execution mode for floating point ops */
177    uint16_t float_controls_execution_mode;
178 
       /* Offsets of the inlinable uniforms; the array holds up to
        * MAX_INLINABLE_UNIFORMS entries.  The "_dw_" in the name suggests the
        * unit is dwords -- TODO confirm.
        */
179    uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
       /* Presumably the number of valid entries in
        * inlinable_uniform_dw_offsets[] (4-bit field) -- TODO confirm.
        */
180    uint8_t num_inlinable_uniforms:4;
181 
182    /* The size of the gl_ClipDistance[] array, if declared. */
183    uint8_t clip_distance_array_size:4;
184 
185    /* The size of the gl_CullDistance[] array, if declared. */
186    uint8_t cull_distance_array_size:4;
187 
188    /* Whether or not this shader ever uses textureGather() */
189    bool uses_texture_gather:1;
190 
191    /**
192     * True if this shader uses the fddx/fddy opcodes.
193     *
194     * Note that this does not include the "fine" and "coarse" variants.
195     */
196    bool uses_fddx_fddy:1;
197 
198    /* Bitmask of bit-sizes used with ALU instructions. */
199    uint8_t bit_sizes_float;
200    uint8_t bit_sizes_int;
201 
202    /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */
203    bool first_ubo_is_default_ubo:1;
204 
205    /* Whether or not separate shader objects were used */
206    bool separate_shader:1;
207 
208    /** Was this shader linked with any transform feedback varyings? */
209    bool has_transform_feedback_varyings:1;
210 
211    /* Whether flrp has been lowered. */
212    bool flrp_lowered:1;
213 
214    /* Whether nir_lower_io has been called to lower derefs.
215     * nir_variables for inputs and outputs might not be present in the IR.
216     */
217    bool io_lowered:1;
218 
219    /* Whether the shader writes memory, including transform feedback. */
220    bool writes_memory:1;
221 
222    /* Whether gl_Layer is viewport-relative */
223    bool layer_viewport_relative:1;
224 
       /* Per-stage data.  The union is anonymous, so its members are reached
        * directly as info.vs, info.gs, info.fs, info.cs and info.tess.
        */
225    union {
226       struct {
227          /* Which inputs are doubles */
228          uint64_t double_inputs;
229 
230          /* For AMD-specific driver-internal shaders. It replaces vertex
231           * buffer loads with code generating VS inputs from scalar registers.
232           *
233           * Valid values: SI_VS_BLIT_SGPRS_POS_*
234           */
235          uint8_t blit_sgprs_amd:4;
236 
237          /* True if the shader writes position in window space coordinates pre-transform */
238          bool window_space_position:1;
239       } vs;
240 
241       struct {
242          /** The output primitive type (GL enum value) */
243          uint16_t output_primitive;
244 
245          /** The input primitive type (GL enum value) */
246          uint16_t input_primitive;
247 
248          /** The maximum number of vertices the geometry shader might write. */
249          uint16_t vertices_out;
250 
251          /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
252          uint8_t invocations;
253 
254          /** The number of vertices received per input primitive (max. 6) */
255          uint8_t vertices_in:3;
256 
257          /** Whether or not this shader uses EndPrimitive */
258          bool uses_end_primitive:1;
259 
260          /** The streams used in this shader (max. 4) */
261          uint8_t active_stream_mask:4;
262       } gs;
263 
264       struct {
265          bool uses_discard:1;
266          bool uses_demote:1;
267          bool uses_fbfetch_output:1;
268          bool color_is_dual_source:1;
269 
270          /**
271           * True if this fragment shader requires helper invocations.  This
272           * can be caused by the use of ALU derivative ops, texture
273           * instructions which do implicit derivatives, and the use of quad
274           * subgroup operations.
275           */
276          bool needs_helper_invocations:1;
277 
278          /**
279           * Whether any inputs are declared with the "sample" qualifier.
280           */
281          bool uses_sample_qualifier:1;
282 
283          /**
284           * Whether early fragment tests are enabled as defined by
285           * ARB_shader_image_load_store.
286           */
287          bool early_fragment_tests:1;
288 
289          /**
290           * Defined by INTEL_conservative_rasterization.
291           */
292          bool inner_coverage:1;
293 
294          bool post_depth_coverage:1;
295 
296          /**
297           * \name ARB_fragment_coord_conventions
298           * @{
299           */
300          bool pixel_center_integer:1;
301          bool origin_upper_left:1;
302          /*@}*/
303 
304          bool pixel_interlock_ordered:1;
305          bool pixel_interlock_unordered:1;
306          bool sample_interlock_ordered:1;
307          bool sample_interlock_unordered:1;
308 
309          /**
310           * Flags whether NIR's base types on the FS color outputs should be
311           * ignored.
312           *
313           * GLSL requires that fragment shader output base types match the
314           * render target's base types for the behavior to be defined.  From
315           * the GL 4.6 spec:
316           *
317           *     "If the values written by the fragment shader do not match the
318           *      format(s) of the corresponding color buffer(s), the result is
319           *      undefined."
320           *
321           * However, for NIR shaders translated from TGSI, we don't have the
322           * output types any more, so the driver will need to do whatever
323           * fixups are necessary to handle effectively untyped data being
324           * output from the FS.
325           */
326          bool untyped_color_outputs:1;
327 
328          /** gl_FragDepth layout for ARB_conservative_depth. */
329          enum gl_frag_depth_layout depth_layout:3;
330 
331          /**
332           * Interpolation qualifiers for drivers that lower color inputs
333           * to system values.
334           */
335          unsigned color0_interp:3; /* glsl_interp_mode */
336          bool color0_sample:1;
337          bool color0_centroid:1;
338          unsigned color1_interp:3; /* glsl_interp_mode */
339          bool color1_sample:1;
340          bool color1_centroid:1;
341       } fs;
342 
343       struct {
             /* Presumably the workgroup size in x, y, z -- TODO confirm. */
344          uint16_t local_size[3];
345 
346          bool local_size_variable:1;
347          uint8_t user_data_components_amd:3;
348 
349          /*
350           * Arrangement of invocations used to calculate derivatives in a compute
351           * shader.  From NV_compute_shader_derivatives.
352           */
353          enum gl_derivative_group derivative_group:2;
354 
355          /**
356           * Size of shared variables accessed by the compute shader.
357           */
358          unsigned shared_size;
359 
360          /**
361           * pointer size is:
362           *   AddressingModelLogical:    0    (default)
363           *   AddressingModelPhysical32: 32
364           *   AddressingModelPhysical64: 64
365           */
366          unsigned ptr_size;
367       } cs;
368 
369       /* Applies to both TCS and TES. */
370       struct {
371          uint16_t primitive_mode; /* GL_TRIANGLES, GL_QUADS or GL_ISOLINES */
372 
373          /** The number of vertices in the TCS output patch. */
374          uint8_t tcs_vertices_out;
375          enum gl_tess_spacing spacing:2;
376 
377          /** Is the vertex order counterclockwise? */
378          bool ccw:1;
379          bool point_mode:1;
380 
381          /* Bit mask of TCS per-vertex inputs (VS outputs) that are used
382           * with a vertex index that is NOT the invocation id
383           */
384          uint64_t tcs_cross_invocation_inputs_read;
385 
386          /* Bit mask of TCS per-vertex outputs that are used
387           * with a vertex index that is NOT the invocation id
388           */
389          uint64_t tcs_cross_invocation_outputs_read;
390       } tess;
391    };
392 } shader_info;
393 
394 #ifdef __cplusplus
395 }
396 #endif
397 
398 #endif /* SHADER_INFO_H */
399