1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file link_varyings.cpp
26  *
27  * Linker functions related specifically to linking varyings between shader
28  * stages.
29  */
30 
31 
32 #include "main/mtypes.h"
33 #include "glsl_symbol_table.h"
34 #include "glsl_parser_extras.h"
35 #include "ir_optimization.h"
36 #include "linker.h"
37 #include "link_varyings.h"
38 #include "main/macros.h"
39 #include "util/hash_table.h"
40 #include "program.h"
41 
42 
43 /**
44  * Get the varying type stripped of the outermost array if we're processing
45  * a stage whose varyings are arrays indexed by a vertex number (such as
46  * geometry shader inputs).
47  */
48 static const glsl_type *
get_varying_type(const ir_variable * var,gl_shader_stage stage)49 get_varying_type(const ir_variable *var, gl_shader_stage stage)
50 {
51    const glsl_type *type = var->type;
52 
53    if (!var->data.patch &&
54        ((var->data.mode == ir_var_shader_out &&
55          stage == MESA_SHADER_TESS_CTRL) ||
56         (var->data.mode == ir_var_shader_in &&
57          (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
58           stage == MESA_SHADER_GEOMETRY)))) {
59       assert(type->is_array());
60       type = type->fields.array;
61    }
62 
63    return type;
64 }
65 
66 static void
create_xfb_varying_names(void * mem_ctx,const glsl_type * t,char ** name,size_t name_length,unsigned * count,const char * ifc_member_name,const glsl_type * ifc_member_t,char *** varying_names)67 create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
68                          size_t name_length, unsigned *count,
69                          const char *ifc_member_name,
70                          const glsl_type *ifc_member_t, char ***varying_names)
71 {
72    if (t->is_interface()) {
73       size_t new_length = name_length;
74 
75       assert(ifc_member_name && ifc_member_t);
76       ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);
77 
78       create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
79                                NULL, NULL, varying_names);
80    } else if (t->is_record()) {
81       for (unsigned i = 0; i < t->length; i++) {
82          const char *field = t->fields.structure[i].name;
83          size_t new_length = name_length;
84 
85          ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);
86 
87          create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name,
88                                   new_length, count, NULL, NULL,
89                                   varying_names);
90       }
91    } else if (t->without_array()->is_record() ||
92               t->without_array()->is_interface() ||
93               (t->is_array() && t->fields.array->is_array())) {
94       for (unsigned i = 0; i < t->length; i++) {
95          size_t new_length = name_length;
96 
97          /* Append the subscript to the current variable name */
98          ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
99 
100          create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length,
101                                   count, ifc_member_name, ifc_member_t,
102                                   varying_names);
103       }
104    } else {
105       (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
106    }
107 }
108 
109 static bool
process_xfb_layout_qualifiers(void * mem_ctx,const gl_linked_shader * sh,struct gl_shader_program * prog,unsigned * num_tfeedback_decls,char *** varying_names)110 process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
111                               struct gl_shader_program *prog,
112                               unsigned *num_tfeedback_decls,
113                               char ***varying_names)
114 {
115    bool has_xfb_qualifiers = false;
116 
117    /* We still need to enable transform feedback mode even if xfb_stride is
118     * only applied to a global out. Also we don't bother to propagate
119     * xfb_stride to interface block members so this will catch that case also.
120     */
121    for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
122       if (prog->TransformFeedback.BufferStride[j]) {
123          has_xfb_qualifiers = true;
124          break;
125       }
126    }
127 
128    foreach_in_list(ir_instruction, node, sh->ir) {
129       ir_variable *var = node->as_variable();
130       if (!var || var->data.mode != ir_var_shader_out)
131          continue;
132 
133       /* From the ARB_enhanced_layouts spec:
134        *
135        *    "Any shader making any static use (after preprocessing) of any of
136        *     these *xfb_* qualifiers will cause the shader to be in a
137        *     transform feedback capturing mode and hence responsible for
138        *     describing the transform feedback setup.  This mode will capture
139        *     any output selected by *xfb_offset*, directly or indirectly, to
140        *     a transform feedback buffer."
141        */
142       if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
143          has_xfb_qualifiers = true;
144       }
145 
146       if (var->data.explicit_xfb_offset) {
147          *num_tfeedback_decls += var->type->varying_count();
148          has_xfb_qualifiers = true;
149       }
150    }
151 
152    if (*num_tfeedback_decls == 0)
153       return has_xfb_qualifiers;
154 
155    unsigned i = 0;
156    *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls);
157    foreach_in_list(ir_instruction, node, sh->ir) {
158       ir_variable *var = node->as_variable();
159       if (!var || var->data.mode != ir_var_shader_out)
160          continue;
161 
162       if (var->data.explicit_xfb_offset) {
163          char *name;
164          const glsl_type *type, *member_type;
165 
166          if (var->data.from_named_ifc_block) {
167             type = var->get_interface_type();
168 
169             /* Find the member type before it was altered by lowering */
170             const glsl_type *type_wa = type->without_array();
171             member_type =
172                type_wa->fields.structure[type_wa->field_index(var->name)].type;
173             name = ralloc_strdup(NULL, type_wa->name);
174          } else {
175             type = var->type;
176             member_type = NULL;
177             name = ralloc_strdup(NULL, var->name);
178          }
179          create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
180                                   var->name, member_type, varying_names);
181          ralloc_free(name);
182       }
183    }
184 
185    assert(i == *num_tfeedback_decls);
186    return has_xfb_qualifiers;
187 }
188 
189 /**
190  * Validate the types and qualifiers of an output from one stage against the
191  * matching input to another stage.
192  */
193 static void
cross_validate_types_and_qualifiers(struct gl_context * ctx,struct gl_shader_program * prog,const ir_variable * input,const ir_variable * output,gl_shader_stage consumer_stage,gl_shader_stage producer_stage)194 cross_validate_types_and_qualifiers(struct gl_context *ctx,
195                                     struct gl_shader_program *prog,
196                                     const ir_variable *input,
197                                     const ir_variable *output,
198                                     gl_shader_stage consumer_stage,
199                                     gl_shader_stage producer_stage)
200 {
201    /* Check that the types match between stages.
202     */
203    const glsl_type *type_to_match = input->type;
204 
205    /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
206    const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
207                                    consumer_stage != MESA_SHADER_FRAGMENT) ||
208                                   consumer_stage == MESA_SHADER_GEOMETRY;
209    if (extra_array_level) {
210       assert(type_to_match->is_array());
211       type_to_match = type_to_match->fields.array;
212    }
213 
214    if (type_to_match != output->type) {
215       /* There is a bit of a special case for gl_TexCoord.  This
216        * built-in is unsized by default.  Applications that variable
217        * access it must redeclare it with a size.  There is some
218        * language in the GLSL spec that implies the fragment shader
219        * and vertex shader do not have to agree on this size.  Other
220        * driver behave this way, and one or two applications seem to
221        * rely on it.
222        *
223        * Neither declaration needs to be modified here because the array
224        * sizes are fixed later when update_array_sizes is called.
225        *
226        * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
227        *
228        *     "Unlike user-defined varying variables, the built-in
229        *     varying variables don't have a strict one-to-one
230        *     correspondence between the vertex language and the
231        *     fragment language."
232        */
233       if (!output->type->is_array() || !is_gl_identifier(output->name)) {
234          linker_error(prog,
235                       "%s shader output `%s' declared as type `%s', "
236                       "but %s shader input declared as type `%s'\n",
237                       _mesa_shader_stage_to_string(producer_stage),
238                       output->name,
239                       output->type->name,
240                       _mesa_shader_stage_to_string(consumer_stage),
241                       input->type->name);
242          return;
243       }
244    }
245 
246    /* Check that all of the qualifiers match between stages.
247     */
248 
249    /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
250     * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
251     * conformance test suite does not verify that the qualifiers must match.
252     * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
253     * OpenGLES 3.0 drivers, so we relax the checking in all cases.
254     */
255    if (false /* always skip the centroid check */ &&
256        prog->data->Version < (prog->IsES ? 310 : 430) &&
257        input->data.centroid != output->data.centroid) {
258       linker_error(prog,
259                    "%s shader output `%s' %s centroid qualifier, "
260                    "but %s shader input %s centroid qualifier\n",
261                    _mesa_shader_stage_to_string(producer_stage),
262                    output->name,
263                    (output->data.centroid) ? "has" : "lacks",
264                    _mesa_shader_stage_to_string(consumer_stage),
265                    (input->data.centroid) ? "has" : "lacks");
266       return;
267    }
268 
269    if (input->data.sample != output->data.sample) {
270       linker_error(prog,
271                    "%s shader output `%s' %s sample qualifier, "
272                    "but %s shader input %s sample qualifier\n",
273                    _mesa_shader_stage_to_string(producer_stage),
274                    output->name,
275                    (output->data.sample) ? "has" : "lacks",
276                    _mesa_shader_stage_to_string(consumer_stage),
277                    (input->data.sample) ? "has" : "lacks");
278       return;
279    }
280 
281    if (input->data.patch != output->data.patch) {
282       linker_error(prog,
283                    "%s shader output `%s' %s patch qualifier, "
284                    "but %s shader input %s patch qualifier\n",
285                    _mesa_shader_stage_to_string(producer_stage),
286                    output->name,
287                    (output->data.patch) ? "has" : "lacks",
288                    _mesa_shader_stage_to_string(consumer_stage),
289                    (input->data.patch) ? "has" : "lacks");
290       return;
291    }
292 
293    /* The GLSL 4.30 and GLSL ES 3.00 specifications say:
294     *
295     *    "As only outputs need be declared with invariant, an output from
296     *     one shader stage will still match an input of a subsequent stage
297     *     without the input being declared as invariant."
298     *
299     * while GLSL 4.20 says:
300     *
301     *    "For variables leaving one shader and coming into another shader,
302     *     the invariant keyword has to be used in both shaders, or a link
303     *     error will result."
304     *
305     * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
306     *
307     *    "The invariance of varyings that are declared in both the vertex
308     *     and fragment shaders must match."
309     */
310    if (input->data.invariant != output->data.invariant &&
311        prog->data->Version < (prog->IsES ? 300 : 430)) {
312       linker_error(prog,
313                    "%s shader output `%s' %s invariant qualifier, "
314                    "but %s shader input %s invariant qualifier\n",
315                    _mesa_shader_stage_to_string(producer_stage),
316                    output->name,
317                    (output->data.invariant) ? "has" : "lacks",
318                    _mesa_shader_stage_to_string(consumer_stage),
319                    (input->data.invariant) ? "has" : "lacks");
320       return;
321    }
322 
323    /* GLSL >= 4.40 removes text requiring interpolation qualifiers
324     * to match cross stage, they must only match within the same stage.
325     *
326     * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
327     *
328     *     "It is a link-time error if, within the same stage, the interpolation
329     *     qualifiers of variables of the same name do not match.
330     *
331     * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
332     *
333     *    "When no interpolation qualifier is present, smooth interpolation
334     *    is used."
335     *
336     * So we match variables where one is smooth and the other has no explicit
337     * qualifier.
338     */
339    unsigned input_interpolation = input->data.interpolation;
340    unsigned output_interpolation = output->data.interpolation;
341    if (prog->IsES) {
342       if (input_interpolation == INTERP_MODE_NONE)
343          input_interpolation = INTERP_MODE_SMOOTH;
344       if (output_interpolation == INTERP_MODE_NONE)
345          output_interpolation = INTERP_MODE_SMOOTH;
346    }
347    if (input_interpolation != output_interpolation &&
348        prog->data->Version < 440) {
349       if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) {
350          linker_error(prog,
351                       "%s shader output `%s' specifies %s "
352                       "interpolation qualifier, "
353                       "but %s shader input specifies %s "
354                       "interpolation qualifier\n",
355                       _mesa_shader_stage_to_string(producer_stage),
356                       output->name,
357                       interpolation_string(output->data.interpolation),
358                       _mesa_shader_stage_to_string(consumer_stage),
359                       interpolation_string(input->data.interpolation));
360          return;
361       } else {
362          linker_warning(prog,
363                         "%s shader output `%s' specifies %s "
364                         "interpolation qualifier, "
365                         "but %s shader input specifies %s "
366                         "interpolation qualifier\n",
367                         _mesa_shader_stage_to_string(producer_stage),
368                         output->name,
369                         interpolation_string(output->data.interpolation),
370                         _mesa_shader_stage_to_string(consumer_stage),
371                         interpolation_string(input->data.interpolation));
372       }
373    }
374 }
375 
376 /**
377  * Validate front and back color outputs against single color input
378  */
379 static void
cross_validate_front_and_back_color(struct gl_context * ctx,struct gl_shader_program * prog,const ir_variable * input,const ir_variable * front_color,const ir_variable * back_color,gl_shader_stage consumer_stage,gl_shader_stage producer_stage)380 cross_validate_front_and_back_color(struct gl_context *ctx,
381                                     struct gl_shader_program *prog,
382                                     const ir_variable *input,
383                                     const ir_variable *front_color,
384                                     const ir_variable *back_color,
385                                     gl_shader_stage consumer_stage,
386                                     gl_shader_stage producer_stage)
387 {
388    if (front_color != NULL && front_color->data.assigned)
389       cross_validate_types_and_qualifiers(ctx, prog, input, front_color,
390                                           consumer_stage, producer_stage);
391 
392    if (back_color != NULL && back_color->data.assigned)
393       cross_validate_types_and_qualifiers(ctx, prog, input, back_color,
394                                           consumer_stage, producer_stage);
395 }
396 
397 static unsigned
compute_variable_location_slot(ir_variable * var,gl_shader_stage stage)398 compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
399 {
400    unsigned location_start = VARYING_SLOT_VAR0;
401 
402    switch (stage) {
403       case MESA_SHADER_VERTEX:
404          if (var->data.mode == ir_var_shader_in)
405             location_start = VERT_ATTRIB_GENERIC0;
406          break;
407       case MESA_SHADER_TESS_CTRL:
408       case MESA_SHADER_TESS_EVAL:
409          if (var->data.patch)
410             location_start = VARYING_SLOT_PATCH0;
411          break;
412       case MESA_SHADER_FRAGMENT:
413          if (var->data.mode == ir_var_shader_out)
414             location_start = FRAG_RESULT_DATA0;
415          break;
416       default:
417          break;
418    }
419 
420    return var->data.location - location_start;
421 }
422 
423 struct explicit_location_info {
424    ir_variable *var;
425    unsigned numerical_type;
426    unsigned interpolation;
427    bool centroid;
428    bool sample;
429    bool patch;
430 };
431 
432 static inline unsigned
get_numerical_type(const glsl_type * type)433 get_numerical_type(const glsl_type *type)
434 {
435    /* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, Page 68,
436     * (Location aliasing):
437     *
438     *    "Further, when location aliasing, the aliases sharing the location
439     *     must have the same underlying numerical type  (floating-point or
440     *     integer)
441     */
442    if (type->is_float() || type->is_double())
443       return GLSL_TYPE_FLOAT;
444    return GLSL_TYPE_INT;
445 }
446 
447 static bool
check_location_aliasing(struct explicit_location_info explicit_locations[][4],ir_variable * var,unsigned location,unsigned component,unsigned location_limit,const glsl_type * type,unsigned interpolation,bool centroid,bool sample,bool patch,gl_shader_program * prog,gl_shader_stage stage)448 check_location_aliasing(struct explicit_location_info explicit_locations[][4],
449                         ir_variable *var,
450                         unsigned location,
451                         unsigned component,
452                         unsigned location_limit,
453                         const glsl_type *type,
454                         unsigned interpolation,
455                         bool centroid,
456                         bool sample,
457                         bool patch,
458                         gl_shader_program *prog,
459                         gl_shader_stage stage)
460 {
461    unsigned last_comp;
462    if (type->without_array()->is_record()) {
463       /* The component qualifier can't be used on structs so just treat
464        * all component slots as used.
465        */
466       last_comp = 4;
467    } else {
468       unsigned dmul = type->without_array()->is_64bit() ? 2 : 1;
469       last_comp = component + type->without_array()->vector_elements * dmul;
470    }
471 
472    while (location < location_limit) {
473       unsigned comp = 0;
474       while (comp < 4) {
475          struct explicit_location_info *info =
476             &explicit_locations[location][comp];
477 
478          if (info->var) {
479             /* Component aliasing is not alloed */
480             if (comp >= component && comp < last_comp) {
481                linker_error(prog,
482                             "%s shader has multiple outputs explicitly "
483                             "assigned to location %d and component %d\n",
484                             _mesa_shader_stage_to_string(stage),
485                             location, comp);
486                return false;
487             } else {
488                /* For all other used components we need to have matching
489                 * types, interpolation and auxiliary storage
490                 */
491                if (info->numerical_type !=
492                    get_numerical_type(type->without_array())) {
493                   linker_error(prog,
494                                "Varyings sharing the same location must "
495                                "have the same underlying numerical type. "
496                                "Location %u component %u\n",
497                                location, comp);
498                   return false;
499                }
500 
501                if (info->interpolation != interpolation) {
502                   linker_error(prog,
503                                "%s shader has multiple outputs at explicit "
504                                "location %u with different interpolation "
505                                "settings\n",
506                                _mesa_shader_stage_to_string(stage), location);
507                   return false;
508                }
509 
510                if (info->centroid != centroid ||
511                    info->sample != sample ||
512                    info->patch != patch) {
513                   linker_error(prog,
514                                "%s shader has multiple outputs at explicit "
515                                "location %u with different aux storage\n",
516                                _mesa_shader_stage_to_string(stage), location);
517                   return false;
518                }
519             }
520          } else if (comp >= component && comp < last_comp) {
521             info->var = var;
522             info->numerical_type = get_numerical_type(type->without_array());
523             info->interpolation = interpolation;
524             info->centroid = centroid;
525             info->sample = sample;
526             info->patch = patch;
527          }
528 
529          comp++;
530 
531          /* We need to do some special handling for doubles as dvec3 and
532           * dvec4 consume two consecutive locations. We don't need to
533           * worry about components beginning at anything other than 0 as
534           * the spec does not allow this for dvec3 and dvec4.
535           */
536          if (comp == 4 && last_comp > 4) {
537             last_comp = last_comp - 4;
538             /* Bump location index and reset the component index */
539             location++;
540             comp = 0;
541             component = 0;
542          }
543       }
544 
545       location++;
546    }
547 
548    return true;
549 }
550 
551 static bool
validate_explicit_variable_location(struct gl_context * ctx,struct explicit_location_info explicit_locations[][4],ir_variable * var,gl_shader_program * prog,gl_linked_shader * sh)552 validate_explicit_variable_location(struct gl_context *ctx,
553                                     struct explicit_location_info explicit_locations[][4],
554                                     ir_variable *var,
555                                     gl_shader_program *prog,
556                                     gl_linked_shader *sh)
557 {
558    const glsl_type *type = get_varying_type(var, sh->Stage);
559    unsigned num_elements = type->count_attribute_slots(false);
560    unsigned idx = compute_variable_location_slot(var, sh->Stage);
561    unsigned slot_limit = idx + num_elements;
562 
563    /* Vertex shader inputs and fragment shader outputs are validated in
564     * assign_attribute_or_color_locations() so we should not attempt to
565     * validate them again here.
566     */
567    unsigned slot_max;
568    if (var->data.mode == ir_var_shader_out) {
569       assert(sh->Stage != MESA_SHADER_FRAGMENT);
570       slot_max =
571          ctx->Const.Program[sh->Stage].MaxOutputComponents / 4;
572    } else {
573       assert(var->data.mode == ir_var_shader_in);
574       assert(sh->Stage != MESA_SHADER_VERTEX);
575       slot_max =
576          ctx->Const.Program[sh->Stage].MaxInputComponents / 4;
577    }
578 
579    if (slot_limit > slot_max) {
580       linker_error(prog,
581                    "Invalid location %u in %s shader\n",
582                    idx, _mesa_shader_stage_to_string(sh->Stage));
583       return false;
584    }
585 
586    const glsl_type *type_without_array = type->without_array();
587    if (type_without_array->is_interface()) {
588       for (unsigned i = 0; i < type_without_array->length; i++) {
589          glsl_struct_field *field = &type_without_array->fields.structure[i];
590          unsigned field_location = field->location -
591             (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
592          if (!check_location_aliasing(explicit_locations, var,
593                                       field_location,
594                                       0, field_location + 1,
595                                       field->type,
596                                       field->interpolation,
597                                       field->centroid,
598                                       field->sample,
599                                       field->patch,
600                                       prog, sh->Stage)) {
601             return false;
602          }
603       }
604    } else if (!check_location_aliasing(explicit_locations, var,
605                                        idx, var->data.location_frac,
606                                        slot_limit, type,
607                                        var->data.interpolation,
608                                        var->data.centroid,
609                                        var->data.sample,
610                                        var->data.patch,
611                                        prog, sh->Stage)) {
612       return false;
613    }
614 
615    return true;
616 }
617 
618 /**
619  * Validate explicit locations for the inputs to the first stage and the
620  * outputs of the last stage in an SSO program (everything in between is
621  * validated in cross_validate_outputs_to_inputs).
622  */
623 void
validate_sso_explicit_locations(struct gl_context * ctx,struct gl_shader_program * prog,gl_shader_stage first_stage,gl_shader_stage last_stage)624 validate_sso_explicit_locations(struct gl_context *ctx,
625                                 struct gl_shader_program *prog,
626                                 gl_shader_stage first_stage,
627                                 gl_shader_stage last_stage)
628 {
629    assert(prog->SeparateShader);
630 
631    /* VS inputs and FS outputs are validated in
632     * assign_attribute_or_color_locations()
633     */
634    bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
635    bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
636    if (!validate_first_stage && !validate_last_stage)
637       return;
638 
639    struct explicit_location_info explicit_locations[MAX_VARYING][4];
640 
641    gl_shader_stage stages[2] = { first_stage, last_stage };
642    bool validate_stage[2] = { validate_first_stage, validate_last_stage };
643    ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out };
644 
645    for (unsigned i = 0; i < 2; i++) {
646       if (!validate_stage[i])
647          continue;
648 
649       gl_shader_stage stage = stages[i];
650 
651       gl_linked_shader *sh = prog->_LinkedShaders[stage];
652       assert(sh);
653 
654       memset(explicit_locations, 0, sizeof(explicit_locations));
655 
656       foreach_in_list(ir_instruction, node, sh->ir) {
657          ir_variable *const var = node->as_variable();
658 
659          if (var == NULL ||
660              !var->data.explicit_location ||
661              var->data.location < VARYING_SLOT_VAR0 ||
662              var->data.mode != var_direction[i])
663             continue;
664 
665          if (!validate_explicit_variable_location(
666                ctx, explicit_locations, var, prog, sh)) {
667             return;
668          }
669       }
670    }
671 }
672 
673 /**
674  * Validate that outputs from one stage match inputs of another
675  */
676 void
cross_validate_outputs_to_inputs(struct gl_context * ctx,struct gl_shader_program * prog,gl_linked_shader * producer,gl_linked_shader * consumer)677 cross_validate_outputs_to_inputs(struct gl_context *ctx,
678                                  struct gl_shader_program *prog,
679                                  gl_linked_shader *producer,
680                                  gl_linked_shader *consumer)
681 {
682    glsl_symbol_table parameters;
683    struct explicit_location_info explicit_locations[MAX_VARYING][4] = { 0 };
684 
685    /* Find all shader outputs in the "producer" stage.
686     */
687    foreach_in_list(ir_instruction, node, producer->ir) {
688       ir_variable *const var = node->as_variable();
689 
690       if (var == NULL || var->data.mode != ir_var_shader_out)
691          continue;
692 
693       if (!var->data.explicit_location
694           || var->data.location < VARYING_SLOT_VAR0)
695          parameters.add_variable(var);
696       else {
697          /* User-defined varyings with explicit locations are handled
698           * differently because they do not need to have matching names.
699           */
700          if (!validate_explicit_variable_location(ctx,
701                                                   explicit_locations,
702                                                   var, prog, producer)) {
703             return;
704          }
705       }
706    }
707 
708 
709    /* Find all shader inputs in the "consumer" stage.  Any variables that have
710     * matching outputs already in the symbol table must have the same type and
711     * qualifiers.
712     *
713     * Exception: if the consumer is the geometry shader, then the inputs
714     * should be arrays and the type of the array element should match the type
715     * of the corresponding producer output.
716     */
717    foreach_in_list(ir_instruction, node, consumer->ir) {
718       ir_variable *const input = node->as_variable();
719 
720       if (input == NULL || input->data.mode != ir_var_shader_in)
721          continue;
722 
723       if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
724          const ir_variable *const front_color =
725             parameters.get_variable("gl_FrontColor");
726 
727          const ir_variable *const back_color =
728             parameters.get_variable("gl_BackColor");
729 
730          cross_validate_front_and_back_color(ctx, prog, input,
731                                              front_color, back_color,
732                                              consumer->Stage, producer->Stage);
733       } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
734          const ir_variable *const front_color =
735             parameters.get_variable("gl_FrontSecondaryColor");
736 
737          const ir_variable *const back_color =
738             parameters.get_variable("gl_BackSecondaryColor");
739 
740          cross_validate_front_and_back_color(ctx, prog, input,
741                                              front_color, back_color,
742                                              consumer->Stage, producer->Stage);
743       } else {
744          /* The rules for connecting inputs and outputs change in the presence
745           * of explicit locations.  In this case, we no longer care about the
746           * names of the variables.  Instead, we care only about the
747           * explicitly assigned location.
748           */
749          ir_variable *output = NULL;
750          if (input->data.explicit_location
751              && input->data.location >= VARYING_SLOT_VAR0) {
752 
753             const glsl_type *type = get_varying_type(input, consumer->Stage);
754             unsigned num_elements = type->count_attribute_slots(false);
755             unsigned idx =
756                compute_variable_location_slot(input, consumer->Stage);
757             unsigned slot_limit = idx + num_elements;
758 
759             while (idx < slot_limit) {
760                if (idx >= MAX_VARYING) {
761                   linker_error(prog,
762                                "Invalid location %u in %s shader\n", idx,
763                                _mesa_shader_stage_to_string(consumer->Stage));
764                   return;
765                }
766 
767                output = explicit_locations[idx][input->data.location_frac].var;
768 
769                if (output == NULL ||
770                    input->data.location != output->data.location) {
771                   linker_error(prog,
772                                "%s shader input `%s' with explicit location "
773                                "has no matching output\n",
774                                _mesa_shader_stage_to_string(consumer->Stage),
775                                input->name);
776                   break;
777                }
778                idx++;
779             }
780          } else {
781             output = parameters.get_variable(input->name);
782          }
783 
784          if (output != NULL) {
785             /* Interface blocks have their own validation elsewhere so don't
786              * try validating them here.
787              */
788             if (!(input->get_interface_type() &&
789                   output->get_interface_type()))
790                cross_validate_types_and_qualifiers(ctx, prog, input, output,
791                                                    consumer->Stage,
792                                                    producer->Stage);
793          } else {
794             /* Check for input vars with unmatched output vars in prev stage
795              * taking into account that interface blocks could have a matching
796              * output but with different name, so we ignore them.
797              */
798             assert(!input->data.assigned);
799             if (input->data.used && !input->get_interface_type() &&
800                 !input->data.explicit_location && !prog->SeparateShader)
801                linker_error(prog,
802                             "%s shader input `%s' "
803                             "has no matching output in the previous stage\n",
804                             _mesa_shader_stage_to_string(consumer->Stage),
805                             input->name);
806          }
807       }
808    }
809 }
810 
811 /**
812  * Demote shader inputs and outputs that are not used in other stages, and
813  * remove them via dead code elimination.
814  */
815 static void
remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,gl_linked_shader * sh,enum ir_variable_mode mode)816 remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
817                                         gl_linked_shader *sh,
818                                         enum ir_variable_mode mode)
819 {
820    if (is_separate_shader_object)
821       return;
822 
823    foreach_in_list(ir_instruction, node, sh->ir) {
824       ir_variable *const var = node->as_variable();
825 
826       if (var == NULL || var->data.mode != int(mode))
827          continue;
828 
829       /* A shader 'in' or 'out' variable is only really an input or output if
830        * its value is used by other shader stages. This will cause the
831        * variable to have a location assigned.
832        */
833       if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) {
834          assert(var->data.mode != ir_var_temporary);
835 
836          /* Assign zeros to demoted inputs to allow more optimizations. */
837          if (var->data.mode == ir_var_shader_in && !var->constant_value)
838             var->constant_value = ir_constant::zero(var, var->type);
839 
840          var->data.mode = ir_var_auto;
841       }
842    }
843 
844    /* Eliminate code that is now dead due to unused inputs/outputs being
845     * demoted.
846     */
847    while (do_dead_code(sh->ir, false))
848       ;
849 
850 }
851 
852 /**
853  * Initialize this object based on a string that was passed to
854  * glTransformFeedbackVaryings.
855  *
856  * If the input is mal-formed, this call still succeeds, but it sets
857  * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var()
858  * will fail to find any matching variable.
859  */
860 void
init(struct gl_context * ctx,const void * mem_ctx,const char * input)861 tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx,
862                      const char *input)
863 {
864    /* We don't have to be pedantic about what is a valid GLSL variable name,
865     * because any variable with an invalid name can't exist in the IR anyway.
866     */
867 
868    this->location = -1;
869    this->orig_name = input;
870    this->lowered_builtin_array_variable = none;
871    this->skip_components = 0;
872    this->next_buffer_separator = false;
873    this->matched_candidate = NULL;
874    this->stream_id = 0;
875    this->buffer = 0;
876    this->offset = 0;
877 
878    if (ctx->Extensions.ARB_transform_feedback3) {
879       /* Parse gl_NextBuffer. */
880       if (strcmp(input, "gl_NextBuffer") == 0) {
881          this->next_buffer_separator = true;
882          return;
883       }
884 
885       /* Parse gl_SkipComponents. */
886       if (strcmp(input, "gl_SkipComponents1") == 0)
887          this->skip_components = 1;
888       else if (strcmp(input, "gl_SkipComponents2") == 0)
889          this->skip_components = 2;
890       else if (strcmp(input, "gl_SkipComponents3") == 0)
891          this->skip_components = 3;
892       else if (strcmp(input, "gl_SkipComponents4") == 0)
893          this->skip_components = 4;
894 
895       if (this->skip_components)
896          return;
897    }
898 
899    /* Parse a declaration. */
900    const char *base_name_end;
901    long subscript = parse_program_resource_name(input, &base_name_end);
902    this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
903    if (this->var_name == NULL) {
904       _mesa_error_no_memory(__func__);
905       return;
906    }
907 
908    if (subscript >= 0) {
909       this->array_subscript = subscript;
910       this->is_subscripted = true;
911    } else {
912       this->is_subscripted = false;
913    }
914 
915    /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
916     * class must behave specially to account for the fact that gl_ClipDistance
917     * is converted from a float[8] to a vec4[2].
918     */
919    if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
920        strcmp(this->var_name, "gl_ClipDistance") == 0) {
921       this->lowered_builtin_array_variable = clip_distance;
922    }
923    if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
924        strcmp(this->var_name, "gl_CullDistance") == 0) {
925       this->lowered_builtin_array_variable = cull_distance;
926    }
927 
928    if (ctx->Const.LowerTessLevel &&
929        (strcmp(this->var_name, "gl_TessLevelOuter") == 0))
930       this->lowered_builtin_array_variable = tess_level_outer;
931    if (ctx->Const.LowerTessLevel &&
932        (strcmp(this->var_name, "gl_TessLevelInner") == 0))
933       this->lowered_builtin_array_variable = tess_level_inner;
934 }
935 
936 
937 /**
938  * Determine whether two tfeedback_decl objects refer to the same variable and
939  * array index (if applicable).
940  */
941 bool
is_same(const tfeedback_decl & x,const tfeedback_decl & y)942 tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y)
943 {
944    assert(x.is_varying() && y.is_varying());
945 
946    if (strcmp(x.var_name, y.var_name) != 0)
947       return false;
948    if (x.is_subscripted != y.is_subscripted)
949       return false;
950    if (x.is_subscripted && x.array_subscript != y.array_subscript)
951       return false;
952    return true;
953 }
954 
955 
956 /**
957  * Assign a location and stream ID for this tfeedback_decl object based on the
958  * transform feedback candidate found by find_candidate.
959  *
960  * If an error occurs, the error is reported through linker_error() and false
961  * is returned.
962  */
963 bool
assign_location(struct gl_context * ctx,struct gl_shader_program * prog)964 tfeedback_decl::assign_location(struct gl_context *ctx,
965                                 struct gl_shader_program *prog)
966 {
967    assert(this->is_varying());
968 
969    unsigned fine_location
970       = this->matched_candidate->toplevel_var->data.location * 4
971       + this->matched_candidate->toplevel_var->data.location_frac
972       + this->matched_candidate->offset;
973    const unsigned dmul =
974       this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
975 
976    if (this->matched_candidate->type->is_array()) {
977       /* Array variable */
978       const unsigned matrix_cols =
979          this->matched_candidate->type->fields.array->matrix_columns;
980       const unsigned vector_elements =
981          this->matched_candidate->type->fields.array->vector_elements;
982       unsigned actual_array_size;
983       switch (this->lowered_builtin_array_variable) {
984       case clip_distance:
985          actual_array_size = prog->last_vert_prog ?
986             prog->last_vert_prog->info.clip_distance_array_size : 0;
987          break;
988       case cull_distance:
989          actual_array_size = prog->last_vert_prog ?
990             prog->last_vert_prog->info.cull_distance_array_size : 0;
991          break;
992       case tess_level_outer:
993          actual_array_size = 4;
994          break;
995       case tess_level_inner:
996          actual_array_size = 2;
997          break;
998       case none:
999       default:
1000          actual_array_size = this->matched_candidate->type->array_size();
1001          break;
1002       }
1003 
1004       if (this->is_subscripted) {
1005          /* Check array bounds. */
1006          if (this->array_subscript >= actual_array_size) {
1007             linker_error(prog, "Transform feedback varying %s has index "
1008                          "%i, but the array size is %u.",
1009                          this->orig_name, this->array_subscript,
1010                          actual_array_size);
1011             return false;
1012          }
1013          unsigned array_elem_size = this->lowered_builtin_array_variable ?
1014             1 : vector_elements * matrix_cols * dmul;
1015          fine_location += array_elem_size * this->array_subscript;
1016          this->size = 1;
1017       } else {
1018          this->size = actual_array_size;
1019       }
1020       this->vector_elements = vector_elements;
1021       this->matrix_columns = matrix_cols;
1022       if (this->lowered_builtin_array_variable)
1023          this->type = GL_FLOAT;
1024       else
1025          this->type = this->matched_candidate->type->fields.array->gl_type;
1026    } else {
1027       /* Regular variable (scalar, vector, or matrix) */
1028       if (this->is_subscripted) {
1029          linker_error(prog, "Transform feedback varying %s requested, "
1030                       "but %s is not an array.",
1031                       this->orig_name, this->var_name);
1032          return false;
1033       }
1034       this->size = 1;
1035       this->vector_elements = this->matched_candidate->type->vector_elements;
1036       this->matrix_columns = this->matched_candidate->type->matrix_columns;
1037       this->type = this->matched_candidate->type->gl_type;
1038    }
1039    this->location = fine_location / 4;
1040    this->location_frac = fine_location % 4;
1041 
1042    /* From GL_EXT_transform_feedback:
1043     *   A program will fail to link if:
1044     *
1045     *   * the total number of components to capture in any varying
1046     *     variable in <varyings> is greater than the constant
1047     *     MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
1048     *     buffer mode is SEPARATE_ATTRIBS_EXT;
1049     */
1050    if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
1051        this->num_components() >
1052        ctx->Const.MaxTransformFeedbackSeparateComponents) {
1053       linker_error(prog, "Transform feedback varying %s exceeds "
1054                    "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
1055                    this->orig_name);
1056       return false;
1057    }
1058 
1059    /* Only transform feedback varyings can be assigned to non-zero streams,
1060     * so assign the stream id here.
1061     */
1062    this->stream_id = this->matched_candidate->toplevel_var->data.stream;
1063 
1064    unsigned array_offset = this->array_subscript * 4 * dmul;
1065    unsigned struct_offset = this->matched_candidate->offset * 4 * dmul;
1066    this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer;
1067    this->offset = this->matched_candidate->toplevel_var->data.offset +
1068       array_offset + struct_offset;
1069 
1070    return true;
1071 }
1072 
1073 
1074 unsigned
get_num_outputs() const1075 tfeedback_decl::get_num_outputs() const
1076 {
1077    if (!this->is_varying()) {
1078       return 0;
1079    }
1080    return (this->num_components() + this->location_frac + 3)/4;
1081 }
1082 
1083 
1084 /**
1085  * Update gl_transform_feedback_info to reflect this tfeedback_decl.
1086  *
1087  * If an error occurs, the error is reported through linker_error() and false
1088  * is returned.
1089  */
1090 bool
store(struct gl_context * ctx,struct gl_shader_program * prog,struct gl_transform_feedback_info * info,unsigned buffer,unsigned buffer_index,const unsigned max_outputs,bool * explicit_stride,bool has_xfb_qualifiers) const1091 tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
1092                       struct gl_transform_feedback_info *info,
1093                       unsigned buffer, unsigned buffer_index,
1094                       const unsigned max_outputs, bool *explicit_stride,
1095                       bool has_xfb_qualifiers) const
1096 {
1097    unsigned xfb_offset = 0;
1098    unsigned size = this->size;
1099    /* Handle gl_SkipComponents. */
1100    if (this->skip_components) {
1101       info->Buffers[buffer].Stride += this->skip_components;
1102       size = this->skip_components;
1103       goto store_varying;
1104    }
1105 
1106    if (this->next_buffer_separator) {
1107       size = 0;
1108       goto store_varying;
1109    }
1110 
1111    if (has_xfb_qualifiers) {
1112       xfb_offset = this->offset / 4;
1113    } else {
1114       xfb_offset = info->Buffers[buffer].Stride;
1115    }
1116    info->Varyings[info->NumVarying].Offset = xfb_offset * 4;
1117 
1118    {
1119       unsigned location = this->location;
1120       unsigned location_frac = this->location_frac;
1121       unsigned num_components = this->num_components();
1122       while (num_components > 0) {
1123          unsigned output_size = MIN2(num_components, 4 - location_frac);
1124          assert((info->NumOutputs == 0 && max_outputs == 0) ||
1125                 info->NumOutputs < max_outputs);
1126 
1127          /* From the ARB_enhanced_layouts spec:
1128           *
1129           *    "If such a block member or variable is not written during a shader
1130           *    invocation, the buffer contents at the assigned offset will be
1131           *    undefined.  Even if there are no static writes to a variable or
1132           *    member that is assigned a transform feedback offset, the space is
1133           *    still allocated in the buffer and still affects the stride."
1134           */
1135          if (this->is_varying_written()) {
1136             info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
1137             info->Outputs[info->NumOutputs].OutputRegister = location;
1138             info->Outputs[info->NumOutputs].NumComponents = output_size;
1139             info->Outputs[info->NumOutputs].StreamId = stream_id;
1140             info->Outputs[info->NumOutputs].OutputBuffer = buffer;
1141             info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
1142             ++info->NumOutputs;
1143          }
1144          info->Buffers[buffer].Stream = this->stream_id;
1145          xfb_offset += output_size;
1146 
1147          num_components -= output_size;
1148          location++;
1149          location_frac = 0;
1150       }
1151    }
1152 
1153    if (explicit_stride && explicit_stride[buffer]) {
1154       if (this->is_64bit() && info->Buffers[buffer].Stride % 2) {
1155          linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
1156                       "multiple of 8 as its applied to a type that is or "
1157                       "contains a double.",
1158                       info->Buffers[buffer].Stride * 4);
1159          return false;
1160       }
1161 
1162       if ((this->offset / 4) / info->Buffers[buffer].Stride !=
1163           (xfb_offset - 1) / info->Buffers[buffer].Stride) {
1164          linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
1165                       "buffer (%d)", xfb_offset * 4,
1166                       info->Buffers[buffer].Stride * 4, buffer);
1167          return false;
1168       }
1169    } else {
1170       info->Buffers[buffer].Stride = xfb_offset;
1171    }
1172 
1173    /* From GL_EXT_transform_feedback:
1174     *   A program will fail to link if:
1175     *
1176     *     * the total number of components to capture is greater than
1177     *       the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
1178     *       and the buffer mode is INTERLEAVED_ATTRIBS_EXT.
1179     *
1180     * From GL_ARB_enhanced_layouts:
1181     *
1182     *   "The resulting stride (implicit or explicit) must be less than or
1183     *   equal to the implementation-dependent constant
1184     *   gl_MaxTransformFeedbackInterleavedComponents."
1185     */
1186    if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
1187         has_xfb_qualifiers) &&
1188        info->Buffers[buffer].Stride >
1189        ctx->Const.MaxTransformFeedbackInterleavedComponents) {
1190       linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
1191                    "limit has been exceeded.");
1192       return false;
1193    }
1194 
1195  store_varying:
1196    info->Varyings[info->NumVarying].Name = ralloc_strdup(prog,
1197                                                          this->orig_name);
1198    info->Varyings[info->NumVarying].Type = this->type;
1199    info->Varyings[info->NumVarying].Size = size;
1200    info->Varyings[info->NumVarying].BufferIndex = buffer_index;
1201    info->NumVarying++;
1202    info->Buffers[buffer].NumVaryings++;
1203 
1204    return true;
1205 }
1206 
1207 
1208 const tfeedback_candidate *
find_candidate(gl_shader_program * prog,hash_table * tfeedback_candidates)1209 tfeedback_decl::find_candidate(gl_shader_program *prog,
1210                                hash_table *tfeedback_candidates)
1211 {
1212    const char *name = this->var_name;
1213    switch (this->lowered_builtin_array_variable) {
1214    case none:
1215       name = this->var_name;
1216       break;
1217    case clip_distance:
1218       name = "gl_ClipDistanceMESA";
1219       break;
1220    case cull_distance:
1221       name = "gl_CullDistanceMESA";
1222       break;
1223    case tess_level_outer:
1224       name = "gl_TessLevelOuterMESA";
1225       break;
1226    case tess_level_inner:
1227       name = "gl_TessLevelInnerMESA";
1228       break;
1229    }
1230    hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name);
1231 
1232    this->matched_candidate = entry ?
1233          (const tfeedback_candidate *) entry->data : NULL;
1234 
1235    if (!this->matched_candidate) {
1236       /* From GL_EXT_transform_feedback:
1237        *   A program will fail to link if:
1238        *
1239        *   * any variable name specified in the <varyings> array is not
1240        *     declared as an output in the geometry shader (if present) or
1241        *     the vertex shader (if no geometry shader is present);
1242        */
1243       linker_error(prog, "Transform feedback varying %s undeclared.",
1244                    this->orig_name);
1245    }
1246 
1247    return this->matched_candidate;
1248 }
1249 
1250 
1251 /**
1252  * Parse all the transform feedback declarations that were passed to
1253  * glTransformFeedbackVaryings() and store them in tfeedback_decl objects.
1254  *
1255  * If an error occurs, the error is reported through linker_error() and false
1256  * is returned.
1257  */
1258 static bool
parse_tfeedback_decls(struct gl_context * ctx,struct gl_shader_program * prog,const void * mem_ctx,unsigned num_names,char ** varying_names,tfeedback_decl * decls)1259 parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
1260                       const void *mem_ctx, unsigned num_names,
1261                       char **varying_names, tfeedback_decl *decls)
1262 {
1263    for (unsigned i = 0; i < num_names; ++i) {
1264       decls[i].init(ctx, mem_ctx, varying_names[i]);
1265 
1266       if (!decls[i].is_varying())
1267          continue;
1268 
1269       /* From GL_EXT_transform_feedback:
1270        *   A program will fail to link if:
1271        *
1272        *   * any two entries in the <varyings> array specify the same varying
1273        *     variable;
1274        *
1275        * We interpret this to mean "any two entries in the <varyings> array
1276        * specify the same varying variable and array index", since transform
1277        * feedback of arrays would be useless otherwise.
1278        */
1279       for (unsigned j = 0; j < i; ++j) {
1280          if (decls[j].is_varying()) {
1281             if (tfeedback_decl::is_same(decls[i], decls[j])) {
1282                linker_error(prog, "Transform feedback varying %s specified "
1283                             "more than once.", varying_names[i]);
1284                return false;
1285             }
1286          }
1287       }
1288    }
1289    return true;
1290 }
1291 
1292 
1293 static int
cmp_xfb_offset(const void * x_generic,const void * y_generic)1294 cmp_xfb_offset(const void * x_generic, const void * y_generic)
1295 {
1296    tfeedback_decl *x = (tfeedback_decl *) x_generic;
1297    tfeedback_decl *y = (tfeedback_decl *) y_generic;
1298 
1299    if (x->get_buffer() != y->get_buffer())
1300       return x->get_buffer() - y->get_buffer();
1301    return x->get_offset() - y->get_offset();
1302 }
1303 
1304 /**
1305  * Store transform feedback location assignments into
1306  * prog->sh.LinkedTransformFeedback based on the data stored in
1307  * tfeedback_decls.
1308  *
1309  * If an error occurs, the error is reported through linker_error() and false
1310  * is returned.
1311  */
1312 static bool
store_tfeedback_info(struct gl_context * ctx,struct gl_shader_program * prog,unsigned num_tfeedback_decls,tfeedback_decl * tfeedback_decls,bool has_xfb_qualifiers)1313 store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
1314                      unsigned num_tfeedback_decls,
1315                      tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers)
1316 {
1317    if (!prog->last_vert_prog)
1318       return true;
1319 
1320    /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
1321     * tracking the number of buffers doesn't overflow.
1322     */
1323    assert(ctx->Const.MaxTransformFeedbackBuffers < 32);
1324 
1325    bool separate_attribs_mode =
1326       prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
1327 
1328    struct gl_program *xfb_prog = prog->last_vert_prog;
1329    xfb_prog->sh.LinkedTransformFeedback =
1330       rzalloc(xfb_prog, struct gl_transform_feedback_info);
1331 
1332    /* The xfb_offset qualifier does not have to be used in increasing order
1333     * however some drivers expect to receive the list of transform feedback
1334     * declarations in order so sort it now for convenience.
1335     */
1336    if (has_xfb_qualifiers) {
1337       qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls),
1338             cmp_xfb_offset);
1339    } else {
1340       xfb_prog->sh.LinkedTransformFeedback->api_enabled = true;
1341    }
1342 
1343    xfb_prog->sh.LinkedTransformFeedback->Varyings =
1344       rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
1345                     num_tfeedback_decls);
1346 
1347    unsigned num_outputs = 0;
1348    for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1349       if (tfeedback_decls[i].is_varying_written())
1350          num_outputs += tfeedback_decls[i].get_num_outputs();
1351    }
1352 
1353    xfb_prog->sh.LinkedTransformFeedback->Outputs =
1354       rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
1355                     num_outputs);
1356 
1357    unsigned num_buffers = 0;
1358    unsigned buffers = 0;
1359 
1360    if (!has_xfb_qualifiers && separate_attribs_mode) {
1361       /* GL_SEPARATE_ATTRIBS */
1362       for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1363          if (!tfeedback_decls[i].store(ctx, prog,
1364                                        xfb_prog->sh.LinkedTransformFeedback,
1365                                        num_buffers, num_buffers, num_outputs,
1366                                        NULL, has_xfb_qualifiers))
1367             return false;
1368 
1369          buffers |= 1 << num_buffers;
1370          num_buffers++;
1371       }
1372    }
1373    else {
1374       /* GL_INVERLEAVED_ATTRIBS */
1375       int buffer_stream_id = -1;
1376       unsigned buffer =
1377          num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0;
1378       bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
1379 
1380       /* Apply any xfb_stride global qualifiers */
1381       if (has_xfb_qualifiers) {
1382          for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
1383             if (prog->TransformFeedback.BufferStride[j]) {
1384                explicit_stride[j] = true;
1385                xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
1386                   prog->TransformFeedback.BufferStride[j] / 4;
1387             }
1388          }
1389       }
1390 
1391       for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1392          if (has_xfb_qualifiers &&
1393              buffer != tfeedback_decls[i].get_buffer()) {
1394             /* we have moved to the next buffer so reset stream id */
1395             buffer_stream_id = -1;
1396             num_buffers++;
1397          }
1398 
1399          if (tfeedback_decls[i].is_next_buffer_separator()) {
1400             if (!tfeedback_decls[i].store(ctx, prog,
1401                                           xfb_prog->sh.LinkedTransformFeedback,
1402                                           buffer, num_buffers, num_outputs,
1403                                           explicit_stride, has_xfb_qualifiers))
1404                return false;
1405             num_buffers++;
1406             buffer_stream_id = -1;
1407             continue;
1408          }
1409 
1410          if (has_xfb_qualifiers) {
1411             buffer = tfeedback_decls[i].get_buffer();
1412          } else {
1413             buffer = num_buffers;
1414          }
1415 
1416          if (tfeedback_decls[i].is_varying()) {
1417             if (buffer_stream_id == -1)  {
1418                /* First varying writing to this buffer: remember its stream */
1419                buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
1420 
1421                /* Only mark a buffer as active when there is a varying
1422                 * attached to it. This behaviour is based on a revised version
1423                 * of section 13.2.2 of the GL 4.6 spec.
1424                 */
1425                buffers |= 1 << buffer;
1426             } else if (buffer_stream_id !=
1427                        (int) tfeedback_decls[i].get_stream_id()) {
1428                /* Varying writes to the same buffer from a different stream */
1429                linker_error(prog,
1430                             "Transform feedback can't capture varyings belonging "
1431                             "to different vertex streams in a single buffer. "
1432                             "Varying %s writes to buffer from stream %u, other "
1433                             "varyings in the same buffer write from stream %u.",
1434                             tfeedback_decls[i].name(),
1435                             tfeedback_decls[i].get_stream_id(),
1436                             buffer_stream_id);
1437                return false;
1438             }
1439          }
1440 
1441          if (!tfeedback_decls[i].store(ctx, prog,
1442                                        xfb_prog->sh.LinkedTransformFeedback,
1443                                        buffer, num_buffers, num_outputs,
1444                                        explicit_stride, has_xfb_qualifiers))
1445             return false;
1446       }
1447    }
1448 
1449    assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);
1450 
1451    xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
1452    return true;
1453 }
1454 
1455 namespace {
1456 
/**
 * Data structure recording the relationship between outputs of one shader
 * stage (the "producer") and inputs of another (the "consumer").
 *
 * Pairs are added with record(), locations are then chosen for all recorded
 * pairs by assign_locations(), and finally written back to the variables by
 * store_locations().
 */
class varying_matches
{
public:
   /**
    * Create an empty set of matches.  The three flags and the two stages
    * are stored unchanged in the members of the same names below.
    */
   varying_matches(bool disable_varying_packing, bool xfb_enabled,
                   bool enhanced_layouts_enabled,
                   gl_shader_stage producer_stage,
                   gl_shader_stage consumer_stage);
   ~varying_matches();

   /**
    * Add a producer/consumer pair to the list of matches.  Either pointer
    * (but not both) may be NULL.
    */
   void record(ir_variable *producer_var, ir_variable *consumer_var);

   /**
    * Choose a generic location for every recorded match, avoiding the slots
    * flagged in \c reserved_slots, and return the number of non-patch slots
    * used.
    */
   unsigned assign_locations(struct gl_shader_program *prog,
                             uint8_t components[],
                             uint64_t reserved_slots);

   /**
    * Write the locations chosen by assign_locations() into the recorded
    * producer and consumer variables.
    */
   void store_locations() const;

private:
   bool is_varying_packing_safe(const glsl_type *type,
                                const ir_variable *var) const;

   /**
    * If true, this driver disables varying packing, so all varyings need to
    * be aligned on slot boundaries, and take up a number of slots equal to
    * their number of matrix columns times their array size.
    *
    * Packing may also be disabled because our current packing method is not
    * safe in SSO or versions of OpenGL where interpolation qualifiers are not
    * guaranteed to match across stages.
    */
   const bool disable_varying_packing;

   /**
    * If true, this driver has transform feedback enabled. The transform
    * feedback code requires at least some packing be done even when varying
    * packing is disabled, fortunately where transform feedback requires
    * packing it's safe to override the disabled setting. See
    * is_varying_packing_safe().
    */
   const bool xfb_enabled;

   /**
    * If true, store_locations() may flag matched varyings as having an
    * explicit location and component so that they can be packed natively
    * via ARB_enhanced_layouts instead of by lower_packed_varyings().
    */
   const bool enhanced_layouts_enabled;

   /**
    * Enum representing the order in which varyings are packed within a
    * packing class.
    *
    * Currently we pack vec4's first, then vec2's, then scalar values, then
    * vec3's.  This order ensures that the only vectors that are at risk of
    * having to be "double parked" (split between two adjacent varying slots)
    * are the vec3's.
    */
   enum packing_order_enum {
      PACKING_ORDER_VEC4,
      PACKING_ORDER_VEC2,
      PACKING_ORDER_SCALAR,
      PACKING_ORDER_VEC3,
   };

   static unsigned compute_packing_class(const ir_variable *var);
   static packing_order_enum compute_packing_order(const ir_variable *var);

   /* qsort() comparators operating on elements of the \c matches array. */
   static int match_comparator(const void *x_generic, const void *y_generic);
   static int xfb_comparator(const void *x_generic, const void *y_generic);

   /**
    * Structure recording the relationship between a single producer output
    * and a single consumer input.
    */
   struct match {
      /**
       * Packing class for this varying, computed by compute_packing_class().
       */
      unsigned packing_class;

      /**
       * Packing order for this varying, computed by compute_packing_order().
       */
      packing_order_enum packing_order;

      /**
       * Number of components reserved for this varying, computed by
       * record(): either a whole number of slots times four, or the
       * type's component_slots() when the varying may be packed.
       */
      unsigned num_components;

      /**
       * The output variable in the producer stage.
       */
      ir_variable *producer_var;

      /**
       * The input variable in the consumer stage.
       */
      ir_variable *consumer_var;

      /**
       * The location which has been assigned for this varying.  This is
       * expressed in multiples of a float, with the first generic varying
       * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
       * value 0.
       */
      unsigned generic_location;
   } *matches;

   /**
    * The number of elements in the \c matches array that are currently in
    * use.
    */
   unsigned num_matches;

   /**
    * The number of elements that were set aside for the \c matches array when
    * it was allocated.
    */
   unsigned matches_capacity;

   /**
    * Stages on either side of the interface.  They are passed to
    * get_varying_type() to interpret the types of producer and consumer
    * variables respectively; record() treats a consumer_stage of
    * MESA_SHADER_NONE as "consumer unknown".
    */
   gl_shader_stage producer_stage;
   gl_shader_stage consumer_stage;
};
1572 
1573 } /* anonymous namespace */
1574 
varying_matches(bool disable_varying_packing,bool xfb_enabled,bool enhanced_layouts_enabled,gl_shader_stage producer_stage,gl_shader_stage consumer_stage)1575 varying_matches::varying_matches(bool disable_varying_packing,
1576                                  bool xfb_enabled,
1577                                  bool enhanced_layouts_enabled,
1578                                  gl_shader_stage producer_stage,
1579                                  gl_shader_stage consumer_stage)
1580    : disable_varying_packing(disable_varying_packing),
1581      xfb_enabled(xfb_enabled),
1582      enhanced_layouts_enabled(enhanced_layouts_enabled),
1583      producer_stage(producer_stage),
1584      consumer_stage(consumer_stage)
1585 {
1586    /* Note: this initial capacity is rather arbitrarily chosen to be large
1587     * enough for many cases without wasting an unreasonable amount of space.
1588     * varying_matches::record() will resize the array if there are more than
1589     * this number of varyings.
1590     */
1591    this->matches_capacity = 8;
1592    this->matches = (match *)
1593       malloc(sizeof(*this->matches) * this->matches_capacity);
1594    this->num_matches = 0;
1595 }
1596 
1597 
~varying_matches()1598 varying_matches::~varying_matches()
1599 {
1600    free(this->matches);
1601 }
1602 
1603 
1604 /**
1605  * Packing is always safe on individual arrays, structures, and matrices. It
1606  * is also safe if the varying is only used for transform feedback.
1607  */
1608 bool
is_varying_packing_safe(const glsl_type * type,const ir_variable * var) const1609 varying_matches::is_varying_packing_safe(const glsl_type *type,
1610                                          const ir_variable *var) const
1611 {
1612    if (consumer_stage == MESA_SHADER_TESS_EVAL ||
1613        consumer_stage == MESA_SHADER_TESS_CTRL ||
1614        producer_stage == MESA_SHADER_TESS_CTRL)
1615       return false;
1616 
1617    return xfb_enabled && (type->is_array() || type->is_record() ||
1618                           type->is_matrix() || var->data.is_xfb_only);
1619 }
1620 
1621 
1622 /**
1623  * Record the given producer/consumer variable pair in the list of variables
1624  * that should later be assigned locations.
1625  *
1626  * It is permissible for \c consumer_var to be NULL (this happens if a
1627  * variable is output by the producer and consumed by transform feedback, but
1628  * not consumed by the consumer).
1629  *
1630  * If \c producer_var has already been paired up with a consumer_var, or
1631  * producer_var is part of fixed pipeline functionality (and hence already has
1632  * a location assigned), this function has no effect.
1633  *
1634  * Note: as a side effect this function may change the interpolation type of
1635  * \c producer_var, but only when the change couldn't possibly affect
1636  * rendering.
1637  */
1638 void
record(ir_variable * producer_var,ir_variable * consumer_var)1639 varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
1640 {
1641    assert(producer_var != NULL || consumer_var != NULL);
1642 
1643    if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
1644        producer_var->data.explicit_location)) ||
1645        (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
1646        consumer_var->data.explicit_location))) {
1647       /* Either a location already exists for this variable (since it is part
1648        * of fixed functionality), or it has already been recorded as part of a
1649        * previous match.
1650        */
1651       return;
1652    }
1653 
1654    bool needs_flat_qualifier = consumer_var == NULL &&
1655       (producer_var->type->contains_integer() ||
1656        producer_var->type->contains_double());
1657 
1658    if (!disable_varying_packing &&
1659        (needs_flat_qualifier ||
1660         (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
1661       /* Since this varying is not being consumed by the fragment shader, its
1662        * interpolation type varying cannot possibly affect rendering.
1663        * Also, this variable is non-flat and is (or contains) an integer
1664        * or a double.
1665        * If the consumer stage is unknown, don't modify the interpolation
1666        * type as it could affect rendering later with separate shaders.
1667        *
1668        * lower_packed_varyings requires all integer varyings to flat,
1669        * regardless of where they appear.  We can trivially satisfy that
1670        * requirement by changing the interpolation type to flat here.
1671        */
1672       if (producer_var) {
1673          producer_var->data.centroid = false;
1674          producer_var->data.sample = false;
1675          producer_var->data.interpolation = INTERP_MODE_FLAT;
1676       }
1677 
1678       if (consumer_var) {
1679          consumer_var->data.centroid = false;
1680          consumer_var->data.sample = false;
1681          consumer_var->data.interpolation = INTERP_MODE_FLAT;
1682       }
1683    }
1684 
1685    if (this->num_matches == this->matches_capacity) {
1686       this->matches_capacity *= 2;
1687       this->matches = (match *)
1688          realloc(this->matches,
1689                  sizeof(*this->matches) * this->matches_capacity);
1690    }
1691 
1692    /* We must use the consumer to compute the packing class because in GL4.4+
1693     * there is no guarantee interpolation qualifiers will match across stages.
1694     *
1695     * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
1696     *
1697     *    "The type and presence of interpolation qualifiers of variables with
1698     *    the same name declared in all linked shaders for the same cross-stage
1699     *    interface must match, otherwise the link command will fail.
1700     *
1701     *    When comparing an output from one stage to an input of a subsequent
1702     *    stage, the input and output don't match if their interpolation
1703     *    qualifiers (or lack thereof) are not the same."
1704     *
1705     * This text was also in at least revison 7 of the 4.40 spec but is no
1706     * longer in revision 9 and not in the 4.50 spec.
1707     */
1708    const ir_variable *const var = (consumer_var != NULL)
1709       ? consumer_var : producer_var;
1710    const gl_shader_stage stage = (consumer_var != NULL)
1711       ? consumer_stage : producer_stage;
1712    const glsl_type *type = get_varying_type(var, stage);
1713 
1714    if (producer_var && consumer_var &&
1715        consumer_var->data.must_be_shader_input) {
1716       producer_var->data.must_be_shader_input = 1;
1717    }
1718 
1719    this->matches[this->num_matches].packing_class
1720       = this->compute_packing_class(var);
1721    this->matches[this->num_matches].packing_order
1722       = this->compute_packing_order(var);
1723    if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) ||
1724        var->data.must_be_shader_input) {
1725       unsigned slots = type->count_attribute_slots(false);
1726       this->matches[this->num_matches].num_components = slots * 4;
1727    } else {
1728       this->matches[this->num_matches].num_components
1729          = type->component_slots();
1730    }
1731 
1732    this->matches[this->num_matches].producer_var = producer_var;
1733    this->matches[this->num_matches].consumer_var = consumer_var;
1734    this->num_matches++;
1735    if (producer_var)
1736       producer_var->data.is_unmatched_generic_inout = 0;
1737    if (consumer_var)
1738       consumer_var->data.is_unmatched_generic_inout = 0;
1739 }
1740 
1741 
/**
 * Choose locations for all of the variable matches that were previously
 * passed to varying_matches::record().
 *
 * The matches are first sorted (see match_comparator() and
 * xfb_comparator()), then each one is given the next free position that
 * does not overlap a slot flagged in \c reserved_slots.  Patch varyings are
 * allocated from a separate cursor that starts at MAX_VARYING * 4.
 *
 * A linker error is reported on \c prog when a non-patch varying does not
 * fit below MAX_VARYING slots; processing of the remaining matches still
 * continues in that case.
 *
 * \param prog        program being linked; its transform feedback settings
 *                    are read and linker errors are reported on it
 * \param components  returns array[slot] of number of components used
 *                    per slot (1, 2, 3 or 4)
 * \param reserved_slots  bitmask indicating which varying slots are already
 *                        allocated
 * \return number of slots (4-element vectors) allocated; only the non-patch
 *         cursor contributes to this value
 */
unsigned
varying_matches::assign_locations(struct gl_shader_program *prog,
                                  uint8_t components[],
                                  uint64_t reserved_slots)
{
   /* If packing has been disabled then we cannot safely sort the varyings by
    * class as it may mean we are using a version of OpenGL where
    * interpolation qualifiers are not guaranteed to be matching across
    * shaders, sorting in this case could result in mismatching shader
    * interfaces.
    * When packing is disabled the sort orders varyings used by transform
    * feedback first, but also depends on *undefined behaviour* of qsort to
    * reverse the order of the varyings. See: xfb_comparator().
    */
   if (!this->disable_varying_packing) {
      /* Sort varying matches into an order that makes them easy to pack. */
      qsort(this->matches, this->num_matches, sizeof(*this->matches),
            &varying_matches::match_comparator);
   } else {
      /* Only sort varyings that are only used by transform feedback. */
      qsort(this->matches, this->num_matches, sizeof(*this->matches),
            &varying_matches::xfb_comparator);
   }

   /* Allocation cursors, measured in components (four per slot).  Patch
    * varyings use their own cursor starting past the non-patch range.
    */
   unsigned generic_location = 0;
   unsigned generic_patch_location = MAX_VARYING*4;
   bool previous_var_xfb_only = false;
   unsigned previous_packing_class = ~0u;

   /* For transform feedback separate mode, we know the number of attributes
    * is <= the number of buffers.  So packing isn't critical.  In fact,
    * packing vec3 attributes can cause trouble because splitting a vec3
    * effectively creates an additional transform feedback output.  The
    * extra TFB output may exceed device driver limits.
    */
   const bool dont_pack_vec3 =
      (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
       prog->TransformFeedback.NumVarying > 0);

   for (unsigned i = 0; i < this->num_matches; i++) {
      unsigned *location = &generic_location;
      const ir_variable *var;
      const glsl_type *type;
      bool is_vertex_input = false;

      /* Prefer the consumer variable when there is one, mirroring the
       * choice made in record().
       */
      if (matches[i].consumer_var) {
         var = matches[i].consumer_var;
         type = get_varying_type(var, consumer_stage);
         if (consumer_stage == MESA_SHADER_VERTEX)
            is_vertex_input = true;
      } else {
         var = matches[i].producer_var;
         type = get_varying_type(var, producer_stage);
      }

      if (var->data.patch)
         location = &generic_patch_location;

      /* Advance to the next slot if this varying has a different packing
       * class than the previous one, and we're not already on a slot
       * boundary.
       *
       * Also advance to the next slot if packing is disabled. This makes sure
       * we don't assign varyings the same locations which is possible
       * because we still pack individual arrays, records and matrices even
       * when packing is disabled. Note we don't advance to the next slot if
       * we can pack varyings together that are only used for transform
       * feedback.
       */
      if (var->data.must_be_shader_input ||
          (this->disable_varying_packing &&
           !(previous_var_xfb_only && var->data.is_xfb_only)) ||
          (previous_packing_class != this->matches[i].packing_class) ||
          (this->matches[i].packing_order == PACKING_ORDER_VEC3 &&
           dont_pack_vec3)) {
         *location = ALIGN(*location, 4);
      }

      previous_var_xfb_only = var->data.is_xfb_only;
      previous_packing_class = this->matches[i].packing_class;

      /* The number of components taken up by this variable. For vertex shader
       * inputs, we use the number of slots * 4, as they have different
       * counting rules.
       */
      unsigned num_components = is_vertex_input ?
         type->count_attribute_slots(is_vertex_input) * 4 :
         this->matches[i].num_components;

      /* The last slot for this variable, inclusive. */
      unsigned slot_end = *location + num_components - 1;

      /* FIXME: We could be smarter in the below code and loop back over
       * trying to fill any locations that we skipped because we couldn't pack
       * the varying between an explicit location. For now just let the user
       * hit the linking error if we run out of room and suggest they use
       * explicit locations.
       */
      /* Search for a run of slots that does not collide with a reserved
       * slot; every retry restarts at the following slot boundary.  The
       * loop is not entered once slot_end reaches MAX_VARYING * 4.
       */
      while (slot_end < MAX_VARYING * 4u) {
         const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
         const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);

         assert(slots > 0);

         if ((reserved_slots & slot_mask) == 0) {
            break;
         }

         *location = ALIGN(*location + 1, 4);
         slot_end = *location + num_components - 1;
      }

      if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
         linker_error(prog, "insufficient contiguous locations available for "
                      "%s it is possible an array or struct could not be "
                      "packed between varyings with explicit locations. Try "
                      "using an explicit location for arrays and structs.",
                      var->name);
      }

      /* Record per-slot component usage: every slot before the last one is
       * marked as fully used, the last one gets the number of components
       * up to and including slot_end.
       */
      if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
         for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
            components[j] = 4;
         components[slot_end / 4u] = (slot_end & 3) + 1;
      }

      this->matches[i].generic_location = *location;

      *location = slot_end + 1;
   }

   /* Round the non-patch cursor up to whole slots. */
   return (generic_location + 3) / 4;
}
1884 
1885 
1886 /**
1887  * Update the producer and consumer shaders to reflect the locations
1888  * assignments that were made by varying_matches::assign_locations().
1889  */
1890 void
store_locations() const1891 varying_matches::store_locations() const
1892 {
1893    /* Check is location needs to be packed with lower_packed_varyings() or if
1894     * we can just use ARB_enhanced_layouts packing.
1895     */
1896    bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 };
1897    const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
1898 
1899    for (unsigned i = 0; i < this->num_matches; i++) {
1900       ir_variable *producer_var = this->matches[i].producer_var;
1901       ir_variable *consumer_var = this->matches[i].consumer_var;
1902       unsigned generic_location = this->matches[i].generic_location;
1903       unsigned slot = generic_location / 4;
1904       unsigned offset = generic_location % 4;
1905 
1906       if (producer_var) {
1907          producer_var->data.location = VARYING_SLOT_VAR0 + slot;
1908          producer_var->data.location_frac = offset;
1909       }
1910 
1911       if (consumer_var) {
1912          assert(consumer_var->data.location == -1);
1913          consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
1914          consumer_var->data.location_frac = offset;
1915       }
1916 
1917       /* Find locations suitable for native packing via
1918        * ARB_enhanced_layouts.
1919        */
1920       if (producer_var && consumer_var) {
1921          if (enhanced_layouts_enabled) {
1922             const glsl_type *type =
1923                get_varying_type(producer_var, producer_stage);
1924             if (type->is_array() || type->is_matrix() || type->is_record() ||
1925                 type->is_double()) {
1926                unsigned comp_slots = type->component_slots() + offset;
1927                unsigned slots = comp_slots / 4;
1928                if (comp_slots % 4)
1929                   slots += 1;
1930 
1931                for (unsigned j = 0; j < slots; j++) {
1932                   pack_loc[slot + j] = true;
1933                }
1934             } else if (offset + type->vector_elements > 4) {
1935                pack_loc[slot] = true;
1936                pack_loc[slot + 1] = true;
1937             } else {
1938                loc_type[slot][offset] = type;
1939             }
1940          }
1941       }
1942    }
1943 
1944    /* Attempt to use ARB_enhanced_layouts for more efficient packing if
1945     * suitable.
1946     */
1947    if (enhanced_layouts_enabled) {
1948       for (unsigned i = 0; i < this->num_matches; i++) {
1949          ir_variable *producer_var = this->matches[i].producer_var;
1950          ir_variable *consumer_var = this->matches[i].consumer_var;
1951          unsigned generic_location = this->matches[i].generic_location;
1952          unsigned slot = generic_location / 4;
1953 
1954          if (pack_loc[slot] || !producer_var || !consumer_var)
1955             continue;
1956 
1957          const glsl_type *type =
1958             get_varying_type(producer_var, producer_stage);
1959          bool type_match = true;
1960          for (unsigned j = 0; j < 4; j++) {
1961             if (loc_type[slot][j]) {
1962                if (type->base_type != loc_type[slot][j]->base_type)
1963                   type_match = false;
1964             }
1965          }
1966 
1967          if (type_match) {
1968             producer_var->data.explicit_location = 1;
1969             consumer_var->data.explicit_location = 1;
1970             producer_var->data.explicit_component = 1;
1971             consumer_var->data.explicit_component = 1;
1972          }
1973       }
1974    }
1975 }
1976 
1977 
1978 /**
1979  * Compute the "packing class" of the given varying.  This is an unsigned
1980  * integer with the property that two variables in the same packing class can
1981  * be safely backed into the same vec4.
1982  */
1983 unsigned
compute_packing_class(const ir_variable * var)1984 varying_matches::compute_packing_class(const ir_variable *var)
1985 {
1986    /* Without help from the back-end, there is no way to pack together
1987     * variables with different interpolation types, because
1988     * lower_packed_varyings must choose exactly one interpolation type for
1989     * each packed varying it creates.
1990     *
1991     * However, we can safely pack together floats, ints, and uints, because:
1992     *
1993     * - varyings of base type "int" and "uint" must use the "flat"
1994     *   interpolation type, which can only occur in GLSL 1.30 and above.
1995     *
1996     * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
1997     *   can store flat floats as ints without losing any information (using
1998     *   the ir_unop_bitcast_* opcodes).
1999     *
2000     * Therefore, the packing class depends only on the interpolation type.
2001     */
2002    const unsigned interp = var->is_interpolation_flat()
2003       ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation;
2004 
2005    assert(interp < (1 << 3));
2006 
2007    const unsigned packing_class = (interp << 0) |
2008                                   (var->data.centroid << 3) |
2009                                   (var->data.sample << 4) |
2010                                   (var->data.patch << 5) |
2011                                   (var->data.must_be_shader_input << 6);
2012 
2013    return packing_class;
2014 }
2015 
2016 
2017 /**
2018  * Compute the "packing order" of the given varying.  This is a sort key we
2019  * use to determine when to attempt to pack the given varying relative to
2020  * other varyings in the same packing class.
2021  */
2022 varying_matches::packing_order_enum
compute_packing_order(const ir_variable * var)2023 varying_matches::compute_packing_order(const ir_variable *var)
2024 {
2025    const glsl_type *element_type = var->type;
2026 
2027    while (element_type->is_array()) {
2028       element_type = element_type->fields.array;
2029    }
2030 
2031    switch (element_type->component_slots() % 4) {
2032    case 1: return PACKING_ORDER_SCALAR;
2033    case 2: return PACKING_ORDER_VEC2;
2034    case 3: return PACKING_ORDER_VEC3;
2035    case 0: return PACKING_ORDER_VEC4;
2036    default:
2037       assert(!"Unexpected value of vector_elements");
2038       return PACKING_ORDER_VEC4;
2039    }
2040 }
2041 
2042 
2043 /**
2044  * Comparison function passed to qsort() to sort varyings by packing_class and
2045  * then by packing_order.
2046  */
2047 int
match_comparator(const void * x_generic,const void * y_generic)2048 varying_matches::match_comparator(const void *x_generic, const void *y_generic)
2049 {
2050    const match *x = (const match *) x_generic;
2051    const match *y = (const match *) y_generic;
2052 
2053    if (x->packing_class != y->packing_class)
2054       return x->packing_class - y->packing_class;
2055    return x->packing_order - y->packing_order;
2056 }
2057 
2058 
2059 /**
2060  * Comparison function passed to qsort() to sort varyings used only by
2061  * transform feedback when packing of other varyings is disabled.
2062  */
2063 int
xfb_comparator(const void * x_generic,const void * y_generic)2064 varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
2065 {
2066    const match *x = (const match *) x_generic;
2067 
2068    if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
2069       return match_comparator(x_generic, y_generic);
2070 
2071    /* FIXME: When the comparator returns 0 it means the elements being
2072     * compared are equivalent. However the qsort documentation says:
2073     *
2074     *    "The order of equivalent elements is undefined."
2075     *
2076     * In practice the sort ends up reversing the order of the varyings which
2077     * means locations are also assigned in this reversed order and happens to
2078     * be what we want. This is also whats happening in
2079     * varying_matches::match_comparator().
2080     */
2081    return 0;
2082 }
2083 
2084 
2085 /**
2086  * Is the given variable a varying variable to be counted against the
2087  * limit in ctx->Const.MaxVarying?
2088  * This includes variables such as texcoords, colors and generic
2089  * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
2090  */
2091 static bool
var_counts_against_varying_limit(gl_shader_stage stage,const ir_variable * var)2092 var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var)
2093 {
2094    /* Only fragment shaders will take a varying variable as an input */
2095    if (stage == MESA_SHADER_FRAGMENT &&
2096        var->data.mode == ir_var_shader_in) {
2097       switch (var->data.location) {
2098       case VARYING_SLOT_POS:
2099       case VARYING_SLOT_FACE:
2100       case VARYING_SLOT_PNTC:
2101          return false;
2102       default:
2103          return true;
2104       }
2105    }
2106    return false;
2107 }
2108 
2109 
2110 /**
2111  * Visitor class that generates tfeedback_candidate structs describing all
2112  * possible targets of transform feedback.
2113  *
2114  * tfeedback_candidate structs are stored in the hash table
2115  * tfeedback_candidates, which is passed to the constructor.  This hash table
2116  * maps varying names to instances of the tfeedback_candidate struct.
2117  */
2118 class tfeedback_candidate_generator : public program_resource_visitor
2119 {
2120 public:
tfeedback_candidate_generator(void * mem_ctx,hash_table * tfeedback_candidates)2121    tfeedback_candidate_generator(void *mem_ctx,
2122                                  hash_table *tfeedback_candidates)
2123       : mem_ctx(mem_ctx),
2124         tfeedback_candidates(tfeedback_candidates),
2125         toplevel_var(NULL),
2126         varying_floats(0)
2127    {
2128    }
2129 
process(ir_variable * var)2130    void process(ir_variable *var)
2131    {
2132       /* All named varying interface blocks should be flattened by now */
2133       assert(!var->is_interface_instance());
2134 
2135       this->toplevel_var = var;
2136       this->varying_floats = 0;
2137       program_resource_visitor::process(var, false);
2138    }
2139 
2140 private:
visit_field(const glsl_type * type,const char * name,bool,const glsl_type *,const enum glsl_interface_packing,bool)2141    virtual void visit_field(const glsl_type *type, const char *name,
2142                             bool /* row_major */,
2143                             const glsl_type * /* record_type */,
2144                             const enum glsl_interface_packing,
2145                             bool /* last_field */)
2146    {
2147       assert(!type->without_array()->is_record());
2148       assert(!type->without_array()->is_interface());
2149 
2150       tfeedback_candidate *candidate
2151          = rzalloc(this->mem_ctx, tfeedback_candidate);
2152       candidate->toplevel_var = this->toplevel_var;
2153       candidate->type = type;
2154       candidate->offset = this->varying_floats;
2155       _mesa_hash_table_insert(this->tfeedback_candidates,
2156                               ralloc_strdup(this->mem_ctx, name),
2157                               candidate);
2158       this->varying_floats += type->component_slots();
2159    }
2160 
2161    /**
2162     * Memory context used to allocate hash table keys and values.
2163     */
2164    void * const mem_ctx;
2165 
2166    /**
2167     * Hash table in which tfeedback_candidate objects should be stored.
2168     */
2169    hash_table * const tfeedback_candidates;
2170 
2171    /**
2172     * Pointer to the toplevel variable that is being traversed.
2173     */
2174    ir_variable *toplevel_var;
2175 
2176    /**
2177     * Total number of varying floats that have been visited so far.  This is
2178     * used to determine the offset to each varying within the toplevel
2179     * variable.
2180     */
2181    unsigned varying_floats;
2182 };
2183 
2184 
2185 namespace linker {
2186 
2187 void
populate_consumer_input_sets(void * mem_ctx,exec_list * ir,hash_table * consumer_inputs,hash_table * consumer_interface_inputs,ir_variable * consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])2188 populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
2189                              hash_table *consumer_inputs,
2190                              hash_table *consumer_interface_inputs,
2191                              ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2192 {
2193    memset(consumer_inputs_with_locations,
2194           0,
2195           sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
2196 
2197    foreach_in_list(ir_instruction, node, ir) {
2198       ir_variable *const input_var = node->as_variable();
2199 
2200       if (input_var != NULL && input_var->data.mode == ir_var_shader_in) {
2201          /* All interface blocks should have been lowered by this point */
2202          assert(!input_var->type->is_interface());
2203 
2204          if (input_var->data.explicit_location) {
2205             /* assign_varying_locations only cares about finding the
2206              * ir_variable at the start of a contiguous location block.
2207              *
2208              *     - For !producer, consumer_inputs_with_locations isn't used.
2209              *
2210              *     - For !consumer, consumer_inputs_with_locations is empty.
2211              *
2212              * For consumer && producer, if you were trying to set some
2213              * ir_variable to the middle of a location block on the other side
2214              * of producer/consumer, cross_validate_outputs_to_inputs() should
2215              * be link-erroring due to either type mismatch or location
2216              * overlaps.  If the variables do match up, then they've got a
2217              * matching data.location and you only looked at
2218              * consumer_inputs_with_locations[var->data.location], not any
2219              * following entries for the array/structure.
2220              */
2221             consumer_inputs_with_locations[input_var->data.location] =
2222                input_var;
2223          } else if (input_var->get_interface_type() != NULL) {
2224             char *const iface_field_name =
2225                ralloc_asprintf(mem_ctx, "%s.%s",
2226                   input_var->get_interface_type()->without_array()->name,
2227                   input_var->name);
2228             _mesa_hash_table_insert(consumer_interface_inputs,
2229                                     iface_field_name, input_var);
2230          } else {
2231             _mesa_hash_table_insert(consumer_inputs,
2232                                     ralloc_strdup(mem_ctx, input_var->name),
2233                                     input_var);
2234          }
2235       }
2236    }
2237 }
2238 
2239 /**
2240  * Find a variable from the consumer that "matches" the specified variable
2241  *
2242  * This function only finds inputs with names that match.  There is no
2243  * validation (here) that the types, etc. are compatible.
2244  */
2245 ir_variable *
get_matching_input(void * mem_ctx,const ir_variable * output_var,hash_table * consumer_inputs,hash_table * consumer_interface_inputs,ir_variable * consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])2246 get_matching_input(void *mem_ctx,
2247                    const ir_variable *output_var,
2248                    hash_table *consumer_inputs,
2249                    hash_table *consumer_interface_inputs,
2250                    ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2251 {
2252    ir_variable *input_var;
2253 
2254    if (output_var->data.explicit_location) {
2255       input_var = consumer_inputs_with_locations[output_var->data.location];
2256    } else if (output_var->get_interface_type() != NULL) {
2257       char *const iface_field_name =
2258          ralloc_asprintf(mem_ctx, "%s.%s",
2259             output_var->get_interface_type()->without_array()->name,
2260             output_var->name);
2261       hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
2262       input_var = entry ? (ir_variable *) entry->data : NULL;
2263    } else {
2264       hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name);
2265       input_var = entry ? (ir_variable *) entry->data : NULL;
2266    }
2267 
2268    return (input_var == NULL || input_var->data.mode != ir_var_shader_in)
2269       ? NULL : input_var;
2270 }
2271 
2272 }
2273 
2274 static int
io_variable_cmp(const void * _a,const void * _b)2275 io_variable_cmp(const void *_a, const void *_b)
2276 {
2277    const ir_variable *const a = *(const ir_variable **) _a;
2278    const ir_variable *const b = *(const ir_variable **) _b;
2279 
2280    if (a->data.explicit_location && b->data.explicit_location)
2281       return b->data.location - a->data.location;
2282 
2283    if (a->data.explicit_location && !b->data.explicit_location)
2284       return 1;
2285 
2286    if (!a->data.explicit_location && b->data.explicit_location)
2287       return -1;
2288 
2289    return -strcmp(a->name, b->name);
2290 }
2291 
2292 /**
2293  * Sort the shader IO variables into canonical order
2294  */
2295 static void
canonicalize_shader_io(exec_list * ir,enum ir_variable_mode io_mode)2296 canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
2297 {
2298    ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
2299    unsigned num_variables = 0;
2300 
2301    foreach_in_list(ir_instruction, node, ir) {
2302       ir_variable *const var = node->as_variable();
2303 
2304       if (var == NULL || var->data.mode != io_mode)
2305          continue;
2306 
2307       /* If we have already encountered more I/O variables that could
2308        * successfully link, bail.
2309        */
2310       if (num_variables == ARRAY_SIZE(var_table))
2311          return;
2312 
2313       var_table[num_variables++] = var;
2314    }
2315 
2316    if (num_variables == 0)
2317       return;
2318 
2319    /* Sort the list in reverse order (io_variable_cmp handles this).  Later
2320     * we're going to push the variables on to the IR list as a stack, so we
2321     * want the last variable (in canonical order) to be first in the list.
2322     */
2323    qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
2324 
2325    /* Remove the variable from it's current location in the IR, and put it at
2326     * the front.
2327     */
2328    for (unsigned i = 0; i < num_variables; i++) {
2329       var_table[i]->remove();
2330       ir->push_head(var_table[i]);
2331    }
2332 }
2333 
2334 /**
2335  * Generate a bitfield map of the explicit locations for shader varyings.
2336  *
2337  * Note: For Tessellation shaders we are sitting right on the limits of the
2338  * 64 bit map. Per-vertex and per-patch both have separate location domains
2339  * with a max of MAX_VARYING.
2340  */
2341 static uint64_t
reserved_varying_slot(struct gl_linked_shader * stage,ir_variable_mode io_mode)2342 reserved_varying_slot(struct gl_linked_shader *stage,
2343                       ir_variable_mode io_mode)
2344 {
2345    assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
2346    /* Avoid an overflow of the returned value */
2347    assert(MAX_VARYINGS_INCL_PATCH <= 64);
2348 
2349    uint64_t slots = 0;
2350    int var_slot;
2351 
2352    if (!stage)
2353       return slots;
2354 
2355    foreach_in_list(ir_instruction, node, stage->ir) {
2356       ir_variable *const var = node->as_variable();
2357 
2358       if (var == NULL || var->data.mode != io_mode ||
2359           !var->data.explicit_location ||
2360           var->data.location < VARYING_SLOT_VAR0)
2361          continue;
2362 
2363       var_slot = var->data.location - VARYING_SLOT_VAR0;
2364 
2365       unsigned num_elements = get_varying_type(var, stage->Stage)
2366          ->count_attribute_slots(io_mode == ir_var_shader_in &&
2367                                  stage->Stage == MESA_SHADER_VERTEX);
2368       for (unsigned i = 0; i < num_elements; i++) {
2369          if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
2370             slots |= UINT64_C(1) << var_slot;
2371          var_slot += 1;
2372       }
2373    }
2374 
2375    return slots;
2376 }
2377 
2378 
2379 /**
2380  * Assign locations for all variables that are produced in one pipeline stage
2381  * (the "producer") and consumed in the next stage (the "consumer").
2382  *
2383  * Variables produced by the producer may also be consumed by transform
2384  * feedback.
2385  *
2386  * \param num_tfeedback_decls is the number of declarations indicating
2387  *        variables that may be consumed by transform feedback.
2388  *
2389  * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects
2390  *        representing the result of parsing the strings passed to
2391  *        glTransformFeedbackVaryings().  assign_location() will be called for
2392  *        each of these objects that matches one of the outputs of the
2393  *        producer.
2394  *
2395  * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
2396  * be NULL.  In this case, varying locations are assigned solely based on the
2397  * requirements of transform feedback.
2398  */
2399 static bool
assign_varying_locations(struct gl_context * ctx,void * mem_ctx,struct gl_shader_program * prog,gl_linked_shader * producer,gl_linked_shader * consumer,unsigned num_tfeedback_decls,tfeedback_decl * tfeedback_decls,const uint64_t reserved_slots)2400 assign_varying_locations(struct gl_context *ctx,
2401                          void *mem_ctx,
2402                          struct gl_shader_program *prog,
2403                          gl_linked_shader *producer,
2404                          gl_linked_shader *consumer,
2405                          unsigned num_tfeedback_decls,
2406                          tfeedback_decl *tfeedback_decls,
2407                          const uint64_t reserved_slots)
2408 {
2409    /* Tessellation shaders treat inputs and outputs as shared memory and can
2410     * access inputs and outputs of other invocations.
2411     * Therefore, they can't be lowered to temps easily (and definitely not
2412     * efficiently).
2413     */
2414    bool unpackable_tess =
2415       (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
2416       (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
2417       (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
2418 
2419    /* Transform feedback code assumes varying arrays are packed, so if the
2420     * driver has disabled varying packing, make sure to at least enable
2421     * packing required by transform feedback.
2422     */
2423    bool xfb_enabled =
2424       ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
2425 
2426    /* Disable packing on outward facing interfaces for SSO because in ES we
2427     * need to retain the unpacked varying information for draw time
2428     * validation.
2429     *
2430     * Packing is still enabled on individual arrays, structs, and matrices as
2431     * these are required by the transform feedback code and it is still safe
2432     * to do so. We also enable packing when a varying is only used for
2433     * transform feedback and its not a SSO.
2434     */
2435    bool disable_varying_packing =
2436       ctx->Const.DisableVaryingPacking || unpackable_tess;
2437    if (prog->SeparateShader && (producer == NULL || consumer == NULL))
2438       disable_varying_packing = true;
2439 
2440    varying_matches matches(disable_varying_packing, xfb_enabled,
2441                            ctx->Extensions.ARB_enhanced_layouts,
2442                            producer ? producer->Stage : MESA_SHADER_NONE,
2443                            consumer ? consumer->Stage : MESA_SHADER_NONE);
2444    hash_table *tfeedback_candidates =
2445          _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2446                                  _mesa_key_string_equal);
2447    hash_table *consumer_inputs =
2448          _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2449                                  _mesa_key_string_equal);
2450    hash_table *consumer_interface_inputs =
2451          _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2452                                  _mesa_key_string_equal);
2453    ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
2454       NULL,
2455    };
2456 
2457    unsigned consumer_vertices = 0;
2458    if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
2459       consumer_vertices = prog->Geom.VerticesIn;
2460 
2461    /* Operate in a total of four passes.
2462     *
2463     * 1. Sort inputs / outputs into a canonical order.  This is necessary so
2464     *    that inputs / outputs of separable shaders will be assigned
2465     *    predictable locations regardless of the order in which declarations
2466     *    appeared in the shader source.
2467     *
2468     * 2. Assign locations for any matching inputs and outputs.
2469     *
2470     * 3. Mark output variables in the producer that do not have locations as
2471     *    not being outputs.  This lets the optimizer eliminate them.
2472     *
2473     * 4. Mark input variables in the consumer that do not have locations as
2474     *    not being inputs.  This lets the optimizer eliminate them.
2475     */
2476    if (consumer)
2477       canonicalize_shader_io(consumer->ir, ir_var_shader_in);
2478 
2479    if (producer)
2480       canonicalize_shader_io(producer->ir, ir_var_shader_out);
2481 
2482    if (consumer)
2483       linker::populate_consumer_input_sets(mem_ctx, consumer->ir,
2484                                            consumer_inputs,
2485                                            consumer_interface_inputs,
2486                                            consumer_inputs_with_locations);
2487 
2488    if (producer) {
2489       foreach_in_list(ir_instruction, node, producer->ir) {
2490          ir_variable *const output_var = node->as_variable();
2491 
2492          if (output_var == NULL || output_var->data.mode != ir_var_shader_out)
2493             continue;
2494 
2495          /* Only geometry shaders can use non-zero streams */
2496          assert(output_var->data.stream == 0 ||
2497                 (output_var->data.stream < MAX_VERTEX_STREAMS &&
2498                  producer->Stage == MESA_SHADER_GEOMETRY));
2499 
2500          if (num_tfeedback_decls > 0) {
2501             tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
2502             g.process(output_var);
2503          }
2504 
2505          ir_variable *const input_var =
2506             linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
2507                                        consumer_interface_inputs,
2508                                        consumer_inputs_with_locations);
2509 
2510          /* If a matching input variable was found, add this output (and the
2511           * input) to the set.  If this is a separable program and there is no
2512           * consumer stage, add the output.
2513           *
2514           * Always add TCS outputs. They are shared by all invocations
2515           * within a patch and can be used as shared memory.
2516           */
2517          if (input_var || (prog->SeparateShader && consumer == NULL) ||
2518              producer->Stage == MESA_SHADER_TESS_CTRL) {
2519             matches.record(output_var, input_var);
2520          }
2521 
2522          /* Only stream 0 outputs can be consumed in the next stage */
2523          if (input_var && output_var->data.stream != 0) {
2524             linker_error(prog, "output %s is assigned to stream=%d but "
2525                          "is linked to an input, which requires stream=0",
2526                          output_var->name, output_var->data.stream);
2527             return false;
2528          }
2529       }
2530    } else {
2531       /* If there's no producer stage, then this must be a separable program.
2532        * For example, we may have a program that has just a fragment shader.
2533        * Later this program will be used with some arbitrary vertex (or
2534        * geometry) shader program.  This means that locations must be assigned
2535        * for all the inputs.
2536        */
2537       foreach_in_list(ir_instruction, node, consumer->ir) {
2538          ir_variable *const input_var = node->as_variable();
2539          if (input_var && input_var->data.mode == ir_var_shader_in) {
2540             matches.record(NULL, input_var);
2541          }
2542       }
2543    }
2544 
2545    for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
2546       if (!tfeedback_decls[i].is_varying())
2547          continue;
2548 
2549       const tfeedback_candidate *matched_candidate
2550          = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
2551 
2552       if (matched_candidate == NULL) {
2553          _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2554          return false;
2555       }
2556 
2557       /* Mark xfb varyings as always active */
2558       matched_candidate->toplevel_var->data.always_active_io = 1;
2559 
2560       /* Mark any corresponding inputs as always active also. We must do this
2561        * because we have a NIR pass that lowers vectors to scalars and another
2562        * that removes unused varyings.
2563        * We don't split varyings marked as always active because there is no
2564        * point in doing so. This means we need to mark both sides of the
2565        * interface as always active otherwise we will have a mismatch and
2566        * start removing things we shouldn't.
2567        */
2568       ir_variable *const input_var =
2569          linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var,
2570                                     consumer_inputs,
2571                                     consumer_interface_inputs,
2572                                     consumer_inputs_with_locations);
2573       if (input_var)
2574          input_var->data.always_active_io = 1;
2575 
2576       if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
2577          matched_candidate->toplevel_var->data.is_xfb_only = 1;
2578          matches.record(matched_candidate->toplevel_var, NULL);
2579       }
2580    }
2581 
2582    _mesa_hash_table_destroy(consumer_inputs, NULL);
2583    _mesa_hash_table_destroy(consumer_interface_inputs, NULL);
2584 
2585    uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
2586    const unsigned slots_used = matches.assign_locations(
2587          prog, components, reserved_slots);
2588    matches.store_locations();
2589 
2590    for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
2591       if (tfeedback_decls[i].is_varying()) {
2592          if (!tfeedback_decls[i].assign_location(ctx, prog)) {
2593             _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2594             return false;
2595          }
2596       }
2597    }
2598    _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2599 
2600    if (consumer && producer) {
2601       foreach_in_list(ir_instruction, node, consumer->ir) {
2602          ir_variable *const var = node->as_variable();
2603 
2604          if (var && var->data.mode == ir_var_shader_in &&
2605              var->data.is_unmatched_generic_inout) {
2606             if (!prog->IsES && prog->data->Version <= 120) {
2607                /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
2608                 *
2609                 *     Only those varying variables used (i.e. read) in
2610                 *     the fragment shader executable must be written to
2611                 *     by the vertex shader executable; declaring
2612                 *     superfluous varying variables in a vertex shader is
2613                 *     permissible.
2614                 *
2615                 * We interpret this text as meaning that the VS must
2616                 * write the variable for the FS to read it.  See
2617                 * "glsl1-varying read but not written" in piglit.
2618                 */
2619                linker_error(prog, "%s shader varying %s not written "
2620                             "by %s shader\n.",
2621                             _mesa_shader_stage_to_string(consumer->Stage),
2622                             var->name,
2623                             _mesa_shader_stage_to_string(producer->Stage));
2624             } else {
2625                linker_warning(prog, "%s shader varying %s not written "
2626                               "by %s shader\n.",
2627                               _mesa_shader_stage_to_string(consumer->Stage),
2628                               var->name,
2629                               _mesa_shader_stage_to_string(producer->Stage));
2630             }
2631          }
2632       }
2633 
2634       /* Now that validation is done its safe to remove unused varyings. As
2635        * we have both a producer and consumer its safe to remove unused
2636        * varyings even if the program is a SSO because the stages are being
2637        * linked together i.e. we have a multi-stage SSO.
2638        */
2639       remove_unused_shader_inputs_and_outputs(false, producer,
2640                                               ir_var_shader_out);
2641       remove_unused_shader_inputs_and_outputs(false, consumer,
2642                                               ir_var_shader_in);
2643    }
2644 
2645    if (producer) {
2646       lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
2647                             0, producer, disable_varying_packing,
2648                             xfb_enabled);
2649    }
2650 
2651    if (consumer) {
2652       lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
2653                             consumer_vertices, consumer,
2654                             disable_varying_packing, xfb_enabled);
2655    }
2656 
2657    return true;
2658 }
2659 
2660 static bool
check_against_output_limit(struct gl_context * ctx,struct gl_shader_program * prog,gl_linked_shader * producer,unsigned num_explicit_locations)2661 check_against_output_limit(struct gl_context *ctx,
2662                            struct gl_shader_program *prog,
2663                            gl_linked_shader *producer,
2664                            unsigned num_explicit_locations)
2665 {
2666    unsigned output_vectors = num_explicit_locations;
2667 
2668    foreach_in_list(ir_instruction, node, producer->ir) {
2669       ir_variable *const var = node->as_variable();
2670 
2671       if (var && !var->data.explicit_location &&
2672           var->data.mode == ir_var_shader_out &&
2673           var_counts_against_varying_limit(producer->Stage, var)) {
2674          /* outputs for fragment shader can't be doubles */
2675          output_vectors += var->type->count_attribute_slots(false);
2676       }
2677    }
2678 
2679    assert(producer->Stage != MESA_SHADER_FRAGMENT);
2680    unsigned max_output_components =
2681       ctx->Const.Program[producer->Stage].MaxOutputComponents;
2682 
2683    const unsigned output_components = output_vectors * 4;
2684    if (output_components > max_output_components) {
2685       if (ctx->API == API_OPENGLES2 || prog->IsES)
2686          linker_error(prog, "%s shader uses too many output vectors "
2687                       "(%u > %u)\n",
2688                       _mesa_shader_stage_to_string(producer->Stage),
2689                       output_vectors,
2690                       max_output_components / 4);
2691       else
2692          linker_error(prog, "%s shader uses too many output components "
2693                       "(%u > %u)\n",
2694                       _mesa_shader_stage_to_string(producer->Stage),
2695                       output_components,
2696                       max_output_components);
2697 
2698       return false;
2699    }
2700 
2701    return true;
2702 }
2703 
2704 static bool
check_against_input_limit(struct gl_context * ctx,struct gl_shader_program * prog,gl_linked_shader * consumer,unsigned num_explicit_locations)2705 check_against_input_limit(struct gl_context *ctx,
2706                           struct gl_shader_program *prog,
2707                           gl_linked_shader *consumer,
2708                           unsigned num_explicit_locations)
2709 {
2710    unsigned input_vectors = num_explicit_locations;
2711 
2712    foreach_in_list(ir_instruction, node, consumer->ir) {
2713       ir_variable *const var = node->as_variable();
2714 
2715       if (var && !var->data.explicit_location &&
2716           var->data.mode == ir_var_shader_in &&
2717           var_counts_against_varying_limit(consumer->Stage, var)) {
2718          /* vertex inputs aren't varying counted */
2719          input_vectors += var->type->count_attribute_slots(false);
2720       }
2721    }
2722 
2723    assert(consumer->Stage != MESA_SHADER_VERTEX);
2724    unsigned max_input_components =
2725       ctx->Const.Program[consumer->Stage].MaxInputComponents;
2726 
2727    const unsigned input_components = input_vectors * 4;
2728    if (input_components > max_input_components) {
2729       if (ctx->API == API_OPENGLES2 || prog->IsES)
2730          linker_error(prog, "%s shader uses too many input vectors "
2731                       "(%u > %u)\n",
2732                       _mesa_shader_stage_to_string(consumer->Stage),
2733                       input_vectors,
2734                       max_input_components / 4);
2735       else
2736          linker_error(prog, "%s shader uses too many input components "
2737                       "(%u > %u)\n",
2738                       _mesa_shader_stage_to_string(consumer->Stage),
2739                       input_components,
2740                       max_input_components);
2741 
2742       return false;
2743    }
2744 
2745    return true;
2746 }
2747 
/**
 * Link the varyings of every stage in a program and set up transform
 * feedback.
 *
 * \param prog     program being linked; stages are read from
 *                 prog->_LinkedShaders and link errors are reported on it.
 * \param first    index into prog->_LinkedShaders of the first stage.
 * \param last     index into prog->_LinkedShaders of the last stage.
 * \param ctx      GL context supplying limits and extension flags.
 * \param mem_ctx  ralloc context used for the transform feedback
 *                 declarations and passed on to the helpers called here.
 *
 * \return true on success, false as soon as any step fails.
 */
bool
link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last,
              struct gl_context *ctx, void *mem_ctx)
{
   bool has_xfb_qualifiers = false;
   unsigned num_tfeedback_decls = 0;
   char **varying_names = NULL;
   tfeedback_decl *tfeedback_decls = NULL;

   /* From the ARB_enhanced_layouts spec:
    *
    *    "If the shader used to record output variables for transform feedback
    *    varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
    *    qualifiers, the values specified by TransformFeedbackVaryings are
    *    ignored, and the set of variables captured for transform feedback is
    *    instead derived from the specified layout qualifiers."
    */
   for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
      /* Find last stage before fragment shader */
      if (prog->_LinkedShaders[i]) {
         has_xfb_qualifiers =
            process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
                                          prog, &num_tfeedback_decls,
                                          &varying_names);
         break;
      }
   }

   /* Without xfb layout qualifiers, fall back to the varyings recorded on
    * the program object.
    */
   if (!has_xfb_qualifiers) {
      num_tfeedback_decls = prog->TransformFeedback.NumVarying;
      varying_names = prog->TransformFeedback.VaryingNames;
   }

   if (num_tfeedback_decls != 0) {
      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * the <count> specified by TransformFeedbackVaryingsEXT is
       *     non-zero, but the program object has no vertex or geometry
       *     shader;
       */
      if (first >= MESA_SHADER_FRAGMENT) {
         linker_error(prog, "Transform feedback varyings specified, but "
                      "no vertex, tessellation, or geometry shader is "
                      "present.\n");
         return false;
      }

      /* Parse the varying name strings into tfeedback_decl objects. */
      tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl,
                                      num_tfeedback_decls);
      if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
                                 varying_names, tfeedback_decls))
         return false;
   }

   /* If there is no fragment shader we need to set transform feedback.
    *
    * For SSO we also need to assign output locations.  We assign them here
    * because we need to do it for both single stage programs and multi stage
    * programs.
    */
   if (last < MESA_SHADER_FRAGMENT &&
       (num_tfeedback_decls != 0 || prog->SeparateShader)) {
      const uint64_t reserved_out_slots =
         reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out);
      /* The last stage acts as producer with no consumer. */
      if (!assign_varying_locations(ctx, mem_ctx, prog,
                                    prog->_LinkedShaders[last], NULL,
                                    num_tfeedback_decls, tfeedback_decls,
                                    reserved_out_slots))
         return false;
   }

   if (last <= MESA_SHADER_FRAGMENT) {
      /* Remove unused varyings from the first/last stage unless SSO */
      remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
                                              prog->_LinkedShaders[first],
                                              ir_var_shader_in);
      remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
                                              prog->_LinkedShaders[last],
                                              ir_var_shader_out);

      /* If the program is made up of only a single stage */
      if (first == last) {
         gl_linked_shader *const sh = prog->_LinkedShaders[last];

         /* Run the dead built-in varying pass twice: once with the stage in
          * the consumer slot and once in the producer slot (the latter with
          * the transform feedback declarations).
          */
         do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL);
         do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls,
                                  tfeedback_decls);

         if (prog->SeparateShader) {
            const uint64_t reserved_slots =
               reserved_varying_slot(sh, ir_var_shader_in);

            /* Assign input locations for SSO, output locations are already
             * assigned.
             */
            if (!assign_varying_locations(ctx, mem_ctx, prog,
                                          NULL /* producer */,
                                          sh /* consumer */,
                                          0 /* num_tfeedback_decls */,
                                          NULL /* tfeedback_decls */,
                                          reserved_slots))
               return false;
         }
      } else {
         /* Linking the stages in the opposite order (from fragment to vertex)
          * ensures that inter-shader outputs written to in an earlier stage
          * are eliminated if they are (transitively) not used in a later
          * stage.
          */
         int next = last;
         for (int i = next - 1; i >= 0; i--) {
            /* Skip absent stages, but always process index 0 even when it is
             * NULL; in that case sh_i below is NULL and only the input side
             * of sh_next is handled.
             */
            if (prog->_LinkedShaders[i] == NULL && i != 0)
               continue;

            gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
            gl_linked_shader *const sh_next = prog->_LinkedShaders[next];

            /* Explicit locations on either side of the interface are
             * reserved before locations are assigned.
             */
            const uint64_t reserved_out_slots =
               reserved_varying_slot(sh_i, ir_var_shader_out);
            const uint64_t reserved_in_slots =
               reserved_varying_slot(sh_next, ir_var_shader_in);

            /* Transform feedback declarations are only passed along when the
             * consumer of this pair is the fragment stage.
             */
            do_dead_builtin_varyings(ctx, sh_i, sh_next,
                      next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
                      tfeedback_decls);

            if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
                      next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
                      tfeedback_decls,
                      reserved_out_slots | reserved_in_slots))
               return false;

            /* This must be done after all dead varyings are eliminated. */
            if (sh_i != NULL) {
               unsigned slots_used = _mesa_bitcount_64(reserved_out_slots);
               if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) {
                  return false;
               }
            }

            unsigned slots_used = _mesa_bitcount_64(reserved_in_slots);
            if (!check_against_input_limit(ctx, prog, sh_next, slots_used))
               return false;

            /* The stage just processed becomes the consumer of the next
             * (earlier) pair.
             */
            next = i;
         }
      }
   }

   if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls,
                             has_xfb_qualifiers))
      return false;

   return true;
}
2904