1 /*
2  * Copyright (c) 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file lower_buffer_access.cpp
26  *
27  * Helper for IR lowering pass to replace dereferences of buffer object based
28  * shader variables with intrinsic function calls.
29  *
30  * This helper is used by lowering passes for UBOs, SSBOs and compute shader
31  * shared variables.
32  */
33 
34 #include "lower_buffer_access.h"
35 #include "ir_builder.h"
36 #include "main/macros.h"
37 #include "util/list.h"
38 #include "glsl_parser_extras.h"
39 #include "linker.h"
40 
41 using namespace ir_builder;
42 
43 namespace lower_buffer_access {
44 
/**
 * Build a mask with the \c n lowest bits set (e.g. 0x7 for \c n == 3).
 *
 * Used below to produce the default write mask covering every component of
 * a vector with \c n elements.
 */
static inline int
writemask_for_size(unsigned n)
{
   const int first_bit_past_mask = 1 << n;

   return first_bit_past_mask - 1;
}
50 
51 /**
52  * Takes a deref and recursively calls itself to break the deref down to the
53  * point that the reads or writes generated are contiguous scalars or vectors.
54  */
55 void
emit_access(void * mem_ctx,bool is_write,ir_dereference * deref,ir_variable * base_offset,unsigned int deref_offset,bool row_major,const glsl_type * matrix_type,enum glsl_interface_packing packing,unsigned int write_mask)56 lower_buffer_access::emit_access(void *mem_ctx,
57                                  bool is_write,
58                                  ir_dereference *deref,
59                                  ir_variable *base_offset,
60                                  unsigned int deref_offset,
61                                  bool row_major,
62                                  const glsl_type *matrix_type,
63                                  enum glsl_interface_packing packing,
64                                  unsigned int write_mask)
65 {
66    if (deref->type->is_record()) {
67       unsigned int field_offset = 0;
68 
69       for (unsigned i = 0; i < deref->type->length; i++) {
70          const struct glsl_struct_field *field =
71             &deref->type->fields.structure[i];
72          ir_dereference *field_deref =
73             new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
74                                                field->name);
75 
76          unsigned field_align;
77          if (packing == GLSL_INTERFACE_PACKING_STD430)
78             field_align = field->type->std430_base_alignment(row_major);
79          else
80             field_align = field->type->std140_base_alignment(row_major);
81          field_offset = glsl_align(field_offset, field_align);
82 
83          emit_access(mem_ctx, is_write, field_deref, base_offset,
84                      deref_offset + field_offset,
85                      row_major, NULL, packing,
86                      writemask_for_size(field_deref->type->vector_elements));
87 
88          if (packing == GLSL_INTERFACE_PACKING_STD430)
89             field_offset += field->type->std430_size(row_major);
90          else
91             field_offset += field->type->std140_size(row_major);
92       }
93       return;
94    }
95 
96    if (deref->type->is_array()) {
97       unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ?
98          deref->type->fields.array->std430_array_stride(row_major) :
99          glsl_align(deref->type->fields.array->std140_size(row_major), 16);
100 
101       for (unsigned i = 0; i < deref->type->length; i++) {
102          ir_constant *element = new(mem_ctx) ir_constant(i);
103          ir_dereference *element_deref =
104             new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL),
105                                               element);
106          emit_access(mem_ctx, is_write, element_deref, base_offset,
107                      deref_offset + i * array_stride,
108                      row_major, NULL, packing,
109                      writemask_for_size(element_deref->type->vector_elements));
110       }
111       return;
112    }
113 
114    if (deref->type->is_matrix()) {
115       for (unsigned i = 0; i < deref->type->matrix_columns; i++) {
116          ir_constant *col = new(mem_ctx) ir_constant(i);
117          ir_dereference *col_deref =
118             new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col);
119 
120          /* For a row-major matrix, the next column starts at the next
121           * element.  Otherwise it is offset by the matrix stride.
122           */
123          const unsigned size_mul = row_major
124             ? (deref->type->is_double() ? 8 : 4)
125             : link_calculate_matrix_stride(deref->type, row_major, packing);
126 
127          emit_access(mem_ctx, is_write, col_deref, base_offset,
128                      deref_offset + i * size_mul,
129                      row_major, deref->type, packing,
130                      writemask_for_size(col_deref->type->vector_elements));
131       }
132       return;
133    }
134 
135    assert(deref->type->is_scalar() || deref->type->is_vector());
136 
137    if (!row_major) {
138       ir_rvalue *offset =
139          add(base_offset, new(mem_ctx) ir_constant(deref_offset));
140       unsigned mask =
141          is_write ? write_mask : (1 << deref->type->vector_elements) - 1;
142       insert_buffer_access(mem_ctx, deref, deref->type, offset, mask, -1);
143    } else {
144       /* We're dereffing a column out of a row-major matrix, so we
145        * gather the vector from each stored row.
146        */
147       assert(deref->type->is_float() || deref->type->is_double());
148       assert(matrix_type != NULL);
149 
150       const unsigned matrix_stride =
151          link_calculate_matrix_stride(matrix_type, row_major, packing);
152 
153       const glsl_type *deref_type = deref->type->get_scalar_type();
154 
155       for (unsigned i = 0; i < deref->type->vector_elements; i++) {
156          ir_rvalue *chan_offset =
157             add(base_offset,
158                 new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
159          if (!is_write || ((1U << i) & write_mask))
160             insert_buffer_access(mem_ctx, deref, deref_type, chan_offset,
161                                  (1U << i), i);
162       }
163    }
164 }
165 
166 /**
167  * Determine if a thing being dereferenced is row-major
168  *
169  * There is some trickery here.
170  *
171  * If the thing being dereferenced is a member of uniform block \b without an
172  * instance name, then the name of the \c ir_variable is the field name of an
173  * interface type.  If this field is row-major, then the thing referenced is
174  * row-major.
175  *
176  * If the thing being dereferenced is a member of uniform block \b with an
177  * instance name, then the last dereference in the tree will be an
178  * \c ir_dereference_record.  If that record field is row-major, then the
179  * thing referenced is row-major.
180  */
181 bool
is_dereferenced_thing_row_major(const ir_rvalue * deref)182 lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref)
183 {
184    bool matrix = false;
185    const ir_rvalue *ir = deref;
186 
187    while (true) {
188       matrix = matrix || ir->type->without_array()->is_matrix();
189 
190       switch (ir->ir_type) {
191       case ir_type_dereference_array: {
192          const ir_dereference_array *const array_deref =
193             (const ir_dereference_array *) ir;
194 
195          ir = array_deref->array;
196          break;
197       }
198 
199       case ir_type_dereference_record: {
200          const ir_dereference_record *const record_deref =
201             (const ir_dereference_record *) ir;
202 
203          ir = record_deref->record;
204 
205          const int idx = record_deref->field_idx;
206          assert(idx >= 0);
207 
208          const enum glsl_matrix_layout matrix_layout =
209             glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout);
210 
211          switch (matrix_layout) {
212          case GLSL_MATRIX_LAYOUT_INHERITED:
213             break;
214          case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
215             return false;
216          case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
217             return matrix || deref->type->without_array()->is_record();
218          }
219 
220          break;
221       }
222 
223       case ir_type_dereference_variable: {
224          const ir_dereference_variable *const var_deref =
225             (const ir_dereference_variable *) ir;
226 
227          const enum glsl_matrix_layout matrix_layout =
228             glsl_matrix_layout(var_deref->var->data.matrix_layout);
229 
230          switch (matrix_layout) {
231          case GLSL_MATRIX_LAYOUT_INHERITED: {
232             /* For interface block matrix variables we handle inherited
233              * layouts at HIR generation time, but we don't do that for shared
234              * variables, which are always column-major
235              */
236             MAYBE_UNUSED ir_variable *var = deref->variable_referenced();
237             assert((var->is_in_buffer_block() && !matrix) ||
238                    var->data.mode == ir_var_shader_shared);
239             return false;
240          }
241          case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
242             return false;
243          case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
244             return matrix || deref->type->without_array()->is_record();
245          }
246 
247          unreachable("invalid matrix layout");
248          break;
249       }
250 
251       default:
252          return false;
253       }
254    }
255 
256    /* The tree must have ended with a dereference that wasn't an
257     * ir_dereference_variable.  That is invalid, and it should be impossible.
258     */
259    unreachable("invalid dereference tree");
260    return false;
261 }
262 
263 /**
264  * This function initializes various values that will be used later by
265  * emit_access when actually emitting loads or stores.
266  *
267  * Note: const_offset is an input as well as an output, clients must
268  * initialize it to the offset of the variable in the underlying block, and
269  * this function will adjust it by adding the constant offset of the member
270  * being accessed into that variable.
271  */
272 void
setup_buffer_access(void * mem_ctx,ir_rvalue * deref,ir_rvalue ** offset,unsigned * const_offset,bool * row_major,const glsl_type ** matrix_type,const glsl_struct_field ** struct_field,enum glsl_interface_packing packing)273 lower_buffer_access::setup_buffer_access(void *mem_ctx,
274                                          ir_rvalue *deref,
275                                          ir_rvalue **offset,
276                                          unsigned *const_offset,
277                                          bool *row_major,
278                                          const glsl_type **matrix_type,
279                                          const glsl_struct_field **struct_field,
280                                          enum glsl_interface_packing packing)
281 {
282    *offset = new(mem_ctx) ir_constant(0u);
283    *row_major = is_dereferenced_thing_row_major(deref);
284    *matrix_type = NULL;
285 
286    /* Calculate the offset to the start of the region of the UBO
287     * dereferenced by *rvalue.  This may be a variable offset if an
288     * array dereference has a variable index.
289     */
290    while (deref) {
291       switch (deref->ir_type) {
292       case ir_type_dereference_variable: {
293          deref = NULL;
294          break;
295       }
296 
297       case ir_type_dereference_array: {
298          ir_dereference_array *deref_array = (ir_dereference_array *) deref;
299          unsigned array_stride;
300          if (deref_array->array->type->is_vector()) {
301             /* We get this when storing or loading a component out of a vector
302              * with a non-constant index. This happens for v[i] = f where v is
303              * a vector (or m[i][j] = f where m is a matrix). If we don't
304              * lower that here, it gets turned into v = vector_insert(v, i,
305              * f), which loads the entire vector, modifies one component and
306              * then write the entire thing back.  That breaks if another
307              * thread or SIMD channel is modifying the same vector.
308              */
309             array_stride = 4;
310             if (deref_array->array->type->is_64bit())
311                array_stride *= 2;
312          } else if (deref_array->array->type->is_matrix() && *row_major) {
313             /* When loading a vector out of a row major matrix, the
314              * step between the columns (vectors) is the size of a
315              * float, while the step between the rows (elements of a
316              * vector) is handled below in emit_ubo_loads.
317              */
318             array_stride = 4;
319             if (deref_array->array->type->is_64bit())
320                array_stride *= 2;
321             *matrix_type = deref_array->array->type;
322          } else if (deref_array->type->without_array()->is_interface()) {
323             /* We're processing an array dereference of an interface instance
324              * array. The thing being dereferenced *must* be a variable
325              * dereference because interfaces cannot be embedded in other
326              * types. In terms of calculating the offsets for the lowering
327              * pass, we don't care about the array index. All elements of an
328              * interface instance array will have the same offsets relative to
329              * the base of the block that backs them.
330              */
331             deref = deref_array->array->as_dereference();
332             break;
333          } else {
334             /* Whether or not the field is row-major (because it might be a
335              * bvec2 or something) does not affect the array itself. We need
336              * to know whether an array element in its entirety is row-major.
337              */
338             const bool array_row_major =
339                is_dereferenced_thing_row_major(deref_array);
340 
341             /* The array type will give the correct interface packing
342              * information
343              */
344             if (packing == GLSL_INTERFACE_PACKING_STD430) {
345                array_stride = deref_array->type->std430_array_stride(array_row_major);
346             } else {
347                array_stride = deref_array->type->std140_size(array_row_major);
348                array_stride = glsl_align(array_stride, 16);
349             }
350          }
351 
352          ir_rvalue *array_index = deref_array->array_index;
353          if (array_index->type->base_type == GLSL_TYPE_INT)
354             array_index = i2u(array_index);
355 
356          ir_constant *const_index =
357             array_index->constant_expression_value(mem_ctx, NULL);
358          if (const_index) {
359             *const_offset += array_stride * const_index->value.u[0];
360          } else {
361             *offset = add(*offset,
362                           mul(array_index,
363                               new(mem_ctx) ir_constant(array_stride)));
364          }
365          deref = deref_array->array->as_dereference();
366          break;
367       }
368 
369       case ir_type_dereference_record: {
370          ir_dereference_record *deref_record = (ir_dereference_record *) deref;
371          const glsl_type *struct_type = deref_record->record->type;
372          unsigned intra_struct_offset = 0;
373 
374          for (unsigned int i = 0; i < struct_type->length; i++) {
375             const glsl_type *type = struct_type->fields.structure[i].type;
376 
377             ir_dereference_record *field_deref = new(mem_ctx)
378                ir_dereference_record(deref_record->record,
379                                      struct_type->fields.structure[i].name);
380             const bool field_row_major =
381                is_dereferenced_thing_row_major(field_deref);
382 
383             ralloc_free(field_deref);
384 
385             unsigned field_align = 0;
386 
387             if (packing == GLSL_INTERFACE_PACKING_STD430)
388                field_align = type->std430_base_alignment(field_row_major);
389             else
390                field_align = type->std140_base_alignment(field_row_major);
391 
392             if (struct_type->fields.structure[i].offset != -1) {
393                intra_struct_offset = struct_type->fields.structure[i].offset;
394             }
395 
396             intra_struct_offset = glsl_align(intra_struct_offset, field_align);
397 
398             assert(deref_record->field_idx >= 0);
399             if (i == (unsigned) deref_record->field_idx) {
400                if (struct_field)
401                   *struct_field = &struct_type->fields.structure[i];
402                break;
403             }
404 
405             if (packing == GLSL_INTERFACE_PACKING_STD430)
406                intra_struct_offset += type->std430_size(field_row_major);
407             else
408                intra_struct_offset += type->std140_size(field_row_major);
409 
410             /* If the field just examined was itself a structure, apply rule
411              * #9:
412              *
413              *     "The structure may have padding at the end; the base offset
414              *     of the member following the sub-structure is rounded up to
415              *     the next multiple of the base alignment of the structure."
416              */
417             if (type->without_array()->is_record()) {
418                intra_struct_offset = glsl_align(intra_struct_offset,
419                                                 field_align);
420 
421             }
422          }
423 
424          *const_offset += intra_struct_offset;
425          deref = deref_record->record->as_dereference();
426          break;
427       }
428 
429       case ir_type_swizzle: {
430          ir_swizzle *deref_swizzle = (ir_swizzle *) deref;
431 
432          assert(deref_swizzle->mask.num_components == 1);
433 
434          *const_offset += deref_swizzle->mask.x * sizeof(int);
435          deref = deref_swizzle->val->as_dereference();
436          break;
437       }
438 
439       default:
440          assert(!"not reached");
441          deref = NULL;
442          break;
443       }
444    }
445 }
446 
447 } /* namespace lower_buffer_access */
448