1 /*
2  * Copyright (c) 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file lower_shared_reference.cpp
26  *
27  * IR lower pass to replace dereferences of compute shader shared variables
28  * with intrinsic function calls.
29  *
30  * This relieves drivers of the responsibility of allocating space for the
31  * shared variables in the shared memory region.
32  */
33 
34 #include "lower_buffer_access.h"
35 #include "ir_builder.h"
36 #include "linker.h"
37 #include "main/macros.h"
38 #include "util/list.h"
39 #include "glsl_parser_extras.h"
40 
41 using namespace ir_builder;
42 
43 namespace {
44 
45 struct var_offset {
46    struct list_head node;
47    const ir_variable *var;
48    unsigned offset;
49 };
50 
51 class lower_shared_reference_visitor :
52       public lower_buffer_access::lower_buffer_access {
53 public:
54 
lower_shared_reference_visitor(struct gl_linked_shader * shader)55    lower_shared_reference_visitor(struct gl_linked_shader *shader)
56       : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u)
57    {
58       list_inithead(&var_offsets);
59    }
60 
~lower_shared_reference_visitor()61    ~lower_shared_reference_visitor()
62    {
63       ralloc_free(list_ctx);
64    }
65 
66    enum {
67       shared_load_access,
68       shared_store_access,
69       shared_atomic_access,
70    } buffer_access_type;
71 
72    void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
73                              const glsl_type *type, ir_rvalue *offset,
74                              unsigned mask, int channel);
75 
76    void handle_rvalue(ir_rvalue **rvalue);
77    ir_visitor_status visit_enter(ir_assignment *ir);
78    void handle_assignment(ir_assignment *ir);
79 
80    ir_call *lower_shared_atomic_intrinsic(ir_call *ir);
81    ir_call *check_for_shared_atomic_intrinsic(ir_call *ir);
82    ir_visitor_status visit_enter(ir_call *ir);
83 
84    unsigned get_shared_offset(const ir_variable *);
85 
86    ir_call *shared_load(void *mem_ctx, const struct glsl_type *type,
87                         ir_rvalue *offset);
88    ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
89                          unsigned write_mask);
90 
91    void *list_ctx;
92    struct gl_linked_shader *shader;
93    struct list_head var_offsets;
94    unsigned shared_size;
95    bool progress;
96 };
97 
98 unsigned
get_shared_offset(const ir_variable * var)99 lower_shared_reference_visitor::get_shared_offset(const ir_variable *var)
100 {
101    list_for_each_entry(var_offset, var_entry, &var_offsets, node) {
102       if (var_entry->var == var)
103          return var_entry->offset;
104    }
105 
106    struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset);
107    list_add(&new_entry->node, &var_offsets);
108    new_entry->var = var;
109 
110    unsigned var_align = var->type->std430_base_alignment(false);
111    new_entry->offset = glsl_align(shared_size, var_align);
112 
113    unsigned var_size = var->type->std430_size(false);
114    shared_size = new_entry->offset + var_size;
115 
116    return new_entry->offset;
117 }
118 
119 void
handle_rvalue(ir_rvalue ** rvalue)120 lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
121 {
122    if (!*rvalue)
123       return;
124 
125    ir_dereference *deref = (*rvalue)->as_dereference();
126    if (!deref)
127       return;
128 
129    ir_variable *var = deref->variable_referenced();
130    if (!var || var->data.mode != ir_var_shader_shared)
131       return;
132 
133    buffer_access_type = shared_load_access;
134 
135    void *mem_ctx = ralloc_parent(shader->ir);
136 
137    ir_rvalue *offset = NULL;
138    unsigned const_offset = get_shared_offset(var);
139    bool row_major;
140    const glsl_type *matrix_type;
141    assert(var->get_interface_type() == NULL);
142    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
143 
144    setup_buffer_access(mem_ctx, deref,
145                        &offset, &const_offset,
146                        &row_major, &matrix_type, NULL, packing);
147 
148    /* Now that we've calculated the offset to the start of the
149     * dereference, walk over the type and emit loads into a temporary.
150     */
151    const glsl_type *type = (*rvalue)->type;
152    ir_variable *load_var = new(mem_ctx) ir_variable(type,
153                                                     "shared_load_temp",
154                                                     ir_var_temporary);
155    base_ir->insert_before(load_var);
156 
157    ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
158                                                        "shared_load_temp_offset",
159                                                        ir_var_temporary);
160    base_ir->insert_before(load_offset);
161    base_ir->insert_before(assign(load_offset, offset));
162 
163    deref = new(mem_ctx) ir_dereference_variable(load_var);
164 
165    emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major,
166                matrix_type, packing, 0);
167 
168    *rvalue = deref;
169 
170    progress = true;
171 }
172 
173 void
handle_assignment(ir_assignment * ir)174 lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
175 {
176    if (!ir || !ir->lhs)
177       return;
178 
179    ir_rvalue *rvalue = ir->lhs->as_rvalue();
180    if (!rvalue)
181       return;
182 
183    ir_dereference *deref = ir->lhs->as_dereference();
184    if (!deref)
185       return;
186 
187    ir_variable *var = ir->lhs->variable_referenced();
188    if (!var || var->data.mode != ir_var_shader_shared)
189       return;
190 
191    buffer_access_type = shared_store_access;
192 
193    /* We have a write to a shared variable, so declare a temporary and rewrite
194     * the assignment so that the temporary is the LHS.
195     */
196    void *mem_ctx = ralloc_parent(shader->ir);
197 
198    const glsl_type *type = rvalue->type;
199    ir_variable *store_var = new(mem_ctx) ir_variable(type,
200                                                      "shared_store_temp",
201                                                      ir_var_temporary);
202    base_ir->insert_before(store_var);
203    ir->lhs = new(mem_ctx) ir_dereference_variable(store_var);
204 
205    ir_rvalue *offset = NULL;
206    unsigned const_offset = get_shared_offset(var);
207    bool row_major;
208    const glsl_type *matrix_type;
209    assert(var->get_interface_type() == NULL);
210    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
211 
212    setup_buffer_access(mem_ctx, deref,
213                        &offset, &const_offset,
214                        &row_major, &matrix_type, NULL, packing);
215 
216    deref = new(mem_ctx) ir_dereference_variable(store_var);
217 
218    ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
219                                                         "shared_store_temp_offset",
220                                                         ir_var_temporary);
221    base_ir->insert_before(store_offset);
222    base_ir->insert_before(assign(store_offset, offset));
223 
224    /* Now we have to write the value assigned to the temporary back to memory */
225    emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major,
226                matrix_type, packing, ir->write_mask);
227 
228    progress = true;
229 }
230 
231 ir_visitor_status
visit_enter(ir_assignment * ir)232 lower_shared_reference_visitor::visit_enter(ir_assignment *ir)
233 {
234    handle_assignment(ir);
235    return rvalue_visit(ir);
236 }
237 
238 void
insert_buffer_access(void * mem_ctx,ir_dereference * deref,const glsl_type * type,ir_rvalue * offset,unsigned mask,int)239 lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx,
240                                                      ir_dereference *deref,
241                                                      const glsl_type *type,
242                                                      ir_rvalue *offset,
243                                                      unsigned mask,
244                                                      int /* channel */)
245 {
246    if (buffer_access_type == shared_store_access) {
247       ir_call *store = shared_store(mem_ctx, deref, offset, mask);
248       base_ir->insert_after(store);
249    } else {
250       ir_call *load = shared_load(mem_ctx, type, offset);
251       base_ir->insert_before(load);
252       ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL);
253       base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
254                                     value));
255    }
256 }
257 
258 static bool
compute_shader_enabled(const _mesa_glsl_parse_state * state)259 compute_shader_enabled(const _mesa_glsl_parse_state *state)
260 {
261    return state->stage == MESA_SHADER_COMPUTE;
262 }
263 
264 ir_call *
shared_store(void * mem_ctx,ir_rvalue * deref,ir_rvalue * offset,unsigned write_mask)265 lower_shared_reference_visitor::shared_store(void *mem_ctx,
266                                              ir_rvalue *deref,
267                                              ir_rvalue *offset,
268                                              unsigned write_mask)
269 {
270    exec_list sig_params;
271 
272    ir_variable *offset_ref = new(mem_ctx)
273       ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
274    sig_params.push_tail(offset_ref);
275 
276    ir_variable *val_ref = new(mem_ctx)
277       ir_variable(deref->type, "value" , ir_var_function_in);
278    sig_params.push_tail(val_ref);
279 
280    ir_variable *writemask_ref = new(mem_ctx)
281       ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
282    sig_params.push_tail(writemask_ref);
283 
284    ir_function_signature *sig = new(mem_ctx)
285       ir_function_signature(glsl_type::void_type, compute_shader_enabled);
286    assert(sig);
287    sig->replace_parameters(&sig_params);
288    sig->intrinsic_id = ir_intrinsic_shared_store;
289 
290    ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared");
291    f->add_signature(sig);
292 
293    exec_list call_params;
294    call_params.push_tail(offset->clone(mem_ctx, NULL));
295    call_params.push_tail(deref->clone(mem_ctx, NULL));
296    call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
297    return new(mem_ctx) ir_call(sig, NULL, &call_params);
298 }
299 
300 ir_call *
shared_load(void * mem_ctx,const struct glsl_type * type,ir_rvalue * offset)301 lower_shared_reference_visitor::shared_load(void *mem_ctx,
302                                             const struct glsl_type *type,
303                                             ir_rvalue *offset)
304 {
305    exec_list sig_params;
306 
307    ir_variable *offset_ref = new(mem_ctx)
308       ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
309    sig_params.push_tail(offset_ref);
310 
311    ir_function_signature *sig =
312       new(mem_ctx) ir_function_signature(type, compute_shader_enabled);
313    assert(sig);
314    sig->replace_parameters(&sig_params);
315    sig->intrinsic_id = ir_intrinsic_shared_load;
316 
317    ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared");
318    f->add_signature(sig);
319 
320    ir_variable *result = new(mem_ctx)
321       ir_variable(type, "shared_load_result", ir_var_temporary);
322    base_ir->insert_before(result);
323    ir_dereference_variable *deref_result = new(mem_ctx)
324       ir_dereference_variable(result);
325 
326    exec_list call_params;
327    call_params.push_tail(offset->clone(mem_ctx, NULL));
328 
329    return new(mem_ctx) ir_call(sig, deref_result, &call_params);
330 }
331 
332 /* Lowers the intrinsic call to a new internal intrinsic that swaps the access
333  * to the shared variable in the first parameter by an offset. This involves
334  * creating the new internal intrinsic (i.e. the new function signature).
335  */
336 ir_call *
lower_shared_atomic_intrinsic(ir_call * ir)337 lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
338 {
339    /* Shared atomics usually have 2 parameters, the shared variable and an
340     * integer argument. The exception is CompSwap, that has an additional
341     * integer parameter.
342     */
343    int param_count = ir->actual_parameters.length();
344    assert(param_count == 2 || param_count == 3);
345 
346    /* First argument must be a scalar integer shared variable */
347    exec_node *param = ir->actual_parameters.get_head();
348    ir_instruction *inst = (ir_instruction *) param;
349    assert(inst->ir_type == ir_type_dereference_variable ||
350           inst->ir_type == ir_type_dereference_array ||
351           inst->ir_type == ir_type_dereference_record ||
352           inst->ir_type == ir_type_swizzle);
353 
354    ir_rvalue *deref = (ir_rvalue *) inst;
355    assert(deref->type->is_scalar() && deref->type->is_integer());
356 
357    ir_variable *var = deref->variable_referenced();
358    assert(var);
359 
360    /* Compute the offset to the start if the dereference
361     */
362    void *mem_ctx = ralloc_parent(shader->ir);
363 
364    ir_rvalue *offset = NULL;
365    unsigned const_offset = get_shared_offset(var);
366    bool row_major;
367    const glsl_type *matrix_type;
368    assert(var->get_interface_type() == NULL);
369    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
370    buffer_access_type = shared_atomic_access;
371 
372    setup_buffer_access(mem_ctx, deref,
373                        &offset, &const_offset,
374                        &row_major, &matrix_type, NULL, packing);
375 
376    assert(offset);
377    assert(!row_major);
378    assert(matrix_type == NULL);
379 
380    ir_rvalue *deref_offset =
381       add(offset, new(mem_ctx) ir_constant(const_offset));
382 
383    /* Create the new internal function signature that will take an offset
384     * instead of a shared variable
385     */
386    exec_list sig_params;
387    ir_variable *sig_param = new(mem_ctx)
388       ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
389    sig_params.push_tail(sig_param);
390 
391    const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
392       glsl_type::int_type : glsl_type::uint_type;
393    sig_param = new(mem_ctx)
394          ir_variable(type, "data1", ir_var_function_in);
395    sig_params.push_tail(sig_param);
396 
397    if (param_count == 3) {
398       sig_param = new(mem_ctx)
399             ir_variable(type, "data2", ir_var_function_in);
400       sig_params.push_tail(sig_param);
401    }
402 
403    ir_function_signature *sig =
404       new(mem_ctx) ir_function_signature(deref->type,
405                                          compute_shader_enabled);
406    assert(sig);
407    sig->replace_parameters(&sig_params);
408 
409    assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
410    assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
411    sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, shared);
412 
413    char func_name[64];
414    sprintf(func_name, "%s_shared", ir->callee_name());
415    ir_function *f = new(mem_ctx) ir_function(func_name);
416    f->add_signature(sig);
417 
418    /* Now, create the call to the internal intrinsic */
419    exec_list call_params;
420    call_params.push_tail(deref_offset);
421    param = ir->actual_parameters.get_head()->get_next();
422    ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
423    call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
424    if (param_count == 3) {
425       param = param->get_next();
426       param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
427       call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
428    }
429    ir_dereference_variable *return_deref =
430       ir->return_deref->clone(mem_ctx, NULL);
431    return new(mem_ctx) ir_call(sig, return_deref, &call_params);
432 }
433 
434 ir_call *
check_for_shared_atomic_intrinsic(ir_call * ir)435 lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir)
436 {
437    exec_list& params = ir->actual_parameters;
438 
439    if (params.length() < 2 || params.length() > 3)
440       return ir;
441 
442    ir_rvalue *rvalue =
443       ((ir_instruction *) params.get_head())->as_rvalue();
444    if (!rvalue)
445       return ir;
446 
447    ir_variable *var = rvalue->variable_referenced();
448    if (!var || var->data.mode != ir_var_shader_shared)
449       return ir;
450 
451    const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
452    if (id == ir_intrinsic_generic_atomic_add ||
453        id == ir_intrinsic_generic_atomic_min ||
454        id == ir_intrinsic_generic_atomic_max ||
455        id == ir_intrinsic_generic_atomic_and ||
456        id == ir_intrinsic_generic_atomic_or ||
457        id == ir_intrinsic_generic_atomic_xor ||
458        id == ir_intrinsic_generic_atomic_exchange ||
459        id == ir_intrinsic_generic_atomic_comp_swap) {
460       return lower_shared_atomic_intrinsic(ir);
461    }
462 
463    return ir;
464 }
465 
466 ir_visitor_status
visit_enter(ir_call * ir)467 lower_shared_reference_visitor::visit_enter(ir_call *ir)
468 {
469    ir_call *new_ir = check_for_shared_atomic_intrinsic(ir);
470    if (new_ir != ir) {
471       progress = true;
472       base_ir->replace_with(new_ir);
473       return visit_continue_with_parent;
474    }
475 
476    return rvalue_visit(ir);
477 }
478 
479 } /* unnamed namespace */
480 
481 void
lower_shared_reference(struct gl_context * ctx,struct gl_shader_program * prog,struct gl_linked_shader * shader)482 lower_shared_reference(struct gl_context *ctx,
483                        struct gl_shader_program *prog,
484                        struct gl_linked_shader *shader)
485 {
486    if (shader->Stage != MESA_SHADER_COMPUTE)
487       return;
488 
489    lower_shared_reference_visitor v(shader);
490 
491    /* Loop over the instructions lowering references, because we take a deref
492     * of an shared variable array using a shared variable dereference as the
493     * index will produce a collection of instructions all of which have cloned
494     * shared variable dereferences for that array index.
495     */
496    do {
497       v.progress = false;
498       visit_list_elements(&v, shader->ir);
499    } while (v.progress);
500 
501    prog->Comp.SharedSize = v.shared_size;
502 
503    /* Section 19.1 (Compute Shader Variables) of the OpenGL 4.5 (Core Profile)
504     * specification says:
505     *
506     *   "There is a limit to the total size of all variables declared as
507     *    shared in a single program object. This limit, expressed in units of
508     *    basic machine units, may be queried as the value of
509     *    MAX_COMPUTE_SHARED_MEMORY_SIZE."
510     */
511    if (prog->Comp.SharedSize > ctx->Const.MaxComputeSharedMemorySize) {
512       linker_error(prog, "Too much shared memory used (%u/%u)\n",
513                    prog->Comp.SharedSize,
514                    ctx->Const.MaxComputeSharedMemorySize);
515    }
516 }
517