1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
/*
 * Implements a pass that lowers output and/or input variables to a
 * temporary plus an output variable with a single copy at each exit
 * point of the shader and/or an input variable with a single copy
 * at the entrance point of the shader.  This way the output variable
 * is only ever written once and/or input is only read once, and there
 * are no indirect output/input accesses.
 */
32 
33 #include "nir.h"
34 
35 struct lower_io_state {
36    nir_shader *shader;
37    nir_function_impl *entrypoint;
38    struct exec_list old_outputs;
39    struct exec_list old_inputs;
40 };
41 
42 static void
emit_copies(nir_cursor cursor,nir_shader * shader,struct exec_list * new_vars,struct exec_list * old_vars)43 emit_copies(nir_cursor cursor, nir_shader *shader, struct exec_list *new_vars,
44           struct exec_list *old_vars)
45 {
46    assert(exec_list_length(new_vars) == exec_list_length(old_vars));
47 
48    foreach_two_lists(new_node, new_vars, old_node, old_vars) {
49       nir_variable *newv = exec_node_data(nir_variable, new_node, node);
50       nir_variable *temp = exec_node_data(nir_variable, old_node, node);
51 
52       /* No need to copy the contents of a non-fb_fetch_output output variable
53        * to the temporary allocated for it, since its initial value is
54        * undefined.
55        */
56       if (temp->data.mode == nir_var_shader_out &&
57           !temp->data.fb_fetch_output)
58          continue;
59 
60       /* Can't copy the contents of the temporary back to a read-only
61        * interface variable.  The value of the temporary won't have been
62        * modified by the shader anyway.
63        */
64       if (newv->data.read_only)
65          continue;
66 
67       nir_intrinsic_instr *copy =
68          nir_intrinsic_instr_create(shader, nir_intrinsic_copy_var);
69       copy->variables[0] = nir_deref_var_create(copy, newv);
70       copy->variables[1] = nir_deref_var_create(copy, temp);
71 
72       nir_instr_insert(cursor, &copy->instr);
73    }
74 }
75 
76 static void
emit_output_copies_impl(struct lower_io_state * state,nir_function_impl * impl)77 emit_output_copies_impl(struct lower_io_state *state, nir_function_impl *impl)
78 {
79    if (state->shader->stage == MESA_SHADER_GEOMETRY) {
80       /* For geometry shaders, we have to emit the output copies right
81        * before each EmitVertex call.
82        */
83       nir_foreach_block(block, impl) {
84          nir_foreach_instr(instr, block) {
85             if (instr->type != nir_instr_type_intrinsic)
86                continue;
87 
88             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
89             if (intrin->intrinsic == nir_intrinsic_emit_vertex) {
90                nir_cursor cursor = nir_before_instr(&intrin->instr);
91                emit_copies(cursor, state->shader, &state->shader->outputs,
92                            &state->old_outputs);
93             }
94          }
95       }
96    } else if (impl == state->entrypoint) {
97       nir_cursor cursor = nir_before_block(nir_start_block(impl));
98       emit_copies(cursor, state->shader, &state->old_outputs,
99                   &state->shader->outputs);
100 
101       /* For all other shader types, we need to do the copies right before
102        * the jumps to the end block.
103        */
104       struct set_entry *block_entry;
105       set_foreach(impl->end_block->predecessors, block_entry) {
106          struct nir_block *block = (void *)block_entry->key;
107          nir_cursor cursor = nir_after_block_before_jump(block);
108          emit_copies(cursor, state->shader, &state->shader->outputs,
109                      &state->old_outputs);
110       }
111    }
112 }
113 
114 static void
emit_input_copies_impl(struct lower_io_state * state,nir_function_impl * impl)115 emit_input_copies_impl(struct lower_io_state *state, nir_function_impl *impl)
116 {
117    if (impl == state->entrypoint) {
118       nir_cursor cursor = nir_before_block(nir_start_block(impl));
119       emit_copies(cursor, state->shader, &state->old_inputs,
120                   &state->shader->inputs);
121    }
122 }
123 
124 static nir_variable *
create_shadow_temp(struct lower_io_state * state,nir_variable * var)125 create_shadow_temp(struct lower_io_state *state, nir_variable *var)
126 {
127    nir_variable *nvar = ralloc(state->shader, nir_variable);
128    memcpy(nvar, var, sizeof *nvar);
129 
130    /* The original is now the temporary */
131    nir_variable *temp = var;
132 
133    /* Reparent the name to the new variable */
134    ralloc_steal(nvar, nvar->name);
135 
136    assert(nvar->constant_initializer == NULL);
137 
138    /* Give the original a new name with @<mode>-temp appended */
139    const char *mode = (temp->data.mode == nir_var_shader_in) ? "in" : "out";
140    temp->name = ralloc_asprintf(var, "%s@%s-temp", mode, nvar->name);
141    temp->data.mode = nir_var_global;
142    temp->data.read_only = false;
143    temp->data.fb_fetch_output = false;
144 
145    return nvar;
146 }
147 
148 void
nir_lower_io_to_temporaries(nir_shader * shader,nir_function_impl * entrypoint,bool outputs,bool inputs)149 nir_lower_io_to_temporaries(nir_shader *shader, nir_function_impl *entrypoint,
150                             bool outputs, bool inputs)
151 {
152    struct lower_io_state state;
153 
154    if (shader->stage == MESA_SHADER_TESS_CTRL)
155       return;
156 
157    state.shader = shader;
158    state.entrypoint = entrypoint;
159 
160    if (inputs)
161       exec_list_move_nodes_to(&shader->inputs, &state.old_inputs);
162    else
163       exec_list_make_empty(&state.old_inputs);
164 
165    if (outputs)
166       exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
167    else
168       exec_list_make_empty(&state.old_outputs);
169 
170    /* Walk over all of the outputs turn each output into a temporary and
171     * make a new variable for the actual output.
172     */
173    nir_foreach_variable(var, &state.old_outputs) {
174       nir_variable *output = create_shadow_temp(&state, var);
175       exec_list_push_tail(&shader->outputs, &output->node);
176    }
177 
178    /* and same for inputs: */
179    nir_foreach_variable(var, &state.old_inputs) {
180       nir_variable *input = create_shadow_temp(&state, var);
181       exec_list_push_tail(&shader->inputs, &input->node);
182    }
183 
184    nir_foreach_function(function, shader) {
185       if (function->impl == NULL)
186          continue;
187 
188       if (inputs)
189          emit_input_copies_impl(&state, function->impl);
190 
191       if (outputs)
192          emit_output_copies_impl(&state, function->impl);
193 
194       nir_metadata_preserve(function->impl, nir_metadata_block_index |
195                                             nir_metadata_dominance);
196    }
197 
198    exec_list_append(&shader->globals, &state.old_inputs);
199    exec_list_append(&shader->globals, &state.old_outputs);
200 }
201