1 /*
2  * Copyright (c) 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "brw_nir.h"
25 #include "compiler/nir/nir_builder.h"
26 
27 struct lower_intrinsics_state {
28    nir_shader *nir;
29    union {
30       struct brw_stage_prog_data *prog_data;
31       struct brw_cs_prog_data *cs_prog_data;
32    };
33    nir_function_impl *impl;
34    bool progress;
35    nir_builder builder;
36    bool cs_thread_id_used;
37 };
38 
39 static nir_ssa_def *
40 read_thread_local_id(struct lower_intrinsics_state *state)
41 {
42    nir_builder *b = &state->builder;
43    nir_shader *nir = state->nir;
44    const unsigned *sizes = nir->info->cs.local_size;
45    const unsigned group_size = sizes[0] * sizes[1] * sizes[2];
46 
47    /* Some programs have local_size dimensions so small that the thread local
48     * ID will always be 0.
49     */
50    if (group_size <= 8)
51       return nir_imm_int(b, 0);
52 
53    assert(state->cs_prog_data->thread_local_id_index >= 0);
54    state->cs_thread_id_used = true;
55    const int id_index = state->cs_prog_data->thread_local_id_index;
56 
57    nir_intrinsic_instr *load =
58       nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
59    load->num_components = 1;
60    load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
61    nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
62    nir_intrinsic_set_base(load, id_index * sizeof(uint32_t));
63    nir_intrinsic_set_range(load, sizeof(uint32_t));
64    nir_builder_instr_insert(b, &load->instr);
65    return &load->dest.ssa;
66 }
67 
68 static bool
69 lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
70                                   nir_block *block)
71 {
72    bool progress = false;
73    nir_builder *b = &state->builder;
74    nir_shader *nir = state->nir;
75 
76    nir_foreach_instr_safe(instr, block) {
77       if (instr->type != nir_instr_type_intrinsic)
78          continue;
79 
80       nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
81 
82       b->cursor = nir_after_instr(&intrinsic->instr);
83 
84       nir_ssa_def *sysval;
85       switch (intrinsic->intrinsic) {
86       case nir_intrinsic_load_local_invocation_index: {
87          assert(nir->stage == MESA_SHADER_COMPUTE);
88          /* We construct the local invocation index from:
89           *
90           *    gl_LocalInvocationIndex =
91           *       cs_thread_local_id + channel_num;
92           */
93          nir_ssa_def *thread_local_id = read_thread_local_id(state);
94          nir_ssa_def *channel = nir_load_channel_num(b);
95          sysval = nir_iadd(b, channel, thread_local_id);
96          break;
97       }
98 
99       case nir_intrinsic_load_local_invocation_id: {
100          assert(nir->stage == MESA_SHADER_COMPUTE);
101          /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based
102           * on this formula:
103           *
104           *    gl_LocalInvocationID.x =
105           *       gl_LocalInvocationIndex % gl_WorkGroupSize.x;
106           *    gl_LocalInvocationID.y =
107           *       (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %
108           *       gl_WorkGroupSize.y;
109           *    gl_LocalInvocationID.z =
110           *       (gl_LocalInvocationIndex /
111           *        (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
112           *       gl_WorkGroupSize.z;
113           */
114          unsigned *size = nir->info->cs.local_size;
115 
116          nir_ssa_def *local_index = nir_load_local_invocation_index(b);
117 
118          nir_const_value uvec3;
119          uvec3.u32[0] = 1;
120          uvec3.u32[1] = size[0];
121          uvec3.u32[2] = size[0] * size[1];
122          nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3);
123          uvec3.u32[0] = size[0];
124          uvec3.u32[1] = size[1];
125          uvec3.u32[2] = size[2];
126          nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3);
127 
128          sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val);
129          break;
130       }
131 
132       default:
133          continue;
134       }
135 
136       nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, nir_src_for_ssa(sysval));
137       nir_instr_remove(&intrinsic->instr);
138 
139       state->progress = true;
140    }
141 
142    return progress;
143 }
144 
145 static void
146 lower_cs_intrinsics_convert_impl(struct lower_intrinsics_state *state)
147 {
148    nir_builder_init(&state->builder, state->impl);
149 
150    nir_foreach_block(block, state->impl) {
151       lower_cs_intrinsics_convert_block(state, block);
152    }
153 
154    nir_metadata_preserve(state->impl,
155                          nir_metadata_block_index | nir_metadata_dominance);
156 }
157 
158 bool
159 brw_nir_lower_intrinsics(nir_shader *nir, struct brw_stage_prog_data *prog_data)
160 {
161    /* Currently we only lower intrinsics for compute shaders */
162    if (nir->stage != MESA_SHADER_COMPUTE)
163       return false;
164 
165    bool progress = false;
166    struct lower_intrinsics_state state;
167    memset(&state, 0, sizeof(state));
168    state.nir = nir;
169    state.prog_data = prog_data;
170 
171    do {
172       state.progress = false;
173       nir_foreach_function(function, nir) {
174          if (function->impl) {
175             state.impl = function->impl;
176             lower_cs_intrinsics_convert_impl(&state);
177          }
178       }
179       progress |= state.progress;
180    } while (state.progress);
181 
182    if (nir->stage == MESA_SHADER_COMPUTE && !state.cs_thread_id_used)
183       state.cs_prog_data->thread_local_id_index = -1;
184 
185    return progress;
186 }
187