1 /* 2 * Copyright © 2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file brw_vec4_gs.c 26 * 27 * State atom for client-programmable geometry shaders, and support code. 28 */ 29 30 #include "brw_gs.h" 31 #include "brw_context.h" 32 #include "brw_vec4_gs_visitor.h" 33 #include "brw_state.h" 34 #include "brw_ff_gs.h" 35 #include "brw_nir.h" 36 #include "brw_program.h" 37 #include "compiler/glsl/ir_uniform.h" 38 39 static void 40 brw_gs_debug_recompile(struct brw_context *brw, struct gl_program *prog, 41 const struct brw_gs_prog_key *key) 42 { 43 perf_debug("Recompiling geometry shader for program %d\n", prog->Id); 44 45 bool found = false; 46 const struct brw_gs_prog_key *old_key = 47 brw_find_previous_compile(&brw->cache, BRW_CACHE_GS_PROG, 48 key->program_string_id); 49 50 if (!old_key) { 51 perf_debug(" Didn't find previous compile in the shader cache for " 52 "debug\n"); 53 return; 54 } 55 56 found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex); 57 58 if (!found) { 59 perf_debug(" Something else\n"); 60 } 61 } 62 63 static void 64 assign_gs_binding_table_offsets(const struct gen_device_info *devinfo, 65 const struct gl_program *prog, 66 struct brw_gs_prog_data *prog_data) 67 { 68 /* In gen6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform 69 * feedback surfaces. 70 */ 71 uint32_t reserved = devinfo->gen == 6 ? BRW_MAX_SOL_BINDINGS : 0; 72 73 brw_assign_common_binding_table_offsets(devinfo, prog, 74 &prog_data->base.base, reserved); 75 } 76 77 static bool 78 brw_codegen_gs_prog(struct brw_context *brw, 79 struct brw_program *gp, 80 struct brw_gs_prog_key *key) 81 { 82 struct brw_compiler *compiler = brw->screen->compiler; 83 const struct gen_device_info *devinfo = &brw->screen->devinfo; 84 struct brw_stage_state *stage_state = &brw->gs.base; 85 struct brw_gs_prog_data prog_data; 86 bool start_busy = false; 87 double start_time = 0; 88 89 memset(&prog_data, 0, sizeof(prog_data)); 90 91 assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data); 92 93 /* Allocate the references to the uniforms that will end up in the 94 * prog_data associated with the compiled program, and which will be freed 95 * by the state cache. 96 * 97 * Note: param_count needs to be num_uniform_components * 4, since we add 98 * padding around uniform values below vec4 size, so the worst case is that 99 * every uniform is a float which gets padded to the size of a vec4. 100 */ 101 int param_count = gp->program.nir->num_uniforms / 4; 102 103 prog_data.base.base.param = 104 rzalloc_array(NULL, const gl_constant_value *, param_count); 105 prog_data.base.base.pull_param = 106 rzalloc_array(NULL, const gl_constant_value *, param_count); 107 prog_data.base.base.image_param = 108 rzalloc_array(NULL, struct brw_image_param, 109 gp->program.info.num_images); 110 prog_data.base.base.nr_params = param_count; 111 prog_data.base.base.nr_image_params = gp->program.info.num_images; 112 113 brw_nir_setup_glsl_uniforms(gp->program.nir, &gp->program, 114 &prog_data.base.base, 115 compiler->scalar_stage[MESA_SHADER_GEOMETRY]); 116 117 uint64_t outputs_written = gp->program.info.outputs_written; 118 119 brw_compute_vue_map(devinfo, 120 &prog_data.base.vue_map, outputs_written, 121 gp->program.info.separate_shader); 122 123 int st_index = -1; 124 if (INTEL_DEBUG & DEBUG_SHADER_TIME) 125 st_index = brw_get_shader_time_index(brw, &gp->program, ST_GS, true); 126 127 if (unlikely(brw->perf_debug)) { 128 start_busy = brw->batch.last_bo && drm_intel_bo_busy(brw->batch.last_bo); 129 start_time = get_time(); 130 } 131 132 void *mem_ctx = ralloc_context(NULL); 133 unsigned program_size; 134 char *error_str; 135 const unsigned *program = 136 brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key, 137 &prog_data, gp->program.nir, &gp->program, 138 st_index, &program_size, &error_str); 139 if (program == NULL) { 140 ralloc_strcat(&gp->program.sh.data->InfoLog, error_str); 141 _mesa_problem(NULL, "Failed to compile geometry shader: %s\n", error_str); 142 143 ralloc_free(mem_ctx); 144 return false; 145 } 146 147 if (unlikely(brw->perf_debug)) { 148 if (gp->compiled_once) { 149 brw_gs_debug_recompile(brw, &gp->program, key); 150 } 151 if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { 152 perf_debug("GS compile took %.03f ms and stalled the GPU\n", 153 (get_time() - start_time) * 1000); 154 } 155 gp->compiled_once = true; 156 } 157 158 /* Scratch space is used for register spilling */ 159 brw_alloc_stage_scratch(brw, stage_state, 160 prog_data.base.base.total_scratch, 161 devinfo->max_gs_threads); 162 163 brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG, 164 key, sizeof(*key), 165 program, program_size, 166 &prog_data, sizeof(prog_data), 167 &stage_state->prog_offset, &brw->gs.base.prog_data); 168 ralloc_free(mem_ctx); 169 170 return true; 171 } 172 173 static bool 174 brw_gs_state_dirty(const struct brw_context *brw) 175 { 176 return brw_state_dirty(brw, 177 _NEW_TEXTURE, 178 BRW_NEW_GEOMETRY_PROGRAM | 179 BRW_NEW_TRANSFORM_FEEDBACK); 180 } 181 182 void 183 brw_gs_populate_key(struct brw_context *brw, 184 struct brw_gs_prog_key *key) 185 { 186 struct gl_context *ctx = &brw->ctx; 187 struct brw_program *gp = (struct brw_program *) brw->geometry_program; 188 189 memset(key, 0, sizeof(*key)); 190 191 key->program_string_id = gp->id; 192 193 /* _NEW_TEXTURE */ 194 brw_populate_sampler_prog_key_data(ctx, &gp->program, &key->tex); 195 } 196 197 void 198 brw_upload_gs_prog(struct brw_context *brw) 199 { 200 struct brw_stage_state *stage_state = &brw->gs.base; 201 struct brw_gs_prog_key key; 202 /* BRW_NEW_GEOMETRY_PROGRAM */ 203 struct brw_program *gp = (struct brw_program *) brw->geometry_program; 204 205 if (!brw_gs_state_dirty(brw)) 206 return; 207 208 if (gp == NULL) { 209 /* No geometry shader. Vertex data just passes straight through. */ 210 if (brw->gen == 6 && 211 (brw->ctx.NewDriverState & BRW_NEW_TRANSFORM_FEEDBACK)) { 212 gen6_brw_upload_ff_gs_prog(brw); 213 return; 214 } 215 216 /* Other state atoms had better not try to access prog_data, since 217 * there's no GS program. 218 */ 219 brw->gs.base.prog_data = NULL; 220 221 return; 222 } 223 224 brw_gs_populate_key(brw, &key); 225 226 if (!brw_search_cache(&brw->cache, BRW_CACHE_GS_PROG, 227 &key, sizeof(key), 228 &stage_state->prog_offset, 229 &brw->gs.base.prog_data)) { 230 bool success = brw_codegen_gs_prog(brw, gp, &key); 231 assert(success); 232 (void)success; 233 } 234 } 235 236 bool 237 brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog) 238 { 239 struct brw_context *brw = brw_context(ctx); 240 struct brw_gs_prog_key key; 241 uint32_t old_prog_offset = brw->gs.base.prog_offset; 242 struct brw_stage_prog_data *old_prog_data = brw->gs.base.prog_data; 243 bool success; 244 245 struct brw_program *bgp = brw_program(prog); 246 247 memset(&key, 0, sizeof(key)); 248 249 brw_setup_tex_for_precompile(brw, &key.tex, prog); 250 key.program_string_id = bgp->id; 251 252 success = brw_codegen_gs_prog(brw, bgp, &key); 253 254 brw->gs.base.prog_offset = old_prog_offset; 255 brw->gs.base.prog_data = old_prog_data; 256 257 return success; 258 } 259