1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keithw@vmware.com>
30   */
31 
32 
33 #include "util/compiler.h"
34 #include "main/context.h"
35 #include "brw_context.h"
36 #include "brw_vs.h"
37 #include "brw_util.h"
38 #include "brw_state.h"
39 #include "program/prog_print.h"
40 #include "program/prog_parameter.h"
41 #include "compiler/brw_nir.h"
42 #include "brw_program.h"
43 
44 #include "util/ralloc.h"
45 
46 /**
47  * Decide which set of clip planes should be used when clipping via
48  * gl_Position or gl_ClipVertex.
49  */
50 gl_clip_plane *
brw_select_clip_planes(struct gl_context * ctx)51 brw_select_clip_planes(struct gl_context *ctx)
52 {
53    if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) {
54       /* There is currently a GLSL vertex shader, so clip according to GLSL
55        * rules, which means compare gl_ClipVertex (or gl_Position, if
56        * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
57        * that were stored in EyeUserPlane at the time the clip planes were
58        * specified.
59        */
60       return ctx->Transform.EyeUserPlane;
61    } else {
62       /* Either we are using fixed function or an ARB vertex program.  In
63        * either case the clip planes are going to be compared against
64        * gl_Position (which is in clip coordinates) so we have to clip using
65        * _ClipUserPlane, which was transformed into clip coordinates by Mesa
66        * core.
67        */
68       return ctx->Transform._ClipUserPlane;
69    }
70 }
71 
72 static GLbitfield64
brw_vs_outputs_written(struct brw_context * brw,struct brw_vs_prog_key * key,GLbitfield64 user_varyings)73 brw_vs_outputs_written(struct brw_context *brw, struct brw_vs_prog_key *key,
74                        GLbitfield64 user_varyings)
75 {
76    const struct gen_device_info *devinfo = &brw->screen->devinfo;
77    GLbitfield64 outputs_written = user_varyings;
78 
79    if (key->copy_edgeflag) {
80       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
81    }
82 
83    if (devinfo->gen < 6) {
84       /* Put dummy slots into the VUE for the SF to put the replaced
85        * point sprite coords in.  We shouldn't need these dummy slots,
86        * which take up precious URB space, but it would mean that the SF
87        * doesn't get nice aligned pairs of input coords into output
88        * coords, which would be a pain to handle.
89        */
90       for (unsigned i = 0; i < 8; i++) {
91          if (key->point_coord_replace & (1 << i))
92             outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
93       }
94 
95       /* if back colors are written, allocate slots for front colors too */
96       if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
97          outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
98       if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
99          outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
100    }
101 
102    /* In order for legacy clipping to work, we need to populate the clip
103     * distance varying slots whenever clipping is enabled, even if the vertex
104     * shader doesn't write to gl_ClipDistance.
105     */
106    if (key->nr_userclip_plane_consts > 0) {
107       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
108       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
109    }
110 
111    return outputs_written;
112 }
113 
114 static bool
brw_codegen_vs_prog(struct brw_context * brw,struct brw_program * vp,struct brw_vs_prog_key * key)115 brw_codegen_vs_prog(struct brw_context *brw,
116                     struct brw_program *vp,
117                     struct brw_vs_prog_key *key)
118 {
119    const struct brw_compiler *compiler = brw->screen->compiler;
120    const struct gen_device_info *devinfo = &brw->screen->devinfo;
121    const GLuint *program;
122    struct brw_vs_prog_data prog_data;
123    struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
124    void *mem_ctx;
125    bool start_busy = false;
126    double start_time = 0;
127 
128    memset(&prog_data, 0, sizeof(prog_data));
129 
130    /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
131    if (vp->program.is_arb_asm)
132       stage_prog_data->use_alt_mode = true;
133 
134    mem_ctx = ralloc_context(NULL);
135 
136    nir_shader *nir = nir_shader_clone(mem_ctx, vp->program.nir);
137 
138    brw_assign_common_binding_table_offsets(devinfo, &vp->program,
139                                            &prog_data.base.base, 0);
140 
141    if (!vp->program.is_arb_asm) {
142       brw_nir_setup_glsl_uniforms(mem_ctx, nir, &vp->program,
143                                   &prog_data.base.base,
144                                   compiler->scalar_stage[MESA_SHADER_VERTEX]);
145       brw_nir_analyze_ubo_ranges(compiler, nir, key,
146                                  prog_data.base.base.ubo_ranges);
147    } else {
148       brw_nir_setup_arb_uniforms(mem_ctx, nir, &vp->program,
149                                  &prog_data.base.base);
150    }
151 
152    if (key->nr_userclip_plane_consts > 0) {
153       brw_nir_lower_legacy_clipping(nir, key->nr_userclip_plane_consts,
154                                     &prog_data.base.base);
155    }
156 
157    uint64_t outputs_written =
158       brw_vs_outputs_written(brw, key, nir->info.outputs_written);
159 
160    brw_compute_vue_map(devinfo,
161                        &prog_data.base.vue_map, outputs_written,
162                        nir->info.separate_shader, 1);
163 
164    if (0) {
165       _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true);
166    }
167 
168    if (unlikely(brw->perf_debug)) {
169       start_busy = (brw->batch.last_bo &&
170                     brw_bo_busy(brw->batch.last_bo));
171       start_time = get_time();
172    }
173 
174    if (INTEL_DEBUG & DEBUG_VS) {
175       if (vp->program.is_arb_asm)
176          brw_dump_arb_asm("vertex", &vp->program);
177    }
178 
179    int st_index = -1;
180    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
181       st_index = brw_get_shader_time_index(brw, &vp->program, ST_VS,
182                                            !vp->program.is_arb_asm);
183    }
184 
185    /* Emit GEN4 code.
186     */
187    char *error_str;
188    program = brw_compile_vs(compiler, brw, mem_ctx, key, &prog_data,
189                             nir, st_index, NULL, &error_str);
190    if (program == NULL) {
191       if (!vp->program.is_arb_asm) {
192          vp->program.sh.data->LinkStatus = LINKING_FAILURE;
193          ralloc_strcat(&vp->program.sh.data->InfoLog, error_str);
194       }
195 
196       _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", error_str);
197 
198       ralloc_free(mem_ctx);
199       return false;
200    }
201 
202    if (unlikely(brw->perf_debug)) {
203       if (vp->compiled_once) {
204          brw_debug_recompile(brw, MESA_SHADER_VERTEX, vp->program.Id,
205                              &key->base);
206       }
207       if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
208          perf_debug("VS compile took %.03f ms and stalled the GPU\n",
209                     (get_time() - start_time) * 1000);
210       }
211       vp->compiled_once = true;
212    }
213 
214    /* Scratch space is used for register spilling */
215    brw_alloc_stage_scratch(brw, &brw->vs.base,
216                            prog_data.base.base.total_scratch);
217 
218    /* The param and pull_param arrays will be freed by the shader cache. */
219    ralloc_steal(NULL, prog_data.base.base.param);
220    ralloc_steal(NULL, prog_data.base.base.pull_param);
221    brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
222                     key, sizeof(struct brw_vs_prog_key),
223                     program, prog_data.base.base.program_size,
224                     &prog_data, sizeof(prog_data),
225                     &brw->vs.base.prog_offset, &brw->vs.base.prog_data);
226    ralloc_free(mem_ctx);
227 
228    return true;
229 }
230 
231 static bool
brw_vs_state_dirty(const struct brw_context * brw)232 brw_vs_state_dirty(const struct brw_context *brw)
233 {
234    return brw_state_dirty(brw,
235                           _NEW_BUFFERS |
236                           _NEW_LIGHT |
237                           _NEW_POINT |
238                           _NEW_POLYGON |
239                           _NEW_TEXTURE |
240                           _NEW_TRANSFORM,
241                           BRW_NEW_VERTEX_PROGRAM |
242                           BRW_NEW_VS_ATTRIB_WORKAROUNDS);
243 }
244 
245 void
brw_vs_populate_key(struct brw_context * brw,struct brw_vs_prog_key * key)246 brw_vs_populate_key(struct brw_context *brw,
247                     struct brw_vs_prog_key *key)
248 {
249    struct gl_context *ctx = &brw->ctx;
250    /* BRW_NEW_VERTEX_PROGRAM */
251    struct gl_program *prog = brw->programs[MESA_SHADER_VERTEX];
252    struct brw_program *vp = (struct brw_program *) prog;
253    const struct gen_device_info *devinfo = &brw->screen->devinfo;
254 
255    memset(key, 0, sizeof(*key));
256 
257    /* Just upload the program verbatim for now.  Always send it all
258     * the inputs it asks for, whether they are varying or not.
259     */
260 
261    /* _NEW_TEXTURE */
262    brw_populate_base_prog_key(ctx, vp, &key->base);
263 
264    if (ctx->Transform.ClipPlanesEnabled != 0 &&
265        (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES) &&
266        vp->program.info.clip_distance_array_size == 0) {
267       key->nr_userclip_plane_consts =
268          util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
269    }
270 
271    if (devinfo->gen < 6) {
272       /* _NEW_POLYGON */
273       key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
274                             ctx->Polygon.BackMode != GL_FILL);
275 
276       /* _NEW_POINT */
277       if (ctx->Point.PointSprite) {
278          key->point_coord_replace = ctx->Point.CoordReplace & 0xff;
279       }
280    }
281 
282    if (prog->info.outputs_written &
283        (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
284         VARYING_BIT_BFC1)) {
285       /* _NEW_LIGHT | _NEW_BUFFERS */
286       key->clamp_vertex_color = ctx->Light._ClampVertexColor;
287    }
288 
289    /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
290    if (devinfo->gen < 8 && !devinfo->is_haswell) {
291       memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
292              sizeof(brw->vb.attrib_wa_flags));
293    }
294 }
295 
296 void
brw_upload_vs_prog(struct brw_context * brw)297 brw_upload_vs_prog(struct brw_context *brw)
298 {
299    struct brw_vs_prog_key key;
300    /* BRW_NEW_VERTEX_PROGRAM */
301    struct brw_program *vp =
302       (struct brw_program *) brw->programs[MESA_SHADER_VERTEX];
303 
304    if (!brw_vs_state_dirty(brw))
305       return;
306 
307    brw_vs_populate_key(brw, &key);
308 
309    if (brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG, &key, sizeof(key),
310                         &brw->vs.base.prog_offset, &brw->vs.base.prog_data,
311                         true))
312       return;
313 
314    if (brw_disk_cache_upload_program(brw, MESA_SHADER_VERTEX))
315       return;
316 
317    vp = (struct brw_program *) brw->programs[MESA_SHADER_VERTEX];
318    vp->id = key.base.program_string_id;
319 
320    ASSERTED bool success = brw_codegen_vs_prog(brw, vp, &key);
321    assert(success);
322 }
323 
324 void
brw_vs_populate_default_key(const struct brw_compiler * compiler,struct brw_vs_prog_key * key,struct gl_program * prog)325 brw_vs_populate_default_key(const struct brw_compiler *compiler,
326                             struct brw_vs_prog_key *key,
327                             struct gl_program *prog)
328 {
329    const struct gen_device_info *devinfo = compiler->devinfo;
330    struct brw_program *bvp = brw_program(prog);
331 
332    memset(key, 0, sizeof(*key));
333 
334    brw_populate_default_base_prog_key(devinfo, bvp, &key->base);
335 
336    key->clamp_vertex_color =
337       (prog->info.outputs_written &
338        (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
339         VARYING_BIT_BFC1));
340 }
341 
342 bool
brw_vs_precompile(struct gl_context * ctx,struct gl_program * prog)343 brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog)
344 {
345    struct brw_context *brw = brw_context(ctx);
346    struct brw_vs_prog_key key;
347    uint32_t old_prog_offset = brw->vs.base.prog_offset;
348    struct brw_stage_prog_data *old_prog_data = brw->vs.base.prog_data;
349    bool success;
350 
351    struct brw_program *bvp = brw_program(prog);
352 
353    brw_vs_populate_default_key(brw->screen->compiler, &key, prog);
354 
355    success = brw_codegen_vs_prog(brw, bvp, &key);
356 
357    brw->vs.base.prog_offset = old_prog_offset;
358    brw->vs.base.prog_data = old_prog_data;
359 
360    return success;
361 }
362