/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */



#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_program.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "genxml/genX_bits.h"
#include "main/framebuffer.h"

void
brw_enable_obj_preemption(struct brw_context *brw, bool enable)
{
   ASSERTED const struct gen_device_info *devinfo = &brw->screen->devinfo;
   assert(devinfo->gen >= 9);

   if (enable == brw->object_preemption)
      return;

   /* A fixed function pipe flush is required before modifying this field */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

   bool replay_mode = enable ?
      GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER;

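   /* CS_CHICKEN1, like the other masked registers written in this file,
    * treats the upper 16 bits of the payload as per-bit write enables for
    * the lower 16 bits; that is what GEN9_REPLAY_MODE_MASK (and REG_MASK()
    * elsewhere) supply.  A rough sketch of the idiom, using a hypothetical
    * FIELD bit:
    *
    *    uint32_t field = 1 << 0;                  // bit to update
    *    uint32_t value = enable ? field : 0;      // new contents
    *    brw_load_register_imm32(brw, REG, (field << 16) | value);
    *
    * Bits whose write enable is 0 keep their previous register value.
    */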
   /* Select mid-object replay to enable object-level preemption, or
    * mid-buffer replay to disable it.
    */
   brw_load_register_imm32(brw, CS_CHICKEN1,
                           replay_mode | GEN9_REPLAY_MODE_MASK);

   brw->object_preemption = enable;
}

static void
brw_upload_gen11_slice_hashing_state(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   int subslices_delta =
      devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1];
   if (subslices_delta == 0)
      return;

   unsigned size = GEN11_SLICE_HASH_TABLE_length * 4;
   uint32_t hash_address;

   uint32_t *map = brw_state_batch(brw, size, 64, &hash_address);

   unsigned idx = 0;

   unsigned sl_small = 0;
   unsigned sl_big = 1;
   if (subslices_delta > 0) {
      sl_small = 1;
      sl_big = 0;
   }

   /**
    * Create a 16x16 slice hashing table like the following one:
    *
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    *
    * The table above is used when pixel pipe 0 has fewer subslices than
    * pixel pipe 1.  When pixel pipe 0 has more subslices, a similar table
    * with the 0's and 1's inverted is used.
    */
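   /* Each 32-bit table entry packs eight 4-bit subslice selects (so two
    * DWords per 16-entry row), and cycling idx modulo 3 produces the 1/3
    * vs. 2/3 interleave shown above: every third select targets the pixel
    * pipe with fewer subslices, the rest target the bigger one.
    */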
   for (int i = 0; i < GEN11_SLICE_HASH_TABLE_length; i++) {
      uint32_t dw = 0;

      for (int j = 0; j < 8; j++) {
         unsigned slice = idx++ % 3 ? sl_big : sl_small;
         dw |= slice << (j * 4);
      }
      map[i] = dw;
   }

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2));
   OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1);
   ADVANCE_BATCH();

   /* From the gen10/gen11 workaround table in the h/w specs:
    *
    *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
    *     a value of 0xFFFF"
    *
    * This means that whenever we update a field with this instruction, we
    * need to update all the others.
    *
    * Since this is the first time we emit this instruction, we are only
    * setting the SLICE_HASHING_TABLE_ENABLE flag and leaving everything
    * else at its default state (0).
    */
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
   OUT_BATCH(0xffff | SLICE_HASHING_TABLE_ENABLE);
   ADVANCE_BATCH();
}

static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   if (devinfo->gen == 11) {
      /* Bit 5 "Headerless Message for Pre-emptable Contexts" in the SAMPLER
       * MODE register defaults to 0, which means headerless sampler messages
       * are not allowed for pre-emptable contexts.  Set bit 5 to 1 to allow
       * them.
       */
      brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);

      /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the
       * HALF_SLICE_CHICKEN7 register.
       */
      brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
                              TEXEL_OFFSET_FIX_MASK |
                              TEXEL_OFFSET_FIX_ENABLE);

      /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set
       * in the L3CNTLREG register.  The default setting of the bit is not
       * the desirable behavior.
       */
      brw_load_register_imm32(brw, GEN8_L3CNTLREG,
                              GEN8_L3CNTLREG_EDBC_NO_HANG);
   }

   /* The hardware specification recommends disabling repacking for
    * compatibility with the decompression mechanism in the display
    * controller.
    */
   if (devinfo->disable_ccs_repack) {
      brw_load_register_imm32(brw, GEN7_CACHE_MODE_0,
                              GEN11_DISABLE_REPACKING_FOR_COMPRESSION |
                              REG_MASK(GEN11_DISABLE_REPACKING_FOR_COMPRESSION));
   }

   if (devinfo->gen == 9) {
      /* Recommended optimizations for Victim Cache eviction and floating
       * point blending.
       */
      brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
                              REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
                              REG_MASK(GEN9_MSC_RAW_HAZARD_AVOIDANCE_BIT) |
                              REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                              GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                              GEN9_MSC_RAW_HAZARD_AVOIDANCE_BIT |
                              GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
   }

   if (devinfo->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
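   /* Sketch, for illustration only: each open-coded packet below is the
    * same three-DWord MI_LOAD_REGISTER_IMM sequence that
    * brw_load_register_imm32() wraps, roughly:
    *
    *    BEGIN_BATCH(3);
    *    OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));   // header + DWord count
    *    OUT_BATCH(reg);                              // register offset
    *    OUT_BATCH(imm);                              // value, with REG_MASK
    *    ADVANCE_BATCH();                             // write enables on top
    */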
   if (!compiler->constant_buffer_0_is_relative) {
      if (devinfo->gen >= 9) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(CS_DEBUG_MODE2);
         OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      } else if (devinfo->gen == 8) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(INSTPM);
         OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      }
   }

   brw->object_preemption = false;

   if (devinfo->gen >= 10)
      brw_enable_obj_preemption(brw, true);

   if (devinfo->gen == 11)
      brw_upload_gen11_slice_hashing_state(brw);
}

static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const.  We want
    * it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}
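
/* Illustrative sketch with hypothetical atom names (not actual driver
 * code): each genN_init_atoms() builds a per-pipeline list of tracked-state
 * atoms and installs it with brw_copy_pipeline_atoms(), along these lines:
 *
 *    static const struct brw_tracked_state *render_atoms[] = {
 *       &brw_example_vs_atom,
 *       &brw_example_wm_atom,
 *    };
 *    brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
 *                            render_atoms, ARRAY_SIZE(render_atoms));
 */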

void brw_init_state(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

   brw_init_caches(brw);

   if (devinfo->gen >= 11)
      gen11_init_atoms(brw);
   else if (devinfo->gen >= 10)
      unreachable("Gen10 support dropped.");
   else if (devinfo->gen >= 9)
      gen9_init_atoms(brw);
   else if (devinfo->gen >= 8)
      gen8_init_atoms(brw);
   else if (devinfo->is_haswell)
      gen75_init_atoms(brw);
   else if (devinfo->gen >= 7)
      gen7_init_atoms(brw);
   else if (devinfo->gen >= 6)
      gen6_init_atoms(brw);
   else if (devinfo->gen >= 5)
      gen5_init_atoms(brw);
   else if (devinfo->is_g4x)
      gen45_init_atoms(brw);
   else
      gen4_init_atoms(brw);

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect on the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all
    * possible dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
   ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
   ctx->DriverFlags.NewIntelConservativeRasterization =
      BRW_NEW_CONSERVATIVE_RASTERIZATION;
}


void brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */

static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

static void accumulate_state(struct brw_state_flags *a,
                             const struct brw_state_flags *b)
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}


static void xor_states(struct brw_state_flags *result,
                       const struct brw_state_flags *a,
                       const struct brw_state_flags *b)
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}

struct dirty_bit_map {
   uint64_t bit;
   char *name;
   uint32_t count;
};

#define DEFINE_BIT(name) {name, #name, 0}

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE_OBJECT),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_TEXTURE_STATE),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems. */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   DEFINE_BIT(BRW_NEW_DRAW_CALL),
   DEFINE_BIT(BRW_NEW_AUX_STATE),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}

static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
   if (brw->programs[MESA_SHADER_TESS_EVAL]) {
      brw_upload_tcs_prog(brw);
      brw_upload_tes_prog(brw);
   } else {
      brw->tcs.base.prog_data = NULL;
      brw->tes.base.prog_data = NULL;
   }
}

static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);
      brw_upload_tess_programs(brw);

      if (brw->programs[MESA_SHADER_GEOMETRY]) {
         brw_upload_gs_prog(brw);
      } else {
         brw->gs.base.prog_data = NULL;
         if (devinfo->gen < 7)
            brw_upload_ff_gs_prog(brw);
      }

      /* Update the VUE map for data exiting the GS stage of the pipeline.
       * This comes from the last enabled shader stage.
       */
      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
      bool old_separate = brw->vue_map_geom_out.separate;
      struct brw_vue_prog_data *vue_prog_data;
      if (brw->programs[MESA_SHADER_GEOMETRY])
         vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
      else if (brw->programs[MESA_SHADER_TESS_EVAL])
         vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
      else
         vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);

      brw->vue_map_geom_out = vue_prog_data->vue_map;

      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
      if (old_slots != brw->vue_map_geom_out.slots_valid ||
          old_separate != brw->vue_map_geom_out.separate)
         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;

      if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
          VARYING_BIT_VIEWPORT) {
         ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
         brw->clip.viewport_count =
            (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
            ctx->Const.MaxViewports : 1;
      }

      brw_upload_wm_prog(brw);

      if (devinfo->gen < 6) {
         brw_upload_clip_prog(brw);
         brw_upload_sf_prog(brw);
      }

      brw_disk_cache_write_render_programs(brw);
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
      brw_disk_cache_write_compute_program(brw);
   }
}

static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

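/* Emit an atom if any of the state bits it watches are dirty.  An atom's
 * emit() may itself flag more driver state, so the dirty set is re-merged
 * afterwards for the benefit of later atoms in the list.
 */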
static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      merge_ctx_state(brw, state);
   }
}

static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   const unsigned fb_samples =
      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);

   brw_select_pipeline(brw, pipeline);

   if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
      brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);

   if (INTEL_DEBUG & DEBUG_REEMIT) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->programs[MESA_SHADER_FRAGMENT] !=
          ctx->FragmentProgram._Current) {
         brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_TESS_EVAL] !=
          ctx->TessEvalProgram._Current) {
         brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_TESS_CTRL] !=
          ctx->TessCtrlProgram._Current) {
         brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_GEOMETRY] !=
          ctx->GeometryProgram._Current) {
         brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
         brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
      brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (INTEL_DEBUG) {
      /* Debug path: enforce sanity checks on the state flags that are
       * generated and consumed, to help ensure the state atoms are ordered
       * correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *     fail;
          *
          * i.e., an atom must not flag state that an earlier atom in the
          * list already consumed.
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   } else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (INTEL_DEBUG & DEBUG_STATE) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}

static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware.  This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the
 * state it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}
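
/* Rough usage sketch (simplified, not verbatim from the draw path): the
 * caller commits the dirty-bit bookkeeping only once the batch is known to
 * fit in the aperture:
 *
 *    brw_upload_render_state(brw);
 *    if (!batch_fits(brw))                // hypothetical aperture check
 *       rewind_batch_and_retry(brw);      // dirty flags stay set, so the
 *    else                                 // retry re-emits the state
 *       brw_render_state_finished(brw);
 */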

void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}