1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "brw_context.h"
25 #include "brw_defines.h"
26 #include "intel_batchbuffer.h"
27 #include "intel_fbo.h"
28 
29 /**
30  * According to the latest documentation, any PIPE_CONTROL with the
31  * "Command Streamer Stall" bit set must also have another bit set,
32  * with five different options:
33  *
34  *  - Render Target Cache Flush
35  *  - Depth Cache Flush
36  *  - Stall at Pixel Scoreboard
37  *  - Post-Sync Operation
38  *  - Depth Stall
39  *  - DC Flush Enable
40  *
41  * I chose "Stall at Pixel Scoreboard" since we've used it effectively
42  * in the past, but the choice is fairly arbitrary.
43  */
44 static void
gen8_add_cs_stall_workaround_bits(uint32_t * flags)45 gen8_add_cs_stall_workaround_bits(uint32_t *flags)
46 {
47    uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
48                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
49                       PIPE_CONTROL_WRITE_IMMEDIATE |
50                       PIPE_CONTROL_WRITE_DEPTH_COUNT |
51                       PIPE_CONTROL_WRITE_TIMESTAMP |
52                       PIPE_CONTROL_STALL_AT_SCOREBOARD |
53                       PIPE_CONTROL_DEPTH_STALL |
54                       PIPE_CONTROL_DATA_CACHE_FLUSH;
55 
56    /* If we're doing a CS stall, and don't already have one of the
57     * workaround bits set, add "Stall at Pixel Scoreboard."
58     */
59    if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
60       *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
61 }
62 
63 /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
64  *
65  * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
66  *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
67  *
68  * Note that the kernel does CS stalls between batches, so we only need
69  * to count them within a batch.
70  */
71 static uint32_t
gen7_cs_stall_every_four_pipe_controls(struct brw_context * brw,uint32_t flags)72 gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
73 {
74    const struct gen_device_info *devinfo = &brw->screen->devinfo;
75 
76    if (devinfo->gen == 7 && !devinfo->is_haswell) {
77       if (flags & PIPE_CONTROL_CS_STALL) {
78          /* If we're doing a CS stall, reset the counter and carry on. */
79          brw->pipe_controls_since_last_cs_stall = 0;
80          return 0;
81       }
82 
83       /* If this is the fourth pipe control without a CS stall, do one now. */
84       if (++brw->pipe_controls_since_last_cs_stall == 4) {
85          brw->pipe_controls_since_last_cs_stall = 0;
86          return PIPE_CONTROL_CS_STALL;
87       }
88    }
89    return 0;
90 }
91 
92 /* #1130 from gen10 workarounds page in h/w specs:
93  * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
94  *  not enabled in same PIPE CONTROL and Enable Pixel score board stall if
95  *  Render target cache flush is enabled."
96  *
97  * Applicable to CNL B0 and C0 steppings only.
98  */
99 static void
gen10_add_rcpfe_workaround_bits(uint32_t * flags)100 gen10_add_rcpfe_workaround_bits(uint32_t *flags)
101 {
102    if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
103       *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
104    } else if (*flags &
105              (PIPE_CONTROL_WRITE_IMMEDIATE |
106               PIPE_CONTROL_WRITE_DEPTH_COUNT |
107               PIPE_CONTROL_WRITE_TIMESTAMP)) {
108       *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
109    }
110 }
111 
112 static void
brw_emit_pipe_control(struct brw_context * brw,uint32_t flags,struct brw_bo * bo,uint32_t offset,uint64_t imm)113 brw_emit_pipe_control(struct brw_context *brw, uint32_t flags,
114                       struct brw_bo *bo, uint32_t offset, uint64_t imm)
115 {
116    const struct gen_device_info *devinfo = &brw->screen->devinfo;
117 
118    if (devinfo->gen >= 8) {
119       if (devinfo->gen == 8)
120          gen8_add_cs_stall_workaround_bits(&flags);
121 
122       if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
123          if (devinfo->gen == 9) {
124             /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
125              * lists several workarounds:
126              *
127              *    "Project: SKL, KBL, BXT
128              *
129              *     If the VF Cache Invalidation Enable is set to a 1 in a
130              *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
131              *     sets to 0, with the VF Cache Invalidation Enable set to 0
132              *     needs to be sent prior to the PIPE_CONTROL with VF Cache
133              *     Invalidation Enable set to a 1."
134              */
135             brw_emit_pipe_control_flush(brw, 0);
136          }
137 
138          if (devinfo->gen >= 9) {
139             /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
140              *
141              *    "Project: BDW+
142              *
143              *     When VF Cache Invalidate is set “Post Sync Operation” must
144              *     be enabled to “Write Immediate Data” or “Write PS Depth
145              *     Count” or “Write Timestamp”."
146              *
147              * If there's a BO, we're already doing some kind of write.
148              * If not, add a write to the workaround BO.
149              *
150              * XXX: This causes GPU hangs on Broadwell, so restrict it to
151              *      Gen9+ for now...see this bug for more information:
152              *      https://bugs.freedesktop.org/show_bug.cgi?id=103787
153              */
154             if (!bo) {
155                flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
156                bo = brw->workaround_bo;
157             }
158          }
159       }
160 
161       if (devinfo->gen == 10)
162          gen10_add_rcpfe_workaround_bits(&flags);
163 
164       BEGIN_BATCH(6);
165       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
166       OUT_BATCH(flags);
167       if (bo) {
168          OUT_RELOC64(bo, RELOC_WRITE, offset);
169       } else {
170          OUT_BATCH(0);
171          OUT_BATCH(0);
172       }
173       OUT_BATCH(imm);
174       OUT_BATCH(imm >> 32);
175       ADVANCE_BATCH();
176    } else if (devinfo->gen >= 6) {
177       if (devinfo->gen == 6 &&
178           (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
179          /* Hardware workaround: SNB B-Spec says:
180           *
181           *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
182           *   Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
183           *   required.
184           */
185          brw_emit_post_sync_nonzero_flush(brw);
186       }
187 
188       flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
189 
190       /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
191        * on later platforms.  We always use PPGTT on Gen7+.
192        */
193       unsigned gen6_gtt = devinfo->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
194 
195       BEGIN_BATCH(5);
196       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
197       OUT_BATCH(flags);
198       if (bo) {
199          OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, gen6_gtt | offset);
200       } else {
201          OUT_BATCH(0);
202       }
203       OUT_BATCH(imm);
204       OUT_BATCH(imm >> 32);
205       ADVANCE_BATCH();
206    } else {
207       BEGIN_BATCH(4);
208       OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
209       if (bo) {
210          OUT_RELOC(bo, RELOC_WRITE, PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
211       } else {
212          OUT_BATCH(0);
213       }
214       OUT_BATCH(imm);
215       OUT_BATCH(imm >> 32);
216       ADVANCE_BATCH();
217    }
218 }
219 
220 /**
221  * Emit a PIPE_CONTROL with various flushing flags.
222  *
223  * The caller is responsible for deciding what flags are appropriate for the
224  * given generation.
225  */
226 void
brw_emit_pipe_control_flush(struct brw_context * brw,uint32_t flags)227 brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
228 {
229    const struct gen_device_info *devinfo = &brw->screen->devinfo;
230 
231    if (devinfo->gen >= 6 &&
232        (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
233        (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
234       /* A pipe control command with flush and invalidate bits set
235        * simultaneously is an inherently racy operation on Gen6+ if the
236        * contents of the flushed caches were intended to become visible from
237        * any of the invalidated caches.  Split it in two PIPE_CONTROLs, the
238        * first one should stall the pipeline to make sure that the flushed R/W
239        * caches are coherent with memory once the specified R/O caches are
240        * invalidated.  On pre-Gen6 hardware the (implicit) R/O cache
241        * invalidation seems to happen at the bottom of the pipeline together
242        * with any write cache flush, so this shouldn't be a concern.  In order
243        * to ensure a full stall, we do an end-of-pipe sync.
244        */
245       brw_emit_end_of_pipe_sync(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS));
246       flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
247    }
248 
249    brw_emit_pipe_control(brw, flags, NULL, 0, 0);
250 }
251 
/**
 * Emit a PIPE_CONTROL that writes to a buffer object.
 *
 * \p flags should contain one of the following items:
 *  - PIPE_CONTROL_WRITE_IMMEDIATE
 *  - PIPE_CONTROL_WRITE_TIMESTAMP
 *  - PIPE_CONTROL_WRITE_DEPTH_COUNT
 *
 * \param brw     context whose batch receives the packet
 * \param flags   PIPE_CONTROL_* bits, including the write operation
 * \param bo      buffer object that receives the write
 * \param offset  offset of the write within \p bo
 * \param imm     64-bit immediate value passed along with the packet
 */
void
brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
                            struct brw_bo *bo, uint32_t offset,
                            uint64_t imm)
{
   /* All arguments are forwarded unchanged to the common emitter. */
   brw_emit_pipe_control(brw,
                         flags,
                         bo, offset,
                         imm);
}
267 
268 /**
269  * Restriction [DevSNB, DevIVB]:
270  *
271  * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
272  * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
273  * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
274  * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
275  * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
276  * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
277  * unless SW can otherwise guarantee that the pipeline from WM onwards is
278  * already flushed (e.g., via a preceding MI_FLUSH).
279  */
280 void
brw_emit_depth_stall_flushes(struct brw_context * brw)281 brw_emit_depth_stall_flushes(struct brw_context *brw)
282 {
283    const struct gen_device_info *devinfo = &brw->screen->devinfo;
284 
285    assert(devinfo->gen >= 6);
286 
287    /* Starting on BDW, these pipe controls are unnecessary.
288     *
289     *   WM HW will internally manage the draining pipe and flushing of the caches
290     *   when this command is issued. The PIPE_CONTROL restrictions are removed.
291     */
292    if (devinfo->gen >= 8)
293       return;
294 
295    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
296    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
297    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
298 }
299 
300 /**
301  * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
302  * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
303  *  stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
304  *  3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
305  *  3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL needs
306  *  to be sent before any combination of VS associated 3DSTATE."
307  */
308 void
gen7_emit_vs_workaround_flush(struct brw_context * brw)309 gen7_emit_vs_workaround_flush(struct brw_context *brw)
310 {
311    const struct gen_device_info *devinfo = &brw->screen->devinfo;
312 
313    assert(devinfo->gen == 7);
314    brw_emit_pipe_control_write(brw,
315                                PIPE_CONTROL_WRITE_IMMEDIATE
316                                | PIPE_CONTROL_DEPTH_STALL,
317                                brw->workaround_bo, 0, 0);
318 }
319 
320 /**
321  * From the PRM, Volume 2a:
322  *
323  *    "Indirect State Pointers Disable
324  *
325  *    At the completion of the post-sync operation associated with this pipe
326  *    control packet, the indirect state pointers in the hardware are
327  *    considered invalid; the indirect pointers are not saved in the context.
328  *    If any new indirect state commands are executed in the command stream
329  *    while the pipe control is pending, the new indirect state commands are
330  *    preserved.
331  *
332  *    [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context
333  *    restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant
334  *    commands are only considered as Indirect State Pointers. Once ISP is
335  *    issued in a context, SW must initialize by programming push constant
336  *    commands for all the shaders (at least to zero length) before attempting
337  *    any rendering operation for the same context."
338  *
339  * 3DSTATE_CONSTANT_* packets are restored during a context restore,
340  * even though they point to a BO that has been already unreferenced at
341  * the end of the previous batch buffer. This has been fine so far since
342  * we are protected by these scratch page (every address not covered by
343  * a BO should be pointing to the scratch page). But on CNL, it is
344  * causing a GPU hang during context restore at the 3DSTATE_CONSTANT_*
345  * instruction.
346  *
347  * The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the
348  * hardware to ignore previous 3DSTATE_CONSTANT_* packets during a
349  * context restore, so the mentioned hang doesn't happen. However,
350  * software must program push constant commands for all stages prior to
351  * rendering anything, so we flag them as dirty.
352  */
353 void
gen10_emit_isp_disable(struct brw_context * brw)354 gen10_emit_isp_disable(struct brw_context *brw)
355 {
356    brw_emit_pipe_control(brw,
357                          PIPE_CONTROL_ISP_DIS |
358                          PIPE_CONTROL_CS_STALL,
359                          NULL, 0, 0);
360 
361    brw->vs.base.push_constants_dirty = true;
362    brw->tcs.base.push_constants_dirty = true;
363    brw->tes.base.push_constants_dirty = true;
364    brw->gs.base.push_constants_dirty = true;
365    brw->wm.base.push_constants_dirty = true;
366 }
367 
368 /**
369  * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
370  */
371 void
gen7_emit_cs_stall_flush(struct brw_context * brw)372 gen7_emit_cs_stall_flush(struct brw_context *brw)
373 {
374    brw_emit_pipe_control_write(brw,
375                                PIPE_CONTROL_CS_STALL
376                                | PIPE_CONTROL_WRITE_IMMEDIATE,
377                                brw->workaround_bo, 0, 0);
378 }
379 
380 /**
381  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
382  * implementing two workarounds on gen6.  From section 1.4.7.1
383  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
384  *
385  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
386  * produced by non-pipelined state commands), software needs to first
387  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
388  * 0.
389  *
390  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
391  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
392  *
393  * And the workaround for these two requires this workaround first:
394  *
395  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
396  * BEFORE the pipe-control with a post-sync op and no write-cache
397  * flushes.
398  *
399  * And this last workaround is tricky because of the requirements on
400  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
401  * volume 2 part 1:
402  *
403  *     "1 of the following must also be set:
404  *      - Render Target Cache Flush Enable ([12] of DW1)
405  *      - Depth Cache Flush Enable ([0] of DW1)
406  *      - Stall at Pixel Scoreboard ([1] of DW1)
407  *      - Depth Stall ([13] of DW1)
408  *      - Post-Sync Operation ([13] of DW1)
409  *      - Notify Enable ([8] of DW1)"
410  *
411  * The cache flushes require the workaround flush that triggered this
412  * one, so we can't use it.  Depth stall would trigger the same.
413  * Post-sync nonzero is what triggered this second workaround, so we
414  * can't use that one either.  Notify enable is IRQs, which aren't
415  * really our business.  That leaves only stall at scoreboard.
416  */
417 void
brw_emit_post_sync_nonzero_flush(struct brw_context * brw)418 brw_emit_post_sync_nonzero_flush(struct brw_context *brw)
419 {
420    brw_emit_pipe_control_flush(brw,
421                                PIPE_CONTROL_CS_STALL |
422                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
423 
424    brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
425                                brw->workaround_bo, 0, 0);
426 }
427 
428 /*
429  * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
430  *
431  *  Write synchronization is a special case of end-of-pipe
432  *  synchronization that requires that the render cache and/or depth
433  *  related caches are flushed to memory, where the data will become
434  *  globally visible. This type of synchronization is required prior to
435  *  SW (CPU) actually reading the result data from memory, or initiating
436  *  an operation that will use as a read surface (such as a texture
437  *  surface) a previous render target and/or depth/stencil buffer
438  *
439  *
440  * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
441  *
442  *  Exercising the write cache flush bits (Render Target Cache Flush
443  *  Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
444  *  ensures the write caches are flushed and doesn't guarantee the data
445  *  is globally visible.
446  *
447  *  SW can track the completion of the end-of-pipe-synchronization by
448  *  using "Notify Enable" and "PostSync Operation - Write Immediate
449  *  Data" in the PIPE_CONTROL command.
450  */
451 void
brw_emit_end_of_pipe_sync(struct brw_context * brw,uint32_t flags)452 brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags)
453 {
454    const struct gen_device_info *devinfo = &brw->screen->devinfo;
455 
456    if (devinfo->gen >= 6) {
457       /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
458        *
459        *    "The most common action to perform upon reaching a synchronization
460        *    point is to write a value out to memory. An immediate value
461        *    (included with the synchronization command) may be written."
462        *
463        *
464        * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
465        *
466        *    "In case the data flushed out by the render engine is to be read
467        *    back in to the render engine in coherent manner, then the render
468        *    engine has to wait for the fence completion before accessing the
469        *    flushed data. This can be achieved by following means on various
470        *    products: PIPE_CONTROL command with CS Stall and the required
471        *    write caches flushed with Post-Sync-Operation as Write Immediate
472        *    Data.
473        *
474        *    Example:
475        *       - Workload-1 (3D/GPGPU/MEDIA)
476        *       - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate
477        *         Data, Required Write Cache Flush bits set)
478        *       - Workload-2 (Can use the data produce or output by Workload-1)
479        */
480       brw_emit_pipe_control_write(brw,
481                                   flags | PIPE_CONTROL_CS_STALL |
482                                   PIPE_CONTROL_WRITE_IMMEDIATE,
483                                   brw->workaround_bo, 0, 0);
484 
485       if (devinfo->is_haswell) {
486          /* Haswell needs addition work-arounds:
487           *
488           * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
489           *
490           *    Option 1:
491           *    PIPE_CONTROL command with the CS Stall and the required write
492           *    caches flushed with Post-SyncOperation as Write Immediate Data
493           *    followed by eight dummy MI_STORE_DATA_IMM (write to scratch
494           *    spce) commands.
495           *
496           *    Example:
497           *       - Workload-1
498           *       - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write
499           *         Immediate Data, Required Write Cache Flush bits set)
500           *       - MI_STORE_DATA_IMM (8 times) (Dummy data, Scratch Address)
501           *       - Workload-2 (Can use the data produce or output by
502           *         Workload-1)
503           *
504           * Unfortunately, both the PRMs and the internal docs are a bit
505           * out-of-date in this regard.  What the windows driver does (and
506           * this appears to actually work) is to emit a register read from the
507           * memory address written by the pipe control above.
508           *
509           * What register we load into doesn't matter.  We choose an indirect
510           * rendering register because we know it always exists and it's one
511           * of the first registers the command parser allows us to write.  If
512           * you don't have command parser support in your kernel (pre-4.2),
513           * this will get turned into MI_NOOP and you won't get the
514           * workaround.  Unfortunately, there's just not much we can do in
515           * that case.  This register is perfectly safe to write since we
516           * always re-load all of the indirect draw registers right before
517           * 3DPRIMITIVE when needed anyway.
518           */
519          brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE,
520                                brw->workaround_bo, 0);
521       }
522    } else {
523       /* On gen4-5, a regular pipe control seems to suffice. */
524       brw_emit_pipe_control_flush(brw, flags);
525    }
526 }
527 
528 /* Emit a pipelined flush to either flush render and texture cache for
529  * reading from a FBO-drawn texture, or flush so that frontbuffer
530  * render appears on the screen in DRI1.
531  *
532  * This is also used for the always_flush_cache driconf debug option.
533  */
534 void
brw_emit_mi_flush(struct brw_context * brw)535 brw_emit_mi_flush(struct brw_context *brw)
536 {
537    const struct gen_device_info *devinfo = &brw->screen->devinfo;
538 
539    if (brw->batch.ring == BLT_RING && devinfo->gen >= 6) {
540       const unsigned n_dwords = devinfo->gen >= 8 ? 5 : 4;
541       BEGIN_BATCH_BLT(n_dwords);
542       OUT_BATCH(MI_FLUSH_DW | (n_dwords - 2));
543       OUT_BATCH(0);
544       OUT_BATCH(0);
545       OUT_BATCH(0);
546       if (n_dwords == 5)
547          OUT_BATCH(0);
548       ADVANCE_BATCH();
549    } else {
550       int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
551       if (devinfo->gen >= 6) {
552          flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
553                   PIPE_CONTROL_CONST_CACHE_INVALIDATE |
554                   PIPE_CONTROL_DATA_CACHE_FLUSH |
555                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
556                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
557                   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
558                   PIPE_CONTROL_CS_STALL;
559       }
560       brw_emit_pipe_control_flush(brw, flags);
561    }
562 }
563 
564 int
brw_init_pipe_control(struct brw_context * brw,const struct gen_device_info * devinfo)565 brw_init_pipe_control(struct brw_context *brw,
566                       const struct gen_device_info *devinfo)
567 {
568    if (devinfo->gen < 6)
569       return 0;
570 
571    /* We can't just use brw_state_batch to get a chunk of space for
572     * the gen6 workaround because it involves actually writing to
573     * the buffer, and the kernel doesn't let us write to the batch.
574     */
575    brw->workaround_bo = brw_bo_alloc(brw->bufmgr,
576                                      "pipe_control workaround",
577                                      4096, 4096);
578    if (brw->workaround_bo == NULL)
579       return -ENOMEM;
580 
581    brw->pipe_controls_since_last_cs_stall = 0;
582 
583    return 0;
584 }
585 
586 void
brw_fini_pipe_control(struct brw_context * brw)587 brw_fini_pipe_control(struct brw_context *brw)
588 {
589    brw_bo_unreference(brw->workaround_bo);
590 }
591