/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "genX_boilerplate.h"
#include "brw_defines.h"
#include "brw_state.h"

28 static unsigned
flags_to_post_sync_op(uint32_t flags)29 flags_to_post_sync_op(uint32_t flags)
30 {
31    if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
32       return WriteImmediateData;
33 
34    if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
35       return WritePSDepthCount;
36 
37    if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
38       return WriteTimestamp;
39 
40    return 0;
41 }
42 
43 /**
44  * Do the given flags have a Post Sync or LRI Post Sync operation?
45  */
46 static enum pipe_control_flags
get_post_sync_flags(enum pipe_control_flags flags)47 get_post_sync_flags(enum pipe_control_flags flags)
48 {
49    flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
50             PIPE_CONTROL_WRITE_DEPTH_COUNT |
51             PIPE_CONTROL_WRITE_TIMESTAMP |
52             PIPE_CONTROL_LRI_POST_SYNC_OP;
53 
54    /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
55     * "LRI Post Sync Operation".  So more than one bit set would be illegal.
56     */
57    assert(util_bitcount(flags) <= 1);
58 
59    return flags;
60 }
61 
/* True on Gen7+ when brw->last_pipeline is BRW_COMPUTE_PIPELINE.
 * The argument is parenthesized so that any pointer expression (e.g. &ctx)
 * can be passed.
 */
#define IS_COMPUTE_PIPELINE(brw) \
   (GEN_GEN >= 7 && (brw)->last_pipeline == BRW_COMPUTE_PIPELINE)

/* Closed interval - GEN_GEN \in [x, y] */
#define IS_GEN_BETWEEN(x, y) (GEN_GEN >= (x) && GEN_GEN <= (y))
/* Closed interval - GEN_VERSIONx10 \in [x, y] */
#define IS_GENx10_BETWEEN(x, y) \
   (GEN_VERSIONx10 >= (x) && GEN_VERSIONx10 <= (y))

70 /**
71  * Emit a series of PIPE_CONTROL commands, taking into account any
72  * workarounds necessary to actually accomplish the caller's request.
73  *
74  * Unless otherwise noted, spec quotations in this function come from:
75  *
76  * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
77  * Restrictions for PIPE_CONTROL.
78  *
79  * You should not use this function directly.  Use the helpers in
80  * brw_pipe_control.c instead, which may split the pipe control further.
81  */
82 void
genX(emit_raw_pipe_control)83 genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
84                             struct brw_bo *bo, uint32_t offset, uint64_t imm)
85 {
86    UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
87    enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
88    enum pipe_control_flags non_lri_post_sync_flags =
89       post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;
90 
91    /* Recursive PIPE_CONTROL workarounds --------------------------------
92     * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
93     *
94     * We do these first because we want to look at the original operation,
95     * rather than any workarounds we set.
96     */
97    if (GEN_GEN == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
98       /* Hardware workaround: SNB B-Spec says:
99        *
100        *    "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
101        *     Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
102        *     required."
103        */
104       brw_emit_post_sync_nonzero_flush(brw);
105    }
106 
107    if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
108       /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
109        * lists several workarounds:
110        *
111        *    "Project: SKL, KBL, BXT
112        *
113        *     If the VF Cache Invalidation Enable is set to a 1 in a
114        *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
115        *     sets to 0, with the VF Cache Invalidation Enable set to 0
116        *     needs to be sent prior to the PIPE_CONTROL with VF Cache
117        *     Invalidation Enable set to a 1."
118        */
119       genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0);
120    }
121 
122    if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) {
123       /* Project: SKL / Argument: LRI Post Sync Operation [23]
124        *
125        * "PIPECONTROL command with “Command Streamer Stall Enable” must be
126        *  programmed prior to programming a PIPECONTROL command with "LRI
127        *  Post Sync Operation" in GPGPU mode of operation (i.e when
128        *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
129        *
130        * The same text exists a few rows below for Post Sync Op.
131        */
132       genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0);
133    }
134 
135    /* "Flush Types" workarounds ---------------------------------------------
136     * We do these now because they may add post-sync operations or CS stalls.
137     */
138 
139    if (IS_GEN_BETWEEN(8, 10) && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
140       /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
141        *
142        * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
143        *  'Write PS Depth Count' or 'Write Timestamp'."
144        */
145       if (!bo) {
146          flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
147          post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
148          non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
149          bo = brw->workaround_bo;
150          offset = brw->workaround_bo_offset;
151       }
152    }
153 
154    if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
155       /* Project: PRE-HSW / Argument: Depth Stall
156        *
157        * "The following bits must be clear:
158        *  - Render Target Cache Flush Enable ([12] of DW1)
159        *  - Depth Cache Flush Enable ([0] of DW1)"
160        */
161       assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
162                         PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
163    }
164 
165    if (GEN_GEN >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
166       /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
167        *
168        *    "This bit must be DISABLED for operations other than writing
169        *     PS_DEPTH_COUNT."
170        *
171        * This seems like nonsense.  An Ivybridge workaround requires us to
172        * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
173        * operation.  Gen8+ requires us to emit depth stalls and depth cache
174        * flushes together.  So, it's hard to imagine this means anything other
175        * than "we originally intended this to be used for PS_DEPTH_COUNT".
176        *
177        * We ignore the supposed restriction and do nothing.
178        */
179    }
180 
181    if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
182       /* Project: PRE-HSW / Argument: Depth Cache Flush
183        *
184        * "Depth Stall must be clear ([13] of DW1)."
185        */
186       assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
187    }
188 
189    if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
190                 PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
191       /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
192        *
193        *    "This bit must be DISABLED for End-of-pipe (Read) fences,
194        *     PS_DEPTH_COUNT or TIMESTAMP queries."
195        *
196        * TODO: Implement end-of-pipe checking.
197        */
198       assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
199                                   PIPE_CONTROL_WRITE_TIMESTAMP)));
200    }
201 
202    if (GEN_GEN < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
203       /* From the PIPE_CONTROL instruction table, bit 1:
204        *
205        *    "This bit is ignored if Depth Stall Enable is set.
206        *     Further, the render cache is not flushed even if Write Cache
207        *     Flush Enable bit is set."
208        *
209        * We assert that the caller doesn't do this combination, to try and
210        * prevent mistakes.  It shouldn't hurt the GPU, though.
211        *
212        * We skip this check on Gen11+ as the "Stall and Pixel Scoreboard"
213        * and "Render Target Flush" combo is explicitly required for BTI
214        * update workarounds.
215        */
216       assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
217                         PIPE_CONTROL_RENDER_TARGET_FLUSH)));
218    }
219 
220    /* PIPE_CONTROL page workarounds ------------------------------------- */
221 
222    if (IS_GEN_BETWEEN(7, 8) && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
223       /* From the PIPE_CONTROL page itself:
224        *
225        *    "IVB, HSW, BDW
226        *     Restriction: Pipe_control with CS-stall bit set must be issued
227        *     before a pipe-control command that has the State Cache
228        *     Invalidate bit set."
229        */
230       flags |= PIPE_CONTROL_CS_STALL;
231    }
232 
233    if (GEN_IS_HASWELL) {
234       /* From the PIPE_CONTROL page itself:
235        *
236        *    "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
237        *     Prior to programming a PIPECONTROL command with any of the RO
238        *     cache invalidation bit set, program a PIPECONTROL flush command
239        *     with “CS stall” bit and “HDC Flush” bit set."
240        *
241        * TODO: Actually implement this.  What's an HDC Flush?
242        */
243    }
244 
245    if (flags & PIPE_CONTROL_FLUSH_LLC) {
246       /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
247        *
248        *    "Project: ALL
249        *     SW must always program Post-Sync Operation to "Write Immediate
250        *     Data" when Flush LLC is set."
251        *
252        * For now, we just require the caller to do it.
253        */
254       assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
255    }
256 
257    /* "Post-Sync Operation" workarounds -------------------------------- */
258 
259    /* Project: All / Argument: Global Snapshot Count Reset [19]
260     *
261     * "This bit must not be exercised on any product.
262     *  Requires stall bit ([20] of DW1) set."
263     *
264     * We don't use this, so we just assert that it isn't used.  The
265     * PIPE_CONTROL instruction page indicates that they intended this
266     * as a debug feature and don't think it is useful in production,
267     * but it may actually be usable, should we ever want to.
268     */
269    assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);
270 
271    if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
272                 PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
273       /* Project: All / Arguments:
274        *
275        * - Generic Media State Clear [16]
276        * - Indirect State Pointers Disable [16]
277        *
278        *    "Requires stall bit ([20] of DW1) set."
279        *
280        * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
281        * State Clear) says:
282        *
283        *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
284        *     programmed prior to programming a PIPECONTROL command with "Media
285        *     State Clear" set in GPGPU mode of operation"
286        *
287        * This is a subset of the earlier rule, so there's nothing to do.
288        */
289       flags |= PIPE_CONTROL_CS_STALL;
290    }
291 
292    if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
293       /* Project: All / Argument: Store Data Index
294        *
295        * "Post-Sync Operation ([15:14] of DW1) must be set to something other
296        *  than '0'."
297        *
298        * For now, we just assert that the caller does this.  We might want to
299        * automatically add a write to the workaround BO...
300        */
301       assert(non_lri_post_sync_flags != 0);
302    }
303 
304    if (flags & PIPE_CONTROL_SYNC_GFDT) {
305       /* Project: All / Argument: Sync GFDT
306        *
307        * "Post-Sync Operation ([15:14] of DW1) must be set to something other
308        *  than '0' or 0x2520[13] must be set."
309        *
310        * For now, we just assert that the caller does this.
311        */
312       assert(non_lri_post_sync_flags != 0);
313    }
314 
315    if (IS_GENx10_BETWEEN(60, 75) && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
316       /* Project: SNB, IVB, HSW / Argument: TLB inv
317        *
318        * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
319        *  must be set to something other than '0'."
320        *
321        * For now, we just assert that the caller does this.
322        */
323       assert(non_lri_post_sync_flags != 0);
324    }
325 
326    if (GEN_GEN >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
327       /* Project: IVB+ / Argument: TLB inv
328        *
329        *    "Requires stall bit ([20] of DW1) set."
330        *
331        * Also, from the PIPE_CONTROL instruction table:
332        *
333        *    "Project: SKL+
334        *     Post Sync Operation or CS stall must be set to ensure a TLB
335        *     invalidation occurs.  Otherwise no cycle will occur to the TLB
336        *     cache to invalidate."
337        *
338        * This is not a subset of the earlier rule, so there's nothing to do.
339        */
340       flags |= PIPE_CONTROL_CS_STALL;
341    }
342 
343    if (GEN_GEN == 9 && devinfo->gt == 4) {
344       /* TODO: The big Skylake GT4 post sync op workaround */
345    }
346 
347    /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */
348 
349    if (IS_COMPUTE_PIPELINE(brw)) {
350       if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
351          /* Project: SKL+ / Argument: Tex Invalidate
352           * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
353           */
354          flags |= PIPE_CONTROL_CS_STALL;
355       }
356 
357       if (GEN_GEN == 8 && (post_sync_flags ||
358                            (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
359                                      PIPE_CONTROL_DEPTH_STALL |
360                                      PIPE_CONTROL_RENDER_TARGET_FLUSH |
361                                      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
362                                      PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
363          /* Project: BDW / Arguments:
364           *
365           * - LRI Post Sync Operation   [23]
366           * - Post Sync Op              [15:14]
367           * - Notify En                 [8]
368           * - Depth Stall               [13]
369           * - Render Target Cache Flush [12]
370           * - Depth Cache Flush         [0]
371           * - DC Flush Enable           [5]
372           *
373           *    "Requires stall bit ([20] of DW) set for all GPGPU and Media
374           *     Workloads."
375           *
376           * (The docs have separate table rows for each bit, with essentially
377           * the same workaround text.  We've combined them here.)
378           */
379          flags |= PIPE_CONTROL_CS_STALL;
380 
381          /* Also, from the PIPE_CONTROL instruction table, bit 20:
382           *
383           *    "Project: BDW
384           *     This bit must be always set when PIPE_CONTROL command is
385           *     programmed by GPGPU and MEDIA workloads, except for the cases
386           *     when only Read Only Cache Invalidation bits are set (State
387           *     Cache Invalidation Enable, Instruction cache Invalidation
388           *     Enable, Texture Cache Invalidation Enable, Constant Cache
389           *     Invalidation Enable). This is to WA FFDOP CG issue, this WA
390           *     need not implemented when FF_DOP_CG is disable via "Fixed
391           *     Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
392           *
393           * It sounds like we could avoid CS stalls in some cases, but we
394           * don't currently bother.  This list isn't exactly the list above,
395           * either...
396           */
397       }
398    }
399 
400    /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
401     *
402     * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
403     *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
404     *
405     * Note that the kernel does CS stalls between batches, so we only need
406     * to count them within a batch.  We currently naively count every 4, and
407     * don't skip the ones with only read-cache-invalidate bits set.  This
408     * may or may not be a problem...
409     */
410    if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
411       if (flags & PIPE_CONTROL_CS_STALL) {
412          /* If we're doing a CS stall, reset the counter and carry on. */
413          brw->pipe_controls_since_last_cs_stall = 0;
414       }
415 
416       /* If this is the fourth pipe control without a CS stall, do one now. */
417       if (++brw->pipe_controls_since_last_cs_stall == 4) {
418          brw->pipe_controls_since_last_cs_stall = 0;
419          flags |= PIPE_CONTROL_CS_STALL;
420       }
421    }
422 
423    /* "Stall" workarounds ----------------------------------------------
424     * These have to come after the earlier ones because we may have added
425     * some additional CS stalls above.
426     */
427 
428    if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
429       /* Project: PRE-SKL, VLV, CHV
430        *
431        * "[All Stepping][All SKUs]:
432        *
433        *  One of the following must also be set:
434        *
435        *  - Render Target Cache Flush Enable ([12] of DW1)
436        *  - Depth Cache Flush Enable ([0] of DW1)
437        *  - Stall at Pixel Scoreboard ([1] of DW1)
438        *  - Depth Stall ([13] of DW1)
439        *  - Post-Sync Operation ([13] of DW1)
440        *  - DC Flush Enable ([5] of DW1)"
441        *
442        * If we don't already have one of those bits set, we choose to add
443        * "Stall at Pixel Scoreboard".  Some of the other bits require a
444        * CS stall as a workaround (see above), which would send us into
445        * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
446        * appears to be safe, so we choose that.
447        */
448       const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
449                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
450                                PIPE_CONTROL_WRITE_IMMEDIATE |
451                                PIPE_CONTROL_WRITE_DEPTH_COUNT |
452                                PIPE_CONTROL_WRITE_TIMESTAMP |
453                                PIPE_CONTROL_STALL_AT_SCOREBOARD |
454                                PIPE_CONTROL_DEPTH_STALL |
455                                PIPE_CONTROL_DATA_CACHE_FLUSH;
456       if (!(flags & wa_bits))
457          flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
458    }
459 
460    /* Emit --------------------------------------------------------------- */
461 
462    brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
463    #if GEN_GEN >= 9
464       pc.FlushLLC = 0;
465    #endif
466    #if GEN_GEN >= 7
467       pc.LRIPostSyncOperation = NoLRIOperation;
468       pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
469       pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
470    #endif
471    #if GEN_GEN >= 6
472       pc.StoreDataIndex = 0;
473       pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
474       pc.GlobalSnapshotCountReset =
475          flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
476       pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
477       pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
478       pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
479       pc.RenderTargetCacheFlushEnable =
480          flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
481       pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
482       pc.StateCacheInvalidationEnable =
483          flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
484       pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
485       pc.ConstantCacheInvalidationEnable =
486          flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
487    #else
488       pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
489    #endif
490       pc.PostSyncOperation = flags_to_post_sync_op(flags);
491       pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
492       pc.InstructionCacheInvalidateEnable =
493          flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
494       pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
495    #if GEN_GEN >= 5 || GEN_IS_G4X
496       pc.IndirectStatePointersDisable =
497          flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
498    #endif
499    #if GEN_GEN >= 6
500       pc.TextureCacheInvalidationEnable =
501          flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
502    #elif GEN_GEN == 5 || GEN_IS_G4X
503       pc.TextureCacheFlushEnable =
504          flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
505    #endif
506       pc.Address = ggtt_bo(bo, offset);
507       if (GEN_GEN < 7 && bo)
508          pc.DestinationAddressType = DAT_GGTT;
509       pc.ImmediateData = imm;
510    }
511 }
512