1 /*
2  * Copyright © 2019 Google LLC
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 #ifndef TU_CS_H
24 #define TU_CS_H
25 
26 #include "tu_private.h"
27 
28 #include "adreno_pm4.xml.h"
29 
/* Presumably initializes \a cs for \a device in the given mode.
 * NOTE(review): defined outside this header; the unit of initial_size is not
 * visible here -- confirm against the implementation.
 */
30 void
31 tu_cs_init(struct tu_cs *cs,
32            struct tu_device *device,
33            enum tu_cs_mode mode,
34            uint32_t initial_size);
35 
/* Set up \a cs over caller-provided storage delimited by \a start and \a end.
 * tu_cs_draw_state() in this header calls it with (map, map + size).
 */
36 void
37 tu_cs_init_external(struct tu_cs *cs, uint32_t *start, uint32_t *end);
38 
/* NOTE(review): presumably releases what tu_cs_init() set up -- confirm. */
39 void
40 tu_cs_finish(struct tu_cs *cs);
41 
/* Begin/end recording into \a cs.  tu_cs_draw_state() calls tu_cs_begin()
 * right after tu_cs_init_external().
 * NOTE(review): exact pairing rules are not visible in this header.
 */
42 void
43 tu_cs_begin(struct tu_cs *cs);
44 
45 void
46 tu_cs_end(struct tu_cs *cs);
47 
/* Start a sub-stream \a sub_cs inside \a cs; ended with
 * tu_cs_end_sub_stream() below.
 * NOTE(review): unit of \a size is not visible here -- confirm.
 */
48 VkResult
49 tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs);
50 
/* Allocate memory from \a cs and describe it in \a memory.  The only caller
 * in this header reads memory->map and memory->iova afterwards.
 * NOTE(review): presumably \a count elements of \a size dwords each --
 * confirm against the implementation.
 */
51 VkResult
52 tu_cs_alloc(struct tu_cs *cs,
53             uint32_t count,
54             uint32_t size,
55             struct tu_cs_memory *memory);
56 
/* End the sub-stream \a sub_cs of \a cs and return the resulting entry; its
 * bo, offset and size fields are consumed by tu_cs_end_draw_state().
 */
57 struct tu_cs_entry
58 tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs);
59 
60 static inline struct tu_draw_state
tu_cs_end_draw_state(struct tu_cs * cs,struct tu_cs * sub_cs)61 tu_cs_end_draw_state(struct tu_cs *cs, struct tu_cs *sub_cs)
62 {
63    struct tu_cs_entry entry = tu_cs_end_sub_stream(cs, sub_cs);
64    return (struct tu_draw_state) {
65       .iova = entry.bo->iova + entry.offset,
66       .size = entry.size / sizeof(uint32_t),
67    };
68 }
69 
/* Out-of-line reservation path: tu_cs_reserve() calls this when its inline
 * fast path cannot satisfy the request, and tu_cs_draw_state() calls it
 * directly.  Returns a VkResult; callers in this header expect VK_SUCCESS.
 */
70 VkResult
71 tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size);
72 
73 static inline struct tu_draw_state
tu_cs_draw_state(struct tu_cs * sub_cs,struct tu_cs * cs,uint32_t size)74 tu_cs_draw_state(struct tu_cs *sub_cs, struct tu_cs *cs, uint32_t size)
75 {
76    struct tu_cs_memory memory;
77 
78    /* TODO: clean this up */
79    tu_cs_alloc(sub_cs, size, 1, &memory);
80    tu_cs_init_external(cs, memory.map, memory.map + size);
81    tu_cs_begin(cs);
82    tu_cs_reserve_space(cs, size);
83 
84    return (struct tu_draw_state) {
85       .iova = memory.iova,
86       .size = size,
87    };
88 }
89 
/* NOTE(review): presumably returns \a cs to its freshly-initialised state;
 * defined outside this header -- confirm.
 */
90 void
91 tu_cs_reset(struct tu_cs *cs);
92 
/* NOTE(review): presumably appends the entries of \a cs to \a target;
 * defined outside this header -- confirm ownership/lifetime rules there.
 */
93 VkResult
94 tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target);
95 
96 /**
97  * Get the size of the command packets emitted since the last call to
98  * tu_cs_add_entry.
99  */
100 static inline uint32_t
tu_cs_get_size(const struct tu_cs * cs)101 tu_cs_get_size(const struct tu_cs *cs)
102 {
103    return cs->cur - cs->start;
104 }
105 
/**
 * Non-zero when tu_cs_get_size() reports that nothing has been emitted into
 * \a cs since the last call to tu_cs_add_entry, zero otherwise.
 */
static inline uint32_t
tu_cs_is_empty(const struct tu_cs *cs)
{
   const uint32_t emitted = tu_cs_get_size(cs);

   return !emitted;
}
115 
116 /**
117  * Discard all entries.  This allows \a cs to be reused while keeping the
118  * existing BOs and command packets intact.
119  */
120 static inline void
tu_cs_discard_entries(struct tu_cs * cs)121 tu_cs_discard_entries(struct tu_cs *cs)
122 {
123    assert(cs->mode == TU_CS_MODE_GROW);
124    cs->entry_count = 0;
125 }
126 
127 /**
128  * Get the size needed for tu_cs_emit_call.
129  */
130 static inline uint32_t
tu_cs_get_call_size(const struct tu_cs * cs)131 tu_cs_get_call_size(const struct tu_cs *cs)
132 {
133    assert(cs->mode == TU_CS_MODE_GROW);
134    /* each CP_INDIRECT_BUFFER needs 4 dwords */
135    return cs->entry_count * 4;
136 }
137 
138 /**
139  * Assert that we did not exceed the reserved space.
140  */
141 static inline void
tu_cs_sanity_check(const struct tu_cs * cs)142 tu_cs_sanity_check(const struct tu_cs *cs)
143 {
144    assert(cs->start <= cs->cur);
145    assert(cs->cur <= cs->reserved_end);
146    assert(cs->reserved_end <= cs->end);
147 }
148 
149 /**
150  * Emit a uint32_t value into a command stream, without boundary checking.
151  */
152 static inline void
tu_cs_emit(struct tu_cs * cs,uint32_t value)153 tu_cs_emit(struct tu_cs *cs, uint32_t value)
154 {
155    assert(cs->cur < cs->reserved_end);
156    *cs->cur = value;
157    ++cs->cur;
158 }
159 
160 /**
161  * Emit an array of uint32_t into a command stream, without boundary checking.
162  */
163 static inline void
tu_cs_emit_array(struct tu_cs * cs,const uint32_t * values,uint32_t length)164 tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length)
165 {
166    assert(cs->cur + length <= cs->reserved_end);
167    memcpy(cs->cur, values, sizeof(uint32_t) * length);
168    cs->cur += length;
169 }
170 
/**
 * Compute the odd-parity bit of \a val as used in the packet headers below:
 * returns 1 when \a val has an even number of set bits and 0 when it has an
 * odd number, so that value plus parity bit always has odd parity.
 */
static inline unsigned
tu_odd_parity_bit(unsigned val)
{
   /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
    * note that we want odd parity so 0x6996 is inverted.
    */
   val ^= val >> 16;
   val ^= val >> 8;
   val ^= val >> 4;
   val &= 0xf;
   /* Unsigned literal on purpose: ~0x6996 as an int is negative, and
    * right-shifting a negative value is implementation-defined.
    */
   return (~0x6996u >> val) & 1;
}
183 
184 /**
185  * Get the size of the remaining space in the current BO.
186  */
187 static inline uint32_t
tu_cs_get_space(const struct tu_cs * cs)188 tu_cs_get_space(const struct tu_cs *cs)
189 {
190    return cs->end - cs->cur;
191 }
192 
193 static inline void
tu_cs_reserve(struct tu_cs * cs,uint32_t reserved_size)194 tu_cs_reserve(struct tu_cs *cs, uint32_t reserved_size)
195 {
196    if (cs->mode != TU_CS_MODE_GROW) {
197       assert(tu_cs_get_space(cs) >= reserved_size);
198       assert(cs->reserved_end == cs->end);
199       return;
200    }
201 
202    if (tu_cs_get_space(cs) >= reserved_size &&
203        cs->entry_count < cs->entry_capacity) {
204       cs->reserved_end = cs->cur + reserved_size;
205       return;
206    }
207 
208    VkResult result = tu_cs_reserve_space(cs, reserved_size);
209    /* TODO: set this error in tu_cs and use it */
210    assert(result == VK_SUCCESS);
211 }
212 
213 /**
214  * Emit a type-4 command packet header into a command stream.
215  */
216 static inline void
tu_cs_emit_pkt4(struct tu_cs * cs,uint16_t regindx,uint16_t cnt)217 tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt)
218 {
219    tu_cs_reserve(cs, cnt + 1);
220    tu_cs_emit(cs, CP_TYPE4_PKT | cnt | (tu_odd_parity_bit(cnt) << 7) |
221                      ((regindx & 0x3ffff) << 8) |
222                      ((tu_odd_parity_bit(regindx) << 27)));
223 }
224 
225 /**
226  * Emit a type-7 command packet header into a command stream.
227  */
228 static inline void
tu_cs_emit_pkt7(struct tu_cs * cs,uint8_t opcode,uint16_t cnt)229 tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
230 {
231    tu_cs_reserve(cs, cnt + 1);
232    tu_cs_emit(cs, CP_TYPE7_PKT | cnt | (tu_odd_parity_bit(cnt) << 15) |
233                      ((opcode & 0x7f) << 16) |
234                      ((tu_odd_parity_bit(opcode) << 23)));
235 }
236 
237 static inline void
tu_cs_emit_wfi(struct tu_cs * cs)238 tu_cs_emit_wfi(struct tu_cs *cs)
239 {
240    tu_cs_emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
241 }
242 
/**
 * Emit a 64-bit value into \a cs as two dwords, low half first.  Like
 * tu_cs_emit(), this does not reserve space itself.
 */
static inline void
tu_cs_emit_qw(struct tu_cs *cs, uint64_t value)
{
   const uint32_t lo = (uint32_t) value;
   const uint32_t hi = (uint32_t) (value >> 32);

   tu_cs_emit(cs, lo);
   tu_cs_emit(cs, hi);
}
249 
/**
 * Write \a value to the single register \a reg: a type-4 packet header for
 * one register followed by the value itself.
 */
static inline void
tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value)
{
   const uint16_t reg_count = 1;

   tu_cs_emit_pkt4(cs, reg, reg_count);
   tu_cs_emit(cs, value);
}
256 
257 /**
258  * Emit a CP_INDIRECT_BUFFER command packet.
259  */
260 static inline void
tu_cs_emit_ib(struct tu_cs * cs,const struct tu_cs_entry * entry)261 tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry)
262 {
263    assert(entry->bo);
264    assert(entry->size && entry->offset + entry->size <= entry->bo->size);
265    assert(entry->size % sizeof(uint32_t) == 0);
266    assert(entry->offset % sizeof(uint32_t) == 0);
267 
268    tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
269    tu_cs_emit_qw(cs, entry->bo->iova + entry->offset);
270    tu_cs_emit(cs, entry->size / sizeof(uint32_t));
271 }
272 
273 /* for compute which isn't using SET_DRAW_STATE */
274 static inline void
tu_cs_emit_state_ib(struct tu_cs * cs,struct tu_draw_state state)275 tu_cs_emit_state_ib(struct tu_cs *cs, struct tu_draw_state state)
276 {
277    if (state.size) {
278       tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
279       tu_cs_emit_qw(cs, state.iova);
280       tu_cs_emit(cs, state.size);
281    }
282 }
283 
284 /**
285  * Emit a CP_INDIRECT_BUFFER command packet for each entry in the target
286  * command stream.
287  */
288 static inline void
tu_cs_emit_call(struct tu_cs * cs,const struct tu_cs * target)289 tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target)
290 {
291    assert(target->mode == TU_CS_MODE_GROW);
292    for (uint32_t i = 0; i < target->entry_count; i++)
293       tu_cs_emit_ib(cs, target->entries + i);
294 }
295 
296 /* Helpers for bracketing a large sequence of commands of unknown size inside
297  * a CP_COND_REG_EXEC packet.
298  */
299 static inline void
tu_cond_exec_start(struct tu_cs * cs,uint32_t cond_flags)300 tu_cond_exec_start(struct tu_cs *cs, uint32_t cond_flags)
301 {
302    assert(cs->mode == TU_CS_MODE_GROW);
303    assert(!cs->cond_flags && cond_flags);
304 
305    tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
306    tu_cs_emit(cs, cond_flags);
307 
308    cs->cond_flags = cond_flags;
309    cs->cond_dwords = cs->cur;
310 
311    /* Emit dummy DWORD field here */
312    tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0));
313 }
/* Condition words combining CP_COND_REG_EXEC_0_MODE(RENDER_MODE) with the
 * GMEM or SYSMEM bit respectively.
 * NOTE(review): presumably meant as the cond_flags argument of
 * tu_cond_exec_start() -- confirm against callers.
 */
314 #define CP_COND_EXEC_0_RENDER_MODE_GMEM \
315    (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_GMEM)
316 #define CP_COND_EXEC_0_RENDER_MODE_SYSMEM \
317    (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_SYSMEM)
318 
319 static inline void
tu_cond_exec_end(struct tu_cs * cs)320 tu_cond_exec_end(struct tu_cs *cs)
321 {
322    assert(cs->cond_flags);
323 
324    cs->cond_flags = 0;
325    /* Subtract one here to account for the DWORD field itself. */
326    *cs->cond_dwords = cs->cur - cs->cond_dwords - 1;
327 }
328 
/* Name/type overrides defined ahead of the generated register headers
 * included next: fd_reg_pair is spelled tu_reg_value here and BO references
 * are struct tu_bo pointers.
 * NOTE(review): presumably consumed by a6xx-pack.xml.h -- confirm there.
 */
329 #define fd_reg_pair tu_reg_value
330 #define __bo_type struct tu_bo *
331 
332 #include "a6xx.xml.h"
333 #include "a6xx-pack.xml.h"
334 
/* Assert that two integer expressions are equal.  On mismatch the spelled-out
 * expressions and their values (as hex) are printed to stderr before assert()
 * fires, so the message survives even though assert() alone would not show
 * the values.
 *
 * Both arguments are evaluated more than once; pass side-effect-free
 * expressions.  The values are cast to unsigned for printing because "%x"
 * requires an unsigned int argument.
 */
#define __assert_eq(a, b)                                               \
   do {                                                                 \
      if ((a) != (b)) {                                                 \
         fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", \
                 (unsigned) (a), (unsigned) (b));                       \
         assert((a) == (b));                                            \
      }                                                                 \
   } while (0)
342 
/* Helper for tu_cs_emit_regs(): write the payload of regs[i] through the
 * enclosing scope's dword cursor `p`, if slot i exists and names a register.
 *
 * The register must directly follow regs[0] by i (checked via __assert_eq).
 * Without a BO the value is written as one dword, plus its upper half when
 * the entry is flagged as an address.  With a BO, two dwords are written:
 * the BO iova plus offset, shifted right by bo_shift and OR'ed with the
 * value.
 */
#define __ONE_REG(i, regs)                                         \
   do {                                                            \
      if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) {               \
         __assert_eq(regs[0].reg + i, regs[i].reg);                \
         if (!regs[i].bo) {                                        \
            *p++ = regs[i].value;                                  \
            if (regs[i].is_address)                                \
               *p++ = regs[i].value >> 32;                         \
         } else {                                                  \
            uint64_t addr = regs[i].bo->iova + regs[i].bo_offset;  \
            addr >>= regs[i].bo_shift;                             \
            addr |= regs[i].value;                                 \
                                                                   \
            *p++ = addr;                                           \
            *p++ = addr >> 32;                                     \
         }                                                         \
      }                                                            \
   } while (0)
361 
/* Emits a sequence of register writes in order using a pkt4.  This will check
 * (at runtime on a !NDEBUG build) that the registers were actually set up in
 * order in the code.
 *
 * Between 1 and 16 registers may be passed; this is enforced at compile time
 * on the array size itself (a static assertion on a runtime variable would
 * not be a constant expression).
 *
 * Note that references to buffers aren't automatically added to the CS,
 * unlike in freedreno.  We are clever in various places to avoid duplicating
 * the reference add work.
 *
 * Also, 64-bit address registers don't have a way (currently) to set a 64-bit
 * address without having a reference to a BO, since the .dword field in the
 * register's struct is only 32-bit wide.  We should fix this in the pack
 * codegen later.
 *
 * The cs argument is evaluated more than once; pass a side-effect-free
 * expression.
 */
#define tu_cs_emit_regs(cs, ...) do {                   \
   const struct fd_reg_pair regs[] = { __VA_ARGS__ };   \
   unsigned count = ARRAY_SIZE(regs);                   \
                                                        \
   STATIC_ASSERT(ARRAY_SIZE(regs) > 0);                 \
   STATIC_ASSERT(ARRAY_SIZE(regs) <= 16);               \
                                                        \
   tu_cs_emit_pkt4((cs), regs[0].reg, count);           \
   uint32_t *p = (cs)->cur;                             \
   __ONE_REG( 0, regs);                                 \
   __ONE_REG( 1, regs);                                 \
   __ONE_REG( 2, regs);                                 \
   __ONE_REG( 3, regs);                                 \
   __ONE_REG( 4, regs);                                 \
   __ONE_REG( 5, regs);                                 \
   __ONE_REG( 6, regs);                                 \
   __ONE_REG( 7, regs);                                 \
   __ONE_REG( 8, regs);                                 \
   __ONE_REG( 9, regs);                                 \
   __ONE_REG(10, regs);                                 \
   __ONE_REG(11, regs);                                 \
   __ONE_REG(12, regs);                                 \
   __ONE_REG(13, regs);                                 \
   __ONE_REG(14, regs);                                 \
   __ONE_REG(15, regs);                                 \
   (cs)->cur = p;                                       \
   } while (0)
402 
403 #endif /* TU_CS_H */
404