1 /*
2  * Copyright © 2019 Raspberry Pi
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef V3DV_CL_H
25 #define V3DV_CL_H
26 
27 #include "broadcom/cle/v3d_packet_helpers.h"
28 
29 #include "list.h"
30 
/* Forward declarations for the types referenced by pointer below. */
struct v3dv_bo;
struct v3dv_job;
struct v3dv_cl;

/**
 * Tag that is never given a definition.  It exists only so the compiler can
 * type-check the write cursors handed to the cl_* helpers.
 */
struct v3dv_cl_out;

/* Defined outside this header; the reloc helpers below call it. */
void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
42 
/**
 * Address reference consumed by the CL packing functions: a buffer object
 * pointer paired with an offset.  The BO may be NULL; cl_pack_emit_reloc()
 * checks for that before using it.
 */
struct v3dv_cl_reloc {
   struct v3dv_bo *bo;   /* referenced BO, or NULL */
   uint32_t offset;      /* offset that goes with bo */
};
48 
49 struct v3dv_cl {
50    void *base;
51    struct v3dv_job *job;
52    struct v3dv_cl_out *next;
53    struct v3dv_bo *bo;
54    uint32_t size;
55    struct list_head bo_list;
56 };
57 
58 static inline struct v3dv_cl_reloc
__unpack_address(const uint8_t * cl,uint32_t s,uint32_t e)59 __unpack_address(const uint8_t *cl, uint32_t s, uint32_t e)
60 {
61     struct v3dv_cl_reloc reloc =
62             { NULL, __gen_unpack_uint(cl, s, e) << (31 - (e - s)) };
63     return reloc;
64 }
65 
66 static inline uint32_t
v3dv_cl_offset(struct v3dv_cl * cl)67 v3dv_cl_offset(struct v3dv_cl *cl)
68 {
69    return (char *)cl->next - (char *)cl->base;
70 }
71 
72 static inline struct v3dv_cl_reloc
v3dv_cl_address(struct v3dv_bo * bo,uint32_t offset)73 v3dv_cl_address(struct v3dv_bo *bo, uint32_t offset)
74 {
75    struct v3dv_cl_reloc reloc = {
76       .bo = bo,
77       .offset = offset,
78    };
79    return reloc;
80 }
81 
82 static inline struct v3dv_cl_reloc
v3dv_cl_get_address(struct v3dv_cl * cl)83 v3dv_cl_get_address(struct v3dv_cl *cl)
84 {
85    return (struct v3dv_cl_reloc){ .bo = cl->bo, .offset = v3dv_cl_offset(cl) };
86 }
87 
/* CL lifetime entry points, implemented out of line.  Their exact setup and
 * teardown behaviour is not visible from this header.
 */
void v3dv_cl_init(struct v3dv_job *job, struct v3dv_cl *cl);
void v3dv_cl_destroy(struct v3dv_cl *cl);
90 
91 static inline struct v3dv_cl_out *
cl_start(struct v3dv_cl * cl)92 cl_start(struct v3dv_cl *cl)
93 {
94    return cl->next;
95 }
96 
97 static inline void
cl_end(struct v3dv_cl * cl,struct v3dv_cl_out * next)98 cl_end(struct v3dv_cl *cl, struct v3dv_cl_out *next)
99 {
100    cl->next = next;
101    assert(v3dv_cl_offset(cl) <= cl->size);
102 }
103 
/** Moves the cursor *cl forward by n bytes. */
static inline void
cl_advance(struct v3dv_cl_out **cl, uint32_t n)
{
   char *cursor = (char *)*cl;

   cursor += n;
   *cl = (struct v3dv_cl_out *)cursor;
}
109 
/**
 * Writes n as a 32-bit word at the cursor and advances the cursor by four
 * bytes.  The store goes straight through a uint32_t pointer, so the cursor
 * must already be suitably aligned for that access.
 */
static inline void
cl_aligned_u32(struct v3dv_cl_out **cl, uint32_t n)
{
   uint32_t *slot = (uint32_t *)*cl;

   *slot = n;
   cl_advance(cl, sizeof(*slot));
}
116 
/**
 * Writes a float at the cursor as a 32-bit word.  The value is converted with
 * fui() (defined elsewhere; presumably it yields the float's raw bits) and
 * emitted through cl_aligned_u32().
 */
static inline void
cl_aligned_f(struct v3dv_cl_out **cl, float f)
{
   const uint32_t bits = fui(f);

   cl_aligned_u32(cl, bits);
}
122 
123 static inline void
cl_aligned_reloc(struct v3dv_cl * cl,struct v3dv_cl_out ** cl_out,struct v3dv_bo * bo,uint32_t offset)124 cl_aligned_reloc(struct v3dv_cl *cl,
125                  struct v3dv_cl_out **cl_out,
126                  struct v3dv_bo *bo,
127                  uint32_t offset)
128 {
129    cl_aligned_u32(cl_out, bo->offset + offset);
130    v3dv_job_add_bo(cl->job, bo);
131 }
132 
/* Implemented out of line.  Judging by the names these make room in the CL
 * before a write; the meaning of the returned value is not visible from
 * this header.
 */
uint32_t v3dv_cl_ensure_space(struct v3dv_cl *cl,
                              uint32_t space,
                              uint32_t alignment);
void v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space);
135 
/* ALIGN is provided as a macro here because cl_aligned_packet_length() has to
 * be usable when declaring struct fields.
 *
 * Rounds v up to the next multiple of a.  The mask arithmetic only gives that
 * result when a is a power of two.  Note that a is evaluated twice.
 */
#define ALIGN(v, a) \
        (((v) + (a) - 1) & ~((a) - 1))
141 
/* Name builders for the per-packet symbols.  Each pastes a suffix onto the
 * packet name and passes the result through V3DX() (defined elsewhere):
 *
 *   cl_packet_header(P)  -> V3DX(P_header)
 *   cl_packet_length(P)  -> V3DX(P_length)
 *   cl_packet_pack(P)    -> V3DX(P_pack)
 *   cl_packet_struct(P)  -> V3DX(P)
 *
 * cl_aligned_packet_length() is cl_packet_length() rounded up with ALIGN().
 */
#define cl_packet_header(pkt) V3DX(pkt ## _header)
#define cl_packet_length(pkt) V3DX(pkt ## _length)
#define cl_aligned_packet_length(pkt, align) \
        ALIGN(cl_packet_length(pkt), align)
#define cl_packet_pack(pkt)   V3DX(pkt ## _pack)
#define cl_packet_struct(pkt) V3DX(pkt)
147 
/* Emits one packet into a CL.
 *
 * The macro declares a temporary unpacked struct called `name`, pre-filled
 * with the packet's header values, and lets the statement that follows
 * adjust its fields:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
 *     flags.flat_shade_flags = 1 << 2;
 * }
 *
 * To emit the defaults only, follow the macro with a bare ';':
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
 *
 * How it works: this is a for loop whose body (the block or the ';') runs
 * exactly once.  The loop's iteration expression then packs `name` at the
 * CL's cursor, advances the cursor by the packet length, commits it with
 * cl_end(), and clears _loop_terminate so the loop condition fails.
 *
 * Because the packing lives in the iteration expression, leaving the body
 * with `break` skips the emit.  `cl` is evaluated more than once.
 */
#define cl_emit(cl, packet, name)                                        \
        for (struct cl_packet_struct(packet)                             \
                     name = { cl_packet_header(packet) },                \
                     *_loop_terminate = &name;                           \
             __builtin_expect(_loop_terminate != NULL, 1);               \
             ({                                                          \
                     struct v3dv_cl_out *cl_out = cl_start(cl);          \
                     cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \
                     cl_advance(&cl_out, cl_packet_length(packet));      \
                     cl_end(cl, cl_out);                                 \
                     _loop_terminate = NULL;                             \
             }))
175 
/* Emits one packet into a CL, merged with caller-supplied prepacked bytes.
 *
 * Uses the same run-the-body-once for loop as cl_emit(): `name` is a
 * temporary unpacked struct, pre-filled with the packet header, that the
 * following statement may modify.  Afterwards the struct is packed into a
 * scratch buffer and every byte is OR-ed with the matching byte of
 * `prepacked` while being written at the CL's cursor; the cursor is then
 * advanced by the packet length and committed with cl_end().
 *
 * `prepacked` must be indexable for at least cl_packet_length(packet) bytes.
 * It is evaluated once per byte, and `cl` more than once, so neither should
 * have side effects.
 *
 * The scratch buffer has a deliberately unusual name: `prepacked` is expanded
 * inside its scope, so a plain name such as `packed` would shadow a caller
 * variable of the same name and silently OR the packet with itself.
 */
#define cl_emit_with_prepacked(cl, packet, prepacked, name)              \
        for (struct cl_packet_struct(packet) name = {                    \
                cl_packet_header(packet)                                 \
        },                                                               \
        *_loop_terminate = &name;                                        \
        __builtin_expect(_loop_terminate != NULL, 1);                    \
        ({                                                               \
                struct v3dv_cl_out *cl_out = cl_start(cl);               \
                uint8_t _cl_packed[cl_packet_length(packet)];            \
                cl_packet_pack(packet)(cl, _cl_packed, &name);           \
                for (int _i = 0; _i < cl_packet_length(packet); _i++)    \
                        ((uint8_t *)cl_out)[_i] =                        \
                                _cl_packed[_i] | (prepacked)[_i];        \
                cl_advance(&cl_out, cl_packet_length(packet));           \
                cl_end(cl, cl_out);                                      \
                _loop_terminate = NULL;                                  \
        }))
192 
193 /**
194  * Helper function called by the XML-generated pack functions for filling in
195  * an address field in shader records.
196  *
197  * Since we have a private address space as of VC5, our BOs can have lifelong
198  * offsets, and all the kernel needs to know is which BOs need to be paged in
199  * for this exec.
200  */
201 static inline void
cl_pack_emit_reloc(struct v3dv_cl * cl,const struct v3dv_cl_reloc * reloc)202 cl_pack_emit_reloc(struct v3dv_cl *cl, const struct v3dv_cl_reloc *reloc)
203 {
204         if (reloc->bo)
205                 v3dv_job_add_bo(cl->job, reloc->bo);
206 }
207 
/* Copies `size` bytes of already-packed data from `packet` to the CL's cursor
 * and advances the cursor past them.
 *
 * `cl` must be a struct v3dv_cl pointer.  Each argument is evaluated exactly
 * once.  The write goes through cl_start()/cl_end() like cl_emit(), so
 * running past cl->size trips the same assertion in cl_end().
 */
#define cl_emit_prepacked_sized(cl, packet, size) do {                   \
        struct v3dv_cl *_pp_cl = (cl);                                   \
        const size_t _pp_size = (size);                                  \
        struct v3dv_cl_out *_pp_out = cl_start(_pp_cl);                  \
        memcpy(_pp_out, (packet), _pp_size);                             \
        cl_advance(&_pp_out, _pp_size);                                  \
        cl_end(_pp_cl, _pp_out);                                         \
} while (0)
212 
/* Emits *blob verbatim.  The byte count is sizeof(*(blob)), so pass a pointer
 * to the complete object (for an array, its address rather than its first
 * element).
 */
#define cl_emit_prepacked(list, blob) \
        cl_emit_prepacked_sized(list, blob, sizeof(*(blob)))
215 
/* Packs one packet into caller-provided memory instead of a CL.
 *
 * Uses the same run-the-body-once for loop as cl_emit(): `name` is a
 * temporary unpacked struct, pre-filled with the packet header, that the
 * following statement may modify.  Afterwards it is packed to the bytes at
 * `packed`, with NULL passed as the pack function's first argument, and
 * (through the VG() wrapper) those bytes are checked for being defined.
 *
 * `packed` is parenthesized before the cast so that an argument such as
 * `base + n` is converted as a whole rather than as `(uint8_t *)base + n`.
 * It may be evaluated twice, so it should have no side effects.
 */
#define v3dv_pack(packed, packet, name)                                  \
        for (struct cl_packet_struct(packet) name = {                    \
                cl_packet_header(packet)                                 \
        },                                                               \
        *_loop_terminate = &name;                                        \
        __builtin_expect(_loop_terminate != NULL, 1);                    \
        ({                                                               \
                cl_packet_pack(packet)(NULL, (uint8_t *)(packed), &name); \
                VG(VALGRIND_CHECK_MEM_IS_DEFINED((uint8_t *)(packed),    \
                                                 cl_packet_length(packet))); \
                _loop_terminate = NULL;                                  \
        }))
228 
229 #endif /* V3DV_CL_H */
230