1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef VC5_CL_H
25 #define VC5_CL_H
26 
27 #include <stdint.h>
28 
29 #include "util/u_math.h"
30 #include "util/macros.h"
31 
/*
 * Forward declarations, so the structs and prototypes in this header can
 * name these types through pointers.
 */
struct vc5_bo;
struct vc5_job;
struct vc5_cl;

/**
 * Intentionally left undefined.  CL write cursors are typed as pointers to
 * this struct purely so the compiler can typecheck that the right pointer is
 * being passed to the cl_*() helpers.
 */
struct vc5_cl_out;
41 
/**
 * A reference to a BO used in the CL packing functions: the buffer object
 * together with an offset.
 */
struct vc5_cl_reloc {
        /* Referenced BO.  May be NULL; __gen_address_offset() then uses the
         * offset on its own. */
        struct vc5_bo *bo;

        /* Added to the BO's own offset to form the address. */
        uint32_t offset;
};
47 
/* Forward declaration; the definition is near the end of this header. */
static inline void cl_pack_emit_reloc(struct vc5_cl *cl,
                                      const struct vc5_cl_reloc *);

/*
 * Hooks for the XML-generated pack functions.  Presumably these name the
 * user-data type, the address type, how an address is turned into a number
 * and the per-address callback -- confirm against the generated headers.
 */
#define __gen_user_data struct vc5_cl
#define __gen_address_type struct vc5_cl_reloc

/* The BO's offset (0 when the reloc has no BO) plus the reloc's offset. */
#define __gen_address_offset(reloc) \
        (((reloc)->bo ? (reloc)->bo->offset : 0) + (reloc)->offset)

#define __gen_emit_reloc cl_pack_emit_reloc
55 
/**
 * State of one command list being written.
 */
struct vc5_cl {
        /* Start of the CL's storage; cl_offset() measures from here. */
        void *base;

        /* Job that BOs referenced from this CL get added to. */
        struct vc5_job *job;

        /* Write cursor: where the next emitted data goes. */
        struct vc5_cl_out *next;

        /* BO reported by cl_get_address() for locations in this CL. */
        struct vc5_bo *bo;

        /* Upper bound that cl_end() asserts cl_offset() against. */
        uint32_t size;
};
63 
/*
 * Implemented outside this header.
 * NOTE(review): the one-line descriptions are inferred from the names and
 * signatures only -- confirm against the definitions.
 */

/* Presumably prepares `cl` for use with `job`. */
void vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl);

/* Presumably releases whatever vc5_init_cl() and later growth acquired. */
void vc5_destroy_cl(struct vc5_cl *cl);

/* Presumably prints `size` bytes of CL contents starting at `cl`. */
void vc5_dump_cl(void *cl, uint32_t size, bool is_render);

/* Presumably maps `bo` to an index within `job`. */
uint32_t vc5_gem_hindex(struct vc5_job *job, struct vc5_bo *bo);
68 
69 struct PACKED unaligned_16 { uint16_t x; };
70 struct PACKED unaligned_32 { uint32_t x; };
71 
cl_offset(struct vc5_cl * cl)72 static inline uint32_t cl_offset(struct vc5_cl *cl)
73 {
74         return (char *)cl->next - (char *)cl->base;
75 }
76 
cl_get_address(struct vc5_cl * cl)77 static inline struct vc5_cl_reloc cl_get_address(struct vc5_cl *cl)
78 {
79         return (struct vc5_cl_reloc){ .bo = cl->bo, .offset = cl_offset(cl) };
80 }
81 
/** Moves the cursor *cl forward by n bytes. */
static inline void
cl_advance(struct vc5_cl_out **cl, uint32_t n)
{
        char *cursor = (char *)*cl;

        *cl = (struct vc5_cl_out *)(cursor + n);
}
87 
88 static inline struct vc5_cl_out *
cl_start(struct vc5_cl * cl)89 cl_start(struct vc5_cl *cl)
90 {
91         return cl->next;
92 }
93 
94 static inline void
cl_end(struct vc5_cl * cl,struct vc5_cl_out * next)95 cl_end(struct vc5_cl *cl, struct vc5_cl_out *next)
96 {
97         cl->next = next;
98         assert(cl_offset(cl) <= cl->size);
99 }
100 
101 
102 static inline void
put_unaligned_32(struct vc5_cl_out * ptr,uint32_t val)103 put_unaligned_32(struct vc5_cl_out *ptr, uint32_t val)
104 {
105         struct unaligned_32 *p = (void *)ptr;
106         p->x = val;
107 }
108 
109 static inline void
put_unaligned_16(struct vc5_cl_out * ptr,uint16_t val)110 put_unaligned_16(struct vc5_cl_out *ptr, uint16_t val)
111 {
112         struct unaligned_16 *p = (void *)ptr;
113         p->x = val;
114 }
115 
/** Writes one byte at the cursor and advances the cursor past it. */
static inline void
cl_u8(struct vc5_cl_out **cl, uint8_t n)
{
        uint8_t *dst = (uint8_t *)*cl;

        dst[0] = n;
        cl_advance(cl, sizeof(n));
}
122 
/**
 * Writes a 16-bit value at the cursor with put_unaligned_16() and advances
 * the cursor by two bytes.
 */
static inline void
cl_u16(struct vc5_cl_out **cl, uint16_t n)
{
        struct vc5_cl_out *dst = *cl;

        put_unaligned_16(dst, n);
        cl_advance(cl, sizeof(n));
}
129 
/**
 * Writes a 32-bit value at the cursor with put_unaligned_32() and advances
 * the cursor by four bytes.
 */
static inline void
cl_u32(struct vc5_cl_out **cl, uint32_t n)
{
        struct vc5_cl_out *dst = *cl;

        put_unaligned_32(dst, n);
        cl_advance(cl, sizeof(n));
}
136 
/**
 * Writes a 32-bit value at the cursor with a plain uint32_t store (unlike
 * cl_u32(), which goes through put_unaligned_32()) and advances the cursor
 * by four bytes.
 */
static inline void
cl_aligned_u32(struct vc5_cl_out **cl, uint32_t n)
{
        uint32_t *dst = (uint32_t *)*cl;

        *dst = n;
        cl_advance(cl, sizeof(*dst));
}
143 
144 static inline void
cl_aligned_reloc(struct vc5_cl * cl,struct vc5_cl_out ** cl_out,struct vc5_bo * bo,uint32_t offset)145 cl_aligned_reloc(struct vc5_cl *cl,
146                  struct vc5_cl_out **cl_out,
147                  struct vc5_bo *bo, uint32_t offset)
148 {
149         cl_aligned_u32(cl_out, bo->offset + offset);
150         vc5_job_add_bo(cl->job, bo);
151 }
152 
/**
 * Writes a pointer-sized value at the cursor and advances the cursor by
 * sizeof(void *).
 */
static inline void
cl_ptr(struct vc5_cl_out **cl, void *ptr)
{
        struct vc5_cl_out **slot = (struct vc5_cl_out **)*cl;

        *slot = ptr;
        cl_advance(cl, sizeof(void *));
}
159 
/**
 * Writes a float at the cursor as the 32-bit value returned by fui(f),
 * using the unaligned store path of cl_u32().
 */
static inline void
cl_f(struct vc5_cl_out **cl, float f)
{
        uint32_t bits = fui(f);

        cl_u32(cl, bits);
}
165 
/**
 * Writes a float at the cursor as the 32-bit value returned by fui(f),
 * using the plain store of cl_aligned_u32().
 */
static inline void
cl_aligned_f(struct vc5_cl_out **cl, float f)
{
        uint32_t bits = fui(f);

        cl_aligned_u32(cl, bits);
}
171 
172 /**
173  * Reference to a BO with its associated offset, used in the pack process.
174  */
175 static inline struct vc5_cl_reloc
cl_address(struct vc5_bo * bo,uint32_t offset)176 cl_address(struct vc5_bo *bo, uint32_t offset)
177 {
178         struct vc5_cl_reloc reloc = {
179                 .bo = bo,
180                 .offset = offset,
181         };
182         return reloc;
183 }
184 
/*
 * Implemented outside this header.
 * NOTE(review): the descriptions are inferred from the names and signatures
 * only -- confirm against the definitions, including what the uint32_t
 * return value means.
 */

/* Presumably makes room in `cl` for `size` bytes at alignment `align`. */
uint32_t vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t size,
                             uint32_t align);

/* Presumably makes room for `size` bytes, branching to new storage if needed. */
void vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t size);
187 
/*
 * Map a packet name onto the identifiers of its header initializer, its
 * length, its pack function and its unpacked struct.  Each one pastes a
 * suffix onto the name and wraps the result in V3DX().
 */
#define cl_packet_header(packet)        V3DX(packet ## _header)
#define cl_packet_length(packet)        V3DX(packet ## _length)
#define cl_packet_pack(packet)          V3DX(packet ## _pack)
#define cl_packet_struct(packet)        V3DX(packet)
192 
/**
 * Reserves `size` bytes at the cursor: returns the cursor's position on
 * entry and leaves the cursor advanced past the reserved bytes.
 */
static inline void *
cl_get_emit_space(struct vc5_cl_out **cl, size_t size)
{
        struct vc5_cl_out *reserved = *cl;

        cl_advance(cl, size);
        return reserved;
}
200 
/* Macro for setting up an emit of a CL struct.  A temporary unpacked struct
 * named by the third argument is created, and the statement following the
 * macro gets to set its fields before it is packed:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
 *     flags.flat_shade_flags = 1 << 2;
 * }
 *
 * or default values only can be emitted with just:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
 *
 * The trick here is that we make a for loop that will execute the body
 * (either the block or the ';' after the macro invocation) exactly once.
 * Once the body has run, the loop's step expression packs the struct at the
 * CL's write cursor, advances the cursor by the packet length, and clears
 * _loop_terminate so that the loop exits.
 *
 * The cl argument is evaluated exactly once, in the step expression.
 */
#define cl_emit(cl, packet, name)                                \
        for (struct cl_packet_struct(packet) name = {            \
                cl_packet_header(packet)                         \
        },                                                       \
        *_loop_terminate = &name;                                \
        __builtin_expect(_loop_terminate != NULL, 1);            \
        ({                                                       \
                struct vc5_cl *_emit_cl = (cl);                  \
                struct vc5_cl_out *cl_out = cl_start(_emit_cl);  \
                cl_packet_pack(packet)(_emit_cl, (uint8_t *)cl_out, &name); \
                cl_advance(&cl_out, cl_packet_length(packet));   \
                cl_end(_emit_cl, cl_out);                        \
                _loop_terminate = NULL;                          \
        }))
228 
/* Like cl_emit(), but the freshly packed bytes are ORed, byte by byte, with
 * the first cl_packet_length(packet) bytes of `prepacked` before landing in
 * the CL.  The struct is packed into a temporary buffer first so the OR can
 * be applied while copying to the write cursor.
 *
 * The cl argument is evaluated exactly once, in the step expression.
 */
#define cl_emit_with_prepacked(cl, packet, prepacked, name)      \
        for (struct cl_packet_struct(packet) name = {            \
                cl_packet_header(packet)                         \
        },                                                       \
        *_loop_terminate = &name;                                \
        __builtin_expect(_loop_terminate != NULL, 1);            \
        ({                                                       \
                struct vc5_cl *_emit_cl = (cl);                  \
                struct vc5_cl_out *cl_out = cl_start(_emit_cl);  \
                uint8_t packed[cl_packet_length(packet)];         \
                cl_packet_pack(packet)(_emit_cl, packed, &name); \
                for (int _i = 0; _i < cl_packet_length(packet); _i++) \
                        ((uint8_t *)cl_out)[_i] = packed[_i] | (prepacked)[_i]; \
                cl_advance(&cl_out, cl_packet_length(packet));   \
                cl_end(_emit_cl, cl_out);                        \
                _loop_terminate = NULL;                          \
        }))
245 
/* Copies an already-packed packet into the CL.  `packet` is a pointer to the
 * packed bytes; sizeof(*(packet)) bytes are copied to the write cursor and
 * the cursor is advanced past them.
 *
 * The cursor is stored back through cl_end(), so this path gets the same
 * bounds assertion as cl_emit().  The cl argument is evaluated once, and
 * `packet` is parenthesized so that expression arguments are sized and
 * copied as a whole.
 */
#define cl_emit_prepacked(cl, packet) do {                       \
        struct vc5_cl *_prepacked_cl = (cl);                     \
        struct vc5_cl_out *_prepacked_out = cl_start(_prepacked_cl); \
        memcpy(_prepacked_out, (packet), sizeof(*(packet)));     \
        cl_advance(&_prepacked_out, sizeof(*(packet)));          \
        cl_end(_prepacked_cl, _prepacked_out);                   \
} while (0)
250 
/* Packs a packet into caller-provided memory instead of a CL.  Uses the same
 * run-the-body-once for loop as cl_emit(): the statement following the macro
 * fills in the fields of `name`, then the step expression packs the struct
 * to `packed`, passing NULL as the pack function's CL argument.
 *
 * `packed` is evaluated exactly once and converted to uint8_t * as a whole,
 * so expression arguments behave as written.
 */
#define v3dx_pack(packed, packet, name)                          \
        for (struct cl_packet_struct(packet) name = {            \
                cl_packet_header(packet)                         \
        },                                                       \
        *_loop_terminate = &name;                                \
        __builtin_expect(_loop_terminate != NULL, 1);            \
        ({                                                       \
                uint8_t *_pack_dst = (uint8_t *)(packed);        \
                cl_packet_pack(packet)(NULL, _pack_dst, &name);  \
                VG(VALGRIND_CHECK_MEM_IS_DEFINED(_pack_dst,      \
                                                 cl_packet_length(packet))); \
                _loop_terminate = NULL;                          \
        }))
263 
264 /**
265  * Helper function called by the XML-generated pack functions for filling in
266  * an address field in shader records.
267  *
268  * Since we have a private address space as of VC5, our BOs can have lifelong
269  * offsets, and all the kernel needs to know is which BOs need to be paged in
270  * for this exec.
271  */
272 static inline void
cl_pack_emit_reloc(struct vc5_cl * cl,const struct vc5_cl_reloc * reloc)273 cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *reloc)
274 {
275         if (reloc->bo)
276                 vc5_job_add_bo(cl->job, reloc->bo);
277 }
278 
279 #endif /* VC5_CL_H */
280