1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file vc5_job.c
25  *
26  * Functions for submitting VC5 render jobs to the kernel.
27  */
28 
29 #include <xf86drm.h>
30 #include "vc5_context.h"
31 /* The OQ/semaphore packets are the same across V3D versions. */
32 #define V3D_VERSION 33
33 #include "broadcom/cle/v3dx_pack.h"
34 #include "broadcom/common/v3d_macros.h"
35 #include "util/hash_table.h"
36 #include "util/ralloc.h"
37 #include "util/set.h"
38 #include "broadcom/clif/clif_dump.h"
39 
/**
 * Drops the entry stored under @key from @ht.
 *
 * The lookup result is handed straight to _mesa_hash_table_remove() without
 * being checked here.
 */
static void
remove_from_ht(struct hash_table *ht, void *key)
{
        _mesa_hash_table_remove(ht, _mesa_hash_table_search(ht, key));
}
46 
47 static void
vc5_job_free(struct vc5_context * vc5,struct vc5_job * job)48 vc5_job_free(struct vc5_context *vc5, struct vc5_job *job)
49 {
50         struct set_entry *entry;
51 
52         set_foreach(job->bos, entry) {
53                 struct vc5_bo *bo = (struct vc5_bo *)entry->key;
54                 vc5_bo_unreference(&bo);
55         }
56 
57         remove_from_ht(vc5->jobs, &job->key);
58 
59         if (job->write_prscs) {
60                 struct set_entry *entry;
61 
62                 set_foreach(job->write_prscs, entry) {
63                         const struct pipe_resource *prsc = entry->key;
64 
65                         remove_from_ht(vc5->write_jobs, (void *)prsc);
66                 }
67         }
68 
69         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
70                 if (job->cbufs[i]) {
71                         remove_from_ht(vc5->write_jobs, job->cbufs[i]->texture);
72                         pipe_surface_reference(&job->cbufs[i], NULL);
73                 }
74         }
75         if (job->zsbuf) {
76                 remove_from_ht(vc5->write_jobs, job->zsbuf->texture);
77                 pipe_surface_reference(&job->zsbuf, NULL);
78         }
79 
80         if (vc5->job == job)
81                 vc5->job = NULL;
82 
83         vc5_destroy_cl(&job->bcl);
84         vc5_destroy_cl(&job->rcl);
85         vc5_destroy_cl(&job->indirect);
86         vc5_bo_unreference(&job->tile_alloc);
87         vc5_bo_unreference(&job->tile_state);
88 
89         ralloc_free(job);
90 }
91 
92 static struct vc5_job *
vc5_job_create(struct vc5_context * vc5)93 vc5_job_create(struct vc5_context *vc5)
94 {
95         struct vc5_job *job = rzalloc(vc5, struct vc5_job);
96 
97         job->vc5 = vc5;
98 
99         vc5_init_cl(job, &job->bcl);
100         vc5_init_cl(job, &job->rcl);
101         vc5_init_cl(job, &job->indirect);
102 
103         job->draw_min_x = ~0;
104         job->draw_min_y = ~0;
105         job->draw_max_x = 0;
106         job->draw_max_y = 0;
107 
108         job->bos = _mesa_set_create(job,
109                                     _mesa_hash_pointer,
110                                     _mesa_key_pointer_equal);
111         return job;
112 }
113 
114 void
vc5_job_add_bo(struct vc5_job * job,struct vc5_bo * bo)115 vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo)
116 {
117         if (!bo)
118                 return;
119 
120         if (_mesa_set_search(job->bos, bo))
121                 return;
122 
123         vc5_bo_reference(bo);
124         _mesa_set_add(job->bos, bo);
125         job->referenced_size += bo->size;
126 
127         uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles;
128 
129         if (job->submit.bo_handle_count >= job->bo_handles_size) {
130                 job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
131                 bo_handles = reralloc(job, bo_handles,
132                                       uint32_t, job->bo_handles_size);
133                 job->submit.bo_handles = (uintptr_t)(void *)bo_handles;
134         }
135         bo_handles[job->submit.bo_handle_count++] = bo->handle;
136 }
137 
138 void
vc5_job_add_write_resource(struct vc5_job * job,struct pipe_resource * prsc)139 vc5_job_add_write_resource(struct vc5_job *job, struct pipe_resource *prsc)
140 {
141         struct vc5_context *vc5 = job->vc5;
142 
143         if (!job->write_prscs) {
144                 job->write_prscs = _mesa_set_create(job,
145                                                     _mesa_hash_pointer,
146                                                     _mesa_key_pointer_equal);
147         }
148 
149         _mesa_set_add(job->write_prscs, prsc);
150         _mesa_hash_table_insert(vc5->write_jobs, prsc, job);
151 }
152 
153 void
vc5_flush_jobs_writing_resource(struct vc5_context * vc5,struct pipe_resource * prsc)154 vc5_flush_jobs_writing_resource(struct vc5_context *vc5,
155                                 struct pipe_resource *prsc)
156 {
157         struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs,
158                                                            prsc);
159         if (entry) {
160                 struct vc5_job *job = entry->data;
161                 vc5_job_submit(vc5, job);
162         }
163 }
164 
165 void
vc5_flush_jobs_reading_resource(struct vc5_context * vc5,struct pipe_resource * prsc)166 vc5_flush_jobs_reading_resource(struct vc5_context *vc5,
167                                 struct pipe_resource *prsc)
168 {
169         struct vc5_resource *rsc = vc5_resource(prsc);
170 
171         vc5_flush_jobs_writing_resource(vc5, prsc);
172 
173         struct hash_entry *entry;
174         hash_table_foreach(vc5->jobs, entry) {
175                 struct vc5_job *job = entry->data;
176 
177                 if (_mesa_set_search(job->bos, rsc->bo)) {
178                         vc5_job_submit(vc5, job);
179                         /* Reminder: vc5->jobs is safe to keep iterating even
180                          * after deletion of an entry.
181                          */
182                         continue;
183                 }
184         }
185 }
186 
187 static void
vc5_job_set_tile_buffer_size(struct vc5_job * job)188 vc5_job_set_tile_buffer_size(struct vc5_job *job)
189 {
190         static const uint8_t tile_sizes[] = {
191                 64, 64,
192                 64, 32,
193                 32, 32,
194                 32, 16,
195                 16, 16,
196         };
197         int tile_size_index = 0;
198         if (job->msaa)
199                 tile_size_index += 2;
200 
201         if (job->cbufs[3] || job->cbufs[2])
202                 tile_size_index += 2;
203         else if (job->cbufs[1])
204                 tile_size_index++;
205 
206         int max_bpp = RENDER_TARGET_MAXIMUM_32BPP;
207         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
208                 if (job->cbufs[i]) {
209                         struct vc5_surface *surf = vc5_surface(job->cbufs[i]);
210                         max_bpp = MAX2(max_bpp, surf->internal_bpp);
211                 }
212         }
213         job->internal_bpp = max_bpp;
214         STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0);
215         tile_size_index += max_bpp;
216 
217         assert(tile_size_index < ARRAY_SIZE(tile_sizes));
218         job->tile_width = tile_sizes[tile_size_index * 2 + 0];
219         job->tile_height = tile_sizes[tile_size_index * 2 + 1];
220 }
221 
222 /**
223  * Returns a vc5_job struture for tracking V3D rendering to a particular FBO.
224  *
225  * If we've already started rendering to this FBO, then return old same job,
226  * otherwise make a new one.  If we're beginning rendering to an FBO, make
227  * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
228  * have been flushed.
229  */
230 struct vc5_job *
vc5_get_job(struct vc5_context * vc5,struct pipe_surface ** cbufs,struct pipe_surface * zsbuf)231 vc5_get_job(struct vc5_context *vc5,
232             struct pipe_surface **cbufs, struct pipe_surface *zsbuf)
233 {
234         /* Return the existing job for this FBO if we have one */
235         struct vc5_job_key local_key = {
236                 .cbufs = {
237                         cbufs[0],
238                         cbufs[1],
239                         cbufs[2],
240                         cbufs[3],
241                 },
242                 .zsbuf = zsbuf,
243         };
244         struct hash_entry *entry = _mesa_hash_table_search(vc5->jobs,
245                                                            &local_key);
246         if (entry)
247                 return entry->data;
248 
249         /* Creating a new job.  Make sure that any previous jobs reading or
250          * writing these buffers are flushed.
251          */
252         struct vc5_job *job = vc5_job_create(vc5);
253 
254         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
255                 if (cbufs[i]) {
256                         vc5_flush_jobs_reading_resource(vc5, cbufs[i]->texture);
257                         pipe_surface_reference(&job->cbufs[i], cbufs[i]);
258 
259                         if (cbufs[i]->texture->nr_samples > 1)
260                                 job->msaa = true;
261                 }
262         }
263         if (zsbuf) {
264                 vc5_flush_jobs_reading_resource(vc5, zsbuf->texture);
265                 pipe_surface_reference(&job->zsbuf, zsbuf);
266                 if (zsbuf->texture->nr_samples > 1)
267                         job->msaa = true;
268         }
269 
270         vc5_job_set_tile_buffer_size(job);
271 
272         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
273                 if (cbufs[i])
274                         _mesa_hash_table_insert(vc5->write_jobs,
275                                                 cbufs[i]->texture, job);
276         }
277         if (zsbuf)
278                 _mesa_hash_table_insert(vc5->write_jobs, zsbuf->texture, job);
279 
280         memcpy(&job->key, &local_key, sizeof(local_key));
281         _mesa_hash_table_insert(vc5->jobs, &job->key, job);
282 
283         return job;
284 }
285 
286 struct vc5_job *
vc5_get_job_for_fbo(struct vc5_context * vc5)287 vc5_get_job_for_fbo(struct vc5_context *vc5)
288 {
289         if (vc5->job)
290                 return vc5->job;
291 
292         struct pipe_surface **cbufs = vc5->framebuffer.cbufs;
293         struct pipe_surface *zsbuf = vc5->framebuffer.zsbuf;
294         struct vc5_job *job = vc5_get_job(vc5, cbufs, zsbuf);
295 
296         /* The dirty flags are tracking what's been updated while vc5->job has
297          * been bound, so set them all to ~0 when switching between jobs.  We
298          * also need to reset all state at the start of rendering.
299          */
300         vc5->dirty = ~0;
301 
302         /* If we're binding to uninitialized buffers, no need to load their
303          * contents before drawing.
304          */
305         for (int i = 0; i < 4; i++) {
306                 if (cbufs[i]) {
307                         struct vc5_resource *rsc = vc5_resource(cbufs[i]->texture);
308                         if (!rsc->writes)
309                                 job->cleared |= PIPE_CLEAR_COLOR0 << i;
310                 }
311         }
312 
313         if (zsbuf) {
314                 struct vc5_resource *rsc = vc5_resource(zsbuf->texture);
315                 if (!rsc->writes)
316                         job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
317         }
318 
319         job->draw_tiles_x = DIV_ROUND_UP(vc5->framebuffer.width,
320                                          job->tile_width);
321         job->draw_tiles_y = DIV_ROUND_UP(vc5->framebuffer.height,
322                                          job->tile_height);
323 
324         vc5->job = job;
325 
326         return job;
327 }
328 
329 static bool
vc5_clif_dump_lookup(void * data,uint32_t addr,void ** vaddr)330 vc5_clif_dump_lookup(void *data, uint32_t addr, void **vaddr)
331 {
332         struct vc5_job *job = data;
333         struct set_entry *entry;
334 
335         set_foreach(job->bos, entry) {
336                 struct vc5_bo *bo = (void *)entry->key;
337 
338                 if (addr >= bo->offset &&
339                     addr < bo->offset + bo->size) {
340                         vc5_bo_map(bo);
341                         *vaddr = bo->map + addr - bo->offset;
342                         return true;
343                 }
344         }
345 
346         return false;
347 }
348 
349 static void
vc5_clif_dump(struct vc5_context * vc5,struct vc5_job * job)350 vc5_clif_dump(struct vc5_context *vc5, struct vc5_job *job)
351 {
352         if (!(V3D_DEBUG & V3D_DEBUG_CL))
353                 return;
354 
355         struct clif_dump *clif = clif_dump_init(&vc5->screen->devinfo,
356                                                 stderr, vc5_clif_dump_lookup,
357                                                 job);
358 
359         fprintf(stderr, "BCL: 0x%08x..0x%08x\n",
360                 job->submit.bcl_start, job->submit.bcl_end);
361 
362         clif_dump_add_cl(clif, job->submit.bcl_start, job->submit.bcl_end);
363 
364         fprintf(stderr, "RCL: 0x%08x..0x%08x\n",
365                 job->submit.rcl_start, job->submit.rcl_end);
366         clif_dump_add_cl(clif, job->submit.rcl_start, job->submit.rcl_end);
367 }
368 
369 /**
370  * Submits the job to the kernel and then reinitializes it.
371  */
372 void
vc5_job_submit(struct vc5_context * vc5,struct vc5_job * job)373 vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job)
374 {
375         MAYBE_UNUSED struct vc5_screen *screen = vc5->screen;
376 
377         if (!job->needs_flush)
378                 goto done;
379 
380         /* The RCL setup would choke if the draw bounds cause no drawing, so
381          * just drop the drawing if that's the case.
382          */
383         if (job->draw_max_x <= job->draw_min_x ||
384             job->draw_max_y <= job->draw_min_y) {
385                 goto done;
386         }
387 
388         if (vc5->screen->devinfo.ver >= 41)
389                 v3d41_emit_rcl(job);
390         else
391                 v3d33_emit_rcl(job);
392 
393         if (cl_offset(&job->bcl) > 0) {
394                 vc5_cl_ensure_space_with_branch(&job->bcl,
395                                                 7 +
396                                                 cl_packet_length(OCCLUSION_QUERY_COUNTER));
397 
398                 if (job->oq_enabled) {
399                         /* Disable the OQ at the end of the CL, so that the
400                          * draw calls at the start of the CL don't inherit the
401                          * OQ counter.
402                          */
403                         cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
404                 }
405 
406                 /* Increment the semaphore indicating that binning is done and
407                  * unblocking the render thread.  Note that this doesn't act
408                  * until the FLUSH completes.
409                  */
410                 cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
411 
412                 /* The FLUSH_ALL emits any unwritten state changes in each
413                  * tile.  We can use this to reset any state that needs to be
414                  * present at the start of the next tile, as we do with
415                  * OCCLUSION_QUERY_COUNTER above.
416                  */
417                 cl_emit(&job->bcl, FLUSH_ALL_STATE, flush);
418         }
419 
420         job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
421         job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
422 
423         /* On V3D 4.1, the tile alloc/state setup moved to register writes
424          * instead of binner pac`kets.
425          */
426         if (screen->devinfo.ver >= 41) {
427                 vc5_job_add_bo(job, job->tile_alloc);
428                 job->submit.qma = job->tile_alloc->offset;
429                 job->submit.qms = job->tile_alloc->size;
430 
431                 vc5_job_add_bo(job, job->tile_state);
432                 job->submit.qts = job->tile_state->offset;
433         }
434 
435         vc5_clif_dump(vc5, job);
436 
437         if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) {
438                 int ret;
439 
440 #ifndef USE_VC5_SIMULATOR
441                 ret = drmIoctl(vc5->fd, DRM_IOCTL_VC5_SUBMIT_CL, &job->submit);
442 #else
443                 ret = vc5_simulator_flush(vc5, &job->submit, job);
444 #endif
445                 static bool warned = false;
446                 if (ret && !warned) {
447                         fprintf(stderr, "Draw call returned %s.  "
448                                         "Expect corruption.\n", strerror(errno));
449                         warned = true;
450                 }
451         }
452 
453         if (vc5->last_emit_seqno - vc5->screen->finished_seqno > 5) {
454                 if (!vc5_wait_seqno(vc5->screen,
455                                     vc5->last_emit_seqno - 5,
456                                     PIPE_TIMEOUT_INFINITE,
457                                     "job throttling")) {
458                         fprintf(stderr, "Job throttling failed\n");
459                 }
460         }
461 
462 done:
463         vc5_job_free(vc5, job);
464 }
465 
466 static bool
vc5_job_compare(const void * a,const void * b)467 vc5_job_compare(const void *a, const void *b)
468 {
469         return memcmp(a, b, sizeof(struct vc5_job_key)) == 0;
470 }
471 
472 static uint32_t
vc5_job_hash(const void * key)473 vc5_job_hash(const void *key)
474 {
475         return _mesa_hash_data(key, sizeof(struct vc5_job_key));
476 }
477 
478 void
vc5_job_init(struct vc5_context * vc5)479 vc5_job_init(struct vc5_context *vc5)
480 {
481         vc5->jobs = _mesa_hash_table_create(vc5,
482                                             vc5_job_hash,
483                                             vc5_job_compare);
484         vc5->write_jobs = _mesa_hash_table_create(vc5,
485                                                   _mesa_hash_pointer,
486                                                   _mesa_key_pointer_equal);
487 }
488 
489