1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
/** @file v3d_job.c
 *
 * Functions for tracking V3D render jobs and submitting them to the kernel.
 */
28 
29 #include <xf86drm.h>
30 #include "v3d_context.h"
31 /* The OQ/semaphore packets are the same across V3D versions. */
32 #define V3D_VERSION 33
33 #include "broadcom/cle/v3dx_pack.h"
34 #include "broadcom/common/v3d_macros.h"
35 #include "util/hash_table.h"
36 #include "util/ralloc.h"
37 #include "util/set.h"
38 #include "broadcom/clif/clif_dump.h"
39 
40 void
v3d_job_free(struct v3d_context * v3d,struct v3d_job * job)41 v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
42 {
43         set_foreach(job->bos, entry) {
44                 struct v3d_bo *bo = (struct v3d_bo *)entry->key;
45                 v3d_bo_unreference(&bo);
46         }
47 
48         _mesa_hash_table_remove_key(v3d->jobs, &job->key);
49 
50         if (job->write_prscs) {
51                 set_foreach(job->write_prscs, entry) {
52                         const struct pipe_resource *prsc = entry->key;
53 
54                         _mesa_hash_table_remove_key(v3d->write_jobs, prsc);
55                 }
56         }
57 
58         for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
59                 if (job->cbufs[i]) {
60                         _mesa_hash_table_remove_key(v3d->write_jobs,
61                                                     job->cbufs[i]->texture);
62                         pipe_surface_reference(&job->cbufs[i], NULL);
63                 }
64         }
65         if (job->zsbuf) {
66                 struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
67                 if (rsc->separate_stencil)
68                         _mesa_hash_table_remove_key(v3d->write_jobs,
69                                                     &rsc->separate_stencil->base);
70 
71                 _mesa_hash_table_remove_key(v3d->write_jobs,
72                                             job->zsbuf->texture);
73                 pipe_surface_reference(&job->zsbuf, NULL);
74         }
75 
76         if (v3d->job == job)
77                 v3d->job = NULL;
78 
79         v3d_destroy_cl(&job->bcl);
80         v3d_destroy_cl(&job->rcl);
81         v3d_destroy_cl(&job->indirect);
82         v3d_bo_unreference(&job->tile_alloc);
83         v3d_bo_unreference(&job->tile_state);
84 
85         ralloc_free(job);
86 }
87 
88 struct v3d_job *
v3d_job_create(struct v3d_context * v3d)89 v3d_job_create(struct v3d_context *v3d)
90 {
91         struct v3d_job *job = rzalloc(v3d, struct v3d_job);
92 
93         job->v3d = v3d;
94 
95         v3d_init_cl(job, &job->bcl);
96         v3d_init_cl(job, &job->rcl);
97         v3d_init_cl(job, &job->indirect);
98 
99         job->draw_min_x = ~0;
100         job->draw_min_y = ~0;
101         job->draw_max_x = 0;
102         job->draw_max_y = 0;
103 
104         job->bos = _mesa_set_create(job,
105                                     _mesa_hash_pointer,
106                                     _mesa_key_pointer_equal);
107         return job;
108 }
109 
110 void
v3d_job_add_bo(struct v3d_job * job,struct v3d_bo * bo)111 v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo)
112 {
113         if (!bo)
114                 return;
115 
116         if (_mesa_set_search(job->bos, bo))
117                 return;
118 
119         v3d_bo_reference(bo);
120         _mesa_set_add(job->bos, bo);
121         job->referenced_size += bo->size;
122 
123         uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles;
124 
125         if (job->submit.bo_handle_count >= job->bo_handles_size) {
126                 job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
127                 bo_handles = reralloc(job, bo_handles,
128                                       uint32_t, job->bo_handles_size);
129                 job->submit.bo_handles = (uintptr_t)(void *)bo_handles;
130         }
131         bo_handles[job->submit.bo_handle_count++] = bo->handle;
132 }
133 
134 void
v3d_job_add_write_resource(struct v3d_job * job,struct pipe_resource * prsc)135 v3d_job_add_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
136 {
137         struct v3d_context *v3d = job->v3d;
138 
139         if (!job->write_prscs) {
140                 job->write_prscs = _mesa_set_create(job,
141                                                     _mesa_hash_pointer,
142                                                     _mesa_key_pointer_equal);
143         }
144 
145         _mesa_set_add(job->write_prscs, prsc);
146         _mesa_hash_table_insert(v3d->write_jobs, prsc, job);
147 }
148 
149 void
v3d_flush_jobs_using_bo(struct v3d_context * v3d,struct v3d_bo * bo)150 v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo)
151 {
152         hash_table_foreach(v3d->jobs, entry) {
153                 struct v3d_job *job = entry->data;
154 
155                 if (_mesa_set_search(job->bos, bo))
156                         v3d_job_submit(v3d, job);
157         }
158 }
159 
160 void
v3d_job_add_tf_write_resource(struct v3d_job * job,struct pipe_resource * prsc)161 v3d_job_add_tf_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
162 {
163         v3d_job_add_write_resource(job, prsc);
164 
165         if (!job->tf_write_prscs)
166                 job->tf_write_prscs = _mesa_pointer_set_create(job);
167 
168         _mesa_set_add(job->tf_write_prscs, prsc);
169 }
170 
171 static bool
v3d_job_writes_resource_from_tf(struct v3d_job * job,struct pipe_resource * prsc)172 v3d_job_writes_resource_from_tf(struct v3d_job *job,
173                                 struct pipe_resource *prsc)
174 {
175         if (!job->tf_enabled)
176                 return false;
177 
178         if (!job->tf_write_prscs)
179                 return false;
180 
181         return _mesa_set_search(job->tf_write_prscs, prsc) != NULL;
182 }
183 
184 void
v3d_flush_jobs_writing_resource(struct v3d_context * v3d,struct pipe_resource * prsc,enum v3d_flush_cond flush_cond,bool is_compute_pipeline)185 v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
186                                 struct pipe_resource *prsc,
187                                 enum v3d_flush_cond flush_cond,
188                                 bool is_compute_pipeline)
189 {
190         struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
191                                                            prsc);
192         struct v3d_resource *rsc = v3d_resource(prsc);
193 
194         /* We need to sync if graphics pipeline reads a resource written
195          * by the compute pipeline. The same would be needed for the case of
196          * graphics-compute dependency but nowadays all compute jobs
197          * are serialized with the previous submitted job.
198          */
199         if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
200            v3d->sync_on_last_compute_job = true;
201            rsc->compute_written = false;
202         }
203 
204         if (!entry)
205                 return;
206 
207         struct v3d_job *job = entry->data;
208 
209         bool needs_flush;
210         switch (flush_cond) {
211         case V3D_FLUSH_ALWAYS:
212                 needs_flush = true;
213                 break;
214         case V3D_FLUSH_NOT_CURRENT_JOB:
215                 needs_flush = !v3d->job || v3d->job != job;
216                 break;
217         case V3D_FLUSH_DEFAULT:
218         default:
219                 /* For writes from TF in the same job we use the "Wait for TF"
220                  * feature provided by the hardware so we don't want to flush.
221                  * The exception to this is when the caller is about to map the
222                  * resource since in that case we don't have a 'Wait for TF'
223                  * command the in command stream. In this scenario the caller
224                  * is expected to set 'always_flush' to True.
225                  */
226                 needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);
227         }
228 
229         if (needs_flush)
230                 v3d_job_submit(v3d, job);
231 }
232 
233 void
v3d_flush_jobs_reading_resource(struct v3d_context * v3d,struct pipe_resource * prsc,enum v3d_flush_cond flush_cond,bool is_compute_pipeline)234 v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
235                                 struct pipe_resource *prsc,
236                                 enum v3d_flush_cond flush_cond,
237                                 bool is_compute_pipeline)
238 {
239         struct v3d_resource *rsc = v3d_resource(prsc);
240 
241         /* We only need to force the flush on TF writes, which is the only
242          * case where we might skip the flush to use the 'Wait for TF'
243          * command. Here we are flushing for a read, which means that the
244          * caller intends to write to the resource, so we don't care if
245          * there was a previous TF write to it.
246          */
247         v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
248                                         is_compute_pipeline);
249 
250         hash_table_foreach(v3d->jobs, entry) {
251                 struct v3d_job *job = entry->data;
252 
253                 if (!_mesa_set_search(job->bos, rsc->bo))
254                         continue;
255 
256                 bool needs_flush;
257                 switch (flush_cond) {
258                 case V3D_FLUSH_NOT_CURRENT_JOB:
259                         needs_flush = !v3d->job || v3d->job != job;
260                         break;
261                 case V3D_FLUSH_ALWAYS:
262                 case V3D_FLUSH_DEFAULT:
263                 default:
264                         needs_flush = true;
265                 }
266 
267                 if (needs_flush)
268                         v3d_job_submit(v3d, job);
269 
270                 /* Reminder: v3d->jobs is safe to keep iterating even
271                  * after deletion of an entry.
272                  */
273                 continue;
274         }
275 }
276 
277 static void
v3d_job_set_tile_buffer_size(struct v3d_job * job)278 v3d_job_set_tile_buffer_size(struct v3d_job *job)
279 {
280         static const uint8_t tile_sizes[] = {
281                 64, 64,
282                 64, 32,
283                 32, 32,
284                 32, 16,
285                 16, 16,
286         };
287         int tile_size_index = 0;
288         if (job->msaa)
289                 tile_size_index += 2;
290 
291         if (job->cbufs[3] || job->cbufs[2])
292                 tile_size_index += 2;
293         else if (job->cbufs[1])
294                 tile_size_index++;
295 
296         int max_bpp = RENDER_TARGET_MAXIMUM_32BPP;
297         for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
298                 if (job->cbufs[i]) {
299                         struct v3d_surface *surf = v3d_surface(job->cbufs[i]);
300                         max_bpp = MAX2(max_bpp, surf->internal_bpp);
301                 }
302         }
303         job->internal_bpp = max_bpp;
304         STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0);
305         tile_size_index += max_bpp;
306 
307         assert(tile_size_index < ARRAY_SIZE(tile_sizes));
308         job->tile_width = tile_sizes[tile_size_index * 2 + 0];
309         job->tile_height = tile_sizes[tile_size_index * 2 + 1];
310 }
311 
312 /**
313  * Returns a v3d_job struture for tracking V3D rendering to a particular FBO.
314  *
315  * If we've already started rendering to this FBO, then return the same job,
316  * otherwise make a new one.  If we're beginning rendering to an FBO, make
317  * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
318  * have been flushed.
319  */
320 struct v3d_job *
v3d_get_job(struct v3d_context * v3d,struct pipe_surface ** cbufs,struct pipe_surface * zsbuf)321 v3d_get_job(struct v3d_context *v3d,
322             struct pipe_surface **cbufs, struct pipe_surface *zsbuf)
323 {
324         /* Return the existing job for this FBO if we have one */
325         struct v3d_job_key local_key = {
326                 .cbufs = {
327                         cbufs[0],
328                         cbufs[1],
329                         cbufs[2],
330                         cbufs[3],
331                 },
332                 .zsbuf = zsbuf,
333         };
334         struct hash_entry *entry = _mesa_hash_table_search(v3d->jobs,
335                                                            &local_key);
336         if (entry)
337                 return entry->data;
338 
339         /* Creating a new job.  Make sure that any previous jobs reading or
340          * writing these buffers are flushed.
341          */
342         struct v3d_job *job = v3d_job_create(v3d);
343 
344         for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
345                 if (cbufs[i]) {
346                         v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
347                                                         V3D_FLUSH_DEFAULT,
348                                                         false);
349                         pipe_surface_reference(&job->cbufs[i], cbufs[i]);
350 
351                         if (cbufs[i]->texture->nr_samples > 1)
352                                 job->msaa = true;
353                 }
354         }
355         if (zsbuf) {
356                 v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
357                                                 V3D_FLUSH_DEFAULT,
358                                                 false);
359                 pipe_surface_reference(&job->zsbuf, zsbuf);
360                 if (zsbuf->texture->nr_samples > 1)
361                         job->msaa = true;
362         }
363 
364         for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
365                 if (cbufs[i])
366                         _mesa_hash_table_insert(v3d->write_jobs,
367                                                 cbufs[i]->texture, job);
368         }
369         if (zsbuf) {
370                 _mesa_hash_table_insert(v3d->write_jobs, zsbuf->texture, job);
371 
372                 struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
373                 if (rsc->separate_stencil) {
374                         v3d_flush_jobs_reading_resource(v3d,
375                                                         &rsc->separate_stencil->base,
376                                                         V3D_FLUSH_DEFAULT,
377                                                         false);
378                         _mesa_hash_table_insert(v3d->write_jobs,
379                                                 &rsc->separate_stencil->base,
380                                                 job);
381                 }
382         }
383 
384         memcpy(&job->key, &local_key, sizeof(local_key));
385         _mesa_hash_table_insert(v3d->jobs, &job->key, job);
386 
387         return job;
388 }
389 
390 struct v3d_job *
v3d_get_job_for_fbo(struct v3d_context * v3d)391 v3d_get_job_for_fbo(struct v3d_context *v3d)
392 {
393         if (v3d->job)
394                 return v3d->job;
395 
396         struct pipe_surface **cbufs = v3d->framebuffer.cbufs;
397         struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf;
398         struct v3d_job *job = v3d_get_job(v3d, cbufs, zsbuf);
399 
400         if (v3d->framebuffer.samples >= 1)
401                 job->msaa = true;
402 
403         v3d_job_set_tile_buffer_size(job);
404 
405         /* The dirty flags are tracking what's been updated while v3d->job has
406          * been bound, so set them all to ~0 when switching between jobs.  We
407          * also need to reset all state at the start of rendering.
408          */
409         v3d->dirty = ~0;
410 
411         /* If we're binding to uninitialized buffers, no need to load their
412          * contents before drawing.
413          */
414         for (int i = 0; i < 4; i++) {
415                 if (cbufs[i]) {
416                         struct v3d_resource *rsc = v3d_resource(cbufs[i]->texture);
417                         if (!rsc->writes)
418                                 job->clear |= PIPE_CLEAR_COLOR0 << i;
419                 }
420         }
421 
422         if (zsbuf) {
423                 struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
424                 if (!rsc->writes)
425                         job->clear |= PIPE_CLEAR_DEPTH;
426 
427                 if (rsc->separate_stencil)
428                         rsc = rsc->separate_stencil;
429 
430                 if (!rsc->writes)
431                         job->clear |= PIPE_CLEAR_STENCIL;
432         }
433 
434         job->draw_tiles_x = DIV_ROUND_UP(v3d->framebuffer.width,
435                                          job->tile_width);
436         job->draw_tiles_y = DIV_ROUND_UP(v3d->framebuffer.height,
437                                          job->tile_height);
438 
439         v3d->job = job;
440 
441         return job;
442 }
443 
444 static void
v3d_clif_dump(struct v3d_context * v3d,struct v3d_job * job)445 v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job)
446 {
447         if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF)))
448                 return;
449 
450         struct clif_dump *clif = clif_dump_init(&v3d->screen->devinfo,
451                                                 stderr,
452                                                 V3D_DEBUG & V3D_DEBUG_CL);
453 
454         set_foreach(job->bos, entry) {
455                 struct v3d_bo *bo = (void *)entry->key;
456                 char *name = ralloc_asprintf(NULL, "%s_0x%x",
457                                              bo->name, bo->offset);
458 
459                 v3d_bo_map(bo);
460                 clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
461 
462                 ralloc_free(name);
463         }
464 
465         clif_dump(clif, &job->submit);
466 
467         clif_dump_destroy(clif);
468 }
469 
470 static void
v3d_read_and_accumulate_primitive_counters(struct v3d_context * v3d)471 v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
472 {
473         assert(v3d->prim_counts);
474 
475         perf_debug("stalling on TF counts readback\n");
476         struct v3d_resource *rsc = v3d_resource(v3d->prim_counts);
477         if (v3d_bo_wait(rsc->bo, PIPE_TIMEOUT_INFINITE, "prim-counts")) {
478                 uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;
479                 v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];
480                 /* When we only have a vertex shader we determine the primitive
481                  * count in the CPU so don't update it here again.
482                  */
483                 if (v3d->prog.gs)
484                         v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];
485         }
486 }
487 
488 /**
489  * Submits the job to the kernel and then reinitializes it.
490  */
491 void
v3d_job_submit(struct v3d_context * v3d,struct v3d_job * job)492 v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
493 {
494         struct v3d_screen *screen = v3d->screen;
495 
496         if (!job->needs_flush)
497                 goto done;
498 
499         if (screen->devinfo.ver >= 41)
500                 v3d41_emit_rcl(job);
501         else
502                 v3d33_emit_rcl(job);
503 
504         if (cl_offset(&job->bcl) > 0) {
505                 if (screen->devinfo.ver >= 41)
506                         v3d41_bcl_epilogue(v3d, job);
507                 else
508                         v3d33_bcl_epilogue(v3d, job);
509         }
510 
511         /* While the RCL will implicitly depend on the last RCL to have
512          * finished, we also need to block on any previous TFU job we may have
513          * dispatched.
514          */
515         job->submit.in_sync_rcl = v3d->out_sync;
516 
517         /* Update the sync object for the last rendering by our context. */
518         job->submit.out_sync = v3d->out_sync;
519 
520         job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
521         job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
522 
523         job->submit.flags = 0;
524         if (job->tmu_dirty_rcl && screen->has_cache_flush)
525                 job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
526 
527         /* On V3D 4.1, the tile alloc/state setup moved to register writes
528          * instead of binner packets.
529          */
530         if (screen->devinfo.ver >= 41) {
531                 v3d_job_add_bo(job, job->tile_alloc);
532                 job->submit.qma = job->tile_alloc->offset;
533                 job->submit.qms = job->tile_alloc->size;
534 
535                 v3d_job_add_bo(job, job->tile_state);
536                 job->submit.qts = job->tile_state->offset;
537         }
538 
539         v3d_clif_dump(v3d, job);
540 
541         if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) {
542                 int ret;
543 
544                 ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);
545                 static bool warned = false;
546                 if (ret && !warned) {
547                         fprintf(stderr, "Draw call returned %s.  "
548                                         "Expect corruption.\n", strerror(errno));
549                         warned = true;
550                 }
551 
552                 /* If we are submitting a job in the middle of transform
553                  * feedback we need to read the primitive counts and accumulate
554                  * them, otherwise they will be reset at the start of the next
555                  * draw when we emit the Tile Binning Mode Configuration packet.
556                  *
557                  * If the job doesn't have any TF draw calls, then we know
558                  * the primitive count must be zero and we can skip stalling
559                  * for this. This also fixes a problem because it seems that
560                  * in this scenario the counters are not reset with the Tile
561                  * Binning Mode Configuration packet, which would translate
562                  * to us reading an obsolete (possibly non-zero) value from
563                  * the GPU counters.
564                  */
565                 if (v3d->streamout.num_targets && job->tf_draw_calls_queued > 0)
566                         v3d_read_and_accumulate_primitive_counters(v3d);
567         }
568 
569 done:
570         v3d_job_free(v3d, job);
571 }
572 
573 static bool
v3d_job_compare(const void * a,const void * b)574 v3d_job_compare(const void *a, const void *b)
575 {
576         return memcmp(a, b, sizeof(struct v3d_job_key)) == 0;
577 }
578 
579 static uint32_t
v3d_job_hash(const void * key)580 v3d_job_hash(const void *key)
581 {
582         return _mesa_hash_data(key, sizeof(struct v3d_job_key));
583 }
584 
585 void
v3d_job_init(struct v3d_context * v3d)586 v3d_job_init(struct v3d_context *v3d)
587 {
588         v3d->jobs = _mesa_hash_table_create(v3d,
589                                             v3d_job_hash,
590                                             v3d_job_compare);
591         v3d->write_jobs = _mesa_hash_table_create(v3d,
592                                                   _mesa_hash_pointer,
593                                                   _mesa_key_pointer_equal);
594 }
595 
596