1 /*
2  * Copyright © 2014-2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file vc4_job.c
25  *
26  * Functions for submitting VC4 render jobs to the kernel.
27  */
28 
29 #include <xf86drm.h>
30 #include "vc4_cl_dump.h"
31 #include "vc4_context.h"
32 #include "util/hash_table.h"
33 
/**
 * Removes the entry stored under @key from @ht.
 *
 * The result of the lookup is passed directly to _mesa_hash_table_remove()
 * without being inspected here.
 */
static void
remove_from_ht(struct hash_table *ht, void *key)
{
        _mesa_hash_table_remove(ht, _mesa_hash_table_search(ht, key));
}
40 
41 static void
vc4_job_free(struct vc4_context * vc4,struct vc4_job * job)42 vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)
43 {
44         struct vc4_bo **referenced_bos = job->bo_pointers.base;
45         for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
46                 vc4_bo_unreference(&referenced_bos[i]);
47         }
48 
49         remove_from_ht(vc4->jobs, &job->key);
50 
51         if (job->color_write) {
52                 remove_from_ht(vc4->write_jobs, job->color_write->texture);
53                 pipe_surface_reference(&job->color_write, NULL);
54         }
55         if (job->msaa_color_write) {
56                 remove_from_ht(vc4->write_jobs, job->msaa_color_write->texture);
57                 pipe_surface_reference(&job->msaa_color_write, NULL);
58         }
59         if (job->zs_write) {
60                 remove_from_ht(vc4->write_jobs, job->zs_write->texture);
61                 pipe_surface_reference(&job->zs_write, NULL);
62         }
63         if (job->msaa_zs_write) {
64                 remove_from_ht(vc4->write_jobs, job->msaa_zs_write->texture);
65                 pipe_surface_reference(&job->msaa_zs_write, NULL);
66         }
67 
68         pipe_surface_reference(&job->color_read, NULL);
69         pipe_surface_reference(&job->zs_read, NULL);
70 
71         if (vc4->job == job)
72                 vc4->job = NULL;
73 
74         ralloc_free(job);
75 }
76 
77 static struct vc4_job *
vc4_job_create(struct vc4_context * vc4)78 vc4_job_create(struct vc4_context *vc4)
79 {
80         struct vc4_job *job = rzalloc(vc4, struct vc4_job);
81 
82         vc4_init_cl(job, &job->bcl);
83         vc4_init_cl(job, &job->shader_rec);
84         vc4_init_cl(job, &job->uniforms);
85         vc4_init_cl(job, &job->bo_handles);
86         vc4_init_cl(job, &job->bo_pointers);
87 
88         job->draw_min_x = ~0;
89         job->draw_min_y = ~0;
90         job->draw_max_x = 0;
91         job->draw_max_y = 0;
92 
93         job->last_gem_handle_hindex = ~0;
94 
95         return job;
96 }
97 
98 void
vc4_flush_jobs_writing_resource(struct vc4_context * vc4,struct pipe_resource * prsc)99 vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
100                                 struct pipe_resource *prsc)
101 {
102         struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
103                                                            prsc);
104         if (entry) {
105                 struct vc4_job *job = entry->data;
106                 vc4_job_submit(vc4, job);
107         }
108 }
109 
110 void
vc4_flush_jobs_reading_resource(struct vc4_context * vc4,struct pipe_resource * prsc)111 vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
112                                 struct pipe_resource *prsc)
113 {
114         struct vc4_resource *rsc = vc4_resource(prsc);
115 
116         vc4_flush_jobs_writing_resource(vc4, prsc);
117 
118         struct hash_entry *entry;
119         hash_table_foreach(vc4->jobs, entry) {
120                 struct vc4_job *job = entry->data;
121 
122                 struct vc4_bo **referenced_bos = job->bo_pointers.base;
123                 bool found = false;
124                 for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
125                         if (referenced_bos[i] == rsc->bo) {
126                                 found = true;
127                                 break;
128                         }
129                 }
130                 if (found) {
131                         vc4_job_submit(vc4, job);
132                         continue;
133                 }
134 
135                 /* Also check for the Z/color buffers, since the references to
136                  * those are only added immediately before submit.
137                  */
138                 if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) {
139                         struct vc4_resource *ctex =
140                                 vc4_resource(job->color_read->texture);
141                         if (ctex->bo == rsc->bo) {
142                                 vc4_job_submit(vc4, job);
143                                 continue;
144                         }
145                 }
146 
147                 if (job->zs_read && !(job->cleared &
148                                       (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
149                         struct vc4_resource *ztex =
150                                 vc4_resource(job->zs_read->texture);
151                         if (ztex->bo == rsc->bo) {
152                                 vc4_job_submit(vc4, job);
153                                 continue;
154                         }
155                 }
156         }
157 }
158 
159 /**
160  * Returns a vc4_job struture for tracking V3D rendering to a particular FBO.
161  *
162  * If we've already started rendering to this FBO, then return old same job,
163  * otherwise make a new one.  If we're beginning rendering to an FBO, make
164  * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
165  * have been flushed.
166  */
167 struct vc4_job *
vc4_get_job(struct vc4_context * vc4,struct pipe_surface * cbuf,struct pipe_surface * zsbuf)168 vc4_get_job(struct vc4_context *vc4,
169             struct pipe_surface *cbuf, struct pipe_surface *zsbuf)
170 {
171         /* Return the existing job for this FBO if we have one */
172         struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf};
173         struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs,
174                                                            &local_key);
175         if (entry)
176                 return entry->data;
177 
178         /* Creating a new job.  Make sure that any previous jobs reading or
179          * writing these buffers are flushed.
180          */
181         if (cbuf)
182                 vc4_flush_jobs_reading_resource(vc4, cbuf->texture);
183         if (zsbuf)
184                 vc4_flush_jobs_reading_resource(vc4, zsbuf->texture);
185 
186         struct vc4_job *job = vc4_job_create(vc4);
187 
188         if (cbuf) {
189                 if (cbuf->texture->nr_samples > 1) {
190                         job->msaa = true;
191                         pipe_surface_reference(&job->msaa_color_write, cbuf);
192                 } else {
193                         pipe_surface_reference(&job->color_write, cbuf);
194                 }
195         }
196 
197         if (zsbuf) {
198                 if (zsbuf->texture->nr_samples > 1) {
199                         job->msaa = true;
200                         pipe_surface_reference(&job->msaa_zs_write, zsbuf);
201                 } else {
202                         pipe_surface_reference(&job->zs_write, zsbuf);
203                 }
204         }
205 
206         if (job->msaa) {
207                 job->tile_width = 32;
208                 job->tile_height = 32;
209         } else {
210                 job->tile_width = 64;
211                 job->tile_height = 64;
212         }
213 
214         if (cbuf)
215                 _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job);
216         if (zsbuf)
217                 _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job);
218 
219         job->key.cbuf = cbuf;
220         job->key.zsbuf = zsbuf;
221         _mesa_hash_table_insert(vc4->jobs, &job->key, job);
222 
223         return job;
224 }
225 
226 struct vc4_job *
vc4_get_job_for_fbo(struct vc4_context * vc4)227 vc4_get_job_for_fbo(struct vc4_context *vc4)
228 {
229         if (vc4->job)
230                 return vc4->job;
231 
232         struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
233         struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
234         struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf);
235 
236         /* The dirty flags are tracking what's been updated while vc4->job has
237          * been bound, so set them all to ~0 when switching between jobs.  We
238          * also need to reset all state at the start of rendering.
239          */
240         vc4->dirty = ~0;
241 
242         /* Set up the read surfaces in the job.  If they aren't actually
243          * getting read (due to a clear starting the frame), job->cleared will
244          * mask out the read.
245          */
246         pipe_surface_reference(&job->color_read, cbuf);
247         pipe_surface_reference(&job->zs_read, zsbuf);
248 
249         /* If we're binding to uninitialized buffers, no need to load their
250          * contents before drawing.
251          */
252         if (cbuf) {
253                 struct vc4_resource *rsc = vc4_resource(cbuf->texture);
254                 if (!rsc->writes)
255                         job->cleared |= PIPE_CLEAR_COLOR0;
256         }
257 
258         if (zsbuf) {
259                 struct vc4_resource *rsc = vc4_resource(zsbuf->texture);
260                 if (!rsc->writes)
261                         job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
262         }
263 
264         job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width,
265                                          job->tile_width);
266         job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height,
267                                          job->tile_height);
268 
269         /* Initialize the job with the raster order flags -- each draw will
270          * check that we haven't changed the flags, since that requires a
271          * flush.
272          */
273         if (vc4->rasterizer)
274                 job->flags = vc4->rasterizer->tile_raster_order_flags;
275 
276         vc4->job = job;
277 
278         return job;
279 }
280 
281 static void
vc4_submit_setup_rcl_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf,bool is_depth,bool is_write)282 vc4_submit_setup_rcl_surface(struct vc4_job *job,
283                              struct drm_vc4_submit_rcl_surface *submit_surf,
284                              struct pipe_surface *psurf,
285                              bool is_depth, bool is_write)
286 {
287         struct vc4_surface *surf = vc4_surface(psurf);
288 
289         if (!surf)
290                 return;
291 
292         struct vc4_resource *rsc = vc4_resource(psurf->texture);
293         submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
294         submit_surf->offset = surf->offset;
295 
296         if (psurf->texture->nr_samples <= 1) {
297                 if (is_depth) {
298                         submit_surf->bits =
299                                 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
300                                               VC4_LOADSTORE_TILE_BUFFER_BUFFER);
301 
302                 } else {
303                         submit_surf->bits =
304                                 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
305                                               VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
306                                 VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
307                                               VC4_LOADSTORE_TILE_BUFFER_BGR565 :
308                                               VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
309                                               VC4_LOADSTORE_TILE_BUFFER_FORMAT);
310                 }
311                 submit_surf->bits |=
312                         VC4_SET_FIELD(surf->tiling,
313                                       VC4_LOADSTORE_TILE_BUFFER_TILING);
314         } else {
315                 assert(!is_write);
316                 submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
317         }
318 
319         if (is_write)
320                 rsc->writes++;
321 }
322 
323 static void
vc4_submit_setup_rcl_render_config_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf)324 vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job,
325                                            struct drm_vc4_submit_rcl_surface *submit_surf,
326                                            struct pipe_surface *psurf)
327 {
328         struct vc4_surface *surf = vc4_surface(psurf);
329 
330         if (!surf)
331                 return;
332 
333         struct vc4_resource *rsc = vc4_resource(psurf->texture);
334         submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
335         submit_surf->offset = surf->offset;
336 
337         if (psurf->texture->nr_samples <= 1) {
338                 submit_surf->bits =
339                         VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
340                                       VC4_RENDER_CONFIG_FORMAT_BGR565 :
341                                       VC4_RENDER_CONFIG_FORMAT_RGBA8888,
342                                       VC4_RENDER_CONFIG_FORMAT) |
343                         VC4_SET_FIELD(surf->tiling,
344                                       VC4_RENDER_CONFIG_MEMORY_FORMAT);
345         }
346 
347         rsc->writes++;
348 }
349 
350 static void
vc4_submit_setup_rcl_msaa_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf)351 vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job,
352                                   struct drm_vc4_submit_rcl_surface *submit_surf,
353                                   struct pipe_surface *psurf)
354 {
355         struct vc4_surface *surf = vc4_surface(psurf);
356 
357         if (!surf)
358                 return;
359 
360         struct vc4_resource *rsc = vc4_resource(psurf->texture);
361         submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
362         submit_surf->offset = surf->offset;
363         submit_surf->bits = 0;
364         rsc->writes++;
365 }
366 
367 /**
368  * Submits the job to the kernel and then reinitializes it.
369  */
370 void
vc4_job_submit(struct vc4_context * vc4,struct vc4_job * job)371 vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
372 {
373         if (!job->needs_flush)
374                 goto done;
375 
376         /* The RCL setup would choke if the draw bounds cause no drawing, so
377          * just drop the drawing if that's the case.
378          */
379         if (job->draw_max_x <= job->draw_min_x ||
380             job->draw_max_y <= job->draw_min_y) {
381                 goto done;
382         }
383 
384         if (vc4_debug & VC4_DEBUG_CL) {
385                 fprintf(stderr, "BCL:\n");
386                 vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false);
387         }
388 
389         if (cl_offset(&job->bcl) > 0) {
390                 /* Increment the semaphore indicating that binning is done and
391                  * unblocking the render thread.  Note that this doesn't act
392                  * until the FLUSH completes.
393                  */
394                 cl_ensure_space(&job->bcl, 8);
395                 cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
396                 /* The FLUSH caps all of our bin lists with a
397                  * VC4_PACKET_RETURN.
398                  */
399                 cl_emit(&job->bcl, FLUSH, flush);
400         }
401         struct drm_vc4_submit_cl submit = {
402                 .color_read.hindex = ~0,
403                 .zs_read.hindex = ~0,
404                 .color_write.hindex = ~0,
405                 .msaa_color_write.hindex = ~0,
406                 .zs_write.hindex = ~0,
407                 .msaa_zs_write.hindex = ~0,
408         };
409 
410         cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t));
411         cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *));
412 
413         if (job->resolve & PIPE_CLEAR_COLOR) {
414                 if (!(job->cleared & PIPE_CLEAR_COLOR)) {
415                         vc4_submit_setup_rcl_surface(job, &submit.color_read,
416                                                      job->color_read,
417                                                      false, false);
418                 }
419                 vc4_submit_setup_rcl_render_config_surface(job,
420                                                            &submit.color_write,
421                                                            job->color_write);
422                 vc4_submit_setup_rcl_msaa_surface(job,
423                                                   &submit.msaa_color_write,
424                                                   job->msaa_color_write);
425         }
426         if (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
427                 if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
428                         vc4_submit_setup_rcl_surface(job, &submit.zs_read,
429                                                      job->zs_read, true, false);
430                 }
431                 vc4_submit_setup_rcl_surface(job, &submit.zs_write,
432                                              job->zs_write, true, true);
433                 vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write,
434                                                   job->msaa_zs_write);
435         }
436 
437         if (job->msaa) {
438                 /* This bit controls how many pixels the general
439                  * (i.e. subsampled) loads/stores are iterating over
440                  * (multisample loads replicate out to the other samples).
441                  */
442                 submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;
443                 /* Controls whether color_write's
444                  * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
445                  */
446                 submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
447         }
448 
449         submit.bo_handles = (uintptr_t)job->bo_handles.base;
450         submit.bo_handle_count = cl_offset(&job->bo_handles) / 4;
451         submit.bin_cl = (uintptr_t)job->bcl.base;
452         submit.bin_cl_size = cl_offset(&job->bcl);
453         submit.shader_rec = (uintptr_t)job->shader_rec.base;
454         submit.shader_rec_size = cl_offset(&job->shader_rec);
455         submit.shader_rec_count = job->shader_rec_count;
456         submit.uniforms = (uintptr_t)job->uniforms.base;
457         submit.uniforms_size = cl_offset(&job->uniforms);
458 
459         assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
460         submit.min_x_tile = job->draw_min_x / job->tile_width;
461         submit.min_y_tile = job->draw_min_y / job->tile_height;
462         submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width;
463         submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height;
464         submit.width = job->draw_width;
465         submit.height = job->draw_height;
466         if (job->cleared) {
467                 submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
468                 submit.clear_color[0] = job->clear_color[0];
469                 submit.clear_color[1] = job->clear_color[1];
470                 submit.clear_z = job->clear_depth;
471                 submit.clear_s = job->clear_stencil;
472         }
473         submit.flags |= job->flags;
474 
475         if (!(vc4_debug & VC4_DEBUG_NORAST)) {
476                 int ret;
477 
478 #ifndef USE_VC4_SIMULATOR
479                 ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
480 #else
481                 ret = vc4_simulator_flush(vc4, &submit, job);
482 #endif
483                 static bool warned = false;
484                 if (ret && !warned) {
485                         fprintf(stderr, "Draw call returned %s.  "
486                                         "Expect corruption.\n", strerror(errno));
487                         warned = true;
488                 } else if (!ret) {
489                         vc4->last_emit_seqno = submit.seqno;
490                 }
491         }
492 
493         if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
494                 if (!vc4_wait_seqno(vc4->screen,
495                                     vc4->last_emit_seqno - 5,
496                                     PIPE_TIMEOUT_INFINITE,
497                                     "job throttling")) {
498                         fprintf(stderr, "Job throttling failed\n");
499                 }
500         }
501 
502         if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
503                 if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
504                                     PIPE_TIMEOUT_INFINITE, "sync")) {
505                         fprintf(stderr, "Wait failed.\n");
506                         abort();
507                 }
508         }
509 
510 done:
511         vc4_job_free(vc4, job);
512 }
513 
514 static bool
vc4_job_compare(const void * a,const void * b)515 vc4_job_compare(const void *a, const void *b)
516 {
517         return memcmp(a, b, sizeof(struct vc4_job_key)) == 0;
518 }
519 
520 static uint32_t
vc4_job_hash(const void * key)521 vc4_job_hash(const void *key)
522 {
523         return _mesa_hash_data(key, sizeof(struct vc4_job_key));
524 }
525 
526 void
vc4_job_init(struct vc4_context * vc4)527 vc4_job_init(struct vc4_context *vc4)
528 {
529         vc4->jobs = _mesa_hash_table_create(vc4,
530                                             vc4_job_hash,
531                                             vc4_job_compare);
532         vc4->write_jobs = _mesa_hash_table_create(vc4,
533                                                   _mesa_hash_pointer,
534                                                   _mesa_key_pointer_equal);
535 }
536 
537