1 /*
2  * Copyright (C) 2019 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  */
24 
25 #include <string.h>
26 #include "pan_scoreboard.h"
27 #include "pan_device.h"
28 #include "panfrost-quirks.h"
29 
30 /*
31  * There are various types of Mali jobs:
32  *
33  *  - WRITE_VALUE: generic write primitive, used to zero tiler field
34  *  - VERTEX: runs a vertex shader
35  *  - TILER: runs tiling and sets up a fragment shader
36  *  - FRAGMENT: runs fragment shaders and writes out
37  *  - COMPUTE: runs a compute shader
38  *  - FUSED: vertex+tiler fused together, implicit intradependency (Bifrost)
39  *  - GEOMETRY: runs a geometry shader (unimplemented)
40  *  - CACHE_FLUSH: unseen in the wild, theoretically cache flush
41  *
42  * In between a full batch and a single Mali job is the "job chain", a series
43  * of Mali jobs together forming a linked list. Within the job chain, each Mali
44  * job can set (up to) two dependencies on other earlier jobs in the chain.
45  * This dependency graph forms a scoreboard. The general idea of a scoreboard
46  * applies: when there is a data dependency of job B on job A, job B sets one
47  * of its dependency indices to job A, ensuring that job B won't start until
48  * job A finishes.
49  *
50  * More specifically, here are a set of rules:
51  *
52  * - A write value job must appear if and only if there is at least one tiler
53  *   job, and tiler jobs must depend on it.
54  *
55  * - Vertex jobs and tiler jobs are independent.
56  *
57  * - A tiler job must have a dependency on its data source. If it's getting
58  *   data from a vertex job, it depends on the vertex job. If it's getting data
59  *   from software, this is null.
60  *
61  * - Tiler jobs must depend on the write value job (chained or otherwise).
62  *
63  * - Tiler jobs must be strictly ordered. So each tiler job must depend on the
64  *   previous job in the chain.
65  *
66  * - Linking jobs via next_job has no bearing on the order of execution;
67  *   rather, it just establishes the linked list of jobs, EXCEPT:
68  *
69  * - A job's dependencies must appear earlier in the linked list (job chain).
70  *
71  * Justification for each rule:
72  *
73  * - Write value jobs are used to write a zero into a magic tiling field, which
74  *   enables tiling to work. If tiling occurs, they are needed; if it does not,
75  *   we cannot emit them since then tiling partially occurs and it's bad.
76  *
77  * - The hardware has no notion of a "vertex/tiler job" (at least not our
78  *   hardware -- other revs have fused jobs, but --- crap, this just got even
79  *   more complicated). They are independent units that take in data, process
80  *   it, and spit out data.
81  *
82  * - Any job must depend on its data source, in fact, or risk a
83  *   read-before-write hazard. Tiler jobs get their data from vertex jobs, ergo
84  *   tiler jobs depend on the corresponding vertex job (if it's there).
85  *
86  * - The tiler is not thread-safe; this dependency prevents race conditions
87  *   between two different jobs trying to write to the tiler outputs at the
88  *   same time.
89  *
90  * - Internally, jobs are scoreboarded; the next job fields just form a linked
91  *   list to allow the jobs to be read in; the execution order is from
92  *   resolving the dependency fields instead.
93  *
94  * - The hardware cannot set a dependency on a job it doesn't know about yet,
95  *   and dependencies are processed in-order of the next job fields.
96  *
97  */
98 
99 /* Generates, uploads, and queues a a new job. All fields are written in order
100  * except for next_job accounting (TODO: Should we be clever and defer the
101  * upload of the header here until next job to keep the access pattern totally
102  * linear? Or is that just a micro op at this point?). Returns the generated
103  * index for dep management.
104  *
105  * Inject is used to inject a job at the front, for wallpapering. If you are
106  * not wallpapering and set this, dragons will eat you. */
107 
108 unsigned
panfrost_add_job(struct pan_pool * pool,struct pan_scoreboard * scoreboard,enum mali_job_type type,bool barrier,unsigned local_dep,const struct panfrost_ptr * job,bool inject)109 panfrost_add_job(
110                 struct pan_pool *pool,
111                 struct pan_scoreboard *scoreboard,
112                 enum mali_job_type type,
113                 bool barrier,
114                 unsigned local_dep,
115                 const struct panfrost_ptr *job,
116                 bool inject)
117 {
118         bool is_bifrost = !!(pool->dev->quirks & IS_BIFROST);
119         unsigned global_dep = 0;
120 
121         if (type == MALI_JOB_TYPE_TILER) {
122                 /* Tiler jobs must be chained, and on Midgard, the first tiler
123                  * job must depend on the write value job, whose index we
124                  * reserve now */
125 
126                 if (is_bifrost && !scoreboard->write_value_index)
127                         scoreboard->write_value_index = ++scoreboard->job_index;
128 
129                 if (scoreboard->tiler_dep && !inject)
130                         global_dep = scoreboard->tiler_dep;
131                 else if (is_bifrost)
132                         global_dep = scoreboard->write_value_index;
133         }
134 
135         /* Assign the index */
136         unsigned index = ++scoreboard->job_index;
137 
138         pan_pack(job->cpu, JOB_HEADER, header) {
139                 header.type = type;
140                 header.barrier = barrier;
141                 header.index = index;
142                 header.dependency_1 = local_dep;
143                 header.dependency_2 = global_dep;
144 
145                 if (inject)
146                         header.next = scoreboard->first_job;
147         }
148 
149         if (inject) {
150                 if (type == MALI_JOB_TYPE_TILER) {
151                         if (scoreboard->first_tiler) {
152                                 /* Manual update of the dep2 field. This is bad,
153                                  * don't copy this pattern.
154                                  */
155                                 scoreboard->first_tiler->opaque[5] =
156                                         scoreboard->first_tiler_dep1 | (index << 16);
157                         }
158 
159                         scoreboard->first_tiler = (void *)job->cpu;
160                         scoreboard->first_tiler_dep1 = local_dep;
161                 }
162                 scoreboard->first_job = job->gpu;
163                 return index;
164         }
165 
166         /* Form a chain */
167         if (type == MALI_JOB_TYPE_TILER) {
168                 if (!scoreboard->first_tiler) {
169                         scoreboard->first_tiler = (void *)job->cpu;
170                         scoreboard->first_tiler_dep1 = local_dep;
171                 }
172                 scoreboard->tiler_dep = index;
173         }
174 
175         if (scoreboard->prev_job) {
176                 /* Manual update of the next pointer. This is bad, don't copy
177                  * this pattern.
178                  * TODO: Find a way to defer last job header emission until we
179                  * have a new job to queue or the batch is ready for execution.
180                  */
181                 scoreboard->prev_job->opaque[6] = job->gpu;
182                 scoreboard->prev_job->opaque[7] = job->gpu >> 32;
183 	} else {
184                 scoreboard->first_job = job->gpu;
185         }
186 
187         scoreboard->prev_job = (struct mali_job_header_packed *)job->cpu;
188         return index;
189 }
190 
191 /* Generates a write value job, used to initialize the tiler structures. Note
192  * this is called right before frame submission. */
193 
194 void
panfrost_scoreboard_initialize_tiler(struct pan_pool * pool,struct pan_scoreboard * scoreboard,mali_ptr polygon_list)195 panfrost_scoreboard_initialize_tiler(struct pan_pool *pool,
196                 struct pan_scoreboard *scoreboard,
197                 mali_ptr polygon_list)
198 {
199         /* Check if we even need tiling */
200         if (pool->dev->quirks & IS_BIFROST || !scoreboard->tiler_dep)
201                 return;
202 
203         /* Okay, we do. Let's generate it. We'll need the job's polygon list
204          * regardless of size. */
205 
206         struct panfrost_ptr transfer =
207                 panfrost_pool_alloc_aligned(pool,
208                                             MALI_WRITE_VALUE_JOB_LENGTH,
209                                             64);
210 
211         pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, HEADER, header) {
212                 header.type = MALI_JOB_TYPE_WRITE_VALUE;
213                 header.index = scoreboard->write_value_index;
214                 header.next = scoreboard->first_job;
215         }
216 
217         pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, PAYLOAD, payload) {
218                 payload.address = polygon_list;
219                 payload.type = MALI_WRITE_VALUE_TYPE_ZERO;
220         }
221 
222         scoreboard->first_job = transfer.gpu;
223 }
224