1 /*
2  * Copyright © 2014 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file
26  *
27  * Validates the QPU instruction sequence after register allocation and
28  * scheduling.
29  */
30 
31 #include <assert.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include "v3d_compiler.h"
35 #include "qpu/qpu_disasm.h"
36 
/**
 * Running state carried from one instruction to the next while a program
 * is being validated.
 */
struct v3d_qpu_validate_state {
        /* Compile whose instruction stream is being checked. */
        struct v3d_compile *c;

        /* Instruction validated just before the current one; NULL until
         * the first instruction has been processed.
         */
        const struct v3d_qpu_instr *last;

        /* Index of the current instruction, counted across all blocks. */
        int ip;

        /* ip of the most recent instruction that did an SFU magic write.
         * Starts at a negative sentinel set by qpu_validate().
         */
        int last_sfu_write;

        /* ip recorded for the most recent branch instruction.  Starts at
         * a negative sentinel set by qpu_validate().
         */
        int last_branch_ip;

        /* ip of the most recent THRSW that was counted in thrsw_count.
         * Starts at a negative sentinel set by qpu_validate().
         */
        int last_thrsw_ip;

        /* Set once a THRSW directly following a counted THRSW is seen. */
        bool last_thrsw_found;

        /* Number of THRSW signals seen, excluding the back-to-back one
         * that is tracked through last_thrsw_found.
         */
        int thrsw_count;
};
47 
48 static void
fail_instr(struct v3d_qpu_validate_state * state,const char * msg)49 fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
50 {
51         struct v3d_compile *c = state->c;
52 
53         fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
54 
55         int dump_ip = 0;
56         vir_for_each_inst_inorder(inst, c) {
57                 v3d_qpu_dump(c->devinfo, &inst->qpu);
58 
59                 if (dump_ip++ == state->ip)
60                         fprintf(stderr, " *** ERROR ***");
61 
62                 fprintf(stderr, "\n");
63         }
64 
65         fprintf(stderr, "\n");
66         abort();
67 }
68 
69 static bool
in_branch_delay_slots(struct v3d_qpu_validate_state * state)70 in_branch_delay_slots(struct v3d_qpu_validate_state *state)
71 {
72         return (state->ip - state->last_branch_ip) < 3;
73 }
74 
75 static bool
in_thrsw_delay_slots(struct v3d_qpu_validate_state * state)76 in_thrsw_delay_slots(struct v3d_qpu_validate_state *state)
77 {
78         return (state->ip - state->last_thrsw_ip) < 3;
79 }
80 
81 static bool
qpu_magic_waddr_matches(const struct v3d_qpu_instr * inst,bool (* predicate)(enum v3d_qpu_waddr waddr))82 qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
83                         bool (*predicate)(enum v3d_qpu_waddr waddr))
84 {
85         if (inst->type == V3D_QPU_INSTR_TYPE_ALU)
86                 return false;
87 
88         if (inst->alu.add.op != V3D_QPU_A_NOP &&
89             inst->alu.add.magic_write &&
90             predicate(inst->alu.add.waddr))
91                 return true;
92 
93         if (inst->alu.mul.op != V3D_QPU_M_NOP &&
94             inst->alu.mul.magic_write &&
95             predicate(inst->alu.mul.waddr))
96                 return true;
97 
98         return false;
99 }
100 
101 static void
qpu_validate_inst(struct v3d_qpu_validate_state * state,struct qinst * qinst)102 qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
103 {
104         const struct v3d_device_info *devinfo = state->c->devinfo;
105         const struct v3d_qpu_instr *inst = &qinst->qpu;
106 
107         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
108                 return;
109 
110         /* LDVARY writes r5 two instructions later and LDUNIF writes
111          * r5 one instruction later, which is illegal to have
112          * together.
113          */
114         if (state->last && state->last->sig.ldvary &&
115             (inst->sig.ldunif || inst->sig.ldunifa)) {
116                 fail_instr(state, "LDUNIF after a LDVARY");
117         }
118 
119         int tmu_writes = 0;
120         int sfu_writes = 0;
121         int vpm_writes = 0;
122         int tlb_writes = 0;
123         int tsy_writes = 0;
124 
125         if (inst->alu.add.op != V3D_QPU_A_NOP) {
126                 if (inst->alu.add.magic_write) {
127                         if (v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr))
128                                 tmu_writes++;
129                         if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
130                                 sfu_writes++;
131                         if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
132                                 vpm_writes++;
133                         if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
134                                 tlb_writes++;
135                         if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
136                                 tsy_writes++;
137                 }
138         }
139 
140         if (inst->alu.mul.op != V3D_QPU_M_NOP) {
141                 if (inst->alu.mul.magic_write) {
142                         if (v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr))
143                                 tmu_writes++;
144                         if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
145                                 sfu_writes++;
146                         if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
147                                 vpm_writes++;
148                         if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
149                                 tlb_writes++;
150                         if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
151                                 tsy_writes++;
152                 }
153         }
154 
155         if (in_thrsw_delay_slots(state)) {
156                 /* There's no way you want to start SFU during the THRSW delay
157                  * slots, since the result would land in the other thread.
158                  */
159                 if (sfu_writes) {
160                         fail_instr(state,
161                                    "SFU write started during THRSW delay slots ");
162                 }
163 
164                 if (inst->sig.ldvary)
165                         fail_instr(state, "LDVARY during THRSW delay slots");
166         }
167 
168         (void)qpu_magic_waddr_matches; /* XXX */
169 
170         /* SFU r4 results come back two instructions later.  No doing
171          * r4 read/writes or other SFU lookups until it's done.
172          */
173         if (state->ip - state->last_sfu_write < 2) {
174                 if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
175                         fail_instr(state, "R4 read too soon after SFU");
176 
177                 if (v3d_qpu_writes_r4(devinfo, inst))
178                         fail_instr(state, "R4 write too soon after SFU");
179 
180                 if (sfu_writes)
181                         fail_instr(state, "SFU write too soon after SFU");
182         }
183 
184         /* XXX: The docs say VPM can happen with the others, but the simulator
185          * disagrees.
186          */
187         if (tmu_writes +
188             sfu_writes +
189             vpm_writes +
190             tlb_writes +
191             tsy_writes +
192             inst->sig.ldtmu +
193             inst->sig.ldtlb +
194             inst->sig.ldvpm +
195             inst->sig.ldtlbu > 1) {
196                 fail_instr(state,
197                            "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
198         }
199 
200         if (sfu_writes)
201                 state->last_sfu_write = state->ip;
202 
203         if (inst->sig.thrsw) {
204                 if (in_branch_delay_slots(state))
205                         fail_instr(state, "THRSW in a branch delay slot.");
206 
207                 if (state->last_thrsw_ip == state->ip - 1) {
208                         /* If it's the second THRSW in a row, then it's just a
209                          * last-thrsw signal.
210                          */
211                         if (state->last_thrsw_found)
212                                 fail_instr(state, "Two last-THRSW signals");
213                         state->last_thrsw_found = true;
214                 } else {
215                         if (in_thrsw_delay_slots(state)) {
216                                 fail_instr(state,
217                                            "THRSW too close to another THRSW.");
218                         }
219                         state->thrsw_count++;
220                         state->last_thrsw_ip = state->ip;
221                 }
222         }
223 
224         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
225                 if (in_branch_delay_slots(state))
226                         fail_instr(state, "branch in a branch delay slot.");
227                 if (in_thrsw_delay_slots(state))
228                         fail_instr(state, "branch in a THRSW delay slot.");
229                 state->last_branch_ip = state->ip;
230         }
231 }
232 
233 static void
qpu_validate_block(struct v3d_qpu_validate_state * state,struct qblock * block)234 qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
235 {
236         vir_for_each_inst(qinst, block) {
237                 qpu_validate_inst(state, qinst);
238 
239                 state->last = &qinst->qpu;
240                 state->ip++;
241         }
242 }
243 
244 /**
245  * Checks for the instruction restrictions from page 37 ("Summary of
246  * Instruction Restrictions").
247  */
248 void
qpu_validate(struct v3d_compile * c)249 qpu_validate(struct v3d_compile *c)
250 {
251         /* We don't want to do validation in release builds, but we want to
252          * keep compiling the validation code to make sure it doesn't get
253          * broken.
254          */
255 #ifndef DEBUG
256         return;
257 #endif
258 
259         struct v3d_qpu_validate_state state = {
260                 .c = c,
261                 .last_sfu_write = -10,
262                 .last_thrsw_ip = -10,
263                 .last_branch_ip = -10,
264                 .ip = 0,
265         };
266 
267         vir_for_each_block(block, c) {
268                 qpu_validate_block(&state, block);
269         }
270 
271         if (state.thrsw_count > 1 && !state.last_thrsw_found) {
272                 fail_instr(&state,
273                            "thread switch found without last-THRSW in program");
274         }
275 
276         if (state.thrsw_count == 0 ||
277             (state.last_thrsw_found && state.thrsw_count == 1)) {
278                 fail_instr(&state, "No program-end THRSW found");
279         }
280 }
281