1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Author: Tom Stellard <thomas.stellard@amd.com>
24  */
25 
26 #include "radeon_compiler.h"
27 #include "radeon_compiler_util.h"
28 #include "radeon_dataflow.h"
29 #include "radeon_program.h"
30 #include "radeon_program_constants.h"
31 
32 struct vert_fc_state {
33 	struct radeon_compiler *C;
34 	unsigned BranchDepth;
35 	unsigned LoopDepth;
36 	unsigned LoopsReserved;
37 	int PredStack[R500_PVS_MAX_LOOP_DEPTH];
38 	int PredicateReg;
39 	unsigned InCFBreak;
40 };
41 
build_pred_src(struct rc_src_register * src,struct vert_fc_state * fc_state)42 static void build_pred_src(
43 	struct rc_src_register * src,
44 	struct vert_fc_state * fc_state)
45 {
46 	src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
47 					RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
48 	src->File = RC_FILE_TEMPORARY;
49 	src->Index = fc_state->PredicateReg;
50 }
51 
build_pred_dst(struct rc_dst_register * dst,struct vert_fc_state * fc_state)52 static void build_pred_dst(
53 	struct rc_dst_register * dst,
54 	struct vert_fc_state * fc_state)
55 {
56 	dst->WriteMask = RC_MASK_W;
57 	dst->File = RC_FILE_TEMPORARY;
58 	dst->Index = fc_state->PredicateReg;
59 }
60 
/* Callback for rc_for_all_writes_mask(): accumulates, per temporary
 * register, the components that are written.
 *
 * userdata is an array of unsigned int write masks indexed by register
 * index. Writes to other register files, or to temporaries with an index
 * of R300_VS_MAX_TEMPS or more, are not recorded. */
static void mark_write(void * userdata,	struct rc_instruction * inst,
		rc_register_file file,	unsigned int index, unsigned int mask)
{
	unsigned int * masks = userdata;

	if (file == RC_FILE_TEMPORARY && index < R300_VS_MAX_TEMPS) {
		masks[index] |= mask;
	}
}
74 
reserve_predicate_reg(struct vert_fc_state * fc_state)75 static int reserve_predicate_reg(struct vert_fc_state * fc_state)
76 {
77 	int i;
78 	unsigned int writemasks[RC_REGISTER_MAX_INDEX];
79 	struct rc_instruction * inst;
80 	memset(writemasks, 0, sizeof(writemasks));
81 	for(inst = fc_state->C->Program.Instructions.Next;
82 				inst != &fc_state->C->Program.Instructions;
83 				inst = inst->Next) {
84 		rc_for_all_writes_mask(inst, mark_write, writemasks);
85 	}
86 
87 	for(i = 0; i < fc_state->C->max_temp_regs; i++) {
88 		/* Most of the control flow instructions only write the
89 		 * W component of the Predicate Register, but
90 		 * the docs say that ME_PRED_SET_CLR and
91 		 * ME_PRED_SET_RESTORE write all components of the
92 		 * register, so we must reserve a register that has
93 		 * all its components free. */
94 		if (!writemasks[i]) {
95 			fc_state->PredicateReg = i;
96 			break;
97 		}
98 	}
99 	if (i == fc_state->C->max_temp_regs) {
100 		rc_error(fc_state->C, "No free temporary to use for"
101 				" predicate stack counter.\n");
102 		return -1;
103 	}
104 	return 1;
105 }
106 
lower_bgnloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)107 static void lower_bgnloop(
108 	struct rc_instruction * inst,
109 	struct vert_fc_state * fc_state)
110 {
111 	struct rc_instruction * new_inst =
112 			rc_insert_new_instruction(fc_state->C, inst->Prev);
113 
114 	if ((!fc_state->C->is_r500
115 		&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
116 	     || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
117 		rc_error(fc_state->C, "Loops are nested too deep.");
118 		return;
119 	}
120 
121 	if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
122 		if (fc_state->PredicateReg == -1) {
123 			if (reserve_predicate_reg(fc_state) == -1) {
124 				return;
125 			}
126 		}
127 
128 		/* Initialize the predicate bit to true. */
129 		new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
130 		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
131 		new_inst->U.I.SrcReg[0].Index = 0;
132 		new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
133 		new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
134 	} else {
135 		fc_state->PredStack[fc_state->LoopDepth] =
136 						fc_state->PredicateReg;
137 		/* Copy the current predicate value to this loop's
138 		 * predicate register */
139 
140 		/* Use the old predicate value for src0 */
141 		build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
142 
143 		/* Reserve this loop's predicate register */
144 		if (reserve_predicate_reg(fc_state) == -1) {
145 			return;
146 		}
147 
148 		/* Copy the old predicate value to the new register */
149 		new_inst->U.I.Opcode = RC_OPCODE_ADD;
150 		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
151 		new_inst->U.I.SrcReg[1].Index = 0;
152 		new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
153 		new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
154 	}
155 
156 }
157 
lower_brk(struct rc_instruction * inst,struct vert_fc_state * fc_state)158 static void lower_brk(
159 	struct rc_instruction * inst,
160 	struct vert_fc_state * fc_state)
161 {
162 	if (fc_state->LoopDepth == 1) {
163 		inst->U.I.Opcode = RC_OPCODE_RCP;
164 		inst->U.I.DstReg.Pred = RC_PRED_INV;
165 		inst->U.I.SrcReg[0].Index = 0;
166 		inst->U.I.SrcReg[0].File = RC_FILE_NONE;
167 		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
168 	} else {
169 		inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
170 		inst->U.I.DstReg.Pred = RC_PRED_SET;
171 	}
172 
173 	build_pred_dst(&inst->U.I.DstReg, fc_state);
174 }
175 
lower_endloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)176 static void lower_endloop(
177 	struct rc_instruction * inst,
178 	struct vert_fc_state * fc_state)
179 {
180 	struct rc_instruction * new_inst =
181 			rc_insert_new_instruction(fc_state->C, inst);
182 
183 	new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
184 	build_pred_dst(&new_inst->U.I.DstReg, fc_state);
185 	/* Restore the previous predicate register. */
186 	fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
187 	build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
188 }
189 
lower_if(struct rc_instruction * inst,struct vert_fc_state * fc_state)190 static void lower_if(
191 	struct rc_instruction * inst,
192 	struct vert_fc_state * fc_state)
193 {
194 	/* Reserve a temporary to use as our predicate stack counter, if we
195 	 * don't already have one. */
196 	if (fc_state->PredicateReg == -1) {
197 		/* If we are inside a loop, the Predicate Register should
198 		 * have already been defined. */
199 		assert(fc_state->LoopDepth == 0);
200 
201 		if (reserve_predicate_reg(fc_state) == -1) {
202 			return;
203 		}
204 	}
205 
206 	if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
207 		fc_state->InCFBreak = 1;
208 	}
209 	if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0)
210 			|| (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) {
211 		if (fc_state->InCFBreak) {
212 			inst->U.I.Opcode = RC_ME_PRED_SEQ;
213 			inst->U.I.DstReg.Pred = RC_PRED_SET;
214 		} else {
215 			inst->U.I.Opcode = RC_ME_PRED_SNEQ;
216 		}
217 	} else {
218 		unsigned swz;
219 		inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
220 		memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
221 						sizeof(inst->U.I.SrcReg[1]));
222 		swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
223 		/* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the
224 		 * w component */
225 		inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
226 				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
227 		build_pred_src(&inst->U.I.SrcReg[0], fc_state);
228 	}
229 	build_pred_dst(&inst->U.I.DstReg, fc_state);
230 }
231 
rc_vert_fc(struct radeon_compiler * c,void * user)232 void rc_vert_fc(struct radeon_compiler *c, void *user)
233 {
234 	struct rc_instruction * inst;
235 	struct vert_fc_state fc_state;
236 
237 	memset(&fc_state, 0, sizeof(fc_state));
238 	fc_state.PredicateReg = -1;
239 	fc_state.C = c;
240 
241 	for(inst = c->Program.Instructions.Next;
242 					inst != &c->Program.Instructions;
243 					inst = inst->Next) {
244 
245 		switch (inst->U.I.Opcode) {
246 
247 		case RC_OPCODE_BGNLOOP:
248 			lower_bgnloop(inst, &fc_state);
249 			fc_state.LoopDepth++;
250 			break;
251 
252 		case RC_OPCODE_BRK:
253 			lower_brk(inst, &fc_state);
254 			break;
255 
256 		case RC_OPCODE_ENDLOOP:
257 			if (fc_state.BranchDepth != 0
258 					|| fc_state.LoopDepth != 1) {
259 				lower_endloop(inst, &fc_state);
260 			}
261 			fc_state.LoopDepth--;
262 			/* Skip PRED_RESTORE */
263 			inst = inst->Next;
264 			break;
265 		case RC_OPCODE_IF:
266 			lower_if(inst, &fc_state);
267 			fc_state.BranchDepth++;
268 			break;
269 
270 		case RC_OPCODE_ELSE:
271 			inst->U.I.Opcode = RC_ME_PRED_SET_INV;
272 			build_pred_dst(&inst->U.I.DstReg, &fc_state);
273 			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
274 			break;
275 
276 		case RC_OPCODE_ENDIF:
277 			if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
278 				struct rc_instruction * to_delete = inst;
279 				inst = inst->Prev;
280 				rc_remove_instruction(to_delete);
281 				/* XXX: Delete the endif instruction */
282 			} else {
283 				inst->U.I.Opcode = RC_ME_PRED_SET_POP;
284 				build_pred_dst(&inst->U.I.DstReg, &fc_state);
285 				build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
286 			}
287 			fc_state.InCFBreak = 0;
288 			fc_state.BranchDepth--;
289 			break;
290 
291 		default:
292 			if (fc_state.BranchDepth || fc_state.LoopDepth) {
293 				inst->U.I.DstReg.Pred = RC_PRED_SET;
294 			}
295 			break;
296 		}
297 
298 		if (c->Error) {
299 			return;
300 		}
301 	}
302 }
303