1 /*
2  * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22 
23 #include "radeon_emulate_branches.h"
24 
25 #include <stdio.h>
26 
27 #include "radeon_compiler.h"
28 #include "radeon_dataflow.h"
29 
/* Compile-time switch for the debug tracing in this file; non-zero enables it. */
#define VERBOSE 0

/* printf-style trace to stderr; does nothing unless VERBOSE is non-zero. */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while (0)
33 
34 
35 struct proxy_info {
36 	unsigned int Proxied:1;
37 	unsigned int Index:RC_REGISTER_INDEX_BITS;
38 };
39 
40 struct register_proxies {
41 	struct proxy_info Temporary[RC_REGISTER_MAX_INDEX];
42 };
43 
/**
 * The instructions that delimit one open branch.
 */
struct branch_info {
	/* The IF instruction that opened the branch. */
	struct rc_instruction * If;
	/* The ELSE instruction of the branch; stays NULL when none was seen. */
	struct rc_instruction * Else;
};
48 
/**
 * State carried through the branch emulation pass.
 */
struct emulate_branch_state {
	/* Compiler whose program is being rewritten. */
	struct radeon_compiler * C;

	/* Stack of currently open branches; the innermost one is last. */
	struct branch_info * Branches;
	/* Number of entries of Branches that are in use. */
	unsigned int BranchCount;
	/* Capacity bookkeeping for Branches (passed to memory_pool_array_reserve). */
	unsigned int BranchReserved;
};
56 
57 
handle_if(struct emulate_branch_state * s,struct rc_instruction * inst)58 static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
59 {
60 	struct branch_info * branch;
61 	struct rc_instruction * inst_mov;
62 
63 	memory_pool_array_reserve(&s->C->Pool, struct branch_info,
64 			s->Branches, s->BranchCount, s->BranchReserved, 1);
65 
66 	DBG("%s\n", __FUNCTION__);
67 
68 	branch = &s->Branches[s->BranchCount++];
69 	memset(branch, 0, sizeof(struct branch_info));
70 	branch->If = inst;
71 
72 	/* Make a safety copy of the decision register, because we will need
73 	 * it at ENDIF time and it might be overwritten in both branches. */
74 	inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
75 	inst_mov->U.I.Opcode = RC_OPCODE_MOV;
76 	inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
77 	inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C);
78 	inst_mov->U.I.DstReg.WriteMask = RC_MASK_X;
79 	inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
80 
81 	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
82 	inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
83 	inst->U.I.SrcReg[0].Swizzle = 0;
84 	inst->U.I.SrcReg[0].Abs = 0;
85 	inst->U.I.SrcReg[0].Negate = 0;
86 }
87 
handle_else(struct emulate_branch_state * s,struct rc_instruction * inst)88 static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
89 {
90 	struct branch_info * branch;
91 
92 	if (!s->BranchCount) {
93 		rc_error(s->C, "Encountered ELSE outside of branches");
94 		return;
95 	}
96 
97 	DBG("%s\n", __FUNCTION__);
98 
99 	branch = &s->Branches[s->BranchCount - 1];
100 	branch->Else = inst;
101 }
102 
103 
/**
 * Userdata bundle handed to the write-scan and register-remap callbacks.
 */
struct state_and_proxies {
	/* Pass state; the callbacks use it to reach the compiler. */
	struct emulate_branch_state * S;
	/* Proxy table being filled in and applied. */
	struct register_proxies * Proxies;
};
108 
/**
 * Look up the proxy entry for a register.
 *
 * Returns the entry for temporary register index, or NULL for any
 * register file other than RC_FILE_TEMPORARY. The index is not range
 * checked here.
 */
static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
			rc_register_file file, unsigned int index)
{
	if (file != RC_FILE_TEMPORARY)
		return NULL;

	return sap->Proxies->Temporary + index;
}
118 
/**
 * Write callback: the first time a proxyable register is seen, assign it
 * a free temporary as its proxy.
 *
 * userdata must point to a struct state_and_proxies. The inst and comp
 * arguments are not used.
 */
static void scan_write(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int comp)
{
	struct state_and_proxies * ctx = userdata;
	struct proxy_info * entry = get_proxy_info(ctx, file, index);

	/* Nothing to do for non-proxyable registers or ones already set up. */
	if (!entry || entry->Proxied)
		return;

	entry->Proxied = 1;
	entry->Index = rc_find_free_temporary(ctx->S->C);
}
130 
/**
 * Remap callback: replace a register that has a proxy by that proxy.
 *
 * userdata must point to a struct state_and_proxies. Registers without
 * an assigned proxy are left unchanged. The inst argument is not used.
 */
static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
		rc_register_file * pfile, unsigned int * pindex)
{
	struct state_and_proxies * ctx = userdata;
	struct proxy_info * entry = get_proxy_info(ctx, *pfile, *pindex);

	if (!entry || !entry->Proxied)
		return;

	*pfile = RC_FILE_TEMPORARY;
	*pindex = entry->Index;
}
142 
143 /**
144  * Redirect all writes in the instruction range [begin, end) to proxy
145  * temporary registers.
146  */
allocate_and_insert_proxies(struct emulate_branch_state * s,struct register_proxies * proxies,struct rc_instruction * begin,struct rc_instruction * end)147 static void allocate_and_insert_proxies(struct emulate_branch_state * s,
148 		struct register_proxies * proxies,
149 		struct rc_instruction * begin,
150 		struct rc_instruction * end)
151 {
152 	struct state_and_proxies sap;
153 
154 	sap.S = s;
155 	sap.Proxies = proxies;
156 
157 	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
158 		rc_for_all_writes_mask(inst, scan_write, &sap);
159 		rc_remap_registers(inst, remap_proxy_function, &sap);
160 	}
161 
162 	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
163 		if (proxies->Temporary[index].Proxied) {
164 			struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev);
165 			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
166 			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
167 			inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index;
168 			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
169 			inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
170 			inst_mov->U.I.SrcReg[0].Index = index;
171 		}
172 	}
173 }
174 
175 
/**
 * Insert, right after inst_endif, a CMP that writes register file/index.
 *
 * Source 0 is the condition operand of inst_if with Abs set and Negate
 * applied to all channels. Source 1 is the if-side proxy and source 2 the
 * else-side proxy; a side that has no proxy for this register reads the
 * original temporary index instead.
 */
static void inject_cmp(struct emulate_branch_state * s,
		struct rc_instruction * inst_if,
		struct rc_instruction * inst_endif,
		rc_register_file file, unsigned int index,
		struct proxy_info ifproxy,
		struct proxy_info elseproxy)
{
	struct rc_instruction * cmp = rc_insert_new_instruction(s->C, inst_endif);
	unsigned int if_value = index;
	unsigned int else_value = index;

	if (ifproxy.Proxied)
		if_value = ifproxy.Index;
	if (elseproxy.Proxied)
		else_value = elseproxy.Index;

	cmp->U.I.Opcode = RC_OPCODE_CMP;

	cmp->U.I.DstReg.File = file;
	cmp->U.I.DstReg.Index = index;
	cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW;

	cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
	cmp->U.I.SrcReg[0].Abs = 1;
	cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW;

	cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
	cmp->U.I.SrcReg[1].Index = if_value;

	cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
	cmp->U.I.SrcReg[2].Index = else_value;
}
196 
handle_endif(struct emulate_branch_state * s,struct rc_instruction * inst)197 static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
198 {
199 	struct branch_info * branch;
200 	struct register_proxies IfProxies;
201 	struct register_proxies ElseProxies;
202 
203 	if (!s->BranchCount) {
204 		rc_error(s->C, "Encountered ENDIF outside of branches");
205 		return;
206 	}
207 
208 	DBG("%s\n", __FUNCTION__);
209 
210 	branch = &s->Branches[s->BranchCount - 1];
211 
212 	memset(&IfProxies, 0, sizeof(IfProxies));
213 	memset(&ElseProxies, 0, sizeof(ElseProxies));
214 
215 	allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst);
216 
217 	if (branch->Else)
218 		allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst);
219 
220 	/* Insert the CMP instructions at the end. */
221 	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
222 		if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) {
223 			inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index,
224 					IfProxies.Temporary[index], ElseProxies.Temporary[index]);
225 		}
226 	}
227 
228 	/* Remove all traces of the branch instructions */
229 	rc_remove_instruction(branch->If);
230 	if (branch->Else)
231 		rc_remove_instruction(branch->Else);
232 	rc_remove_instruction(inst);
233 
234 	s->BranchCount--;
235 
236 	if (VERBOSE) {
237 		DBG("Program after ENDIF handling:\n");
238 		rc_print_program(&s->C->Program);
239 	}
240 }
241 
242 
243 struct remap_output_data {
244 	unsigned int Output:RC_REGISTER_INDEX_BITS;
245 	unsigned int Temporary:RC_REGISTER_INDEX_BITS;
246 };
247 
/**
 * Remap callback: replace the output register named in userdata by its
 * temporary stand-in.
 *
 * userdata must point to a struct remap_output_data. All other registers
 * are left unchanged. The inst argument is not used.
 */
static void remap_output_function(void * userdata, struct rc_instruction * inst,
		rc_register_file * pfile, unsigned int * pindex)
{
	struct remap_output_data * map = userdata;

	if (*pfile != RC_FILE_OUTPUT || *pindex != map->Output)
		return;

	*pfile = RC_FILE_TEMPORARY;
	*pindex = map->Temporary;
}
258 
259 
260 /**
261  * Output registers cannot be read from and so cannot be dealt with like
262  * temporary registers.
263  *
264  * We do the simplest thing: If an output registers is written within
265  * a branch, then *all* writes to this register are proxied to a
266  * temporary register, and a final MOV is appended to the end of
267  * the program.
268  */
fix_output_writes(struct emulate_branch_state * s,struct rc_instruction * inst)269 static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
270 {
271 	const struct rc_opcode_info * opcode;
272 
273 	if (!s->BranchCount)
274 		return;
275 
276 	opcode = rc_get_opcode_info(inst->U.I.Opcode);
277 
278 	if (!opcode->HasDstReg)
279 		return;
280 
281 	if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
282 		struct remap_output_data remap;
283 		struct rc_instruction * inst_mov;
284 
285 		remap.Output = inst->U.I.DstReg.Index;
286 		remap.Temporary = rc_find_free_temporary(s->C);
287 
288 		for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
289 		    inst != &s->C->Program.Instructions;
290 		    inst = inst->Next) {
291 			rc_remap_registers(inst, &remap_output_function, &remap);
292 		}
293 
294 		inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
295 		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
296 		inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
297 		inst_mov->U.I.DstReg.Index = remap.Output;
298 		inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
299 		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
300 		inst_mov->U.I.SrcReg[0].Index = remap.Temporary;
301 	}
302 }
303 
304 /**
305  * Remove branch instructions; instead, execute both branches
306  * on different register sets and choose between their results
307  * using CMP instructions in place of the original ENDIF.
308  */
rc_emulate_branches(struct radeon_compiler * c,void * user)309 void rc_emulate_branches(struct radeon_compiler *c, void *user)
310 {
311 	struct emulate_branch_state s;
312 	struct rc_instruction * ptr;
313 
314 	memset(&s, 0, sizeof(s));
315 	s.C = c;
316 
317 	/* Untypical loop because we may remove the current instruction */
318 	ptr = c->Program.Instructions.Next;
319 	while(ptr != &c->Program.Instructions) {
320 		struct rc_instruction * inst = ptr;
321 		ptr = ptr->Next;
322 
323 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
324 			switch(inst->U.I.Opcode) {
325 			case RC_OPCODE_IF:
326 				handle_if(&s, inst);
327 				break;
328 			case RC_OPCODE_ELSE:
329 				handle_else(&s, inst);
330 				break;
331 			case RC_OPCODE_ENDIF:
332 				handle_endif(&s, inst);
333 				break;
334 			default:
335 				fix_output_writes(&s, inst);
336 				break;
337 			}
338 		} else {
339 			rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
340 		}
341 	}
342 }
343