1 /*
2  * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22 
23 #include "radeon_compiler.h"
24 
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 
29 #include "radeon_dataflow.h"
30 #include "radeon_program.h"
31 #include "radeon_program_pair.h"
32 #include "radeon_regalloc.h"
33 #include "radeon_compiler_util.h"
34 
35 
rc_init(struct radeon_compiler * c,const struct rc_regalloc_state * rs)36 void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs)
37 {
38 	memset(c, 0, sizeof(*c));
39 
40 	memory_pool_init(&c->Pool);
41 	c->Program.Instructions.Prev = &c->Program.Instructions;
42 	c->Program.Instructions.Next = &c->Program.Instructions;
43 	c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
44 	c->regalloc_state = rs;
45 }
46 
rc_destroy(struct radeon_compiler * c)47 void rc_destroy(struct radeon_compiler * c)
48 {
49 	rc_constants_destroy(&c->Program.Constants);
50 	memory_pool_destroy(&c->Pool);
51 	free(c->ErrorMsg);
52 }
53 
rc_debug(struct radeon_compiler * c,const char * fmt,...)54 void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
55 {
56 	va_list ap;
57 
58 	if (!(c->Debug & RC_DBG_LOG))
59 		return;
60 
61 	va_start(ap, fmt);
62 	vfprintf(stderr, fmt, ap);
63 	va_end(ap);
64 }
65 
rc_error(struct radeon_compiler * c,const char * fmt,...)66 void rc_error(struct radeon_compiler * c, const char * fmt, ...)
67 {
68 	va_list ap;
69 
70 	c->Error = 1;
71 
72 	if (!c->ErrorMsg) {
73 		/* Only remember the first error */
74 		char buf[1024];
75 		int written;
76 
77 		va_start(ap, fmt);
78 		written = vsnprintf(buf, sizeof(buf), fmt, ap);
79 		va_end(ap);
80 
81 		if (written < sizeof(buf)) {
82 			c->ErrorMsg = strdup(buf);
83 		} else {
84 			c->ErrorMsg = malloc(written + 1);
85 
86 			va_start(ap, fmt);
87 			vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
88 			va_end(ap);
89 		}
90 	}
91 
92 	if (c->Debug & RC_DBG_LOG) {
93 		fprintf(stderr, "r300compiler error: ");
94 
95 		va_start(ap, fmt);
96 		vfprintf(stderr, fmt, ap);
97 		va_end(ap);
98 	}
99 }
100 
/**
 * Report a failed internal assertion through rc_error().
 *
 * file, line and assertion are only used to build the error message.
 * Always returns 1.
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
	rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);

	return 1;
}
106 
107 /**
108  * Recompute c->Program.InputsRead and c->Program.OutputsWritten
109  * based on which inputs and outputs are actually referenced
110  * in program instructions.
111  */
rc_calculate_inputs_outputs(struct radeon_compiler * c)112 void rc_calculate_inputs_outputs(struct radeon_compiler * c)
113 {
114 	struct rc_instruction *inst;
115 
116 	c->Program.InputsRead = 0;
117 	c->Program.OutputsWritten = 0;
118 
119 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
120 	{
121 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
122 		int i;
123 
124 		for (i = 0; i < opcode->NumSrcRegs; ++i) {
125 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
126 				c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
127 		}
128 
129 		if (opcode->HasDstReg) {
130 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
131 				c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
132 		}
133 	}
134 }
135 
136 /**
137  * Rewrite the program such that everything that source the given input
138  * register will source new_input instead.
139  */
rc_move_input(struct radeon_compiler * c,unsigned input,struct rc_src_register new_input)140 void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
141 {
142 	struct rc_instruction * inst;
143 
144 	c->Program.InputsRead &= ~(1 << input);
145 
146 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
147 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
148 		unsigned i;
149 
150 		for(i = 0; i < opcode->NumSrcRegs; ++i) {
151 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
152 				inst->U.I.SrcReg[i].File = new_input.File;
153 				inst->U.I.SrcReg[i].Index = new_input.Index;
154 				inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
155 				if (!inst->U.I.SrcReg[i].Abs) {
156 					inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
157 					inst->U.I.SrcReg[i].Abs = new_input.Abs;
158 				}
159 
160 				c->Program.InputsRead |= 1 << new_input.Index;
161 			}
162 		}
163 	}
164 }
165 
166 
167 /**
168  * Rewrite the program such that everything that writes into the given
169  * output register will instead write to new_output. The new_output
170  * writemask is honoured.
171  */
rc_move_output(struct radeon_compiler * c,unsigned output,unsigned new_output,unsigned writemask)172 void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
173 {
174 	struct rc_instruction * inst;
175 
176 	c->Program.OutputsWritten &= ~(1 << output);
177 
178 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
179 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
180 
181 		if (opcode->HasDstReg) {
182 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
183 				inst->U.I.DstReg.Index = new_output;
184 				inst->U.I.DstReg.WriteMask &= writemask;
185 
186 				c->Program.OutputsWritten |= 1 << new_output;
187 			}
188 		}
189 	}
190 }
191 
192 
193 /**
194  * Rewrite the program such that a given output is duplicated.
195  */
rc_copy_output(struct radeon_compiler * c,unsigned output,unsigned dup_output)196 void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
197 {
198 	unsigned tempreg = rc_find_free_temporary(c);
199 	struct rc_instruction * inst;
200 
201 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
202 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
203 
204 		if (opcode->HasDstReg) {
205 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
206 				inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
207 				inst->U.I.DstReg.Index = tempreg;
208 			}
209 		}
210 	}
211 
212 	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
213 	inst->U.I.Opcode = RC_OPCODE_MOV;
214 	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
215 	inst->U.I.DstReg.Index = output;
216 
217 	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
218 	inst->U.I.SrcReg[0].Index = tempreg;
219 	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
220 
221 	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
222 	inst->U.I.Opcode = RC_OPCODE_MOV;
223 	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
224 	inst->U.I.DstReg.Index = dup_output;
225 
226 	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
227 	inst->U.I.SrcReg[0].Index = tempreg;
228 	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
229 
230 	c->Program.OutputsWritten |= 1 << dup_output;
231 }
232 
233 
234 /**
235  * Introduce standard code fragment to deal with fragment.position.
236  */
rc_transform_fragment_wpos(struct radeon_compiler * c,unsigned wpos,unsigned new_input,int full_vtransform)237 void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
238                                 int full_vtransform)
239 {
240 	unsigned tempregi = rc_find_free_temporary(c);
241 	struct rc_instruction * inst_rcp;
242 	struct rc_instruction * inst_mul;
243 	struct rc_instruction * inst_mad;
244 	struct rc_instruction * inst;
245 
246 	c->Program.InputsRead &= ~(1 << wpos);
247 	c->Program.InputsRead |= 1 << new_input;
248 
249 	/* perspective divide */
250 	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
251 	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
252 
253 	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
254 	inst_rcp->U.I.DstReg.Index = tempregi;
255 	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
256 
257 	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
258 	inst_rcp->U.I.SrcReg[0].Index = new_input;
259 	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
260 
261 	inst_mul = rc_insert_new_instruction(c, inst_rcp);
262 	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
263 
264 	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
265 	inst_mul->U.I.DstReg.Index = tempregi;
266 	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
267 
268 	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
269 	inst_mul->U.I.SrcReg[0].Index = new_input;
270 
271 	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
272 	inst_mul->U.I.SrcReg[1].Index = tempregi;
273 	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
274 
275 	/* viewport transformation */
276 	inst_mad = rc_insert_new_instruction(c, inst_mul);
277 	inst_mad->U.I.Opcode = RC_OPCODE_MAD;
278 
279 	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
280 	inst_mad->U.I.DstReg.Index = tempregi;
281 	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
282 
283 	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
284 	inst_mad->U.I.SrcReg[0].Index = tempregi;
285 	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
286 
287 	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
288 	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
289 
290 	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
291 	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
292 
293 	if (full_vtransform) {
294 		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
295 		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
296 	} else {
297 		inst_mad->U.I.SrcReg[1].Index =
298 		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
299 	}
300 
301 	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
302 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
303 		unsigned i;
304 
305 		for(i = 0; i < opcode->NumSrcRegs; i++) {
306 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
307 			    inst->U.I.SrcReg[i].Index == wpos) {
308 				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
309 				inst->U.I.SrcReg[i].Index = tempregi;
310 			}
311 		}
312 	}
313 }
314 
315 
316 /**
317  * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
318  * Gallium and OpenGL define it the other way around.
319  *
320  * So let's just negate FACE at the beginning of the shader and rewrite the rest
321  * of the shader to read from the newly allocated temporary.
322  */
rc_transform_fragment_face(struct radeon_compiler * c,unsigned face)323 void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
324 {
325 	unsigned tempregi = rc_find_free_temporary(c);
326 	struct rc_instruction *inst_add;
327 	struct rc_instruction *inst;
328 
329 	/* perspective divide */
330 	inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
331 	inst_add->U.I.Opcode = RC_OPCODE_ADD;
332 
333 	inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
334 	inst_add->U.I.DstReg.Index = tempregi;
335 	inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
336 
337 	inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
338 	inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
339 
340 	inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
341 	inst_add->U.I.SrcReg[1].Index = face;
342 	inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
343 	inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
344 
345 	for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
346 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
347 		unsigned i;
348 
349 		for(i = 0; i < opcode->NumSrcRegs; i++) {
350 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
351 			    inst->U.I.SrcReg[i].Index == face) {
352 				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
353 				inst->U.I.SrcReg[i].Index = tempregi;
354 			}
355 		}
356 	}
357 }
358 
/**
 * Register-read callback used by rc_get_stats().
 *
 * userdata points to the struct rc_program_stats being filled in. For a
 * temporary register the highest index seen so far is kept in
 * num_temp_regs; for an inline literal num_inline_literals is incremented.
 * inst and mask are not used.
 */
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct rc_program_stats *stats = userdata;

	if (file == RC_FILE_TEMPORARY) {
		if ((int)index > stats->num_temp_regs)
			stats->num_temp_regs = index;
	}

	if (file == RC_FILE_INLINE)
		stats->num_inline_literals++;
}
368 
rc_get_stats(struct radeon_compiler * c,struct rc_program_stats * s)369 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
370 {
371 	struct rc_instruction * tmp;
372 	memset(s, 0, sizeof(*s));
373 
374 	for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
375 							tmp = tmp->Next){
376 		const struct rc_opcode_info * info;
377 		rc_for_all_reads_mask(tmp, reg_count_callback, s);
378 		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
379 			info = rc_get_opcode_info(tmp->U.I.Opcode);
380 			if (info->Opcode == RC_OPCODE_BEGIN_TEX)
381 				continue;
382 			if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
383 				s->num_presub_ops++;
384 		} else {
385 			if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
386 				s->num_presub_ops++;
387 			if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
388 				s->num_presub_ops++;
389 			/* Assuming alpha will never be a flow control or
390 			 * a tex instruction. */
391 			if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
392 				s->num_alpha_insts++;
393 			if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
394 				s->num_rgb_insts++;
395 			if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
396 				tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
397 				s->num_omod_ops++;
398 			}
399 			if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
400 				tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
401 				s->num_omod_ops++;
402 			}
403 			info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
404 		}
405 		if (info->IsFlowControl)
406 			s->num_fc_insts++;
407 		if (info->HasTexture)
408 			s->num_tex_insts++;
409 		s->num_insts++;
410 	}
411 	/* Increment here because the reg_count_callback store the max
412 	 * temporary reg index in s->nun_temp_regs. */
413 	s->num_temp_regs++;
414 }
415 
print_stats(struct radeon_compiler * c)416 static void print_stats(struct radeon_compiler * c)
417 {
418 	struct rc_program_stats s;
419 
420 	if (c->initial_num_insts <= 5)
421 		return;
422 
423 	rc_get_stats(c, &s);
424 
425 	switch (c->type) {
426 	case RC_VERTEX_PROGRAM:
427 		fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
428 			       "~%4u Instructions\n"
429 			       "~%4u Flow Control Instructions\n"
430 			       "~%4u Temporary Registers\n"
431 			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
432 			       s.num_insts, s.num_fc_insts, s.num_temp_regs);
433 		break;
434 
435 	case RC_FRAGMENT_PROGRAM:
436 		fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
437 			       "~%4u Instructions\n"
438 			       "~%4u Vector Instructions (RGB)\n"
439 			       "~%4u Scalar Instructions (Alpha)\n"
440 			       "~%4u Flow Control Instructions\n"
441 			       "~%4u Texture Instructions\n"
442 			       "~%4u Presub Operations\n"
443 			       "~%4u OMOD Operations\n"
444 			       "~%4u Temporary Registers\n"
445 			       "~%4u Inline Literals\n"
446 			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
447 			       s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
448 			       s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
449 			       s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);
450 		break;
451 	default:
452 		assert(0);
453 	}
454 }
455 
456 static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
457 	"Vertex Program",
458 	"Fragment Program"
459 };
460 
rc_run_compiler_passes(struct radeon_compiler * c,struct radeon_compiler_pass * list)461 void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
462 {
463 	for (unsigned i = 0; list[i].name; i++) {
464 		if (list[i].predicate) {
465 			list[i].run(c, list[i].user);
466 
467 			if (c->Error)
468 				return;
469 
470 			if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
471 				fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
472 				rc_print_program(&c->Program);
473 			}
474 		}
475 	}
476 }
477 
478 /* Executes a list of compiler passes given in the parameter 'list'. */
rc_run_compiler(struct radeon_compiler * c,struct radeon_compiler_pass * list)479 void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
480 {
481 	struct rc_program_stats s;
482 
483 	rc_get_stats(c, &s);
484 	c->initial_num_insts = s.num_insts;
485 
486 	if (c->Debug & RC_DBG_LOG) {
487 		fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
488 		rc_print_program(&c->Program);
489 	}
490 
491 	rc_run_compiler_passes(c, list);
492 
493 	if (c->Debug & RC_DBG_STATS)
494 		print_stats(c);
495 }
496 
rc_validate_final_shader(struct radeon_compiler * c,void * user)497 void rc_validate_final_shader(struct radeon_compiler *c, void *user)
498 {
499 	/* Check the number of constants. */
500 	if (c->Program.Constants.Count > c->max_constants) {
501 		rc_error(c, "Too many constants. Max: %i, Got: %i\n",
502 			 c->max_constants, c->Program.Constants.Count);
503 	}
504 }
505