1 /*
2  * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22 
#include "radeon_compiler.h"

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_pair.h"
#include "radeon_compiler_util.h"
33 
34 
rc_init(struct radeon_compiler * c)35 void rc_init(struct radeon_compiler * c)
36 {
37 	memset(c, 0, sizeof(*c));
38 
39 	memory_pool_init(&c->Pool);
40 	c->Program.Instructions.Prev = &c->Program.Instructions;
41 	c->Program.Instructions.Next = &c->Program.Instructions;
42 	c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
43 }
44 
rc_destroy(struct radeon_compiler * c)45 void rc_destroy(struct radeon_compiler * c)
46 {
47 	rc_constants_destroy(&c->Program.Constants);
48 	memory_pool_destroy(&c->Pool);
49 	free(c->ErrorMsg);
50 }
51 
rc_debug(struct radeon_compiler * c,const char * fmt,...)52 void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
53 {
54 	va_list ap;
55 
56 	if (!(c->Debug & RC_DBG_LOG))
57 		return;
58 
59 	va_start(ap, fmt);
60 	vfprintf(stderr, fmt, ap);
61 	va_end(ap);
62 }
63 
rc_error(struct radeon_compiler * c,const char * fmt,...)64 void rc_error(struct radeon_compiler * c, const char * fmt, ...)
65 {
66 	va_list ap;
67 
68 	c->Error = 1;
69 
70 	if (!c->ErrorMsg) {
71 		/* Only remember the first error */
72 		char buf[1024];
73 		int written;
74 
75 		va_start(ap, fmt);
76 		written = vsnprintf(buf, sizeof(buf), fmt, ap);
77 		va_end(ap);
78 
79 		if (written < sizeof(buf)) {
80 			c->ErrorMsg = strdup(buf);
81 		} else {
82 			c->ErrorMsg = malloc(written + 1);
83 
84 			va_start(ap, fmt);
85 			vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
86 			va_end(ap);
87 		}
88 	}
89 
90 	if (c->Debug & RC_DBG_LOG) {
91 		fprintf(stderr, "r300compiler error: ");
92 
93 		va_start(ap, fmt);
94 		vfprintf(stderr, fmt, ap);
95 		va_end(ap);
96 	}
97 }
98 
/**
 * Report a failed internal assertion through rc_error().
 *
 * file, line and assertion identify the failed check and are embedded in the
 * error message. The function always returns 1.
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
	rc_error(c, "ICE at %s:%i: assertion failed: %s\n",
		 file, line, assertion);

	return 1;
}
104 
105 /**
106  * Recompute c->Program.InputsRead and c->Program.OutputsWritten
107  * based on which inputs and outputs are actually referenced
108  * in program instructions.
109  */
rc_calculate_inputs_outputs(struct radeon_compiler * c)110 void rc_calculate_inputs_outputs(struct radeon_compiler * c)
111 {
112 	struct rc_instruction *inst;
113 
114 	c->Program.InputsRead = 0;
115 	c->Program.OutputsWritten = 0;
116 
117 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
118 	{
119 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
120 		int i;
121 
122 		for (i = 0; i < opcode->NumSrcRegs; ++i) {
123 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
124 				c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
125 		}
126 
127 		if (opcode->HasDstReg) {
128 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
129 				c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
130 		}
131 	}
132 }
133 
134 /**
135  * Rewrite the program such that everything that source the given input
136  * register will source new_input instead.
137  */
rc_move_input(struct radeon_compiler * c,unsigned input,struct rc_src_register new_input)138 void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
139 {
140 	struct rc_instruction * inst;
141 
142 	c->Program.InputsRead &= ~(1 << input);
143 
144 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
145 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
146 		unsigned i;
147 
148 		for(i = 0; i < opcode->NumSrcRegs; ++i) {
149 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
150 				inst->U.I.SrcReg[i].File = new_input.File;
151 				inst->U.I.SrcReg[i].Index = new_input.Index;
152 				inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
153 				if (!inst->U.I.SrcReg[i].Abs) {
154 					inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
155 					inst->U.I.SrcReg[i].Abs = new_input.Abs;
156 				}
157 
158 				c->Program.InputsRead |= 1 << new_input.Index;
159 			}
160 		}
161 	}
162 }
163 
164 
165 /**
166  * Rewrite the program such that everything that writes into the given
167  * output register will instead write to new_output. The new_output
168  * writemask is honoured.
169  */
rc_move_output(struct radeon_compiler * c,unsigned output,unsigned new_output,unsigned writemask)170 void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
171 {
172 	struct rc_instruction * inst;
173 
174 	c->Program.OutputsWritten &= ~(1 << output);
175 
176 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
177 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
178 
179 		if (opcode->HasDstReg) {
180 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
181 				inst->U.I.DstReg.Index = new_output;
182 				inst->U.I.DstReg.WriteMask &= writemask;
183 
184 				c->Program.OutputsWritten |= 1 << new_output;
185 			}
186 		}
187 	}
188 }
189 
190 
191 /**
192  * Rewrite the program such that a given output is duplicated.
193  */
rc_copy_output(struct radeon_compiler * c,unsigned output,unsigned dup_output)194 void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
195 {
196 	unsigned tempreg = rc_find_free_temporary(c);
197 	struct rc_instruction * inst;
198 
199 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
200 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
201 
202 		if (opcode->HasDstReg) {
203 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
204 				inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
205 				inst->U.I.DstReg.Index = tempreg;
206 			}
207 		}
208 	}
209 
210 	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
211 	inst->U.I.Opcode = RC_OPCODE_MOV;
212 	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
213 	inst->U.I.DstReg.Index = output;
214 
215 	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
216 	inst->U.I.SrcReg[0].Index = tempreg;
217 	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
218 
219 	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
220 	inst->U.I.Opcode = RC_OPCODE_MOV;
221 	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
222 	inst->U.I.DstReg.Index = dup_output;
223 
224 	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
225 	inst->U.I.SrcReg[0].Index = tempreg;
226 	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
227 
228 	c->Program.OutputsWritten |= 1 << dup_output;
229 }
230 
231 
232 /**
233  * Introduce standard code fragment to deal with fragment.position.
234  */
rc_transform_fragment_wpos(struct radeon_compiler * c,unsigned wpos,unsigned new_input,int full_vtransform)235 void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
236                                 int full_vtransform)
237 {
238 	unsigned tempregi = rc_find_free_temporary(c);
239 	struct rc_instruction * inst_rcp;
240 	struct rc_instruction * inst_mul;
241 	struct rc_instruction * inst_mad;
242 	struct rc_instruction * inst;
243 
244 	c->Program.InputsRead &= ~(1 << wpos);
245 	c->Program.InputsRead |= 1 << new_input;
246 
247 	/* perspective divide */
248 	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
249 	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
250 
251 	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
252 	inst_rcp->U.I.DstReg.Index = tempregi;
253 	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
254 
255 	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
256 	inst_rcp->U.I.SrcReg[0].Index = new_input;
257 	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
258 
259 	inst_mul = rc_insert_new_instruction(c, inst_rcp);
260 	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
261 
262 	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
263 	inst_mul->U.I.DstReg.Index = tempregi;
264 	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
265 
266 	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
267 	inst_mul->U.I.SrcReg[0].Index = new_input;
268 
269 	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
270 	inst_mul->U.I.SrcReg[1].Index = tempregi;
271 	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
272 
273 	/* viewport transformation */
274 	inst_mad = rc_insert_new_instruction(c, inst_mul);
275 	inst_mad->U.I.Opcode = RC_OPCODE_MAD;
276 
277 	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
278 	inst_mad->U.I.DstReg.Index = tempregi;
279 	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
280 
281 	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
282 	inst_mad->U.I.SrcReg[0].Index = tempregi;
283 	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
284 
285 	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
286 	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
287 
288 	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
289 	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
290 
291 	if (full_vtransform) {
292 		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
293 		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
294 	} else {
295 		inst_mad->U.I.SrcReg[1].Index =
296 		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
297 	}
298 
299 	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
300 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
301 		unsigned i;
302 
303 		for(i = 0; i < opcode->NumSrcRegs; i++) {
304 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
305 			    inst->U.I.SrcReg[i].Index == wpos) {
306 				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
307 				inst->U.I.SrcReg[i].Index = tempregi;
308 			}
309 		}
310 	}
311 }
312 
313 
314 /**
315  * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
316  * Gallium and OpenGL define it the other way around.
317  *
318  * So let's just negate FACE at the beginning of the shader and rewrite the rest
319  * of the shader to read from the newly allocated temporary.
320  */
rc_transform_fragment_face(struct radeon_compiler * c,unsigned face)321 void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
322 {
323 	unsigned tempregi = rc_find_free_temporary(c);
324 	struct rc_instruction *inst_add;
325 	struct rc_instruction *inst;
326 
327 	/* perspective divide */
328 	inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
329 	inst_add->U.I.Opcode = RC_OPCODE_ADD;
330 
331 	inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
332 	inst_add->U.I.DstReg.Index = tempregi;
333 	inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
334 
335 	inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
336 	inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
337 
338 	inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
339 	inst_add->U.I.SrcReg[1].Index = face;
340 	inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
341 	inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
342 
343 	for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
344 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
345 		unsigned i;
346 
347 		for(i = 0; i < opcode->NumSrcRegs; i++) {
348 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
349 			    inst->U.I.SrcReg[i].Index == face) {
350 				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
351 				inst->U.I.SrcReg[i].Index = tempregi;
352 			}
353 		}
354 	}
355 }
356 
/**
 * Register-visitor callback used by rc_get_stats().
 *
 * userdata points to the struct rc_program_stats being filled in. For a
 * temporary register, num_temp_regs is raised to `index` if that is larger
 * than the value stored so far. For an inline register, num_inline_literals
 * is incremented. inst and mask are not used.
 */
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct rc_program_stats *stats = userdata;

	if (file == RC_FILE_TEMPORARY) {
		/* Track the highest temporary index seen. */
		if ((int)index > stats->num_temp_regs)
			stats->num_temp_regs = index;
	}

	if (file == RC_FILE_INLINE) {
		stats->num_inline_literals++;
	}
}
366 
rc_get_stats(struct radeon_compiler * c,struct rc_program_stats * s)367 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
368 {
369 	struct rc_instruction * tmp;
370 	memset(s, 0, sizeof(*s));
371 
372 	for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
373 							tmp = tmp->Next){
374 		const struct rc_opcode_info * info;
375 		rc_for_all_reads_mask(tmp, reg_count_callback, s);
376 		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
377 			info = rc_get_opcode_info(tmp->U.I.Opcode);
378 			if (info->Opcode == RC_OPCODE_BEGIN_TEX)
379 				continue;
380 			if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
381 				s->num_presub_ops++;
382 		} else {
383 			if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
384 				s->num_presub_ops++;
385 			if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
386 				s->num_presub_ops++;
387 			/* Assuming alpha will never be a flow control or
388 			 * a tex instruction. */
389 			if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
390 				s->num_alpha_insts++;
391 			if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
392 				s->num_rgb_insts++;
393 			if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
394 				tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
395 				s->num_omod_ops++;
396 			}
397 			if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
398 				tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
399 				s->num_omod_ops++;
400 			}
401 			info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
402 		}
403 		if (info->IsFlowControl)
404 			s->num_fc_insts++;
405 		if (info->HasTexture)
406 			s->num_tex_insts++;
407 		s->num_insts++;
408 	}
409 	/* Increment here because the reg_count_callback store the max
410 	 * temporary reg index in s->nun_temp_regs. */
411 	s->num_temp_regs++;
412 }
413 
print_stats(struct radeon_compiler * c)414 static void print_stats(struct radeon_compiler * c)
415 {
416 	struct rc_program_stats s;
417 
418 	if (c->initial_num_insts <= 5)
419 		return;
420 
421 	rc_get_stats(c, &s);
422 
423 	switch (c->type) {
424 	case RC_VERTEX_PROGRAM:
425 		fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
426 			       "~%4u Instructions\n"
427 			       "~%4u Flow Control Instructions\n"
428 			       "~%4u Temporary Registers\n"
429 			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
430 			       s.num_insts, s.num_fc_insts, s.num_temp_regs);
431 		break;
432 
433 	case RC_FRAGMENT_PROGRAM:
434 		fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
435 			       "~%4u Instructions\n"
436 			       "~%4u Vector Instructions (RGB)\n"
437 			       "~%4u Scalar Instructions (Alpha)\n"
438 			       "~%4u Flow Control Instructions\n"
439 			       "~%4u Texture Instructions\n"
440 			       "~%4u Presub Operations\n"
441 			       "~%4u OMOD Operations\n"
442 			       "~%4u Temporary Registers\n"
443 			       "~%4u Inline Literals\n"
444 			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
445 			       s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
446 			       s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
447 			       s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);
448 		break;
449 	default:
450 		assert(0);
451 	}
452 }
453 
/* Human-readable program type names used in debug log output; indexed by
 * c->type.
 * NOTE(review): the entry order assumes the vertex program type has value 0
 * and the fragment program type has value 1 - confirm against the enum. */
static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
	"Vertex Program",
	"Fragment Program"
};
458 
rc_run_compiler_passes(struct radeon_compiler * c,struct radeon_compiler_pass * list)459 void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
460 {
461 	for (unsigned i = 0; list[i].name; i++) {
462 		if (list[i].predicate) {
463 			list[i].run(c, list[i].user);
464 
465 			if (c->Error)
466 				return;
467 
468 			if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
469 				fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
470 				rc_print_program(&c->Program);
471 			}
472 		}
473 	}
474 }
475 
476 /* Executes a list of compiler passes given in the parameter 'list'. */
rc_run_compiler(struct radeon_compiler * c,struct radeon_compiler_pass * list)477 void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
478 {
479 	struct rc_program_stats s;
480 
481 	rc_get_stats(c, &s);
482 	c->initial_num_insts = s.num_insts;
483 
484 	if (c->Debug & RC_DBG_LOG) {
485 		fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
486 		rc_print_program(&c->Program);
487 	}
488 
489 	rc_run_compiler_passes(c, list);
490 
491 	if (c->Debug & RC_DBG_STATS)
492 		print_stats(c);
493 }
494 
rc_validate_final_shader(struct radeon_compiler * c,void * user)495 void rc_validate_final_shader(struct radeon_compiler *c, void *user)
496 {
497 	/* Check the number of constants. */
498 	if (c->Program.Constants.Count > c->max_constants) {
499 		rc_error(c, "Too many constants. Max: %i, Got: %i\n",
500 			 c->max_constants, c->Program.Constants.Count);
501 	}
502 }
503