1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4  *
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial
17  * portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  */
28 
29 #include "radeon_program_pair.h"
30 
31 #include <stdio.h>
32 
33 #include "main/glheader.h"
34 #include "program/register_allocate.h"
35 #include "ralloc.h"
36 
37 #include "r300_fragprog_swizzle.h"
38 #include "radeon_compiler.h"
39 #include "radeon_compiler_util.h"
40 #include "radeon_dataflow.h"
41 #include "radeon_list.h"
42 #include "radeon_variable.h"
43 
/* Compile-time switch for debug tracing: define as non-zero to enable the
 * output produced through DBG() and to compile in print_reg(). */
#define VERBOSE 0

/* printf-style trace to stderr.  The fprintf call is guarded by VERBOSE, so
 * nothing is printed while VERBOSE is 0.  Wrapped in do/while(0) so that it
 * behaves like a single statement. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
47 
48 
49 
/** Book-keeping for one register of the input or temporary file. */
struct register_info {
	/** Live interval of each of the four channels, indexed by channel. */
	struct live_intervals Live[4];

	/** Set for an input register once an instruction reading it has been
	 * seen (see scan_read_callback). */
	unsigned int Used:1;
	/** Set once File/Index hold the register this one is mapped to. */
	unsigned int Allocated:1;
	/** Register file of the mapped register. */
	unsigned int File:3;
	/** Index of the mapped register; remap_register() writes this value
	 * into the instructions when Allocated is set. */
	unsigned int Index:RC_REGISTER_INDEX_BITS;
	/** For inputs: mask of the channels whose Live entry is marked used,
	 * filled in by do_advanced_regalloc(). */
	unsigned int Writemask;
};
59 
/** State shared by the allocation passes and their callbacks. */
struct regalloc_state {
	/** Compiler whose program is being processed. */
	struct radeon_compiler * C;

	/** Array of NumInputs entries, indexed by input register index. */
	struct register_info * Input;
	unsigned int NumInputs;

	/** Array of NumTemporaries entries, indexed by temporary index. */
	struct register_info * Temporary;
	unsigned int NumTemporaries;

	/** Non-zero when the inputs-only allocation is in use; only then does
	 * remap_register() rewrite temporaries as well as inputs. */
	unsigned int Simple;
	/** Largest IP seen so far among the instructions returned by
	 * rc_match_bgnloop(); input live intervals are stretched to at
	 * least this IP (see scan_read_callback). */
	int LoopEnd;
};
72 
/**
 * Register classes known to the allocator.
 *
 * The enumerator values are used directly as indices into the class table
 * built in do_advanced_regalloc(), so the order here has to match the order
 * of the entries in that table.  RC_REG_CLASS_COUNT is the number of classes.
 */
enum rc_reg_class {
	RC_REG_CLASS_SINGLE,
	RC_REG_CLASS_DOUBLE,
	RC_REG_CLASS_TRIPLE,
	RC_REG_CLASS_ALPHA,
	RC_REG_CLASS_SINGLE_PLUS_ALPHA,
	RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
	RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
	RC_REG_CLASS_X,
	RC_REG_CLASS_Y,
	RC_REG_CLASS_Z,
	RC_REG_CLASS_XY,
	RC_REG_CLASS_YZ,
	RC_REG_CLASS_XZ,
	RC_REG_CLASS_XW,
	RC_REG_CLASS_YW,
	RC_REG_CLASS_ZW,
	RC_REG_CLASS_XYW,
	RC_REG_CLASS_YZW,
	RC_REG_CLASS_XZW,
	RC_REG_CLASS_COUNT
};
95 
/**
 * Description of one register class: the set of writemasks a variable of
 * this class may be assigned.  The table in do_advanced_regalloc()
 * initialises these members positionally, so their order must not change.
 */
struct rc_class {
	/** Enumerator identifying this class. */
	enum rc_reg_class Class;

	/** Number of valid entries in Writemasks. */
	unsigned int WritemaskCount;

	/** This is 1 if this class is being used by the register allocator
	 * and 0 otherwise */
	unsigned int Used;

	/** This is the ID number assigned to this class by ra. */
	unsigned int Id;

	/** List of writemasks that belong to this class */
	unsigned int Writemasks[3];


};
113 
print_live_intervals(struct live_intervals * src)114 static void print_live_intervals(struct live_intervals * src)
115 {
116 	if (!src || !src->Used) {
117 		DBG("(null)");
118 		return;
119 	}
120 
121 	DBG("(%i,%i)", src->Start, src->End);
122 }
123 
overlap_live_intervals(struct live_intervals * a,struct live_intervals * b)124 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
125 {
126 	if (VERBOSE) {
127 		DBG("overlap_live_intervals: ");
128 		print_live_intervals(a);
129 		DBG(" to ");
130 		print_live_intervals(b);
131 		DBG("\n");
132 	}
133 
134 	if (!a->Used || !b->Used) {
135 		DBG("    unused interval\n");
136 		return 0;
137 	}
138 
139 	if (a->Start > b->Start) {
140 		if (a->Start < b->End) {
141 			DBG("    overlap\n");
142 			return 1;
143 		}
144 	} else if (b->Start > a->Start) {
145 		if (b->Start < a->End) {
146 			DBG("    overlap\n");
147 			return 1;
148 		}
149 	} else { /* a->Start == b->Start */
150 		if (a->Start != a->End && b->Start != b->End) {
151 			DBG("    overlap\n");
152 			return 1;
153 		}
154 	}
155 
156 	DBG("    no overlap\n");
157 
158 	return 0;
159 }
160 
/**
 * Read callback used to build the live intervals of input registers.
 *
 * Reads from any file other than RC_FILE_INPUT are ignored.  For an input,
 * the register is marked used and every channel selected by \p mask gets an
 * interval that starts at 0 and ends at the larger of the reading
 * instruction's IP and the current LoopEnd.
 *
 * @param data pointer to the struct regalloc_state being filled in.
 */
static void scan_read_callback(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct regalloc_state * state = data;
	struct register_info * input;
	unsigned int chan;

	if (file != RC_FILE_INPUT)
		return;

	input = &state->Input[index];
	input->Used = 1;

	for (chan = 0; chan < 4; chan++) {
		struct live_intervals * live = &input->Live[chan];

		if (mask & (1u << chan)) {
			live->Used = 1;
			live->Start = 0;
			live->End = state->LoopEnd > inst->IP ?
						state->LoopEnd : inst->IP;
		}
	}
}
184 
/**
 * Register remapping callback: replace \p *index by the allocated index.
 *
 * Inputs are always looked up; temporaries only when the simple
 * (inputs-only) allocation is active.  Registers of any other file, and
 * registers that have not been marked Allocated, are left untouched.
 * Only \p *index is written; \p *file is never modified.
 *
 * @param data pointer to the struct regalloc_state holding the mapping.
 */
static void remap_register(void * data, struct rc_instruction * inst,
		rc_register_file * file, unsigned int * index)
{
	struct regalloc_state * state = data;
	const struct register_info * info;

	if (*file == RC_FILE_INPUT) {
		info = &state->Input[*index];
	} else if (*file == RC_FILE_TEMPORARY && state->Simple) {
		info = &state->Temporary[*index];
	} else {
		return;
	}

	if (!info->Allocated)
		return;

	*index = info->Index;
}
202 
alloc_input_simple(void * data,unsigned int input,unsigned int hwreg)203 static void alloc_input_simple(void * data, unsigned int input,
204 							unsigned int hwreg)
205 {
206 	struct regalloc_state * s = data;
207 
208 	if (input >= s->NumInputs)
209 		return;
210 
211 	s->Input[input].Allocated = 1;
212 	s->Input[input].File = RC_FILE_TEMPORARY;
213 	s->Input[input].Index = hwreg;
214 }
215 
216 /* This functions offsets the temporary register indices by the number
217  * of input registers, because input registers are actually temporaries and
218  * should not occupy the same space.
219  *
220  * This pass is supposed to be used to maintain correct allocation of inputs
221  * if the standard register allocation is disabled. */
do_regalloc_inputs_only(struct regalloc_state * s)222 static void do_regalloc_inputs_only(struct regalloc_state * s)
223 {
224 	for (unsigned i = 0; i < s->NumTemporaries; i++) {
225 		s->Temporary[i].Allocated = 1;
226 		s->Temporary[i].File = RC_FILE_TEMPORARY;
227 		s->Temporary[i].Index = i + s->NumInputs;
228 	}
229 }
230 
/* Return 1 if op is one of the derivative opcodes (DDX or DDY), else 0. */
static unsigned int is_derivative(rc_opcode op)
{
	switch (op) {
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		return 1;
	default:
		return 0;
	}
}
235 
find_class(struct rc_class * classes,unsigned int writemask,unsigned int max_writemask_count)236 static int find_class(
237 	struct rc_class * classes,
238 	unsigned int writemask,
239 	unsigned int max_writemask_count)
240 {
241 	unsigned int i;
242 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
243 		unsigned int j;
244 		if (classes[i].WritemaskCount > max_writemask_count) {
245 			continue;
246 		}
247 		for (j = 0; j < 3; j++) {
248 			if (classes[i].Writemasks[j] == writemask) {
249 				return i;
250 			}
251 		}
252 	}
253 	return -1;
254 }
255 
/** User data handed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
	/** Flag owned by the caller; the callback clears it when the
	 * converted swizzle is not a native basic swizzle. */
	unsigned int * can_change_writemask;
	/** Conversion swizzle applied to each argument swizzle before the
	 * native-swizzle check. */
	unsigned int conversion_swizzle;
};
260 
variable_get_class_read_cb(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)261 static void variable_get_class_read_cb(
262 	void * userdata,
263 	struct rc_instruction * inst,
264 	struct rc_pair_instruction_arg * arg,
265 	struct rc_pair_instruction_source * src)
266 {
267 	struct variable_get_class_cb_data * d = userdata;
268 	unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
269 							d->conversion_swizzle);
270 	if (!r300_swizzle_is_native_basic(new_swizzle)) {
271 		*d->can_change_writemask = 0;
272 	}
273 }
274 
variable_get_class(struct rc_variable * variable,struct rc_class * classes)275 static enum rc_reg_class variable_get_class(
276 	struct rc_variable * variable,
277 	struct rc_class * classes)
278 {
279 	unsigned int i;
280 	unsigned int can_change_writemask= 1;
281 	unsigned int writemask = rc_variable_writemask_sum(variable);
282 	struct rc_list * readers = rc_variable_readers_union(variable);
283 	int class_index;
284 
285 	if (!variable->C->is_r500) {
286 		struct rc_class c;
287 		struct rc_variable * var_ptr;
288 		/* The assumption here is that if an instruction has type
289 		 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
290 		 * r300 and r400 can't swizzle the result of a TEX lookup. */
291 		for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
292 			if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
293 				writemask = RC_MASK_XYZW;
294 			}
295 		}
296 
297 		/* Check if it is possible to do swizzle packing for r300/r400
298 		 * without creating non-native swizzles. */
299 		class_index = find_class(classes, writemask, 3);
300 		if (class_index < 0) {
301 			goto error;
302 		}
303 		c = classes[class_index];
304 		if (c.WritemaskCount == 1) {
305 			goto done;
306 		}
307 		for (i = 0; i < c.WritemaskCount; i++) {
308 			struct rc_variable * var_ptr;
309 			for (var_ptr = variable; var_ptr;
310 						var_ptr = var_ptr->Friend) {
311 				int j;
312 				unsigned int conversion_swizzle =
313 						rc_make_conversion_swizzle(
314 						writemask, c.Writemasks[i]);
315 				struct variable_get_class_cb_data d;
316 				d.can_change_writemask = &can_change_writemask;
317 				d.conversion_swizzle = conversion_swizzle;
318 				/* If we get this far var_ptr->Inst has to
319 				 * be a pair instruction.  If variable or any
320 				 * of its friends are normal instructions,
321 				 * then the writemask will be set to RC_MASK_XYZW
322 				 * and the function will return before it gets
323 				 * here. */
324 				rc_pair_for_all_reads_arg(var_ptr->Inst,
325 					variable_get_class_read_cb, &d);
326 
327 				for (j = 0; j < var_ptr->ReaderCount; j++) {
328 					unsigned int old_swizzle;
329 					unsigned int new_swizzle;
330 					struct rc_reader r = var_ptr->Readers[j];
331 					if (r.Inst->Type ==
332 							RC_INSTRUCTION_PAIR ) {
333 						old_swizzle = r.U.P.Arg->Swizzle;
334 					} else {
335 						old_swizzle = r.U.I.Src->Swizzle;
336 					}
337 					new_swizzle = rc_adjust_channels(
338 						old_swizzle, conversion_swizzle);
339 					if (!r300_swizzle_is_native_basic(
340 								new_swizzle)) {
341 						can_change_writemask = 0;
342 						break;
343 					}
344 				}
345 				if (!can_change_writemask) {
346 					break;
347 				}
348 			}
349 			if (!can_change_writemask) {
350 				break;
351 			}
352 		}
353 	}
354 
355 	if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
356 		/* DDX/DDY seem to always fail when their writemasks are
357 		 * changed.*/
358 		if (is_derivative(variable->Inst->U.P.RGB.Opcode)
359 		    || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
360 			can_change_writemask = 0;
361 		}
362 	}
363 	for ( ; readers; readers = readers->Next) {
364 		struct rc_reader * r = readers->Item;
365 		if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
366 			if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
367 				can_change_writemask = 0;
368 				break;
369 			}
370 			/* DDX/DDY also fail when their swizzles are changed. */
371 			if (is_derivative(r->Inst->U.P.RGB.Opcode)
372 			    || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
373 				can_change_writemask = 0;
374 				break;
375 			}
376 		}
377 	}
378 
379 	class_index = find_class(classes, writemask,
380 						can_change_writemask ? 3 : 1);
381 done:
382 	if (class_index > -1) {
383 		return classes[class_index].Class;
384 	} else {
385 error:
386 		rc_error(variable->C,
387 				"Could not find class for index=%u mask=%u\n",
388 				variable->Dst.Index, writemask);
389 		return 0;
390 	}
391 }
392 
overlap_live_intervals_array(struct live_intervals * a,struct live_intervals * b)393 static unsigned int overlap_live_intervals_array(
394 	struct live_intervals * a,
395 	struct live_intervals * b)
396 {
397 	unsigned int a_chan, b_chan;
398 	for (a_chan = 0; a_chan < 4; a_chan++) {
399 		for (b_chan = 0; b_chan < 4; b_chan++) {
400 			if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
401 					return 1;
402 			}
403 		}
404 	}
405 	return 0;
406 }
407 
reg_get_index(int reg)408 static unsigned int reg_get_index(int reg)
409 {
410 	return reg / RC_MASK_XYZW;
411 }
412 
reg_get_writemask(int reg)413 static unsigned int reg_get_writemask(int reg)
414 {
415 	return (reg % RC_MASK_XYZW) + 1;
416 }
417 
get_reg_id(unsigned int index,unsigned int writemask)418 static int get_reg_id(unsigned int index, unsigned int writemask)
419 {
420 	assert(writemask);
421 	if (writemask == 0) {
422 		return 0;
423 	}
424 	return (index * RC_MASK_XYZW) + (writemask - 1);
425 }
426 
#if VERBOSE
/* Debug helper: print a register id as "Temp[index].xyzw", with '_' in
 * place of every channel that is not part of its writemask. */
static void print_reg(int reg)
{
	const unsigned int index = reg_get_index(reg);
	const unsigned int mask = reg_get_writemask(reg);
	const char x = (mask & RC_MASK_X) ? 'x' : '_';
	const char y = (mask & RC_MASK_Y) ? 'y' : '_';
	const char z = (mask & RC_MASK_Z) ? 'z' : '_';
	const char w = (mask & RC_MASK_W) ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", index, x, y, z, w);
}
#endif
439 
add_register_conflicts(struct ra_regs * regs,unsigned int max_temp_regs)440 static void add_register_conflicts(
441 	struct ra_regs * regs,
442 	unsigned int max_temp_regs)
443 {
444 	unsigned int index, a_mask, b_mask;
445 	for (index = 0; index < max_temp_regs; index++) {
446 		for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
447 			for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
448 								b_mask++) {
449 				if (a_mask & b_mask) {
450 					ra_add_reg_conflict(regs,
451 						get_reg_id(index, a_mask),
452 						get_reg_id(index, b_mask));
453 				}
454 			}
455 		}
456 	}
457 }
458 
do_advanced_regalloc(struct regalloc_state * s)459 static void do_advanced_regalloc(struct regalloc_state * s)
460 {
461 	struct rc_class rc_class_list [] = {
462 		{RC_REG_CLASS_SINGLE, 3, 0, 0,
463 			{RC_MASK_X,
464 			 RC_MASK_Y,
465 			 RC_MASK_Z}},
466 		{RC_REG_CLASS_DOUBLE, 3, 0, 0,
467 			{RC_MASK_X | RC_MASK_Y,
468 			 RC_MASK_X | RC_MASK_Z,
469 			 RC_MASK_Y | RC_MASK_Z}},
470 		{RC_REG_CLASS_TRIPLE, 1, 0, 0,
471 			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
472 			 RC_MASK_NONE,
473 			 RC_MASK_NONE}},
474 		{RC_REG_CLASS_ALPHA, 1, 0, 0,
475 			{RC_MASK_W,
476 			 RC_MASK_NONE,
477 			 RC_MASK_NONE}},
478 		{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
479 			{RC_MASK_X | RC_MASK_W,
480 			 RC_MASK_Y | RC_MASK_W,
481 			 RC_MASK_Z | RC_MASK_W}},
482 		{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
483 			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
484 			 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
485 			 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
486 		{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
487 			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
488 			RC_MASK_NONE,
489 			RC_MASK_NONE}},
490 		{RC_REG_CLASS_X, 1, 0, 0,
491 			{RC_MASK_X,
492 			RC_MASK_NONE,
493 			RC_MASK_NONE}},
494 		{RC_REG_CLASS_Y, 1, 0, 0,
495 			{RC_MASK_Y,
496 			RC_MASK_NONE,
497 			RC_MASK_NONE}},
498 		{RC_REG_CLASS_Z, 1, 0, 0,
499 			{RC_MASK_Z,
500 			RC_MASK_NONE,
501 			RC_MASK_NONE}},
502 		{RC_REG_CLASS_XY, 1, 0, 0,
503 			{RC_MASK_X | RC_MASK_Y,
504 			RC_MASK_NONE,
505 			RC_MASK_NONE}},
506 		{RC_REG_CLASS_YZ, 1, 0, 0,
507 			{RC_MASK_Y | RC_MASK_Z,
508 			RC_MASK_NONE,
509 			RC_MASK_NONE}},
510 		{RC_REG_CLASS_XZ, 1, 0, 0,
511 			{RC_MASK_X | RC_MASK_Z,
512 			RC_MASK_NONE,
513 			RC_MASK_NONE}},
514 		{RC_REG_CLASS_XW, 1, 0, 0,
515 			{RC_MASK_X | RC_MASK_W,
516 			RC_MASK_NONE,
517 			RC_MASK_NONE}},
518 		{RC_REG_CLASS_YW, 1, 0, 0,
519 			{RC_MASK_Y | RC_MASK_W,
520 			RC_MASK_NONE,
521 			RC_MASK_NONE}},
522 		{RC_REG_CLASS_ZW, 1, 0, 0,
523 			{RC_MASK_Z | RC_MASK_W,
524 			RC_MASK_NONE,
525 			RC_MASK_NONE}},
526 		{RC_REG_CLASS_XYW, 1, 0, 0,
527 			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
528 			RC_MASK_NONE,
529 			RC_MASK_NONE}},
530 		{RC_REG_CLASS_YZW, 1, 0, 0,
531 			{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
532 			RC_MASK_NONE,
533 			RC_MASK_NONE}},
534 		{RC_REG_CLASS_XZW, 1, 0, 0,
535 			{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
536 			RC_MASK_NONE,
537 			RC_MASK_NONE}}
538 	};
539 
540 	unsigned int i, j, index, input_node, node_count, node_index;
541 	unsigned int * node_classes;
542 	unsigned int * input_classes;
543 	struct rc_instruction * inst;
544 	struct rc_list * var_ptr;
545 	struct rc_list * variables;
546 	struct ra_regs * regs;
547 	struct ra_graph * graph;
548 
549 	/* Allocate the main ra data structure */
550 	regs = ra_alloc_reg_set(NULL, s->C->max_temp_regs * RC_MASK_XYZW);
551 
552 	/* Get list of program variables */
553 	variables = rc_get_variables(s->C);
554 	node_count = rc_list_count(variables);
555 	node_classes = memory_pool_malloc(&s->C->Pool,
556 			node_count * sizeof(unsigned int));
557 	input_classes = memory_pool_malloc(&s->C->Pool,
558 			s->NumInputs * sizeof(unsigned int));
559 
560 	for (var_ptr = variables, node_index = 0; var_ptr;
561 					var_ptr = var_ptr->Next, node_index++) {
562 		unsigned int class_index;
563 		/* Compute the live intervals */
564 		rc_variable_compute_live_intervals(var_ptr->Item);
565 
566 		class_index = variable_get_class(var_ptr->Item,	rc_class_list);
567 
568 		/* If we haven't used this register class yet, mark it
569 		 * as used and allocate space for it. */
570 		if (!rc_class_list[class_index].Used) {
571 			rc_class_list[class_index].Used = 1;
572 			rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
573 		}
574 
575 		node_classes[node_index] = rc_class_list[class_index].Id;
576 	}
577 
578 
579 	/* Assign registers to the classes */
580 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
581 		struct rc_class class = rc_class_list[i];
582 		if (!class.Used) {
583 			continue;
584 		}
585 
586 		for (index = 0; index < s->C->max_temp_regs; index++) {
587 			for (j = 0; j < class.WritemaskCount; j++) {
588 				int reg_id = get_reg_id(index,
589 							class.Writemasks[j]);
590 				ra_class_add_reg(regs, class.Id, reg_id);
591 			}
592 		}
593 	}
594 
595 	/* Add register conflicts */
596 	add_register_conflicts(regs, s->C->max_temp_regs);
597 
598 	/* Calculate live intervals for input registers */
599 	for (inst = s->C->Program.Instructions.Next;
600 					inst != &s->C->Program.Instructions;
601 					inst = inst->Next) {
602 		rc_opcode op = rc_get_flow_control_inst(inst);
603 		if (op == RC_OPCODE_BGNLOOP) {
604 			struct rc_instruction * endloop =
605 							rc_match_bgnloop(inst);
606 			if (endloop->IP > s->LoopEnd) {
607 				s->LoopEnd = endloop->IP;
608 			}
609 		}
610 		rc_for_all_reads_mask(inst, scan_read_callback, s);
611 	}
612 
613 	/* Create classes for input registers */
614 	for (i = 0; i < s->NumInputs; i++) {
615 		unsigned int chan, class_id, writemask = 0;
616 		for (chan = 0; chan < 4; chan++) {
617 			if (s->Input[i].Live[chan].Used) {
618 				writemask |= (1 << chan);
619 			}
620 		}
621 		s->Input[i].Writemask = writemask;
622 		if (!writemask) {
623 			continue;
624 		}
625 
626 		class_id = ra_alloc_reg_class(regs);
627 		input_classes[i] = class_id;
628 		ra_class_add_reg(regs, class_id,
629 				get_reg_id(s->Input[i].Index, writemask));
630 	}
631 
632 	ra_set_finalize(regs);
633 
634 	graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
635 
636 	/* Build the interference graph */
637 	for (var_ptr = variables, node_index = 0; var_ptr;
638 					var_ptr = var_ptr->Next,node_index++) {
639 		struct rc_list * a, * b;
640 		unsigned int b_index;
641 
642 		ra_set_node_class(graph, node_index, node_classes[node_index]);
643 
644 		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
645 						b; b = b->Next, b_index++) {
646 			struct rc_variable * var_a = a->Item;
647 			while (var_a) {
648 				struct rc_variable * var_b = b->Item;
649 				while (var_b) {
650 					if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
651 						ra_add_node_interference(graph,
652 							node_index, b_index);
653 					}
654 					var_b = var_b->Friend;
655 				}
656 				var_a = var_a->Friend;
657 			}
658 		}
659 	}
660 
661 	/* Add input registers to the interference graph */
662 	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
663 		if (!s->Input[i].Writemask) {
664 			continue;
665 		}
666 		ra_set_node_class(graph, node_count + input_node,
667 							input_classes[i]);
668 		for (var_ptr = variables, node_index = 0;
669 				var_ptr; var_ptr = var_ptr->Next, node_index++) {
670 			struct rc_variable * var = var_ptr->Item;
671 			if (overlap_live_intervals_array(s->Input[i].Live,
672 								var->Live)) {
673 				ra_add_node_interference(graph, node_index,
674 						node_count + input_node);
675 			}
676 		}
677 		/* Manually allocate a register for this input */
678 		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
679 				s->Input[i].Index, s->Input[i].Writemask));
680 		input_node++;
681 	}
682 
683 	if (!ra_allocate_no_spills(graph)) {
684 		rc_error(s->C, "Ran out of hardware temporaries\n");
685 		return;
686 	}
687 
688 	/* Rewrite the registers */
689 	for (var_ptr = variables, node_index = 0; var_ptr;
690 				var_ptr = var_ptr->Next, node_index++) {
691 		int reg = ra_get_node_reg(graph, node_index);
692 		unsigned int writemask = reg_get_writemask(reg);
693 		unsigned int index = reg_get_index(reg);
694 		struct rc_variable * var = var_ptr->Item;
695 
696 		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
697 			writemask = rc_variable_writemask_sum(var);
698 		}
699 
700 		if (var->Dst.File == RC_FILE_INPUT) {
701 			continue;
702 		}
703 		rc_variable_change_dst(var, index, writemask);
704 	}
705 
706 	ralloc_free(graph);
707 	ralloc_free(regs);
708 }
709 
710 /**
711  * @param user This parameter should be a pointer to an integer value.  If this
712  * integer value is zero, then a simple register allocator will be used that
713  * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
714  * user is non-zero, then the regular register allocator will be used
715  * (\sa do_regalloc).
716   */
rc_pair_regalloc(struct radeon_compiler * cc,void * user)717 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
718 {
719 	struct r300_fragment_program_compiler *c =
720 				(struct r300_fragment_program_compiler*)cc;
721 	struct regalloc_state s;
722 	int * do_full_regalloc = (int*)user;
723 
724 	memset(&s, 0, sizeof(s));
725 	s.C = cc;
726 	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
727 	s.Input = memory_pool_malloc(&cc->Pool,
728 			s.NumInputs * sizeof(struct register_info));
729 	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
730 
731 	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
732 	s.Temporary = memory_pool_malloc(&cc->Pool,
733 			s.NumTemporaries * sizeof(struct register_info));
734 	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
735 
736 	rc_recompute_ips(s.C);
737 
738 	c->AllocateHwInputs(c, &alloc_input_simple, &s);
739 	if (*do_full_regalloc) {
740 		do_advanced_regalloc(&s);
741 	} else {
742 		s.Simple = 1;
743 		do_regalloc_inputs_only(&s);
744 	}
745 
746 	/* Rewrite inputs and if we are doing the simple allocation, rewrite
747 	 * temporaries too. */
748 	for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
749 					inst != &s.C->Program.Instructions;
750 					inst = inst->Next) {
751 		rc_remap_registers(inst, &remap_register, &s);
752 	}
753 }
754