/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
2 
3 /*
4  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Rob Clark <robclark@freedesktop.org>
27  */
28 
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "tgsi/tgsi_parse.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_strings.h"
37 #include "tgsi/tgsi_dump.h"
38 
39 #include "fd2_compiler.h"
40 #include "fd2_program.h"
41 #include "fd2_util.h"
42 
43 #include "instr-a2xx.h"
44 #include "ir-a2xx.h"
45 
46 struct fd2_compile_context {
47 	struct fd_program_stateobj *prog;
48 	struct fd2_shader_stateobj *so;
49 
50 	struct tgsi_parse_context parser;
51 	unsigned type;
52 
53 	/* predicate stack: */
54 	int pred_depth;
55 	enum ir2_pred pred_stack[8];
56 
57 	/* Internal-Temporary and Predicate register assignment:
58 	 *
59 	 * Some TGSI instructions which translate into multiple actual
60 	 * instructions need one or more temporary registers, which are not
61 	 * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
62 	 * And some instructions (texture fetch) cannot write directly to
63 	 * output registers.  We could be more clever and re-use dst or a
64 	 * src register in some cases.  But for now don't try to be clever.
65 	 * Eventually we should implement an optimization pass that re-
66 	 * juggles the register usage and gets rid of unneeded temporaries.
67 	 *
68 	 * The predicate register must be valid across multiple TGSI
69 	 * instructions, but internal temporary's do not.  For this reason,
70 	 * once the predicate register is requested, until it is no longer
71 	 * needed, it gets the first register slot after after the TGSI
72 	 * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
73 	 * internal temporaries get the register slots above this.
74 	 */
75 
76 	int pred_reg;
77 	int num_internal_temps;
78 
79 	uint8_t num_regs[TGSI_FILE_COUNT];
80 
81 	/* maps input register idx to prog->export_linkage idx: */
82 	uint8_t input_export_idx[64];
83 
84 	/* maps output register idx to prog->export_linkage idx: */
85 	uint8_t output_export_idx[64];
86 
87 	/* idx/slot for last compiler generated immediate */
88 	unsigned immediate_idx;
89 
90 	// TODO we can skip emit exports in the VS that the FS doesn't need..
91 	// and get rid perhaps of num_param..
92 	unsigned num_position, num_param;
93 	unsigned position, psize;
94 
95 	uint64_t need_sync;
96 
97 	/* current exec CF instruction */
98 	struct ir2_cf *cf;
99 };
100 
101 static int
semantic_idx(struct tgsi_declaration_semantic * semantic)102 semantic_idx(struct tgsi_declaration_semantic *semantic)
103 {
104 	int idx = semantic->Name;
105 	if (idx == TGSI_SEMANTIC_GENERIC)
106 		idx = TGSI_SEMANTIC_COUNT + semantic->Index;
107 	return idx;
108 }
109 
110 /* assign/get the input/export register # for given semantic idx as
111  * returned by semantic_idx():
112  */
113 static int
export_linkage(struct fd2_compile_context * ctx,int idx)114 export_linkage(struct fd2_compile_context *ctx, int idx)
115 {
116 	struct fd_program_stateobj *prog = ctx->prog;
117 
118 	/* if first time we've seen this export, assign the next available slot: */
119 	if (prog->export_linkage[idx] == 0xff)
120 		prog->export_linkage[idx] = prog->num_exports++;
121 
122 	return prog->export_linkage[idx];
123 }
124 
125 static unsigned
compile_init(struct fd2_compile_context * ctx,struct fd_program_stateobj * prog,struct fd2_shader_stateobj * so)126 compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog,
127 		struct fd2_shader_stateobj *so)
128 {
129 	unsigned ret;
130 
131 	ctx->prog = prog;
132 	ctx->so = so;
133 	ctx->cf = NULL;
134 	ctx->pred_depth = 0;
135 
136 	ret = tgsi_parse_init(&ctx->parser, so->tokens);
137 	if (ret != TGSI_PARSE_OK)
138 		return ret;
139 
140 	ctx->type = ctx->parser.FullHeader.Processor.Processor;
141 	ctx->position = ~0;
142 	ctx->psize = ~0;
143 	ctx->num_position = 0;
144 	ctx->num_param = 0;
145 	ctx->need_sync = 0;
146 	ctx->immediate_idx = 0;
147 	ctx->pred_reg = -1;
148 	ctx->num_internal_temps = 0;
149 
150 	memset(ctx->num_regs, 0, sizeof(ctx->num_regs));
151 	memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx));
152 	memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx));
153 
154 	/* do first pass to extract declarations: */
155 	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
156 		tgsi_parse_token(&ctx->parser);
157 
158 		switch (ctx->parser.FullToken.Token.Type) {
159 		case TGSI_TOKEN_TYPE_DECLARATION: {
160 			struct tgsi_full_declaration *decl =
161 					&ctx->parser.FullToken.FullDeclaration;
162 			if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
163 				unsigned name = decl->Semantic.Name;
164 
165 				assert(decl->Declaration.Semantic);  // TODO is this ever not true?
166 
167 				ctx->output_export_idx[decl->Range.First] =
168 						semantic_idx(&decl->Semantic);
169 
170 				if (ctx->type == PIPE_SHADER_VERTEX) {
171 					switch (name) {
172 					case TGSI_SEMANTIC_POSITION:
173 						ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT];
174 						ctx->num_position++;
175 						break;
176 					case TGSI_SEMANTIC_PSIZE:
177 						ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT];
178 						ctx->num_position++;
179 						break;
180 					case TGSI_SEMANTIC_COLOR:
181 					case TGSI_SEMANTIC_GENERIC:
182 						ctx->num_param++;
183 						break;
184 					default:
185 						DBG("unknown VS semantic name: %s",
186 								tgsi_semantic_names[name]);
187 						assert(0);
188 					}
189 				} else {
190 					switch (name) {
191 					case TGSI_SEMANTIC_COLOR:
192 					case TGSI_SEMANTIC_GENERIC:
193 						ctx->num_param++;
194 						break;
195 					default:
196 						DBG("unknown PS semantic name: %s",
197 								tgsi_semantic_names[name]);
198 						assert(0);
199 					}
200 				}
201 			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
202 				ctx->input_export_idx[decl->Range.First] =
203 						semantic_idx(&decl->Semantic);
204 			}
205 			ctx->num_regs[decl->Declaration.File] =
206 					MAX2(ctx->num_regs[decl->Declaration.File], decl->Range.Last + 1);
207 			break;
208 		}
209 		case TGSI_TOKEN_TYPE_IMMEDIATE: {
210 			struct tgsi_full_immediate *imm =
211 					&ctx->parser.FullToken.FullImmediate;
212 			unsigned n = ctx->so->num_immediates++;
213 			memcpy(ctx->so->immediates[n].val, imm->u, 16);
214 			break;
215 		}
216 		default:
217 			break;
218 		}
219 	}
220 
221 	/* TGSI generated immediates are always entire vec4's, ones we
222 	 * generate internally are not:
223 	 */
224 	ctx->immediate_idx = ctx->so->num_immediates * 4;
225 
226 	ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT];
227 
228 	tgsi_parse_free(&ctx->parser);
229 
230 	return tgsi_parse_init(&ctx->parser, so->tokens);
231 }
232 
233 static void
compile_free(struct fd2_compile_context * ctx)234 compile_free(struct fd2_compile_context *ctx)
235 {
236 	tgsi_parse_free(&ctx->parser);
237 }
238 
239 static struct ir2_cf *
next_exec_cf(struct fd2_compile_context * ctx)240 next_exec_cf(struct fd2_compile_context *ctx)
241 {
242 	struct ir2_cf *cf = ctx->cf;
243 	if (!cf || cf->exec.instrs_count >= ARRAY_SIZE(ctx->cf->exec.instrs))
244 		ctx->cf = cf = ir2_cf_create(ctx->so->ir, EXEC);
245 	return cf;
246 }
247 
248 static void
compile_vtx_fetch(struct fd2_compile_context * ctx)249 compile_vtx_fetch(struct fd2_compile_context *ctx)
250 {
251 	struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs;
252 	int i;
253 	for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) {
254 		struct ir2_instruction *instr = ir2_instr_create(
255 				next_exec_cf(ctx), IR2_FETCH);
256 		instr->fetch.opc = VTX_FETCH;
257 
258 		ctx->need_sync |= 1 << (i+1);
259 
260 		ir2_reg_create(instr, i+1, "xyzw", 0);
261 		ir2_reg_create(instr, 0, "x", 0);
262 
263 		if (i == 0)
264 			instr->sync = true;
265 
266 		vfetch_instrs[i] = instr;
267 	}
268 	ctx->so->num_vfetch_instrs = i;
269 	ctx->cf = NULL;
270 }
271 
272 /*
273  * For vertex shaders (VS):
274  * --- ------ -------------
275  *
276  *   Inputs:     R1-R(num_input)
277  *   Constants:  C0-C(num_const-1)
278  *   Immediates: C(num_const)-C(num_const+num_imm-1)
279  *   Outputs:    export0-export(n) and export62, export63
280  *      n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
281  *   Temps:      R(num_input+1)-R(num_input+num_temps)
282  *
283  * R0 could be clobbered after the vertex fetch instructions.. so we
284  * could use it for one of the temporaries.
285  *
286  * TODO: maybe the vertex fetch part could fetch first input into R0 as
287  * the last vtx fetch instruction, which would let us use the same
288  * register layout in either case.. although this is not what the blob
289  * compiler does.
290  *
291  *
292  * For frag shaders (PS):
293  * --- ---- -------------
294  *
295  *   Inputs:     R0-R(num_input-1)
296  *   Constants:  same as VS
297  *   Immediates: same as VS
298  *   Outputs:    export0-export(num_outputs)
299  *   Temps:      R(num_input)-R(num_input+num_temps-1)
300  *
 * In either case, immediates are appended to the constants
 * (uniforms).
303  *
304  */
305 
306 static unsigned
get_temp_gpr(struct fd2_compile_context * ctx,int idx)307 get_temp_gpr(struct fd2_compile_context *ctx, int idx)
308 {
309 	unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT];
310 	if (ctx->type == PIPE_SHADER_VERTEX)
311 		num++;
312 	return num;
313 }
314 
315 static struct ir2_register *
add_dst_reg(struct fd2_compile_context * ctx,struct ir2_instruction * alu,const struct tgsi_dst_register * dst)316 add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu,
317 		const struct tgsi_dst_register *dst)
318 {
319 	unsigned flags = 0, num = 0;
320 	char swiz[5];
321 
322 	switch (dst->File) {
323 	case TGSI_FILE_OUTPUT:
324 		flags |= IR2_REG_EXPORT;
325 		if (ctx->type == PIPE_SHADER_VERTEX) {
326 			if (dst->Index == ctx->position) {
327 				num = 62;
328 			} else if (dst->Index == ctx->psize) {
329 				num = 63;
330 			} else {
331 				num = export_linkage(ctx,
332 						ctx->output_export_idx[dst->Index]);
333 			}
334 		} else {
335 			num = dst->Index;
336 		}
337 		break;
338 	case TGSI_FILE_TEMPORARY:
339 		num = get_temp_gpr(ctx, dst->Index);
340 		break;
341 	default:
342 		DBG("unsupported dst register file: %s",
343 			tgsi_file_name(dst->File));
344 		assert(0);
345 		break;
346 	}
347 
348 	swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_';
349 	swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_';
350 	swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_';
351 	swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 'w' : '_';
352 	swiz[4] = '\0';
353 
354 	return ir2_reg_create(alu, num, swiz, flags);
355 }
356 
357 static struct ir2_register *
add_src_reg(struct fd2_compile_context * ctx,struct ir2_instruction * alu,const struct tgsi_src_register * src)358 add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu,
359 		const struct tgsi_src_register *src)
360 {
361 	static const char swiz_vals[] = {
362 			'x', 'y', 'z', 'w',
363 	};
364 	char swiz[5];
365 	unsigned flags = 0, num = 0;
366 
367 	switch (src->File) {
368 	case TGSI_FILE_CONSTANT:
369 		num = src->Index;
370 		flags |= IR2_REG_CONST;
371 		break;
372 	case TGSI_FILE_INPUT:
373 		if (ctx->type == PIPE_SHADER_VERTEX) {
374 			num = src->Index + 1;
375 		} else {
376 			num = export_linkage(ctx,
377 					ctx->input_export_idx[src->Index]);
378 		}
379 		break;
380 	case TGSI_FILE_TEMPORARY:
381 		num = get_temp_gpr(ctx, src->Index);
382 		break;
383 	case TGSI_FILE_IMMEDIATE:
384 		num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT];
385 		flags |= IR2_REG_CONST;
386 		break;
387 	default:
388 		DBG("unsupported src register file: %s",
389 			tgsi_file_name(src->File));
390 		assert(0);
391 		break;
392 	}
393 
394 	if (src->Absolute)
395 		flags |= IR2_REG_ABS;
396 	if (src->Negate)
397 		flags |= IR2_REG_NEGATE;
398 
399 	swiz[0] = swiz_vals[src->SwizzleX];
400 	swiz[1] = swiz_vals[src->SwizzleY];
401 	swiz[2] = swiz_vals[src->SwizzleZ];
402 	swiz[3] = swiz_vals[src->SwizzleW];
403 	swiz[4] = '\0';
404 
405 	if ((ctx->need_sync & ((uint64_t)1 << num)) &&
406 			!(flags & IR2_REG_CONST)) {
407 		alu->sync = true;
408 		ctx->need_sync &= ~((uint64_t)1 << num);
409 	}
410 
411 	return ir2_reg_create(alu, num, swiz, flags);
412 }
413 
414 static void
add_vector_clamp(struct tgsi_full_instruction * inst,struct ir2_instruction * alu)415 add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
416 {
417 	if (inst->Instruction.Saturate) {
418 		alu->alu.vector_clamp = true;
419 	}
420 }
421 
422 static void
add_scalar_clamp(struct tgsi_full_instruction * inst,struct ir2_instruction * alu)423 add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
424 {
425 	if (inst->Instruction.Saturate) {
426 		alu->alu.scalar_clamp = true;
427 	}
428 }
429 
430 static void
add_regs_vector_1(struct fd2_compile_context * ctx,struct tgsi_full_instruction * inst,struct ir2_instruction * alu)431 add_regs_vector_1(struct fd2_compile_context *ctx,
432 		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
433 {
434 	assert(inst->Instruction.NumSrcRegs == 1);
435 	assert(inst->Instruction.NumDstRegs == 1);
436 
437 	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
438 	add_src_reg(ctx, alu, &inst->Src[0].Register);
439 	add_src_reg(ctx, alu, &inst->Src[0].Register);
440 	add_vector_clamp(inst, alu);
441 }
442 
443 static void
add_regs_vector_2(struct fd2_compile_context * ctx,struct tgsi_full_instruction * inst,struct ir2_instruction * alu)444 add_regs_vector_2(struct fd2_compile_context *ctx,
445 		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
446 {
447 	assert(inst->Instruction.NumSrcRegs == 2);
448 	assert(inst->Instruction.NumDstRegs == 1);
449 
450 	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
451 	add_src_reg(ctx, alu, &inst->Src[0].Register);
452 	add_src_reg(ctx, alu, &inst->Src[1].Register);
453 	add_vector_clamp(inst, alu);
454 }
455 
456 static void
add_regs_vector_3(struct fd2_compile_context * ctx,struct tgsi_full_instruction * inst,struct ir2_instruction * alu)457 add_regs_vector_3(struct fd2_compile_context *ctx,
458 		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
459 {
460 	assert(inst->Instruction.NumSrcRegs == 3);
461 	assert(inst->Instruction.NumDstRegs == 1);
462 
463 	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
464 	/* maybe should re-arrange the syntax some day, but
465 	 * in assembler/disassembler and what ir.c expects
466 	 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
467 	 */
468 	add_src_reg(ctx, alu, &inst->Src[2].Register);
469 	add_src_reg(ctx, alu, &inst->Src[0].Register);
470 	add_src_reg(ctx, alu, &inst->Src[1].Register);
471 	add_vector_clamp(inst, alu);
472 }
473 
/* Fill the unused vector slot of 'alu' with placeholder registers: a
 * destination that writes no component, followed by two sources with no
 * swizzle.
 */
static void
add_regs_dummy_vector(struct ir2_instruction *alu)
{
	int i;

	/* vector dst, nothing written: */
	ir2_reg_create(alu, 0, "____", 0);

	/* vector src1 and src2: */
	for (i = 0; i < 2; i++)
		ir2_reg_create(alu, 0, NULL, 0);
}
484 
485 static void
add_regs_scalar_1(struct fd2_compile_context * ctx,struct tgsi_full_instruction * inst,struct ir2_instruction * alu)486 add_regs_scalar_1(struct fd2_compile_context *ctx,
487 		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
488 {
489 	assert(inst->Instruction.NumSrcRegs == 1);
490 	assert(inst->Instruction.NumDstRegs == 1);
491 
492 	add_regs_dummy_vector(alu);
493 
494 	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
495 	add_src_reg(ctx, alu, &inst->Src[0].Register);
496 	add_scalar_clamp(inst, alu);
497 }
498 
499 /*
500  * Helpers for TGSI instructions that don't map to a single shader instr:
501  */
502 
503 static void
src_from_dst(struct tgsi_src_register * src,struct tgsi_dst_register * dst)504 src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
505 {
506 	src->File      = dst->File;
507 	src->Indirect  = dst->Indirect;
508 	src->Dimension = dst->Dimension;
509 	src->Index     = dst->Index;
510 	src->Absolute  = 0;
511 	src->Negate    = 0;
512 	src->SwizzleX  = TGSI_SWIZZLE_X;
513 	src->SwizzleY  = TGSI_SWIZZLE_Y;
514 	src->SwizzleZ  = TGSI_SWIZZLE_Z;
515 	src->SwizzleW  = TGSI_SWIZZLE_W;
516 }
517 
518 /* Get internal-temp src/dst to use for a sequence of instructions
519  * generated by a single TGSI op.
520  */
521 static void
get_internal_temp(struct fd2_compile_context * ctx,struct tgsi_dst_register * tmp_dst,struct tgsi_src_register * tmp_src)522 get_internal_temp(struct fd2_compile_context *ctx,
523 		struct tgsi_dst_register *tmp_dst,
524 		struct tgsi_src_register *tmp_src)
525 {
526 	int n;
527 
528 	tmp_dst->File      = TGSI_FILE_TEMPORARY;
529 	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
530 	tmp_dst->Indirect  = 0;
531 	tmp_dst->Dimension = 0;
532 
533 	/* assign next temporary: */
534 	n = ctx->num_internal_temps++;
535 	if (ctx->pred_reg != -1)
536 		n++;
537 
538 	tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n;
539 
540 	src_from_dst(tmp_src, tmp_dst);
541 }
542 
543 static void
get_predicate(struct fd2_compile_context * ctx,struct tgsi_dst_register * dst,struct tgsi_src_register * src)544 get_predicate(struct fd2_compile_context *ctx, struct tgsi_dst_register *dst,
545 		struct tgsi_src_register *src)
546 {
547 	assert(ctx->pred_reg != -1);
548 
549 	dst->File      = TGSI_FILE_TEMPORARY;
550 	dst->WriteMask = TGSI_WRITEMASK_W;
551 	dst->Indirect  = 0;
552 	dst->Dimension = 0;
553 	dst->Index     = get_temp_gpr(ctx, ctx->pred_reg);
554 
555 	if (src) {
556 		src_from_dst(src, dst);
557 		src->SwizzleX  = TGSI_SWIZZLE_W;
558 		src->SwizzleY  = TGSI_SWIZZLE_W;
559 		src->SwizzleZ  = TGSI_SWIZZLE_W;
560 		src->SwizzleW  = TGSI_SWIZZLE_W;
561 	}
562 }
563 
564 static void
push_predicate(struct fd2_compile_context * ctx,struct tgsi_src_register * src)565 push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src)
566 {
567 	struct ir2_instruction *alu;
568 	struct tgsi_dst_register pred_dst;
569 
570 	/* NOTE blob compiler seems to always puts PRED_* instrs in a CF by
571 	 * themselves:
572 	 */
573 	ctx->cf = NULL;
574 
575 	if (ctx->pred_depth == 0) {
576 		/* assign predicate register: */
577 		ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY];
578 
579 		get_predicate(ctx, &pred_dst, NULL);
580 
581 		alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs);
582 		add_regs_dummy_vector(alu);
583 		add_dst_reg(ctx, alu, &pred_dst);
584 		add_src_reg(ctx, alu, src);
585 	} else {
586 		struct tgsi_src_register pred_src;
587 
588 		get_predicate(ctx, &pred_dst, &pred_src);
589 
590 		alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
591 		add_dst_reg(ctx, alu, &pred_dst);
592 		add_src_reg(ctx, alu, &pred_src);
593 		add_src_reg(ctx, alu, src);
594 
595 		// XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make
596 		// sure src reg is valid if it was calculated with a predicate
597 		// condition..
598 		alu->pred = IR2_PRED_NONE;
599 	}
600 
601 	/* save previous pred state to restore in pop_predicate(): */
602 	ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred;
603 
604 	ctx->cf = NULL;
605 }
606 
607 static void
pop_predicate(struct fd2_compile_context * ctx)608 pop_predicate(struct fd2_compile_context *ctx)
609 {
610 	/* NOTE blob compiler seems to always puts PRED_* instrs in a CF by
611 	 * themselves:
612 	 */
613 	ctx->cf = NULL;
614 
615 	/* restore previous predicate state: */
616 	ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth];
617 
618 	if (ctx->pred_depth != 0) {
619 		struct ir2_instruction *alu;
620 		struct tgsi_dst_register pred_dst;
621 		struct tgsi_src_register pred_src;
622 
623 		get_predicate(ctx, &pred_dst, &pred_src);
624 
625 		alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs);
626 		add_regs_dummy_vector(alu);
627 		add_dst_reg(ctx, alu, &pred_dst);
628 		add_src_reg(ctx, alu, &pred_src);
629 		alu->pred = IR2_PRED_NONE;
630 	} else {
631 		/* predicate register no longer needed: */
632 		ctx->pred_reg = -1;
633 	}
634 
635 	ctx->cf = NULL;
636 }
637 
638 static void
get_immediate(struct fd2_compile_context * ctx,struct tgsi_src_register * reg,uint32_t val)639 get_immediate(struct fd2_compile_context *ctx,
640 		struct tgsi_src_register *reg, uint32_t val)
641 {
642 	unsigned neg, swiz, idx, i;
643 	/* actually maps 1:1 currently.. not sure if that is safe to rely on: */
644 	static const unsigned swiz2tgsi[] = {
645 			TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
646 	};
647 
648 	for (i = 0; i < ctx->immediate_idx; i++) {
649 		swiz = i % 4;
650 		idx  = i / 4;
651 
652 		if (ctx->so->immediates[idx].val[swiz] == val) {
653 			neg = 0;
654 			break;
655 		}
656 
657 		if (ctx->so->immediates[idx].val[swiz] == -val) {
658 			neg = 1;
659 			break;
660 		}
661 	}
662 
663 	if (i == ctx->immediate_idx) {
664 		/* need to generate a new immediate: */
665 		swiz = i % 4;
666 		idx  = i / 4;
667 		neg  = 0;
668 		ctx->so->immediates[idx].val[swiz] = val;
669 		ctx->so->num_immediates = idx + 1;
670 		ctx->immediate_idx++;
671 	}
672 
673 	reg->File      = TGSI_FILE_IMMEDIATE;
674 	reg->Indirect  = 0;
675 	reg->Dimension = 0;
676 	reg->Index     = idx;
677 	reg->Absolute  = 0;
678 	reg->Negate    = neg;
679 	reg->SwizzleX  = swiz2tgsi[swiz];
680 	reg->SwizzleY  = swiz2tgsi[swiz];
681 	reg->SwizzleZ  = swiz2tgsi[swiz];
682 	reg->SwizzleW  = swiz2tgsi[swiz];
683 }
684 
685 /* POW(a,b) = EXP2(b * LOG2(a)) */
686 static void
translate_pow(struct fd2_compile_context * ctx,struct tgsi_full_instruction * inst)687 translate_pow(struct fd2_compile_context *ctx,
688 		struct tgsi_full_instruction *inst)
689 {
690 	struct tgsi_dst_register tmp_dst;
691 	struct tgsi_src_register tmp_src;
692 	struct ir2_instruction *alu;
693 
694 	get_internal_temp(ctx, &tmp_dst, &tmp_src);
695 
696 	alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP);
697 	add_regs_dummy_vector(alu);
698 	add_dst_reg(ctx, alu, &tmp_dst);
699 	add_src_reg(ctx, alu, &inst->Src[0].Register);
700 
701 	alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
702 	add_dst_reg(ctx, alu, &tmp_dst);
703 	add_src_reg(ctx, alu, &tmp_src);
704 	add_src_reg(ctx, alu, &inst->Src[1].Register);
705 
706 	/* NOTE: some of the instructions, like EXP_IEEE, seem hard-
707 	 * coded to take their input from the w component.
708 	 */
709 	switch(inst->Dst[0].Register.WriteMask) {
710 	case TGSI_WRITEMASK_X:
711 		tmp_src.SwizzleW = TGSI_SWIZZLE_X;
712 		break;
713 	case TGSI_WRITEMASK_Y:
714 		tmp_src.SwizzleW = TGSI_SWIZZLE_Y;
715 		break;
716 	case TGSI_WRITEMASK_Z:
717 		tmp_src.SwizzleW = TGSI_SWIZZLE_Z;
718 		break;
719 	case TGSI_WRITEMASK_W:
720 		tmp_src.SwizzleW = TGSI_SWIZZLE_W;
721 		break;
722 	default:
723 		DBG("invalid writemask!");
724 		assert(0);
725 		break;
726 	}
727 
728 	alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE);
729 	add_regs_dummy_vector(alu);
730 	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
731 	add_src_reg(ctx, alu, &tmp_src);
732 	add_scalar_clamp(inst, alu);
733 }
734 
735 static void
translate_tex(struct fd2_compile_context * ctx,struct tgsi_full_instruction * inst,unsigned opc)736 translate_tex(struct fd2_compile_context *ctx,
737 		struct tgsi_full_instruction *inst, unsigned opc)
738 {
739 	struct ir2_instruction *instr;
740 	struct ir2_register *reg;
741 	struct tgsi_dst_register tmp_dst;
742 	struct tgsi_src_register tmp_src;
743 	const struct tgsi_src_register *coord;
744 	bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) ||
745 			inst->Instruction.Saturate;
746 	int idx;
747 
748 	if (using_temp || (opc == TGSI_OPCODE_TXP))
749 		get_internal_temp(ctx, &tmp_dst, &tmp_src);
750 
751 	if (opc == TGSI_OPCODE_TXP) {
752 		static const char *swiz[] = {
753 				[TGSI_SWIZZLE_X] = "xxxx",
754 				[TGSI_SWIZZLE_Y] = "yyyy",
755 				[TGSI_SWIZZLE_Z] = "zzzz",
756 				[TGSI_SWIZZLE_W] = "wwww",
757 		};
758 
759 		/* TXP - Projective Texture Lookup:
760 		 *
761 		 *  coord.x = src0.x / src.w
762 		 *  coord.y = src0.y / src.w
763 		 *  coord.z = src0.z / src.w
764 		 *  coord.w = src0.w
765 		 *  bias = 0.0
766 		 *
767 		 *  dst = texture_sample(unit, coord, bias)
768 		 */
769 		instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE);
770 
771 		/* MAXv: */
772 		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w";
773 		add_src_reg(ctx, instr, &inst->Src[0].Register);
774 		add_src_reg(ctx, instr, &inst->Src[0].Register);
775 
776 		/* RECIP_IEEE: */
777 		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___";
778 		add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle =
779 				swiz[inst->Src[0].Register.SwizzleW];
780 
781 		instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
782 		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_";
783 		add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx";
784 		add_src_reg(ctx, instr, &inst->Src[0].Register);
785 
786 		coord = &tmp_src;
787 	} else {
788 		coord = &inst->Src[0].Register;
789 	}
790 
791 	instr = ir2_instr_create(next_exec_cf(ctx), IR2_FETCH);
792 	instr->fetch.opc = TEX_FETCH;
793 	instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D);
794 	assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases?
795 
796 	/* save off the tex fetch to be patched later with correct const_idx: */
797 	idx = ctx->so->num_tfetch_instrs++;
798 	ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index;
799 	ctx->so->tfetch_instrs[idx].instr = instr;
800 
801 	add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register);
802 	reg = add_src_reg(ctx, instr, coord);
803 
804 	/* blob compiler always sets 3rd component to same as 1st for 2d: */
805 	if (inst->Texture.Texture == TGSI_TEXTURE_2D)
806 		reg->swizzle[2] = reg->swizzle[0];
807 
808 	/* dst register needs to be marked for sync: */
809 	ctx->need_sync |= 1 << instr->regs[0]->num;
810 
811 	/* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
812 	instr->sync = true;
813 
814 	if (using_temp) {
815 		/* texture fetch can't write directly to export, so if tgsi
816 		 * is telling us the dst register is in output file, we load
817 		 * the texture to a temp and the use ALU instruction to move
818 		 * to output
819 		 */
820 		instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, ~0);
821 
822 		add_dst_reg(ctx, instr, &inst->Dst[0].Register);
823 		add_src_reg(ctx, instr, &tmp_src);
824 		add_src_reg(ctx, instr, &tmp_src);
825 		add_vector_clamp(inst, instr);
826 	}
827 }
828 
829 /* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
830 /* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
831 static void
translate_sge_slt(struct fd2_compile_context * ctx,struct tgsi_full_instruction * inst,unsigned opc)832 translate_sge_slt(struct fd2_compile_context *ctx,
833 		struct tgsi_full_instruction *inst, unsigned opc)
834 {
835 	struct ir2_instruction *instr;
836 	struct tgsi_dst_register tmp_dst;
837 	struct tgsi_src_register tmp_src;
838 	struct tgsi_src_register tmp_const;
839 	float c0, c1;
840 
841 	switch (opc) {
842 	default:
843 		assert(0);
844 	case TGSI_OPCODE_SGE:
845 		c0 = 1.0;
846 		c1 = 0.0;
847 		break;
848 	case TGSI_OPCODE_SLT:
849 		c0 = 0.0;
850 		c1 = 1.0;
851 		break;
852 	}
853 
854 	get_internal_temp(ctx, &tmp_dst, &tmp_src);
855 
856 	instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
857 	add_dst_reg(ctx, instr, &tmp_dst);
858 	add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE;
859 	add_src_reg(ctx, instr, &inst->Src[1].Register);
860 
861 	instr = ir2_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0);
862 	add_dst_reg(ctx, instr, &inst->Dst[0].Register);
863 	/* maybe should re-arrange the syntax some day, but
864 	 * in assembler/disassembler and what ir.c expects
865 	 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
866 	 */
867 	get_immediate(ctx, &tmp_const, fui(c0));
868 	add_src_reg(ctx, instr, &tmp_const);
869 	add_src_reg(ctx, instr, &tmp_src);
870 	get_immediate(ctx, &tmp_const, fui(c1));
871 	add_src_reg(ctx, instr, &tmp_const);
872 }
873 
874 /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
875 static void
translate_lrp(struct fd2_compile_context * ctx,struct tgsi_full_instruction * inst,unsigned opc)876 translate_lrp(struct fd2_compile_context *ctx,
877 		struct tgsi_full_instruction *inst,
878 		unsigned opc)
879 {
880 	struct ir2_instruction *instr;
881 	struct tgsi_dst_register tmp_dst1, tmp_dst2;
882 	struct tgsi_src_register tmp_src1, tmp_src2;
883 	struct tgsi_src_register tmp_const;
884 
885 	get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
886 	get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
887 
888 	get_immediate(ctx, &tmp_const, fui(1.0));
889 
890 	/* tmp1 = (a * b) */
891 	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
892 	add_dst_reg(ctx, instr, &tmp_dst1);
893 	add_src_reg(ctx, instr, &inst->Src[0].Register);
894 	add_src_reg(ctx, instr, &inst->Src[1].Register);
895 
896 	/* tmp2 = (1 - a) */
897 	instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
898 	add_dst_reg(ctx, instr, &tmp_dst2);
899 	add_src_reg(ctx, instr, &tmp_const);
900 	add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE;
901 
902 	/* tmp2 = tmp2 * c */
903 	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
904 	add_dst_reg(ctx, instr, &tmp_dst2);
905 	add_src_reg(ctx, instr, &tmp_src2);
906 	add_src_reg(ctx, instr, &inst->Src[2].Register);
907 
908 	/* dst = tmp1 + tmp2 */
909 	instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
910 	add_dst_reg(ctx, instr, &inst->Dst[0].Register);
911 	add_src_reg(ctx, instr, &tmp_src1);
912 	add_src_reg(ctx, instr, &tmp_src2);
913 }
914 
915 static void
translate_trig(struct fd2_compile_context * ctx,struct tgsi_full_instruction * inst,unsigned opc)916 translate_trig(struct fd2_compile_context *ctx,
917 		struct tgsi_full_instruction *inst,
918 		unsigned opc)
919 {
920 	struct ir2_instruction *instr;
921 	struct tgsi_dst_register tmp_dst;
922 	struct tgsi_src_register tmp_src;
923 	struct tgsi_src_register tmp_const;
924 	instr_scalar_opc_t op;
925 
926 	switch (opc) {
927 	default:
928 		assert(0);
929 	case TGSI_OPCODE_SIN:
930 		op = SIN;
931 		break;
932 	case TGSI_OPCODE_COS:
933 		op = COS;
934 		break;
935 	}
936 
937 	get_internal_temp(ctx, &tmp_dst, &tmp_src);
938 
939 	tmp_dst.WriteMask = TGSI_WRITEMASK_X;
940 	tmp_src.SwizzleX = tmp_src.SwizzleY =
941 			tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X;
942 
943 	/* maybe should re-arrange the syntax some day, but
944 	 * in assembler/disassembler and what ir.c expects
945 	 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
946 	 */
947 	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
948 	add_dst_reg(ctx, instr, &tmp_dst);
949 	get_immediate(ctx, &tmp_const, fui(0.5));
950 	add_src_reg(ctx, instr, &tmp_const);
951 	add_src_reg(ctx, instr, &inst->Src[0].Register);
952 	get_immediate(ctx, &tmp_const, fui(0.159155));
953 	add_src_reg(ctx, instr, &tmp_const);
954 
955 	instr = ir2_instr_create_alu(next_exec_cf(ctx), FRACv, ~0);
956 	add_dst_reg(ctx, instr, &tmp_dst);
957 	add_src_reg(ctx, instr, &tmp_src);
958 	add_src_reg(ctx, instr, &tmp_src);
959 
960 	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
961 	add_dst_reg(ctx, instr, &tmp_dst);
962 	get_immediate(ctx, &tmp_const, fui(-3.141593));
963 	add_src_reg(ctx, instr, &tmp_const);
964 	add_src_reg(ctx, instr, &tmp_src);
965 	get_immediate(ctx, &tmp_const, fui(6.283185));
966 	add_src_reg(ctx, instr, &tmp_const);
967 
968 	instr = ir2_instr_create_alu(next_exec_cf(ctx), ~0, op);
969 	add_regs_dummy_vector(instr);
970 	add_dst_reg(ctx, instr, &inst->Dst[0].Register);
971 	add_src_reg(ctx, instr, &tmp_src);
972 }
973 
974 /*
975  * Main part of compiler/translator:
976  */
977 
978 static void
translate_instruction(struct fd2_compile_context * ctx,struct tgsi_full_instruction * inst)979 translate_instruction(struct fd2_compile_context *ctx,
980 		struct tgsi_full_instruction *inst)
981 {
982 	unsigned opc = inst->Instruction.Opcode;
983 	struct ir2_instruction *instr;
984 	static struct ir2_cf *cf;
985 
986 	if (opc == TGSI_OPCODE_END)
987 		return;
988 
989 	if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
990 		unsigned num = inst->Dst[0].Register.Index;
991 		/* seems like we need to ensure that position vs param/pixel
992 		 * exports don't end up in the same EXEC clause..  easy way
993 		 * to do this is force a new EXEC clause on first appearance
994 		 * of an position or param/pixel export.
995 		 */
996 		if ((num == ctx->position) || (num == ctx->psize)) {
997 			if (ctx->num_position > 0) {
998 				ctx->cf = NULL;
999 				ir2_cf_create_alloc(ctx->so->ir, SQ_POSITION,
1000 						ctx->num_position - 1);
1001 				ctx->num_position = 0;
1002 			}
1003 		} else {
1004 			if (ctx->num_param > 0) {
1005 				ctx->cf = NULL;
1006 				ir2_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL,
1007 						ctx->num_param - 1);
1008 				ctx->num_param = 0;
1009 			}
1010 		}
1011 	}
1012 
1013 	cf = next_exec_cf(ctx);
1014 
1015 	/* TODO turn this into a table: */
1016 	switch (opc) {
1017 	case TGSI_OPCODE_MOV:
1018 		instr = ir2_instr_create_alu(cf, MAXv, ~0);
1019 		add_regs_vector_1(ctx, inst, instr);
1020 		break;
1021 	case TGSI_OPCODE_RCP:
1022 		instr = ir2_instr_create_alu(cf, ~0, RECIP_IEEE);
1023 		add_regs_scalar_1(ctx, inst, instr);
1024 		break;
1025 	case TGSI_OPCODE_RSQ:
1026 		instr = ir2_instr_create_alu(cf, ~0, RECIPSQ_IEEE);
1027 		add_regs_scalar_1(ctx, inst, instr);
1028 		break;
1029 	case TGSI_OPCODE_SQRT:
1030 		instr = ir2_instr_create_alu(cf, ~0, SQRT_IEEE);
1031 		add_regs_scalar_1(ctx, inst, instr);
1032 		break;
1033 	case TGSI_OPCODE_MUL:
1034 		instr = ir2_instr_create_alu(cf, MULv, ~0);
1035 		add_regs_vector_2(ctx, inst, instr);
1036 		break;
1037 	case TGSI_OPCODE_ADD:
1038 		instr = ir2_instr_create_alu(cf, ADDv, ~0);
1039 		add_regs_vector_2(ctx, inst, instr);
1040 		break;
1041 	case TGSI_OPCODE_DP3:
1042 		instr = ir2_instr_create_alu(cf, DOT3v, ~0);
1043 		add_regs_vector_2(ctx, inst, instr);
1044 		break;
1045 	case TGSI_OPCODE_DP4:
1046 		instr = ir2_instr_create_alu(cf, DOT4v, ~0);
1047 		add_regs_vector_2(ctx, inst, instr);
1048 		break;
1049 	case TGSI_OPCODE_MIN:
1050 		instr = ir2_instr_create_alu(cf, MINv, ~0);
1051 		add_regs_vector_2(ctx, inst, instr);
1052 		break;
1053 	case TGSI_OPCODE_MAX:
1054 		instr = ir2_instr_create_alu(cf, MAXv, ~0);
1055 		add_regs_vector_2(ctx, inst, instr);
1056 		break;
1057 	case TGSI_OPCODE_SLT:
1058 	case TGSI_OPCODE_SGE:
1059 		translate_sge_slt(ctx, inst, opc);
1060 		break;
1061 	case TGSI_OPCODE_MAD:
1062 		instr = ir2_instr_create_alu(cf, MULADDv, ~0);
1063 		add_regs_vector_3(ctx, inst, instr);
1064 		break;
1065 	case TGSI_OPCODE_LRP:
1066 		translate_lrp(ctx, inst, opc);
1067 		break;
1068 	case TGSI_OPCODE_FRC:
1069 		instr = ir2_instr_create_alu(cf, FRACv, ~0);
1070 		add_regs_vector_1(ctx, inst, instr);
1071 		break;
1072 	case TGSI_OPCODE_FLR:
1073 		instr = ir2_instr_create_alu(cf, FLOORv, ~0);
1074 		add_regs_vector_1(ctx, inst, instr);
1075 		break;
1076 	case TGSI_OPCODE_EX2:
1077 		instr = ir2_instr_create_alu(cf, ~0, EXP_IEEE);
1078 		add_regs_scalar_1(ctx, inst, instr);
1079 		break;
1080 	case TGSI_OPCODE_POW:
1081 		translate_pow(ctx, inst);
1082 		break;
1083 	case TGSI_OPCODE_COS:
1084 	case TGSI_OPCODE_SIN:
1085 		translate_trig(ctx, inst, opc);
1086 		break;
1087 	case TGSI_OPCODE_TEX:
1088 	case TGSI_OPCODE_TXP:
1089 		translate_tex(ctx, inst, opc);
1090 		break;
1091 	case TGSI_OPCODE_CMP:
1092 		instr = ir2_instr_create_alu(cf, CNDGTEv, ~0);
1093 		add_regs_vector_3(ctx, inst, instr);
1094 		// TODO this should be src0 if regs where in sane order..
1095 		instr->regs[2]->flags ^= IR2_REG_NEGATE; /* src1 */
1096 		break;
1097 	case TGSI_OPCODE_IF:
1098 		push_predicate(ctx, &inst->Src[0].Register);
1099 		ctx->so->ir->pred = IR2_PRED_EQ;
1100 		break;
1101 	case TGSI_OPCODE_ELSE:
1102 		ctx->so->ir->pred = IR2_PRED_NE;
1103 		/* not sure if this is required in all cases, but blob compiler
1104 		 * won't combine EQ and NE in same CF:
1105 		 */
1106 		ctx->cf = NULL;
1107 		break;
1108 	case TGSI_OPCODE_ENDIF:
1109 		pop_predicate(ctx);
1110 		break;
1111 	case TGSI_OPCODE_F2I:
1112 		instr = ir2_instr_create_alu(cf, TRUNCv, ~0);
1113 		add_regs_vector_1(ctx, inst, instr);
1114 		break;
1115 	default:
1116 		DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc));
1117 		tgsi_dump(ctx->so->tokens, 0);
1118 		assert(0);
1119 		break;
1120 	}
1121 
1122 	/* internal temporaries are only valid for the duration of a single
1123 	 * TGSI instruction:
1124 	 */
1125 	ctx->num_internal_temps = 0;
1126 }
1127 
1128 static void
compile_instructions(struct fd2_compile_context * ctx)1129 compile_instructions(struct fd2_compile_context *ctx)
1130 {
1131 	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
1132 		tgsi_parse_token(&ctx->parser);
1133 
1134 		switch (ctx->parser.FullToken.Token.Type) {
1135 		case TGSI_TOKEN_TYPE_INSTRUCTION:
1136 			translate_instruction(ctx,
1137 					&ctx->parser.FullToken.FullInstruction);
1138 			break;
1139 		default:
1140 			break;
1141 		}
1142 	}
1143 
1144 	ctx->cf->cf_type = EXEC_END;
1145 }
1146 
1147 int
fd2_compile_shader(struct fd_program_stateobj * prog,struct fd2_shader_stateobj * so)1148 fd2_compile_shader(struct fd_program_stateobj *prog,
1149 		struct fd2_shader_stateobj *so)
1150 {
1151 	struct fd2_compile_context ctx;
1152 
1153 	ir2_shader_destroy(so->ir);
1154 	so->ir = ir2_shader_create();
1155 	so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0;
1156 
1157 	if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK)
1158 		return -1;
1159 
1160 	if (ctx.type == PIPE_SHADER_VERTEX) {
1161 		compile_vtx_fetch(&ctx);
1162 	} else if (ctx.type == PIPE_SHADER_FRAGMENT) {
1163 		prog->num_exports = 0;
1164 		memset(prog->export_linkage, 0xff,
1165 				sizeof(prog->export_linkage));
1166 	}
1167 
1168 	compile_instructions(&ctx);
1169 
1170 	compile_free(&ctx);
1171 
1172 	return 0;
1173 }
1174 
1175