1 /*
2  * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "ir-a2xx.h"
25 
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <assert.h>
30 
31 #include "freedreno_util.h"
32 #include "instr-a2xx.h"
33 
/* Debug tracing: the DBG() call is guarded by a constant-false condition,
 * so it is never executed.
 */
#define DEBUG_MSG(f, ...)  do { if (0) DBG(f, ##__VA_ARGS__); } while (0)
/* Warnings and errors are forwarded to DBG() with a severity prefix pasted
 * onto the (string literal) format.
 */
#define WARN_MSG(f, ...)   DBG("WARN:  "f, ##__VA_ARGS__)
#define ERROR_MSG(f, ...)  DBG("ERROR: "f, ##__VA_ARGS__)

/* Largest register number accepted by ir2_reg_create(). */
#define REG_MASK 0x3f

/* Encode one control-flow instruction; returns 0 on success, -1 on error. */
static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr);

/* Encode one ALU or FETCH instruction; returns 0 on success, -1 on error. */
static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
		uint32_t idx, struct ir2_shader_info *info);

/* Fold one register use into the max_reg / max_input_reg / regs_written
 * statistics in info.
 */
static void reg_update_stats(struct ir2_register *reg,
		struct ir2_shader_info *info, bool dest);
/* Helpers translating a register's textual swizzle into the bit encodings
 * used by fetch and ALU instructions.
 */
static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n);
static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg);
static uint32_t reg_alu_dst_swiz(struct ir2_register *reg);
static uint32_t reg_alu_src_swiz(struct ir2_register *reg);
51 
52 /* simple allocator to carve allocations out of an up-front allocated heap,
53  * so that we can free everything easily in one shot.
54  */
ir2_alloc(struct ir2_shader * shader,int sz)55 static void * ir2_alloc(struct ir2_shader *shader, int sz)
56 {
57 	void *ptr = &shader->heap[shader->heap_idx];
58 	shader->heap_idx += align(sz, 4);
59 	return ptr;
60 }
61 
/* Copy a NUL-terminated string into memory obtained from ir2_alloc().
 *
 * Returns the copy, or NULL when str is NULL.
 */
static char * ir2_strdup(struct ir2_shader *shader, const char *str)
{
	char *copy;
	int n;

	if (!str)
		return NULL;

	n = strlen(str);
	copy = ir2_alloc(shader, n + 1);
	memcpy(copy, str, n);
	copy[n] = '\0';

	return copy;
}
73 
ir2_shader_create(void)74 struct ir2_shader * ir2_shader_create(void)
75 {
76 	DEBUG_MSG("");
77 	return calloc(1, sizeof(struct ir2_shader));
78 }
79 
/* Release a shader obtained from ir2_shader_create().  Everything carved out
 * of the shader via ir2_alloc() goes away with this single free().
 */
void ir2_shader_destroy(struct ir2_shader *shader)
{
	DEBUG_MSG("");

	free(shader);
}
85 
86 /* resolve addr/cnt/sequence fields in the individual CF's */
shader_resolve(struct ir2_shader * shader,struct ir2_shader_info * info)87 static int shader_resolve(struct ir2_shader *shader, struct ir2_shader_info *info)
88 {
89 	uint32_t addr;
90 	unsigned i;
91 	int j;
92 
93 	addr = shader->cfs_count / 2;
94 	for (i = 0; i < shader->cfs_count; i++) {
95 		struct ir2_cf *cf = shader->cfs[i];
96 		if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
97 			uint32_t sequence = 0;
98 
99 			if (cf->exec.addr && (cf->exec.addr != addr))
100 				WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i);
101 			if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count))
102 				WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i);
103 
104 			for (j = cf->exec.instrs_count - 1; j >= 0; j--) {
105 				struct ir2_instruction *instr = cf->exec.instrs[j];
106 				sequence <<= 2;
107 				if (instr->instr_type == IR2_FETCH)
108 					sequence |= 0x1;
109 				if (instr->sync)
110 					sequence |= 0x2;
111 			}
112 
113 			cf->exec.addr = addr;
114 			cf->exec.cnt  = cf->exec.instrs_count;
115 			cf->exec.sequence = sequence;
116 
117 			addr += cf->exec.instrs_count;
118 		}
119 	}
120 
121 	info->sizedwords = 3 * addr;
122 
123 	return 0;
124 }
125 
ir2_shader_assemble(struct ir2_shader * shader,struct ir2_shader_info * info)126 void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info)
127 {
128 	uint32_t i, j;
129 	uint32_t *ptr, *dwords = NULL;
130 	uint32_t idx = 0;
131 	int ret;
132 
133 	info->sizedwords    = 0;
134 	info->max_reg       = -1;
135 	info->max_input_reg = 0;
136 	info->regs_written  = 0;
137 
138 	/* we need an even # of CF's.. insert a NOP if needed */
139 	if (shader->cfs_count != align(shader->cfs_count, 2))
140 		ir2_cf_create(shader, NOP);
141 
142 	/* first pass, resolve sizes and addresses: */
143 	ret = shader_resolve(shader, info);
144 	if (ret) {
145 		ERROR_MSG("resolve failed: %d", ret);
146 		goto fail;
147 	}
148 
149 	ptr = dwords = calloc(4, info->sizedwords);
150 
151 	/* second pass, emit CF program in pairs: */
152 	for (i = 0; i < shader->cfs_count; i += 2) {
153 		instr_cf_t *cfs = (instr_cf_t *)ptr;
154 		ret = cf_emit(shader->cfs[i], &cfs[0]);
155 		if (ret) {
156 			ERROR_MSG("CF emit failed: %d\n", ret);
157 			goto fail;
158 		}
159 		ret = cf_emit(shader->cfs[i+1], &cfs[1]);
160 		if (ret) {
161 			ERROR_MSG("CF emit failed: %d\n", ret);
162 			goto fail;
163 		}
164 		ptr += 3;
165 		assert((ptr - dwords) <= info->sizedwords);
166 	}
167 
168 	/* third pass, emit ALU/FETCH: */
169 	for (i = 0; i < shader->cfs_count; i++) {
170 		struct ir2_cf *cf = shader->cfs[i];
171 		if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
172 			for (j = 0; j < cf->exec.instrs_count; j++) {
173 				ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info);
174 				if (ret) {
175 					ERROR_MSG("instruction emit failed: %d", ret);
176 					goto fail;
177 				}
178 				ptr += 3;
179 				assert((ptr - dwords) <= info->sizedwords);
180 			}
181 		}
182 	}
183 
184 	return dwords;
185 
186 fail:
187 	free(dwords);
188 	return NULL;
189 }
190 
191 
/* Create a control-flow instruction of the given type and append it to the
 * shader's CF list.  The CF is allocated from the shader via ir2_alloc();
 * overflowing the fixed-size CF list trips an assert.
 *
 * Returns the new CF.
 */
struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type)
{
	struct ir2_cf *cf;

	cf = ir2_alloc(shader, sizeof(*cf));
	DEBUG_MSG("%d", cf_type);

	cf->cf_type = cf_type;
	cf->shader = shader;

	assert(shader->cfs_count < ARRAY_SIZE(shader->cfs));
	shader->cfs[shader->cfs_count] = cf;
	shader->cfs_count++;

	return cf;
}
202 
203 
204 /*
205  * CF instructions:
206  */
207 
/* Encode one CF into *instr.
 *
 * *instr is cleared first and its opc is always set from cf->cf_type.  Only
 * NOP, EXEC, EXEC_END and ALLOC are implemented; the remaining CF types log
 * "TODO" and fail.
 *
 * Returns 0 on success, -1 for an unimplemented CF type or an ALLOC whose
 * type is neither SQ_POSITION nor SQ_PARAMETER_PIXEL.
 */
static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr)
{
	memset(instr, 0, sizeof(*instr));

	instr->opc = cf->cf_type;

	switch (cf->cf_type) {
	case NOP:
		/* nothing beyond the opcode */
		return 0;

	case EXEC:
	case EXEC_END:
		/* values must fit their instruction fields */
		assert(cf->exec.addr <= 0x1ff);
		assert(cf->exec.cnt <= 0x6);
		assert(cf->exec.sequence <= 0xfff);
		instr->exec.serialize = cf->exec.sequence;
		instr->exec.count = cf->exec.cnt;
		instr->exec.address = cf->exec.addr;
		return 0;

	case ALLOC:
		assert(cf->alloc.size <= 0xf);
		instr->alloc.size = cf->alloc.size;
		if ((cf->alloc.type != SQ_POSITION) &&
				(cf->alloc.type != SQ_PARAMETER_PIXEL)) {
			ERROR_MSG("invalid alloc type: %d", cf->alloc.type);
			return -1;
		}
		instr->alloc.buffer_select = cf->alloc.type;
		return 0;

	case COND_EXEC:
	case COND_EXEC_END:
	case COND_PRED_EXEC:
	case COND_PRED_EXEC_END:
	case LOOP_START:
	case LOOP_END:
	case COND_CALL:
	case RETURN:
	case COND_JMP:
	case COND_EXEC_PRED_CLEAN:
	case COND_EXEC_PRED_CLEAN_END:
	case MARK_VS_FETCH_DONE:
		ERROR_MSG("TODO");
		return -1;
	}

	return 0;
}
257 
258 
ir2_instr_create(struct ir2_cf * cf,int instr_type)259 struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type)
260 {
261 	struct ir2_instruction *instr =
262 			ir2_alloc(cf->shader, sizeof(struct ir2_instruction));
263 	DEBUG_MSG("%d", instr_type);
264 	instr->shader = cf->shader;
265 	instr->pred = cf->shader->pred;
266 	instr->instr_type = instr_type;
267 	assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs));
268 	cf->exec.instrs[cf->exec.instrs_count++] = instr;
269 	return instr;
270 }
271 
272 
273 /*
274  * FETCH instructions:
275  */
276 
instr_emit_fetch(struct ir2_instruction * instr,uint32_t * dwords,uint32_t idx,struct ir2_shader_info * info)277 static int instr_emit_fetch(struct ir2_instruction *instr,
278 		uint32_t *dwords, uint32_t idx,
279 		struct ir2_shader_info *info)
280 {
281 	instr_fetch_t *fetch = (instr_fetch_t *)dwords;
282 	int reg = 0;
283 	struct ir2_register *dst_reg = instr->regs[reg++];
284 	struct ir2_register *src_reg = instr->regs[reg++];
285 
286 	memset(fetch, 0, sizeof(*fetch));
287 
288 	reg_update_stats(dst_reg, info, true);
289 	reg_update_stats(src_reg, info, false);
290 
291 	fetch->opc = instr->fetch.opc;
292 
293 	if (instr->fetch.opc == VTX_FETCH) {
294 		instr_fetch_vtx_t *vtx = &fetch->vtx;
295 
296 		assert(instr->fetch.stride <= 0xff);
297 		assert(instr->fetch.fmt <= 0x3f);
298 		assert(instr->fetch.const_idx <= 0x1f);
299 		assert(instr->fetch.const_idx_sel <= 0x3);
300 
301 		vtx->src_reg = src_reg->num;
302 		vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1);
303 		vtx->dst_reg = dst_reg->num;
304 		vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg);
305 		vtx->must_be_one = 1;
306 		vtx->const_index = instr->fetch.const_idx;
307 		vtx->const_index_sel = instr->fetch.const_idx_sel;
308 		vtx->format_comp_all = !!instr->fetch.is_signed;
309 		vtx->num_format_all = !instr->fetch.is_normalized;
310 		vtx->format = instr->fetch.fmt;
311 		vtx->stride = instr->fetch.stride;
312 		vtx->offset = instr->fetch.offset;
313 
314 		if (instr->pred != IR2_PRED_NONE) {
315 			vtx->pred_select = 1;
316 			vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
317 		}
318 
319 		/* XXX seems like every FETCH but the first has
320 		 * this bit set:
321 		 */
322 		vtx->reserved3 = (idx > 0) ? 0x1 : 0x0;
323 		vtx->reserved0 = (idx > 0) ? 0x2 : 0x3;
324 	} else if (instr->fetch.opc == TEX_FETCH) {
325 		instr_fetch_tex_t *tex = &fetch->tex;
326 
327 		assert(instr->fetch.const_idx <= 0x1f);
328 
329 		tex->src_reg = src_reg->num;
330 		tex->src_swiz = reg_fetch_src_swiz(src_reg, 3);
331 		tex->dst_reg = dst_reg->num;
332 		tex->dst_swiz = reg_fetch_dst_swiz(dst_reg);
333 		tex->const_idx = instr->fetch.const_idx;
334 		tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
335 		tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
336 		tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
337 		tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
338 		tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
339 		tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
340 		tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
341 		tex->use_comp_lod = 1;
342 		tex->use_reg_lod = !instr->fetch.is_cube;
343 		tex->sample_location = SAMPLE_CENTER;
344 
345 		if (instr->pred != IR2_PRED_NONE) {
346 			tex->pred_select = 1;
347 			tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
348 		}
349 
350 	} else {
351 		ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
352 		return -1;
353 	}
354 
355 	return 0;
356 }
357 
358 /*
359  * ALU instructions:
360  */
361 
instr_emit_alu(struct ir2_instruction * instr,uint32_t * dwords,struct ir2_shader_info * info)362 static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords,
363 		struct ir2_shader_info *info)
364 {
365 	int reg = 0;
366 	instr_alu_t *alu = (instr_alu_t *)dwords;
367 	struct ir2_register *dst_reg  = instr->regs[reg++];
368 	struct ir2_register *src1_reg;
369 	struct ir2_register *src2_reg;
370 	struct ir2_register *src3_reg;
371 
372 	memset(alu, 0, sizeof(*alu));
373 
374 	/* handle instructions w/ 3 src operands: */
375 	switch (instr->alu.vector_opc) {
376 	case MULADDv:
377 	case CNDEv:
378 	case CNDGTEv:
379 	case CNDGTv:
380 	case DOT2ADDv:
381 		/* note: disassembler lists 3rd src first, ie:
382 		 *   MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)
383 		 * which is the reason for this strange ordering.
384 		 */
385 		src3_reg = instr->regs[reg++];
386 		break;
387 	default:
388 		src3_reg = NULL;
389 		break;
390 	}
391 
392 	src1_reg = instr->regs[reg++];
393 	src2_reg = instr->regs[reg++];
394 
395 	reg_update_stats(dst_reg, info, true);
396 	reg_update_stats(src1_reg, info, false);
397 	reg_update_stats(src2_reg, info, false);
398 
399 	assert((dst_reg->flags & ~IR2_REG_EXPORT) == 0);
400 	assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4));
401 	assert((src1_reg->flags & IR2_REG_EXPORT) == 0);
402 	assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4));
403 	assert((src2_reg->flags & IR2_REG_EXPORT) == 0);
404 	assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4));
405 
406 	if (instr->alu.vector_opc == (instr_vector_opc_t)~0) {
407 		alu->vector_opc          = MAXv;
408 		alu->vector_write_mask   = 0;
409 	} else {
410 		alu->vector_opc          = instr->alu.vector_opc;
411 		alu->vector_write_mask   = reg_alu_dst_swiz(dst_reg);
412 	}
413 
414 	alu->vector_dest         = dst_reg->num;
415 	alu->export_data         = !!(dst_reg->flags & IR2_REG_EXPORT);
416 
417 	// TODO predicate case/condition.. need to add to parser
418 
419 	alu->src2_reg            = src2_reg->num;
420 	alu->src2_swiz           = reg_alu_src_swiz(src2_reg);
421 	alu->src2_reg_negate     = !!(src2_reg->flags & IR2_REG_NEGATE);
422 	alu->src2_reg_abs        = !!(src2_reg->flags & IR2_REG_ABS);
423 	alu->src2_sel            = !(src2_reg->flags & IR2_REG_CONST);
424 
425 	alu->src1_reg            = src1_reg->num;
426 	alu->src1_swiz           = reg_alu_src_swiz(src1_reg);
427 	alu->src1_reg_negate     = !!(src1_reg->flags & IR2_REG_NEGATE);
428 	alu->src1_reg_abs        = !!(src1_reg->flags & IR2_REG_ABS);
429 	alu->src1_sel            = !(src1_reg->flags & IR2_REG_CONST);
430 
431 	alu->vector_clamp        = instr->alu.vector_clamp;
432 	alu->scalar_clamp        = instr->alu.scalar_clamp;
433 
434 	if (instr->alu.scalar_opc != (instr_scalar_opc_t)~0) {
435 		struct ir2_register *sdst_reg = instr->regs[reg++];
436 
437 		reg_update_stats(sdst_reg, info, true);
438 
439 		assert(sdst_reg->flags == dst_reg->flags);
440 
441 		if (src3_reg) {
442 			assert(src3_reg == instr->regs[reg]);
443 			reg++;
444 		} else {
445 			src3_reg = instr->regs[reg++];
446 		}
447 
448 		alu->scalar_dest         = sdst_reg->num;
449 		alu->scalar_write_mask   = reg_alu_dst_swiz(sdst_reg);
450 		alu->scalar_opc          = instr->alu.scalar_opc;
451 	} else {
452 		/* not sure if this is required, but adreno compiler seems
453 		 * to always set scalar opc to MAXs if it is not used:
454 		 */
455 		alu->scalar_opc = MAXs;
456 	}
457 
458 	if (src3_reg) {
459 		reg_update_stats(src3_reg, info, false);
460 
461 		alu->src3_reg            = src3_reg->num;
462 		alu->src3_swiz           = reg_alu_src_swiz(src3_reg);
463 		alu->src3_reg_negate     = !!(src3_reg->flags & IR2_REG_NEGATE);
464 		alu->src3_reg_abs        = !!(src3_reg->flags & IR2_REG_ABS);
465 		alu->src3_sel            = !(src3_reg->flags & IR2_REG_CONST);
466 	} else {
467 		/* not sure if this is required, but adreno compiler seems
468 		 * to always set register bank for 3rd src if unused:
469 		 */
470 		alu->src3_sel = 1;
471 	}
472 
473 	if (instr->pred != IR2_PRED_NONE) {
474 		alu->pred_select = (instr->pred == IR2_PRED_EQ) ? 3 : 2;
475 	}
476 
477 	return 0;
478 }
479 
instr_emit(struct ir2_instruction * instr,uint32_t * dwords,uint32_t idx,struct ir2_shader_info * info)480 static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
481 		uint32_t idx, struct ir2_shader_info *info)
482 {
483 	switch (instr->instr_type) {
484 	case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info);
485 	case IR2_ALU:   return instr_emit_alu(instr, dwords, info);
486 	}
487 	return -1;
488 }
489 
490 
ir2_reg_create(struct ir2_instruction * instr,int num,const char * swizzle,int flags)491 struct ir2_register * ir2_reg_create(struct ir2_instruction *instr,
492 		int num, const char *swizzle, int flags)
493 {
494 	struct ir2_register *reg =
495 			ir2_alloc(instr->shader, sizeof(struct ir2_register));
496 	DEBUG_MSG("%x, %d, %s", flags, num, swizzle);
497 	assert(num <= REG_MASK);
498 	reg->flags = flags;
499 	reg->num = num;
500 	reg->swizzle = ir2_strdup(instr->shader, swizzle);
501 	assert(instr->regs_count < ARRAY_SIZE(instr->regs));
502 	instr->regs[instr->regs_count++] = reg;
503 	return reg;
504 }
505 
reg_update_stats(struct ir2_register * reg,struct ir2_shader_info * info,bool dest)506 static void reg_update_stats(struct ir2_register *reg,
507 		struct ir2_shader_info *info, bool dest)
508 {
509 	if (!(reg->flags & (IR2_REG_CONST|IR2_REG_EXPORT))) {
510 		info->max_reg = MAX2(info->max_reg, reg->num);
511 
512 		if (dest) {
513 			info->regs_written |= (1 << reg->num);
514 		} else if (!(info->regs_written & (1 << reg->num))) {
515 			/* for registers that haven't been written, they must be an
516 			 * input register that the thread scheduler (presumably?)
517 			 * needs to know about:
518 			 */
519 			info->max_input_reg = MAX2(info->max_input_reg, reg->num);
520 		}
521 	}
522 }
523 
reg_fetch_src_swiz(struct ir2_register * reg,uint32_t n)524 static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n)
525 {
526 	uint32_t swiz = 0;
527 	int i;
528 
529 	assert(reg->flags == 0);
530 	assert(reg->swizzle);
531 
532 	DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
533 
534 	for (i = n-1; i >= 0; i--) {
535 		swiz <<= 2;
536 		switch (reg->swizzle[i]) {
537 		default:
538 			ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
539 		case 'x': swiz |= 0x0; break;
540 		case 'y': swiz |= 0x1; break;
541 		case 'z': swiz |= 0x2; break;
542 		case 'w': swiz |= 0x3; break;
543 		}
544 	}
545 
546 	return swiz;
547 }
548 
reg_fetch_dst_swiz(struct ir2_register * reg)549 static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg)
550 {
551 	uint32_t swiz = 0;
552 	int i;
553 
554 	assert(reg->flags == 0);
555 	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
556 
557 	DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle);
558 
559 	if (reg->swizzle) {
560 		for (i = 3; i >= 0; i--) {
561 			swiz <<= 3;
562 			switch (reg->swizzle[i]) {
563 			default:
564 				ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
565 			case 'x': swiz |= 0x0; break;
566 			case 'y': swiz |= 0x1; break;
567 			case 'z': swiz |= 0x2; break;
568 			case 'w': swiz |= 0x3; break;
569 			case '0': swiz |= 0x4; break;
570 			case '1': swiz |= 0x5; break;
571 			case '_': swiz |= 0x7; break;
572 			}
573 		}
574 	} else {
575 		swiz = 0x688;
576 	}
577 
578 	return swiz;
579 }
580 
581 /* actually, a write-mask */
reg_alu_dst_swiz(struct ir2_register * reg)582 static uint32_t reg_alu_dst_swiz(struct ir2_register *reg)
583 {
584 	uint32_t swiz = 0;
585 	int i;
586 
587 	assert((reg->flags & ~IR2_REG_EXPORT) == 0);
588 	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
589 
590 	DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle);
591 
592 	if (reg->swizzle) {
593 		for (i = 3; i >= 0; i--) {
594 			swiz <<= 1;
595 			if (reg->swizzle[i] == "xyzw"[i]) {
596 				swiz |= 0x1;
597 			} else if (reg->swizzle[i] != '_') {
598 				ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
599 				break;
600 			}
601 		}
602 	} else {
603 		swiz = 0xf;
604 	}
605 
606 	return swiz;
607 }
608 
reg_alu_src_swiz(struct ir2_register * reg)609 static uint32_t reg_alu_src_swiz(struct ir2_register *reg)
610 {
611 	uint32_t swiz = 0;
612 	int i;
613 
614 	assert((reg->flags & IR2_REG_EXPORT) == 0);
615 	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
616 
617 	DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle);
618 
619 	if (reg->swizzle) {
620 		for (i = 3; i >= 0; i--) {
621 			swiz <<= 2;
622 			switch (reg->swizzle[i]) {
623 			default:
624 				ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
625 			case 'x': swiz |= (0x0 - i) & 0x3; break;
626 			case 'y': swiz |= (0x1 - i) & 0x3; break;
627 			case 'z': swiz |= (0x2 - i) & 0x3; break;
628 			case 'w': swiz |= (0x3 - i) & 0x3; break;
629 			}
630 		}
631 	} else {
632 		swiz = 0x0;
633 	}
634 
635 	return swiz;
636 }
637