Lines matching refs: p  (cross-reference listing: each line below references the struct translate_sse pointer p, shown with its source line number and enclosing function)

133 static struct x86_reg get_const( struct translate_sse *p, unsigned id)  in get_const()  argument
138 if(p->const_to_reg[id] >= 0) in get_const()
139 return x86_make_reg(file_XMM, p->const_to_reg[id]); in get_const()
143 if(p->reg_to_const[i] < 0) in get_const()
153 if(p->reg_to_const[i] >= 0) in get_const()
154 p->const_to_reg[p->reg_to_const[i]] = -1; in get_const()
156 p->reg_to_const[i] = id; in get_const()
157 p->const_to_reg[id] = i; in get_const()
160 sse_movaps(p->func, reg, in get_const()
161 x86_make_disp(p->machine_EDI, in get_const()
162 get_offset(p, &p->consts[id][0]))); in get_const()
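
The get_const() lines above implement a tiny constant cache: const_to_reg[] maps a constant id to an XMM register if it is already resident, reg_to_const[] records what each register holds, and a miss picks a register, evicts whatever constant it held, and loads the value with movaps from the machine struct (machine_EDI plus the offset of consts[id]). A rough scalar sketch of the same bookkeeping, with illustrative array sizes (the real ones live in struct translate_sse):

#define SKETCH_NUM_CONSTS 8   /* illustrative, not the real NUM_CONSTS */
#define SKETCH_NUM_REGS   8

static signed char const_to_reg[SKETCH_NUM_CONSTS]; /* -1 = not resident (memset to 0xff) */
static signed char reg_to_const[SKETCH_NUM_REGS];   /* -1 = register unused */

static int get_const_reg(unsigned id)
{
   int i;

   if (const_to_reg[id] >= 0)
      return const_to_reg[id];              /* hit: reuse the register */

   for (i = 0; i < SKETCH_NUM_REGS; i++)    /* miss: prefer a free register */
      if (reg_to_const[i] < 0)
         break;
   if (i == SKETCH_NUM_REGS)
      i = 0;                                /* none free: evict one (policy simplified here) */

   if (reg_to_const[i] >= 0)
      const_to_reg[reg_to_const[i]] = -1;   /* forget the evicted constant */
   reg_to_const[i] = id;
   const_to_reg[id] = i;

   /* real code: sse_movaps(xmm[i], machine->consts[id]) loads the value here */
   return i;
}
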
168 static boolean emit_load_sse2( struct translate_sse *p, in emit_load_sse2() argument
174 struct x86_reg tmp = p->tmp_EAX; in emit_load_sse2()
178 x86_movzx8(p->func, tmp, src); in emit_load_sse2()
179 sse2_movd(p->func, data, tmp); in emit_load_sse2()
182 x86_movzx16(p->func, tmp, src); in emit_load_sse2()
183 sse2_movd(p->func, data, tmp); in emit_load_sse2()
186 x86_movzx8(p->func, tmp, x86_make_disp(src, 2)); in emit_load_sse2()
187 x86_shl_imm(p->func, tmp, 16); in emit_load_sse2()
188 x86_mov16(p->func, tmp, src); in emit_load_sse2()
189 sse2_movd(p->func, data, tmp); in emit_load_sse2()
192 sse2_movd(p->func, data, src); in emit_load_sse2()
195 sse2_movd(p->func, data, src); in emit_load_sse2()
196 x86_movzx16(p->func, tmp, x86_make_disp(src, 4)); in emit_load_sse2()
197 sse2_movd(p->func, tmpXMM, tmp); in emit_load_sse2()
198 sse2_punpckldq(p->func, data, tmpXMM); in emit_load_sse2()
201 sse2_movq(p->func, data, src); in emit_load_sse2()
204 sse2_movq(p->func, data, src); in emit_load_sse2()
205 sse2_movd(p->func, tmpXMM, x86_make_disp(src, 8)); in emit_load_sse2()
206 sse2_punpcklqdq(p->func, data, tmpXMM); in emit_load_sse2()
209 sse2_movdqu(p->func, data, src); in emit_load_sse2()
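
emit_load_sse2() assembles a load of 1, 2, 3, 4, 6, 8, 12 or 16 bytes into an XMM register, combining movzx/shl for the odd sizes and movd/movq/movdqu plus punpckldq/punpcklqdq for the rest. The net effect of the generated code is simply "zero a 16-byte lane and fill its low size bytes from src":

#include <stdint.h>
#include <string.h>

/* Scalar equivalent of the code emitted by emit_load_sse2(); size is one of
 * the values listed above, other sizes are not generated. */
static void load_partial_vector(uint8_t dst[16], const uint8_t *src, unsigned size)
{
   memset(dst, 0, 16);
   memcpy(dst, src, size);
}
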
227 static void emit_load_float32( struct translate_sse *p, in emit_load_float32() argument
239 sse_movss(p->func, data, arg0); in emit_load_float32()
241 sse_orps(p->func, data, get_const(p, CONST_IDENTITY) ); in emit_load_float32()
248 sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) ); in emit_load_float32()
250 sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) ); in emit_load_float32()
251 sse_movlps(p->func, data, arg0); in emit_load_float32()
261 sse_movss(p->func, data, x86_make_disp(arg0, 8)); in emit_load_float32()
263 sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X,Y,Z,W) ); in emit_load_float32()
264 sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) ); in emit_load_float32()
265 sse_movlps(p->func, data, arg0); in emit_load_float32()
268 sse_movups(p->func, data, arg0); in emit_load_float32()
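
emit_load_float32() loads 1-4 input floats and fills the remaining output lanes from CONST_IDENTITY; the orps/shufps/movlhps combinations above all reduce to "missing z becomes 0, missing w becomes 1" (this reading assumes CONST_IDENTITY is the vector (0, 0, 0, 1), which the bitwise orps in the one-channel case implies). In scalar form:

/* Scalar reading of emit_load_float32(): nr input floats, out_nr lanes produced. */
static void load_float32_equiv(float out[4], const float *in, unsigned out_nr, unsigned nr)
{
   static const float identity[4] = { 0.0f, 0.0f, 0.0f, 1.0f }; /* assumed CONST_IDENTITY */
   unsigned i;

   for (i = 0; i < out_nr; i++)
      out[i] = (i < nr) ? in[i] : identity[i];
}
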
276 static void emit_load_float64to32( struct translate_sse *p, in emit_load_float64to32() argument
286 sse2_movsd(p->func, data, arg0); in emit_load_float64to32()
288 sse2_cvtpd2ps(p->func, data, data); in emit_load_float64to32()
290 sse2_cvtsd2ss(p->func, data, data); in emit_load_float64to32()
292 sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) ); in emit_load_float64to32()
295 sse2_movupd(p->func, data, arg0); in emit_load_float64to32()
296 sse2_cvtpd2ps(p->func, data, data); in emit_load_float64to32()
298 sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) ); in emit_load_float64to32()
300 sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) ); in emit_load_float64to32()
303 sse2_movupd(p->func, data, arg0); in emit_load_float64to32()
304 sse2_cvtpd2ps(p->func, data, data); in emit_load_float64to32()
305 sse2_movsd(p->func, tmpXMM, x86_make_disp(arg0, 16)); in emit_load_float64to32()
307 sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM); in emit_load_float64to32()
309 sse2_cvtsd2ss(p->func, tmpXMM, tmpXMM); in emit_load_float64to32()
310 sse_movlhps(p->func, data, tmpXMM); in emit_load_float64to32()
312 sse_orps(p->func, data, get_const(p, CONST_IDENTITY) ); in emit_load_float64to32()
315 sse2_movupd(p->func, data, arg0); in emit_load_float64to32()
316 sse2_cvtpd2ps(p->func, data, data); in emit_load_float64to32()
317 sse2_movupd(p->func, tmpXMM, x86_make_disp(arg0, 16)); in emit_load_float64to32()
318 sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM); in emit_load_float64to32()
319 sse_movlhps(p->func, data, tmpXMM); in emit_load_float64to32()
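
emit_load_float64to32() is the same idea for double inputs: each value is converted to float with cvtpd2ps/cvtsd2ss (in pairs where possible, using tmpXMM for the upper half), and missing lanes again come from CONST_IDENTITY:

/* Scalar reading of emit_load_float64to32(). */
static void load_float64to32_equiv(float out[4], const double *in, unsigned out_nr, unsigned nr)
{
   static const float identity[4] = { 0.0f, 0.0f, 0.0f, 1.0f }; /* assumed CONST_IDENTITY */
   unsigned i;

   for (i = 0; i < out_nr; i++)
      out[i] = (i < nr) ? (float)in[i] : identity[i];
}
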
324 static void emit_mov64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, str… in emit_mov64() argument
326 if(x86_target(p->func) != X86_32) in emit_mov64()
327 x64_mov64(p->func, dst_gpr, src_gpr); in emit_mov64()
331 if(x86_target_caps(p->func) & X86_SSE2) in emit_mov64()
332 sse2_movq(p->func, dst_xmm, src_xmm); in emit_mov64()
334 sse_movlps(p->func, dst_xmm, src_xmm); in emit_mov64()
338 static void emit_load64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, st… in emit_load64() argument
340 emit_mov64(p, dst_gpr, dst_xmm, src, src); in emit_load64()
343 static void emit_store64(struct translate_sse *p, struct x86_reg dst, struct x86_reg src_gpr, struc… in emit_store64() argument
345 emit_mov64(p, dst, dst, src_gpr, src_xmm); in emit_store64()
348 static void emit_mov128(struct translate_sse *p, struct x86_reg dst, struct x86_reg src) in emit_mov128() argument
350 if(x86_target_caps(p->func) & X86_SSE2) in emit_mov128()
351 sse2_movdqu(p->func, dst, src); in emit_mov128()
353 sse_movups(p->func, dst, src); in emit_mov128()
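
emit_mov64()/emit_load64()/emit_store64() and emit_mov128() only choose the widest copy the target supports: a 64-bit GPR mov on x86-64, otherwise movq (SSE2) or movlps (SSE); movdqu versus movups for the 128-bit case. Whichever instruction is picked, the generated code amounts to:

#include <string.h>

static void mov64_equiv (void *dst, const void *src) { memcpy(dst, src, 8);  } /* x64 mov / sse2_movq / sse_movlps */
static void mov128_equiv(void *dst, const void *src) { memcpy(dst, src, 16); } /* sse2_movdqu / sse_movups */
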
360 static void emit_memcpy(struct translate_sse *p, struct x86_reg dst, struct x86_reg src, unsigned s… in emit_memcpy() argument
364 struct x86_reg dataGPR = p->tmp_EAX; in emit_memcpy()
365 struct x86_reg dataGPR2 = p->tmp2_EDX; in emit_memcpy()
372 x86_mov8(p->func, dataGPR, src); in emit_memcpy()
373 x86_mov8(p->func, dst, dataGPR); in emit_memcpy()
376 x86_mov16(p->func, dataGPR, src); in emit_memcpy()
377 x86_mov16(p->func, dst, dataGPR); in emit_memcpy()
380 x86_mov16(p->func, dataGPR, src); in emit_memcpy()
381 x86_mov8(p->func, dataGPR2, x86_make_disp(src, 2)); in emit_memcpy()
382 x86_mov16(p->func, dst, dataGPR); in emit_memcpy()
383 x86_mov8(p->func, x86_make_disp(dst, 2), dataGPR2); in emit_memcpy()
386 x86_mov(p->func, dataGPR, src); in emit_memcpy()
387 x86_mov(p->func, dst, dataGPR); in emit_memcpy()
390 x86_mov(p->func, dataGPR, src); in emit_memcpy()
391 x86_mov16(p->func, dataGPR2, x86_make_disp(src, 4)); in emit_memcpy()
392 x86_mov(p->func, dst, dataGPR); in emit_memcpy()
393 x86_mov16(p->func, x86_make_disp(dst, 4), dataGPR2); in emit_memcpy()
397 else if(!(x86_target_caps(p->func) & X86_SSE)) in emit_memcpy()
403 x86_mov(p->func, dataGPR, x86_make_disp(src, i)); in emit_memcpy()
404 x86_mov(p->func, x86_make_disp(dst, i), dataGPR); in emit_memcpy()
412 emit_load64(p, dataGPR, dataXMM, src); in emit_memcpy()
413 emit_store64(p, dst, dataGPR, dataXMM); in emit_memcpy()
416 emit_load64(p, dataGPR2, dataXMM, src); in emit_memcpy()
417 x86_mov(p->func, dataGPR, x86_make_disp(src, 8)); in emit_memcpy()
418 emit_store64(p, dst, dataGPR2, dataXMM); in emit_memcpy()
419 x86_mov(p->func, x86_make_disp(dst, 8), dataGPR); in emit_memcpy()
422 emit_mov128(p, dataXMM, src); in emit_memcpy()
423 emit_mov128(p, dst, dataXMM); in emit_memcpy()
426 emit_mov128(p, dataXMM, src); in emit_memcpy()
427 emit_load64(p, dataGPR, dataXMM2, x86_make_disp(src, 16)); in emit_memcpy()
428 emit_mov128(p, dst, dataXMM); in emit_memcpy()
429 emit_store64(p, x86_make_disp(dst, 16), dataGPR, dataXMM2); in emit_memcpy()
432 emit_mov128(p, dataXMM, src); in emit_memcpy()
433 emit_mov128(p, dataXMM2, x86_make_disp(src, 16)); in emit_memcpy()
434 emit_mov128(p, dst, dataXMM); in emit_memcpy()
435 emit_mov128(p, x86_make_disp(dst, 16), dataXMM2); in emit_memcpy()
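
emit_memcpy() open-codes a copy of a fixed, compile-time size: 1-6 bytes go through one or two GPR temporaries, 8/12/16/24/32 bytes go through the 64- and 128-bit helpers above, and targets without SSE fall back to a loop of 4-byte moves. A scalar sketch of the small cases (all loads happen before the stores, mirroring the emitted order):

#include <stdint.h>
#include <string.h>

static void emit_memcpy_equiv(uint8_t *dst, const uint8_t *src, unsigned size)
{
   switch (size) {
   case 1: dst[0] = src[0]; break;                      /* mov8          */
   case 2: memcpy(dst, src, 2); break;                  /* mov16         */
   case 3: {                                            /* mov16 + mov8  */
      uint16_t lo; uint8_t hi;
      memcpy(&lo, src, 2); hi = src[2];
      memcpy(dst, &lo, 2); dst[2] = hi;
      break;
   }
   case 4: memcpy(dst, src, 4); break;                  /* mov           */
   case 6: {                                            /* mov + mov16   */
      uint32_t lo; uint16_t hi;
      memcpy(&lo, src, 4); memcpy(&hi, src + 4, 2);
      memcpy(dst, &lo, 4); memcpy(dst + 4, &hi, 2);
      break;
   }
   default: memcpy(dst, src, size); break;              /* 8/12/16/24/32 via the XMM/GPR helpers */
   }
}
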
443 static boolean translate_attr_convert( struct translate_sse *p, in translate_attr_convert() argument
484 if((x86_target_caps(p->func) & X86_SSE) && (0 in translate_attr_convert()
511 if(!(x86_target_caps(p->func) & X86_SSE2)) in translate_attr_convert()
513 … emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3); in translate_attr_convert()
520 sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); in translate_attr_convert()
521 sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); in translate_attr_convert()
524 sse2_punpcklwd(p->func, dataXMM, get_const(p, CONST_IDENTITY)); in translate_attr_convert()
527 sse2_psrld_imm(p->func, dataXMM, 1); in translate_attr_convert()
532 sse2_cvtdq2ps(p->func, dataXMM, dataXMM); in translate_attr_convert()
539 factor = get_const(p, CONST_INV_255); in translate_attr_convert()
542 factor = get_const(p, CONST_INV_65535); in translate_attr_convert()
545 factor = get_const(p, CONST_INV_2147483647); in translate_attr_convert()
555 sse_mulps(p->func, dataXMM, factor); in translate_attr_convert()
558 …sse_addps(p->func, dataXMM, dataXMM); /* compensate for the bit we threw away to fit u32 into s32 … in translate_attr_convert()
561 if(!(x86_target_caps(p->func) & X86_SSE2)) in translate_attr_convert()
563 … emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3); in translate_attr_convert()
569 sse2_punpcklbw(p->func, dataXMM, dataXMM); in translate_attr_convert()
570 sse2_punpcklbw(p->func, dataXMM, dataXMM); in translate_attr_convert()
571 sse2_psrad_imm(p->func, dataXMM, 24); in translate_attr_convert()
574 sse2_punpcklwd(p->func, dataXMM, dataXMM); in translate_attr_convert()
575 sse2_psrad_imm(p->func, dataXMM, 16); in translate_attr_convert()
582 sse2_cvtdq2ps(p->func, dataXMM, dataXMM); in translate_attr_convert()
589 factor = get_const(p, CONST_INV_127); in translate_attr_convert()
592 factor = get_const(p, CONST_INV_32767); in translate_attr_convert()
595 factor = get_const(p, CONST_INV_2147483647); in translate_attr_convert()
605 sse_mulps(p->func, dataXMM, factor); in translate_attr_convert()
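
The unpack/convert/multiply sequences above normalize integer attributes to float: punpcklbw/punpcklwd against CONST_IDENTITY (whose low bytes are zero) zero-extends unsigned values, punpck against the value itself followed by psrad sign-extends signed ones, and the CONST_INV_* factors are the reciprocals of the type maxima. The normalized/scaled branch structure is elided in this listing, so the mapping below is an informed reading; the 32-bit unsigned case drops the low bit so the value fits cvtdq2ps's signed input, which is what the psrld by 1 and the compensating addps are about:

#include <stdint.h>

static float unorm8_to_float  (uint8_t  v) { return v * (1.0f / 255.0f); }        /* CONST_INV_255        */
static float unorm16_to_float (uint16_t v) { return v * (1.0f / 65535.0f); }      /* CONST_INV_65535      */
static float snorm8_to_float  (int8_t   v) { return v * (1.0f / 127.0f); }        /* CONST_INV_127        */
static float snorm16_to_float (int16_t  v) { return v * (1.0f / 32767.0f); }      /* CONST_INV_32767      */
static float snorm32_to_float (int32_t  v) { return v * (1.0f / 2147483647.0f); } /* CONST_INV_2147483647 */

/* 32-bit unsigned: sacrifice the low bit so the value fits a signed convert. */
static float unorm32_to_float (uint32_t v)
{
   return (float)(int32_t)(v >> 1) * (1.0f / 2147483647.0f); /* psrld 1, cvtdq2ps, mulps  */
}
static float uscaled32_to_float(uint32_t v)
{
   float f = (float)(int32_t)(v >> 1);  /* psrld 1, cvtdq2ps               */
   return f + f;                        /* addps data,data: restore the x2 */
}
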
621 emit_load_float32(p, dataXMM, src, needed_chans, input_desc->nr_channels); in translate_attr_convert()
624 if(!(x86_target_caps(p->func) & X86_SSE2)) in translate_attr_convert()
626 emit_load_float64to32(p, dataXMM, src, needed_chans, input_desc->nr_channels); in translate_attr_convert()
637 … sse_shufps(p->func, dataXMM, dataXMM, SHUF(swizzle[0], swizzle[1], swizzle[2], swizzle[3]) ); in translate_attr_convert()
646 sse_movups(p->func, dst, dataXMM); in translate_attr_convert()
652 sse_movlps(p->func, dst, dataXMM); in translate_attr_convert()
656 sse_movss(p->func, dst, dataXMM); in translate_attr_convert()
658 x86_mov_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]); in translate_attr_convert()
664 sse_shufps(p->func, dataXMM, dataXMM, SHUF(1, 1, 2, 3)); in translate_attr_convert()
665 sse_movss(p->func, x86_make_disp(dst, 4), dataXMM); in translate_attr_convert()
668 … x86_mov_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]); in translate_attr_convert()
677 sse_movhps(p->func, x86_make_disp(dst, 8), dataXMM); in translate_attr_convert()
682 sse_shufps(p->func, dataXMM, dataXMM, SHUF(2, 2, 2, 3)); in translate_attr_convert()
683 sse_movss(p->func, x86_make_disp(dst, 8), dataXMM); in translate_attr_convert()
686 … x86_mov_imm(p->func, x86_make_disp(dst, 8), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]); in translate_attr_convert()
692 sse_shufps(p->func, dataXMM, dataXMM, SHUF(3, 3, 3, 3)); in translate_attr_convert()
693 sse_movss(p->func, x86_make_disp(dst, 12), dataXMM); in translate_attr_convert()
696 … x86_mov_imm(p->func, x86_make_disp(dst, 12), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]); in translate_attr_convert()
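
The store sequence above writes out_nr float channels: channels that map to a real input lane are stored from the (already shuffled) XMM register with movups/movlps/movhps/movss, while channels whose swizzle selects a constant are written as immediates from imms[] (presumably 0 and 0x3f800000, the float encoding of 1.0, for this float output path). In scalar form, assuming the util_format convention that swizzle values 0-3 select x/y/z/w and 4/5 mean constant 0/1:

#include <stdint.h>

#define SWZ_0 4   /* UTIL_FORMAT_SWIZZLE_0 (assumed value) */

static void store_swizzled_floats(uint32_t *dst, const uint32_t shuffled[4],
                                  const unsigned char swizzle[4], unsigned out_nr,
                                  const uint32_t imms[2])
{
   unsigned i;

   for (i = 0; i < out_nr; i++)
      dst[i] = (swizzle[i] < SWZ_0) ? shuffled[i]               /* sse_movss/movlps/movhps/movups */
                                    : imms[swizzle[i] - SWZ_0]; /* x86_mov_imm                    */
}
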
703 …else if((x86_target_caps(p->func) & X86_SSE2) && input_desc->channel[0].size == 8 && output_desc->… in translate_attr_convert()
713 struct x86_reg tmp = p->tmp_EAX; in translate_attr_convert()
732 … emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3); in translate_attr_convert()
739 sse2_punpcklbw(p->func, dataXMM, dataXMM); in translate_attr_convert()
741 sse2_psrlw_imm(p->func, dataXMM, 1); in translate_attr_convert()
744 sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); in translate_attr_convert()
749 sse2_movq(p->func, tmpXMM, get_const(p, CONST_IDENTITY)); in translate_attr_convert()
750 sse2_punpcklbw(p->func, tmpXMM, dataXMM); in translate_attr_convert()
751 sse2_psllw_imm(p->func, dataXMM, 9); in translate_attr_convert()
752 sse2_psrlw_imm(p->func, dataXMM, 8); in translate_attr_convert()
753 sse2_por(p->func, tmpXMM, dataXMM); in translate_attr_convert()
754 sse2_psrlw_imm(p->func, dataXMM, 7); in translate_attr_convert()
755 sse2_por(p->func, tmpXMM, dataXMM); in translate_attr_convert()
764 sse2_punpcklbw(p->func, dataXMM, dataXMM); in translate_attr_convert()
765 sse2_psraw_imm(p->func, dataXMM, 8); in translate_attr_convert()
776 …sse2_pshuflw(p->func, dataXMM, dataXMM, (swizzle[0] & 3) | ((swizzle[1] & 3) << 2) | ((swizzle[2] … in translate_attr_convert()
785 sse2_movq(p->func, dst, dataXMM); in translate_attr_convert()
791 sse2_movd(p->func, dst, dataXMM); in translate_attr_convert()
794 sse2_movd(p->func, tmp, dataXMM); in translate_attr_convert()
795 x86_mov16(p->func, dst, tmp); in translate_attr_convert()
797 … x86_mov16_imm(p->func, x86_make_disp(dst, 2), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]); in translate_attr_convert()
803 …x86_mov_imm(p->func, dst, (imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[0] - UTI… in translate_attr_convert()
806 x86_mov16_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]); in translate_attr_convert()
809 sse2_movd(p->func, tmp, dataXMM); in translate_attr_convert()
810 x86_shr_imm(p->func, tmp, 16); in translate_attr_convert()
811 x86_mov16(p->func, x86_make_disp(dst, 2), tmp); in translate_attr_convert()
822 sse2_psrlq_imm(p->func, dataXMM, 32); in translate_attr_convert()
823 sse2_movd(p->func, x86_make_disp(dst, 4), dataXMM); in translate_attr_convert()
827 sse2_psrlq_imm(p->func, dataXMM, 32); in translate_attr_convert()
828 sse2_movd(p->func, tmp, dataXMM); in translate_attr_convert()
829 x86_mov16(p->func, x86_make_disp(dst, 4), tmp); in translate_attr_convert()
832 … x86_mov16_imm(p->func, x86_make_disp(dst, 6), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]); in translate_attr_convert()
839 …x86_mov_imm(p->func, x86_make_disp(dst, 4), (imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0] << 16) | imm… in translate_attr_convert()
842 … x86_mov16_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]); in translate_attr_convert()
846 sse2_psrlq_imm(p->func, dataXMM, 48); in translate_attr_convert()
847 sse2_movd(p->func, tmp, dataXMM); in translate_attr_convert()
848 x86_mov16(p->func, x86_make_disp(dst, 6), tmp); in translate_attr_convert()
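
The punpcklbw tricks above widen 8-bit channels to 16 bits without leaving the integer domain (the selecting conditions are elided in this listing, so the case mapping is an informed reading): interleaving a byte with itself yields b*256 + b = b*257, and since 257 = 65535/255 that is an exact unorm8-to-unorm16 renormalization; a following psrlw 1 rescales it to the signed 16-bit range; interleaving with CONST_IDENTITY (zero bytes) is a plain zero-extend for scaled values; and interleave-with-self plus psraw 8 is a sign-extend. In scalar form:

#include <stdint.h>

static uint16_t unorm8_to_unorm16(uint8_t b) { return (uint16_t)(b * 257u); }        /* punpcklbw x,x           */
static int16_t  unorm8_to_snorm16(uint8_t b) { return (int16_t)((b * 257u) >> 1); }  /* ... then psrlw 1        */
static uint16_t uscaled8_to_16   (uint8_t b) { return b; }                           /* punpcklbw x, identity   */
static int16_t  sscaled8_to_16   (int8_t  b) { return b; }                           /* punpcklbw x,x; psraw 8  */
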
858 struct x86_reg tmp = p->tmp_EAX; in translate_attr_convert()
867 x86_mov(p->func, tmp, src); in translate_attr_convert()
868 x86_bswap(p->func, tmp); in translate_attr_convert()
869 x86_mov(p->func, dst, tmp); in translate_attr_convert()
895 x86_mov8_imm(p->func, x86_make_disp(dst, i * 1), v); in translate_attr_convert()
899 x86_mov8(p->func, tmp, x86_make_disp(src, swizzle[i] * 1)); in translate_attr_convert()
900 x86_mov8(p->func, x86_make_disp(dst, i * 1), tmp); in translate_attr_convert()
924 x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), v); in translate_attr_convert()
927 x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), 0); in translate_attr_convert()
930 x86_mov16(p->func, tmp, x86_make_disp(src, swizzle[i] * 2)); in translate_attr_convert()
931 x86_mov16(p->func, x86_make_disp(dst, i * 2), tmp); in translate_attr_convert()
955 x86_mov_imm(p->func, x86_make_disp(dst, i * 4), v); in translate_attr_convert()
959 x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 4)); in translate_attr_convert()
960 x86_mov(p->func, x86_make_disp(dst, i * 4), tmp); in translate_attr_convert()
988 x86_mov_imm(p->func, x86_make_disp(dst, i * 8), l); in translate_attr_convert()
989 x86_mov_imm(p->func, x86_make_disp(dst, i * 8 + 4), h); in translate_attr_convert()
993 if(x86_target_caps(p->func) & X86_SSE) in translate_attr_convert()
996 emit_load64(p, tmp, tmpXMM, x86_make_disp(src, swizzle[i] * 8)); in translate_attr_convert()
997 emit_store64(p, x86_make_disp(dst, i * 8), tmp, tmpXMM); in translate_attr_convert()
1001 x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8)); in translate_attr_convert()
1002 x86_mov(p->func, x86_make_disp(dst, i * 8), tmp); in translate_attr_convert()
1003 x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8 + 4)); in translate_attr_convert()
1004 x86_mov(p->func, x86_make_disp(dst, i * 8 + 4), tmp); in translate_attr_convert()
1015 else if((x86_target_caps(p->func) & X86_SSE2) && in translate_attr_convert()
1024 sse_movups(p->func, dataXMM, src); in translate_attr_convert()
1027 sse_shufps(p->func, dataXMM, dataXMM, SHUF(2,1,0,3)); in translate_attr_convert()
1030 sse_mulps(p->func, dataXMM, get_const(p, CONST_255)); in translate_attr_convert()
1033 sse2_cvtps2dq(p->func, dataXMM, dataXMM); in translate_attr_convert()
1034 sse2_packssdw(p->func, dataXMM, dataXMM); in translate_attr_convert()
1035 sse2_packuswb(p->func, dataXMM, dataXMM); in translate_attr_convert()
1036 sse2_movd(p->func, dst, dataXMM); in translate_attr_convert()
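
This last SSE2 special case converts a 4 x float32 attribute straight to packed unorm8: movups, an optional shufps to swap R and B, mulps by CONST_255, cvtps2dq (round to nearest), then packssdw + packuswb to saturate into bytes, and a single movd store. Scalar equivalent:

#include <stdint.h>
#include <math.h>

static uint32_t pack_unorm8x4(const float in[4], int bgra)
{
   static const unsigned char swz[2][4] = { {0, 1, 2, 3}, {2, 1, 0, 3} }; /* RGBA / BGRA shufps */
   uint32_t out = 0;
   unsigned i;

   for (i = 0; i < 4; i++) {
      long v = lrintf(in[swz[bgra ? 1 : 0][i]] * 255.0f); /* mulps CONST_255 + cvtps2dq  */
      if (v < 0)   v = 0;                                 /* packssdw/packuswb saturate  */
      if (v > 255) v = 255;
      out |= (uint32_t)v << (8 * i);
   }
   return out;                                            /* sse2_movd(dst, dataXMM)     */
}
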
1044 static boolean translate_attr( struct translate_sse *p, in translate_attr() argument
1051 emit_memcpy(p, dst, src, util_format_get_stride(a->input_format, 1)); in translate_attr()
1055 return translate_attr_convert(p, a, src, dst); in translate_attr()
1058 static boolean init_inputs( struct translate_sse *p, in init_inputs() argument
1062 struct x86_reg instance_id = x86_make_disp(p->machine_EDI, in init_inputs()
1063 get_offset(p, &p->instance_id)); in init_inputs()
1065 for (i = 0; i < p->nr_buffer_variants; i++) { in init_inputs()
1066 struct translate_buffer_variant *variant = &p->buffer_variant[i]; in init_inputs()
1067 struct translate_buffer *buffer = &p->buffer[variant->buffer_index]; in init_inputs()
1070 struct x86_reg buf_max_index = x86_make_disp(p->machine_EDI, in init_inputs()
1071 get_offset(p, &buffer->max_index)); in init_inputs()
1072 struct x86_reg buf_stride = x86_make_disp(p->machine_EDI, in init_inputs()
1073 get_offset(p, &buffer->stride)); in init_inputs()
1074 struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI, in init_inputs()
1075 get_offset(p, &variant->ptr)); in init_inputs()
1076 struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI, in init_inputs()
1077 get_offset(p, &buffer->base_ptr)); in init_inputs()
1078 struct x86_reg elt = p->idx_ESI; in init_inputs()
1079 struct x86_reg tmp_EAX = p->tmp_EAX; in init_inputs()
1087 x86_mov(p->func, tmp_EAX, instance_id); in init_inputs()
1090 struct x86_reg tmp_EDX = p->tmp2_EDX; in init_inputs()
1091 struct x86_reg tmp_ECX = p->src_ECX; in init_inputs()
1097 x86_xor(p->func, tmp_EDX, tmp_EDX); in init_inputs()
1098 x86_mov_reg_imm(p->func, tmp_ECX, variant->instance_divisor); in init_inputs()
1099 x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ in init_inputs()
1107 x86_mov(p->func, tmp_EAX, elt); in init_inputs()
1111 x86_cmp(p->func, tmp_EAX, buf_max_index); in init_inputs()
1112 x86_cmovcc(p->func, tmp_EAX, buf_max_index, cc_AE); in init_inputs()
1115 x86_imul(p->func, tmp_EAX, buf_stride); in init_inputs()
1116 x64_rexw(p->func); in init_inputs()
1117 x86_add(p->func, tmp_EAX, buf_base_ptr); in init_inputs()
1119 x86_cmp(p->func, p->count_EBP, p->tmp_EAX); in init_inputs()
1124 if (!index_size && p->nr_buffer_variants == 1) in init_inputs()
1126 x64_rexw(p->func); in init_inputs()
1127 x86_mov(p->func, elt, tmp_EAX); in init_inputs()
1131 x64_rexw(p->func); in init_inputs()
1132 x86_mov(p->func, buf_ptr, tmp_EAX); in init_inputs()
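
init_inputs() precomputes a starting pointer for every buffer variant that needs one (linear runs and instanced elements): the index is either instance_id / instance_divisor (the xor/div sequence, quotient in EAX) or the run's start index, it is clamped against the buffer's max_index with cmovae, then scaled by the stride and added to base_ptr. Scalar sketch:

#include <stdint.h>

static const uint8_t *initial_ptr(const uint8_t *base_ptr, unsigned stride, unsigned max_index,
                                  unsigned start, unsigned instance_id, unsigned instance_divisor)
{
   unsigned index = instance_divisor ? instance_id / instance_divisor /* x86_div: EAX = EDX:EAX / ECX */
                                     : start;
   if (index >= max_index)            /* x86_cmp + x86_cmovcc(cc_AE): clamp to stay inside the buffer */
      index = max_index;
   return base_ptr + (uintptr_t)index * stride;  /* x86_imul + x86_add */
}
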
1141 static struct x86_reg get_buffer_ptr( struct translate_sse *p, in get_buffer_ptr() argument
1147 return x86_make_disp(p->machine_EDI, in get_buffer_ptr()
1148 get_offset(p, &p->instance_id)); in get_buffer_ptr()
1150 if (!index_size && p->nr_buffer_variants == 1) { in get_buffer_ptr()
1151 return p->idx_ESI; in get_buffer_ptr()
1153 else if (!index_size || p->buffer_variant[var_idx].instance_divisor) { in get_buffer_ptr()
1154 struct x86_reg ptr = p->src_ECX; in get_buffer_ptr()
1156 x86_make_disp(p->machine_EDI, in get_buffer_ptr()
1157 get_offset(p, &p->buffer_variant[var_idx].ptr)); in get_buffer_ptr()
1159 x64_rexw(p->func); in get_buffer_ptr()
1160 x86_mov(p->func, ptr, buf_ptr); in get_buffer_ptr()
1164 struct x86_reg ptr = p->src_ECX; in get_buffer_ptr()
1165 const struct translate_buffer_variant *variant = &p->buffer_variant[var_idx]; in get_buffer_ptr()
1168 x86_make_disp(p->machine_EDI, in get_buffer_ptr()
1169 get_offset(p, &p->buffer[variant->buffer_index].stride)); in get_buffer_ptr()
1172 x86_make_disp(p->machine_EDI, in get_buffer_ptr()
1173 get_offset(p, &p->buffer[variant->buffer_index].base_ptr)); in get_buffer_ptr()
1176 x86_make_disp(p->machine_EDI, in get_buffer_ptr()
1177 get_offset(p, &p->buffer[variant->buffer_index].max_index)); in get_buffer_ptr()
1186 x86_movzx8(p->func, ptr, elt); in get_buffer_ptr()
1189 x86_movzx16(p->func, ptr, elt); in get_buffer_ptr()
1192 x86_mov(p->func, ptr, elt); in get_buffer_ptr()
1198 x86_cmp(p->func, ptr, buf_max_index); in get_buffer_ptr()
1199 x86_cmovcc(p->func, ptr, buf_max_index, cc_AE); in get_buffer_ptr()
1201 x86_imul(p->func, ptr, buf_stride); in get_buffer_ptr()
1202 x64_rexw(p->func); in get_buffer_ptr()
1203 x86_add(p->func, ptr, buf_base_ptr); in get_buffer_ptr()
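
get_buffer_ptr() returns either the instance-id slot, the precomputed per-variant pointer from init_inputs(), or, for indexed draws, computes the per-vertex address on the spot: the element index is fetched with movzx8/movzx16/mov depending on index_size, clamped against max_index, multiplied by the stride and added to base_ptr. Scalar sketch of the indexed path:

#include <stdint.h>

static const uint8_t *indexed_vertex_ptr(const uint8_t *base_ptr, unsigned stride, unsigned max_index,
                                         const void *elt, unsigned index_size)
{
   unsigned idx;

   switch (index_size) {
   case 1:  idx = *(const uint8_t  *)elt; break;  /* x86_movzx8  */
   case 2:  idx = *(const uint16_t *)elt; break;  /* x86_movzx16 */
   default: idx = *(const uint32_t *)elt; break;  /* x86_mov     */
   }
   if (idx >= max_index)                          /* cmp + cmovae */
      idx = max_index;
   return base_ptr + (uintptr_t)idx * stride;     /* imul + add   */
}
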
1210 static boolean incr_inputs( struct translate_sse *p, in incr_inputs() argument
1213 if (!index_size && p->nr_buffer_variants == 1) { in incr_inputs()
1214 struct x86_reg stride = x86_make_disp(p->machine_EDI, in incr_inputs()
1215 get_offset(p, &p->buffer[0].stride)); in incr_inputs()
1217 if (p->buffer_variant[0].instance_divisor == 0) { in incr_inputs()
1218 x64_rexw(p->func); in incr_inputs()
1219 x86_add(p->func, p->idx_ESI, stride); in incr_inputs()
1220 sse_prefetchnta(p->func, x86_make_disp(p->idx_ESI, 192)); in incr_inputs()
1228 for (i = 0; i < p->nr_buffer_variants; i++) { in incr_inputs()
1229 struct translate_buffer_variant *variant = &p->buffer_variant[i]; in incr_inputs()
1230 struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI, in incr_inputs()
1231 get_offset(p, &variant->ptr)); in incr_inputs()
1232 struct x86_reg buf_stride = x86_make_disp(p->machine_EDI, in incr_inputs()
1233 … get_offset(p, &p->buffer[variant->buffer_index].stride)); in incr_inputs()
1236 x86_mov(p->func, p->tmp_EAX, buf_stride); in incr_inputs()
1237 x64_rexw(p->func); in incr_inputs()
1238 x86_add(p->func, p->tmp_EAX, buf_ptr); in incr_inputs()
1239 if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); in incr_inputs()
1240 x64_rexw(p->func); in incr_inputs()
1241 x86_mov(p->func, buf_ptr, p->tmp_EAX); in incr_inputs()
1246 x64_rexw(p->func); in incr_inputs()
1247 x86_lea(p->func, p->idx_ESI, x86_make_disp(p->idx_ESI, index_size)); in incr_inputs()
1270 static boolean build_vertex_emit( struct translate_sse *p, in build_vertex_emit() argument
1277 memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const)); in build_vertex_emit()
1278 memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg)); in build_vertex_emit()
1280 p->tmp_EAX = x86_make_reg(file_REG32, reg_AX); in build_vertex_emit()
1281 p->idx_ESI = x86_make_reg(file_REG32, reg_SI); in build_vertex_emit()
1282 p->outbuf_EBX = x86_make_reg(file_REG32, reg_BX); in build_vertex_emit()
1283 p->machine_EDI = x86_make_reg(file_REG32, reg_DI); in build_vertex_emit()
1284 p->count_EBP = x86_make_reg(file_REG32, reg_BP); in build_vertex_emit()
1285 p->tmp2_EDX = x86_make_reg(file_REG32, reg_DX); in build_vertex_emit()
1286 p->src_ECX = x86_make_reg(file_REG32, reg_CX); in build_vertex_emit()
1288 p->func = func; in build_vertex_emit()
1290 x86_init_func(p->func); in build_vertex_emit()
1292 if(x86_target(p->func) == X86_64_WIN64_ABI) in build_vertex_emit()
1295 …sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8), x86_make_reg(file_XMM, 6)… in build_vertex_emit()
1296 …sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24), x86_make_reg(file_XMM, 7… in build_vertex_emit()
1299 x86_push(p->func, p->outbuf_EBX); in build_vertex_emit()
1300 x86_push(p->func, p->count_EBP); in build_vertex_emit()
1303 if(x86_target(p->func) != X86_64_STD_ABI) in build_vertex_emit()
1305 x86_push(p->func, p->machine_EDI); in build_vertex_emit()
1306 x86_push(p->func, p->idx_ESI); in build_vertex_emit()
1308 x86_mov(p->func, p->machine_EDI, x86_fn_arg(p->func, 1)); in build_vertex_emit()
1309 x86_mov(p->func, p->idx_ESI, x86_fn_arg(p->func, 2)); in build_vertex_emit()
1312 x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3)); in build_vertex_emit()
1314 if(x86_target(p->func) != X86_32) in build_vertex_emit()
1315 x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5)); in build_vertex_emit()
1317 x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5)); in build_vertex_emit()
1321 if (p->use_instancing) { in build_vertex_emit()
1322 x86_mov(p->func, in build_vertex_emit()
1323 p->tmp_EAX, in build_vertex_emit()
1324 x86_fn_arg(p->func, 4)); in build_vertex_emit()
1325 x86_mov(p->func, in build_vertex_emit()
1326 x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)), in build_vertex_emit()
1327 p->tmp_EAX); in build_vertex_emit()
1332 x86_xor(p->func, p->tmp_EAX, p->tmp_EAX); in build_vertex_emit()
1333 x86_cmp(p->func, p->count_EBP, p->tmp_EAX); in build_vertex_emit()
1334 fixup = x86_jcc_forward(p->func, cc_E); in build_vertex_emit()
1338 init_inputs(p, index_size); in build_vertex_emit()
1342 label = x86_get_label(p->func); in build_vertex_emit()
1344 struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI); in build_vertex_emit()
1348 for (j = 0; j < p->translate.key.nr_elements; j++) { in build_vertex_emit()
1349 const struct translate_element *a = &p->translate.key.element[j]; in build_vertex_emit()
1350 unsigned variant = p->element_to_buffer_variant[j]; in build_vertex_emit()
1356 vb = get_buffer_ptr(p, index_size, variant, elt); in build_vertex_emit()
1359 if (!translate_attr( p, a, in build_vertex_emit()
1361 x86_make_disp(p->outbuf_EBX, a->output_offset))) in build_vertex_emit()
1367 x64_rexw(p->func); in build_vertex_emit()
1368 x86_lea(p->func, in build_vertex_emit()
1369 p->outbuf_EBX, in build_vertex_emit()
1370 x86_make_disp(p->outbuf_EBX, in build_vertex_emit()
1371 p->translate.key.output_stride)); in build_vertex_emit()
1375 incr_inputs( p, index_size ); in build_vertex_emit()
1380 x86_dec(p->func, p->count_EBP); in build_vertex_emit()
1381 x86_jcc(p->func, cc_NZ, label); in build_vertex_emit()
1385 if (p->func->need_emms) in build_vertex_emit()
1386 mmx_emms(p->func); in build_vertex_emit()
1390 x86_fixup_fwd_jump(p->func, fixup); in build_vertex_emit()
1395 if(x86_target(p->func) != X86_64_STD_ABI) in build_vertex_emit()
1397 x86_pop(p->func, p->idx_ESI); in build_vertex_emit()
1398 x86_pop(p->func, p->machine_EDI); in build_vertex_emit()
1401 x86_pop(p->func, p->count_EBP); in build_vertex_emit()
1402 x86_pop(p->func, p->outbuf_EBX); in build_vertex_emit()
1404 if(x86_target(p->func) == X86_64_WIN64_ABI) in build_vertex_emit()
1406 …sse2_movdqa(p->func, x86_make_reg(file_XMM, 6), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8)… in build_vertex_emit()
1407 …sse2_movdqa(p->func, x86_make_reg(file_XMM, 7), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24… in build_vertex_emit()
1409 x86_ret(p->func); in build_vertex_emit()
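
build_vertex_emit() wraps everything into one generated function per index size: the prologue saves the callee-saved registers it uses (plus XMM6/XMM7 under the Win64 ABI), loads the machine pointer, element/start argument, count, instance id and output pointer, skips the loop when count is zero, and then runs the main loop: translate every element of the current vertex, step the output pointer by output_stride, advance the inputs, decrement count. A structural sketch with hypothetical types (the real state lives in struct translate_sse):

#include <stddef.h>
#include <string.h>

struct sketch_element { unsigned output_offset; unsigned size; const char *base_ptr; unsigned stride; };
struct sketch_machine { unsigned nr_elements; struct sketch_element element[16]; unsigned output_stride; };

/* Equivalent of the generated linear (non-indexed) run function. */
static void run_linear_equiv(const struct sketch_machine *m, unsigned start,
                             unsigned count, char *output)
{
   unsigned i, j;

   for (i = 0; i < count; i++) {
      for (j = 0; j < m->nr_elements; j++) {
         const char *src = m->element[j].base_ptr
                         + (size_t)(start + i) * m->element[j].stride;  /* get_buffer_ptr()/incr_inputs() */
         /* the real code runs translate_attr(): emit_memcpy() or a format conversion */
         memcpy(output + m->element[j].output_offset, src, m->element[j].size);
      }
      output += m->output_stride;   /* x86_lea(outbuf_EBX, outbuf_EBX + output_stride) */
   }                                /* x86_dec(count_EBP); x86_jcc(cc_NZ, label)       */
}
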
1426 struct translate_sse *p = (struct translate_sse *)translate; in translate_sse_set_buffer() local
1428 if (buf < p->nr_buffers) { in translate_sse_set_buffer()
1429 p->buffer[buf].base_ptr = (char *)ptr; in translate_sse_set_buffer()
1430 p->buffer[buf].stride = stride; in translate_sse_set_buffer()
1431 p->buffer[buf].max_index = max_index; in translate_sse_set_buffer()
1436 p->nr_buffers, in translate_sse_set_buffer()
1443 struct translate_sse *p = (struct translate_sse *)translate; in translate_sse_release() local
1445 x86_release_func( &p->elt8_func ); in translate_sse_release()
1446 x86_release_func( &p->elt16_func ); in translate_sse_release()
1447 x86_release_func( &p->elt_func ); in translate_sse_release()
1448 x86_release_func( &p->linear_func ); in translate_sse_release()
1450 os_free_aligned(p); in translate_sse_release()
1456 struct translate_sse *p = NULL; in translate_sse2_create() local
1463 p = os_malloc_aligned(sizeof(struct translate_sse), 16); in translate_sse2_create()
1464 if (p == NULL) in translate_sse2_create()
1466 memset(p, 0, sizeof(*p)); in translate_sse2_create()
1467 memcpy(p->consts, consts, sizeof(consts)); in translate_sse2_create()
1469 p->translate.key = *key; in translate_sse2_create()
1470 p->translate.release = translate_sse_release; in translate_sse2_create()
1471 p->translate.set_buffer = translate_sse_set_buffer; in translate_sse2_create()
1477 p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1); in translate_sse2_create()
1480 p->use_instancing = TRUE; in translate_sse2_create()
1486 for (j = 0; j < p->nr_buffer_variants; j++) { in translate_sse2_create()
1487 if (p->buffer_variant[j].buffer_index == key->element[i].input_buffer && in translate_sse2_create()
1488 p->buffer_variant[j].instance_divisor == key->element[i].instance_divisor) { in translate_sse2_create()
1492 if (j == p->nr_buffer_variants) { in translate_sse2_create()
1493 p->buffer_variant[j].buffer_index = key->element[i].input_buffer; in translate_sse2_create()
1494 p->buffer_variant[j].instance_divisor = key->element[i].instance_divisor; in translate_sse2_create()
1495 p->nr_buffer_variants++; in translate_sse2_create()
1497 p->element_to_buffer_variant[i] = j; in translate_sse2_create()
1501 p->element_to_buffer_variant[i] = ELEMENT_BUFFER_INSTANCE_ID; in translate_sse2_create()
1505 if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); in translate_sse2_create()
1507 if (!build_vertex_emit(p, &p->linear_func, 0)) in translate_sse2_create()
1510 if (!build_vertex_emit(p, &p->elt_func, 4)) in translate_sse2_create()
1513 if (!build_vertex_emit(p, &p->elt16_func, 2)) in translate_sse2_create()
1516 if (!build_vertex_emit(p, &p->elt8_func, 1)) in translate_sse2_create()
1519 p->translate.run = (run_func) x86_get_func(&p->linear_func); in translate_sse2_create()
1520 if (p->translate.run == NULL) in translate_sse2_create()
1523 p->translate.run_elts = (run_elts_func) x86_get_func(&p->elt_func); in translate_sse2_create()
1524 if (p->translate.run_elts == NULL) in translate_sse2_create()
1527 p->translate.run_elts16 = (run_elts16_func) x86_get_func(&p->elt16_func); in translate_sse2_create()
1528 if (p->translate.run_elts16 == NULL) in translate_sse2_create()
1531 p->translate.run_elts8 = (run_elts8_func) x86_get_func(&p->elt8_func); in translate_sse2_create()
1532 if (p->translate.run_elts8 == NULL) in translate_sse2_create()
1535 return &p->translate; in translate_sse2_create()
1538 if (p) in translate_sse2_create()
1539 translate_sse_release( &p->translate ); in translate_sse2_create()
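
translate_sse2_create() fills in the machine state, deduplicates (input_buffer, instance_divisor) pairs into buffer_variant[] (the j-loop above), maps instance-id elements to ELEMENT_BUFFER_INSTANCE_ID, and then builds four code variants: linear plus 8-, 16- and 32-bit indexed. A sketch of just the variant deduplication:

struct variant_sketch { unsigned buffer_index; unsigned instance_divisor; };

static unsigned find_or_add_variant(struct variant_sketch *variants, unsigned *nr_variants,
                                    unsigned buffer_index, unsigned instance_divisor)
{
   unsigned j;

   for (j = 0; j < *nr_variants; j++)
      if (variants[j].buffer_index == buffer_index &&
          variants[j].instance_divisor == instance_divisor)
         return j;                                  /* reuse an existing variant */

   variants[j].buffer_index = buffer_index;         /* append a new one */
   variants[j].instance_divisor = instance_divisor;
   return (*nr_variants)++;
}
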