Lines Matching refs:p

142 get_const(struct translate_sse *p, unsigned id)  in get_const()  argument
147 if (p->const_to_reg[id] >= 0) in get_const()
148 return x86_make_reg(file_XMM, p->const_to_reg[id]); in get_const()
151 if (p->reg_to_const[i] < 0) in get_const()
161 if (p->reg_to_const[i] >= 0) in get_const()
162 p->const_to_reg[p->reg_to_const[i]] = -1; in get_const()
164 p->reg_to_const[i] = id; in get_const()
165 p->const_to_reg[id] = i; in get_const()
168 sse_movaps(p->func, reg, in get_const()
169 x86_make_disp(p->machine_EDI, in get_const()
170 get_offset(p, &p->consts[id][0]))); in get_const()
178 emit_load_sse2(struct translate_sse *p, in emit_load_sse2() argument
182 struct x86_reg tmp = p->tmp_EAX; in emit_load_sse2()
185 x86_movzx8(p->func, tmp, src); in emit_load_sse2()
186 sse2_movd(p->func, data, tmp); in emit_load_sse2()
189 x86_movzx16(p->func, tmp, src); in emit_load_sse2()
190 sse2_movd(p->func, data, tmp); in emit_load_sse2()
193 x86_movzx8(p->func, tmp, x86_make_disp(src, 2)); in emit_load_sse2()
194 x86_shl_imm(p->func, tmp, 16); in emit_load_sse2()
195 x86_mov16(p->func, tmp, src); in emit_load_sse2()
196 sse2_movd(p->func, data, tmp); in emit_load_sse2()
199 sse2_movd(p->func, data, src); in emit_load_sse2()
202 sse2_movd(p->func, data, src); in emit_load_sse2()
203 x86_movzx16(p->func, tmp, x86_make_disp(src, 4)); in emit_load_sse2()
204 sse2_movd(p->func, tmpXMM, tmp); in emit_load_sse2()
205 sse2_punpckldq(p->func, data, tmpXMM); in emit_load_sse2()
208 sse2_movq(p->func, data, src); in emit_load_sse2()
211 sse2_movq(p->func, data, src); in emit_load_sse2()
212 sse2_movd(p->func, tmpXMM, x86_make_disp(src, 8)); in emit_load_sse2()
213 sse2_punpcklqdq(p->func, data, tmpXMM); in emit_load_sse2()
216 sse2_movdqu(p->func, data, src); in emit_load_sse2()
237 emit_load_float32(struct translate_sse *p, struct x86_reg data, in emit_load_float32() argument
245 sse_movss(p->func, data, arg0); in emit_load_float32()
247 sse_orps(p->func, data, get_const(p, CONST_IDENTITY)); in emit_load_float32()
254 sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), in emit_load_float32()
257 sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY)); in emit_load_float32()
258 sse_movlps(p->func, data, arg0); in emit_load_float32()
268 sse_movss(p->func, data, x86_make_disp(arg0, 8)); in emit_load_float32()
270 sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), in emit_load_float32()
272 sse_shufps(p->func, data, data, SHUF(Y, Z, X, W)); in emit_load_float32()
273 sse_movlps(p->func, data, arg0); in emit_load_float32()
276 sse_movups(p->func, data, arg0); in emit_load_float32()
285 emit_load_float64to32(struct translate_sse *p, struct x86_reg data, in emit_load_float64to32() argument
291 sse2_movsd(p->func, data, arg0); in emit_load_float64to32()
293 sse2_cvtpd2ps(p->func, data, data); in emit_load_float64to32()
295 sse2_cvtsd2ss(p->func, data, data); in emit_load_float64to32()
297 sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), in emit_load_float64to32()
301 sse2_movupd(p->func, data, arg0); in emit_load_float64to32()
302 sse2_cvtpd2ps(p->func, data, data); in emit_load_float64to32()
304 sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), in emit_load_float64to32()
307 sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY)); in emit_load_float64to32()
310 sse2_movupd(p->func, data, arg0); in emit_load_float64to32()
311 sse2_cvtpd2ps(p->func, data, data); in emit_load_float64to32()
312 sse2_movsd(p->func, tmpXMM, x86_make_disp(arg0, 16)); in emit_load_float64to32()
314 sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM); in emit_load_float64to32()
316 sse2_cvtsd2ss(p->func, tmpXMM, tmpXMM); in emit_load_float64to32()
317 sse_movlhps(p->func, data, tmpXMM); in emit_load_float64to32()
319 sse_orps(p->func, data, get_const(p, CONST_IDENTITY)); in emit_load_float64to32()
322 sse2_movupd(p->func, data, arg0); in emit_load_float64to32()
323 sse2_cvtpd2ps(p->func, data, data); in emit_load_float64to32()
324 sse2_movupd(p->func, tmpXMM, x86_make_disp(arg0, 16)); in emit_load_float64to32()
325 sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM); in emit_load_float64to32()
326 sse_movlhps(p->func, data, tmpXMM); in emit_load_float64to32()
333 emit_mov64(struct translate_sse *p, struct x86_reg dst_gpr, in emit_mov64() argument
337 if (x86_target(p->func) != X86_32) in emit_mov64()
338 x64_mov64(p->func, dst_gpr, src_gpr); in emit_mov64()
341 if (x86_target_caps(p->func) & X86_SSE2) in emit_mov64()
342 sse2_movq(p->func, dst_xmm, src_xmm); in emit_mov64()
344 sse_movlps(p->func, dst_xmm, src_xmm); in emit_mov64()
350 emit_load64(struct translate_sse *p, struct x86_reg dst_gpr, in emit_load64() argument
353 emit_mov64(p, dst_gpr, dst_xmm, src, src); in emit_load64()
358 emit_store64(struct translate_sse *p, struct x86_reg dst, in emit_store64() argument
361 emit_mov64(p, dst, dst, src_gpr, src_xmm); in emit_store64()
366 emit_mov128(struct translate_sse *p, struct x86_reg dst, struct x86_reg src) in emit_mov128() argument
368 if (x86_target_caps(p->func) & X86_SSE2) in emit_mov128()
369 sse2_movdqu(p->func, dst, src); in emit_mov128()
371 sse_movups(p->func, dst, src); in emit_mov128()
380 emit_memcpy(struct translate_sse *p, struct x86_reg dst, struct x86_reg src, in emit_memcpy() argument
385 struct x86_reg dataGPR = p->tmp_EAX; in emit_memcpy()
386 struct x86_reg dataGPR2 = p->tmp2_EDX; in emit_memcpy()
391 x86_mov8(p->func, dataGPR, src); in emit_memcpy()
392 x86_mov8(p->func, dst, dataGPR); in emit_memcpy()
395 x86_mov16(p->func, dataGPR, src); in emit_memcpy()
396 x86_mov16(p->func, dst, dataGPR); in emit_memcpy()
399 x86_mov16(p->func, dataGPR, src); in emit_memcpy()
400 x86_mov8(p->func, dataGPR2, x86_make_disp(src, 2)); in emit_memcpy()
401 x86_mov16(p->func, dst, dataGPR); in emit_memcpy()
402 x86_mov8(p->func, x86_make_disp(dst, 2), dataGPR2); in emit_memcpy()
405 x86_mov(p->func, dataGPR, src); in emit_memcpy()
406 x86_mov(p->func, dst, dataGPR); in emit_memcpy()
409 x86_mov(p->func, dataGPR, src); in emit_memcpy()
410 x86_mov16(p->func, dataGPR2, x86_make_disp(src, 4)); in emit_memcpy()
411 x86_mov(p->func, dst, dataGPR); in emit_memcpy()
412 x86_mov16(p->func, x86_make_disp(dst, 4), dataGPR2); in emit_memcpy()
416 else if (!(x86_target_caps(p->func) & X86_SSE)) { in emit_memcpy()
420 x86_mov(p->func, dataGPR, x86_make_disp(src, i)); in emit_memcpy()
421 x86_mov(p->func, x86_make_disp(dst, i), dataGPR); in emit_memcpy()
427 emit_load64(p, dataGPR, dataXMM, src); in emit_memcpy()
428 emit_store64(p, dst, dataGPR, dataXMM); in emit_memcpy()
431 emit_load64(p, dataGPR2, dataXMM, src); in emit_memcpy()
432 x86_mov(p->func, dataGPR, x86_make_disp(src, 8)); in emit_memcpy()
433 emit_store64(p, dst, dataGPR2, dataXMM); in emit_memcpy()
434 x86_mov(p->func, x86_make_disp(dst, 8), dataGPR); in emit_memcpy()
437 emit_mov128(p, dataXMM, src); in emit_memcpy()
438 emit_mov128(p, dst, dataXMM); in emit_memcpy()
441 emit_mov128(p, dataXMM, src); in emit_memcpy()
442 emit_load64(p, dataGPR, dataXMM2, x86_make_disp(src, 16)); in emit_memcpy()
443 emit_mov128(p, dst, dataXMM); in emit_memcpy()
444 emit_store64(p, x86_make_disp(dst, 16), dataGPR, dataXMM2); in emit_memcpy()
447 emit_mov128(p, dataXMM, src); in emit_memcpy()
448 emit_mov128(p, dataXMM2, x86_make_disp(src, 16)); in emit_memcpy()
449 emit_mov128(p, dst, dataXMM); in emit_memcpy()
450 emit_mov128(p, x86_make_disp(dst, 16), dataXMM2); in emit_memcpy()
459 translate_attr_convert(struct translate_sse *p, in translate_attr_convert() argument
505 if ((x86_target_caps(p->func) & X86_SSE) && in translate_attr_convert()
528 if (!(x86_target_caps(p->func) & X86_SSE2)) in translate_attr_convert()
530 emit_load_sse2(p, dataXMM, src, in translate_attr_convert()
540 sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); in translate_attr_convert()
541 sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); in translate_attr_convert()
544 sse2_punpcklwd(p->func, dataXMM, get_const(p, CONST_IDENTITY)); in translate_attr_convert()
547 sse2_psrld_imm(p->func, dataXMM, 1); in translate_attr_convert()
552 sse2_cvtdq2ps(p->func, dataXMM, dataXMM); in translate_attr_convert()
557 factor = get_const(p, CONST_INV_255); in translate_attr_convert()
560 factor = get_const(p, CONST_INV_65535); in translate_attr_convert()
563 factor = get_const(p, CONST_INV_2147483647); in translate_attr_convert()
573 sse_mulps(p->func, dataXMM, factor); in translate_attr_convert()
577 sse_addps(p->func, dataXMM, dataXMM); in translate_attr_convert()
580 if (!(x86_target_caps(p->func) & X86_SSE2)) in translate_attr_convert()
582 emit_load_sse2(p, dataXMM, src, in translate_attr_convert()
589 sse2_punpcklbw(p->func, dataXMM, dataXMM); in translate_attr_convert()
590 sse2_punpcklbw(p->func, dataXMM, dataXMM); in translate_attr_convert()
591 sse2_psrad_imm(p->func, dataXMM, 24); in translate_attr_convert()
594 sse2_punpcklwd(p->func, dataXMM, dataXMM); in translate_attr_convert()
595 sse2_psrad_imm(p->func, dataXMM, 16); in translate_attr_convert()
602 sse2_cvtdq2ps(p->func, dataXMM, dataXMM); in translate_attr_convert()
607 factor = get_const(p, CONST_INV_127); in translate_attr_convert()
610 factor = get_const(p, CONST_INV_32767); in translate_attr_convert()
613 factor = get_const(p, CONST_INV_2147483647); in translate_attr_convert()
623 sse_mulps(p->func, dataXMM, factor); in translate_attr_convert()
640 emit_load_float32(p, dataXMM, src, needed_chans, in translate_attr_convert()
644 if (!(x86_target_caps(p->func) & X86_SSE2)) in translate_attr_convert()
646 emit_load_float64to32(p, dataXMM, src, needed_chans, in translate_attr_convert()
658 sse_shufps(p->func, dataXMM, dataXMM, in translate_attr_convert()
668 sse_movups(p->func, dst, dataXMM); in translate_attr_convert()
674 sse_movlps(p->func, dst, dataXMM); in translate_attr_convert()
678 sse_movss(p->func, dst, dataXMM); in translate_attr_convert()
681 x86_mov_imm(p->func, dst, in translate_attr_convert()
687 sse_shufps(p->func, dataXMM, dataXMM, SHUF(1, 1, 2, 3)); in translate_attr_convert()
688 sse_movss(p->func, x86_make_disp(dst, 4), dataXMM); in translate_attr_convert()
691 x86_mov_imm(p->func, x86_make_disp(dst, 4), in translate_attr_convert()
701 sse_movhps(p->func, x86_make_disp(dst, 8), dataXMM); in translate_attr_convert()
705 sse_shufps(p->func, dataXMM, dataXMM, SHUF(2, 2, 2, 3)); in translate_attr_convert()
706 sse_movss(p->func, x86_make_disp(dst, 8), dataXMM); in translate_attr_convert()
709 x86_mov_imm(p->func, x86_make_disp(dst, 8), in translate_attr_convert()
715 sse_shufps(p->func, dataXMM, dataXMM, SHUF(3, 3, 3, 3)); in translate_attr_convert()
716 sse_movss(p->func, x86_make_disp(dst, 12), dataXMM); in translate_attr_convert()
719 x86_mov_imm(p->func, x86_make_disp(dst, 12), in translate_attr_convert()
728 else if ((x86_target_caps(p->func) & X86_SSE2) in translate_attr_convert()
741 struct x86_reg tmp = p->tmp_EAX; in translate_attr_convert()
759 emit_load_sse2(p, dataXMM, src, in translate_attr_convert()
766 sse2_punpcklbw(p->func, dataXMM, dataXMM); in translate_attr_convert()
768 sse2_psrlw_imm(p->func, dataXMM, 1); in translate_attr_convert()
771 sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); in translate_attr_convert()
775 sse2_movq(p->func, tmpXMM, get_const(p, CONST_IDENTITY)); in translate_attr_convert()
776 sse2_punpcklbw(p->func, tmpXMM, dataXMM); in translate_attr_convert()
777 sse2_psllw_imm(p->func, dataXMM, 9); in translate_attr_convert()
778 sse2_psrlw_imm(p->func, dataXMM, 8); in translate_attr_convert()
779 sse2_por(p->func, tmpXMM, dataXMM); in translate_attr_convert()
780 sse2_psrlw_imm(p->func, dataXMM, 7); in translate_attr_convert()
781 sse2_por(p->func, tmpXMM, dataXMM); in translate_attr_convert()
789 sse2_punpcklbw(p->func, dataXMM, dataXMM); in translate_attr_convert()
790 sse2_psraw_imm(p->func, dataXMM, 8); in translate_attr_convert()
803 sse2_pshuflw(p->func, dataXMM, dataXMM, in translate_attr_convert()
813 sse2_movq(p->func, dst, dataXMM); in translate_attr_convert()
819 sse2_movd(p->func, dst, dataXMM); in translate_attr_convert()
822 sse2_movd(p->func, tmp, dataXMM); in translate_attr_convert()
823 x86_mov16(p->func, dst, tmp); in translate_attr_convert()
825 x86_mov16_imm(p->func, x86_make_disp(dst, 2), in translate_attr_convert()
832 x86_mov_imm(p->func, dst, in translate_attr_convert()
837 x86_mov16_imm(p->func, dst, in translate_attr_convert()
840 sse2_movd(p->func, tmp, dataXMM); in translate_attr_convert()
841 x86_shr_imm(p->func, tmp, 16); in translate_attr_convert()
842 x86_mov16(p->func, x86_make_disp(dst, 2), tmp); in translate_attr_convert()
851 sse2_psrlq_imm(p->func, dataXMM, 32); in translate_attr_convert()
852 sse2_movd(p->func, x86_make_disp(dst, 4), dataXMM); in translate_attr_convert()
855 sse2_psrlq_imm(p->func, dataXMM, 32); in translate_attr_convert()
856 sse2_movd(p->func, tmp, dataXMM); in translate_attr_convert()
857 x86_mov16(p->func, x86_make_disp(dst, 4), tmp); in translate_attr_convert()
859 x86_mov16_imm(p->func, x86_make_disp(dst, 6), in translate_attr_convert()
867 x86_mov_imm(p->func, x86_make_disp(dst, 4), in translate_attr_convert()
872 x86_mov16_imm(p->func, x86_make_disp(dst, 4), in translate_attr_convert()
876 sse2_psrlq_imm(p->func, dataXMM, 48); in translate_attr_convert()
877 sse2_movd(p->func, tmp, dataXMM); in translate_attr_convert()
878 x86_mov16(p->func, x86_make_disp(dst, 6), tmp); in translate_attr_convert()
888 struct x86_reg tmp = p->tmp_EAX; in translate_attr_convert()
898 x86_mov(p->func, tmp, src); in translate_attr_convert()
899 x86_bswap(p->func, tmp); in translate_attr_convert()
900 x86_mov(p->func, dst, tmp); in translate_attr_convert()
921 x86_mov8_imm(p->func, x86_make_disp(dst, i * 1), v); in translate_attr_convert()
924 x86_mov8(p->func, tmp, x86_make_disp(src, swizzle[i] * 1)); in translate_attr_convert()
925 x86_mov8(p->func, x86_make_disp(dst, i * 1), tmp); in translate_attr_convert()
946 x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), v); in translate_attr_convert()
949 x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), 0); in translate_attr_convert()
952 x86_mov16(p->func, tmp, x86_make_disp(src, swizzle[i] * 2)); in translate_attr_convert()
953 x86_mov16(p->func, x86_make_disp(dst, i * 2), tmp); in translate_attr_convert()
974 x86_mov_imm(p->func, x86_make_disp(dst, i * 4), v); in translate_attr_convert()
977 x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 4)); in translate_attr_convert()
978 x86_mov(p->func, x86_make_disp(dst, i * 4), tmp); in translate_attr_convert()
1003 x86_mov_imm(p->func, x86_make_disp(dst, i * 8), l); in translate_attr_convert()
1004 x86_mov_imm(p->func, x86_make_disp(dst, i * 8 + 4), h); in translate_attr_convert()
1007 if (x86_target_caps(p->func) & X86_SSE) { in translate_attr_convert()
1009 emit_load64(p, tmp, tmpXMM, in translate_attr_convert()
1011 emit_store64(p, x86_make_disp(dst, i * 8), tmp, tmpXMM); in translate_attr_convert()
1014 x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8)); in translate_attr_convert()
1015 x86_mov(p->func, x86_make_disp(dst, i * 8), tmp); in translate_attr_convert()
1016 x86_mov(p->func, tmp, in translate_attr_convert()
1018 x86_mov(p->func, x86_make_disp(dst, i * 8 + 4), tmp); in translate_attr_convert()
1029 else if ((x86_target_caps(p->func) & X86_SSE2) && in translate_attr_convert()
1036 sse_movups(p->func, dataXMM, src); in translate_attr_convert()
1039 sse_shufps(p->func, dataXMM, dataXMM, SHUF(2, 1, 0, 3)); in translate_attr_convert()
1043 sse_mulps(p->func, dataXMM, get_const(p, CONST_255)); in translate_attr_convert()
1046 sse2_cvtps2dq(p->func, dataXMM, dataXMM); in translate_attr_convert()
1047 sse2_packssdw(p->func, dataXMM, dataXMM); in translate_attr_convert()
1048 sse2_packuswb(p->func, dataXMM, dataXMM); in translate_attr_convert()
1049 sse2_movd(p->func, dst, dataXMM); in translate_attr_convert()
1059 translate_attr(struct translate_sse *p, in translate_attr() argument
1064 emit_memcpy(p, dst, src, util_format_get_stride(a->input_format, 1)); in translate_attr()
1068 return translate_attr_convert(p, a, src, dst); in translate_attr()
1073 init_inputs(struct translate_sse *p, unsigned index_size) in init_inputs() argument
1077 x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)); in init_inputs()
1079 x86_make_disp(p->machine_EDI, get_offset(p, &p->start_instance)); in init_inputs()
1081 for (i = 0; i < p->nr_buffer_variants; i++) { in init_inputs()
1082 struct translate_buffer_variant *variant = &p->buffer_variant[i]; in init_inputs()
1083 struct translate_buffer *buffer = &p->buffer[variant->buffer_index]; in init_inputs()
1087 x86_make_disp(p->machine_EDI, get_offset(p, &buffer->max_index)); in init_inputs()
1089 x86_make_disp(p->machine_EDI, get_offset(p, &buffer->stride)); in init_inputs()
1091 x86_make_disp(p->machine_EDI, get_offset(p, &variant->ptr)); in init_inputs()
1093 x86_make_disp(p->machine_EDI, get_offset(p, &buffer->base_ptr)); in init_inputs()
1094 struct x86_reg elt = p->idx_ESI; in init_inputs()
1095 struct x86_reg tmp_EAX = p->tmp_EAX; in init_inputs()
1101 struct x86_reg tmp_EDX = p->tmp2_EDX; in init_inputs()
1106 x86_mov(p->func, tmp_EAX, instance_id); in init_inputs()
1109 struct x86_reg tmp_ECX = p->src_ECX; in init_inputs()
1114 x86_xor(p->func, tmp_EDX, tmp_EDX); in init_inputs()
1115 x86_mov_reg_imm(p->func, tmp_ECX, variant->instance_divisor); in init_inputs()
1116 x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ in init_inputs()
1121 x86_mov(p->func, tmp_EDX, start_instance); in init_inputs()
1122 x86_add(p->func, tmp_EAX, tmp_EDX); in init_inputs()
1130 x86_mov(p->func, tmp_EAX, elt); in init_inputs()
1134 x86_cmp(p->func, tmp_EAX, buf_max_index); in init_inputs()
1135 x86_cmovcc(p->func, tmp_EAX, buf_max_index, cc_AE); in init_inputs()
1138 x86_mov(p->func, p->tmp2_EDX, buf_stride); in init_inputs()
1139 x64_rexw(p->func); in init_inputs()
1140 x86_imul(p->func, tmp_EAX, p->tmp2_EDX); in init_inputs()
1141 x64_rexw(p->func); in init_inputs()
1142 x86_add(p->func, tmp_EAX, buf_base_ptr); in init_inputs()
1144 x86_cmp(p->func, p->count_EBP, p->tmp_EAX); in init_inputs()
1149 if (!index_size && p->nr_buffer_variants == 1) { in init_inputs()
1150 x64_rexw(p->func); in init_inputs()
1151 x86_mov(p->func, elt, tmp_EAX); in init_inputs()
1154 x64_rexw(p->func); in init_inputs()
1155 x86_mov(p->func, buf_ptr, tmp_EAX); in init_inputs()
1165 get_buffer_ptr(struct translate_sse *p, in get_buffer_ptr() argument
1169 return x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)); in get_buffer_ptr()
1171 if (!index_size && p->nr_buffer_variants == 1) { in get_buffer_ptr()
1172 return p->idx_ESI; in get_buffer_ptr()
1174 else if (!index_size || p->buffer_variant[var_idx].instance_divisor) { in get_buffer_ptr()
1175 struct x86_reg ptr = p->src_ECX; in get_buffer_ptr()
1177 x86_make_disp(p->machine_EDI, in get_buffer_ptr()
1178 get_offset(p, &p->buffer_variant[var_idx].ptr)); in get_buffer_ptr()
1180 x64_rexw(p->func); in get_buffer_ptr()
1181 x86_mov(p->func, ptr, buf_ptr); in get_buffer_ptr()
1185 struct x86_reg ptr = p->src_ECX; in get_buffer_ptr()
1187 &p->buffer_variant[var_idx]; in get_buffer_ptr()
1189 x86_make_disp(p->machine_EDI, in get_buffer_ptr()
1190 get_offset(p, &p->buffer[variant->buffer_index].stride)); in get_buffer_ptr()
1192 x86_make_disp(p->machine_EDI, in get_buffer_ptr()
1193 get_offset(p, &p->buffer[variant->buffer_index].base_ptr)); in get_buffer_ptr()
1195 x86_make_disp(p->machine_EDI, in get_buffer_ptr()
1196 get_offset(p, &p->buffer[variant->buffer_index].max_index)); in get_buffer_ptr()
1202 x86_movzx8(p->func, ptr, elt); in get_buffer_ptr()
1205 x86_movzx16(p->func, ptr, elt); in get_buffer_ptr()
1208 x86_mov(p->func, ptr, elt); in get_buffer_ptr()
1214 x86_cmp(p->func, ptr, buf_max_index); in get_buffer_ptr()
1215 x86_cmovcc(p->func, ptr, buf_max_index, cc_AE); in get_buffer_ptr()
1217 x86_mov(p->func, p->tmp2_EDX, buf_stride); in get_buffer_ptr()
1218 x64_rexw(p->func); in get_buffer_ptr()
1219 x86_imul(p->func, ptr, p->tmp2_EDX); in get_buffer_ptr()
1220 x64_rexw(p->func); in get_buffer_ptr()
1221 x86_add(p->func, ptr, buf_base_ptr); in get_buffer_ptr()
1228 incr_inputs(struct translate_sse *p, unsigned index_size) in incr_inputs() argument
1230 if (!index_size && p->nr_buffer_variants == 1) { in incr_inputs()
1231 const unsigned buffer_index = p->buffer_variant[0].buffer_index; in incr_inputs()
1233 x86_make_disp(p->machine_EDI, in incr_inputs()
1234 get_offset(p, &p->buffer[buffer_index].stride)); in incr_inputs()
1236 if (p->buffer_variant[0].instance_divisor == 0) { in incr_inputs()
1237 x64_rexw(p->func); in incr_inputs()
1238 x86_add(p->func, p->idx_ESI, stride); in incr_inputs()
1239 sse_prefetchnta(p->func, x86_make_disp(p->idx_ESI, 192)); in incr_inputs()
1247 for (i = 0; i < p->nr_buffer_variants; i++) { in incr_inputs()
1248 struct translate_buffer_variant *variant = &p->buffer_variant[i]; in incr_inputs()
1249 struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI, in incr_inputs()
1250 get_offset(p, &variant->ptr)); in incr_inputs()
1252 x86_make_disp(p->machine_EDI, in incr_inputs()
1253 get_offset(p, &p->buffer[variant->buffer_index].stride)); in incr_inputs()
1256 x86_mov(p->func, p->tmp_EAX, buf_stride); in incr_inputs()
1257 x64_rexw(p->func); in incr_inputs()
1258 x86_add(p->func, p->tmp_EAX, buf_ptr); in incr_inputs()
1260 sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); in incr_inputs()
1261 x64_rexw(p->func); in incr_inputs()
1262 x86_mov(p->func, buf_ptr, p->tmp_EAX); in incr_inputs()
1267 x64_rexw(p->func); in incr_inputs()
1268 x86_lea(p->func, p->idx_ESI, x86_make_disp(p->idx_ESI, index_size)); in incr_inputs()
1292 build_vertex_emit(struct translate_sse *p, in build_vertex_emit() argument
1298 memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const)); in build_vertex_emit()
1299 memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg)); in build_vertex_emit()
1301 p->tmp_EAX = x86_make_reg(file_REG32, reg_AX); in build_vertex_emit()
1302 p->idx_ESI = x86_make_reg(file_REG32, reg_SI); in build_vertex_emit()
1303 p->outbuf_EBX = x86_make_reg(file_REG32, reg_BX); in build_vertex_emit()
1304 p->machine_EDI = x86_make_reg(file_REG32, reg_DI); in build_vertex_emit()
1305 p->count_EBP = x86_make_reg(file_REG32, reg_BP); in build_vertex_emit()
1306 p->tmp2_EDX = x86_make_reg(file_REG32, reg_DX); in build_vertex_emit()
1307 p->src_ECX = x86_make_reg(file_REG32, reg_CX); in build_vertex_emit()
1309 p->func = func; in build_vertex_emit()
1311 x86_init_func(p->func); in build_vertex_emit()
1313 if (x86_target(p->func) == X86_64_WIN64_ABI) { in build_vertex_emit()
1317 sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8), in build_vertex_emit()
1319 sse2_movdqa(p->func, in build_vertex_emit()
1324 x86_push(p->func, p->outbuf_EBX); in build_vertex_emit()
1325 x86_push(p->func, p->count_EBP); in build_vertex_emit()
1328 if (x86_target(p->func) != X86_64_STD_ABI) { in build_vertex_emit()
1329 x86_push(p->func, p->machine_EDI); in build_vertex_emit()
1330 x86_push(p->func, p->idx_ESI); in build_vertex_emit()
1332 if (x86_target(p->func) != X86_32) { in build_vertex_emit()
1333 x64_mov64(p->func, p->machine_EDI, x86_fn_arg(p->func, 1)); in build_vertex_emit()
1334 x64_mov64(p->func, p->idx_ESI, x86_fn_arg(p->func, 2)); in build_vertex_emit()
1337 x86_mov(p->func, p->machine_EDI, x86_fn_arg(p->func, 1)); in build_vertex_emit()
1338 x86_mov(p->func, p->idx_ESI, x86_fn_arg(p->func, 2)); in build_vertex_emit()
1342 x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3)); in build_vertex_emit()
1344 if (x86_target(p->func) != X86_32) in build_vertex_emit()
1345 x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6)); in build_vertex_emit()
1347 x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6)); in build_vertex_emit()
1351 if (p->use_instancing) { in build_vertex_emit()
1352 x86_mov(p->func, p->tmp2_EDX, x86_fn_arg(p->func, 4)); in build_vertex_emit()
1353 x86_mov(p->func, in build_vertex_emit()
1354 x86_make_disp(p->machine_EDI, in build_vertex_emit()
1355 get_offset(p, &p->start_instance)), p->tmp2_EDX); in build_vertex_emit()
1357 x86_mov(p->func, p->tmp_EAX, x86_fn_arg(p->func, 5)); in build_vertex_emit()
1358 x86_mov(p->func, in build_vertex_emit()
1359 x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)), in build_vertex_emit()
1360 p->tmp_EAX); in build_vertex_emit()
1365 x86_xor(p->func, p->tmp_EAX, p->tmp_EAX); in build_vertex_emit()
1366 x86_cmp(p->func, p->count_EBP, p->tmp_EAX); in build_vertex_emit()
1367 fixup = x86_jcc_forward(p->func, cc_E); in build_vertex_emit()
1371 init_inputs(p, index_size); in build_vertex_emit()
1375 label = x86_get_label(p->func); in build_vertex_emit()
1377 struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI); in build_vertex_emit()
1381 for (j = 0; j < p->translate.key.nr_elements; j++) { in build_vertex_emit()
1382 const struct translate_element *a = &p->translate.key.element[j]; in build_vertex_emit()
1383 unsigned variant = p->element_to_buffer_variant[j]; in build_vertex_emit()
1389 vb = get_buffer_ptr(p, index_size, variant, elt); in build_vertex_emit()
1392 if (!translate_attr(p, a, in build_vertex_emit()
1394 x86_make_disp(p->outbuf_EBX, a->output_offset))) in build_vertex_emit()
1400 x64_rexw(p->func); in build_vertex_emit()
1401 x86_lea(p->func, p->outbuf_EBX, in build_vertex_emit()
1402 x86_make_disp(p->outbuf_EBX, p->translate.key.output_stride)); in build_vertex_emit()
1406 incr_inputs(p, index_size); in build_vertex_emit()
1411 x86_dec(p->func, p->count_EBP); in build_vertex_emit()
1412 x86_jcc(p->func, cc_NZ, label); in build_vertex_emit()
1416 if (p->func->need_emms) in build_vertex_emit()
1417 mmx_emms(p->func); in build_vertex_emit()
1421 x86_fixup_fwd_jump(p->func, fixup); in build_vertex_emit()
1425 if (x86_target(p->func) != X86_64_STD_ABI) { in build_vertex_emit()
1426 x86_pop(p->func, p->idx_ESI); in build_vertex_emit()
1427 x86_pop(p->func, p->machine_EDI); in build_vertex_emit()
1430 x86_pop(p->func, p->count_EBP); in build_vertex_emit()
1431 x86_pop(p->func, p->outbuf_EBX); in build_vertex_emit()
1433 if (x86_target(p->func) == X86_64_WIN64_ABI) { in build_vertex_emit()
1434 sse2_movdqa(p->func, x86_make_reg(file_XMM, 6), in build_vertex_emit()
1436 sse2_movdqa(p->func, x86_make_reg(file_XMM, 7), in build_vertex_emit()
1439 x86_ret(p->func); in build_vertex_emit()
1450 struct translate_sse *p = (struct translate_sse *) translate; in translate_sse_set_buffer() local
1452 if (buf < p->nr_buffers) { in translate_sse_set_buffer()
1453 p->buffer[buf].base_ptr = (char *) ptr; in translate_sse_set_buffer()
1454 p->buffer[buf].stride = stride; in translate_sse_set_buffer()
1455 p->buffer[buf].max_index = max_index; in translate_sse_set_buffer()
1460 __FUNCTION__, buf, p->nr_buffers, ptr, stride); in translate_sse_set_buffer()
1467 struct translate_sse *p = (struct translate_sse *) translate; in translate_sse_release() local
1469 x86_release_func(&p->elt8_func); in translate_sse_release()
1470 x86_release_func(&p->elt16_func); in translate_sse_release()
1471 x86_release_func(&p->elt_func); in translate_sse_release()
1472 x86_release_func(&p->linear_func); in translate_sse_release()
1474 os_free_aligned(p); in translate_sse_release()
1481 struct translate_sse *p = NULL; in translate_sse2_create() local
1488 p = os_malloc_aligned(sizeof(struct translate_sse), 16); in translate_sse2_create()
1489 if (!p) in translate_sse2_create()
1492 memset(p, 0, sizeof(*p)); in translate_sse2_create()
1493 memcpy(p->consts, consts, sizeof(consts)); in translate_sse2_create()
1495 p->translate.key = *key; in translate_sse2_create()
1496 p->translate.release = translate_sse_release; in translate_sse2_create()
1497 p->translate.set_buffer = translate_sse_set_buffer; in translate_sse2_create()
1505 p->nr_buffers = in translate_sse2_create()
1506 MAX2(p->nr_buffers, key->element[i].input_buffer + 1); in translate_sse2_create()
1509 p->use_instancing = TRUE; in translate_sse2_create()
1515 for (j = 0; j < p->nr_buffer_variants; j++) { in translate_sse2_create()
1516 if (p->buffer_variant[j].buffer_index == in translate_sse2_create()
1518 && p->buffer_variant[j].instance_divisor == in translate_sse2_create()
1523 if (j == p->nr_buffer_variants) { in translate_sse2_create()
1524 p->buffer_variant[j].buffer_index = key->element[i].input_buffer; in translate_sse2_create()
1525 p->buffer_variant[j].instance_divisor = in translate_sse2_create()
1527 p->nr_buffer_variants++; in translate_sse2_create()
1529 p->element_to_buffer_variant[i] = j; in translate_sse2_create()
1534 p->element_to_buffer_variant[i] = ELEMENT_BUFFER_INSTANCE_ID; in translate_sse2_create()
1539 debug_printf("nr_buffers: %d\n", p->nr_buffers); in translate_sse2_create()
1541 if (!build_vertex_emit(p, &p->linear_func, 0)) in translate_sse2_create()
1544 if (!build_vertex_emit(p, &p->elt_func, 4)) in translate_sse2_create()
1547 if (!build_vertex_emit(p, &p->elt16_func, 2)) in translate_sse2_create()
1550 if (!build_vertex_emit(p, &p->elt8_func, 1)) in translate_sse2_create()
1553 p->translate.run = (run_func) x86_get_func(&p->linear_func); in translate_sse2_create()
1554 if (p->translate.run == NULL) in translate_sse2_create()
1557 p->translate.run_elts = (run_elts_func) x86_get_func(&p->elt_func); in translate_sse2_create()
1558 if (p->translate.run_elts == NULL) in translate_sse2_create()
1561 p->translate.run_elts16 = (run_elts16_func) x86_get_func(&p->elt16_func); in translate_sse2_create()
1562 if (p->translate.run_elts16 == NULL) in translate_sse2_create()
1565 p->translate.run_elts8 = (run_elts8_func) x86_get_func(&p->elt8_func); in translate_sse2_create()
1566 if (p->translate.run_elts8 == NULL) in translate_sse2_create()
1569 return &p->translate; in translate_sse2_create()
1572 if (p) in translate_sse2_create()
1573 translate_sse_release(&p->translate); in translate_sse2_create()