/external/libavc/common/x86/ |
D | ih264_iquant_itrans_recon_ssse3.c |
    140: …temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bi…   [in ih264_iquant_itrans_recon_4x4_ssse3()]
    142: …temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bi…   [in ih264_iquant_itrans_recon_4x4_ssse3()]
    145: src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long   [in ih264_iquant_itrans_recon_4x4_ssse3()]
    147: src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long   [in ih264_iquant_itrans_recon_4x4_ssse3()]
    249: pred_r0 = _mm_unpacklo_epi16(pred_r0, zero_8x16b); //p00 p01 p02 p03 -- 32 bits sign extended   [in ih264_iquant_itrans_recon_4x4_ssse3()]
    250: pred_r1 = _mm_unpacklo_epi16(pred_r1, zero_8x16b); //p10 p11 p12 p13 -- 32 bits sign extended   [in ih264_iquant_itrans_recon_4x4_ssse3()]
    251: pred_r2 = _mm_unpacklo_epi16(pred_r2, zero_8x16b); //p20 p21 p22 p23 -- 32 bits sign extended   [in ih264_iquant_itrans_recon_4x4_ssse3()]
    252: pred_r3 = _mm_unpacklo_epi16(pred_r3, zero_8x16b); //p30 p31 p32 p33 -- 32 bits sign extended   [in ih264_iquant_itrans_recon_4x4_ssse3()]
    415: src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long   [in ih264_iquant_itrans_recon_8x8_ssse3()]
    418: …scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- …   [in ih264_iquant_itrans_recon_8x8_ssse3()]
    [all …]
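The hits in this file all use the same widening idiom: a 16-bit coefficient or predictor lane is interleaved with a zero register so each value lands in its own 32-bit slot before higher-precision arithmetic. A minimal sketch of that idiom (SSE2 only; the helper name is mine, and unpacking with zero is a zero-extension, so it matches sign extension only for values known to be non-negative):

    #include <emmintrin.h>  /* SSE2 */

    /* Widen the four low 16-bit lanes of v to 32-bit lanes by interleaving
     * each value with a zero, as the iquant/itrans kernels above do with
     * zero_8x16b. */
    static inline __m128i widen_lo_u16_to_u32(__m128i v)
    {
        const __m128i zero = _mm_setzero_si128();
        /* v:   a0 a1 a2 a3 a4 a5 a6 a7          (16-bit lanes)
         * out: a0 0  a1 0  a2 0  a3 0   ==  a0 a1 a2 a3 as 32-bit lanes */
        return _mm_unpacklo_epi16(v, zero);
    }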
|
D | ih264_deblk_chroma_ssse3.c |
    124: temp1 = _mm_unpacklo_epi16(linea, lineb);   [in ih264_deblk_chroma_vert_bs4_ssse3()]
    125: temp2 = _mm_unpacklo_epi16(linec, lined);   [in ih264_deblk_chroma_vert_bs4_ssse3()]
    126: temp3 = _mm_unpacklo_epi16(linee, linef);   [in ih264_deblk_chroma_vert_bs4_ssse3()]
    127: temp4 = _mm_unpacklo_epi16(lineg, lineh);   [in ih264_deblk_chroma_vert_bs4_ssse3()]
    218: temp1 = _mm_unpacklo_epi16(p1_uv_16x8, p0_uv_16x8);   [in ih264_deblk_chroma_vert_bs4_ssse3()]
    220: temp3 = _mm_unpacklo_epi16(q0_uv_16x8, q1_uv_16x8);   [in ih264_deblk_chroma_vert_bs4_ssse3()]
    473: temp1 = _mm_unpacklo_epi16(linea, lineb);   [in ih264_deblk_chroma_vert_bslt4_ssse3()]
    474: temp2 = _mm_unpacklo_epi16(linec, lined);   [in ih264_deblk_chroma_vert_bslt4_ssse3()]
    475: temp3 = _mm_unpacklo_epi16(linee, linef);   [in ih264_deblk_chroma_vert_bslt4_ssse3()]
    476: temp4 = _mm_unpacklo_epi16(lineg, lineh);   [in ih264_deblk_chroma_vert_bslt4_ssse3()]
    [all …]
|
D | ih264_deblk_luma_ssse3.c |
    127: line1 = _mm_unpacklo_epi16(temp1, temp2);   [in ih264_deblk_luma_vert_bs4_ssse3()]
    129: line3 = _mm_unpacklo_epi16(temp3, temp4);   [in ih264_deblk_luma_vert_bs4_ssse3()]
    151: line1 = _mm_unpacklo_epi16(temp1, temp2);   [in ih264_deblk_luma_vert_bs4_ssse3()]
    153: line3 = _mm_unpacklo_epi16(temp3, temp4);   [in ih264_deblk_luma_vert_bs4_ssse3()]
    428: p3_8x16 = _mm_unpacklo_epi16(temp1, temp2);   [in ih264_deblk_luma_vert_bs4_ssse3()]
    430: q2_8x16 = _mm_unpacklo_epi16(temp3, temp4);   [in ih264_deblk_luma_vert_bs4_ssse3()]
    456: p3_8x16 = _mm_unpacklo_epi16(temp1, temp2);   [in ih264_deblk_luma_vert_bs4_ssse3()]
    458: q2_8x16 = _mm_unpacklo_epi16(temp3, temp4);   [in ih264_deblk_luma_vert_bs4_ssse3()]
    881: int1 = _mm_unpacklo_epi16(linea, lineb);   [in ih264_deblk_luma_vert_bslt4_ssse3()]
    884: int2 = _mm_unpacklo_epi16(linec, lined);   [in ih264_deblk_luma_vert_bslt4_ssse3()]
    [all …]
|
D | ih264_inter_pred_filters_ssse3.c |
    894:  src_r0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);   [in ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3()]
    895:  src_t1_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);   [in ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3()]
    896:  src_t2_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);   [in ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3()]
    908:  src_r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);   [in ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3()]
    909:  src_t1_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);   [in ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3()]
    910:  src_t2_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);   [in ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3()]
    1087: src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);   [in ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3()]
    1088: src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);   [in ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3()]
    1089: src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);   [in ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3()]
    1118: src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);   [in ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3()]
    [all …]
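These matches interleave vertically adjacent rows in pairs so the filter can apply two taps per 32-bit lane with a single _mm_madd_epi16 against a repeated coefficient pair. A rough sketch of that building block (a 2-tap step only; names and taps are illustrative, not the 6-tap code from this file):

    #include <emmintrin.h>  /* SSE2 */

    /* One pair-step of a vertical filter: returns c0*row0 + c1*row1 for the
     * four leftmost pixels, as 32-bit sums. Longer filters chain several of
     * these interleave+madd steps and add the partial sums together. */
    static inline __m128i vfilter_2tap_lo(__m128i row0, __m128i row1,
                                          short c0, short c1)
    {
        /* row0_0 row1_0 row0_1 row1_1 ... : both tap operands sit side by side */
        const __m128i pair  = _mm_unpacklo_epi16(row0, row1);
        /* (c0, c1) repeated across the register */
        const __m128i coeff = _mm_setr_epi16(c0, c1, c0, c1, c0, c1, c0, c1);
        /* madd multiplies 16-bit lanes and adds adjacent products */
        return _mm_madd_epi16(pair, coeff);
    }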
|
/external/libaom/libaom/aom_dsp/x86/ |
D | transpose_sse2.h |
    33:  return _mm_unpacklo_epi16(a0, a1);   [in transpose_8bit_4x4()]
    62:  const __m128i b0 = _mm_unpacklo_epi16(a0, a1);   [in transpose_8bit_8x8()]
    64:  const __m128i b2 = _mm_unpacklo_epi16(a2, a3);   [in transpose_8bit_8x8()]
    106: const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]);   [in transpose_16bit_4x4()]
    107: const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]);   [in transpose_16bit_4x4()]
    136: const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]);   [in transpose_16bit_4x8()]
    137: const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]);   [in transpose_16bit_4x8()]
    138: const __m128i a2 = _mm_unpacklo_epi16(in[4], in[5]);   [in transpose_16bit_4x8()]
    139: const __m128i a3 = _mm_unpacklo_epi16(in[6], in[7]);   [in transpose_16bit_4x8()]
    175: const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]);   [in transpose_16bit_8x4()]
    [all …]
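transpose_16bit_4x4() shows the canonical use: two 16-bit unpacks put column neighbours next to each other, and two 32-bit unpacks complete the 4x4 transpose. A condensed sketch of that shuffle sequence (the real helper passes rows through arrays; this standalone form is just for illustration):

    #include <emmintrin.h>  /* SSE2 */

    /* Transpose a 4x4 block of 16-bit values, one row in the low 64 bits of
     * each input. On return, out0 holds transposed rows 0 and 1 and out1
     * holds transposed rows 2 and 3, two rows packed per register. */
    static inline void transpose_16bit_4x4_sketch(__m128i r0, __m128i r1,
                                                  __m128i r2, __m128i r3,
                                                  __m128i *out0, __m128i *out1)
    {
        const __m128i a0 = _mm_unpacklo_epi16(r0, r1); /* 00 10 01 11 02 12 03 13 */
        const __m128i a1 = _mm_unpacklo_epi16(r2, r3); /* 20 30 21 31 22 32 23 33 */
        *out0 = _mm_unpacklo_epi32(a0, a1);            /* 00 10 20 30 01 11 21 31 */
        *out1 = _mm_unpackhi_epi32(a0, a1);            /* 02 12 22 32 03 13 23 33 */
    }

The 8-bit and 8x8 variants in the same header are the same idea with more unpack stages.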
|
D | lpf_common_sse2.h |
    34:  w0 = _mm_unpacklo_epi16(*x0, *x1);  // 00 10 01 11 02 12 03 13   [in highbd_transpose6x6_sse2()]
    35:  w1 = _mm_unpacklo_epi16(*x2, *x3);  // 20 30 21 31 22 32 23 33   [in highbd_transpose6x6_sse2()]
    36:  w2 = _mm_unpacklo_epi16(*x4, *x5);  // 40 50 41 51 42 52 43 53   [in highbd_transpose6x6_sse2()]
    66:  w0 = _mm_unpacklo_epi16(*x0, *x1);  // 00 10 01 11 02 12 03 13   [in highbd_transpose4x8_8x4_low_sse2()]
    67:  w1 = _mm_unpacklo_epi16(*x2, *x3);  // 20 30 21 31 22 32 23 33   [in highbd_transpose4x8_8x4_low_sse2()]
    139: w0 = _mm_unpacklo_epi16(*x0, *x1);  // 00 10 01 11 02 12 03 13   [in highbd_transpose8x8_low_sse2()]
    140: w1 = _mm_unpacklo_epi16(*x2, *x3);  // 20 30 21 31 22 32 23 33   [in highbd_transpose8x8_low_sse2()]
    141: w2 = _mm_unpacklo_epi16(*x4, *x5);  // 40 50 41 51 42 52 43 53   [in highbd_transpose8x8_low_sse2()]
    142: w3 = _mm_unpacklo_epi16(*x6, *x7);  // 60 70 61 71 62 72 63 73   [in highbd_transpose8x8_low_sse2()]
    238: *d0 = _mm_unpacklo_epi16(   [in transpose4x8_8x4_low_sse2()]
    [all …]
|
D | highbd_convolve_ssse3.c |
    61:  s[0] = _mm_unpacklo_epi16(s0, s1);   [in av1_highbd_convolve_y_sr_ssse3()]
    62:  s[1] = _mm_unpacklo_epi16(s2, s3);   [in av1_highbd_convolve_y_sr_ssse3()]
    63:  s[2] = _mm_unpacklo_epi16(s4, s5);   [in av1_highbd_convolve_y_sr_ssse3()]
    69:  s[0 + 8] = _mm_unpacklo_epi16(s1, s2);   [in av1_highbd_convolve_y_sr_ssse3()]
    70:  s[1 + 8] = _mm_unpacklo_epi16(s3, s4);   [in av1_highbd_convolve_y_sr_ssse3()]
    71:  s[2 + 8] = _mm_unpacklo_epi16(s5, s6);   [in av1_highbd_convolve_y_sr_ssse3()]
    83:  s[3] = _mm_unpacklo_epi16(s6, s7);   [in av1_highbd_convolve_y_sr_ssse3()]
    86:  s[3 + 8] = _mm_unpacklo_epi16(s7, s8);   [in av1_highbd_convolve_y_sr_ssse3()]
    236: __m128i res = _mm_unpacklo_epi16(res_even1, res_odd1);   [in av1_highbd_convolve_x_sr_ssse3()]
|
D | fwd_txfm_impl_sse2.h |
    112: const __m128i t0 = _mm_unpacklo_epi16(r0, r1);   [in FDCT8x8_2D()]
    114: const __m128i t2 = _mm_unpacklo_epi16(r2, r3);   [in FDCT8x8_2D()]
    158: const __m128i d0 = _mm_unpacklo_epi16(q6, q5);   [in FDCT8x8_2D()]
    198: const __m128i t0 = _mm_unpacklo_epi16(x0, x3);   [in FDCT8x8_2D()]
    200: const __m128i t2 = _mm_unpacklo_epi16(x1, x2);   [in FDCT8x8_2D()]
    252: const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);   [in FDCT8x8_2D()]
    253: const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);   [in FDCT8x8_2D()]
    256: const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);   [in FDCT8x8_2D()]
    257: const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7);   [in FDCT8x8_2D()]
|
D | highbd_convolve_sse2.c |
    51:  srcReg23_lo = _mm_unpacklo_epi16(srcReg2, srcReg3);   [in aom_highbd_filter_block1d4_v4_sse2()]
    54:  srcReg34_lo = _mm_unpacklo_epi16(srcReg3, srcReg4);   [in aom_highbd_filter_block1d4_v4_sse2()]
    58:  srcReg45_lo = _mm_unpacklo_epi16(srcReg4, srcReg5);   [in aom_highbd_filter_block1d4_v4_sse2()]
    61:  srcReg56_lo = _mm_unpacklo_epi16(srcReg5, srcReg6);   [in aom_highbd_filter_block1d4_v4_sse2()]
    190: srcReg23_lo = _mm_unpacklo_epi16(srcReg2, srcReg3);   [in aom_highbd_filter_block1d8_v4_sse2()]
    194: srcReg34_lo = _mm_unpacklo_epi16(srcReg3, srcReg4);   [in aom_highbd_filter_block1d8_v4_sse2()]
    200: srcReg45_lo = _mm_unpacklo_epi16(srcReg4, srcReg5);   [in aom_highbd_filter_block1d8_v4_sse2()]
    205: srcReg56_lo = _mm_unpacklo_epi16(srcReg5, srcReg6);   [in aom_highbd_filter_block1d8_v4_sse2()]
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | transpose_sse2.h |
    32:  return _mm_unpacklo_epi16(a0, a1);   [in transpose_8bit_4x4()]
    61:  const __m128i b0 = _mm_unpacklo_epi16(a0, a1);   [in transpose_8bit_8x8()]
    63:  const __m128i b2 = _mm_unpacklo_epi16(a2, a3);   [in transpose_8bit_8x8()]
    105: const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]);   [in transpose_16bit_4x4()]
    106: const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]);   [in transpose_16bit_4x4()]
    131: const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]);   [in transpose_16bit_4x8()]
    132: const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]);   [in transpose_16bit_4x8()]
    133: const __m128i a2 = _mm_unpacklo_epi16(in[4], in[5]);   [in transpose_16bit_4x8()]
    134: const __m128i a3 = _mm_unpacklo_epi16(in[6], in[7]);   [in transpose_16bit_4x8()]
    177: const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]);   [in transpose_16bit_8x8()]
    [all …]
|
D | fwd_txfm_impl_sse2.h |
    130: const __m128i r0 = _mm_unpacklo_epi16(in0, in1);   [in FDCT4x4_2D()]
    240: const __m128i y0 = _mm_unpacklo_epi16(x0, x1);   [in FDCT4x4_2D()]
    337: const __m128i t0 = _mm_unpacklo_epi16(r0, r1);   [in FDCT8x8_2D()]
    339: const __m128i t2 = _mm_unpacklo_epi16(r2, r3);   [in FDCT8x8_2D()]
    383: const __m128i d0 = _mm_unpacklo_epi16(q6, q5);   [in FDCT8x8_2D()]
    423: const __m128i t0 = _mm_unpacklo_epi16(x0, x3);   [in FDCT8x8_2D()]
    425: const __m128i t2 = _mm_unpacklo_epi16(x1, x2);   [in FDCT8x8_2D()]
    477: const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);   [in FDCT8x8_2D()]
    478: const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);   [in FDCT8x8_2D()]
    481: const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);   [in FDCT8x8_2D()]
    [all …]
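In these FDCT kernels the unpack feeds a butterfly: _mm_madd_epi16 applies a pair of cospi constants to the interleaved inputs, and the result is rounded and shifted back to the transform's fixed-point scale. A stripped-down butterfly in that style (Q14 constants and a 2^13 rounding bias are assumed here; treat it as a sketch of the pattern, not the file's exact code):

    #include <emmintrin.h>  /* SSE2 */

    /* For the low four 16-bit lanes of x0/x1, with c0 and c1 in Q14:
     *   y0 = (c0*x0 + c1*x1 + 2^13) >> 14
     *   y1 = (c1*x0 - c0*x1 + 2^13) >> 14   (32-bit results) */
    static inline void butterfly_lo_sketch(__m128i x0, __m128i x1,
                                           short c0, short c1,
                                           __m128i *y0, __m128i *y1)
    {
        const __m128i t   = _mm_unpacklo_epi16(x0, x1);   /* x0_i, x1_i pairs */
        const __m128i k_p = _mm_setr_epi16(c0, c1, c0, c1, c0, c1, c0, c1);
        const __m128i k_m = _mm_setr_epi16(c1, -c0, c1, -c0, c1, -c0, c1, -c0);
        const __m128i rnd = _mm_set1_epi32(1 << 13);      /* 0.5 in Q14 */
        *y0 = _mm_srai_epi32(_mm_add_epi32(_mm_madd_epi16(t, k_p), rnd), 14);
        *y1 = _mm_srai_epi32(_mm_add_epi32(_mm_madd_epi16(t, k_m), rnd), 14);
    }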
|
D | post_proc_sse2.c |
    55:  sumsq_0 = _mm_unpacklo_epi16(tmp_0, tmp_1);   [in vpx_mbpost_proc_down_sse2()]
    64:  sumsq_0 = _mm_add_epi32(sumsq_0, _mm_unpacklo_epi16(a, zero));   [in vpx_mbpost_proc_down_sse2()]
    94:  sumsq_0 = _mm_sub_epi32(sumsq_0, _mm_unpacklo_epi16(above_sq, zero));   [in vpx_mbpost_proc_down_sse2()]
    98:  sumsq_0 = _mm_add_epi32(sumsq_0, _mm_unpacklo_epi16(below_sq, zero));   [in vpx_mbpost_proc_down_sse2()]
    110: mask_0 = _mm_sub_epi32(mask_0, _mm_unpacklo_epi16(multmp_0, multmp_1));   [in vpx_mbpost_proc_down_sse2()]
|
D | inv_txfm_sse2.c |
    19:  const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]);   [in transpose_16bit_4()]
    22:  res[0] = _mm_unpacklo_epi16(tr0_0, tr0_1);   [in transpose_16bit_4()]
    91:  u[0] = _mm_unpacklo_epi16(in[0], in[1]);   [in idct4_sse2()]
    255: s[0] = _mm_unpacklo_epi16(in[7], in[0]);   [in iadst8_sse2()]
    257: s[2] = _mm_unpacklo_epi16(in[5], in[2]);   [in iadst8_sse2()]
    259: s[4] = _mm_unpacklo_epi16(in[3], in[4]);   [in iadst8_sse2()]
    261: s[6] = _mm_unpacklo_epi16(in[1], in[6]);   [in iadst8_sse2()]
    332: u[0] = _mm_unpacklo_epi16(in[4], in[5]);   [in iadst8_sse2()]
    334: u[2] = _mm_unpacklo_epi16(in[6], in[7]);   [in iadst8_sse2()]
    371: u[0] = _mm_unpacklo_epi16(s[2], s[3]);   [in iadst8_sse2()]
    [all …]
|
/external/libhevc/common/x86/ |
D | ihevc_itrans_recon_ssse3_intr.c |
    172: m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_0, m_ge_zero16b_flag_row0);   [in ihevc_itrans_recon_4x4_ttype1_ssse3()]
    173: m_temp_reg_1 = _mm_unpacklo_epi16(m_temp_reg_1, m_ge_zero16b_flag_row1);   [in ihevc_itrans_recon_4x4_ttype1_ssse3()]
    174: m_temp_reg_2 = _mm_unpacklo_epi16(m_temp_reg_2, m_ge_zero16b_flag_row2);   [in ihevc_itrans_recon_4x4_ttype1_ssse3()]
    175: m_temp_reg_3 = _mm_unpacklo_epi16(m_temp_reg_3, m_ge_zero16b_flag_row3);   [in ihevc_itrans_recon_4x4_ttype1_ssse3()]
    295: m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22);   [in ihevc_itrans_recon_4x4_ttype1_ssse3()]
    296: m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23);   [in ihevc_itrans_recon_4x4_ttype1_ssse3()]
    317: m_temp_reg_20 = _mm_unpacklo_epi16(m_temp_reg_30, m_ge_zero16b_flag_row0);   [in ihevc_itrans_recon_4x4_ttype1_ssse3()]
    318: m_temp_reg_21 = _mm_unpacklo_epi16(m_temp_reg_31, m_ge_zero16b_flag_row1);   [in ihevc_itrans_recon_4x4_ttype1_ssse3()]
    433: m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22);   [in ihevc_itrans_recon_4x4_ttype1_ssse3()]
    434: m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23);   [in ihevc_itrans_recon_4x4_ttype1_ssse3()]
    [all …]
|
D | ihevc_itrans_recon_sse42_intr.c |
    246: m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22);   [in ihevc_itrans_recon_4x4_ttype1_sse42()]
    247: m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23);   [in ihevc_itrans_recon_4x4_ttype1_sse42()]
    337: m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22);   [in ihevc_itrans_recon_4x4_ttype1_sse42()]
    338: m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23);   [in ihevc_itrans_recon_4x4_ttype1_sse42()]
    572: m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22);   [in ihevc_itrans_recon_4x4_sse42()]
    573: m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23);   [in ihevc_itrans_recon_4x4_sse42()]
    664: m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22);   [in ihevc_itrans_recon_4x4_sse42()]
    665: m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23);   [in ihevc_itrans_recon_4x4_sse42()]
    885: m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74);   [in ihevc_itrans_recon_8x8_sse42()]
    903: m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76);   [in ihevc_itrans_recon_8x8_sse42()]
    [all …]
|
D | ihevc_weighted_pred_ssse3_intr.c |
    150: lvl_shift_4x32b = _mm_unpacklo_epi16(res_temp0_4x32b, res_temp1_4x32b);   [in ihevc_weighted_pred_uni_ssse3()]
    191: res_temp0_4x32b = _mm_unpacklo_epi16(res_temp0_4x32b, src_temp0_8x16b);   [in ihevc_weighted_pred_uni_ssse3()]
    192: res_temp1_4x32b = _mm_unpacklo_epi16(res_temp1_4x32b, src_temp1_8x16b);   [in ihevc_weighted_pred_uni_ssse3()]
    193: res_temp2_4x32b = _mm_unpacklo_epi16(res_temp2_4x32b, src_temp2_8x16b);   [in ihevc_weighted_pred_uni_ssse3()]
    194: res_temp3_4x32b = _mm_unpacklo_epi16(res_temp3_4x32b, src_temp3_8x16b);   [in ihevc_weighted_pred_uni_ssse3()]
    290: res_temp0_4x32b = _mm_unpacklo_epi16(res_temp0_4x32b, src_temp0_8x16b);   [in ihevc_weighted_pred_uni_ssse3()]
    291: res_temp1_4x32b = _mm_unpacklo_epi16(res_temp1_4x32b, src_temp1_8x16b);   [in ihevc_weighted_pred_uni_ssse3()]
    440: lvl_shift_4x32b = _mm_unpacklo_epi16(res_temp0_4x32b, res_temp1_4x32b);   [in ihevc_weighted_pred_chroma_uni_ssse3()]
    483: res_temp0_4x32b = _mm_unpacklo_epi16(res_temp0_4x32b, src_temp0_8x16b);   [in ihevc_weighted_pred_chroma_uni_ssse3()]
    484: res_temp1_4x32b = _mm_unpacklo_epi16(res_temp1_4x32b, src_temp1_8x16b);   [in ihevc_weighted_pred_chroma_uni_ssse3()]
    [all …]
|
/external/libaom/libaom/av1/common/x86/ |
D | highbd_jnt_convolve_sse4.c |
    73:  s[0] = _mm_unpacklo_epi16(s0, s1);   [in av1_highbd_dist_wtd_convolve_y_sse4_1()]
    74:  s[1] = _mm_unpacklo_epi16(s2, s3);   [in av1_highbd_dist_wtd_convolve_y_sse4_1()]
    75:  s[2] = _mm_unpacklo_epi16(s4, s5);   [in av1_highbd_dist_wtd_convolve_y_sse4_1()]
    81:  s[0 + 8] = _mm_unpacklo_epi16(s1, s2);   [in av1_highbd_dist_wtd_convolve_y_sse4_1()]
    82:  s[1 + 8] = _mm_unpacklo_epi16(s3, s4);   [in av1_highbd_dist_wtd_convolve_y_sse4_1()]
    83:  s[2 + 8] = _mm_unpacklo_epi16(s5, s6);   [in av1_highbd_dist_wtd_convolve_y_sse4_1()]
    95:  s[3] = _mm_unpacklo_epi16(s6, s7);   [in av1_highbd_dist_wtd_convolve_y_sse4_1()]
    98:  s[3 + 8] = _mm_unpacklo_epi16(s7, s8);   [in av1_highbd_dist_wtd_convolve_y_sse4_1()]
    121: const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero);   [in av1_highbd_dist_wtd_convolve_y_sse4_1()]
    122: const __m128i data_ref_1 = _mm_unpacklo_epi16(data_1, zero);   [in av1_highbd_dist_wtd_convolve_y_sse4_1()]
    [all …]
|
D | highbd_convolve_2d_ssse3.c |
    91:  __m128i res = _mm_unpacklo_epi16(res_even1, res_odd1);   [in av1_highbd_convolve_2d_sr_ssse3()]
    106: s[0] = _mm_unpacklo_epi16(s0, s1);   [in av1_highbd_convolve_2d_sr_ssse3()]
    107: s[1] = _mm_unpacklo_epi16(s2, s3);   [in av1_highbd_convolve_2d_sr_ssse3()]
    108: s[2] = _mm_unpacklo_epi16(s4, s5);   [in av1_highbd_convolve_2d_sr_ssse3()]
    114: s[0 + 8] = _mm_unpacklo_epi16(s1, s2);   [in av1_highbd_convolve_2d_sr_ssse3()]
    115: s[1 + 8] = _mm_unpacklo_epi16(s3, s4);   [in av1_highbd_convolve_2d_sr_ssse3()]
    116: s[2 + 8] = _mm_unpacklo_epi16(s5, s6);   [in av1_highbd_convolve_2d_sr_ssse3()]
    128: s[3] = _mm_unpacklo_epi16(s6, s7);   [in av1_highbd_convolve_2d_sr_ssse3()]
    131: s[3 + 8] = _mm_unpacklo_epi16(s7, s8);   [in av1_highbd_convolve_2d_sr_ssse3()]
|
D | cfl_sse2.c |
    40: sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros),   [in subtract_average_sse2()]
    41:                          _mm_unpacklo_epi16(l1, zeros)));   [in subtract_average_sse2()]
    49: sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros),   [in subtract_average_sse2()]
    53: sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros),   [in subtract_average_sse2()]
|
D | highbd_convolve_2d_sse4.c |
    71:  const __m128i data_ref_0_lo = _mm_unpacklo_epi16(data_0, zero);   [in av1_highbd_dist_wtd_convolve_2d_copy_sse4_1()]
    74:  const __m128i res_32b_lo = _mm_unpacklo_epi16(res, zero);   [in av1_highbd_dist_wtd_convolve_2d_copy_sse4_1()]
    126: const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero);   [in av1_highbd_dist_wtd_convolve_2d_copy_sse4_1()]
    127: const __m128i data_ref_1 = _mm_unpacklo_epi16(data_1, zero);   [in av1_highbd_dist_wtd_convolve_2d_copy_sse4_1()]
    129: const __m128i res_32b = _mm_unpacklo_epi16(res, zero);   [in av1_highbd_dist_wtd_convolve_2d_copy_sse4_1()]
    305: _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),   [in av1_highbd_dist_wtd_convolve_2d_sse4_1()]
    308: _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),   [in av1_highbd_dist_wtd_convolve_2d_sse4_1()]
    311: _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),   [in av1_highbd_dist_wtd_convolve_2d_sse4_1()]
    314: _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),   [in av1_highbd_dist_wtd_convolve_2d_sse4_1()]
|
D | av1_txfm_sse2.h |
    32:  const __m128i t0 = _mm_unpacklo_epi16(*in0, *in1);   [in btf_16_w4_sse2()]
    46:  __m128i t0 = _mm_unpacklo_epi16(in0, in1); \
    62:  __m128i t0 = _mm_unpacklo_epi16(in0, in1); \
    99:  const __m128i a_lo = _mm_unpacklo_epi16(a, a);   [in store_16bit_to_32bit_w4()]
    106: const __m128i a_lo = _mm_unpacklo_epi16(a, a);   [in store_16bit_to_32bit()]
    123: const __m128i a_lo = _mm_unpacklo_epi16(a, one);   [in store_rect_16bit_to_32bit_w4()]
    131: const __m128i a_lo = _mm_unpacklo_epi16(a, one);   [in store_rect_16bit_to_32bit()]
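The store_16bit_to_32bit helpers unpack a register with itself, which duplicates every 16-bit lane across a 32-bit slot; presumably an arithmetic right shift then recovers the sign-extended value, the standard SSE2 substitute for SSE4.1's _mm_cvtepi16_epi32. A sketch of that idiom (my helper name, not the header's):

    #include <emmintrin.h>  /* SSE2 */

    /* Sign-extend the low four 16-bit lanes of a to 32 bits without SSE4.1. */
    static inline __m128i sign_extend_lo_16_to_32(__m128i a)
    {
        /* a0 a0 a1 a1 a2 a2 a3 a3 : each value duplicated within a 32-bit lane */
        const __m128i a_lo = _mm_unpacklo_epi16(a, a);
        /* the arithmetic shift discards the low copy and replicates the sign bit */
        return _mm_srai_epi32(a_lo, 16);
    }

The rect variants unpack with a constant `one` instead, presumably so a following _mm_madd_epi16 can fold a scaling term into the widening step.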
|
/external/libmpeg2/common/x86/ |
D | impeg2_idct_recon_sse42_intr.c |
    223: m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74);   [in impeg2_idct_recon_sse42()]
    241: m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76);   [in impeg2_idct_recon_sse42()]
    277: m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73);   [in impeg2_idct_recon_sse42()]
    399: m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51);   [in impeg2_idct_recon_sse42()]
    400: m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53);   [in impeg2_idct_recon_sse42()]
    404: m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55);   [in impeg2_idct_recon_sse42()]
    405: m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57);   [in impeg2_idct_recon_sse42()]
    431: m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74);   [in impeg2_idct_recon_sse42()]
    449: m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76);   [in impeg2_idct_recon_sse42()]
    485: m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73);   [in impeg2_idct_recon_sse42()]
    [all …]
|
/external/libvpx/libvpx/vp9/encoder/x86/ |
D | vp9_dct_intrin_sse2.c |
    60:  const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]);   [in transpose_4x4()]
    83:  u[0] = _mm_unpacklo_epi16(in[0], in[1]);   [in fdct4_sse2()]
    84:  u[1] = _mm_unpacklo_epi16(in[3], in[2]);   [in fdct4_sse2()]
    119: u[0] = _mm_unpacklo_epi16(in[0], in[1]);   [in fadst4_sse2()]
    120: u[1] = _mm_unpacklo_epi16(in[2], in[3]);   [in fadst4_sse2()]
    121: u[2] = _mm_unpacklo_epi16(in7, kZero);   [in fadst4_sse2()]
    122: u[3] = _mm_unpacklo_epi16(in[2], kZero);   [in fadst4_sse2()]
    123: u[4] = _mm_unpacklo_epi16(in[3], kZero);   [in fadst4_sse2()]
    265: const __m128i t0 = _mm_unpacklo_epi16(r0, r1);   [in vp9_fdct8x8_quant_sse2()]
    267: const __m128i t2 = _mm_unpacklo_epi16(r2, r3);   [in vp9_fdct8x8_quant_sse2()]
    [all …]
|
/external/libaom/libaom/av1/encoder/x86/ |
D | wedge_utils_sse2.c |
    51:  const __m128i v_rd0l_w = _mm_unpacklo_epi16(v_d0_w, v_r0_w);   [in av1_wedge_sse_from_residuals_sse2()]
    53:  const __m128i v_rd1l_w = _mm_unpacklo_epi16(v_d1_w, v_r1_w);   [in av1_wedge_sse_from_residuals_sse2()]
    58:  const __m128i v_m0l_w = _mm_unpacklo_epi16(v_m0_w, v_mask_max_w);   [in av1_wedge_sse_from_residuals_sse2()]
    60:  const __m128i v_m1l_w = _mm_unpacklo_epi16(v_m1_w, v_mask_max_w);   [in av1_wedge_sse_from_residuals_sse2()]
    211: const __m128i v_ab0l_w = _mm_unpacklo_epi16(v_a0_w, v_b0_w);   [in av1_wedge_compute_delta_squares_sse2()]
    213: const __m128i v_ab1l_w = _mm_unpacklo_epi16(v_a1_w, v_b1_w);   [in av1_wedge_compute_delta_squares_sse2()]
    215: const __m128i v_ab2l_w = _mm_unpacklo_epi16(v_a2_w, v_b2_w);   [in av1_wedge_compute_delta_squares_sse2()]
    217: const __m128i v_ab3l_w = _mm_unpacklo_epi16(v_a3_w, v_b3_w);   [in av1_wedge_compute_delta_squares_sse2()]
|
/external/tensorflow/tensorflow/core/kernels/ |
D | sparse_matmul_op.h |
    197: return _mm_castsi128_ps(_mm_unpacklo_epi16(zero, tmp));
    205: return _mm_castsi128_ps(_mm_unpacklo_epi16(zero, tmp));
    214: return _mm_castsi128_ps(_mm_unpacklo_epi16(zero, tmp));   [in pexpand_bf16_l()]
    349: _mm_castsi128_ps(_mm_unpacklo_epi16(zero, tmp)));
    357: _mm_castsi128_ps(_mm_unpacklo_epi16(zero, tmp)));
    367: _mm_castsi128_ps(_mm_unpacklo_epi16(zero, tmp)));
    375: _mm_castsi128_ps(_mm_unpacklo_epi16(zero, tmp)));
    390: __m128i res_l = _mm_unpacklo_epi16(zero, low);   [in pexpand_bf16_l()]
    392: __m128i res_h = _mm_unpacklo_epi16(zero, high);   [in pexpand_bf16_l()]
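pexpand_bf16_l() exploits the fact that bfloat16 is the top half of an IEEE-754 single: interleaving a zero below each 16-bit pattern rebuilds the full 32-bit float. A standalone sketch of the trick (the load helper and function name here are mine, not the kernel's):

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>

    /* Expand four raw bfloat16 bit patterns to four floats. Works because a
     * bfloat16 is exactly the high 16 bits of a float32. */
    static inline __m128 bf16x4_to_ps(const uint16_t *src)
    {
        const __m128i zero = _mm_setzero_si128();
        const __m128i bf16 = _mm_loadl_epi64((const __m128i *)src); /* 4 x u16 */
        /* zero fills the low 16 bits of each 32-bit lane, the bfloat16 bits
         * land in the high 16 bits -- the float32 layout. */
        return _mm_castsi128_ps(_mm_unpacklo_epi16(zero, bf16));
    }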
|