
Searched refs:vmovl_s16 (Results 1 – 25 of 38) sorted by relevance
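All of the hits below share one core idiom: vmovl_s16 sign-extends the four 16-bit lanes of an int16x4_t into the four 32-bit lanes of an int32x4_t, and it is usually paired with vget_low_s16/vget_high_s16 to widen each half of an int16x8_t. A minimal sketch of that idiom, with illustrative names that do not come from any of the files below:

    #include <arm_neon.h>

    /* Widen eight signed 16-bit values to eight signed 32-bit values.
     * vmovl_s16 sign-extends an int16x4_t to an int32x4_t, so an
     * int16x8_t is handled as two halves via vget_low/high_s16. */
    static void widen_s16x8_to_s32(const int16_t src[8], int32_t dst[8]) {
      const int16x8_t v = vld1q_s16(src);
      vst1q_s32(dst,     vmovl_s16(vget_low_s16(v)));   /* lanes 0..3 */
      vst1q_s32(dst + 4, vmovl_s16(vget_high_s16(v)));  /* lanes 4..7 */
    }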


/external/XNNPACK/src/qs8-vadd/gen/
minmax-neon-ld64-x32.c
53 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
54 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
55 int32x4_t vacc89AB = vmulq_s32(vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
56 int32x4_t vaccCDEF = vmulq_s32(vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
57 int32x4_t vaccGHIJ = vmulq_s32(vmovl_s16(vget_low_s16(vexGHIJKLMN)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
58 int32x4_t vaccKLMN = vmulq_s32(vmovl_s16(vget_high_s16(vexGHIJKLMN)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
59 int32x4_t vaccOPQR = vmulq_s32(vmovl_s16(vget_low_s16(vexOPQRSTUV)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
60 int32x4_t vaccSTUV = vmulq_s32(vmovl_s16(vget_high_s16(vexOPQRSTUV)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
62 vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
63 vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
[all …]
minmax-neon-ld64-x24.c
49 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
50 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
51 int32x4_t vacc89AB = vmulq_s32(vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
52 int32x4_t vaccCDEF = vmulq_s32(vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
53 int32x4_t vaccGHIJ = vmulq_s32(vmovl_s16(vget_low_s16(vexGHIJKLMN)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
54 int32x4_t vaccKLMN = vmulq_s32(vmovl_s16(vget_high_s16(vexGHIJKLMN)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
56 vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
57 vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
58 vacc89AB = vmlaq_s32(vacc89AB, vmovl_s16(vget_low_s16(vey89ABCDEF)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
59 vaccCDEF = vmlaq_s32(vaccCDEF, vmovl_s16(vget_high_s16(vey89ABCDEF)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
[all …]
minmax-neon-ld64-x16.c
45 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
46 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
47 int32x4_t vacc89AB = vmulq_s32(vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
48 int32x4_t vaccCDEF = vmulq_s32(vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
50 vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
51 vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
52 vacc89AB = vmlaq_s32(vacc89AB, vmovl_s16(vget_low_s16(vey89ABCDEF)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
53 vaccCDEF = vmlaq_s32(vaccCDEF, vmovl_s16(vget_high_s16(vey89ABCDEF)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
84 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
85 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
[all …]
minmax-neon-ld64-x8.c
41 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
42 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
44 vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
45 vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
71 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
72 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
74 vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
75 vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
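In the XNNPACK qs8-vadd kernels above, the widened halves feed a fixed-point weighted add: the zero-point-adjusted x term is multiplied by vx_multiplier with vmulq_s32, and the y term is multiply-accumulated on top with vmlaq_s32. A hedged sketch of one 8-lane step, reusing the variable names from the hits and omitting the loads, zero-point subtraction, and final requantization:

    #include <arm_neon.h>

    /* acc = widen(ex) * vx_multiplier + widen(ey) * vy_multiplier,
     * computed per int16x4_t half; vex/vey stand for the already
     * zero-point-adjusted inputs shown in the matches above. */
    static void qs8_vadd_accumulate_x8(int16x8_t vex, int16x8_t vey,
                                       int32x4_t vx_multiplier,
                                       int32x4_t vy_multiplier,
                                       int32x4_t acc[2]) {
      int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex)), vx_multiplier);
      int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex)), vx_multiplier);
      vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey)), vy_multiplier);
      vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey)), vy_multiplier);
      acc[0] = vacc0123;
      acc[1] = vacc4567;
    }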
/external/XNNPACK/src/qu8-vadd/
minmax-neon.c
49 int32x4_t vacc0_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa0)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
50 int32x4_t vacc1_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa1)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
51 int32x4_t vacc2_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa2)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
52 int32x4_t vacc3_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa3)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
58 vacc0_lo = vmlaq_s32(vacc0_lo, vmovl_s16(vget_low_s16(vxb0)), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
59 vacc1_lo = vmlaq_s32(vacc1_lo, vmovl_s16(vget_low_s16(vxb1)), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
60 vacc2_lo = vmlaq_s32(vacc2_lo, vmovl_s16(vget_low_s16(vxb2)), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
61 vacc3_lo = vmlaq_s32(vacc3_lo, vmovl_s16(vget_low_s16(vxb3)), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
115 int32x4_t vacc0_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa0)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
116 int32x4_t vacc1_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa1)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
[all …]
/external/XNNPACK/src/qs8-vaddc/gen/
minmax-neon-ld64-x32.c
50 int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
51 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
52 int32x4_t vacc89AB = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
53 int32x4_t vaccCDEF = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
54 int32x4_t vaccGHIJ = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vexGHIJKLMN)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
55 int32x4_t vaccKLMN = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vexGHIJKLMN)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
56 int32x4_t vaccOPQR = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vexOPQRSTUV)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
57 int32x4_t vaccSTUV = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vexOPQRSTUV)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
100 int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
101 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
minmax-neon-ld64-x24.c
48 int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
49 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
50 int32x4_t vacc89AB = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
51 int32x4_t vaccCDEF = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
52 int32x4_t vaccGHIJ = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vexGHIJKLMN)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
53 int32x4_t vaccKLMN = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vexGHIJKLMN)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
91 int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
92 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
minmax-neon-ld64-x16.c
46 int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
47 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
48 int32x4_t vacc89AB = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
49 int32x4_t vaccCDEF = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
78 int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
79 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
minmax-neon-ld64-x8.c
44 int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
45 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
69 int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
70 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
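The qs8-vaddc kernels above are the add-constant variant: the constant y operand is folded into a precomputed vy_bias, so each widened half needs only a single vmlaq_s32 instead of a vmulq_s32/vmlaq_s32 pair. A sketch under the same assumptions as before:

    #include <arm_neon.h>

    /* acc = vy_bias + widen(ex) * vx_multiplier, per int16x4_t half. */
    static void qs8_vaddc_accumulate_x8(int16x8_t vex, int32x4_t vy_bias,
                                        int32x4_t vx_multiplier,
                                        int32x4_t acc[2]) {
      acc[0] = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex)), vx_multiplier);
      acc[1] = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex)), vx_multiplier);
    }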
/external/libhevc/encoder/arm/
ihevce_hme_utils_neon.c
169 dst0_4x32b = vmovl_s16(vget_low_s16(src4_8x16b)); in ihevce_get_wt_inp_4x8_neon()
170 dst1_4x32b = vmovl_s16(vget_low_s16(src5_8x16b)); in ihevce_get_wt_inp_4x8_neon()
171 dst2_4x32b = vmovl_s16(vget_low_s16(src6_8x16b)); in ihevce_get_wt_inp_4x8_neon()
172 dst3_4x32b = vmovl_s16(vget_low_s16(src7_8x16b)); in ihevce_get_wt_inp_4x8_neon()
174 dst4_4x32b = vmovl_s16(vget_high_s16(src4_8x16b)); in ihevce_get_wt_inp_4x8_neon()
175 dst5_4x32b = vmovl_s16(vget_high_s16(src5_8x16b)); in ihevce_get_wt_inp_4x8_neon()
176 dst6_4x32b = vmovl_s16(vget_high_s16(src6_8x16b)); in ihevce_get_wt_inp_4x8_neon()
177 dst7_4x32b = vmovl_s16(vget_high_s16(src7_8x16b)); in ihevce_get_wt_inp_4x8_neon()
592 dst0_4x32b = vmovl_s16(vget_low_s16(src4_8x16b)); in hme_get_wt_inp_ctb_neon()
593 dst1_4x32b = vmovl_s16(vget_low_s16(src5_8x16b)); in hme_get_wt_inp_ctb_neon()
[all …]
ihevce_common_utils_neon.c
129 reg0[0] = vmovl_s16(vget_low_s16(a2)); in ihevce_wt_avg_2d_16x1_neon()
130 reg0[1] = vmovl_s16(vget_high_s16(a2)); in ihevce_wt_avg_2d_16x1_neon()
131 reg0[2] = vmovl_s16(vget_low_s16(a3)); in ihevce_wt_avg_2d_16x1_neon()
132 reg0[3] = vmovl_s16(vget_high_s16(a3)); in ihevce_wt_avg_2d_16x1_neon()
134 reg1[0] = vmovl_s16(vget_low_s16(a4)); in ihevce_wt_avg_2d_16x1_neon()
135 reg1[1] = vmovl_s16(vget_high_s16(a4)); in ihevce_wt_avg_2d_16x1_neon()
136 reg1[2] = vmovl_s16(vget_low_s16(a5)); in ihevce_wt_avg_2d_16x1_neon()
137 reg1[3] = vmovl_s16(vget_high_s16(a5)); in ihevce_wt_avg_2d_16x1_neon()
193 a8 = vmovl_s16(vget_low_s16(a0)); in ihevce_wt_avg_2d_8x1_neon()
194 a9 = vmovl_s16(vget_high_s16(a0)); in ihevce_wt_avg_2d_8x1_neon()
[all …]
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/
add.h
88 int32x4_t x111 = vmovl_s16(input1_val_low_low); in AddElementwise()
89 int32x4_t x112 = vmovl_s16(input1_val_low_high); in AddElementwise()
90 int32x4_t x121 = vmovl_s16(input1_val_high_low); in AddElementwise()
91 int32x4_t x122 = vmovl_s16(input1_val_high_high); in AddElementwise()
92 int32x4_t x211 = vmovl_s16(input2_val_low_low); in AddElementwise()
93 int32x4_t x212 = vmovl_s16(input2_val_low_high); in AddElementwise()
94 int32x4_t x221 = vmovl_s16(input2_val_high_low); in AddElementwise()
95 int32x4_t x222 = vmovl_s16(input2_val_high_high); in AddElementwise()
196 int32x4_t x11 = vmovl_s16(input1_val_low); in AddScalarBroadcast()
197 int32x4_t x12 = vmovl_s16(input1_val_high); in AddScalarBroadcast()
[all …]
mean.h
77 vmovl_s16(vget_low_s16(input_data_low_shift)); in MeanImpl()
79 vmovl_s16(vget_high_s16(input_data_low_shift)); in MeanImpl()
81 vmovl_s16(vget_low_s16(input_data_high_shift)); in MeanImpl()
83 vmovl_s16(vget_high_s16(input_data_high_shift)); in MeanImpl()
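The input1_val_low_low .. high_high names in the TFLite add.h hits above suggest a two-step widening: sixteen int8 lanes are first extended to int16 (that step is not in the matches shown), then each int16x8_t half is split and extended again with vmovl_s16 into four int32x4_t blocks. An illustrative sketch of that shape, assuming an int8x16_t input; none of these names come from the TFLite source:

    #include <arm_neon.h>

    /* Two-step sign extension: int8x16_t -> 2x int16x8_t -> 4x int32x4_t. */
    static void widen_s8x16_to_s32(int8x16_t v, int32x4_t out[4]) {
      const int16x8_t lo = vmovl_s8(vget_low_s8(v));
      const int16x8_t hi = vmovl_s8(vget_high_s8(v));
      out[0] = vmovl_s16(vget_low_s16(lo));   /* low_low   */
      out[1] = vmovl_s16(vget_high_s16(lo));  /* low_high  */
      out[2] = vmovl_s16(vget_low_s16(hi));   /* high_low  */
      out[3] = vmovl_s16(vget_high_s16(hi));  /* high_high */
    }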
/external/XNNPACK/src/qs8-vadd/
neon-ld64.c.in
43 … int32x4_t vacc${ABC[N:N+4]} = vmulq_s32(vmovl_s16(vget_low_s16(vex${ABC[N:N+8]})), vx_multiplier);
44 …int32x4_t vacc${ABC[N+4:N+8]} = vmulq_s32(vmovl_s16(vget_high_s16(vex${ABC[N:N+8]})), vx_multiplie…
47 …vacc${ABC[N:N+4]} = vmlaq_s32(vacc${ABC[N:N+4]}, vmovl_s16(vget_low_s16(vey${ABC[N:N+8]})), vy_mul…
48 …vacc${ABC[N+4:N+8]} = vmlaq_s32(vacc${ABC[N+4:N+8]}, vmovl_s16(vget_high_s16(vey${ABC[N:N+8]})), v…
95 int32x4_t vacc${ABC[0:4]} = vmulq_s32(vmovl_s16(vget_low_s16(vex${ABC[0:8]})), vx_multiplier);
96 … int32x4_t vacc${ABC[4:8]} = vmulq_s32(vmovl_s16(vget_high_s16(vex${ABC[0:8]})), vx_multiplier);
98 …vacc${ABC[0:4]} = vmlaq_s32(vacc${ABC[0:4]}, vmovl_s16(vget_low_s16(vey${ABC[0:8]})), vy_multiplie…
99 …vacc${ABC[4:8]} = vmlaq_s32(vacc${ABC[4:8]}, vmovl_s16(vget_high_s16(vey${ABC[0:8]})), vy_multipli…
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/
neon_tensor_utils.cc
873 const int32x4_t first_half = vmovl_s16(vget_low_s16(output_val)); in NeonMatrixBatchVectorAccumulateImpl()
874 const int32x4_t second_half = vmovl_s16(vget_high_s16(output_val)); in NeonMatrixBatchVectorAccumulateImpl()
931 const int32x4_t output_val_1 = vmovl_s16(vget_low_s16(first_half)); in NeonMatrixBatchVectorAccumulateImpl()
932 const int32x4_t output_val_2 = vmovl_s16(vget_high_s16(first_half)); in NeonMatrixBatchVectorAccumulateImpl()
933 const int32x4_t output_val_3 = vmovl_s16(vget_low_s16(second_half)); in NeonMatrixBatchVectorAccumulateImpl()
934 const int32x4_t output_val_4 = vmovl_s16(vget_high_s16(second_half)); in NeonMatrixBatchVectorAccumulateImpl()
1507 const int32x4_t val_s32_0 = vmovl_s16(vget_low_s16(val_s16)); in NeonApplyLayerNorm()
1508 const int32x4_t val_s32_1 = vmovl_s16(vget_high_s16(val_s16)); in NeonApplyLayerNorm()
1549 vshlq_n_s32(vmovl_s16(vget_low_s16(val_s16_0)), 10), mean_dup); in NeonApplyLayerNorm()
1551 vshlq_n_s32(vmovl_s16(vget_high_s16(val_s16_0)), 10), mean_dup); in NeonApplyLayerNorm()
[all …]
optimized_ops.h
1023 vmovl_s16(vget_low_s16(input_data_low_shift)); in MeanImpl()
1025 vmovl_s16(vget_high_s16(input_data_low_shift)); in MeanImpl()
1027 vmovl_s16(vget_low_s16(input_data_high_shift)); in MeanImpl()
1029 vmovl_s16(vget_high_s16(input_data_high_shift)); in MeanImpl()
1889 int32x4_t x11 = vmovl_s16(input1_val_low); in AddElementwise()
1890 int32x4_t x12 = vmovl_s16(input1_val_high); in AddElementwise()
1891 int32x4_t x21 = vmovl_s16(input2_val_low); in AddElementwise()
1892 int32x4_t x22 = vmovl_s16(input2_val_high); in AddElementwise()
1980 int32x4_t x11 = vmovl_s16(input1_val_low); in AddScalarBroadcast()
1981 int32x4_t x12 = vmovl_s16(input1_val_high); in AddScalarBroadcast()
[all …]
/external/XNNPACK/src/qs8-vaddc/
neon-ld64.c.in
46 …int32x4_t vacc${ABC[N:N+4]} = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex${ABC[N:N+8]})), vx_mul…
47 …int32x4_t vacc${ABC[N+4:N+8]} = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex${ABC[N:N+8]})), vx_…
91 …int32x4_t vacc${ABC[0:4]} = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex${ABC[0:8]})), vx_multipl…
92 …int32x4_t vacc${ABC[4:8]} = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex${ABC[0:8]})), vx_multip…
/external/libgav1/libgav1/src/dsp/arm/
inverse_transform_10bit_neon.cc
207 const int32x4_t result = vmovl_s16(vqmovn_s32(xy_shifted)); in DctDcOnly()
301 s[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(s[i], v_row_shift))); in Dct4_NEON()
379 s[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(s[i], v_row_shift))); in Dct8_NEON()
497 s[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(s[i], v_row_shift))); in Dct16_NEON()
692 output[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(output[i], v_row_shift))); in Dct32_NEON()
921 output[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(output[i], v_row_shift))); in Dct64_NEON()
979 x[0] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[0], v_row_shift))); in Adst4_NEON()
980 x[1] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[1], v_row_shift))); in Adst4_NEON()
981 x[2] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[2], v_row_shift))); in Adst4_NEON()
982 x[3] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[3], v_row_shift))); in Adst4_NEON()
[all …]
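The libgav1 rows above use vmovl_s16(vqmovn_s32(x)) as a clamp rather than a plain widen: vqmovn_s32 saturates each 32-bit lane to the int16 range and vmovl_s16 sign-extends it straight back, so the values stay in an int32x4_t but are bounded to [-32768, 32767]. A minimal sketch:

    #include <arm_neon.h>

    /* Clamp each 32-bit lane to the int16 range while keeping 32-bit lanes. */
    static int32x4_t clamp_s32_to_s16_range(int32x4_t x) {
      return vmovl_s16(vqmovn_s32(x));
    }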
/external/libopus/silk/arm/
NSQ_neon.c
86 int32x4_t coef0 = vmovl_s16(vget_low_s16(coef16)); in silk_NSQ_noise_shape_feedback_loop_neon()
87 int32x4_t coef1 = vmovl_s16(vget_high_s16(coef16)); in silk_NSQ_noise_shape_feedback_loop_neon()
NSQ_del_dec_neon_intr.c
759 tmp1_s32x4 = vbslq_s32( t_u32x4, vmovl_s16( q1_Q10_s16x4 ), vmovl_s16( q2_Q10_s16x4 ) ); in silk_noise_shape_quantizer_del_dec_neon()
760 tmp2_s32x4 = vbslq_s32( t_u32x4, vmovl_s16( q2_Q10_s16x4 ), vmovl_s16( q1_Q10_s16x4 ) ); in silk_noise_shape_quantizer_del_dec_neon()
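In the libopus NSQ hits above, both 16-bit candidate vectors are widened before a per-lane select: vbslq_s32 picks, lane by lane, from the first widened operand where the mask bits are set and from the second otherwise. A sketch with illustrative parameter names:

    #include <arm_neon.h>

    /* Widen two int16x4_t candidates and select per lane with a mask. */
    static int32x4_t select_widened(uint32x4_t mask, int16x4_t a, int16x4_t b) {
      return vbslq_s32(mask, vmovl_s16(a), vmovl_s16(b));
    }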
/external/libaom/libaom/av1/common/arm/
convolve_neon.h
89 sum_0 = vmovl_s16(vget_low_s16(sum)); in wiener_convolve8_horiz_8x8()
90 sum_1 = vmovl_s16(vget_high_s16(sum)); in wiener_convolve8_horiz_8x8()
140 sum_0 = vmovl_s16(sum); in wiener_convolve8_horiz_4x8()
selfguided_neon.c
436 q12345 = vaddq_s32(vmovl_s16(s1), q2345); in boxsum2()
437 q23456 = vaddq_s32(q2345, vmovl_s16(s6)); in boxsum2()
438 q34567 = vaddq_s32(q4567, vmovl_s16(s3)); in boxsum2()
439 q45678 = vaddq_s32(q4567, vmovl_s16(s8)); in boxsum2()
1073 a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0); in final_filter_fast_internal()
1074 a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1); in final_filter_fast_internal()
1099 a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0); in final_filter_fast_internal()
1100 a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1); in final_filter_fast_internal()
1151 a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0); in final_filter_internal()
1152 a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1); in final_filter_internal()
mem_neon.h
533 const int32x4_t v0 = vmovl_s16(vget_low_s16(a)); in store_s16q_to_tran_low()
534 const int32x4_t v1 = vmovl_s16(vget_high_s16(a)); in store_s16q_to_tran_low()
/external/libvpx/libvpx/vpx_dsp/arm/
mem_neon.h
78 const int32x4_t v0 = vmovl_s16(vget_low_s16(a)); in store_s16q_to_tran_low()
79 const int32x4_t v1 = vmovl_s16(vget_high_s16(a)); in store_s16q_to_tran_low()
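store_s16q_to_tran_low, which appears in both the libaom and libvpx mem_neon.h hits, widens an int16x8_t of coefficients into two int32x4_t vectors before storing. A sketch of that idiom, assuming the coefficient type (tran_low_t) is 32-bit in the build in question:

    #include <arm_neon.h>

    /* Store eight 16-bit coefficients into a 32-bit coefficient buffer. */
    static void store_s16q_as_s32(int32_t *buf, int16x8_t a) {
      vst1q_s32(buf,     vmovl_s16(vget_low_s16(a)));
      vst1q_s32(buf + 4, vmovl_s16(vget_high_s16(a)));
    }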
/external/webrtc/modules/audio_coding/codecs/isac/fix/source/
transform_neon.c
396 int32x4_t tmpr_0 = vmovl_s16(vget_low_s16(tmpr)); in PostShiftAndDivideAndDemodulateNeon()
397 int32x4_t tmpi_0 = vmovl_s16(vget_low_s16(tmpi)); in PostShiftAndDivideAndDemodulateNeon()
402 int32x4_t tmpr_1 = vmovl_s16(vget_high_s16(tmpr)); in PostShiftAndDivideAndDemodulateNeon()
403 int32x4_t tmpi_1 = vmovl_s16(vget_high_s16(tmpi)); in PostShiftAndDivideAndDemodulateNeon()
