/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-neon-ld64-x32.c |
      53  int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
      54  int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
      55  int32x4_t vacc89AB = vmulq_s32(vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
      56  int32x4_t vaccCDEF = vmulq_s32(vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
      57  int32x4_t vaccGHIJ = vmulq_s32(vmovl_s16(vget_low_s16(vexGHIJKLMN)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
      58  int32x4_t vaccKLMN = vmulq_s32(vmovl_s16(vget_high_s16(vexGHIJKLMN)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
      59  int32x4_t vaccOPQR = vmulq_s32(vmovl_s16(vget_low_s16(vexOPQRSTUV)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
      60  int32x4_t vaccSTUV = vmulq_s32(vmovl_s16(vget_high_s16(vexOPQRSTUV)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
      62  vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey01234567)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
      63  vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
      [all …]
|
D | minmax-neon-ld64-x24.c |
      49  int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
      50  int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
      51  int32x4_t vacc89AB = vmulq_s32(vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
      52  int32x4_t vaccCDEF = vmulq_s32(vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
      53  int32x4_t vaccGHIJ = vmulq_s32(vmovl_s16(vget_low_s16(vexGHIJKLMN)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
      54  int32x4_t vaccKLMN = vmulq_s32(vmovl_s16(vget_high_s16(vexGHIJKLMN)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
      56  vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey01234567)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
      57  vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
      58  vacc89AB = vmlaq_s32(vacc89AB, vmovl_s16(vget_low_s16(vey89ABCDEF)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
      59  vaccCDEF = vmlaq_s32(vaccCDEF, vmovl_s16(vget_high_s16(vey89ABCDEF)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
      [all …]
|
D | minmax-neon-ld64-x16.c |
      45  int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
      46  int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
      47  int32x4_t vacc89AB = vmulq_s32(vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
      48  int32x4_t vaccCDEF = vmulq_s32(vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
      50  vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey01234567)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
      51  vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
      52  vacc89AB = vmlaq_s32(vacc89AB, vmovl_s16(vget_low_s16(vey89ABCDEF)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
      53  vaccCDEF = vmlaq_s32(vaccCDEF, vmovl_s16(vget_high_s16(vey89ABCDEF)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
      84  int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
      85  int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
      [all …]
|
D | minmax-neon-ld64-x8.c |
      41  int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
      42  int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
      44  vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey01234567)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
      45  vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
      71  int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
      72  int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
      74  vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey01234567)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
      75  vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
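The x8/x16/x24/x32 kernels above all share one idiom: each int16x8_t of zero-point-adjusted inputs is split into low and high halves, sign-extended to 32 bits with vmovl_s16, multiplied by the per-operand requantization multiplier, and combined with vmlaq_s32. A minimal, self-contained sketch of that step (the helper name widen_mul_acc is illustrative, not XNNPACK's):

#include <arm_neon.h>

/* Sketch only: widen 8 signed 16-bit lanes into two int32x4_t halves and
 * accumulate x*vx_multiplier + y*vy_multiplier in 32-bit precision,
 * mirroring the matches listed above. */
static inline void widen_mul_acc(int16x8_t vex, int16x8_t vey,
                                 int32x4_t vx_multiplier, int32x4_t vy_multiplier,
                                 int32x4_t *vacc_lo, int32x4_t *vacc_hi) {
  int32x4_t lo = vmulq_s32(vmovl_s16(vget_low_s16(vex)), vx_multiplier);
  int32x4_t hi = vmulq_s32(vmovl_s16(vget_high_s16(vex)), vx_multiplier);
  lo = vmlaq_s32(lo, vmovl_s16(vget_low_s16(vey)), vy_multiplier);
  hi = vmlaq_s32(hi, vmovl_s16(vget_high_s16(vey)), vy_multiplier);
  *vacc_lo = lo;
  *vacc_hi = hi;
}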
|
/external/XNNPACK/src/qu8-vadd/ |
D | minmax-neon.c |
      49  int32x4_t vacc0_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa0)), va_multiplier);  in xnn_qu8_vadd_minmax_ukernel__neon()
      50  int32x4_t vacc1_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa1)), va_multiplier);  in xnn_qu8_vadd_minmax_ukernel__neon()
      51  int32x4_t vacc2_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa2)), va_multiplier);  in xnn_qu8_vadd_minmax_ukernel__neon()
      52  int32x4_t vacc3_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa3)), va_multiplier);  in xnn_qu8_vadd_minmax_ukernel__neon()
      58  vacc0_lo = vmlaq_s32(vacc0_lo, vmovl_s16(vget_low_s16(vxb0)), vb_multiplier);  in xnn_qu8_vadd_minmax_ukernel__neon()
      59  vacc1_lo = vmlaq_s32(vacc1_lo, vmovl_s16(vget_low_s16(vxb1)), vb_multiplier);  in xnn_qu8_vadd_minmax_ukernel__neon()
      60  vacc2_lo = vmlaq_s32(vacc2_lo, vmovl_s16(vget_low_s16(vxb2)), vb_multiplier);  in xnn_qu8_vadd_minmax_ukernel__neon()
      61  vacc3_lo = vmlaq_s32(vacc3_lo, vmovl_s16(vget_low_s16(vxb3)), vb_multiplier);  in xnn_qu8_vadd_minmax_ukernel__neon()
      115  int32x4_t vacc0_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa0)), va_multiplier);  in xnn_qu8_vadd_minmax_ukernel__neon()
      116  int32x4_t vacc1_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa1)), va_multiplier);  in xnn_qu8_vadd_minmax_ukernel__neon()
      [all …]
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-neon-ld64-x32.c |
      50  int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
      51  int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
      52  int32x4_t vacc89AB = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
      53  int32x4_t vaccCDEF = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
      54  int32x4_t vaccGHIJ = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vexGHIJKLMN)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
      55  int32x4_t vaccKLMN = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vexGHIJKLMN)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
      56  int32x4_t vaccOPQR = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vexOPQRSTUV)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
      57  int32x4_t vaccSTUV = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vexOPQRSTUV)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
      100  int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
      101  int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
|
D | minmax-neon-ld64-x24.c |
      48  int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
      49  int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
      50  int32x4_t vacc89AB = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
      51  int32x4_t vaccCDEF = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
      52  int32x4_t vaccGHIJ = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vexGHIJKLMN)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
      53  int32x4_t vaccKLMN = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vexGHIJKLMN)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
      91  int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
      92  int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
|
D | minmax-neon-ld64-x16.c |
      46  int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
      47  int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
      48  int32x4_t vacc89AB = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
      49  int32x4_t vaccCDEF = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex89ABCDEF)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
      78  int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
      79  int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
|
D | minmax-neon-ld64-x8.c |
      44  int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
      45  int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
      69  int32x4_t vacc0123 = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
      70  int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
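In these vaddc variants the second operand is a per-tensor constant, so its product with the y multiplier is pre-folded into a vy_bias vector and each widened half of x needs only a single vmlaq_s32. A hedged sketch of that shape (helper name assumed, not from XNNPACK):

#include <arm_neon.h>

/* Sketch only: the constant operand's contribution already lives in vy_bias,
 * so one multiply-accumulate per widened half is enough. */
static inline void widen_mla_bias(int16x8_t vex, int32x4_t vx_multiplier,
                                  int32x4_t vy_bias,
                                  int32x4_t *vacc_lo, int32x4_t *vacc_hi) {
  *vacc_lo = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex)), vx_multiplier);
  *vacc_hi = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex)), vx_multiplier);
}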
|
/external/libhevc/encoder/arm/ |
D | ihevce_hme_utils_neon.c |
      169  dst0_4x32b = vmovl_s16(vget_low_s16(src4_8x16b));  in ihevce_get_wt_inp_4x8_neon()
      170  dst1_4x32b = vmovl_s16(vget_low_s16(src5_8x16b));  in ihevce_get_wt_inp_4x8_neon()
      171  dst2_4x32b = vmovl_s16(vget_low_s16(src6_8x16b));  in ihevce_get_wt_inp_4x8_neon()
      172  dst3_4x32b = vmovl_s16(vget_low_s16(src7_8x16b));  in ihevce_get_wt_inp_4x8_neon()
      174  dst4_4x32b = vmovl_s16(vget_high_s16(src4_8x16b));  in ihevce_get_wt_inp_4x8_neon()
      175  dst5_4x32b = vmovl_s16(vget_high_s16(src5_8x16b));  in ihevce_get_wt_inp_4x8_neon()
      176  dst6_4x32b = vmovl_s16(vget_high_s16(src6_8x16b));  in ihevce_get_wt_inp_4x8_neon()
      177  dst7_4x32b = vmovl_s16(vget_high_s16(src7_8x16b));  in ihevce_get_wt_inp_4x8_neon()
      592  dst0_4x32b = vmovl_s16(vget_low_s16(src4_8x16b));  in hme_get_wt_inp_ctb_neon()
      593  dst1_4x32b = vmovl_s16(vget_low_s16(src5_8x16b));  in hme_get_wt_inp_ctb_neon()
      [all …]
|
D | ihevce_common_utils_neon.c |
      129  reg0[0] = vmovl_s16(vget_low_s16(a2));  in ihevce_wt_avg_2d_16x1_neon()
      130  reg0[1] = vmovl_s16(vget_high_s16(a2));  in ihevce_wt_avg_2d_16x1_neon()
      131  reg0[2] = vmovl_s16(vget_low_s16(a3));  in ihevce_wt_avg_2d_16x1_neon()
      132  reg0[3] = vmovl_s16(vget_high_s16(a3));  in ihevce_wt_avg_2d_16x1_neon()
      134  reg1[0] = vmovl_s16(vget_low_s16(a4));  in ihevce_wt_avg_2d_16x1_neon()
      135  reg1[1] = vmovl_s16(vget_high_s16(a4));  in ihevce_wt_avg_2d_16x1_neon()
      136  reg1[2] = vmovl_s16(vget_low_s16(a5));  in ihevce_wt_avg_2d_16x1_neon()
      137  reg1[3] = vmovl_s16(vget_high_s16(a5));  in ihevce_wt_avg_2d_16x1_neon()
      193  a8 = vmovl_s16(vget_low_s16(a0));  in ihevce_wt_avg_2d_8x1_neon()
      194  a9 = vmovl_s16(vget_high_s16(a0));  in ihevce_wt_avg_2d_8x1_neon()
      [all …]
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/ |
D | add.h |
      88  int32x4_t x111 = vmovl_s16(input1_val_low_low);  in AddElementwise()
      89  int32x4_t x112 = vmovl_s16(input1_val_low_high);  in AddElementwise()
      90  int32x4_t x121 = vmovl_s16(input1_val_high_low);  in AddElementwise()
      91  int32x4_t x122 = vmovl_s16(input1_val_high_high);  in AddElementwise()
      92  int32x4_t x211 = vmovl_s16(input2_val_low_low);  in AddElementwise()
      93  int32x4_t x212 = vmovl_s16(input2_val_low_high);  in AddElementwise()
      94  int32x4_t x221 = vmovl_s16(input2_val_high_low);  in AddElementwise()
      95  int32x4_t x222 = vmovl_s16(input2_val_high_high);  in AddElementwise()
      196  int32x4_t x11 = vmovl_s16(input1_val_low);  in AddScalarBroadcast()
      197  int32x4_t x12 = vmovl_s16(input1_val_high);  in AddScalarBroadcast()
      [all …]
|
D | mean.h |
      77  vmovl_s16(vget_low_s16(input_data_low_shift));  in MeanImpl()
      79  vmovl_s16(vget_high_s16(input_data_low_shift));  in MeanImpl()
      81  vmovl_s16(vget_low_s16(input_data_high_shift));  in MeanImpl()
      83  vmovl_s16(vget_high_s16(input_data_high_shift));  in MeanImpl()
|
/external/XNNPACK/src/qs8-vadd/ |
D | neon-ld64.c.in |
      43  … int32x4_t vacc${ABC[N:N+4]} = vmulq_s32(vmovl_s16(vget_low_s16(vex${ABC[N:N+8]})), vx_multiplier);
      44  …int32x4_t vacc${ABC[N+4:N+8]} = vmulq_s32(vmovl_s16(vget_high_s16(vex${ABC[N:N+8]})), vx_multiplie…
      47  …vacc${ABC[N:N+4]} = vmlaq_s32(vacc${ABC[N:N+4]}, vmovl_s16(vget_low_s16(vey${ABC[N:N+8]})), vy_mul…
      48  …vacc${ABC[N+4:N+8]} = vmlaq_s32(vacc${ABC[N+4:N+8]}, vmovl_s16(vget_high_s16(vey${ABC[N:N+8]})), v…
      95  int32x4_t vacc${ABC[0:4]} = vmulq_s32(vmovl_s16(vget_low_s16(vex${ABC[0:8]})), vx_multiplier);
      96  … int32x4_t vacc${ABC[4:8]} = vmulq_s32(vmovl_s16(vget_high_s16(vex${ABC[0:8]})), vx_multiplier);
      98  …vacc${ABC[0:4]} = vmlaq_s32(vacc${ABC[0:4]}, vmovl_s16(vget_low_s16(vey${ABC[0:8]})), vy_multiplie…
      99  …vacc${ABC[4:8]} = vmlaq_s32(vacc${ABC[4:8]}, vmovl_s16(vget_high_s16(vey${ABC[0:8]})), vy_multipli…
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/ |
D | neon_tensor_utils.cc |
      873  const int32x4_t first_half = vmovl_s16(vget_low_s16(output_val));  in NeonMatrixBatchVectorAccumulateImpl()
      874  const int32x4_t second_half = vmovl_s16(vget_high_s16(output_val));  in NeonMatrixBatchVectorAccumulateImpl()
      931  const int32x4_t output_val_1 = vmovl_s16(vget_low_s16(first_half));  in NeonMatrixBatchVectorAccumulateImpl()
      932  const int32x4_t output_val_2 = vmovl_s16(vget_high_s16(first_half));  in NeonMatrixBatchVectorAccumulateImpl()
      933  const int32x4_t output_val_3 = vmovl_s16(vget_low_s16(second_half));  in NeonMatrixBatchVectorAccumulateImpl()
      934  const int32x4_t output_val_4 = vmovl_s16(vget_high_s16(second_half));  in NeonMatrixBatchVectorAccumulateImpl()
      1507  const int32x4_t val_s32_0 = vmovl_s16(vget_low_s16(val_s16));  in NeonApplyLayerNorm()
      1508  const int32x4_t val_s32_1 = vmovl_s16(vget_high_s16(val_s16));  in NeonApplyLayerNorm()
      1549  vshlq_n_s32(vmovl_s16(vget_low_s16(val_s16_0)), 10), mean_dup);  in NeonApplyLayerNorm()
      1551  vshlq_n_s32(vmovl_s16(vget_high_s16(val_s16_0)), 10), mean_dup);  in NeonApplyLayerNorm()
      [all …]
|
D | optimized_ops.h |
      1023  vmovl_s16(vget_low_s16(input_data_low_shift));  in MeanImpl()
      1025  vmovl_s16(vget_high_s16(input_data_low_shift));  in MeanImpl()
      1027  vmovl_s16(vget_low_s16(input_data_high_shift));  in MeanImpl()
      1029  vmovl_s16(vget_high_s16(input_data_high_shift));  in MeanImpl()
      1889  int32x4_t x11 = vmovl_s16(input1_val_low);  in AddElementwise()
      1890  int32x4_t x12 = vmovl_s16(input1_val_high);  in AddElementwise()
      1891  int32x4_t x21 = vmovl_s16(input2_val_low);  in AddElementwise()
      1892  int32x4_t x22 = vmovl_s16(input2_val_high);  in AddElementwise()
      1980  int32x4_t x11 = vmovl_s16(input1_val_low);  in AddScalarBroadcast()
      1981  int32x4_t x12 = vmovl_s16(input1_val_high);  in AddScalarBroadcast()
      [all …]
|
/external/XNNPACK/src/qs8-vaddc/ |
D | neon-ld64.c.in |
      46  …int32x4_t vacc${ABC[N:N+4]} = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex${ABC[N:N+8]})), vx_mul…
      47  …int32x4_t vacc${ABC[N+4:N+8]} = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex${ABC[N:N+8]})), vx_…
      91  …int32x4_t vacc${ABC[0:4]} = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vex${ABC[0:8]})), vx_multipl…
      92  …int32x4_t vacc${ABC[4:8]} = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex${ABC[0:8]})), vx_multip…
|
/external/libgav1/libgav1/src/dsp/arm/ |
D | inverse_transform_10bit_neon.cc |
      207  const int32x4_t result = vmovl_s16(vqmovn_s32(xy_shifted));  in DctDcOnly()
      301  s[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(s[i], v_row_shift)));  in Dct4_NEON()
      379  s[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(s[i], v_row_shift)));  in Dct8_NEON()
      497  s[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(s[i], v_row_shift)));  in Dct16_NEON()
      692  output[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(output[i], v_row_shift)));  in Dct32_NEON()
      921  output[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(output[i], v_row_shift)));  in Dct64_NEON()
      979  x[0] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[0], v_row_shift)));  in Adst4_NEON()
      980  x[1] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[1], v_row_shift)));  in Adst4_NEON()
      981  x[2] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[2], v_row_shift)));  in Adst4_NEON()
      982  x[3] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[3], v_row_shift)));  in Adst4_NEON()
      [all …]
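In these libgav1 transforms vmovl_s16 plays a different role: vqrshlq_s32 applies the rounding row shift (a negative per-lane count shifts right), vqmovn_s32 saturates the result to the int16 range, and vmovl_s16 immediately widens it back so the clamped values stay in int32 lanes for the column pass. A minimal sketch of that combination, assuming v_row_shift holds negated shift amounts:

#include <arm_neon.h>

/* Sketch: rounding shift, saturate to [-32768, 32767], then widen back to int32. */
static inline int32x4_t round_shift_clamp_to_s16(int32x4_t x, int32x4_t v_row_shift) {
  return vmovl_s16(vqmovn_s32(vqrshlq_s32(x, v_row_shift)));
}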
|
/external/libopus/silk/arm/ |
D | NSQ_neon.c |
      86  int32x4_t coef0 = vmovl_s16(vget_low_s16(coef16));  in silk_NSQ_noise_shape_feedback_loop_neon()
      87  int32x4_t coef1 = vmovl_s16(vget_high_s16(coef16));  in silk_NSQ_noise_shape_feedback_loop_neon()
|
D | NSQ_del_dec_neon_intr.c |
      759  tmp1_s32x4 = vbslq_s32( t_u32x4, vmovl_s16( q1_Q10_s16x4 ), vmovl_s16( q2_Q10_s16x4 ) );  in silk_noise_shape_quantizer_del_dec_neon()
      760  tmp2_s32x4 = vbslq_s32( t_u32x4, vmovl_s16( q2_Q10_s16x4 ), vmovl_s16( q1_Q10_s16x4 ) );  in silk_noise_shape_quantizer_del_dec_neon()
|
/external/libaom/libaom/av1/common/arm/ |
D | convolve_neon.h |
      89  sum_0 = vmovl_s16(vget_low_s16(sum));  in wiener_convolve8_horiz_8x8()
      90  sum_1 = vmovl_s16(vget_high_s16(sum));  in wiener_convolve8_horiz_8x8()
      140  sum_0 = vmovl_s16(sum);  in wiener_convolve8_horiz_4x8()
|
D | selfguided_neon.c |
      436  q12345 = vaddq_s32(vmovl_s16(s1), q2345);  in boxsum2()
      437  q23456 = vaddq_s32(q2345, vmovl_s16(s6));  in boxsum2()
      438  q34567 = vaddq_s32(q4567, vmovl_s16(s3));  in boxsum2()
      439  q45678 = vaddq_s32(q4567, vmovl_s16(s8));  in boxsum2()
      1073  a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0);  in final_filter_fast_internal()
      1074  a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1);  in final_filter_fast_internal()
      1099  a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0);  in final_filter_fast_internal()
      1100  a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1);  in final_filter_fast_internal()
      1151  a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0);  in final_filter_internal()
      1152  a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1);  in final_filter_internal()
|
D | mem_neon.h |
      533  const int32x4_t v0 = vmovl_s16(vget_low_s16(a));  in store_s16q_to_tran_low()
      534  const int32x4_t v1 = vmovl_s16(vget_high_s16(a));  in store_s16q_to_tran_low()
|
/external/libvpx/libvpx/vpx_dsp/arm/ |
D | mem_neon.h |
      78  const int32x4_t v0 = vmovl_s16(vget_low_s16(a));  in store_s16q_to_tran_low()
      79  const int32x4_t v1 = vmovl_s16(vget_high_s16(a));  in store_s16q_to_tran_low()
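Both mem_neon.h copies (libaom and libvpx) use vmovl_s16 to widen eight int16 coefficients before storing them into a 32-bit tran_low_t buffer. A minimal sketch of that store, under the assumption that tran_low_t is int32_t (the high-bitdepth configuration):

#include <arm_neon.h>
#include <stdint.h>

typedef int32_t tran_low_t;  /* assumption: 32-bit coefficient type */

/* Sketch of the store_s16q_to_tran_low() pattern listed above: widen and store. */
static inline void store_s16q_to_tran_low_sketch(tran_low_t *buf, const int16x8_t a) {
  const int32x4_t v0 = vmovl_s16(vget_low_s16(a));
  const int32x4_t v1 = vmovl_s16(vget_high_s16(a));
  vst1q_s32(buf, v0);
  vst1q_s32(buf + 4, v1);
}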
|
/external/webrtc/modules/audio_coding/codecs/isac/fix/source/ |
D | transform_neon.c |
      396  int32x4_t tmpr_0 = vmovl_s16(vget_low_s16(tmpr));  in PostShiftAndDivideAndDemodulateNeon()
      397  int32x4_t tmpi_0 = vmovl_s16(vget_low_s16(tmpi));  in PostShiftAndDivideAndDemodulateNeon()
      402  int32x4_t tmpr_1 = vmovl_s16(vget_high_s16(tmpr));  in PostShiftAndDivideAndDemodulateNeon()
      403  int32x4_t tmpi_1 = vmovl_s16(vget_high_s16(tmpi));  in PostShiftAndDivideAndDemodulateNeon()
|