/external/gemmlowp/fixedpoint/ |
D | fixedpoint_neon.h | 26 struct FixedPointRawTypeTraits<int32x4_t> { 38 inline int32x4_t BitAnd(int32x4_t a, int32x4_t b) { 48 inline int32x4_t BitOr(int32x4_t a, int32x4_t b) { 58 inline int32x4_t BitXor(int32x4_t a, int32x4_t b) { 68 inline int32x4_t BitNot(int32x4_t a) { 78 inline int32x4_t Add(int32x4_t a, int32x4_t b) { 88 inline int32x4_t Sub(int32x4_t a, int32x4_t b) { 98 inline int32x4_t Neg(int32x4_t a) { 108 inline int32x4_t ShiftLeft(int32x4_t a, int offset) { 118 inline int32x4_t ShiftLeft(int32x4_t a, int32x4_t offset) { [all …]
|
/external/clang/test/CodeGen/ |
D | aarch64-neon-2velem.c | 39 int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlaq_lane_s32() 66 int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmla_laneq_s32() 75 int32x4_t test_vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { in test_vmlaq_laneq_s32() 111 int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlsq_lane_s32() 138 int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmls_laneq_s32() 147 int32x4_t test_vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { in test_vmlsq_laneq_s32() 179 int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) { in test_vmulq_lane_s32() 235 int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) { in test_vmul_laneq_s32() 243 int32x4_t test_vmulq_laneq_s32(int32x4_t a, int32x4_t v) { in test_vmulq_laneq_s32() 505 int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { in test_vmlal_lane_s16() [all …]
|
/external/llvm-project/clang/test/CodeGen/ |
D | aarch64-neon-2velem.c | 56 int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlaq_lane_s32() 95 int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmla_laneq_s32() 108 int32x4_t test_vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { in test_vmlaq_laneq_s32() 160 int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlsq_lane_s32() 199 int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmls_laneq_s32() 212 int32x4_t test_vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { in test_vmlsq_laneq_s32() 260 int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) { in test_vmulq_lane_s32() 344 int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) { in test_vmul_laneq_s32() 356 int32x4_t test_vmulq_laneq_s32(int32x4_t a, int32x4_t v) { in test_vmulq_laneq_s32() 660 int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { in test_vmlal_lane_s16() [all …]
|
D | arm-neon-range-checks.c | 13 void test_vdotq_lane(int32x4_t r, int8x16_t a, int8x8_t b) { in test_vdotq_lane() 28 void test_vdotq_laneq(int32x4_t r, int8x16_t a, int8x16_t b) { in test_vdotq_laneq() 51 void test_vdup_laneq(int32x4_t v) { in test_vdup_laneq() 58 void test_vdupq_laneq(int32x4_t v) { in test_vdupq_laneq() 73 void test_vmlaq_lane(int32x4_t a, int32x4_t b, int32x2_t v) { in test_vmlaq_lane() 81 void test_vmla_laneq(int32x2_t a, int32x2_t b, int32x4_t v) { in test_vmla_laneq() 88 void test_vmlaq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) { in test_vmlaq_laneq() 95 void test_vmlal_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) { in test_vmlal_high_lane() 102 void test_vmlal_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) { in test_vmlal_high_laneq() 118 void test_vmlal_laneq(int64x2_t a, int32x2_t b, int32x4_t v) { in test_vmlal_laneq() [all …]
|
/external/llvm-project/clang/test/CodeGen/arm-mve-intrinsics/ |
D | vqdmlad.c | 38 int32x4_t test_vqdmladhq_s32(int32x4_t inactive, int32x4_t a, int32x4_t b) { in test_vqdmladhq_s32() 77 int32x4_t test_vqdmladhxq_s32(int32x4_t inactive, int32x4_t a, int32x4_t b) { in test_vqdmladhxq_s32() 116 int32x4_t test_vqdmlsdhq_s32(int32x4_t inactive, int32x4_t a, int32x4_t b) { in test_vqdmlsdhq_s32() 155 int32x4_t test_vqdmlsdhxq_s32(int32x4_t inactive, int32x4_t a, int32x4_t b) { in test_vqdmlsdhxq_s32() 194 int32x4_t test_vqrdmladhq_s32(int32x4_t inactive, int32x4_t a, int32x4_t b) { in test_vqrdmladhq_s32() 233 int32x4_t test_vqrdmladhxq_s32(int32x4_t inactive, int32x4_t a, int32x4_t b) { in test_vqrdmladhxq_s32() 272 int32x4_t test_vqrdmlsdhq_s32(int32x4_t inactive, int32x4_t a, int32x4_t b) { in test_vqrdmlsdhq_s32() 311 int32x4_t test_vqrdmlsdhxq_s32(int32x4_t inactive, int32x4_t a, int32x4_t b) { in test_vqrdmlsdhxq_s32() 356 int32x4_t test_vqdmladhq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { in test_vqdmladhq_m_s32() 401 int32x4_t test_vqdmladhxq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { in test_vqdmladhxq_m_s32() [all …]
|
D | vmlldav.c | 43 int64_t test_vmlaldavaq_s32(int64_t a, int32x4_t b, int32x4_t c) { in test_vmlaldavaq_s32() 131 int64_t test_vmlaldavaxq_s32(int64_t a, int32x4_t b, int32x4_t c) { in test_vmlaldavaxq_s32() 175 int64_t test_vmlsldavaq_s32(int64_t a, int32x4_t b, int32x4_t c) { in test_vmlsldavaq_s32() 219 int64_t test_vmlsldavaxq_s32(int64_t a, int32x4_t b, int32x4_t c) { in test_vmlsldavaxq_s32() 241 int64_t test_vrmlaldavhaq_s32(int64_t a, int32x4_t b, int32x4_t c) { in test_vrmlaldavhaq_s32() 285 int64_t test_vrmlaldavhaxq_s32(int64_t a, int32x4_t b, int32x4_t c) { in test_vrmlaldavhaxq_s32() 307 int64_t test_vrmlsldavhaq_s32(int64_t a, int32x4_t b, int32x4_t c) { in test_vrmlsldavhaq_s32() 329 int64_t test_vrmlsldavhaxq_s32(int64_t a, int32x4_t b, int32x4_t c) { in test_vrmlsldavhaxq_s32() 377 int64_t test_vmlaldavaq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) { in test_vmlaldavaq_p_s32() 473 int64_t test_vmlaldavaxq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) { in test_vmlaldavaxq_p_s32() [all …]
|
D | vadc.c | 17 int32x4_t test_vadciq_s32(int32x4_t a, int32x4_t b, unsigned *carry_out) in test_vadciq_s32() 82 int32x4_t test_vadcq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred1… in test_vadcq_m_s32() 101 int32x4_t test_vsbciq_s32(int32x4_t a, int32x4_t b, unsigned *carry_out) { in test_vsbciq_s32() 139 int32x4_t test_vsbcq_s32(int32x4_t a, int32x4_t b, unsigned *carry) { in test_vsbcq_s32() 179 int32x4_t test_vsbciq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry_out, mve_… in test_vsbciq_m_s32() 221 int32x4_t test_vsbcq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred1… in test_vsbcq_m_s32()
|
D | vhcaddq.c | 40 int32x4_t test_vhcaddq_rot90_s32(int32x4_t a, int32x4_t b) in test_vhcaddq_rot90_s32() 82 int32x4_t test_vhcaddq_rot270_s32(int32x4_t a, int32x4_t b) in test_vhcaddq_rot270_s32() 130 int32x4_t test_vhcaddq_rot90_x_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) in test_vhcaddq_rot90_x_s32() 178 int32x4_t test_vhcaddq_rot270_x_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) in test_vhcaddq_rot270_x_s32() 226 int32x4_t test_vhcaddq_rot90_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) in test_vhcaddq_rot90_m_s32() 274 int32x4_t test_vhcaddq_rot270_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) in test_vhcaddq_rot270_m_s32()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c8-minmax-neon-mull-padal.c | 59 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 60 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 61 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 62 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 63 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 64 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 65 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 66 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 67 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 68 …int32x4_t vacc0x9 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() [all …]
|
D | 3x16c8-minmax-neon-mull-padal.c | 55 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 56 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 57 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 58 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 59 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 60 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 61 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 62 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 63 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 64 …int32x4_t vacc0x9 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() [all …]
|
D | 4x8c8-minmax-neon-mull-padal.c | 59 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 60 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 61 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 62 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 63 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 64 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 65 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 66 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 67 int32x4_t vacc1x0 = vacc0x0; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 68 int32x4_t vacc1x1 = vacc0x1; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() [all …]
|
D | 2x16c8-minmax-neon-mull-padal.c | 51 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 52 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 53 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 54 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 55 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 56 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 57 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 58 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 59 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 60 …int32x4_t vacc0x9 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() [all …]
|
D | 4x16c16-minmax-neon-mlal-padal.c | 59 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 60 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 61 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 62 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 63 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 64 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 65 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 66 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 67 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 68 …int32x4_t vacc0x9 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() [all …]
|
D | 3x16c16-minmax-neon-mlal-padal.c | 55 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 56 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 57 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 58 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 59 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 60 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 61 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 62 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 63 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 64 …int32x4_t vacc0x9 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() [all …]
|
D | 3x8c8-minmax-neon-mull-padal.c | 55 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 56 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 57 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 58 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 59 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 60 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 61 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 62 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 63 int32x4_t vacc1x0 = vacc0x0; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() 64 int32x4_t vacc1x1 = vacc0x1; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() [all …]
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c8-minmax-neon-mull-padal.c | 62 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 63 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 64 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 65 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 66 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 67 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 68 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 69 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 70 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 71 …int32x4_t vacc0x9 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() [all …]
|
D | 3x16c8-minmax-neon-mull-padal.c | 56 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 57 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 58 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 59 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 60 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 61 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 62 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 63 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 64 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 65 …int32x4_t vacc0x9 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() [all …]
|
D | 4x8c8-minmax-neon-mull-padal.c | 62 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 63 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 64 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 65 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 66 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 67 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 68 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 69 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 70 int32x4_t vacc1x0 = vacc0x0; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 71 int32x4_t vacc1x1 = vacc0x1; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() [all …]
|
D | 2x16c8-minmax-neon-mull-padal.c | 50 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 51 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 52 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 53 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 54 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 55 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 56 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 57 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 58 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 59 …int32x4_t vacc0x9 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() [all …]
|
D | 4x16c16-minmax-neon-mlal-padal.c | 62 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 63 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 64 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 65 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 66 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 67 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 68 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 69 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 70 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 71 …int32x4_t vacc0x9 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() [all …]
|
D | 3x16c16-minmax-neon-mlal-padal.c | 56 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 57 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 58 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 59 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 60 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 61 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 62 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 63 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 64 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 65 …int32x4_t vacc0x9 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() [all …]
|
D | 3x8c8-minmax-neon-mull-padal.c | 56 …int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 57 …int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 58 …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 59 …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 60 …int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 61 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 62 …int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 63 …int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 64 int32x4_t vacc1x0 = vacc0x0; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() 65 int32x4_t vacc1x1 = vacc0x1; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() [all …]
|
/external/libopus/silk/arm/ |
D | NSQ_neon.c | 40 int32x4_t coef0 = vld1q_s32(coef32); in silk_noise_shape_quantizer_short_prediction_neon() 41 int32x4_t coef1 = vld1q_s32(coef32 + 4); in silk_noise_shape_quantizer_short_prediction_neon() 42 int32x4_t coef2 = vld1q_s32(coef32 + 8); in silk_noise_shape_quantizer_short_prediction_neon() 43 int32x4_t coef3 = vld1q_s32(coef32 + 12); in silk_noise_shape_quantizer_short_prediction_neon() 45 int32x4_t a0 = vld1q_s32(buf32 - 15); in silk_noise_shape_quantizer_short_prediction_neon() 46 int32x4_t a1 = vld1q_s32(buf32 - 11); in silk_noise_shape_quantizer_short_prediction_neon() 47 int32x4_t a2 = vld1q_s32(buf32 - 7); in silk_noise_shape_quantizer_short_prediction_neon() 48 int32x4_t a3 = vld1q_s32(buf32 - 3); in silk_noise_shape_quantizer_short_prediction_neon() 50 int32x4_t b0 = vqdmulhq_s32(coef0, a0); in silk_noise_shape_quantizer_short_prediction_neon() 51 int32x4_t b1 = vqdmulhq_s32(coef1, a1); in silk_noise_shape_quantizer_short_prediction_neon() [all …]
|
/external/XNNPACK/src/qu8-requantization/ |
D | q31-neon.c | 46 const int32x4_t vmultiplier = vdupq_n_s32(multiplier); in xnn_qu8_requantize_q31__neon() 48 const int32x4_t vshift = vdupq_n_s32(-shift); in xnn_qu8_requantize_q31__neon() 49 const int32x4_t vshift_eq_0_mask = vreinterpretq_s32_u32(vceqq_s32(vshift, vmovq_n_s32(0))); in xnn_qu8_requantize_q31__neon() 53 const int32x4_t x = vld1q_s32(input); in xnn_qu8_requantize_q31__neon() 54 const int32x4_t y = vld1q_s32(input + 4); in xnn_qu8_requantize_q31__neon() 55 const int32x4_t z = vld1q_s32(input + 8); in xnn_qu8_requantize_q31__neon() 56 const int32x4_t w = vld1q_s32(input + 12); in xnn_qu8_requantize_q31__neon() 61 const int32x4_t x_product = vqrdmulhq_s32(x, vmultiplier); in xnn_qu8_requantize_q31__neon() 62 const int32x4_t y_product = vqrdmulhq_s32(y, vmultiplier); in xnn_qu8_requantize_q31__neon() 63 const int32x4_t z_product = vqrdmulhq_s32(z, vmultiplier); in xnn_qu8_requantize_q31__neon() [all …]
|
/external/XNNPACK/src/qs8-requantization/ |
D | q31-neon.c | 46 const int32x4_t vmultiplier = vdupq_n_s32(multiplier); in xnn_qs8_requantize_q31__neon() 48 const int32x4_t vshift = vdupq_n_s32(-shift); in xnn_qs8_requantize_q31__neon() 49 const int32x4_t vshift_eq_0_mask = vreinterpretq_s32_u32(vceqq_s32(vshift, vmovq_n_s32(0))); in xnn_qs8_requantize_q31__neon() 53 const int32x4_t x = vld1q_s32(input); in xnn_qs8_requantize_q31__neon() 54 const int32x4_t y = vld1q_s32(input + 4); in xnn_qs8_requantize_q31__neon() 55 const int32x4_t z = vld1q_s32(input + 8); in xnn_qs8_requantize_q31__neon() 56 const int32x4_t w = vld1q_s32(input + 12); in xnn_qs8_requantize_q31__neon() 61 const int32x4_t x_product = vqrdmulhq_s32(x, vmultiplier); in xnn_qs8_requantize_q31__neon() 62 const int32x4_t y_product = vqrdmulhq_s32(y, vmultiplier); in xnn_qs8_requantize_q31__neon() 63 const int32x4_t z_product = vqrdmulhq_s32(z, vmultiplier); in xnn_qs8_requantize_q31__neon() [all …]
|