Lines Matching refs:int32x4_t

All matches below are in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal(), XNNPACK's QS8 indirect-GEMM micro-kernel (a 4-row by 8-output-channel tile, consuming 8 int8 inputs per accumulation block, built on the NEON VMULL + VPADAL idiom). The number at the left of each match is its line number in the source file.

59  int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeof(int32_t));
60  int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeof(int32_t));
61  int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeof(int32_t));
62  int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeof(int32_t));
63  int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeof(int32_t));
64  int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeof(int32_t));
65  int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeof(int32_t));
66  int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeof(int32_t));
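
Lines 59-66 seed the eight row-0 accumulators with the per-channel int32 bias values stored at the front of the packed weights: vmovq_n_s32(0) zeroes the vector, vld1q_lane_s32 drops one bias into lane 0, and w advances by one int32. A minimal sketch of the same effect in loop form (the helper, loop, and array are mine for illustration; the kernel unrolls them):

    #include <arm_neon.h>
    #include <stddef.h>

    /* Illustrative loop form of file lines 59-66: one accumulator per output
       channel, bias in lane 0, remaining lanes zero. Assumes w points at
       8 consecutive int32 bias values in the packed weight blob. */
    static const void* init_row0_accumulators(const void* w, int32x4_t vacc0[8]) {
      for (size_t n = 0; n < 8; n++) {
        vacc0[n] = vld1q_lane_s32((const int32_t*) w, vmovq_n_s32(0), 0);
        w = (const void*) ((uintptr_t) w + sizeof(int32_t));
      }
      return w;  /* advanced past the 8 bias values */
    }
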
67  int32x4_t vacc1x0 = vacc0x0;
68  int32x4_t vacc1x1 = vacc0x1;
69  int32x4_t vacc1x2 = vacc0x2;
70  int32x4_t vacc1x3 = vacc0x3;
71  int32x4_t vacc1x4 = vacc0x4;
72  int32x4_t vacc1x5 = vacc0x5;
73  int32x4_t vacc1x6 = vacc0x6;
74  int32x4_t vacc1x7 = vacc0x7;
75  int32x4_t vacc2x0 = vacc0x0;
76  int32x4_t vacc2x1 = vacc0x1;
77  int32x4_t vacc2x2 = vacc0x2;
78  int32x4_t vacc2x3 = vacc0x3;
79  int32x4_t vacc2x4 = vacc0x4;
80  int32x4_t vacc2x5 = vacc0x5;
81  int32x4_t vacc2x6 = vacc0x6;
82  int32x4_t vacc2x7 = vacc0x7;
83  int32x4_t vacc3x0 = vacc0x0;
84  int32x4_t vacc3x1 = vacc0x1;
85  int32x4_t vacc3x2 = vacc0x2;
86  int32x4_t vacc3x3 = vacc0x3;
87  int32x4_t vacc3x4 = vacc0x4;
88  int32x4_t vacc3x5 = vacc0x5;
89  int32x4_t vacc3x6 = vacc0x6;
90  int32x4_t vacc3x7 = vacc0x7;
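
Lines 67-90 copy the row-0 accumulators to rows 1-3: all four rows of the output tile start from the same per-channel bias. The matches then jump from line 90 to line 201; the elided inner loop is where the int32x4_t accumulators actually grow, via the widening multiply / pairwise-add-accumulate idiom the kernel is named for (VMULL + VPADAL). A sketch of one such step, assuming the loop follows the standard mull_padal pattern (the helper and its names are illustrative):

    #include <arm_neon.h>

    /* One c8 accumulation step: 8 int8 inputs times 8 int8 weights for one
       output channel, accumulated into 4 int32 lanes (reduced to a single
       per-channel sum later, at lines 201-224 / 232-281). */
    static inline int32x4_t accumulate_c8(int32x4_t vacc,
                                          const int8_t* a /* 8 inputs */,
                                          const int8_t* b /* 8 weights */) {
      const int16x8_t vprod = vmull_s8(vld1_s8(b), vld1_s8(a));  /* widening 8-bit products */
      return vpadalq_s16(vacc, vprod);  /* pairwise-add 16-bit pairs into int32 lanes */
    }
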
201  const int32x4_t vsum0x01 = vpaddq_s32(vacc0x0, vacc0x1);
202  const int32x4_t vsum0x23 = vpaddq_s32(vacc0x2, vacc0x3);
203  const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5);
204  const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7);
205  const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1);
206  const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3);
207  const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5);
208  const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7);
209  const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1);
210  const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3);
211  const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5);
212  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
213  const int32x4_t vsum3x01 = vpaddq_s32(vacc3x0, vacc3x1);
214  const int32x4_t vsum3x23 = vpaddq_s32(vacc3x2, vacc3x3);
215  const int32x4_t vsum3x45 = vpaddq_s32(vacc3x4, vacc3x5);
216  const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7);
217  int32x4_t vacc0x0123 = vpaddq_s32(vsum0x01, vsum0x23);
218  int32x4_t vacc0x4567 = vpaddq_s32(vsum0x45, vsum0x67);
219  int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23);
220  int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67);
221  int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23);
222  int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67);
223  int32x4_t vacc3x0123 = vpaddq_s32(vsum3x01, vsum3x23);
224  int32x4_t vacc3x4567 = vpaddq_s32(vsum3x45, vsum3x67);
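
Lines 201-224 are the AArch64 reduction: vpaddq_s32(a, b) yields { a0+a1, a2+a3, b0+b1, b2+b3 }, so two levels of it collapse the eight 4-lane accumulators of a row into two vectors whose lanes are the finished per-channel sums (channels 0-3 and 4-7). vpaddq_s32 exists only on AArch64, which is why the file also carries the fallback at lines 232-281 below. The tree for one half-row, spelled out (helper name is illustrative):

    #include <arm_neon.h>

    /* Reduce four 4-lane partial-sum vectors (channels n..n+3 of one row) to
       a single vector whose lane i is the complete sum for channel n+i. */
    static inline int32x4_t reduce_row_half(int32x4_t vacc_c0, int32x4_t vacc_c1,
                                            int32x4_t vacc_c2, int32x4_t vacc_c3) {
      const int32x4_t vsum01 = vpaddq_s32(vacc_c0, vacc_c1);  /* {c0 pairs, c1 pairs} */
      const int32x4_t vsum23 = vpaddq_s32(vacc_c2, vacc_c3);  /* {c2 pairs, c3 pairs} */
      return vpaddq_s32(vsum01, vsum23);                      /* {c0, c1, c2, c3} totals */
    }
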
232  int32x4_t vacc0x0123 = vcombine_s32(vsum0x01, vsum0x23 );
239  int32x4_t vacc0x4567 = vcombine_s32(vsum0x45, vsum0x67 );
246  int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 );
253  int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 );
260  int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 );
267  int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );
274  int32x4_t vacc3x0123 = vcombine_s32(vsum3x01, vsum3x23 );
281  int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 );
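
Lines 232-281 are the non-AArch64 path for the same reduction. Only the vcombine_s32 lines appear here because the intermediate steps work on int32x2_t halves, which the refs:int32x4_t query does not match; hence the stride of 7 file lines between hits. The elided lines most likely follow XNNPACK's usual shape, reconstructed here for the first group (a reconstruction, not a verbatim quote of the source):

    /* Probable shape of the lines feeding line 232: halve each accumulator,
       pairwise-add across channels, then recombine into an int32x4_t. */
    const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0));
    const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1));
    const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2));
    const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3));
    const int32x2_t vsum0x01 = vpadd_s32(vpsum0x0, vpsum0x1);  /* {ch0 total, ch1 total} */
    const int32x2_t vsum0x23 = vpadd_s32(vpsum0x2, vpsum0x3);  /* {ch2 total, ch3 total} */
    int32x4_t vacc0x0123 = vcombine_s32(vsum0x01, vsum0x23);   /* matched line 232 */
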
284  const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
294  const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
295  const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
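
Lines 284-295 load the requantization constants: a Q31 fixed-point multiplier, a non-positive shift count for VRSHL, and a mask that is all-ones in lanes whose shift is zero. The gap between lines 284 and 294 is where the accumulators are multiplied against vmultiplier with vqrdmulhq_s32. Roughly how the three vectors combine in this family of qs8 kernels, sketched as a helper (the function and the exact placement of the fixup are an assumption, not the elided source):

    #include <arm_neon.h>

    /* Sketch of XNNPACK-style Q31 requantization: multiply-high, sign fixup,
       rounding right shift. vright_shift holds -shift, so VRSHL shifts right. */
    static inline int32x4_t requantize_q31(int32x4_t vacc,
                                           int32x4_t vmultiplier,
                                           int32x4_t vright_shift,
                                           int32x4_t vzero_shift_mask) {
      int32x4_t vprod = vqrdmulhq_s32(vacc, vmultiplier);  /* doubling multiply, high half */
      /* Add -1 in lanes where the input is negative to correct the rounding
         direction, suppressed via BIC in lanes whose shift amount is zero. */
      vprod = vsraq_n_s32(vprod, vbicq_s32(vacc, vzero_shift_mask), 31);
      return vrshlq_s32(vprod, vright_shift);  /* rounding shift right */
    }
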