Lines Matching refs:int32x4_t

55int32x4_t vacc0x0 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
56int32x4_t vacc0x1 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
57int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
58int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
59int32x4_t vacc0x4 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
60int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
61int32x4_t vacc0x6 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
62int32x4_t vacc0x7 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
63int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
64int32x4_t vacc0x9 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
65int32x4_t vacc0x10 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
66int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
67int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
68int32x4_t vacc0x13 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
69int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
70int32x4_t vacc0x15 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
71 int32x4_t vacc1x0 = vacc0x0; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
72 int32x4_t vacc1x1 = vacc0x1; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
73 int32x4_t vacc1x2 = vacc0x2; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
74 int32x4_t vacc1x3 = vacc0x3; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
75 int32x4_t vacc1x4 = vacc0x4; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
76 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
77 int32x4_t vacc1x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
78 int32x4_t vacc1x7 = vacc0x7; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
79 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
80 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
81 int32x4_t vacc1x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
82 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
83 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
84 int32x4_t vacc1x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
85 int32x4_t vacc1x14 = vacc0x14; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
86 int32x4_t vacc1x15 = vacc0x15; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
87 int32x4_t vacc2x0 = vacc0x0; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
88 int32x4_t vacc2x1 = vacc0x1; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
89 int32x4_t vacc2x2 = vacc0x2; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
90 int32x4_t vacc2x3 = vacc0x3; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
91 int32x4_t vacc2x4 = vacc0x4; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
92 int32x4_t vacc2x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
93 int32x4_t vacc2x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
94 int32x4_t vacc2x7 = vacc0x7; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
95 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
96 int32x4_t vacc2x9 = vacc0x9; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
97 int32x4_t vacc2x10 = vacc0x10; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
98 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
99 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
100 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
101 int32x4_t vacc2x14 = vacc0x14; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
102 int32x4_t vacc2x15 = vacc0x15; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
296 const int32x4_t vsum0x01 = vpaddq_s32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
297 const int32x4_t vsum0x23 = vpaddq_s32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
298 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
299 const int32x4_t vsum0x67 = vpaddq_s32(vacc0x6, vacc0x7); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
300 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
301 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
302 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
303 const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
304 const int32x4_t vsum1x01 = vpaddq_s32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
305 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
306 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
307 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
308 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
309 const int32x4_t vsum1xAB = vpaddq_s32(vacc1x10, vacc1x11); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
310 const int32x4_t vsum1xCD = vpaddq_s32(vacc1x12, vacc1x13); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
311 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
312 const int32x4_t vsum2x01 = vpaddq_s32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
313 const int32x4_t vsum2x23 = vpaddq_s32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
314 const int32x4_t vsum2x45 = vpaddq_s32(vacc2x4, vacc2x5); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
315 const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
316 const int32x4_t vsum2x89 = vpaddq_s32(vacc2x8, vacc2x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
317 const int32x4_t vsum2xAB = vpaddq_s32(vacc2x10, vacc2x11); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
318 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
319 const int32x4_t vsum2xEF = vpaddq_s32(vacc2x14, vacc2x15); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
320 int32x4_t vacc0x0123 = vpaddq_s32(vsum0x01, vsum0x23); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
321 int32x4_t vacc0x4567 = vpaddq_s32(vsum0x45, vsum0x67); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
322 int32x4_t vacc0x89AB = vpaddq_s32(vsum0x89, vsum0xAB); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
323 int32x4_t vacc0xCDEF = vpaddq_s32(vsum0xCD, vsum0xEF); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
324 int32x4_t vacc1x0123 = vpaddq_s32(vsum1x01, vsum1x23); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
325 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
326 int32x4_t vacc1x89AB = vpaddq_s32(vsum1x89, vsum1xAB); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
327 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
328 int32x4_t vacc2x0123 = vpaddq_s32(vsum2x01, vsum2x23); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
329 int32x4_t vacc2x4567 = vpaddq_s32(vsum2x45, vsum2x67); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
330 int32x4_t vacc2x89AB = vpaddq_s32(vsum2x89, vsum2xAB); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
331 int32x4_t vacc2xCDEF = vpaddq_s32(vsum2xCD, vsum2xEF); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
339 int32x4_t vacc0x0123 = vcombine_s32(vsum0x01, vsum0x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
346 int32x4_t vacc0x4567 = vcombine_s32(vsum0x45, vsum0x67 ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
353 int32x4_t vacc0x89AB = vcombine_s32(vsum0x89, vsum0xAB ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
360 int32x4_t vacc0xCDEF = vcombine_s32(vsum0xCD, vsum0xEF ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
367 int32x4_t vacc1x0123 = vcombine_s32(vsum1x01, vsum1x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
374 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
381 int32x4_t vacc1x89AB = vcombine_s32(vsum1x89, vsum1xAB ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
388 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
395 int32x4_t vacc2x0123 = vcombine_s32(vsum2x01, vsum2x23 ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
402 int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
409 int32x4_t vacc2x89AB = vcombine_s32(vsum2x89, vsum2xAB ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
416 int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
419 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
433 const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
434 … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0))); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()