Home
last modified time | relevance | path

Searched refs:vacc0x11 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D1x16c8-minmax-neon-mull-padal.c55 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
102 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
125 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
150 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mlal-padal.c55 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
131 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
191 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
214 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
239 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c55 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
118 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
141 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
166 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D2x16c8-minmax-neon-mull-padal.c61 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
77 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
148 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
180 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
217 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c61 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
77 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
190 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
287 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
319 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
356 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c61 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
77 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
176 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
212 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
249 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c67 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
83 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
99 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
194 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
235 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
284 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c67 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
83 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
99 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
249 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
383 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
424 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
473 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c73 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
89 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
105 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
121 int32x4_t vacc3x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
240 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
290 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
351 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c16-minmax-neon-mlal-padal.c67 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
83 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
99 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
234 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
283 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
332 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c73 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
89 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
105 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
121 int32x4_t vacc3x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
292 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
354 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
415 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c73 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
89 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
105 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
121 int32x4_t vacc3x11 = vacc0x11; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
308 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
479 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
529 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
590 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D1x16c8-minmax-neon-mlal-padal.c58 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
142 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
202 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
228 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
253 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D1x16c8-minmax-neon-mull-padal.c58 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
113 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
139 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
164 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
D1x16c16-minmax-neon-mlal-padal.c58 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
129 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
155 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
180 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D2x16c8-minmax-neon-mull-padal.c62 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
78 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
161 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
196 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
233 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c62 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
78 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
203 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
300 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
335 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
372 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c66 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
82 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
98 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
209 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
253 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
302 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D2x16c16-minmax-neon-mlal-padal.c62 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
78 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
189 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
228 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
265 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c70 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
86 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
102 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
118 int32x4_t vacc3x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
257 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
310 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
371 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c16-minmax-neon-mlal-padal.c66 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
82 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
98 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
249 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
301 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
350 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c66 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
82 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
98 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
264 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
398 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
442 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
491 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c70 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
86 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
102 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
118 int32x4_t vacc3x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
309 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
374 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
435 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c70 …int32x4_t vacc0x11 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
86 int32x4_t vacc1x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
102 int32x4_t vacc2x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
118 int32x4_t vacc3x11 = vacc0x11; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
325 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
496 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
549 const int32x4_t vsum0xAB = vpaddq_s32(vacc0x10, vacc0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
610 const int32x2_t vpsum0xB = vadd_s32(vget_low_s32(vacc0x11), vget_high_s32(vacc0x11)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()