
Search results for refs:vbicq_s32 (results 1–25 of 196), sorted by relevance.
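All of the hits below are instances of one rounding fix-up idiom from XNNPACK's Q31 requantization path: vbicq_s32(a, b) computes a & ~b, and vsraq_n_s32(a, b, 31) computes a + (b >> 31), so together they subtract 1 from negative lanes only where a nonzero shift follows. Below is a minimal sketch of the idiom, assuming vshift holds the negated right-shift amount (vrshlq_s32 shifts right for negative counts); the helper name requantize_fixup is illustrative, not taken from XNNPACK:

#include <arm_neon.h>

// Minimal sketch (illustrative names) of the recurring fix-up.
static inline int32x4_t requantize_fixup(int32x4_t vacc, int32x4_t vshift) {
  // All-ones lanes where no shift will happen (shift count == 0).
  const int32x4_t vzero_shift_mask =
      vreinterpretq_s32_u32(vceqq_s32(vshift, vdupq_n_s32(0)));
  // vbicq_s32 clears the lanes flagged by the mask; vsraq_n_s32 then adds
  // the sign bit (i.e. subtracts 1 from negative lanes) only where a shift
  // follows, so the rounding shift below rounds ties away from zero
  // instead of toward +infinity.
  vacc = vsraq_n_s32(vacc, vbicq_s32(vacc, vzero_shift_mask), 31);
  // Rounding arithmetic right shift by -vshift.
  return vrshlq_s32(vacc, vshift);
}

The bic against vzero_shift_mask matters because vrshlq_s32 with a zero count performs no rounding at all, so applying the -1 bias in those lanes would change the result.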


/external/XNNPACK/src/qu8-vadd/
minmax-neon.c:68 vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31); in xnn_qu8_vadd_minmax_ukernel__neon()
69 vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31); in xnn_qu8_vadd_minmax_ukernel__neon()
70 vacc2_lo = vsraq_n_s32(vacc2_lo, vbicq_s32(vacc2_lo, vzero_shift_mask), 31); in xnn_qu8_vadd_minmax_ukernel__neon()
71 vacc3_lo = vsraq_n_s32(vacc3_lo, vbicq_s32(vacc3_lo, vzero_shift_mask), 31); in xnn_qu8_vadd_minmax_ukernel__neon()
72 vacc0_hi = vsraq_n_s32(vacc0_hi, vbicq_s32(vacc0_hi, vzero_shift_mask), 31); in xnn_qu8_vadd_minmax_ukernel__neon()
73 vacc1_hi = vsraq_n_s32(vacc1_hi, vbicq_s32(vacc1_hi, vzero_shift_mask), 31); in xnn_qu8_vadd_minmax_ukernel__neon()
74 vacc2_hi = vsraq_n_s32(vacc2_hi, vbicq_s32(vacc2_hi, vzero_shift_mask), 31); in xnn_qu8_vadd_minmax_ukernel__neon()
75 vacc3_hi = vsraq_n_s32(vacc3_hi, vbicq_s32(vacc3_hi, vzero_shift_mask), 31); in xnn_qu8_vadd_minmax_ukernel__neon()
129 vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31); in xnn_qu8_vadd_minmax_ukernel__neon()
130 vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31); in xnn_qu8_vadd_minmax_ukernel__neon()
[all …]
/external/XNNPACK/src/qs8-vaddc/gen/
minmax-neon-ld64-x32.c:59 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
60 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
61 vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
62 vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
63 vaccGHIJ = vsraq_n_s32(vaccGHIJ, vbicq_s32(vaccGHIJ, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
64 vaccKLMN = vsraq_n_s32(vaccKLMN, vbicq_s32(vaccKLMN, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
65 vaccOPQR = vsraq_n_s32(vaccOPQR, vbicq_s32(vaccOPQR, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
66 vaccSTUV = vsraq_n_s32(vaccSTUV, vbicq_s32(vaccSTUV, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
103 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
104 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
minmax-neon-ld64-x24.c:55 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
56 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
57 vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
58 vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
59 vaccGHIJ = vsraq_n_s32(vaccGHIJ, vbicq_s32(vaccGHIJ, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
60 vaccKLMN = vsraq_n_s32(vaccKLMN, vbicq_s32(vaccKLMN, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
94 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
95 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
minmax-neon-ld64-x16.c:51 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
52 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
53 vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
54 vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
81 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
82 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
minmax-neon-ld64-x8.c:47 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
48 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
72 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
73 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
/external/XNNPACK/src/qs8-gemm/gen/
8x16c4-minmax-neondot.c:303 vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
304 vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
305 vacc0x89AB = vsraq_n_s32(vproduct0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
306 vacc0xCDEF = vsraq_n_s32(vproduct0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
307 vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
308 vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
309 vacc1x89AB = vsraq_n_s32(vproduct1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
310 vacc1xCDEF = vsraq_n_s32(vproduct1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
311 vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
312 vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
[all …]
6x16c4-minmax-neondot.c:247 vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
248 vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
249 vacc0x89AB = vsraq_n_s32(vproduct0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
250 vacc0xCDEF = vsraq_n_s32(vproduct0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
251 vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
252 vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
253 vacc1x89AB = vsraq_n_s32(vproduct1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
254 vacc1xCDEF = vsraq_n_s32(vproduct1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
255 vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
256 vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
[all …]
4x16c4-minmax-neondot.c:191 vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
192 vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
193 vacc0x89AB = vsraq_n_s32(vproduct0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
194 vacc0xCDEF = vsraq_n_s32(vproduct0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
195 vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
196 vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
197 vacc1x89AB = vsraq_n_s32(vproduct1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
198 vacc1xCDEF = vsraq_n_s32(vproduct1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
199 vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
200 vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
[all …]
8x8c4-minmax-neondot.c:217 vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
218 vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
219 vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
220 vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
221 vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
222 vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
223 vacc3x0123 = vsraq_n_s32(vproduct3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
224 vacc3x4567 = vsraq_n_s32(vproduct3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
225 vacc4x0123 = vsraq_n_s32(vproduct4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
226 vacc4x4567 = vsraq_n_s32(vproduct4x4567, vbicq_s32(vacc4x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
[all …]
6x8c4-minmax-neondot.c:181 vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
182 vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
183 vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
184 vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
185 vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
186 vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
187 vacc3x0123 = vsraq_n_s32(vproduct3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
188 vacc3x4567 = vsraq_n_s32(vproduct3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
189 vacc4x0123 = vsraq_n_s32(vproduct4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
190 vacc4x4567 = vsraq_n_s32(vproduct4x4567, vbicq_s32(vacc4x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
[all …]
4x8c4-minmax-neondot.c:145 vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
146 vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
147 vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
148 vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
149 vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
150 vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
151 vacc3x0123 = vsraq_n_s32(vproduct3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
152 vacc3x4567 = vsraq_n_s32(vproduct3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
4x16c2-minmax-neon-mull-padal-dup.c:381 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
382 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
383 vacc0x89AB = vsraq_n_s32(vacc0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
384 vacc0xCDEF = vsraq_n_s32(vacc0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
385 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
386 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
387 vacc1x89AB = vsraq_n_s32(vacc1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
388 vacc1xCDEF = vsraq_n_s32(vacc1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
389 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
390 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
[all …]
/external/XNNPACK/src/qs8-igemm/gen/
8x16c4-minmax-neondot.c:327 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
328 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
329 vacc0x89AB = vsraq_n_s32(vacc0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
330 vacc0xCDEF = vsraq_n_s32(vacc0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
331 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
332 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
333 vacc1x89AB = vsraq_n_s32(vacc1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
334 vacc1xCDEF = vsraq_n_s32(vacc1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
335 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
336 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
[all …]
6x16c4-minmax-neondot.c:267 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
268 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
269 vacc0x89AB = vsraq_n_s32(vacc0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
270 vacc0xCDEF = vsraq_n_s32(vacc0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
271 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
272 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
273 vacc1x89AB = vsraq_n_s32(vacc1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
274 vacc1xCDEF = vsraq_n_s32(vacc1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
275 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
276 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
[all …]
4x16c4-minmax-neondot.c:207 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
208 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
209 vacc0x89AB = vsraq_n_s32(vacc0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
210 vacc0xCDEF = vsraq_n_s32(vacc0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
211 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
212 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
213 vacc1x89AB = vsraq_n_s32(vacc1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
214 vacc1xCDEF = vsraq_n_s32(vacc1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
215 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
216 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
[all …]
8x8c4-minmax-neondot.c:241 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
242 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
243 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
244 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
245 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
246 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
247 vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
248 vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
249 vacc4x0123 = vsraq_n_s32(vacc4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
250 vacc4x4567 = vsraq_n_s32(vacc4x4567, vbicq_s32(vacc4x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
[all …]
6x8c4-minmax-neondot.c:201 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
202 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
203 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
204 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
205 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
206 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
207 vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
208 vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
209 vacc4x0123 = vsraq_n_s32(vacc4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
210 vacc4x4567 = vsraq_n_s32(vacc4x4567, vbicq_s32(vacc4x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
[all …]
4x8c4-minmax-neondot.c:161 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
162 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
163 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
164 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
165 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
166 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
167 vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
168 vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
4x16c2-minmax-neon-mull-padal-dup.c:401 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
402 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
403 vacc0x89AB = vsraq_n_s32(vacc0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
404 vacc0xCDEF = vsraq_n_s32(vacc0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
405 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
406 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
407 vacc1x89AB = vsraq_n_s32(vacc1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
408 vacc1xCDEF = vsraq_n_s32(vacc1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
409 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
410 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
[all …]
/external/XNNPACK/src/qs8-vadd/gen/
minmax-neon-ld64-x32.c:71 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
72 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
73 vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
74 vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
75 vaccGHIJ = vsraq_n_s32(vaccGHIJ, vbicq_s32(vaccGHIJ, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
76 vaccKLMN = vsraq_n_s32(vaccKLMN, vbicq_s32(vaccKLMN, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
77 vaccOPQR = vsraq_n_s32(vaccOPQR, vbicq_s32(vaccOPQR, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
78 vaccSTUV = vsraq_n_s32(vaccSTUV, vbicq_s32(vaccSTUV, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
120 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
121 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
minmax-neon-ld64-x24.c:63 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
64 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
65 vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
66 vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
67 vaccGHIJ = vsraq_n_s32(vaccGHIJ, vbicq_s32(vaccGHIJ, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
68 vaccKLMN = vsraq_n_s32(vaccKLMN, vbicq_s32(vaccKLMN, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
107 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
108 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
minmax-neon-ld64-x16.c:55 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
56 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
57 vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
58 vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
90 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
91 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
minmax-neon-ld64-x8.c:47 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
48 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
77 vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
78 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
/external/XNNPACK/src/qu8-requantization/
q31-neon.c:72 const int32x4_t x_adjusted_product = vsraq_n_s32(x_product, vbicq_s32(x, vshift_eq_0_mask), 31); in xnn_qu8_requantize_q31__neon()
73 const int32x4_t y_adjusted_product = vsraq_n_s32(y_product, vbicq_s32(y, vshift_eq_0_mask), 31); in xnn_qu8_requantize_q31__neon()
74 const int32x4_t z_adjusted_product = vsraq_n_s32(z_product, vbicq_s32(z, vshift_eq_0_mask), 31); in xnn_qu8_requantize_q31__neon()
75 const int32x4_t w_adjusted_product = vsraq_n_s32(w_product, vbicq_s32(w, vshift_eq_0_mask), 31); in xnn_qu8_requantize_q31__neon()
/external/XNNPACK/src/qs8-requantization/
q31-neon.c:72 const int32x4_t x_adjusted_product = vsraq_n_s32(x_product, vbicq_s32(x, vshift_eq_0_mask), 31); in xnn_qs8_requantize_q31__neon()
73 const int32x4_t y_adjusted_product = vsraq_n_s32(y_product, vbicq_s32(y, vshift_eq_0_mask), 31); in xnn_qs8_requantize_q31__neon()
74 const int32x4_t z_adjusted_product = vsraq_n_s32(z_product, vbicq_s32(z, vshift_eq_0_mask), 31); in xnn_qs8_requantize_q31__neon()
75 const int32x4_t w_adjusted_product = vsraq_n_s32(w_product, vbicq_s32(w, vshift_eq_0_mask), 31); in xnn_qs8_requantize_q31__neon()
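The q31-neon.c hits above show the idiom in its original requantization context: multiply by a Q31 multiplier, apply the sign fix-up keyed on the input x, then rounding-shift. A hedged sketch of that full step, with illustrative names (requantize_q31_step; vshift_eq_0_mask assumed precomputed as all-ones where the shift count is zero):

#include <arm_neon.h>

// Sketch of the Q31 requantization step seen in the q31-neon.c hits.
// vmultiplier is a Q31 fixed-point scale; vshift holds the negated
// right-shift amount, so vrshlq_s32 performs a rounding right shift.
static inline int32x4_t requantize_q31_step(int32x4_t x,
                                            int32x4_t vmultiplier,
                                            int32x4_t vshift,
                                            int32x4_t vshift_eq_0_mask) {
  // Saturating rounding doubling multiply-high: ~ (2 * x * multiplier) >> 32.
  const int32x4_t x_product = vqrdmulhq_s32(x, vmultiplier);
  // Subtract 1 from the product where x is negative and a shift follows.
  const int32x4_t x_adjusted_product =
      vsraq_n_s32(x_product, vbicq_s32(x, vshift_eq_0_mask), 31);
  // Rounding arithmetic right shift completes the scaling.
  return vrshlq_s32(x_adjusted_product, vshift);
}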
