/external/XNNPACK/src/qu8-vadd/
minmax-neon.c (in xnn_qu8_vadd_minmax_ukernel__neon()):
   68  vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31);
   69  vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31);
   70  vacc2_lo = vsraq_n_s32(vacc2_lo, vbicq_s32(vacc2_lo, vzero_shift_mask), 31);
   71  vacc3_lo = vsraq_n_s32(vacc3_lo, vbicq_s32(vacc3_lo, vzero_shift_mask), 31);
   72  vacc0_hi = vsraq_n_s32(vacc0_hi, vbicq_s32(vacc0_hi, vzero_shift_mask), 31);
   73  vacc1_hi = vsraq_n_s32(vacc1_hi, vbicq_s32(vacc1_hi, vzero_shift_mask), 31);
   74  vacc2_hi = vsraq_n_s32(vacc2_hi, vbicq_s32(vacc2_hi, vzero_shift_mask), 31);
   75  vacc3_hi = vsraq_n_s32(vacc3_hi, vbicq_s32(vacc3_hi, vzero_shift_mask), 31);
  129  vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31);
  130  vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31);
  [all …]

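Every match in this listing is the same pre-shift rounding fixup: vsraq_n_s32(acc, vbicq_s32(v, mask), 31) adds -1 to lanes where v is negative and the shift count is nonzero, so the rounding right shift that follows (VRSHL with a negative count, which rounds ties toward positive infinity) ends up rounding ties away from zero. A minimal sketch under that reading follows; the helper name and the scalar shift parameter are illustrative, not XNNPACK API.

#include <arm_neon.h>

// Illustrative helper (not XNNPACK code): rounding arithmetic right shift
// with ties away from zero, the idiom behind every match in this listing.
static int32x4_t rounding_shift_right_q31(int32x4_t vacc, int32_t shift) {
  // VRSHL shifts right for negative counts and rounds ties toward +infinity.
  const int32x4_t vright_shift = vdupq_n_s32(-shift);
  // All-ones lanes where the shift count is zero: no rounding happens there,
  // so the fixup below must be suppressed on those lanes.
  const int32x4_t vzero_shift_mask =
      vreinterpretq_s32_u32(vceqq_s32(vright_shift, vdupq_n_s32(0)));
  // vsraq_n_s32(a, b, 31) computes a + (b >> 31), i.e. adds -1 exactly on
  // lanes where b is negative. Biasing negative lanes down by one turns
  // VRSHL's round-half-up into round-half-away-from-zero.
  vacc = vsraq_n_s32(vacc, vbicq_s32(vacc, vzero_shift_mask), 31);
  return vrshlq_s32(vacc, vright_shift);
}
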
/external/XNNPACK/src/qs8-vaddc/gen/
minmax-neon-ld64-x32.c (in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()):
   59  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   60  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);
   61  vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31);
   62  vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31);
   63  vaccGHIJ = vsraq_n_s32(vaccGHIJ, vbicq_s32(vaccGHIJ, vzero_shift_mask), 31);
   64  vaccKLMN = vsraq_n_s32(vaccKLMN, vbicq_s32(vaccKLMN, vzero_shift_mask), 31);
   65  vaccOPQR = vsraq_n_s32(vaccOPQR, vbicq_s32(vaccOPQR, vzero_shift_mask), 31);
   66  vaccSTUV = vsraq_n_s32(vaccSTUV, vbicq_s32(vaccSTUV, vzero_shift_mask), 31);
  103  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
  104  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);

minmax-neon-ld64-x24.c (in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()):
   55  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   56  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);
   57  vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31);
   58  vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31);
   59  vaccGHIJ = vsraq_n_s32(vaccGHIJ, vbicq_s32(vaccGHIJ, vzero_shift_mask), 31);
   60  vaccKLMN = vsraq_n_s32(vaccKLMN, vbicq_s32(vaccKLMN, vzero_shift_mask), 31);
   94  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   95  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);

minmax-neon-ld64-x16.c (in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()):
   51  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   52  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);
   53  vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31);
   54  vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31);
   81  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   82  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);

minmax-neon-ld64-x8.c (in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()):
   47  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   48  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);
   72  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   73  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);

/external/XNNPACK/src/qs8-gemm/gen/
8x16c4-minmax-neondot.c (in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()):
  303  vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  304  vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  305  vacc0x89AB = vsraq_n_s32(vproduct0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31);
  306  vacc0xCDEF = vsraq_n_s32(vproduct0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31);
  307  vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  308  vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  309  vacc1x89AB = vsraq_n_s32(vproduct1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31);
  310  vacc1xCDEF = vsraq_n_s32(vproduct1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31);
  311  vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  312  vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  [all …]

6x16c4-minmax-neondot.c (in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()):
  247  vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  248  vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  249  vacc0x89AB = vsraq_n_s32(vproduct0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31);
  250  vacc0xCDEF = vsraq_n_s32(vproduct0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31);
  251  vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  252  vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  253  vacc1x89AB = vsraq_n_s32(vproduct1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31);
  254  vacc1xCDEF = vsraq_n_s32(vproduct1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31);
  255  vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  256  vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  [all …]

4x16c4-minmax-neondot.c (in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()):
  191  vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  192  vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  193  vacc0x89AB = vsraq_n_s32(vproduct0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31);
  194  vacc0xCDEF = vsraq_n_s32(vproduct0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31);
  195  vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  196  vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  197  vacc1x89AB = vsraq_n_s32(vproduct1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31);
  198  vacc1xCDEF = vsraq_n_s32(vproduct1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31);
  199  vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  200  vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  [all …]

8x8c4-minmax-neondot.c (in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()):
  217  vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  218  vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  219  vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  220  vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  221  vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  222  vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  223  vacc3x0123 = vsraq_n_s32(vproduct3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31);
  224  vacc3x4567 = vsraq_n_s32(vproduct3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31);
  225  vacc4x0123 = vsraq_n_s32(vproduct4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31);
  226  vacc4x4567 = vsraq_n_s32(vproduct4x4567, vbicq_s32(vacc4x4567, vzero_shift_mask), 31);
  [all …]

6x8c4-minmax-neondot.c (in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()):
  181  vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  182  vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  183  vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  184  vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  185  vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  186  vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  187  vacc3x0123 = vsraq_n_s32(vproduct3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31);
  188  vacc3x4567 = vsraq_n_s32(vproduct3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31);
  189  vacc4x0123 = vsraq_n_s32(vproduct4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31);
  190  vacc4x4567 = vsraq_n_s32(vproduct4x4567, vbicq_s32(vacc4x4567, vzero_shift_mask), 31);
  [all …]

4x8c4-minmax-neondot.c (in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()):
  145  vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  146  vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  147  vacc1x0123 = vsraq_n_s32(vproduct1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  148  vacc1x4567 = vsraq_n_s32(vproduct1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  149  vacc2x0123 = vsraq_n_s32(vproduct2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  150  vacc2x4567 = vsraq_n_s32(vproduct2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  151  vacc3x0123 = vsraq_n_s32(vproduct3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31);
  152  vacc3x4567 = vsraq_n_s32(vproduct3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31);

4x16c2-minmax-neon-mull-padal-dup.c (in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()):
  381  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  382  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  383  vacc0x89AB = vsraq_n_s32(vacc0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31);
  384  vacc0xCDEF = vsraq_n_s32(vacc0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31);
  385  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  386  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  387  vacc1x89AB = vsraq_n_s32(vacc1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31);
  388  vacc1xCDEF = vsraq_n_s32(vacc1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31);
  389  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  390  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  [all …]

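Note that the c4 neondot GEMM matches above accumulate onto the Q31 product (vproduct...) while taking the sign for the fixup from the raw accumulator (vbicq_s32(vacc..., ...)). A hedged sketch of that shape; the helper name is illustrative, and vmultiplier/vzero_shift_mask stand in for values those kernels compute elsewhere:

#include <arm_neon.h>

// Hedged sketch (not XNNPACK API) of the fixup shape in the neondot GEMM
// matches. The Q31 multiplier is normalized to be positive in this scheme
// (as I read it), so vacc and its Q31 product share a sign and either can
// feed the sign fixup; these kernels use the pre-multiply vacc, presumably
// to shorten the dependency chain on the multiply.
static int32x4_t gemm_style_fixup(int32x4_t vacc, int32x4_t vmultiplier,
                                  int32x4_t vzero_shift_mask) {
  const int32x4_t vproduct = vqrdmulhq_s32(vacc, vmultiplier);  // Q31 multiply
  return vsraq_n_s32(vproduct, vbicq_s32(vacc, vzero_shift_mask), 31);
}
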
/external/XNNPACK/src/qs8-igemm/gen/
8x16c4-minmax-neondot.c (in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()):
  327  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  328  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  329  vacc0x89AB = vsraq_n_s32(vacc0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31);
  330  vacc0xCDEF = vsraq_n_s32(vacc0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31);
  331  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  332  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  333  vacc1x89AB = vsraq_n_s32(vacc1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31);
  334  vacc1xCDEF = vsraq_n_s32(vacc1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31);
  335  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  336  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  [all …]

6x16c4-minmax-neondot.c (in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()):
  267  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  268  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  269  vacc0x89AB = vsraq_n_s32(vacc0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31);
  270  vacc0xCDEF = vsraq_n_s32(vacc0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31);
  271  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  272  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  273  vacc1x89AB = vsraq_n_s32(vacc1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31);
  274  vacc1xCDEF = vsraq_n_s32(vacc1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31);
  275  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  276  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  [all …]

4x16c4-minmax-neondot.c (in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()):
  207  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  208  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  209  vacc0x89AB = vsraq_n_s32(vacc0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31);
  210  vacc0xCDEF = vsraq_n_s32(vacc0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31);
  211  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  212  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  213  vacc1x89AB = vsraq_n_s32(vacc1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31);
  214  vacc1xCDEF = vsraq_n_s32(vacc1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31);
  215  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  216  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  [all …]

8x8c4-minmax-neondot.c (in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()):
  241  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  242  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  243  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  244  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  245  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  246  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  247  vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31);
  248  vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31);
  249  vacc4x0123 = vsraq_n_s32(vacc4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31);
  250  vacc4x4567 = vsraq_n_s32(vacc4x4567, vbicq_s32(vacc4x4567, vzero_shift_mask), 31);
  [all …]

6x8c4-minmax-neondot.c (in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()):
  201  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  202  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  203  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  204  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  205  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  206  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  207  vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31);
  208  vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31);
  209  vacc4x0123 = vsraq_n_s32(vacc4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31);
  210  vacc4x4567 = vsraq_n_s32(vacc4x4567, vbicq_s32(vacc4x4567, vzero_shift_mask), 31);
  [all …]

4x8c4-minmax-neondot.c (in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()):
  161  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  162  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  163  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  164  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  165  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  166  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  167  vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31);
  168  vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31);

4x16c2-minmax-neon-mull-padal-dup.c (in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()):
  401  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
  402  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
  403  vacc0x89AB = vsraq_n_s32(vacc0x89AB, vbicq_s32(vacc0x89AB, vzero_shift_mask), 31);
  404  vacc0xCDEF = vsraq_n_s32(vacc0xCDEF, vbicq_s32(vacc0xCDEF, vzero_shift_mask), 31);
  405  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
  406  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
  407  vacc1x89AB = vsraq_n_s32(vacc1x89AB, vbicq_s32(vacc1x89AB, vzero_shift_mask), 31);
  408  vacc1xCDEF = vsraq_n_s32(vacc1xCDEF, vbicq_s32(vacc1xCDEF, vzero_shift_mask), 31);
  409  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
  410  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
  [all …]

/external/XNNPACK/src/qs8-vadd/gen/
minmax-neon-ld64-x32.c (in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()):
   71  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   72  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);
   73  vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31);
   74  vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31);
   75  vaccGHIJ = vsraq_n_s32(vaccGHIJ, vbicq_s32(vaccGHIJ, vzero_shift_mask), 31);
   76  vaccKLMN = vsraq_n_s32(vaccKLMN, vbicq_s32(vaccKLMN, vzero_shift_mask), 31);
   77  vaccOPQR = vsraq_n_s32(vaccOPQR, vbicq_s32(vaccOPQR, vzero_shift_mask), 31);
   78  vaccSTUV = vsraq_n_s32(vaccSTUV, vbicq_s32(vaccSTUV, vzero_shift_mask), 31);
  120  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
  121  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);

minmax-neon-ld64-x24.c (in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()):
   63  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   64  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);
   65  vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31);
   66  vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31);
   67  vaccGHIJ = vsraq_n_s32(vaccGHIJ, vbicq_s32(vaccGHIJ, vzero_shift_mask), 31);
   68  vaccKLMN = vsraq_n_s32(vaccKLMN, vbicq_s32(vaccKLMN, vzero_shift_mask), 31);
  107  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
  108  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);

minmax-neon-ld64-x16.c (in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()):
   55  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   56  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);
   57  vacc89AB = vsraq_n_s32(vacc89AB, vbicq_s32(vacc89AB, vzero_shift_mask), 31);
   58  vaccCDEF = vsraq_n_s32(vaccCDEF, vbicq_s32(vaccCDEF, vzero_shift_mask), 31);
   90  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   91  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);

minmax-neon-ld64-x8.c (in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()):
   47  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   48  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);
   77  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
   78  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);

/external/XNNPACK/src/qu8-requantization/
q31-neon.c (in xnn_qu8_requantize_q31__neon()):
   72  const int32x4_t x_adjusted_product = vsraq_n_s32(x_product, vbicq_s32(x, vshift_eq_0_mask), 31);
   73  const int32x4_t y_adjusted_product = vsraq_n_s32(y_product, vbicq_s32(y, vshift_eq_0_mask), 31);
   74  const int32x4_t z_adjusted_product = vsraq_n_s32(z_product, vbicq_s32(z, vshift_eq_0_mask), 31);
   75  const int32x4_t w_adjusted_product = vsraq_n_s32(w_product, vbicq_s32(w, vshift_eq_0_mask), 31);

/external/XNNPACK/src/qs8-requantization/
q31-neon.c (in xnn_qs8_requantize_q31__neon()):
   72  const int32x4_t x_adjusted_product = vsraq_n_s32(x_product, vbicq_s32(x, vshift_eq_0_mask), 31);
   73  const int32x4_t y_adjusted_product = vsraq_n_s32(y_product, vbicq_s32(y, vshift_eq_0_mask), 31);
   74  const int32x4_t z_adjusted_product = vsraq_n_s32(z_product, vbicq_s32(z, vshift_eq_0_mask), 31);
   75  const int32x4_t w_adjusted_product = vsraq_n_s32(w_product, vbicq_s32(w, vshift_eq_0_mask), 31);

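The two requantization files are where the idiom is easiest to read in context: x is the raw 32-bit input, x_product its Q31 fixed-point multiple, and the sign fixup uses the pre-multiply x before the rounding shift. A hedged reconstruction of one vector's worth of that step; only x, x_product, x_adjusted_product and vshift_eq_0_mask are visible in the matches, and the rest (vmultiplier, vright_shift, the surrounding calls) is assumed context:

#include <arm_neon.h>

// Hedged reconstruction, not verbatim XNNPACK code.
static int32x4_t requantize_q31_step(int32x4_t x, int32x4_t vmultiplier,
                                     int32x4_t vright_shift) {
  // All-ones lanes where no shift (and hence no rounding) will happen.
  const int32x4_t vshift_eq_0_mask =
      vreinterpretq_s32_u32(vceqq_s32(vright_shift, vdupq_n_s32(0)));
  // Q31 multiply: high 32 bits of 2*x*vmultiplier, with rounding.
  const int32x4_t x_product = vqrdmulhq_s32(x, vmultiplier);
  // Sign fixup taken from the pre-multiply input, suppressed when shift == 0
  // (the multiplier is positive, so x and x_product share a sign).
  const int32x4_t x_adjusted_product =
      vsraq_n_s32(x_product, vbicq_s32(x, vshift_eq_0_mask), 31);
  // Rounding right shift completes the requantization scale.
  return vrshlq_s32(x_adjusted_product, vright_shift);
}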