/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c8-minmax-neon-mlal-padal.c | 137 int16x8_t vprod3x0 = vmull_s8(vb0x0, va3x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 141 vprod3x0 = vmlal_s8(vprod3x0, vb0x1, va3x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 145 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 252 const int16x8_t vprod3x0 = vmull_s8(vb0, va3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 256 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 132 int16x8_t vprod3x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 136 vprod3x0 = vmlal_s8(vprod3x0, vget_high_s8(vb0), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 140 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 177 int16x8_t vprod3x0 = vmull_s8(vb0x0, va3x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 181 vprod3x0 = vmlal_s8(vprod3x0, vb0x1, va3x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 185 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 396 const int16x8_t vprod3x0 = vmull_s8(vb0, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 400 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 172 int16x8_t vprod3x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 176 vprod3x0 = vmlal_s8(vprod3x0, vget_high_s8(vb0), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 180 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c | 125 const int16x8_t vprod3x0 = vmull_s8(vb0, va3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local 129 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 157 const int16x8_t vprod3x0 = vmull_s8(vb0, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 161 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8c8-minmax-neon-mlal-padal.c | 120 int16x8_t vprod3x0 = vmull_s8(vb0x0, va3x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 124 vprod3x0 = vmlal_s8(vprod3x0, vb0x1, va3x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 128 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 235 const int16x8_t vprod3x0 = vmull_s8(vb0, va3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 239 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 115 int16x8_t vprod3x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 119 vprod3x0 = vmlal_s8(vprod3x0, vget_high_s8(vb0), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() 123 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 160 int16x8_t vprod3x0 = vmull_s8(vb0x0, va3x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 164 vprod3x0 = vmlal_s8(vprod3x0, vb0x1, va3x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 168 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 379 const int16x8_t vprod3x0 = vmull_s8(vb0, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 383 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 155 int16x8_t vprod3x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 159 vprod3x0 = vmlal_s8(vprod3x0, vget_high_s8(vb0), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 163 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c | 108 const int16x8_t vprod3x0 = vmull_s8(vb0, va3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local 112 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 140 const int16x8_t vprod3x0 = vmull_s8(vb0, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 144 vacc3x0 = vpadalq_s16(vacc3x0, vprod3x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|