Home
last modified time | relevance | path

Searched refs:vb10 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-igemm/gen/
D3x16c16-minmax-neon-mlal-padal.c137 const int8x16_t vb10 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
234 int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
235 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
236 int16x8_t vprod2x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
237 vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
238 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
239 vprod2x10 = vmlal_s8(vprod2x10, vget_high_s8(vb10), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c162 const int8x16_t vb10 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
289 int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
290 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
291 int16x8_t vprod2x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
292 int16x8_t vprod3x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
293 vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
294 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
295 vprod2x10 = vmlal_s8(vprod2x10, vget_high_s8(vb10), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
296 vprod3x10 = vmlal_s8(vprod3x10, vget_high_s8(vb10), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c112 const int8x16_t vb10 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
179 int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
180 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
181 vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
182 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c87 const int8x16_t vb10 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
124 int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
125 vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D2x16c8-minmax-neon-mull-padal.c153 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
154 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
155 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mull-padal.c243 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
244 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
245 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
246 const int16x8_t vprod2x10 = vmull_s8(vb10, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
247 const int16x8_t vprod3x10 = vmull_s8(vb10, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mull-padal.c198 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
199 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
200 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
201 const int16x8_t vprod2x10 = vmull_s8(vb10, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mull-padal.c108 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
109 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c292 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
293 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
294 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D1x16c8-minmax-neon-mlal-padal.c197 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
198 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c482 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
483 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
484 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
485 const int16x8_t vprod2x10 = vmull_s8(vb10, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
486 const int16x8_t vprod3x10 = vmull_s8(vb10, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c387 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
388 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
389 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
390 const int16x8_t vprod2x10 = vmull_s8(vb10, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-gemm/gen/
D3x16c16-minmax-neon-mlal-padal.c122 const int8x16_t vb10 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
219 int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
220 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
221 int16x8_t vprod2x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
222 vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
223 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
224 vprod2x10 = vmlal_s8(vprod2x10, vget_high_s8(vb10), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c145 const int8x16_t vb10 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
272 int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
273 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
274 int16x8_t vprod2x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
275 int16x8_t vprod3x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
276 vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
277 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
278 vprod2x10 = vmlal_s8(vprod2x10, vget_high_s8(vb10), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
279 vprod3x10 = vmlal_s8(vprod3x10, vget_high_s8(vb10), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c99 const int8x16_t vb10 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
166 int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
167 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
168 vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
169 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c76 const int8x16_t vb10 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
113 int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
114 vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D1x16c8-minmax-neon-mull-padal.c97 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
98 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c140 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
141 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
142 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mull-padal.c226 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
227 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
228 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
229 const int16x8_t vprod2x10 = vmull_s8(vb10, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
230 const int16x8_t vprod3x10 = vmull_s8(vb10, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mull-padal.c183 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
184 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
185 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
186 const int16x8_t vprod2x10 = vmull_s8(vb10, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c372 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
373 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
374 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
375 const int16x8_t vprod2x10 = vmull_s8(vb10, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c279 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
280 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
281 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D1x16c8-minmax-neon-mlal-padal.c186 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
187 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c465 const int8x8_t vb10 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
466 const int16x8_t vprod0x10 = vmull_s8(vb10, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
467 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
468 const int16x8_t vprod2x10 = vmull_s8(vb10, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
469 const int16x8_t vprod3x10 = vmull_s8(vb10, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()