Home
last modified time | relevance | path

Searched refs:vb14 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-igemm/gen/
D3x16c16-minmax-neon-mlal-padal.c141 const int8x16_t vb14 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
270 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
271 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
272 int16x8_t vprod2x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
273 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
274 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
275 vprod2x14 = vmlal_s8(vprod2x14, vget_high_s8(vb14), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c166 const int8x16_t vb14 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
337 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
338 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
339 int16x8_t vprod2x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
340 int16x8_t vprod3x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
341 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
342 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
343 vprod2x14 = vmlal_s8(vprod2x14, vget_high_s8(vb14), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
344 vprod3x14 = vmlal_s8(vprod3x14, vget_high_s8(vb14), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c116 const int8x16_t vb14 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
203 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
204 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
205 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
206 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c91 const int8x16_t vb14 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
136 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
137 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D2x16c8-minmax-neon-mull-padal.c173 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
174 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
175 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mull-padal.c279 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
280 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
281 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
282 const int16x8_t vprod2x14 = vmull_s8(vb14, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
283 const int16x8_t vprod3x14 = vmull_s8(vb14, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mull-padal.c226 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
227 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
228 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
229 const int16x8_t vprod2x14 = vmull_s8(vb14, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mull-padal.c120 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
121 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c312 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
313 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
314 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D1x16c8-minmax-neon-mlal-padal.c209 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
210 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c518 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
519 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
520 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
521 const int16x8_t vprod2x14 = vmull_s8(vb14, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
522 const int16x8_t vprod3x14 = vmull_s8(vb14, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c415 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
416 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
417 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
418 const int16x8_t vprod2x14 = vmull_s8(vb14, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-gemm/gen/
D3x16c16-minmax-neon-mlal-padal.c126 const int8x16_t vb14 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
255 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
256 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
257 int16x8_t vprod2x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
258 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
259 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
260 vprod2x14 = vmlal_s8(vprod2x14, vget_high_s8(vb14), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c149 const int8x16_t vb14 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
320 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
321 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
322 int16x8_t vprod2x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
323 int16x8_t vprod3x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
324 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
325 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
326 vprod2x14 = vmlal_s8(vprod2x14, vget_high_s8(vb14), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
327 vprod3x14 = vmlal_s8(vprod3x14, vget_high_s8(vb14), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c103 const int8x16_t vb14 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
190 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
191 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
192 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
193 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c80 const int8x16_t vb14 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
125 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
126 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D1x16c8-minmax-neon-mull-padal.c109 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
110 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c160 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
161 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
162 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mull-padal.c262 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
263 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
264 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
265 const int16x8_t vprod2x14 = vmull_s8(vb14, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
266 const int16x8_t vprod3x14 = vmull_s8(vb14, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mull-padal.c211 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
212 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
213 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
214 const int16x8_t vprod2x14 = vmull_s8(vb14, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c400 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
401 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
402 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
403 const int16x8_t vprod2x14 = vmull_s8(vb14, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c299 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
300 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
301 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D1x16c8-minmax-neon-mlal-padal.c198 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
199 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c501 const int8x8_t vb14 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
502 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
503 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
504 const int16x8_t vprod2x14 = vmull_s8(vb14, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
505 const int16x8_t vprod3x14 = vmull_s8(vb14, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()