Home
last modified time | relevance | path

Searched refs:vb89ABCDEFc3 (Results 1 – 25 of 40) sorted by relevance

12

/external/XNNPACK/src/f16-gemm/gen/
D6x16-minmax-neonfp16arith-ld64.c209 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
218 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
219 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc3, va1, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
220 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
221 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
222 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
223 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc3, va5, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
238 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
239 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
240 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D8x16-minmax-neonfp16arith-ld64.c257 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
268 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
269 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc3, va1, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
270 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
271 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
272 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
273 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc3, va5, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
274 vacc6x89ABCDEF = vfmaq_lane_f16(vacc6x89ABCDEF, vb89ABCDEFc3, va6, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
275 vacc7x89ABCDEF = vfmaq_lane_f16(vacc7x89ABCDEF, vb89ABCDEFc3, va7, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
294 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
D4x16-minmax-neonfp16arith-ld64.c161 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
168 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
169 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc3, va1, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
170 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
171 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
182 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
183 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
184 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
185 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
D1x16-minmax-neonfp16arith-ld64.c89 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() local
93 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
98 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
/external/XNNPACK/src/f16-gemm/gen-inc/
D8x16inc-minmax-neonfp16arith-ld64.c259 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() local
270 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
271 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc3, va1, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
272 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
273 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
274 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
275 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc3, va5, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
276 vacc6x89ABCDEF = vfmaq_lane_f16(vacc6x89ABCDEF, vb89ABCDEFc3, va6, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
277 vacc7x89ABCDEF = vfmaq_lane_f16(vacc7x89ABCDEF, vb89ABCDEFc3, va7, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
296 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
D6x16inc-minmax-neonfp16arith-ld64.c211 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local
220 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
221 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc3, va1, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
222 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
223 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
224 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
225 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc3, va5, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
240 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
241 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
242 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D4x16inc-minmax-neonfp16arith-ld64.c163 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() local
170 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
171 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc3, va1, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
172 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
173 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
184 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
185 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
186 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
187 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
D1x16inc-minmax-neonfp16arith-ld64.c91 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() local
95 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
100 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
/external/XNNPACK/src/f16-igemm/gen/
D8x16-minmax-neonfp16arith-ld64.c289 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
300 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
301 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc3, va1, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
302 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
303 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
304 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
305 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc3, va5, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
306 vacc6x89ABCDEF = vfmaq_lane_f16(vacc6x89ABCDEF, vb89ABCDEFc3, va6, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
307 vacc7x89ABCDEF = vfmaq_lane_f16(vacc7x89ABCDEF, vb89ABCDEFc3, va7, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
326 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
D6x16-minmax-neonfp16arith-ld64.c235 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
244 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
245 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc3, va1, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
246 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
247 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
248 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
249 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc3, va5, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
264 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
265 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
266 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D4x16-minmax-neonfp16arith-ld64.c181 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
188 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
189 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc3, va1, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
190 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
191 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc3, va3, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
202 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
203 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
204 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
205 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
D1x16-minmax-neonfp16arith-ld64.c100 …const float16x8_t vb89ABCDEFc3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() local
104 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc3, va0, 3); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
109 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
/external/XNNPACK/src/f32-gemm/gen/
D4x16s4-minmax-fma3-broadcast.c134 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
140 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
141 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc3, vacc1x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
142 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
143 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
D5x16s4-minmax-fma3-broadcast.c153 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local
160 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
161 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc3, vacc1x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
162 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
163 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
164 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc3, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
D3x16s4-minmax-fma3-broadcast.c115 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() local
120 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
121 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc3, vacc1x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
122 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
D1x16s4-minmax-fma3-broadcast.c77 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast() local
80 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-gemm/gen-inc/
D4x16s4inc-minmax-fma3-broadcast.c136 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
142 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
143 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc3, vacc1x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
144 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
145 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
D5x16s4inc-minmax-fma3-broadcast.c155 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() local
162 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
163 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc3, vacc1x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
164 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
165 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
166 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc3, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
D3x16s4inc-minmax-fma3-broadcast.c117 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() local
122 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
123 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc3, vacc1x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
124 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
D1x16s4inc-minmax-fma3-broadcast.c79 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast() local
82 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-igemm/gen/
D5x16s4-minmax-fma3-broadcast.c178 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local
185 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
186 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc3, vacc1x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
187 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
188 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
189 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc3, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
D4x16s4-minmax-fma3-broadcast.c156 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() local
162 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
163 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc3, vacc1x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
164 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
165 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc3, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
D3x16s4-minmax-fma3-broadcast.c134 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() local
139 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
140 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc3, vacc1x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
141 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
D1x16s4-minmax-fma3-broadcast.c90 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast() local
93 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc3, vacc0x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
/external/XNNPACK/src/qs8-igemm/gen/
D4x16-minmax-neon-mull-addw-dup.c200 … const int8x8_t vb89ABCDEFc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup() local
202 const int16x8_t vprod0x89ABCDEFc3 = vmull_s8(vb89ABCDEFc3, vdup_lane_s8(va0, 3)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
205 const int16x8_t vprod1x89ABCDEFc3 = vmull_s8(vb89ABCDEFc3, vdup_lane_s8(va1, 3)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
208 const int16x8_t vprod2x89ABCDEFc3 = vmull_s8(vb89ABCDEFc3, vdup_lane_s8(va2, 3)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
211 const int16x8_t vprod3x89ABCDEFc3 = vmull_s8(vb89ABCDEFc3, vdup_lane_s8(va3, 3)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
423 … const int8x8_t vb89ABCDEFc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup() local
428 const int16x8_t vprod0x89ABCDEFc3 = vmull_s8(vb89ABCDEFc3, vdup_lane_s8(va0, 3)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
434 const int16x8_t vprod1x89ABCDEFc3 = vmull_s8(vb89ABCDEFc3, vdup_lane_s8(va1, 3)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
440 const int16x8_t vprod2x89ABCDEFc3 = vmull_s8(vb89ABCDEFc3, vdup_lane_s8(va2, 3)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
446 const int16x8_t vprod3x89ABCDEFc3 = vmull_s8(vb89ABCDEFc3, vdup_lane_s8(va3, 3)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()

12