
Searched refs:vacc5x01234567 (Results 1 – 25 of 37) sorted by relevance

/external/XNNPACK/src/f16-gemm/gen/
6x8-minmax-neonfp16arith-ld64.c
80 float16x8_t vacc5x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
99 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
113 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
123 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
137 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
147 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
161 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
171 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
185 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
206 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
8x8-minmax-neonfp16arith-ld64.c
92 float16x8_t vacc5x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
115 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
133 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
145 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
163 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
175 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
193 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
205 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
223 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
248 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x16-minmax-neonfp16arith-ld64.c
85 float16x8_t vacc5x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
106 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
126 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
143 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
163 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
180 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
200 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
217 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
237 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
265 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
8x16-minmax-neonfp16arith-ld64.c
97 float16x8_t vacc5x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
124 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
150 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
171 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
197 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
218 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
244 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
265 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
291 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
325 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
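
Across these f16 GEMM microkernels, vacc5x01234567 is the float16x8_t accumulator for output row 5, columns 0–7 of the tile: it starts as a copy of the row-0 accumulator vacc0x01234567 and is then updated once per k step, with vfmaq_lane_f16 in the unrolled ld64 main loop (four A values per row per iteration) and vfmaq_f16 in the k remainder. The fragment below is a minimal sketch of that main-loop update for one row, assuming <arm_neon.h> on an ARMv8.2-A FP16 target; the helper name and the 4x8 packed-B layout are illustrative, not the generated kernel.

#include <arm_neon.h>

/* Minimal sketch of the ld64 update for output row 5 (illustrative, not the
 * generated XNNPACK kernel): four A values feed four lane-broadcast FMAs. */
static float16x8_t update_row5_ld64(float16x8_t vacc5x01234567,
                                    const float16_t *a5,  /* 4 fp16 values of A, row 5 */
                                    const float16_t *w)   /* 4x8 packed block of B     */
{
  const float16x4_t va5 = vld1_f16(a5);                /* load 4 A values */
  const float16x8_t vb01234567c0 = vld1q_f16(w + 0);   /* B row for k+0   */
  const float16x8_t vb01234567c1 = vld1q_f16(w + 8);   /* B row for k+1   */
  const float16x8_t vb01234567c2 = vld1q_f16(w + 16);  /* B row for k+2   */
  const float16x8_t vb01234567c3 = vld1q_f16(w + 24);  /* B row for k+3   */
  /* accumulate one A lane times one B row per step */
  vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0);
  vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1);
  vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2);
  vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3);
  return vacc5x01234567;
}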
/external/XNNPACK/src/f16-gemm/gen-inc/
6x8inc-minmax-neonfp16arith-ld64.c
82 …float16x8_t vacc5x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64() local
101 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
115 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
125 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
139 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
149 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
163 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
173 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
187 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
208 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
8x8inc-minmax-neonfp16arith-ld64.c
94 …float16x8_t vacc5x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() local
117 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
135 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
147 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
165 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
177 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
195 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
207 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
225 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
250 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x16inc-minmax-neonfp16arith-ld64.c
87 …float16x8_t vacc5x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local
108 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
128 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
145 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
165 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
182 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
202 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
219 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
239 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
267 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
8x16inc-minmax-neonfp16arith-ld64.c
99 …float16x8_t vacc5x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() local
126 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
152 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
173 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
199 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
220 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
246 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
267 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
293 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
327 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
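
The gen-inc counterparts (xnn_f16_gemminc_*) use the same FMA loop; what changes is the accumulator's starting value. Instead of copying vacc0x01234567, row 5 resumes from a buffer of partial sums loaded with vld1q_f16(acc), which is what lets a large GEMM be accumulated over several passes. A small sketch of the two initialization styles, assuming <arm_neon.h>; the helper names are illustrative.

#include <arm_neon.h>

/* Sketch of the two initialization styles visible above (illustrative only). */
static float16x8_t init_row5_gemm(float16x8_t vacc0x01234567) {
  /* gemm: every row starts from the shared row-0 accumulator */
  return vacc0x01234567;
}

static float16x8_t init_row5_gemminc(const float16_t **acc) {
  /* gemminc: row 5 resumes from previously written partial sums */
  float16x8_t vacc5x01234567 = vld1q_f16(*acc);
  *acc += 8;  /* advance past this row's 8 partial sums */
  return vacc5x01234567;
}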
/external/XNNPACK/src/f16-igemm/gen/
6x8-minmax-neonfp16arith-ld64.c
72 float16x8_t vacc5x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
125 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
139 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
149 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
163 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
173 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
187 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
197 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
211 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
230 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
8x8-minmax-neonfp16arith-ld64.c
80 float16x8_t vacc5x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
147 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
165 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
177 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
195 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
207 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
225 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
237 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
255 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
278 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x16-minmax-neonfp16arith-ld64.c
77 float16x8_t vacc5x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
132 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
152 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
169 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
189 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
206 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
226 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
243 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
263 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
289 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
8x16-minmax-neonfp16arith-ld64.c
85 float16x8_t vacc5x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
156 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
182 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
203 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c1, va5, 1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
229 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
250 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c2, va5, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
276 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
297 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
323 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
355 vacc5x01234567 = vfmaq_f16(vacc5x01234567, va5, vb01234567); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
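
The f16 IGEMM kernels accumulate into vacc5x01234567 with the same vfmaq_lane_f16/vfmaq_f16 pattern; the difference is that the A rows are reached through an indirection buffer of row pointers, so the input does not have to be contiguous (this is how convolution reuses the GEMM kernel without an explicit im2col). A hedged sketch of one remainder-style step for row 5, assuming <arm_neon.h>; the parameter names and layout are illustrative.

#include <arm_neon.h>

/* Illustrative sketch of an indirect-GEMM step for row 5: the row pointer is
 * fetched from an indirection buffer, then one broadcast A value is folded in. */
static float16x8_t igemm_row5_step(float16x8_t vacc5x01234567,
                                   const float16_t *const *indirection,
                                   const float16_t *w)  /* 8 packed B values */
{
  const float16_t *a5 = indirection[5];         /* row-5 pointer for this tap */
  const float16x8_t va5 = vdupq_n_f16(a5[0]);   /* broadcast one A value      */
  const float16x8_t vb01234567 = vld1q_f16(w);
  return vfmaq_f16(vacc5x01234567, va5, vb01234567);
}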
/external/XNNPACK/src/f32-gemm/gen-inc/
6x8inc-minmax-fma3-broadcast.c
79 __m256 vacc5x01234567 = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast() local
105 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast()
116 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast()
124 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast()
127 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast()
149 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast()
163 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast()
6x8inc-minmax-avx-broadcast.c
79 __m256 vacc5x01234567 = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast() local
105 vacc5x01234567 = _mm256_add_ps(vacc5x01234567, _mm256_mul_ps(va5, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast()
116 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast()
124 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast()
127 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast()
149 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast()
163 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast()
7x8inc-minmax-fma3-broadcast.c
85 __m256 vacc5x01234567 = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast() local
114 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
126 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
135 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
141 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
165 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
181 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
7x8inc-minmax-avx-broadcast.c
85 __m256 vacc5x01234567 = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast() local
114 vacc5x01234567 = _mm256_add_ps(vacc5x01234567, _mm256_mul_ps(va5, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
126 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
135 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
141 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
165 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
181 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
8x8inc-minmax-fma3-broadcast.c
91 __m256 vacc5x01234567 = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() local
123 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
136 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
146 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
155 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
181 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
199 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
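
In the f32 gen-inc kernels, vacc5x01234567 is a __m256 of eight partial sums for row 5: it resumes from acc + 40 (row 5 × 8 columns into the partial-results buffer) and is updated with _mm256_fmadd_ps in the FMA3 kernels or with a separate multiply and add in the plain AVX kernels. A minimal sketch of that step, assuming <immintrin.h>, a 32-byte-aligned acc buffer, and FMA3 availability on the fused path; the helper and its has_fma3 switch are illustrative, not how the generated kernels are structured.

#include <immintrin.h>

/* Sketch of the row-5 accumulate step in the two ISA flavors above
 * (illustrative; acc is assumed 32-byte aligned). */
static __m256 f32_row5_step(const float *acc, __m256 va5, __m256 vb01234567,
                            int has_fma3)
{
  __m256 vacc5x01234567 = _mm256_load_ps(acc + 40);  /* resume row 5 (5 * 8 floats) */
  if (has_fma3) {
    /* FMA3 kernels: fused multiply-add, single rounding */
    vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567);
  } else {
    /* plain AVX kernels: separate multiply and add */
    vacc5x01234567 = _mm256_add_ps(vacc5x01234567,
                                   _mm256_mul_ps(va5, vb01234567));
  }
  return vacc5x01234567;
}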
/external/XNNPACK/src/f32-gemm/gen/
6x8-minmax-avx-broadcast.c
77 __m256 vacc5x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast() local
103 vacc5x01234567 = _mm256_add_ps(vacc5x01234567, _mm256_mul_ps(va5, vb01234567)); in xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast()
114 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast()
122 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast()
125 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast()
147 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast()
161 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast()
6x8-minmax-fma3-broadcast.c
77 __m256 vacc5x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast() local
103 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast()
114 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast()
122 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast()
125 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast()
147 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast()
161 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast()
7x8-minmax-avx-broadcast.c
83 __m256 vacc5x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast() local
112 vacc5x01234567 = _mm256_add_ps(vacc5x01234567, _mm256_mul_ps(va5, vb01234567)); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
124 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
133 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
139 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
163 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
179 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
7x8-minmax-fma3-broadcast.c
83 __m256 vacc5x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast() local
112 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
124 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
133 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
139 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
163 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
179 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
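
These f32 GEMM results also show the shared epilogue: the accumulator is clamped against vmax then vmin, stored as a full 8-wide row when at least 8 columns remain, and otherwise narrowed to its 128-bit halves with _mm256_castps256_ps128/_mm256_extractf128_ps so the 4/2/1-column remainders can be written. The sketch below follows that pattern for row 5, assuming <immintrin.h>; the helper name and the exact remainder bookkeeping are illustrative.

#include <immintrin.h>
#include <stddef.h>

/* Illustrative clamp-and-store epilogue for row 5 (not the generated kernel). */
static void store_row5(float *c5, __m256 vacc5x01234567,
                       __m256 vmin, __m256 vmax, size_t nc)
{
  vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax);  /* clamp to [vmin, vmax] */
  vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin);
  if (nc >= 8) {
    _mm256_storeu_ps(c5, vacc5x01234567);                /* full 8-column store   */
    return;
  }
  /* column remainder: work on the low half, then the high half */
  __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567);
  if (nc & 4) {
    _mm_storeu_ps(c5, vacc5x0123);
    vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1);
    c5 += 4;
  }
  if (nc & 2) {
    _mm_storel_pi((__m64 *) c5, vacc5x0123);             /* store 2 floats */
    vacc5x0123 = _mm_movehl_ps(vacc5x0123, vacc5x0123);
    c5 += 2;
  }
  if (nc & 1) {
    _mm_store_ss(c5, vacc5x0123);                        /* store 1 float  */
  }
}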
/external/XNNPACK/src/f32-igemm/gen/
6x8-minmax-avx-broadcast.c
71 __m256 vacc5x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast() local
131 vacc5x01234567 = _mm256_add_ps(vacc5x01234567, _mm256_mul_ps(va5, vb01234567)); in xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast()
143 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast()
151 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast()
154 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast()
170 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast()
184 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast()
6x8-minmax-fma3-broadcast.c
71 __m256 vacc5x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast() local
131 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast()
143 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast()
151 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast()
154 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast()
170 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast()
184 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast()
7x8-minmax-avx-broadcast.c
75 __m256 vacc5x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast() local
143 vacc5x01234567 = _mm256_add_ps(vacc5x01234567, _mm256_mul_ps(va5, vb01234567)); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
156 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
165 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
171 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
188 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
204 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
7x8-minmax-fma3-broadcast.c
75 __m256 vacc5x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast() local
143 vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
156 vacc5x01234567 = _mm256_min_ps(vacc5x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
165 vacc5x01234567 = _mm256_max_ps(vacc5x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
171 _mm256_storeu_ps(c5, vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
188 __m128 vacc5x0123 = _mm256_castps256_ps128(vacc5x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
204 vacc5x0123 = _mm256_extractf128_ps(vacc5x01234567, 1); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
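
The f32 IGEMM kernels wrap this same accumulate step in an outer loop over the indirection buffer: each tap supplies a fresh row-5 pointer before the k loop runs, and the clamp/store epilogue above is unchanged. A simplified, illustrative sketch, assuming <immintrin.h>; the 6-row tile, the loop counters, and the unaligned weight loads are assumptions rather than the generated kernels' exact bookkeeping.

#include <immintrin.h>
#include <stddef.h>

/* Simplified sketch of the indirect-GEMM outer structure for row 5. */
static __m256 igemm_row5(__m256 vacc5x01234567,
                         const float *const *a,  /* indirection buffer          */
                         const float *w,         /* packed B, 8 floats per k    */
                         size_t kc,              /* A values per row per tap    */
                         size_t ks)              /* number of taps (simplified) */
{
  do {
    const float *a5 = a[5];                      /* row-5 pointer for this tap  */
    a += 6;                                      /* assume a 6-row tile         */
    for (size_t k = 0; k < kc; k++) {
      const __m256 va5 = _mm256_broadcast_ss(a5);
      a5 += 1;
      const __m256 vb01234567 = _mm256_loadu_ps(w);
      w += 8;
      vacc5x01234567 = _mm256_fmadd_ps(va5, vb01234567, vacc5x01234567);
    }
  } while (--ks != 0);
  return vacc5x01234567;
}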
