Home
last modified time | relevance | path

Searched refs:vacc4x89ABCDEF (Results 1 – 19 of 19) sorted by relevance

/external/XNNPACK/src/f16-gemm/gen/
D6x16-minmax-neonfp16arith-ld64.c84 float16x8_t vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
111 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
131 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
148 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
168 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
185 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
205 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
222 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
242 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
270 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D8x16-minmax-neonfp16arith-ld64.c96 float16x8_t vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
131 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
157 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
178 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
204 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
225 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
251 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
272 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
298 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
332 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-gemm/gen-inc/
D6x16inc-minmax-neonfp16arith-ld64.c86 …float16x8_t vacc4x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local
113 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
133 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
150 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
170 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
187 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
207 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
224 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
244 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
272 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D8x16inc-minmax-neonfp16arith-ld64.c98 …float16x8_t vacc4x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() local
133 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
159 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
180 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
206 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
227 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
253 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
274 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
300 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
334 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-igemm/gen/
D6x16-minmax-neonfp16arith-ld64.c76 float16x8_t vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
137 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
157 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
174 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
194 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
211 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
231 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
248 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
268 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
294 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D8x16-minmax-neonfp16arith-ld64.c84 float16x8_t vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
163 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
189 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
210 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
236 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
257 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
283 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
304 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
330 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
362 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f32-gemm/gen/
D5x16s4-minmax-fma3-broadcast.c75 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local
104 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc0, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
124 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc1, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
144 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc2, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
164 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc3, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
196 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
212 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
224 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
228 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
258 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
D5x16-minmax-fma3-broadcast.c75 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast() local
104 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast()
119 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast()
131 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast()
135 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast()
165 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast()
D5x16-minmax-avx-broadcast.c75 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast() local
104 vacc4x89ABCDEF = _mm256_add_ps(vacc4x89ABCDEF, _mm256_mul_ps(va4, vb89ABCDEF)); in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast()
119 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast()
131 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast()
135 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast()
165 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast()
/external/XNNPACK/src/f32-gemm/gen-inc/
D5x16s4inc-minmax-fma3-broadcast.c77 __m256 vacc4x89ABCDEF = _mm256_load_ps(acc + 72); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() local
106 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc0, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
126 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc1, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
146 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc2, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
166 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc3, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
198 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
214 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
226 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
230 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
260 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
D5x16inc-minmax-avx-broadcast.c77 __m256 vacc4x89ABCDEF = _mm256_load_ps(acc + 72); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast() local
106 vacc4x89ABCDEF = _mm256_add_ps(vacc4x89ABCDEF, _mm256_mul_ps(va4, vb89ABCDEF)); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast()
121 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast()
133 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast()
137 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast()
167 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast()
D5x16inc-minmax-fma3-broadcast.c77 __m256 vacc4x89ABCDEF = _mm256_load_ps(acc + 72); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast() local
106 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast()
121 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast()
133 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast()
137 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast()
167 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast()
/external/XNNPACK/src/f32-igemm/gen/
D5x16s4-minmax-fma3-broadcast.c71 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local
129 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc0, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
149 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc1, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
169 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc2, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
189 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc3, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
221 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
239 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
251 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
255 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
280 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
D5x16-minmax-avx-broadcast.c71 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast() local
129 vacc4x89ABCDEF = _mm256_add_ps(vacc4x89ABCDEF, _mm256_mul_ps(va4, vb89ABCDEF)); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast()
145 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast()
157 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast()
161 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast()
186 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast()
D5x16-minmax-fma3-broadcast.c71 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast() local
129 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast()
145 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast()
157 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast()
161 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast()
186 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast()
/external/XNNPACK/src/qs8-gemm/gen/
D6x16c4-minmax-neondot.c308 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local
316 int8x16_t vout4x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc4x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
328 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc4x89AB), vqmovn_s32(vacc4x… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local
336 …x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
D8x16c4-minmax-neondot.c380 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local
392 int8x16_t vout4x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc4x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
406 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc4x89AB), vqmovn_s32(vacc4x… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local
418 …x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
/external/XNNPACK/src/qs8-igemm/gen/
D6x16c4-minmax-neondot.c328 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local
336 int8x16_t vout4x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc4x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
348 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc4x89AB), vqmovn_s32(vacc4x… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local
356 …x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
D8x16c4-minmax-neondot.c404 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local
416 int8x16_t vout4x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc4x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
430 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc4x89AB), vqmovn_s32(vacc4x… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local
442 …x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()