/external/XNNPACK/src/f16-gemm/gen/ |
D | 6x16-minmax-neonfp16arith-ld64.c | 84 float16x8_t vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local 111 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 131 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 148 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 168 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 185 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 205 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 222 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 242 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 270 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() [all …]
|
D | 8x16-minmax-neonfp16arith-ld64.c | 96 float16x8_t vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local 131 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 157 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 178 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 204 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 225 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 251 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 272 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 298 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 332 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f16-gemm/gen-inc/ |
D | 6x16inc-minmax-neonfp16arith-ld64.c | 86 …float16x8_t vacc4x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local 113 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 133 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 150 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 170 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 187 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 207 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 224 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 244 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 272 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() [all …]
|
D | 8x16inc-minmax-neonfp16arith-ld64.c | 98 …float16x8_t vacc4x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() local 133 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 159 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 180 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 206 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 227 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 253 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 274 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 300 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 334 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f16-igemm/gen/ |
D | 6x16-minmax-neonfp16arith-ld64.c | 76 float16x8_t vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local 137 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 157 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 174 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 194 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 211 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 231 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 248 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 268 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 294 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() [all …]
|
D | 8x16-minmax-neonfp16arith-ld64.c | 84 float16x8_t vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local 163 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 189 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 210 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc1, va4, 1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 236 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 257 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc2, va4, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 283 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 304 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc3, va4, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 330 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 362 vacc4x89ABCDEF = vfmaq_f16(vacc4x89ABCDEF, va4, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 5x16s4-minmax-fma3-broadcast.c | 75 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local 104 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc0, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 124 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc1, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 144 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc2, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 164 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc3, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 196 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 212 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 224 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 228 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 258 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
|
D | 5x16-minmax-fma3-broadcast.c | 75 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast() local 104 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast() 119 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast() 131 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast() 135 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast() 165 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast()
|
D | 5x16-minmax-avx-broadcast.c | 75 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast() local 104 vacc4x89ABCDEF = _mm256_add_ps(vacc4x89ABCDEF, _mm256_mul_ps(va4, vb89ABCDEF)); in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast() 119 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast() 131 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast() 135 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast() 165 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast()
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 5x16s4inc-minmax-fma3-broadcast.c | 77 __m256 vacc4x89ABCDEF = _mm256_load_ps(acc + 72); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() local 106 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc0, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 126 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc1, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 146 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc2, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 166 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc3, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 198 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 214 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 226 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 230 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 260 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
|
D | 5x16inc-minmax-avx-broadcast.c | 77 __m256 vacc4x89ABCDEF = _mm256_load_ps(acc + 72); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast() local 106 vacc4x89ABCDEF = _mm256_add_ps(vacc4x89ABCDEF, _mm256_mul_ps(va4, vb89ABCDEF)); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast() 121 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast() 133 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast() 137 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast() 167 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast()
|
D | 5x16inc-minmax-fma3-broadcast.c | 77 __m256 vacc4x89ABCDEF = _mm256_load_ps(acc + 72); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast() local 106 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast() 121 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast() 133 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast() 137 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast() 167 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 5x16s4-minmax-fma3-broadcast.c | 71 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local 129 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc0, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 149 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc1, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 169 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc2, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 189 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc3, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 221 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 239 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 251 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 255 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 280 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
|
D | 5x16-minmax-avx-broadcast.c | 71 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast() local 129 vacc4x89ABCDEF = _mm256_add_ps(vacc4x89ABCDEF, _mm256_mul_ps(va4, vb89ABCDEF)); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast() 145 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast() 157 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast() 161 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast() 186 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast()
|
D | 5x16-minmax-fma3-broadcast.c | 71 __m256 vacc4x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast() local 129 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEF, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast() 145 vacc4x89ABCDEF = _mm256_min_ps(vacc4x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast() 157 vacc4x89ABCDEF = _mm256_max_ps(vacc4x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast() 161 _mm256_storeu_ps(c4 + 8, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast() 186 vacc4x01234567 = vacc4x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 6x16c4-minmax-neondot.c | 308 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local 316 int8x16_t vout4x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc4x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 328 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc4x89AB), vqmovn_s32(vacc4x… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local 336 …x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 380 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 392 int8x16_t vout4x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc4x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 406 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc4x89AB), vqmovn_s32(vacc4x… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 418 …x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 6x16c4-minmax-neondot.c | 328 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local 336 int8x16_t vout4x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc4x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 348 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc4x89AB), vqmovn_s32(vacc4x… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local 356 …x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 404 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 416 int8x16_t vout4x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc4x01234567), vacc4x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 430 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc4x89AB), vqmovn_s32(vacc4x… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 442 …x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
|