/external/XNNPACK/src/f32-dwconv/gen/ |
D | up16x25-minmax-fma3.c | 168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() local 178 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi0x89ABCDEF, vk0x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 187 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi1x89ABCDEF, vk1x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 196 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi2x89ABCDEF, vk2x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 205 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi3x89ABCDEF, vk3x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 214 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi4x89ABCDEF, vk4x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 223 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi5x89ABCDEF, vk5x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 232 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi6x89ABCDEF, vk6x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 241 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi7x89ABCDEF, vk7x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 250 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi8x89ABCDEF, vk8x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() [all …]
|
D | up16x25-minmax-avx.c | 168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() local 178 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi0x89ABCDEF, vk0x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 187 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi1x89ABCDEF, vk1x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 196 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi2x89ABCDEF, vk2x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 205 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi3x89ABCDEF, vk3x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 214 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi4x89ABCDEF, vk4x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 223 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi5x89ABCDEF, vk5x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 232 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi6x89ABCDEF, vk6x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 241 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi7x89ABCDEF, vk7x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 250 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi8x89ABCDEF, vk8x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() [all …]
|
D | up16x9-minmax-fma3.c | 88 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local 98 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi0x89ABCDEF, vk0x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 107 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi1x89ABCDEF, vk1x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 116 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi2x89ABCDEF, vk2x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 125 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi3x89ABCDEF, vk3x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 134 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi4x89ABCDEF, vk4x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 143 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi5x89ABCDEF, vk5x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 152 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi6x89ABCDEF, vk6x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 161 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi7x89ABCDEF, vk7x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 170 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi8x89ABCDEF, vk8x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() [all …]
|
D | up16x9-minmax-avx.c | 88 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() local 98 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi0x89ABCDEF, vk0x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() 107 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi1x89ABCDEF, vk1x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() 116 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi2x89ABCDEF, vk2x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() 125 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi3x89ABCDEF, vk3x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() 134 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi4x89ABCDEF, vk4x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() 143 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi5x89ABCDEF, vk5x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() 152 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi6x89ABCDEF, vk6x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() 161 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi7x89ABCDEF, vk7x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() 170 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi8x89ABCDEF, vk8x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() [all …]
|
D | up16x4-minmax-fma3.c | 63 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local 73 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi0x89ABCDEF, vk0x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() 82 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi1x89ABCDEF, vk1x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() 91 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi2x89ABCDEF, vk2x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() 100 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi3x89ABCDEF, vk3x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() 106 __m256 vacc89ABCDEF = _mm256_max_ps(vacc89ABCDEFp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
|
D | up16x4-minmax-avx.c | 63 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local 73 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi0x89ABCDEF, vk0x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() 82 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi1x89ABCDEF, vk1x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() 91 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi2x89ABCDEF, vk2x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() 100 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi3x89ABCDEF, vk3x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() 106 __m256 vacc89ABCDEF = _mm256_max_ps(vacc89ABCDEFp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
|
D | up16x9-minmax-fma3-acc2.c | 88 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local 98 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi0x89ABCDEF, vk0x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 116 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi2x89ABCDEF, vk2x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 134 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi4x89ABCDEF, vk4x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 152 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi6x89ABCDEF, vk6x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 170 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi8x89ABCDEF, vk8x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 176 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, vacc89ABCDEFp1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 179 __m256 vacc89ABCDEF = _mm256_max_ps(vacc89ABCDEFp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
|
D | up16x25-minmax-fma3-acc2.c | 168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() local 178 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi0x89ABCDEF, vk0x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 196 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi2x89ABCDEF, vk2x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 214 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi4x89ABCDEF, vk4x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 232 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi6x89ABCDEF, vk6x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 250 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi8x89ABCDEF, vk8x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 268 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi10x89ABCDEF, vk10x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 286 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi12x89ABCDEF, vk12x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 304 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi14x89ABCDEF, vk14x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 322 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi16x89ABCDEF, vk16x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() [all …]
|
D | up16x25-minmax-avx-acc2.c | 168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() local 178 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi0x89ABCDEF, vk0x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 196 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi2x89ABCDEF, vk2x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 214 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi4x89ABCDEF, vk4x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 232 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi6x89ABCDEF, vk6x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 250 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi8x89ABCDEF, vk8x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 268 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi10x89ABCDEF, vk10x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 286 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi12x89ABCDEF, vk12x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 304 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi14x89ABCDEF, vk14x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 322 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi16x89ABCDEF, vk16x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() [all …]
|
D | up16x4-minmax-fma3-acc2.c | 63 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local 73 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi0x89ABCDEF, vk0x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() 91 vacc89ABCDEFp0 = _mm256_fmadd_ps(vi2x89ABCDEF, vk2x89ABCDEF, vacc89ABCDEFp0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() 106 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, vacc89ABCDEFp1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() 109 __m256 vacc89ABCDEF = _mm256_max_ps(vacc89ABCDEFp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
|
D | up16x4-minmax-avx-acc2.c | 63 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local 73 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi0x89ABCDEF, vk0x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() 91 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi2x89ABCDEF, vk2x89ABCDEF)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() 106 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, vacc89ABCDEFp1); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() 109 __m256 vacc89ABCDEF = _mm256_max_ps(vacc89ABCDEFp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
|
/external/XNNPACK/src/f16-dwconv/gen/ |
D | up16x25-minmax-neonfp16arith.c | 168 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() local 176 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi0x89ABCDEF, vk0x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 183 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi1x89ABCDEF, vk1x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 190 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi2x89ABCDEF, vk2x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 197 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi3x89ABCDEF, vk3x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 204 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi4x89ABCDEF, vk4x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 211 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi5x89ABCDEF, vk5x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 218 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi6x89ABCDEF, vk6x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 225 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi7x89ABCDEF, vk7x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() 232 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi8x89ABCDEF, vk8x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() [all …]
|
D | up16x9-minmax-neonfp16arith.c | 88 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() local 96 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi0x89ABCDEF, vk0x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 103 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi1x89ABCDEF, vk1x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 110 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi2x89ABCDEF, vk2x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 117 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi3x89ABCDEF, vk3x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 124 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi4x89ABCDEF, vk4x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 131 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi5x89ABCDEF, vk5x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 138 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi6x89ABCDEF, vk6x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 145 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi7x89ABCDEF, vk7x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() 152 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi8x89ABCDEF, vk8x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() [all …]
|
D | up16x4-minmax-neonfp16arith.c | 63 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local 71 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi0x89ABCDEF, vk0x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 78 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi1x89ABCDEF, vk1x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 85 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi2x89ABCDEF, vk2x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 92 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi3x89ABCDEF, vk3x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 96 float16x8_t vacc89ABCDEF = vmaxq_f16(vacc89ABCDEFp0, vmin); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
|
D | up16x25-minmax-neonfp16arith-acc2.c | 168 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() local 176 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi0x89ABCDEF, vk0x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 190 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi2x89ABCDEF, vk2x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 204 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi4x89ABCDEF, vk4x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 218 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi6x89ABCDEF, vk6x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 232 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi8x89ABCDEF, vk8x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 246 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi10x89ABCDEF, vk10x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 260 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi12x89ABCDEF, vk12x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 274 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi14x89ABCDEF, vk14x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 288 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi16x89ABCDEF, vk16x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() [all …]
|
D | up16x9-minmax-neonfp16arith-acc2.c | 88 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local 96 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi0x89ABCDEF, vk0x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 110 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi2x89ABCDEF, vk2x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 124 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi4x89ABCDEF, vk4x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 138 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi6x89ABCDEF, vk6x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 152 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi8x89ABCDEF, vk8x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 156 vacc89ABCDEFp0 = vaddq_f16(vacc89ABCDEFp0, vacc89ABCDEFp1); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 159 float16x8_t vacc89ABCDEF = vmaxq_f16(vacc89ABCDEFp0, vmin); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
|
D | up16x4-minmax-neonfp16arith-acc2.c | 63 float16x8_t vacc89ABCDEFp0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local 71 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi0x89ABCDEF, vk0x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 85 vacc89ABCDEFp0 = vfmaq_f16(vacc89ABCDEFp0, vi2x89ABCDEF, vk2x89ABCDEF); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 96 vacc89ABCDEFp0 = vaddq_f16(vacc89ABCDEFp0, vacc89ABCDEFp1); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 99 float16x8_t vacc89ABCDEF = vmaxq_f16(vacc89ABCDEFp0, vmin); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
|