/external/XNNPACK/src/f32-dwconv/gen/ |
D | up32x25-minmax-avx512f.c | 167 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 177 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi0xGHIJKLMNOPQRSTUV, vk0xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 186 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi1xGHIJKLMNOPQRSTUV, vk1xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 195 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi2xGHIJKLMNOPQRSTUV, vk2xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 204 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi3xGHIJKLMNOPQRSTUV, vk3xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 213 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi4xGHIJKLMNOPQRSTUV, vk4xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 222 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi5xGHIJKLMNOPQRSTUV, vk5xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 231 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi6xGHIJKLMNOPQRSTUV, vk6xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 240 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi7xGHIJKLMNOPQRSTUV, vk7xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 249 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi8xGHIJKLMNOPQRSTUV, vk8xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() [all …]
|
D | up32x9-minmax-avx512f.c | 87 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 97 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi0xGHIJKLMNOPQRSTUV, vk0xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 106 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi1xGHIJKLMNOPQRSTUV, vk1xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 115 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi2xGHIJKLMNOPQRSTUV, vk2xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 124 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi3xGHIJKLMNOPQRSTUV, vk3xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 133 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi4xGHIJKLMNOPQRSTUV, vk4xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 142 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi5xGHIJKLMNOPQRSTUV, vk5xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 151 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi6xGHIJKLMNOPQRSTUV, vk6xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 160 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi7xGHIJKLMNOPQRSTUV, vk7xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 169 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi8xGHIJKLMNOPQRSTUV, vk8xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() [all …]
|
D | up32x4-minmax-avx512f.c | 62 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() local 72 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi0xGHIJKLMNOPQRSTUV, vk0xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 81 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi1xGHIJKLMNOPQRSTUV, vk1xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 90 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi2xGHIJKLMNOPQRSTUV, vk2xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 99 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi3xGHIJKLMNOPQRSTUV, vk3xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 105 __m512 vaccGHIJKLMNOPQRSTUV = _mm512_max_ps(vaccGHIJKLMNOPQRSTUVp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f()
|
D | up32x9-minmax-avx512f-acc2.c | 87 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 97 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi0xGHIJKLMNOPQRSTUV, vk0xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 115 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi2xGHIJKLMNOPQRSTUV, vk2xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 133 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi4xGHIJKLMNOPQRSTUV, vk4xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 151 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi6xGHIJKLMNOPQRSTUV, vk6xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 169 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi8xGHIJKLMNOPQRSTUV, vk8xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 175 vaccGHIJKLMNOPQRSTUVp0 = _mm512_add_ps(vaccGHIJKLMNOPQRSTUVp0, vaccGHIJKLMNOPQRSTUVp1); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 178 __m512 vaccGHIJKLMNOPQRSTUV = _mm512_max_ps(vaccGHIJKLMNOPQRSTUVp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
|
D | up32x25-minmax-avx512f-acc2.c | 167 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 177 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi0xGHIJKLMNOPQRSTUV, vk0xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 195 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi2xGHIJKLMNOPQRSTUV, vk2xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 213 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi4xGHIJKLMNOPQRSTUV, vk4xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 231 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi6xGHIJKLMNOPQRSTUV, vk6xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 249 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi8xGHIJKLMNOPQRSTUV, vk8xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 267 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi10xGHIJKLMNOPQRSTUV, vk10xGHIJKLMNOPQRSTUV, vaccGHIJKLM… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 285 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi12xGHIJKLMNOPQRSTUV, vk12xGHIJKLMNOPQRSTUV, vaccGHIJKLM… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 303 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi14xGHIJKLMNOPQRSTUV, vk14xGHIJKLMNOPQRSTUV, vaccGHIJKLM… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 321 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi16xGHIJKLMNOPQRSTUV, vk16xGHIJKLMNOPQRSTUV, vaccGHIJKLM… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() [all …]
|
D | up32x4-minmax-avx512f-acc2.c | 62 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() local 72 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi0xGHIJKLMNOPQRSTUV, vk0xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 90 …vaccGHIJKLMNOPQRSTUVp0 = _mm512_fmadd_ps(vi2xGHIJKLMNOPQRSTUV, vk2xGHIJKLMNOPQRSTUV, vaccGHIJKLMNO… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 105 vaccGHIJKLMNOPQRSTUVp0 = _mm512_add_ps(vaccGHIJKLMNOPQRSTUVp0, vaccGHIJKLMNOPQRSTUVp1); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 108 __m512 vaccGHIJKLMNOPQRSTUV = _mm512_max_ps(vaccGHIJKLMNOPQRSTUVp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2()
|