/external/XNNPACK/src/f32-dwconv/gen/ |
D | up32x25-minmax-avx512f-acc2.c | 170 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 171 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 179 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 180 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 188 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 189 const __m512 vi2xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i2 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 197 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 198 const __m512 vi3xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i3 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 206 const __m512 vi4x0123456789ABCDEF = _mm512_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 207 const __m512 vi4xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i4 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() [all …]
|
D | up32x25-minmax-avx512f.c | 170 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 171 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 179 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 180 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 188 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 189 const __m512 vi2xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i2 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 197 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 198 const __m512 vi3xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i3 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 206 const __m512 vi4x0123456789ABCDEF = _mm512_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 207 const __m512 vi4xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i4 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() [all …]
|
D | up32x9-minmax-avx512f-acc2.c | 90 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 91 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 99 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 100 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 108 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 109 const __m512 vi2xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i2 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 117 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 118 const __m512 vi3xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i3 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 126 const __m512 vi4x0123456789ABCDEF = _mm512_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 127 const __m512 vi4xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i4 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() [all …]
|
D | up32x9-minmax-avx512f.c | 90 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 91 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 99 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 100 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 108 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 109 const __m512 vi2xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i2 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 117 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 118 const __m512 vi3xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i3 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 126 const __m512 vi4x0123456789ABCDEF = _mm512_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 127 const __m512 vi4xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i4 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() [all …]
|
D | up16x25-minmax-avx512f.c | 169 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 175 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 181 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 187 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 193 const __m512 vi4x0123456789ABCDEF = _mm512_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 199 const __m512 vi5x0123456789ABCDEF = _mm512_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 205 const __m512 vi6x0123456789ABCDEF = _mm512_loadu_ps(i6); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 211 const __m512 vi7x0123456789ABCDEF = _mm512_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 217 const __m512 vi8x0123456789ABCDEF = _mm512_loadu_ps(i8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 223 const __m512 vi9x0123456789ABCDEF = _mm512_loadu_ps(i9); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() [all …]
|
D | up16x25-minmax-avx512f-acc2.c | 169 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 175 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 181 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 187 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 193 const __m512 vi4x0123456789ABCDEF = _mm512_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 199 const __m512 vi5x0123456789ABCDEF = _mm512_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 205 const __m512 vi6x0123456789ABCDEF = _mm512_loadu_ps(i6); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 211 const __m512 vi7x0123456789ABCDEF = _mm512_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 217 const __m512 vi8x0123456789ABCDEF = _mm512_loadu_ps(i8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 223 const __m512 vi9x0123456789ABCDEF = _mm512_loadu_ps(i9); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() [all …]
|
D | up32x4-minmax-avx512f.c | 65 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 66 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 74 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 75 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 83 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 84 const __m512 vi2xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i2 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 92 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 93 const __m512 vi3xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i3 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 116 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 122 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() [all …]
|
D | up32x4-minmax-avx512f-acc2.c | 65 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 66 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 74 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 75 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 83 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 84 const __m512 vi2xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i2 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 92 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 93 const __m512 vi3xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i3 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 119 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 125 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() [all …]
|
D | up16x9-minmax-avx512f-acc2.c | 89 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 95 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 101 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 107 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 113 const __m512 vi4x0123456789ABCDEF = _mm512_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 119 const __m512 vi5x0123456789ABCDEF = _mm512_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 125 const __m512 vi6x0123456789ABCDEF = _mm512_loadu_ps(i6); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 131 const __m512 vi7x0123456789ABCDEF = _mm512_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 137 const __m512 vi8x0123456789ABCDEF = _mm512_loadu_ps(i8); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2()
|
D | up16x9-minmax-avx512f.c | 89 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 95 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 101 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 107 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 113 const __m512 vi4x0123456789ABCDEF = _mm512_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 119 const __m512 vi5x0123456789ABCDEF = _mm512_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 125 const __m512 vi6x0123456789ABCDEF = _mm512_loadu_ps(i6); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 131 const __m512 vi7x0123456789ABCDEF = _mm512_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 137 const __m512 vi8x0123456789ABCDEF = _mm512_loadu_ps(i8); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f()
|
/external/XNNPACK/src/f32-vbinary/gen/ |
D | vmin-avx512f-x32.c | 34 const __m512 va0123456789ABCDEF = _mm512_loadu_ps(a); in xnn_f32_vmin_ukernel__avx512f_x32() 35 const __m512 vaGHIJKLMNOPQRSTUV = _mm512_loadu_ps(a + 16); in xnn_f32_vmin_ukernel__avx512f_x32() 38 const __m512 vb0123456789ABCDEF = _mm512_loadu_ps(b); in xnn_f32_vmin_ukernel__avx512f_x32() 39 const __m512 vbGHIJKLMNOPQRSTUV = _mm512_loadu_ps(b + 16); in xnn_f32_vmin_ukernel__avx512f_x32() 52 const __m512 va = _mm512_loadu_ps(a); in xnn_f32_vmin_ukernel__avx512f_x32() 55 const __m512 vb = _mm512_loadu_ps(b); in xnn_f32_vmin_ukernel__avx512f_x32()
|
D | vmax-avx512f-x32.c | 34 const __m512 va0123456789ABCDEF = _mm512_loadu_ps(a); in xnn_f32_vmax_ukernel__avx512f_x32() 35 const __m512 vaGHIJKLMNOPQRSTUV = _mm512_loadu_ps(a + 16); in xnn_f32_vmax_ukernel__avx512f_x32() 38 const __m512 vb0123456789ABCDEF = _mm512_loadu_ps(b); in xnn_f32_vmax_ukernel__avx512f_x32() 39 const __m512 vbGHIJKLMNOPQRSTUV = _mm512_loadu_ps(b + 16); in xnn_f32_vmax_ukernel__avx512f_x32() 52 const __m512 va = _mm512_loadu_ps(a); in xnn_f32_vmax_ukernel__avx512f_x32() 55 const __m512 vb = _mm512_loadu_ps(b); in xnn_f32_vmax_ukernel__avx512f_x32()
|
D | vsqrdiff-avx512f-x32.c | 34 const __m512 va0123456789ABCDEF = _mm512_loadu_ps(a); in xnn_f32_vsqrdiff_ukernel__avx512f_x32() 35 const __m512 vaGHIJKLMNOPQRSTUV = _mm512_loadu_ps(a + 16); in xnn_f32_vsqrdiff_ukernel__avx512f_x32() 38 const __m512 vb0123456789ABCDEF = _mm512_loadu_ps(b); in xnn_f32_vsqrdiff_ukernel__avx512f_x32() 39 const __m512 vbGHIJKLMNOPQRSTUV = _mm512_loadu_ps(b + 16); in xnn_f32_vsqrdiff_ukernel__avx512f_x32() 54 const __m512 va = _mm512_loadu_ps(a); in xnn_f32_vsqrdiff_ukernel__avx512f_x32() 57 const __m512 vb = _mm512_loadu_ps(b); in xnn_f32_vsqrdiff_ukernel__avx512f_x32()
|
D | vadd-minmax-avx512f-x32.c | 36 const __m512 va0123456789ABCDEF = _mm512_loadu_ps(a); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() 37 const __m512 vaGHIJKLMNOPQRSTUV = _mm512_loadu_ps(a + 16); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() 40 const __m512 vb0123456789ABCDEF = _mm512_loadu_ps(b); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() 41 const __m512 vbGHIJKLMNOPQRSTUV = _mm512_loadu_ps(b + 16); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() 59 const __m512 va = _mm512_loadu_ps(a); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() 62 const __m512 vb = _mm512_loadu_ps(b); in xnn_f32_vadd_minmax_ukernel__avx512f_x32()
|
D | vdiv-minmax-avx512f-x32.c | 36 const __m512 va0123456789ABCDEF = _mm512_loadu_ps(a); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32() 37 const __m512 vaGHIJKLMNOPQRSTUV = _mm512_loadu_ps(a + 16); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32() 40 const __m512 vb0123456789ABCDEF = _mm512_loadu_ps(b); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32() 41 const __m512 vbGHIJKLMNOPQRSTUV = _mm512_loadu_ps(b + 16); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32() 59 const __m512 va = _mm512_loadu_ps(a); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32() 62 const __m512 vb = _mm512_loadu_ps(b); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32()
|
D | vmul-minmax-avx512f-x32.c | 36 const __m512 va0123456789ABCDEF = _mm512_loadu_ps(a); in xnn_f32_vmul_minmax_ukernel__avx512f_x32() 37 const __m512 vaGHIJKLMNOPQRSTUV = _mm512_loadu_ps(a + 16); in xnn_f32_vmul_minmax_ukernel__avx512f_x32() 40 const __m512 vb0123456789ABCDEF = _mm512_loadu_ps(b); in xnn_f32_vmul_minmax_ukernel__avx512f_x32() 41 const __m512 vbGHIJKLMNOPQRSTUV = _mm512_loadu_ps(b + 16); in xnn_f32_vmul_minmax_ukernel__avx512f_x32() 59 const __m512 va = _mm512_loadu_ps(a); in xnn_f32_vmul_minmax_ukernel__avx512f_x32() 62 const __m512 vb = _mm512_loadu_ps(b); in xnn_f32_vmul_minmax_ukernel__avx512f_x32()
|
D | vsub-minmax-avx512f-x32.c | 36 const __m512 va0123456789ABCDEF = _mm512_loadu_ps(a); in xnn_f32_vsub_minmax_ukernel__avx512f_x32() 37 const __m512 vaGHIJKLMNOPQRSTUV = _mm512_loadu_ps(a + 16); in xnn_f32_vsub_minmax_ukernel__avx512f_x32() 40 const __m512 vb0123456789ABCDEF = _mm512_loadu_ps(b); in xnn_f32_vsub_minmax_ukernel__avx512f_x32() 41 const __m512 vbGHIJKLMNOPQRSTUV = _mm512_loadu_ps(b + 16); in xnn_f32_vsub_minmax_ukernel__avx512f_x32() 59 const __m512 va = _mm512_loadu_ps(a); in xnn_f32_vsub_minmax_ukernel__avx512f_x32() 62 const __m512 vb = _mm512_loadu_ps(b); in xnn_f32_vsub_minmax_ukernel__avx512f_x32()
|
/external/XNNPACK/src/f32-vscale/ |
D | avx512f-x64.c | 26 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_vscale_ukernel__avx512f_x64() 27 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_vscale_ukernel__avx512f_x64() 28 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_vscale_ukernel__avx512f_x64() 29 const __m512 vx3 = _mm512_loadu_ps(x + 48); in xnn_f32_vscale_ukernel__avx512f_x64() 44 const __m512 vx = _mm512_loadu_ps(x); in xnn_f32_vscale_ukernel__avx512f_x64()
|
/external/XNNPACK/src/f32-rmax/ |
D | avx512f.c | 27 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_rmax_ukernel__avx512f() 28 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_rmax_ukernel__avx512f() 29 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_rmax_ukernel__avx512f() 30 const __m512 vx3 = _mm512_loadu_ps(x + 48); in xnn_f32_rmax_ukernel__avx512f() 40 const __m512 vx = _mm512_loadu_ps(x); in xnn_f32_rmax_ukernel__avx512f()
|
/external/XNNPACK/src/f32-vsqrt/gen/ |
D | avx512f-nr1fma1adj-x128.c | 30 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 31 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 32 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 33 const __m512 vx3 = _mm512_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 34 const __m512 vx4 = _mm512_loadu_ps(x + 64); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 36 const __m512 vx6 = _mm512_loadu_ps(x + 96); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 37 const __m512 vx7 = _mm512_loadu_ps(x + 112); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 121 const __m512 vx = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
|
D | avx512f-nr1fma1adj-x112.c | 30 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 31 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 32 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 33 const __m512 vx3 = _mm512_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 34 const __m512 vx4 = _mm512_loadu_ps(x + 64); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 36 const __m512 vx6 = _mm512_loadu_ps(x + 96); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 111 const __m512 vx = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | avx512f-2x32.c | 53 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_prelu_ukernel__avx512f_2x32() 54 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_prelu_ukernel__avx512f_2x32() 56 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_prelu_ukernel__avx512f_2x32() 57 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_prelu_ukernel__avx512f_2x32() 80 const __m512 vi0 = _mm512_loadu_ps(i0); in xnn_f32_prelu_ukernel__avx512f_2x32() 82 const __m512 vi1 = _mm512_loadu_ps(i1); in xnn_f32_prelu_ukernel__avx512f_2x32()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx512f-p5-scalef-x192.c | 43 const __m512 vi0 = _mm512_loadu_ps(input); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 44 const __m512 vi1 = _mm512_loadu_ps(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 45 const __m512 vi2 = _mm512_loadu_ps(input + 32); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 46 const __m512 vi3 = _mm512_loadu_ps(input + 48); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 47 const __m512 vi4 = _mm512_loadu_ps(input + 64); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 48 const __m512 vi5 = _mm512_loadu_ps(input + 80); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 49 const __m512 vi6 = _mm512_loadu_ps(input + 96); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 50 const __m512 vi7 = _mm512_loadu_ps(input + 112); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 51 const __m512 vi8 = _mm512_loadu_ps(input + 128); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 52 const __m512 vi9 = _mm512_loadu_ps(input + 144); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x192-acc2.c | 44 const __m512 vi0 = _mm512_loadu_ps(input); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 45 const __m512 vi1 = _mm512_loadu_ps(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 46 const __m512 vi2 = _mm512_loadu_ps(input + 32); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 47 const __m512 vi3 = _mm512_loadu_ps(input + 48); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 48 const __m512 vi4 = _mm512_loadu_ps(input + 64); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 49 const __m512 vi5 = _mm512_loadu_ps(input + 80); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 50 const __m512 vi6 = _mm512_loadu_ps(input + 96); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 51 const __m512 vi7 = _mm512_loadu_ps(input + 112); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 52 const __m512 vi8 = _mm512_loadu_ps(input + 128); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 53 const __m512 vi9 = _mm512_loadu_ps(input + 144); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() [all …]
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx512f-p5-scalef-x192.c | 42 const __m512 vi0 = _mm512_loadu_ps(input); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 43 const __m512 vi1 = _mm512_loadu_ps(input + 16); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 44 const __m512 vi2 = _mm512_loadu_ps(input + 32); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 45 const __m512 vi3 = _mm512_loadu_ps(input + 48); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 46 const __m512 vi4 = _mm512_loadu_ps(input + 64); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 47 const __m512 vi5 = _mm512_loadu_ps(input + 80); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 48 const __m512 vi6 = _mm512_loadu_ps(input + 96); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 49 const __m512 vi7 = _mm512_loadu_ps(input + 112); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 50 const __m512 vi8 = _mm512_loadu_ps(input + 128); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 51 const __m512 vi9 = _mm512_loadu_ps(input + 144); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() [all …]
|