/external/XNNPACK/src/f32-dwconv/gen/ |
D | up32x25-minmax-avx512f-acc2.c | 166 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 167 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 174 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 175 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 183 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 184 const __m512 vk1xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 192 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 193 const __m512 vk2xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 201 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 202 const __m512 vk3xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() [all …]
|
D | up32x25-minmax-avx512f.c | 166 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 167 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 174 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 175 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 183 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 184 const __m512 vk1xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 192 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 193 const __m512 vk2xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 201 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 202 const __m512 vk3xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() [all …]
|
D | up32x9-minmax-avx512f-acc2.c | 86 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 87 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 94 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 95 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 103 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 104 const __m512 vk1xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 112 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 113 const __m512 vk2xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 121 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 122 const __m512 vk3xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() [all …]
|
D | up32x9-minmax-avx512f.c | 86 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 87 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 94 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 95 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 103 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 104 const __m512 vk1xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 112 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 113 const __m512 vk2xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 121 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 122 const __m512 vk3xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() [all …]
|
D | up32x4-minmax-avx512f.c | 61 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 62 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 69 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 70 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 78 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 79 const __m512 vk1xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 87 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 88 const __m512 vk2xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 96 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 97 const __m512 vk3xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() [all …]
|
D | up32x4-minmax-avx512f-acc2.c | 61 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 62 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 69 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 70 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 78 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 79 const __m512 vk1xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 87 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 88 const __m512 vk2xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 96 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 97 const __m512 vk3xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() [all …]
|
D | up16x25-minmax-avx512f.c | 166 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 172 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 178 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 184 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 190 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 196 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 202 const __m512 vk5x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 208 const __m512 vk6x0123456789ABCDEF = _mm512_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 214 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 220 const __m512 vk8x0123456789ABCDEF = _mm512_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() [all …]
|
D | up16x25-minmax-avx512f-acc2.c | 166 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 172 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 178 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 184 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 190 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 196 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 202 const __m512 vk5x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 208 const __m512 vk6x0123456789ABCDEF = _mm512_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 214 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 220 const __m512 vk8x0123456789ABCDEF = _mm512_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() [all …]
|
D | up16x9-minmax-avx512f-acc2.c | 86 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 92 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 98 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 104 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 110 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 116 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 122 const __m512 vk5x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 128 const __m512 vk6x0123456789ABCDEF = _mm512_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 134 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 140 const __m512 vk8x0123456789ABCDEF = _mm512_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2()
|
D | up16x9-minmax-avx512f.c | 86 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 92 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 98 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 104 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 110 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 116 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 122 const __m512 vk5x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 128 const __m512 vk6x0123456789ABCDEF = _mm512_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 134 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 140 const __m512 vk8x0123456789ABCDEF = _mm512_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f()
|
D | up16x4-minmax-avx512f-acc2.c | 61 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2() 67 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2() 73 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2() 79 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2() 85 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2()
|
D | up16x4-minmax-avx512f.c | 61 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f() 67 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f() 73 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f() 79 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f() 85 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f()
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 8x16inc-minmax-avx512f-broadcast.c | 87 __m512 vacc0x0123456789ABCDEF = _mm512_load_ps(acc + 0); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 88 __m512 vacc1x0123456789ABCDEF = _mm512_load_ps(acc + 16); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 89 __m512 vacc2x0123456789ABCDEF = _mm512_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 90 __m512 vacc3x0123456789ABCDEF = _mm512_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 91 __m512 vacc4x0123456789ABCDEF = _mm512_load_ps(acc + 64); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 92 __m512 vacc5x0123456789ABCDEF = _mm512_load_ps(acc + 80); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 93 __m512 vacc6x0123456789ABCDEF = _mm512_load_ps(acc + 96); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 94 __m512 vacc7x0123456789ABCDEF = _mm512_load_ps(acc + 112); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 99 const __m512 vb0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast()
|
D | 7x16inc-minmax-avx512f-broadcast.c | 81 __m512 vacc0x0123456789ABCDEF = _mm512_load_ps(acc + 0); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 82 __m512 vacc1x0123456789ABCDEF = _mm512_load_ps(acc + 16); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 83 __m512 vacc2x0123456789ABCDEF = _mm512_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 84 __m512 vacc3x0123456789ABCDEF = _mm512_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 85 __m512 vacc4x0123456789ABCDEF = _mm512_load_ps(acc + 64); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 86 __m512 vacc5x0123456789ABCDEF = _mm512_load_ps(acc + 80); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 87 __m512 vacc6x0123456789ABCDEF = _mm512_load_ps(acc + 96); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 92 const __m512 vb0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast()
|
D | 6x16inc-minmax-avx512f-broadcast.c | 75 __m512 vacc0x0123456789ABCDEF = _mm512_load_ps(acc + 0); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 76 __m512 vacc1x0123456789ABCDEF = _mm512_load_ps(acc + 16); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 77 __m512 vacc2x0123456789ABCDEF = _mm512_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 78 __m512 vacc3x0123456789ABCDEF = _mm512_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 79 __m512 vacc4x0123456789ABCDEF = _mm512_load_ps(acc + 64); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 80 __m512 vacc5x0123456789ABCDEF = _mm512_load_ps(acc + 80); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 85 const __m512 vb0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast()
|
D | 5x16inc-minmax-avx512f-broadcast.c | 69 __m512 vacc0x0123456789ABCDEF = _mm512_load_ps(acc + 0); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 70 __m512 vacc1x0123456789ABCDEF = _mm512_load_ps(acc + 16); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 71 __m512 vacc2x0123456789ABCDEF = _mm512_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 72 __m512 vacc3x0123456789ABCDEF = _mm512_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 73 __m512 vacc4x0123456789ABCDEF = _mm512_load_ps(acc + 64); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 78 const __m512 vb0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast()
|
D | 4x16inc-minmax-avx512f-broadcast.c | 63 __m512 vacc0x0123456789ABCDEF = _mm512_load_ps(acc + 0); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 64 __m512 vacc1x0123456789ABCDEF = _mm512_load_ps(acc + 16); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 65 __m512 vacc2x0123456789ABCDEF = _mm512_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 66 __m512 vacc3x0123456789ABCDEF = _mm512_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 71 const __m512 vb0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast()
|
D | 1x16inc-minmax-avx512f-broadcast.c | 45 __m512 vacc0x0123456789ABCDEF = _mm512_load_ps(acc + 0); in xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast() 50 const __m512 vb0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast()
|
/external/XNNPACK/src/f32-gemm/ |
D | avx512-broadcast.c.in | 66 __m512 vacc${M}x${ABC[N:N+16]} = _mm512_load_ps(acc + ${M*NR+N}); 69 __m512 vacc0x${ABC[0:16]} = _mm512_load_ps(w); 71 __m512 vacc0x${ABC[N:N+16]} = _mm512_load_ps(w + ${N}); 79 const __m512 vb${ABC[0:16]} = _mm512_load_ps(w); 81 const __m512 vb${ABC[N:N+16]} = _mm512_load_ps(w + ${N});
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 1x16-minmax-avx512f-broadcast.c | 43 __m512 vacc0x0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast() 48 const __m512 vb0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast()
|
D | 4x16-minmax-avx512f-broadcast.c | 61 __m512 vacc0x0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 69 const __m512 vb0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast()
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | avx512f-2x32.c | 49 const __m512 vw0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_prelu_ukernel__avx512f_2x32() 50 const __m512 vwGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 16); in xnn_f32_prelu_ukernel__avx512f_2x32() 77 const __m512 vw = _mm512_load_ps(w); in xnn_f32_prelu_ukernel__avx512f_2x32()
|
/external/XNNPACK/src/f32-dwconv/ |
D | up-avx512.c.in | 47 __m512 vacc${ABC[0:16]}p0 = _mm512_load_ps(w); 49 __m512 vacc${ABC[C:C+16]}p0 = _mm512_load_ps(w + ${C}); 59 const __m512 vk${K}x${ABC[C:C+16]} = _mm512_load_ps(w + ${(K + 1) * CHANNEL_TILE + C}); 90 __m512 vacc${ABC[0:16]}p0 = _mm512_load_ps(w); 96 const __m512 vk${K}x${ABC[0:16]} = _mm512_load_ps(w + ${(K + 1) * CHANNEL_TILE});
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 1x16-minmax-avx512f-broadcast.c | 47 __m512 vacc0x0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast() 61 const __m512 vb0123456789ABCDEF = _mm512_load_ps(w); in xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast()
|
/external/XNNPACK/src/f32-igemm/ |
D | avx512-broadcast.c.in | 59 __m512 vacc0x${ABC[0:16]} = _mm512_load_ps(w); 61 __m512 vacc0x${ABC[N:N+16]} = _mm512_load_ps(w + ${N}); 79 const __m512 vb${ABC[0:16]} = _mm512_load_ps(w); 81 const __m512 vb${ABC[N:N+16]} = _mm512_load_ps(w + ${N});
|