/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x16-minmax-avx512f-broadcast.c | 62 __m512 vacc1x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() local 73 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 87 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 93 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 102 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 120 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast()
|
D | 5x16-minmax-avx512f-broadcast.c | 68 __m512 vacc1x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() local 80 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 96 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 103 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 115 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 135 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast()
|
D | 6x16-minmax-avx512f-broadcast.c | 74 __m512 vacc1x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() local 87 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 105 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 113 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 128 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 150 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast()
|
D | 7x16-minmax-avx512f-broadcast.c | 80 __m512 vacc1x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() local 94 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 114 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 123 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 141 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 165 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast()
|
D | 8x16-minmax-avx512f-broadcast.c | 86 __m512 vacc1x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() local 101 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 123 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 133 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 154 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 180 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast()
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 4x16inc-minmax-avx512f-broadcast.c | 64 __m512 vacc1x0123456789ABCDEF = _mm512_load_ps(acc + 16); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() local 75 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 89 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 95 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 104 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 122 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast()
|
D | 5x16inc-minmax-avx512f-broadcast.c | 70 __m512 vacc1x0123456789ABCDEF = _mm512_load_ps(acc + 16); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() local 82 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 98 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 105 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 117 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 137 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast()
|
D | 6x16inc-minmax-avx512f-broadcast.c | 76 __m512 vacc1x0123456789ABCDEF = _mm512_load_ps(acc + 16); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() local 89 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 107 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 115 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 130 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 152 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast()
|
D | 7x16inc-minmax-avx512f-broadcast.c | 82 __m512 vacc1x0123456789ABCDEF = _mm512_load_ps(acc + 16); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() local 96 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 116 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 125 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 143 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 167 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast()
|
D | 8x16inc-minmax-avx512f-broadcast.c | 88 __m512 vacc1x0123456789ABCDEF = _mm512_load_ps(acc + 16); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() local 103 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 125 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 135 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 156 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 182 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x16-minmax-avx512f-broadcast.c | 60 __m512 vacc1x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() local 95 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 111 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 117 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 126 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 140 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast()
|
D | 5x16-minmax-avx512f-broadcast.c | 64 __m512 vacc1x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() local 105 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 123 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 130 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 142 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 157 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast()
|
D | 6x16-minmax-avx512f-broadcast.c | 68 __m512 vacc1x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() local 115 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 135 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 143 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 158 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 174 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast()
|
D | 7x16-minmax-avx512f-broadcast.c | 72 __m512 vacc1x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() local 125 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 147 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 156 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 174 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 191 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast()
|
D | 8x16-minmax-avx512f-broadcast.c | 76 __m512 vacc1x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() local 135 …vacc1x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a1), vb0123456789ABCDEF, vacc1x0123456789… in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 159 vacc1x0123456789ABCDEF = _mm512_min_ps(vacc1x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 169 vacc1x0123456789ABCDEF = _mm512_max_ps(vacc1x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 190 _mm512_storeu_ps(c1, vacc1x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 208 _mm512_mask_storeu_ps(c1, vmask, vacc1x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast()
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | avx512f-2x16.c | 60 …const __m512 vacc1x0123456789ABCDEF = _mm512_mask_mul_ps(vi1x0123456789ABCDEF, vsign1x0123456789AB… in xnn_f32_prelu_ukernel__avx512f_2x16() local 64 _mm512_storeu_ps(o1, vacc1x0123456789ABCDEF); in xnn_f32_prelu_ukernel__avx512f_2x16()
|
D | avx512f-2x32.c | 65 …const __m512 vacc1x0123456789ABCDEF = _mm512_mask_mul_ps(vi1x0123456789ABCDEF, vsign1x0123456789AB… in xnn_f32_prelu_ukernel__avx512f_2x32() local 72 _mm512_storeu_ps(o1, vacc1x0123456789ABCDEF); in xnn_f32_prelu_ukernel__avx512f_2x32()
|