Searched refs:vt4 (Results 1 – 25 of 223) sorted by relevance

/external/XNNPACK/src/f32-velu/gen/
velu-scalar-rr2-p6-x5.c
78 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5() local
84 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
104 vt4 = 0.0f; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
111 float vp4 = vc6 * vt4 + vc5; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
117 vp4 = vp4 * vt4 + vc4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
123 vp4 = vp4 * vt4 + vc3; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
129 vp4 = vp4 * vt4 + vc2; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
135 vp4 *= vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
145 vt4 *= vs4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
152 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
velu-wasm-rr2-p6-x5.c
78 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5() local
84 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
91 float vp4 = vc6 * vt4 + vc5; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
97 vp4 = vp4 * vt4 + vc4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
103 vp4 = vp4 * vt4 + vc3; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
109 vp4 = vp4 * vt4 + vc2; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
115 vp4 *= vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
125 vt4 *= vs4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
132 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
velu-scalar-rr2-p6-x6.c
83 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() local
90 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
111 vt4 = 0.0f; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
122 float vp4 = vc6 * vt4 + vc5; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
129 vp4 = vp4 * vt4 + vc4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
136 vp4 = vp4 * vt4 + vc3; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
143 vp4 = vp4 * vt4 + vc2; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
150 vp4 *= vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
161 vt4 *= vs4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
170 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
velu-wasm-rr2-p6-x6.c
83 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() local
90 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
98 float vp4 = vc6 * vt4 + vc5; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
105 vp4 = vp4 * vt4 + vc4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
112 vp4 = vp4 * vt4 + vc3; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
119 vp4 = vp4 * vt4 + vc2; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
126 vp4 *= vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
137 vt4 *= vs4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
146 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
velu-scalar-rr2-lut16-p3-x5.c
87 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() local
110 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
113 vt4 = 0.0f; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
120 float vp4 = vc3 * vt4 + vc2; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
126 vp4 *= vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
136 vt4 *= vs4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
143 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
velu-scalar-rr2-lut16-p3-x6.c
93 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() local
118 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
121 vt4 = 0.0f; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
133 float vp4 = vc3 * vt4 + vc2; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
140 vp4 *= vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
151 vt4 *= vs4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
160 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
velu-wasm-rr2-lut16-p3-x5.c
87 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() local
94 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
100 float vp4 = vc3 * vt4 + vc2; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
106 vp4 *= vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
116 vt4 *= vs4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
123 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
velu-wasm-rr2-lut16-p3-x6.c
93 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() local
102 vt4 = vn4 * vminus_ln2_lo + vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
109 float vp4 = vc3 * vt4 + vc2; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
116 vp4 *= vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
127 vt4 *= vs4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
136 vp4 = vp4 * vt4 + vt4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
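The lut16-p3 kernels above trade polynomial degree for a table: n is rounded to a multiple of 1/16, 2^n splits into a 16-entry table value 2^(k/16) times an integer power of two, and the leftover t is then small enough for a degree-3 polynomial. A minimal scalar sketch of that scheme, assuming plain Taylor coefficients and a table built with exp2f instead of XNNPACK's precomputed xnn_table_exp2minus_k_over_16:

    #include <math.h>
    #include <stdio.h>

    static float expm1_lut16_p3_sketch(float z) {
      const float log2e_x16 = 16.0f * 1.442695041f;   /* 16 / ln(2) */
      const float minus_ln2_o16_hi = -0x1.63p-5f;     /* Cody-Waite split */
      const float minus_ln2_o16_lo = 1.32621533e-5f;  /* of ln(2)/16 */
      /* n counts sixteenths: z ~= (n/16)*ln2 + t, so 2^(n/16) factors into
         a table entry 2^(k/16) and an integer power of two. */
      float n = rintf(z * log2e_x16);
      int ni = (int) n;
      int k = ni & 15;                                /* table index */
      float s = ldexpf(exp2f((float) k / 16.0f), (ni - k) / 16);
      float t = n * minus_ln2_o16_hi + z;             /* two-step ("rr2") */
      t = n * minus_ln2_o16_lo + t;                   /* range reduction */
      /* Degree-3 ("p3"): e^t - 1 ~= t + t^2/2 + t^3/6. Taylor coefficients
         here; XNNPACK's vc2/vc3 are minimax-tuned. */
      float p = (1.0f / 6.0f) * t + 0.5f;             /* vp = vc3*vt + vc2 */
      p *= t;                                         /* vp *= vt */
      t *= s;                                         /* vt *= vs */
      p = p * t + t;                                  /* vp = vp*vt + vt */
      return p + (s - 1.0f);                          /* e^z - 1 */
    }

    int main(void) {
      printf("%f vs %f\n", expm1_lut16_p3_sketch(-0.75f), expm1f(-0.75f));
      return 0;
    }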
velu-avx-rr2-p6-x40.c
88 __m256 vt4 = _mm256_add_ps(_mm256_mul_ps(vn4, vminus_ln2_hi), vz4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40() local
95 vt4 = _mm256_add_ps(_mm256_mul_ps(vn4, vminus_ln2_lo), vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
101 __m256 vp4 = _mm256_add_ps(_mm256_mul_ps(vc6, vt4), vc5); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
107 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
113 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc3); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
119 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc2); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
125 vp4 = _mm256_mul_ps(vp4, vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
135 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
142 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
velu-avx512f-rr1-p6-x80.c
77 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80() local
83 __m512 vp4 = _mm512_fmadd_ps(vc6, vt4, vc5); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
89 vp4 = _mm512_fmadd_ps(vp4, vt4, vc4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
111 vp4 = _mm512_mul_ps(vp4, vt4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
112 vt4 = _mm512_mul_ps(vt4, vs4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
124 vp4 = _mm512_fmadd_ps(vp4, vt4, vt4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80()
velu-avx2-rr1-p6-x40.c
77 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40() local
83 __m256 vp4 = _mm256_fmadd_ps(vc6, vt4, vc5); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
89 vp4 = _mm256_fmadd_ps(vp4, vt4, vc4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
95 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
101 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
111 vp4 = _mm256_mul_ps(vp4, vt4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
112 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
123 vp4 = _mm256_fmadd_ps(vp4, vt4, vt4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x40()
velu-avx512f-rr1-p6-x96.c
82 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96() local
89 __m512 vp4 = _mm512_fmadd_ps(vc6, vt4, vc5); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96()
96 vp4 = _mm512_fmadd_ps(vp4, vt4, vc4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96()
103 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96()
110 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96()
121 vp4 = _mm512_mul_ps(vp4, vt4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96()
122 vt4 = _mm512_mul_ps(vt4, vs4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96()
137 vp4 = _mm512_fmadd_ps(vp4, vt4, vt4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96()
velu-avx2-rr1-p6-x48.c
82 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48() local
89 __m256 vp4 = _mm256_fmadd_ps(vc6, vt4, vc5); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48()
96 vp4 = _mm256_fmadd_ps(vp4, vt4, vc4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48()
103 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48()
110 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48()
121 vp4 = _mm256_mul_ps(vp4, vt4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48()
122 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48()
135 vp4 = _mm256_fmadd_ps(vp4, vt4, vt4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48()
velu-avx-rr2-p6-x48.c
94 __m256 vt4 = _mm256_add_ps(_mm256_mul_ps(vn4, vminus_ln2_hi), vz4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48() local
103 vt4 = _mm256_add_ps(_mm256_mul_ps(vn4, vminus_ln2_lo), vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
110 __m256 vp4 = _mm256_add_ps(_mm256_mul_ps(vc6, vt4), vc5); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
117 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
124 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc3); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
131 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vc2); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
138 vp4 = _mm256_mul_ps(vp4, vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
149 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
158 vp4 = _mm256_add_ps(_mm256_mul_ps(vp4, vt4), vt4); in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
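Every p6 hit in this directory, from the scalar and wasm kernels through the AVX mul+add and AVX2/AVX-512 FMA variants, is the same per-lane recipe: a Cody-Waite reduction of z into n*ln2 + t, a degree-6 polynomial for e^t - 1, and the reconstruction vp4 = vp4*vt4 + vt4. The "rr2" kernels need the hi/lo pair of ln 2 constants because each multiply rounds separately; the FMA ("rr1") kernels get away with a single vminus_ln2. The stray vt4 = 0.0f hits come from the scalar kernels' saturation path, which zeroes t for very negative inputs so the reconstruction stays finite. A minimal scalar sketch, assuming Taylor coefficients in place of XNNPACK's minimax-tuned vc2..vc6 and ldexpf in place of the exponent-bias trick:

    #include <math.h>
    #include <stdio.h>

    static float elu_p6_sketch(float x, float alpha) {
      if (x > 0.0f) return x;                    /* ELU is identity for x > 0 */
      const float log2e = 1.442695041f;
      const float minus_ln2_hi = -0.693359375f;  /* Cody-Waite split of ln 2; */
      const float minus_ln2_lo = 2.12194440e-4f; /* "rr2" = 2-step reduction */
      float z = x;                               /* (real kernels clamp z to a
                                                    saturation cutoff first) */
      float n = rintf(z * log2e);
      float s = ldexpf(1.0f, (int) n);           /* s = 2^n */
      float t = n * minus_ln2_hi + z;            /* vt = vn*vminus_ln2_hi + vz */
      t = n * minus_ln2_lo + t;                  /* vt = vn*vminus_ln2_lo + vt */
      /* "p6": e^t - 1 ~= t + t^2*(c2 + c3*t + c4*t^2 + c5*t^3 + c6*t^4),
         evaluated Horner-style. */
      float p = (1.0f / 720.0f) * t + (1.0f / 120.0f); /* vp = vc6*vt + vc5 */
      p = p * t + (1.0f / 24.0f);                /* vp = vp*vt + vc4 */
      p = p * t + (1.0f / 6.0f);                 /* vp = vp*vt + vc3 */
      p = p * t + 0.5f;                          /* vp = vp*vt + vc2 */
      p *= t;                                    /* vp *= vt */
      t *= s;                                    /* vt *= vs */
      p = p * t + t;                             /* vp = vp*vt + vt */
      return alpha * (p + (s - 1.0f));           /* alpha * (e^x - 1) */
    }

    int main(void) {
      printf("%f vs %f\n", elu_p6_sketch(-2.0f, 1.0f), expm1f(-2.0f));
      return 0;
    }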
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x40.c
89 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40() local
95 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
102 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
108 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
114 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
120 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
130 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
136 __m256 vf4 = _mm256_fmadd_ps(vt4, vp4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
avx2-p5-x48.c
94 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48() local
101 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
109 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
116 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
123 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
130 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
141 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
148 __m256 vf4 = _mm256_fmadd_ps(vt4, vp4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
avx512f-p5-scalef-x80.c
70 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80() local
76 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
83 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
89 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
107 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
avx512f-p5-scalef-x96.c
73 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96() local
80 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
88 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
102 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
116 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
avx2-p5-x56.c
99 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56() local
107 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
116 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
124 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
132 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
140 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
152 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
160 __m256 vf4 = _mm256_fmadd_ps(vt4, vp4, vs4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
avx512f-p5-scalef-x112.c
76 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112() local
84 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
93 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
117 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
125 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
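Two reconstruction styles show up in these softmax-scaling hits, built on the same degree-5 ("p5") Horner chain: the AVX2 kernels stop at vc1 and rebuild the result as vf4 = vt4*vp4 + vs4, while the AVX-512 kernels run the chain down to vc0 and hand 2^n to _mm512_scalef_ps. A scalar sketch of the scalef-style path, assuming Taylor coefficients and using ldexpf as a stand-in for scalef:

    #include <math.h>
    #include <stdio.h>

    static float scaleexpminusmax_p5_sketch(float x, float max, float scale) {
      const float log2e = 1.442695041f;
      const float minus_ln2_hi = -0.693359375f;  /* Cody-Waite split of ln 2 */
      const float minus_ln2_lo = 2.12194440e-4f;
      float vx = x - max;                        /* the "minusmax" part */
      float n = rintf(vx * log2e);
      float t = n * minus_ln2_hi + vx;           /* vt = vn*vminus_ln2_hi + vx */
      t = n * minus_ln2_lo + t;                  /* vt = vn*vminus_ln2_lo + vt */
      /* e^t ~= c0 + c1*t + ... + c5*t^5, Horner ("p5"); run down to c0 as
         the scalef kernels do (the AVX2 kernels stop at c1, then t*p + s). */
      float p = (1.0f / 120.0f) * t + (1.0f / 24.0f); /* vp = vc5*vt + vc4 */
      p = p * t + (1.0f / 6.0f);                 /* vp = vp*vt + vc3 */
      p = p * t + 0.5f;                          /* vp = vp*vt + vc2 */
      p = p * t + 1.0f;                          /* vp = vp*vt + vc1 */
      p = p * t + 1.0f;                          /* vp = vp*vt + vc0 */
      return scale * ldexpf(p, (int) n);         /* scalef(p, n) * vscale */
    }

    int main(void) {
      printf("%f vs %f\n", scaleexpminusmax_p5_sketch(1.0f, 3.0f, 2.0f),
             2.0f * expf(1.0f - 3.0f));
      return 0;
    }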
/external/XNNPACK/src/f32-vscaleextexp/gen/
avx512f-p5-scalef-x80.c
64 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() local
70 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
77 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
83 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
89 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
avx512f-p5-scalef-x96.c
66 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() local
73 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
81 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
88 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
102 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
avx2-p5-x40.c
70 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() local
76 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
83 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
89 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
95 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
101 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
107 vp4 = _mm256_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
avx512f-p5-scalef-x112.c
68 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() local
76 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
85 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
93 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
117 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
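The f32-vscaleextexp hits run the identical p5 chain down to vc0, but these kernels keep the exponent "extended": 2^n is not folded into the float immediately, so intermediate results cannot overflow or underflow before the final normalization. A scalar model of that idea; the extexp struct and names are illustrative, not XNNPACK's API:

    #include <math.h>
    #include <stdio.h>

    typedef struct {
      float m;   /* mantissa-like part: e^x = m * 2^e */
      float e;   /* exponent, carried as a float like the kernels' vn */
    } extexp;

    static extexp exp_extexp_sketch(float x) {
      const float log2e = 1.442695041f;
      const float minus_ln2_hi = -0.693359375f;
      const float minus_ln2_lo = 2.12194440e-4f;
      float n = rintf(x * log2e);
      float t = n * minus_ln2_hi + x;            /* same rr2 reduction and */
      t = n * minus_ln2_lo + t;                  /* p5 chain as above */
      float p = (1.0f / 120.0f) * t + (1.0f / 24.0f);
      p = p * t + (1.0f / 6.0f);
      p = p * t + 0.5f;
      p = p * t + 1.0f;
      p = p * t + 1.0f;
      extexp r = { p, n };                       /* 2^n applied later, so no
                                                    overflow at this point */
      return r;
    }

    int main(void) {
      extexp r = exp_extexp_sketch(100.0f);      /* e^100 > FLT_MAX */
      printf("e^100 = %g * 2^%g\n", r.m, r.e);
      return 0;
    }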
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
avx2-p5-x64-acc2.c
104 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2() local
113 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_lo, vt4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2()
123 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2()
132 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2()
141 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2()
150 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2()
163 vt4 = _mm256_mul_ps(vt4, vs4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2()
172 __m256 vf4 = _mm256_fmadd_ps(vt4, vp4, vs4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2()
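The -acc2 suffix on this kernel names a second trick around these hits: each vf4 is both stored and folded into one of two running sums that are combined only at the end, which shortens the floating-point add dependency chain. A scalar model of the store-and-accumulate loop, with expf standing in for the kernel's p5 approximation:

    #include <math.h>
    #include <stddef.h>
    #include <stdio.h>

    static float raddstore_acc2_sketch(const float* x, float* y, size_t n,
                                       float max) {
      float acc0 = 0.0f, acc1 = 0.0f;   /* two independent partial sums */
      size_t i = 0;
      for (; i + 2 <= n; i += 2) {
        float f0 = expf(x[i + 0] - max);  /* kernel: p5 polynomial per lane */
        float f1 = expf(x[i + 1] - max);
        y[i + 0] = f0;                    /* store the exponentials... */
        y[i + 1] = f1;
        acc0 += f0;                       /* ...and accumulate on separate */
        acc1 += f1;                       /* dependency chains */
      }
      for (; i < n; i++) {                /* remainder */
        float f = expf(x[i] - max);
        y[i] = f;
        acc0 += f;
      }
      return acc0 + acc1;                 /* reduce the two accumulators */
    }

    int main(void) {
      float x[5] = { 0.5f, 1.0f, 1.5f, 2.0f, 2.5f };
      float y[5];
      printf("sum = %f\n", raddstore_acc2_sketch(x, y, 5, 2.5f));
      return 0;
    }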
