Home
last modified time | relevance | path

Searched refs:vhalfrsqrtx (Results 1 – 25 of 33) sorted by relevance

12

/external/XNNPACK/src/math/
Dsqrt-neonfma-nr3fma.c29 float32x4_t vhalfrsqrtx = vmulq_f32(vrsqrtx, vhalf); in xnn_math_f32_sqrt__neonfma_nr3fma() local
35 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr3fma()
36 vhalfrsqrtx = vfmaq_f32(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr3fma()
39 vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr3fma()
40 vhalfrsqrtx = vfmaq_f32(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr3fma()
43 vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr3fma()
Dsqrt-neonfma-nr2fma1adj.c29 float32x4_t vhalfrsqrtx = vmulq_f32(vrsqrtx, vhalf); in xnn_math_f32_sqrt__neonfma_nr2fma1adj() local
35 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr2fma1adj()
36 vhalfrsqrtx = vfmaq_f32(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr2fma1adj()
39 vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr2fma1adj()
40 vhalfrsqrtx = vfmaq_f32(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr2fma1adj()
47 vsqrtx = vfmaq_f32(vsqrtx, vhalfrsqrtx, vadjustment); in xnn_math_f32_sqrt__neonfma_nr2fma1adj()
Dsqrt-neonfma-nr2fma.c29 float32x4_t vhalfrsqrtx = vmulq_f32(vrsqrtx, vhalf); in xnn_math_f32_sqrt__neonfma_nr2fma() local
35 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr2fma()
36 vhalfrsqrtx = vfmaq_f32(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr2fma()
39 vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr2fma()
Dsqrt-avx512f-nr2fma.c30 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_math_f32_sqrt__avx512f_nr2fma() local
36 __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_math_f32_sqrt__avx512f_nr2fma()
37 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_math_f32_sqrt__avx512f_nr2fma()
40 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_math_f32_sqrt__avx512f_nr2fma()
Dsqrt-fma3-nr2fma.c30 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_math_f32_sqrt__fma3_nr2fma() local
36 __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_math_f32_sqrt__fma3_nr2fma()
37 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_math_f32_sqrt__fma3_nr2fma()
40 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_math_f32_sqrt__fma3_nr2fma()
Dsqrt-avx512f-nr1fma1adj.c30 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_math_f32_sqrt__avx512f_nr1fma1adj() local
36 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_math_f32_sqrt__avx512f_nr1fma1adj()
37 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_math_f32_sqrt__avx512f_nr1fma1adj()
44 vsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_math_f32_sqrt__avx512f_nr1fma1adj()
Dsqrt-fma3-nr1fma1adj.c30 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_math_f32_sqrt__fma3_nr1fma1adj() local
36 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_math_f32_sqrt__fma3_nr1fma1adj()
37 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_math_f32_sqrt__fma3_nr1fma1adj()
44 vsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_math_f32_sqrt__fma3_nr1fma1adj()
Dsqrt-neonfma-nr1rsqrts1fma1adj.c34 float32x4_t vhalfrsqrtx = vmulq_f32(vrsqrtx, vhalf); in xnn_math_f32_sqrt__neonfma_nr1rsqrts1fma1adj() local
40 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr1rsqrts1fma1adj()
41 vhalfrsqrtx = vfmaq_f32(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr1rsqrts1fma1adj()
48 vsqrtx = vfmaq_f32(vsqrtx, vhalfrsqrtx, vadjustment); in xnn_math_f32_sqrt__neonfma_nr1rsqrts1fma1adj()
/external/XNNPACK/src/f32-vsqrt/gen/
Davx512f-nr1fma1adj-x16.c35 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16() local
36 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16()
37 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16()
40 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16()
55 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16() local
56 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16()
57 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16()
60 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16()
Dfma3-nr1fma1adj-x8.c36 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8() local
37 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8()
38 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8()
41 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8()
55 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8() local
56 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8()
57 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8()
60 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8()
Davx512f-nr1fma1adj-x32.c66 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32() local
67 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32()
68 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32()
71 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32()
86 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32() local
87 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32()
88 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32()
91 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32()
Dfma3-nr1fma1adj-x16.c67 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16() local
68 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16()
69 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16()
72 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16()
86 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16() local
87 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16()
88 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16()
91 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16()
Dfma3-nr1fma1adj-x24.c77 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() local
78 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
79 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
82 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
96 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() local
97 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
98 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
101 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
Davx512f-nr1fma1adj-x48.c76 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() local
77 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
78 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
81 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
96 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() local
97 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
98 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
101 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
Davx512f-nr1fma1adj-x64.c86 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local
87 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
88 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
91 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
106 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local
107 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
108 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
111 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
Dfma3-nr1fma1adj-x32.c87 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local
88 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
89 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
92 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
106 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local
107 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
108 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
111 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
Davx512f-nr1fma1adj-x80.c96 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() local
97 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
98 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
101 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
116 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() local
117 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
118 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
121 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
Dfma3-nr1fma1adj-x40.c97 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() local
98 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
99 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
102 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
116 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() local
117 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
118 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
121 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
Davx512f-nr1fma1adj-x96.c106 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() local
107 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
108 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
111 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
126 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() local
127 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
128 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
131 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
Dfma3-nr1fma1adj-x48.c107 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() local
108 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
109 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
112 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
126 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() local
127 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
128 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
131 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
Davx512f-nr1fma1adj-x112.c116 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() local
117 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
118 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
121 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
136 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() local
137 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
138 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
141 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
Dfma3-nr1fma1adj-x56.c117 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() local
118 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
119 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
122 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
136 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() local
137 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
138 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
141 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
/external/XNNPACK/src/f32-vsqrt/
Davx512f-nr1fma1adj.c.in41 __m512 vhalfrsqrtx${ABC[N]} = _mm512_mul_ps(vrsqrtx${ABC[N]}, vhalf);
44 … const __m512 vresidual${ABC[N]} = _mm512_fnmadd_ps(vsqrtx${ABC[N]}, vhalfrsqrtx${ABC[N]}, vhalf);
47vhalfrsqrtx${ABC[N]} = _mm512_fmadd_ps(vhalfrsqrtx${ABC[N]}, vresidual${ABC[N]}, vhalfrsqrtx${ABC[…
54 …const __m512 vy${ABC[N]} = _mm512_fmadd_ps(vhalfrsqrtx${ABC[N]}, vadjustment${ABC[N]}, vsqrtx${ABC…
67 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); variable
68 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf);
69 vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx);
72 const __m512 vy = _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx);
87 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); variable
88 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf);
[all …]
Dfma3-nr1fma1adj.c.in42 __m256 vhalfrsqrtx${ABC[N]} = _mm256_mul_ps(vrsqrtx${ABC[N]}, vhalf);
45 … const __m256 vresidual${ABC[N]} = _mm256_fnmadd_ps(vsqrtx${ABC[N]}, vhalfrsqrtx${ABC[N]}, vhalf);
48vhalfrsqrtx${ABC[N]} = _mm256_fmadd_ps(vhalfrsqrtx${ABC[N]}, vresidual${ABC[N]}, vhalfrsqrtx${ABC[…
55 …const __m256 vy${ABC[N]} = _mm256_fmadd_ps(vhalfrsqrtx${ABC[N]}, vadjustment${ABC[N]}, vsqrtx${ABC…
68 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); variable
69 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf);
70 vhalfrsqrtx = _mm256_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx);
73 const __m256 vy = _mm256_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx);
87 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); variable
88 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf);
[all …]
Dneonfma-nr2fma1adj.c.in38 float32x4_t vhalfrsqrtx${ABC[N:N+4]} = vmulq_f32(vrsqrtx${ABC[N:N+4]}, vhalf);
41 …float32x4_t vresidual${ABC[N:N+4]} = vfmsq_f32(vhalf, vsqrtx${ABC[N:N+4]}, vhalfrsqrtx${ABC[N:N+4]…
44vhalfrsqrtx${ABC[N:N+4]} = vfmaq_f32(vhalfrsqrtx${ABC[N:N+4]}, vresidual${ABC[N:N+4]}, vhalfrsqrtx
48 vresidual${ABC[N:N+4]} = vfmsq_f32(vhalf, vsqrtx${ABC[N:N+4]}, vhalfrsqrtx${ABC[N:N+4]});
51vhalfrsqrtx${ABC[N:N+4]} = vfmaq_f32(vhalfrsqrtx${ABC[N:N+4]}, vresidual${ABC[N:N+4]}, vhalfrsqrtx
58 …const float32x4_t vy${ABC[N:N+4]} = vfmaq_f32(vsqrtx${ABC[N:N+4]}, vhalfrsqrtx${ABC[N:N+4]}, vadju…

12