Home
last modified time | relevance | path

Searched refs:vrsqrtx (Results 1 – 25 of 39) sorted by relevance

12

/external/XNNPACK/src/math/
Dsqrt-neon-nr3rsqrts.c26 float32x4_t vrsqrtx = vrsqrteq_f32(vx); in xnn_math_f32_sqrt__neon_nr3rsqrts() local
31 vrsqrtx = vmulq_f32(vrsqrtx, vrsqrtsq_f32(vx, vmulq_f32(vrsqrtx, vrsqrtx))); in xnn_math_f32_sqrt__neon_nr3rsqrts()
32 vrsqrtx = vmulq_f32(vrsqrtx, vrsqrtsq_f32(vmulq_f32(vrsqrtx, vx), vrsqrtx)); in xnn_math_f32_sqrt__neon_nr3rsqrts()
33 vrsqrtx = vmulq_f32(vrsqrtx, vrsqrtsq_f32(vmulq_f32(vrsqrtx, vx), vrsqrtx)); in xnn_math_f32_sqrt__neon_nr3rsqrts()
36 const float32x4_t vy = vmulq_f32(vrsqrtx, vx); in xnn_math_f32_sqrt__neon_nr3rsqrts()
Dsqrt-neon-nr2rsqrts.c26 float32x4_t vrsqrtx = vrsqrteq_f32(vx); in xnn_math_f32_sqrt__neon_nr2rsqrts() local
31 vrsqrtx = vmulq_f32(vrsqrtx, vrsqrtsq_f32(vx, vmulq_f32(vrsqrtx, vrsqrtx))); in xnn_math_f32_sqrt__neon_nr2rsqrts()
32 vrsqrtx = vmulq_f32(vrsqrtx, vrsqrtsq_f32(vmulq_f32(vrsqrtx, vx), vrsqrtx)); in xnn_math_f32_sqrt__neon_nr2rsqrts()
35 const float32x4_t vy = vmulq_f32(vrsqrtx, vx); in xnn_math_f32_sqrt__neon_nr2rsqrts()
Dsqrt-sse-nr2mac.c29 __m128 vrsqrtx = _mm_rsqrt_ps(vx); in xnn_math_f32_sqrt__sse_nr2mac() local
34vrsqrtx = _mm_mul_ps(vrsqrtx, _mm_sub_ps(_mm_mul_ps(_mm_mul_ps(vhalfx, vrsqrtx), vrsqrtx), vthree_… in xnn_math_f32_sqrt__sse_nr2mac()
35vrsqrtx = _mm_mul_ps(vrsqrtx, _mm_sub_ps(_mm_mul_ps(_mm_mul_ps(vhalfx, vrsqrtx), vrsqrtx), vthree_… in xnn_math_f32_sqrt__sse_nr2mac()
38 const __m128 vy = _mm_mul_ps(vrsqrtx, vx); in xnn_math_f32_sqrt__sse_nr2mac()
Dsqrt-neonfma-nr1rsqrts1fma1adj.c27 float32x4_t vrsqrtx = vrsqrteq_f32(vx); in xnn_math_f32_sqrt__neonfma_nr1rsqrts1fma1adj() local
31 vrsqrtx = vmulq_f32(vrsqrtx, vrsqrtsq_f32(vx, vmulq_f32(vrsqrtx, vrsqrtx))); in xnn_math_f32_sqrt__neonfma_nr1rsqrts1fma1adj()
33 float32x4_t vsqrtx = vmulq_f32(vrsqrtx, vx); in xnn_math_f32_sqrt__neonfma_nr1rsqrts1fma1adj()
34 float32x4_t vhalfrsqrtx = vmulq_f32(vrsqrtx, vhalf); in xnn_math_f32_sqrt__neonfma_nr1rsqrts1fma1adj()
Dsqrt-neon-nr1rsqrts.c26 float32x4_t vrsqrtx = vrsqrteq_f32(vx); in xnn_math_f32_sqrt__neon_nr1rsqrts() local
30 vrsqrtx = vmulq_f32(vrsqrtx, vrsqrtsq_f32(vx, vmulq_f32(vrsqrtx, vrsqrtx))); in xnn_math_f32_sqrt__neon_nr1rsqrts()
33 const float32x4_t vy = vmulq_f32(vrsqrtx, vx); in xnn_math_f32_sqrt__neon_nr1rsqrts()
Dsqrt-sse-nr1mac.c29 __m128 vrsqrtx = _mm_rsqrt_ps(vx); in xnn_math_f32_sqrt__sse_nr1mac() local
34vrsqrtx = _mm_mul_ps(vrsqrtx, _mm_sub_ps(vthree_halfs, _mm_mul_ps(vhalfx, _mm_mul_ps(vrsqrtx, vrsq… in xnn_math_f32_sqrt__sse_nr1mac()
37 const __m128 vy = _mm_mul_ps(vrsqrtx, vx); in xnn_math_f32_sqrt__sse_nr1mac()
Dsqrt-sse-hh1mac.c30 __m128 vrsqrtx = _mm_rsqrt_ps(vx); in xnn_math_f32_sqrt__sse_hh1mac() local
35 const __m128 vt = _mm_mul_ps(_mm_mul_ps(vx, vrsqrtx), vrsqrtx); in xnn_math_f32_sqrt__sse_hh1mac()
36vrsqrtx = _mm_mul_ps(vrsqrtx, _mm_add_ps(_mm_mul_ps(vt, _mm_sub_ps(_mm_mul_ps(vt, vc0375), vc1250)… in xnn_math_f32_sqrt__sse_hh1mac()
39 const __m128 vy = _mm_mul_ps(vrsqrtx, vx); in xnn_math_f32_sqrt__sse_hh1mac()
Dsqrt-fma3-nr1fma.c28 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_math_f32_sqrt__fma3_nr1fma() local
29 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_math_f32_sqrt__fma3_nr1fma()
30 const __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_math_f32_sqrt__fma3_nr1fma()
Dsqrt-neonfma-nr1fma.c27 const float32x4_t vrsqrtx = vrsqrteq_f32(vx); in xnn_math_f32_sqrt__neonfma_nr1fma() local
28 float32x4_t vsqrtx = vmulq_f32(vrsqrtx, vx); in xnn_math_f32_sqrt__neonfma_nr1fma()
29 const float32x4_t vhalfrsqrtx = vmulq_f32(vrsqrtx, vhalf); in xnn_math_f32_sqrt__neonfma_nr1fma()
Dsqrt-avx512f-nr1fma.c28 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_math_f32_sqrt__avx512f_nr1fma() local
29 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_math_f32_sqrt__avx512f_nr1fma()
30 const __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_math_f32_sqrt__avx512f_nr1fma()
Dsqrt-neonfma-nr2fma.c27 const float32x4_t vrsqrtx = vrsqrteq_f32(vx); in xnn_math_f32_sqrt__neonfma_nr2fma() local
28 float32x4_t vsqrtx = vmulq_f32(vrsqrtx, vx); in xnn_math_f32_sqrt__neonfma_nr2fma()
29 float32x4_t vhalfrsqrtx = vmulq_f32(vrsqrtx, vhalf); in xnn_math_f32_sqrt__neonfma_nr2fma()
Dsqrt-avx512f-nr2fma.c28 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_math_f32_sqrt__avx512f_nr2fma() local
29 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_math_f32_sqrt__avx512f_nr2fma()
30 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_math_f32_sqrt__avx512f_nr2fma()
Dsqrt-fma3-nr2fma.c28 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_math_f32_sqrt__fma3_nr2fma() local
29 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_math_f32_sqrt__fma3_nr2fma()
30 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_math_f32_sqrt__fma3_nr2fma()
Dsqrt-avx512f-nr1fma1adj.c28 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_math_f32_sqrt__avx512f_nr1fma1adj() local
29 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_math_f32_sqrt__avx512f_nr1fma1adj()
30 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_math_f32_sqrt__avx512f_nr1fma1adj()
/external/XNNPACK/src/f32-vsqrt/gen/
Davx512f-nr1fma1adj-x16.c33 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16() local
34 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16()
35 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16()
53 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16() local
54 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16()
55 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16()
Dfma3-nr1fma1adj-x8.c34 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8() local
35 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8()
36 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8()
53 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8() local
54 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8()
55 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8()
Davx512f-nr1fma1adj-x32.c64 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32() local
65 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32()
66 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32()
84 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32() local
85 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32()
86 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32()
Dfma3-nr1fma1adj-x16.c65 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16() local
66 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16()
67 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16()
84 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16() local
85 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16()
86 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16()
Dfma3-nr1fma1adj-x24.c75 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() local
76 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
77 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
94 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() local
95 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
96 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
Davx512f-nr1fma1adj-x48.c74 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() local
75 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
76 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
94 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() local
95 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
96 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
Davx512f-nr1fma1adj-x64.c84 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local
85 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
86 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
104 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local
105 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
106 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
Dfma3-nr1fma1adj-x32.c85 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local
86 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
87 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
104 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local
105 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
106 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
/external/XNNPACK/src/f32-vsqrt/
Davx512f-nr1fma1adj.c.in37 const __m512 vrsqrtx${ABC[N]} = _mm512_rsqrt14_ps(vx${ABC[N]});
40 __m512 vsqrtx${ABC[N]} = _mm512_mul_ps(vrsqrtx${ABC[N]}, vx${ABC[N]});
41 __m512 vhalfrsqrtx${ABC[N]} = _mm512_mul_ps(vrsqrtx${ABC[N]}, vhalf);
65 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); variable
66 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx);
67 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf);
85 const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx); variable
86 __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx);
87 __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf);
Dfma3-nr1fma1adj.c.in38 const __m256 vrsqrtx${ABC[N]} = _mm256_rsqrt_ps(vx${ABC[N]});
41 __m256 vsqrtx${ABC[N]} = _mm256_mul_ps(vrsqrtx${ABC[N]}, vx${ABC[N]});
42 __m256 vhalfrsqrtx${ABC[N]} = _mm256_mul_ps(vrsqrtx${ABC[N]}, vhalf);
66 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); variable
67 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx);
68 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf);
85 const __m256 vrsqrtx = _mm256_rsqrt_ps(vx); variable
86 __m256 vsqrtx = _mm256_mul_ps(vrsqrtx, vx);
87 __m256 vhalfrsqrtx = _mm256_mul_ps(vrsqrtx, vhalf);
Dneonfma-nr1rsqrts1fma1adj.c.in34 float32x4_t vrsqrtx${ABC[N:N+4]} = vrsqrteq_f32(vx${ABC[N:N+4]});
37 const float32x4_t vrx${ABC[N:N+4]} = vmulq_f32(vrsqrtx${ABC[N:N+4]}, vrsqrtx${ABC[N:N+4]});
43 vrsqrtx${ABC[N:N+4]} = vmulq_f32(vrsqrtx${ABC[N:N+4]}, vcorrection${ABC[N:N+4]});
46 float32x4_t vsqrtx${ABC[N:N+4]} = vmulq_f32(vrsqrtx${ABC[N:N+4]}, vx${ABC[N:N+4]});
47 float32x4_t vhalfrsqrtx${ABC[N:N+4]} = vmulq_f32(vrsqrtx${ABC[N:N+4]}, vhalf);

12