1///////////////////////////////////////////////////////////////////////////////////////////////////
2// OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net)
3///////////////////////////////////////////////////////////////////////////////////////////////////
4// Created : 2013-04-22
5// Updated : 2013-04-22
6// Licence : This source is under MIT License
7// File    : glm/gtx/simd_quat.inl
8///////////////////////////////////////////////////////////////////////////////////////////////////
9
10
11namespace glm{
12namespace detail{
13
14
15//////////////////////////////////////
16// Debugging
17#if 0
18void print(__m128 v)
19{
20    GLM_ALIGN(16) float result[4];
21    _mm_store_ps(result, v);
22
23    printf("__m128:    %f %f %f %f\n", result[0], result[1], result[2], result[3]);
24}
25
26void print(const fvec4SIMD &v)
27{
28    printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w);
29}
30#endif
31
32
33//////////////////////////////////////
34// Implicit basic constructors
35
36GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD()
37#ifdef GLM_SIMD_ENABLE_DEFAULT_INIT
38    : Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f))
39#endif
40{}
41
42GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) :
43	Data(Data)
44{}
45
46GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) :
47	Data(q.Data)
48{}
49
50
51//////////////////////////////////////
52// Explicit basic constructors
53
54GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) :
55	Data(_mm_set_ps(w, z, y, x))
56{}
57
58GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) :
59	Data(_mm_set_ps(q.w, q.z, q.y, q.x))
60{}
61
62GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles)
63{
64    vec3 c = glm::cos(eulerAngles * 0.5f);
65	vec3 s = glm::sin(eulerAngles * 0.5f);
66
67    Data = _mm_set_ps(
68        (c.x * c.y * c.z) + (s.x * s.y * s.z),
69        (c.x * c.y * s.z) - (s.x * s.y * c.z),
70        (c.x * s.y * c.z) + (s.x * c.y * s.z),
71        (s.x * c.y * c.z) - (c.x * s.y * s.z));
72}
73
74
75//////////////////////////////////////
76// Unary arithmetic operators
77
78GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q)
79{
80    this->Data = q.Data;
81    return *this;
82}
83
84GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s)
85{
86	this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s));
87	return *this;
88}
89
90GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s)
91{
92	this->Data = _mm_div_ps(Data, _mm_set1_ps(s));
93	return *this;
94}
95
96
97
98// negate operator
99GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q)
100{
101    return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f)));
102}
103
104// operator+
105GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2)
106{
107	return fquatSIMD(_mm_add_ps(q1.Data, q2.Data));
108}
109
110//operator*
111GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2)
112{
113    // SSE2 STATS:
114    //    11 shuffle
115    //    8  mul
116    //    8  add
117
118    // SSE4 STATS:
119    //    3 shuffle
120    //    4 mul
121    //    4 dpps
122
123    __m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3)));
124    __m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2)));
125    __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
126    __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);
127
128#   if((GLM_ARCH & GLM_ARCH_SSE4))
129    __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f,  1.0f,  1.0f), 0xff);
130    __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f,  1.0f,  1.0f, -1.0f), 0xff);
131    __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f,  1.0f, -1.0f,  1.0f), 0xff);
132    __m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff);
133#   else
134           mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f,  1.0f,  1.0f));
135    __m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0));
136           add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1));
137
138           mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f,  1.0f,  1.0f, -1.0f));
139    __m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1));
140           add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1));
141
142           mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f,  1.0f, -1.0f,  1.0f));
143    __m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2));
144           add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1));
145
146           mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f));
147    __m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3));
148           add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1));
149#endif
150
151
152    // This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than
153    // the final code below. I'll keep this here for reference - maybe somebody else can do something better...
154    //
155    //__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0));
156    //__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0));
157    //
158    //return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0));
159
160    float x;
161    float y;
162    float z;
163    float w;
164
165    _mm_store_ss(&x, add0);
166    _mm_store_ss(&y, add1);
167    _mm_store_ss(&z, add2);
168    _mm_store_ss(&w, add3);
169
170    return detail::fquatSIMD(w, x, y, z);
171}
172
173GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v)
174{
175    static const __m128 two = _mm_set1_ps(2.0f);
176
177    __m128 q_wwww  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
178    __m128 q_swp0  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
179	__m128 q_swp1  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
180	__m128 v_swp0  = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1));
181	__m128 v_swp1  = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2));
182
183	__m128 uv      = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
184    __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
185    __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
186    __m128 uuv     = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));
187
188
189    uv  = _mm_mul_ps(uv,  _mm_mul_ps(q_wwww, two));
190    uuv = _mm_mul_ps(uuv, two);
191
192    return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv));
193}
194
195GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q)
196{
197	return glm::inverse(q) * v;
198}
199
200GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s)
201{
202	return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s)));
203}
204
205GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q)
206{
207	return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data));
208}
209
210
211//operator/
212GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s)
213{
214	return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s)));
215}
216
217
218}//namespace detail
219
220
221GLM_FUNC_QUALIFIER quat quat_cast
222(
223	detail::fquatSIMD const & x
224)
225{
226	GLM_ALIGN(16) quat Result;
227	_mm_store_ps(&Result[0], x.Data);
228
229	return Result;
230}
231
232template <typename T>
233GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[])
234{
235    T trace = m0[0] + m1[1] + m2[2] + T(1.0);
236    if (trace > T(0))
237    {
238        T s = static_cast<T>(0.5) / sqrt(trace);
239
240        return _mm_set_ps(
241            static_cast<float>(T(0.25) / s),
242            static_cast<float>((m0[1] - m1[0]) * s),
243            static_cast<float>((m2[0] - m0[2]) * s),
244            static_cast<float>((m1[2] - m2[1]) * s));
245    }
246    else
247    {
248        if (m0[0] > m1[1])
249        {
250            if (m0[0] > m2[2])
251            {
252                // X is biggest.
253                T s = sqrt(m0[0] - m1[1] - m2[2] + T(1.0)) * T(0.5);
254
255                return _mm_set_ps(
256                    static_cast<float>((m1[2] - m2[1]) * s),
257                    static_cast<float>((m2[0] + m0[2]) * s),
258                    static_cast<float>((m0[1] + m1[0]) * s),
259                    static_cast<float>(T(0.5)          * s));
260            }
261        }
262        else
263        {
264            if (m1[1] > m2[2])
265            {
266                // Y is biggest.
267                T s = sqrt(m1[1] - m0[0] - m2[2] + T(1.0)) * T(0.5);
268
269                return _mm_set_ps(
270                    static_cast<float>((m2[0] - m0[2]) * s),
271                    static_cast<float>((m1[2] + m2[1]) * s),
272                    static_cast<float>(T(0.5)          * s),
273                    static_cast<float>((m0[1] + m1[0]) * s));
274            }
275        }
276
277        // Z is biggest.
278        T s = sqrt(m2[2] - m0[0] - m1[1] + T(1.0)) * T(0.5);
279
280        return _mm_set_ps(
281            static_cast<float>((m0[1] - m1[0]) * s),
282            static_cast<float>(T(0.5)          * s),
283            static_cast<float>((m1[2] + m2[1]) * s),
284            static_cast<float>((m2[0] + m0[2]) * s));
285    }
286}
287
288GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
289(
290	detail::fmat4x4SIMD const & m
291)
292{
293    // Scalar implementation for now.
294    GLM_ALIGN(16) float m0[4];
295    GLM_ALIGN(16) float m1[4];
296    GLM_ALIGN(16) float m2[4];
297
298    _mm_store_ps(m0, m[0].Data);
299    _mm_store_ps(m1, m[1].Data);
300    _mm_store_ps(m2, m[2].Data);
301
302    return quatSIMD_cast_impl(m0, m1, m2);
303}
304
305template <typename T, precision P>
306GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
307(
308    detail::tmat4x4<T, P> const & m
309)
310{
311    return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
312}
313
314template <typename T, precision P>
315GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
316(
317    detail::tmat3x3<T, P> const & m
318)
319{
320    return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
321}
322
323
324GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast
325(
326	detail::fquatSIMD const & q
327)
328{
329    detail::fmat4x4SIMD result;
330
331    __m128 _wwww  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
332    __m128 _xyzw  = q.Data;
333    __m128 _zxyw  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
334    __m128 _yzxw  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
335
336    __m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw);
337    __m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2));
338    __m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1));
339
340    __m128 _tmp0  = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw));
341           _tmp0  = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw));
342
343    __m128 _tmp1  = _mm_mul_ps(_yzxw2, _xyzw);
344           _tmp1  = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww));
345
346    __m128 _tmp2  = _mm_mul_ps(_zxyw2, _xyzw);
347           _tmp2  = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww));
348
349
350    // There's probably a better, more politically correct way of doing this...
351    result[0].Data = _mm_set_ps(
352        0.0f,
353        reinterpret_cast<float*>(&_tmp2)[0],
354        reinterpret_cast<float*>(&_tmp1)[0],
355        reinterpret_cast<float*>(&_tmp0)[0]);
356
357    result[1].Data = _mm_set_ps(
358        0.0f,
359        reinterpret_cast<float*>(&_tmp1)[1],
360        reinterpret_cast<float*>(&_tmp0)[1],
361        reinterpret_cast<float*>(&_tmp2)[1]);
362
363    result[2].Data = _mm_set_ps(
364        0.0f,
365        reinterpret_cast<float*>(&_tmp0)[2],
366        reinterpret_cast<float*>(&_tmp2)[2],
367        reinterpret_cast<float*>(&_tmp1)[2]);
368
369   result[3].Data = _mm_set_ps(
370        1.0f,
371        0.0f,
372        0.0f,
373        0.0f);
374
375
376    return result;
377}
378
379GLM_FUNC_QUALIFIER mat4 mat4_cast
380(
381	detail::fquatSIMD const & q
382)
383{
384    return mat4_cast(mat4SIMD_cast(q));
385}
386
387
388
389GLM_FUNC_QUALIFIER float length
390(
391	detail::fquatSIMD const & q
392)
393{
394    return glm::sqrt(dot(q, q));
395}
396
397GLM_FUNC_QUALIFIER detail::fquatSIMD normalize
398(
399	detail::fquatSIMD const & q
400)
401{
402    return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q)));
403}
404
405GLM_FUNC_QUALIFIER float dot
406(
407	detail::fquatSIMD const & q1,
408	detail::fquatSIMD const & q2
409)
410{
411    float result;
412    _mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data));
413
414    return result;
415}
416
417GLM_FUNC_QUALIFIER detail::fquatSIMD mix
418(
419	detail::fquatSIMD const & x,
420	detail::fquatSIMD const & y,
421	float const & a
422)
423{
424	float cosTheta = dot(x, y);
425
426    if (cosTheta > 1.0f - glm::epsilon<float>())
427    {
428	    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
429    }
430    else
431    {
432        float angle = glm::acos(cosTheta);
433
434
435        float s0 = glm::sin((1.0f - a) * angle);
436        float s1 = glm::sin(a * angle);
437        float d  = 1.0f / glm::sin(angle);
438
439        return (s0 * x + s1 * y) * d;
440    }
441}
442
443GLM_FUNC_QUALIFIER detail::fquatSIMD lerp
444(
445	detail::fquatSIMD const & x,
446	detail::fquatSIMD const & y,
447	float const & a
448)
449{
450	// Lerp is only defined in [0, 1]
451	assert(a >= 0.0f);
452	assert(a <= 1.0f);
453
454    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
455}
456
457GLM_FUNC_QUALIFIER detail::fquatSIMD slerp
458(
459	detail::fquatSIMD const & x,
460	detail::fquatSIMD const & y,
461	float const & a
462)
463{
464	detail::fquatSIMD z = y;
465
466	float cosTheta = dot(x, y);
467
468	// If cosTheta < 0, the interpolation will take the long way around the sphere.
469	// To fix this, one quat must be negated.
470	if (cosTheta < 0.0f)
471	{
472		z        = -y;
473		cosTheta = -cosTheta;
474	}
475
476	// Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator
477	if(cosTheta > 1.0f - epsilon<float>())
478	{
479		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
480	}
481	else
482	{
483        float angle = glm::acos(cosTheta);
484
485
486		float s0 = glm::sin((1.0f - a) * angle);
487        float s1 = glm::sin(a * angle);
488        float d  = 1.0f / glm::sin(angle);
489
490        return (s0 * x + s1 * y) * d;
491	}
492}
493
494
495GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix
496(
497	detail::fquatSIMD const & x,
498	detail::fquatSIMD const & y,
499	float const & a
500)
501{
502	float cosTheta = dot(x, y);
503
504    if (cosTheta > 1.0f - glm::epsilon<float>())
505    {
506	    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
507    }
508    else
509    {
510        float angle = glm::fastAcos(cosTheta);
511
512
513        __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
514
515        __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
516        __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
517        __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
518
519        return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
520    }
521}
522
523GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp
524(
525	detail::fquatSIMD const & x,
526	detail::fquatSIMD const & y,
527	float const & a
528)
529{
530	detail::fquatSIMD z = y;
531
532	float cosTheta = dot(x, y);
533	if (cosTheta < 0.0f)
534	{
535		z        = -y;
536		cosTheta = -cosTheta;
537	}
538
539
540	if(cosTheta > 1.0f - epsilon<float>())
541	{
542		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
543	}
544	else
545	{
546        float angle = glm::fastAcos(cosTheta);
547
548
549        __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
550
551        __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
552        __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
553        __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
554
555        return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
556	}
557}
558
559
560
561GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate
562(
563	detail::fquatSIMD const & q
564)
565{
566	return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)));
567}
568
569GLM_FUNC_QUALIFIER detail::fquatSIMD inverse
570(
571	detail::fquatSIMD const & q
572)
573{
574	return conjugate(q) / dot(q, q);
575}
576
577
578GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
579(
580	float const & angle,
581	vec3 const & v
582)
583{
584#ifdef GLM_FORCE_RADIANS
585	float a(angle);
586#else
587#	pragma message("GLM: rotateZ function taking degrees as parameters is deprecated. #define GLM_FORCE_RADIANS before including GLM headers to remove this message.")
588	float a(glm::radians(angle));
589#endif
590	float s = glm::sin(a * 0.5f);
591
592	return _mm_set_ps(
593		glm::cos(a * 0.5f),
594		v.z * s,
595		v.y * s,
596		v.x * s);
597}
598
599GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
600(
601	float const & angle,
602	float const & x,
603	float const & y,
604	float const & z
605)
606{
607	return angleAxisSIMD(angle, vec3(x, y, z));
608}
609
610
611GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x)
612{
613    static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f);
614    static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f);
615    static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f);
616
617    __m128 x3 = _mm_mul_ps(x,  _mm_mul_ps(x, x));
618    __m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x));
619    __m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x));
620
621    __m128 y0 = _mm_mul_ps(x3, c0);
622    __m128 y1 = _mm_mul_ps(x5, c1);
623    __m128 y2 = _mm_mul_ps(x7, c2);
624
625    return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2);
626}
627
628
629}//namespace glm
630