1/*
2 * Copyright (c) 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 */
22
23#include <clc/clc.h>
24
25_CLC_OVERLOAD _CLC_DEF float normalize(float p) {
26  return sign(p);
27}
28
29_CLC_OVERLOAD _CLC_DEF float2 normalize(float2 p) {
30  if (all(p == (float2)0.0F))
31    return p;
32
33  float l2 = dot(p, p);
34
35  if (l2 < FLT_MIN) {
36    p *= 0x1.0p+86F;
37    l2 = dot(p, p);
38  } else if (l2 == INFINITY) {
39    p *= 0x1.0p-65f;
40    l2 = dot(p, p);
41    if (l2 == INFINITY) {
42      p = copysign(select((float2)0.0F, (float2)1.0F, isinf(p)), p);
43      l2 = dot(p, p);
44    }
45  }
46  return p * rsqrt(l2);
47}
48
49_CLC_OVERLOAD _CLC_DEF float3 normalize(float3 p) {
50  if (all(p == (float3)0.0F))
51    return p;
52
53  float l2 = dot(p, p);
54
55  if (l2 < FLT_MIN) {
56    p *= 0x1.0p+86F;
57    l2 = dot(p, p);
58  } else if (l2 == INFINITY) {
59    p *= 0x1.0p-66f;
60    l2 = dot(p, p);
61    if (l2 == INFINITY) {
62      p = copysign(select((float3)0.0F, (float3)1.0F, isinf(p)), p);
63      l2 = dot(p, p);
64    }
65  }
66  return p * rsqrt(l2);
67}
68
69_CLC_OVERLOAD _CLC_DEF float4 normalize(float4 p) {
70  if (all(p == (float4)0.0F))
71    return p;
72
73  float l2 = dot(p, p);
74
75  if (l2 < FLT_MIN) {
76    p *= 0x1.0p+86F;
77    l2 = dot(p, p);
78  } else if (l2 == INFINITY) {
79    p *= 0x1.0p-66f;
80    l2 = dot(p, p);
81    if (l2 == INFINITY) {
82      p = copysign(select((float4)0.0F, (float4)1.0F, isinf(p)), p);
83      l2 = dot(p, p);
84    }
85  }
86  return p * rsqrt(l2);
87}
88
89#ifdef cl_khr_fp64
90
91#pragma OPENCL EXTENSION cl_khr_fp64 : enable
92
93_CLC_OVERLOAD _CLC_DEF double normalize(double p) {
94  return sign(p);
95}
96
97_CLC_OVERLOAD _CLC_DEF double2 normalize(double2 p) {
98  if (all(p == (double2)0.0))
99    return p;
100
101  double l2 = dot(p, p);
102
103  if (l2 < DBL_MIN) {
104    p *= 0x1.0p+563;
105    l2 = dot(p, p);
106  } else if (l2 == INFINITY) {
107    p *= 0x1.0p-513;
108    l2 = dot(p, p);
109    if (l2 == INFINITY) {
110      p = copysign(select((double2)0.0, (double2)1.0, isinf(p)), p);
111      l2 = dot(p, p);
112    }
113  }
114  return p * rsqrt(l2);
115}
116
117_CLC_OVERLOAD _CLC_DEF double3 normalize(double3 p) {
118  if (all(p == (double3)0.0))
119    return p;
120
121  double l2 = dot(p, p);
122
123  if (l2 < DBL_MIN) {
124    p *= 0x1.0p+563;
125    l2 = dot(p, p);
126  } else if (l2 == INFINITY) {
127    p *= 0x1.0p-514;
128    l2 = dot(p, p);
129    if (l2 == INFINITY) {
130      p = copysign(select((double3)0.0, (double3)1.0, isinf(p)), p);
131      l2 = dot(p, p);
132    }
133  }
134  return p * rsqrt(l2);
135}
136
137_CLC_OVERLOAD _CLC_DEF double4 normalize(double4 p) {
138  if (all(p == (double4)0.0))
139    return p;
140
141  double l2 = dot(p, p);
142
143  if (l2 < DBL_MIN) {
144    p *= 0x1.0p+563;
145    l2 = dot(p, p);
146  } else if (l2 == INFINITY) {
147    p *= 0x1.0p-514;
148    l2 = dot(p, p);
149    if (l2 == INFINITY) {
150      p = copysign(select((double4)0.0, (double4)1.0, isinf(p)), p);
151      l2 = dot(p, p);
152    }
153  }
154  return p * rsqrt(l2);
155}
156
157#endif
158