1 // Copyright 2019 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "EmulatedIntrinsics.hpp"
16 
17 #include <algorithm>
18 #include <cmath>
19 #include <functional>
20 #include <mutex>
21 #include <utility>
22 
23 namespace rr {
24 namespace {
25 
26 template<typename T>
27 struct UnderlyingType
28 {
29 	using Type = typename decltype(rr::Extract(std::declval<RValue<T>>(), 0))::rvalue_underlying_type;
30 };
31 
32 template<typename T>
33 using UnderlyingTypeT = typename UnderlyingType<T>::Type;
34 
35 // Call single arg function on a vector type
36 template<typename Func, typename T>
call4(Func func,const RValue<T> & x)37 RValue<T> call4(Func func, const RValue<T> &x)
38 {
39 	T result;
40 	result = Insert(result, Call(func, Extract(x, 0)), 0);
41 	result = Insert(result, Call(func, Extract(x, 1)), 1);
42 	result = Insert(result, Call(func, Extract(x, 2)), 2);
43 	result = Insert(result, Call(func, Extract(x, 3)), 3);
44 	return result;
45 }
46 
47 // Call two arg function on a vector type
48 template<typename Func, typename T>
call4(Func func,const RValue<T> & x,const RValue<T> & y)49 RValue<T> call4(Func func, const RValue<T> &x, const RValue<T> &y)
50 {
51 	T result;
52 	result = Insert(result, Call(func, Extract(x, 0), Extract(y, 0)), 0);
53 	result = Insert(result, Call(func, Extract(x, 1), Extract(y, 1)), 1);
54 	result = Insert(result, Call(func, Extract(x, 2), Extract(y, 2)), 2);
55 	result = Insert(result, Call(func, Extract(x, 3), Extract(y, 3)), 3);
56 	return result;
57 }
58 
59 template<typename T, typename EL = UnderlyingTypeT<T>>
gather(T & out,RValue<Pointer<EL>> base,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment,bool zeroMaskedLanes)60 void gather(T &out, RValue<Pointer<EL>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes)
61 {
62 	constexpr bool atomic = false;
63 	constexpr std::memory_order order = std::memory_order_relaxed;
64 
65 	Pointer<Byte> baseBytePtr = base;
66 
67 	out = T(0);
68 	for(int i = 0; i < 4; i++)
69 	{
70 		If(Extract(mask, i) != 0)
71 		{
72 			auto offset = Extract(offsets, i);
73 			auto el = Load(Pointer<EL>(&baseBytePtr[offset]), alignment, atomic, order);
74 			out = Insert(out, el, i);
75 		}
76 		Else If(zeroMaskedLanes)
77 		{
78 			out = Insert(out, EL(0), i);
79 		}
80 	}
81 }
82 
83 template<typename T, typename EL = UnderlyingTypeT<T>>
scatter(RValue<Pointer<EL>> base,RValue<T> val,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment)84 void scatter(RValue<Pointer<EL>> base, RValue<T> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
85 {
86 	constexpr bool atomic = false;
87 	constexpr std::memory_order order = std::memory_order_relaxed;
88 
89 	Pointer<Byte> baseBytePtr = base;
90 
91 	for(int i = 0; i < 4; i++)
92 	{
93 		If(Extract(mask, i) != 0)
94 		{
95 			auto offset = Extract(offsets, i);
96 			Store(Extract(val, i), Pointer<EL>(&baseBytePtr[offset]), alignment, atomic, order);
97 		}
98 	}
99 }
100 
// TODO(b/148276653): Both atomicMin and atomicMax serialize through a single global mutex, which
// makes every emulated min/max operation mutually exclusive, rather than only the ones on the
// value pointed to by ptr. Use a CAS loop, as is done for LLVMReactor's min/max atomic for Android.
// TODO(b/148207274): Or, move this down into Subzero as a CAS-based operation.

// Returns the one mutex shared by every atomicMin/atomicMax instantiation.
//
// A function-local static inside each template would give atomicMin<T> and
// atomicMax<T> (and each distinct T) its own mutex, so a min and a max racing on
// the same address would not exclude each other. Routing them all through this
// accessor closes that gap. Accesses to the same memory that do not take this
// mutex are still not synchronized with these helpers.
static std::mutex &atomicMinMaxMutex()
{
	static std::mutex m;
	return m;
}

// Stores min(*ptr, value) into *ptr while holding the shared min/max mutex.
// Returns the value *ptr held before the update.
template<typename T>
static T atomicMin(T *ptr, T value)
{
	std::lock_guard<std::mutex> lock(atomicMinMaxMutex());
	const T origValue = *ptr;
	*ptr = std::min(origValue, value);
	return origValue;
}

// Stores max(*ptr, value) into *ptr while holding the shared min/max mutex.
// Returns the value *ptr held before the update.
template<typename T>
static T atomicMax(T *ptr, T value)
{
	std::lock_guard<std::mutex> lock(atomicMinMaxMutex());
	const T origValue = *ptr;
	*ptr = std::max(origValue, value);
	return origValue;
}
125 
126 }  // anonymous namespace
127 
128 namespace emulated {
129 
Gather(RValue<Pointer<Float>> base,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment,bool zeroMaskedLanes)130 RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
131 {
132 	Float4 result{};
133 	gather(result, base, offsets, mask, alignment, zeroMaskedLanes);
134 	return result;
135 }
136 
Gather(RValue<Pointer<Int>> base,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment,bool zeroMaskedLanes)137 RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
138 {
139 	Int4 result{};
140 	gather(result, base, offsets, mask, alignment, zeroMaskedLanes);
141 	return result;
142 }
143 
Scatter(RValue<Pointer<Float>> base,RValue<Float4> val,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment)144 void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
145 {
146 	scatter(base, val, offsets, mask, alignment);
147 }
148 
Scatter(RValue<Pointer<Int>> base,RValue<Int4> val,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment)149 void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
150 {
151 	scatter<Int4>(base, val, offsets, mask, alignment);
152 }
153 
Exp2(RValue<Float> x)154 RValue<Float> Exp2(RValue<Float> x)
155 {
156 	return Call(exp2f, x);
157 }
158 
Log2(RValue<Float> x)159 RValue<Float> Log2(RValue<Float> x)
160 {
161 	return Call(log2f, x);
162 }
163 
Sin(RValue<Float4> x)164 RValue<Float4> Sin(RValue<Float4> x)
165 {
166 	return call4(sinf, x);
167 }
168 
Cos(RValue<Float4> x)169 RValue<Float4> Cos(RValue<Float4> x)
170 {
171 	return call4(cosf, x);
172 }
173 
Tan(RValue<Float4> x)174 RValue<Float4> Tan(RValue<Float4> x)
175 {
176 	return call4(tanf, x);
177 }
178 
Asin(RValue<Float4> x)179 RValue<Float4> Asin(RValue<Float4> x)
180 {
181 	return call4(asinf, x);
182 }
183 
Acos(RValue<Float4> x)184 RValue<Float4> Acos(RValue<Float4> x)
185 {
186 	return call4(acosf, x);
187 }
188 
Atan(RValue<Float4> x)189 RValue<Float4> Atan(RValue<Float4> x)
190 {
191 	return call4(atanf, x);
192 }
193 
Sinh(RValue<Float4> x)194 RValue<Float4> Sinh(RValue<Float4> x)
195 {
196 	// TODO(b/149110874) Use coshf/sinhf when we've implemented SpirV versions at the SpirV level
197 	return Float4(0.5f) * (emulated::Exp(x) - emulated::Exp(-x));
198 }
199 
Cosh(RValue<Float4> x)200 RValue<Float4> Cosh(RValue<Float4> x)
201 {
202 	// TODO(b/149110874) Use coshf/sinhf when we've implemented SpirV versions at the SpirV level
203 	return Float4(0.5f) * (emulated::Exp(x) + emulated::Exp(-x));
204 }
205 
Tanh(RValue<Float4> x)206 RValue<Float4> Tanh(RValue<Float4> x)
207 {
208 	return call4(tanhf, x);
209 }
210 
Asinh(RValue<Float4> x)211 RValue<Float4> Asinh(RValue<Float4> x)
212 {
213 	return call4(asinhf, x);
214 }
215 
Acosh(RValue<Float4> x)216 RValue<Float4> Acosh(RValue<Float4> x)
217 {
218 	return call4(acoshf, x);
219 }
220 
Atanh(RValue<Float4> x)221 RValue<Float4> Atanh(RValue<Float4> x)
222 {
223 	return call4(atanhf, x);
224 }
225 
Atan2(RValue<Float4> x,RValue<Float4> y)226 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
227 {
228 	return call4(atan2f, x, y);
229 }
230 
Pow(RValue<Float4> x,RValue<Float4> y)231 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
232 {
233 	return call4(powf, x, y);
234 }
235 
Exp(RValue<Float4> x)236 RValue<Float4> Exp(RValue<Float4> x)
237 {
238 	return call4(expf, x);
239 }
240 
Log(RValue<Float4> x)241 RValue<Float4> Log(RValue<Float4> x)
242 {
243 	return call4(logf, x);
244 }
245 
Exp2(RValue<Float4> x)246 RValue<Float4> Exp2(RValue<Float4> x)
247 {
248 	return call4(exp2f, x);
249 }
250 
Log2(RValue<Float4> x)251 RValue<Float4> Log2(RValue<Float4> x)
252 {
253 	return call4(log2f, x);
254 }
255 
MinAtomic(RValue<Pointer<Int>> x,RValue<Int> y,std::memory_order memoryOrder)256 RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
257 {
258 	return Call(atomicMin<int32_t>, x, y);
259 }
260 
MinAtomic(RValue<Pointer<UInt>> x,RValue<UInt> y,std::memory_order memoryOrder)261 RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
262 {
263 	return Call(atomicMin<uint32_t>, x, y);
264 }
265 
MaxAtomic(RValue<Pointer<Int>> x,RValue<Int> y,std::memory_order memoryOrder)266 RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
267 {
268 	return Call(atomicMax<int32_t>, x, y);
269 }
270 
MaxAtomic(RValue<Pointer<UInt>> x,RValue<UInt> y,std::memory_order memoryOrder)271 RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
272 {
273 	return Call(atomicMax<uint32_t>, x, y);
274 }
275 
FRem(RValue<Float4> lhs,RValue<Float4> rhs)276 RValue<Float4> FRem(RValue<Float4> lhs, RValue<Float4> rhs)
277 {
278 	return call4(fmodf, lhs, rhs);
279 }
280 
281 }  // namespace emulated
282 }  // namespace rr
283