// Copyright 2019 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "EmulatedIntrinsics.hpp" #include #include #include #include #include namespace rr { namespace { template struct UnderlyingType { using Type = typename decltype(rr::Extract(std::declval>(), 0))::rvalue_underlying_type; }; template using UnderlyingTypeT = typename UnderlyingType::Type; // Call single arg function on a vector type template RValue call4(Func func, const RValue &x) { T result; result = Insert(result, Call(func, Extract(x, 0)), 0); result = Insert(result, Call(func, Extract(x, 1)), 1); result = Insert(result, Call(func, Extract(x, 2)), 2); result = Insert(result, Call(func, Extract(x, 3)), 3); return result; } // Call two arg function on a vector type template RValue call4(Func func, const RValue &x, const RValue &y) { T result; result = Insert(result, Call(func, Extract(x, 0), Extract(y, 0)), 0); result = Insert(result, Call(func, Extract(x, 1), Extract(y, 1)), 1); result = Insert(result, Call(func, Extract(x, 2), Extract(y, 2)), 2); result = Insert(result, Call(func, Extract(x, 3), Extract(y, 3)), 3); return result; } template> void gather(T &out, RValue> base, RValue offsets, RValue mask, unsigned int alignment, bool zeroMaskedLanes) { constexpr bool atomic = false; constexpr std::memory_order order = std::memory_order_relaxed; Pointer baseBytePtr = base; out = T(0); for(int i = 0; i < 4; i++) { If(Extract(mask, i) != 0) { auto offset = Extract(offsets, i); auto el = Load(Pointer(&baseBytePtr[offset]), alignment, atomic, order); out = Insert(out, el, i); } Else If(zeroMaskedLanes) { out = Insert(out, EL(0), i); } } } template> void scatter(RValue> base, RValue val, RValue offsets, RValue mask, unsigned int alignment) { constexpr bool atomic = false; constexpr std::memory_order order = std::memory_order_relaxed; Pointer baseBytePtr = base; for(int i = 0; i < 4; i++) { If(Extract(mask, i) != 0) { auto offset = Extract(offsets, i); Store(Extract(val, i), Pointer(&baseBytePtr[offset]), alignment, atomic, order); } } } // TODO(b/148276653): Both atomicMin and atomicMax use a static (global) mutex that makes all min // operations for a given T mutually exclusive, rather than only the ones on the value pointed to // by ptr. Use a CAS loop, as is done for LLVMReactor's min/max atomic for Android. // TODO(b/148207274): Or, move this down into Subzero as a CAS-based operation. template static T atomicMin(T *ptr, T value) { static std::mutex m; std::lock_guard lock(m); T origValue = *ptr; *ptr = std::min(origValue, value); return origValue; } template static T atomicMax(T *ptr, T value) { static std::mutex m; std::lock_guard lock(m); T origValue = *ptr; *ptr = std::max(origValue, value); return origValue; } } // anonymous namespace namespace emulated { RValue Gather(RValue> base, RValue offsets, RValue mask, unsigned int alignment, bool zeroMaskedLanes /* = false */) { Float4 result{}; gather(result, base, offsets, mask, alignment, zeroMaskedLanes); return result; } RValue Gather(RValue> base, RValue offsets, RValue mask, unsigned int alignment, bool zeroMaskedLanes /* = false */) { Int4 result{}; gather(result, base, offsets, mask, alignment, zeroMaskedLanes); return result; } void Scatter(RValue> base, RValue val, RValue offsets, RValue mask, unsigned int alignment) { scatter(base, val, offsets, mask, alignment); } void Scatter(RValue> base, RValue val, RValue offsets, RValue mask, unsigned int alignment) { scatter(base, val, offsets, mask, alignment); } RValue Exp2(RValue x) { return Call(exp2f, x); } RValue Log2(RValue x) { return Call(log2f, x); } RValue Sin(RValue x) { return call4(sinf, x); } RValue Cos(RValue x) { return call4(cosf, x); } RValue Tan(RValue x) { return call4(tanf, x); } RValue Asin(RValue x) { return call4(asinf, x); } RValue Acos(RValue x) { return call4(acosf, x); } RValue Atan(RValue x) { return call4(atanf, x); } RValue Sinh(RValue x) { // TODO(b/149110874) Use coshf/sinhf when we've implemented SpirV versions at the SpirV level return Float4(0.5f) * (emulated::Exp(x) - emulated::Exp(-x)); } RValue Cosh(RValue x) { // TODO(b/149110874) Use coshf/sinhf when we've implemented SpirV versions at the SpirV level return Float4(0.5f) * (emulated::Exp(x) + emulated::Exp(-x)); } RValue Tanh(RValue x) { return call4(tanhf, x); } RValue Asinh(RValue x) { return call4(asinhf, x); } RValue Acosh(RValue x) { return call4(acoshf, x); } RValue Atanh(RValue x) { return call4(atanhf, x); } RValue Atan2(RValue x, RValue y) { return call4(atan2f, x, y); } RValue Pow(RValue x, RValue y) { return call4(powf, x, y); } RValue Exp(RValue x) { return call4(expf, x); } RValue Log(RValue x) { return call4(logf, x); } RValue Exp2(RValue x) { return call4(exp2f, x); } RValue Log2(RValue x) { return call4(log2f, x); } RValue MinAtomic(RValue> x, RValue y, std::memory_order memoryOrder) { return Call(atomicMin, x, y); } RValue MinAtomic(RValue> x, RValue y, std::memory_order memoryOrder) { return Call(atomicMin, x, y); } RValue MaxAtomic(RValue> x, RValue y, std::memory_order memoryOrder) { return Call(atomicMax, x, y); } RValue MaxAtomic(RValue> x, RValue y, std::memory_order memoryOrder) { return Call(atomicMax, x, y); } RValue FRem(RValue lhs, RValue rhs) { return call4(fmodf, lhs, rhs); } } // namespace emulated } // namespace rr