1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef BERBERIS_TESTS_INLINE_ASM_TESTS_UTILITY_H_
18 #define BERBERIS_TESTS_INLINE_ASM_TESTS_UTILITY_H_
19
20 #include <cstdint>
21 #include <cstring>
22 #include <tuple>
23
// Declaration only: get_fp64_literal has C linkage, takes no arguments and
// returns a 64-bit unsigned value. Its definition is not part of this header.
// NOTE(review): the name suggests the raw bits of an FP64 literal - confirm
// against the definition.
extern "C" {
uint64_t get_fp64_literal();
}
25
// Returns a Dest whose object representation is a byte-for-byte copy of
// `source`.
//
// Dest and Source must have the same size (checked at compile time). The copy
// goes through std::memcpy, so no pointer of the wrong type is ever
// dereferenced. Dest must be default-constructible because the result is
// created first and then overwritten.
template <class Dest, class Source>
inline Dest bit_cast(const Source& source) {
  static_assert(sizeof(Dest) == sizeof(Source),
                "bit_cast requires Dest and Source to have the same size");
  Dest dest;
  // <cstring> only guarantees the std-qualified name.
  std::memcpy(&dest, &source, sizeof(dest));
  return dest;
}
33
MakeF32x4(float f1,float f2,float f3,float f4)34 inline __uint128_t MakeF32x4(float f1, float f2, float f3, float f4) {
35 float array[] = {f1, f2, f3, f4};
36 return bit_cast<__uint128_t>(array);
37 }
38
MakeF64x2(double d1,double d2)39 inline __uint128_t MakeF64x2(double d1, double d2) {
40 double array[] = {d1, d2};
41 return bit_cast<__uint128_t>(array);
42 }
43
// Builds a 128-bit unsigned value from two 64-bit halves: `low` becomes bits
// 0..63 and `high` becomes bits 64..127.
constexpr __uint128_t MakeUInt128(uint64_t low, uint64_t high) {
  const __uint128_t upper_half = static_cast<__uint128_t>(high) << 64;
  const __uint128_t lower_half = static_cast<__uint128_t>(low);
  return upper_half | lower_half;
}
47
// Builds a 128-bit unsigned value from four 32-bit lanes: u0 is placed in bits
// 0..31, u1 in bits 32..63, u2 in bits 64..95 and u3 in bits 96..127.
constexpr __uint128_t MakeU32x4(uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3) {
  // Assemble each 64-bit half first, then join the halves.
  const uint64_t low_pair = (static_cast<uint64_t>(u1) << 32) | u0;
  const uint64_t high_pair = (static_cast<uint64_t>(u3) << 32) | u2;
  return (static_cast<__uint128_t>(high_pair) << 64) | static_cast<__uint128_t>(low_pair);
}
52
// Floating-point literals, given as raw bit patterns of 32-bit and 64-bit
// IEEE 754 values. Declared `inline constexpr` so every translation unit that
// includes this header shares a single definition.
inline constexpr uint32_t kOneF32 = 0x3f800000U;
inline constexpr uint64_t kOneF64 = 0x3ff0000000000000ULL;
// Exponent all ones, only the most significant fraction bit set, sign clear.
inline constexpr uint32_t kDefaultNaN32 = 0x7fc00000U;
inline constexpr uint64_t kDefaultNaN64 = 0x7ff8000000000000ULL;
inline constexpr uint32_t kQuietNaN32 = kDefaultNaN32;
inline constexpr uint64_t kQuietNaN64 = kDefaultNaN64;
// There are multiple quiet and signaling NaNs. These are the ones that have the LSB "on".
inline constexpr uint32_t kSignalingNaN32_1 = 0x7f800001U;
inline constexpr uint64_t kSignalingNaN64_1 = 0x7ff0000000000001ULL;
inline constexpr uint32_t kQuietNaN32_1 = kQuietNaN32 | 1;
inline constexpr uint64_t kQuietNaN64_1 = kQuietNaN64 | 1;
65
// Bit masks for the AArch64 FPCR register (the register the *_C_ARG wrappers in
// this header write with `msr fpcr`). All shifts use unsigned operands, and the
// constants are `inline constexpr` so the header defines each one only once.
inline constexpr uint32_t kFpcrFzBit = 1U << 24;
inline constexpr uint32_t kFpcrDnBit = 1U << 25;
// Rounding mode occupies the two-bit field at bits 22..23.
inline constexpr uint32_t kFpcrRModeTieEven = 0b00U << 22;
inline constexpr uint32_t kFpcrRModePosInf = 0b01U << 22;
inline constexpr uint32_t kFpcrRModeNegInf = 0b10U << 22;
inline constexpr uint32_t kFpcrRModeZero = 0b11U << 22;
// The bits below sit exactly 8 positions above the same-named FPSR cumulative
// flags (IDC/IXC/UFC/OFC/DZC/IOC).
inline constexpr uint32_t kFpcrIdeBit = 1U << 15;
inline constexpr uint32_t kFpcrIxeBit = 1U << 12;
inline constexpr uint32_t kFpcrUfeBit = 1U << 11;
inline constexpr uint32_t kFpcrOfeBit = 1U << 10;
inline constexpr uint32_t kFpcrDzeBit = 1U << 9;
inline constexpr uint32_t kFpcrIoeBit = 1U << 8;
78
// Bit masks for the AArch64 FPSR register (the register the *_WQ_RES_* wrappers
// in this header read back with `mrs`). All shifts use unsigned operands, and
// the constants are `inline constexpr` so the header defines each one only once.
inline constexpr uint32_t kFpsrQcBit = 1U << 27;
inline constexpr uint32_t kFpsrIdcBit = 1U << 7;  // Input Denormal cumulative exception flag.
inline constexpr uint32_t kFpsrIxcBit = 1U << 4;  // Inexact cumulative exception flag.
inline constexpr uint32_t kFpsrUfcBit = 1U << 3;  // Underflow cumulative exception flag.
inline constexpr uint32_t kFpsrOfcBit = 1U << 2;  // Overflow cumulative exception flag.
inline constexpr uint32_t kFpsrDzcBit = 1U << 1;  // Division by Zero cumulative exception flag.
inline constexpr uint32_t kFpsrIocBit = 1U << 0;  // Invalid Operation cumulative exception flag.
86
// Expands to a captureless lambda `() -> __uint128_t` that executes ASM and
// returns the 128-bit value ASM leaves in its only operand.
// Operands seen by ASM: %0 = result (output, "w" register).
#define ASM_INSN_WRAP_FUNC_W_RES(ASM) \
  []() -> __uint128_t { \
    __uint128_t result; \
    asm(ASM : "=w"(result)); \
    return result; \
  }
93
// Expands to a captureless lambda `(__uint128_t arg) -> uint64_t` that executes
// ASM and returns the 64-bit value ASM leaves in %0.
// Operands seen by ASM: %0 = result (output, "r" register),
//                       %1 = arg (input, "w" register).
#define ASM_INSN_WRAP_FUNC_R_RES_W_ARG(ASM) \
  [](__uint128_t arg) -> uint64_t { \
    uint64_t result; \
    asm(ASM : "=r"(result) : "w"(arg)); \
    return result; \
  }
100
// Expands to a captureless lambda `(uint64_t arg) -> __uint128_t` that executes
// ASM and returns the 128-bit value ASM leaves in %0.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = arg (input, "r" register).
#define ASM_INSN_WRAP_FUNC_W_RES_R_ARG(ASM) \
  [](uint64_t arg) -> __uint128_t { \
    __uint128_t result; \
    asm(ASM : "=w"(result) : "r"(arg)); \
    return result; \
  }
107
// Expands to a captureless lambda `(__uint128_t arg) -> __uint128_t` that
// executes ASM and returns the 128-bit value ASM leaves in %0.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = arg (input, "w" register).
#define ASM_INSN_WRAP_FUNC_W_RES_W_ARG(ASM) \
  [](__uint128_t arg) -> __uint128_t { \
    __uint128_t result; \
    asm(ASM : "=w"(result) : "w"(arg)); \
    return result; \
  }
114
// Expands to a captureless lambda `(__uint128_t arg1, __uint128_t arg2) ->
// __uint128_t` that executes ASM and returns the value ASM leaves in %0.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = arg1, %2 = arg2 (inputs, "w" registers).
#define ASM_INSN_WRAP_FUNC_W_RES_WW_ARG(ASM) \
  [](__uint128_t arg1, __uint128_t arg2) -> __uint128_t { \
    __uint128_t result; \
    asm(ASM : "=w"(result) : "w"(arg1), "w"(arg2)); \
    return result; \
  }
121
// Expands to a captureless lambda `(__uint128_t arg1, __uint128_t arg2) ->
// __uint128_t` for instructions that both read and write their destination.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = arg1 (input, "w" register),
//                       %2 = arg2, tied to %0 by the matching constraint "0",
//                            so %0 already holds arg2 when ASM starts.
#define ASM_INSN_WRAP_FUNC_W_RES_W0_ARG(ASM) \
  [](__uint128_t arg1, __uint128_t arg2) -> __uint128_t { \
    __uint128_t result; \
    asm(ASM : "=w"(result) : "w"(arg1), "0"(arg2)); \
    return result; \
  }
128
// Expands to a captureless lambda taking three 128-bit arguments and returning
// the 128-bit value ASM leaves in %0.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = arg1, %2 = arg2, %3 = arg3 (inputs, "w" registers).
#define ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG(ASM) \
  [](__uint128_t arg1, __uint128_t arg2, __uint128_t arg3) -> __uint128_t { \
    __uint128_t result; \
    asm(ASM : "=w"(result) : "w"(arg1), "w"(arg2), "w"(arg3)); \
    return result; \
  }
135
// Expands to a captureless lambda taking three 128-bit arguments, for
// instructions that both read and write their destination.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = arg1, %2 = arg2 (inputs, "w" registers),
//                       %3 = arg3, tied to %0 by the matching constraint "0",
//                            so %0 already holds arg3 when ASM starts.
#define ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG(ASM) \
  [](__uint128_t arg1, __uint128_t arg2, __uint128_t arg3) -> __uint128_t { \
    __uint128_t result; \
    asm(ASM : "=w"(result) : "w"(arg1), "w"(arg2), "0"(arg3)); \
    return result; \
  }
142
143 // clang-format off
144 // We turn off clang-format here because it would place ASM like so:
145 //
146 // asm("msr fpsr, xzr\n\t" ASM
147 // "\n\t"
148 // "mrs %1, fpsr"
149 // : "=w"(res), "=r"(fpsr)
150 // : "w"(arg));
// Expands to a captureless lambda `(__uint128_t arg)` returning
// {result vector, FPSR value read right after ASM}.
//
// FPSR is zeroed before ASM runs, so the returned value contains only what was
// set in FPSR while ASM executed.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = FPSR read-back (output, "r"; written only by the
//                            trailing mrs, ASM should not touch it),
//                       %2 = arg (input, "w" register).
// The statement is volatile because it writes FPSR, a side effect that is not
// expressed through the operand list.
#define ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG(ASM) \
  [](__uint128_t arg) -> std::tuple<__uint128_t, uint32_t> { \
    __uint128_t res; \
    uint64_t fpsr; \
    asm volatile("msr fpsr, xzr\n\t" \
                 ASM "\n\t" \
                 "mrs %1, fpsr" \
                 : "=w"(res), "=r"(fpsr) \
                 : "w"(arg)); \
    return {res, static_cast<uint32_t>(fpsr)}; \
  }
162
// Expands to a captureless lambda `(__uint128_t arg1, __uint128_t arg2)`
// returning {result vector, FPSR value read right after ASM}, for instructions
// that both read and write their destination.
//
// FPSR is zeroed before ASM runs, so the returned value contains only what was
// set in FPSR while ASM executed.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = FPSR read-back (output, "r"; written only by the
//                            trailing mrs, ASM should not touch it),
//                       %2 = arg1 (input, "w" register),
//                       %3 = arg2, tied to %0 by the matching constraint "0",
//                            so %0 already holds arg2 when ASM starts.
// The statement is volatile because it writes FPSR, a side effect that is not
// expressed through the operand list.
#define ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG(ASM) \
  [](__uint128_t arg1, __uint128_t arg2) -> std::tuple<__uint128_t, uint32_t> { \
    __uint128_t res; \
    uint64_t fpsr; \
    asm volatile("msr fpsr, xzr\n\t" \
                 ASM "\n\t" \
                 "mrs %1, fpsr" \
                 : "=w"(res), "=r"(fpsr) \
                 : "w"(arg1), "0"(arg2)); \
    return {res, static_cast<uint32_t>(fpsr)}; \
  }
174
// Expands to a captureless lambda `(__uint128_t arg1, __uint128_t arg2)`
// returning {result vector, FPSR value read right after ASM}.
//
// FPSR is zeroed before ASM runs, so the returned value contains only what was
// set in FPSR while ASM executed.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = FPSR read-back (output, "r"; written only by the
//                            trailing mrs, ASM should not touch it),
//                       %2 = arg1, %3 = arg2 (inputs, "w" registers).
// The statement is volatile because it writes FPSR, a side effect that is not
// expressed through the operand list.
#define ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG(ASM) \
  [](__uint128_t arg1, __uint128_t arg2) -> std::tuple<__uint128_t, uint32_t> { \
    __uint128_t res; \
    uint64_t fpsr; \
    asm volatile("msr fpsr, xzr\n\t" \
                 ASM "\n\t" \
                 "mrs %1, fpsr" \
                 : "=w"(res), "=r"(fpsr) \
                 : "w"(arg1), "w"(arg2)); \
    return {res, static_cast<uint32_t>(fpsr)}; \
  }
186
// Expands to a captureless lambda `(__uint128_t arg, uint32_t fpcr) ->
// __uint128_t` that runs ASM with FPCR set to `fpcr`.
//
// FPCR is written with `fpcr` before ASM and with zero afterwards; the value it
// held before the call is not restored.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = arg (input, "w" register),
//                       %2 = fpcr (input, "r" register, used by the wrapper).
// `fpcr` is widened to 64 bits before being handed to the asm: the wrapper
// prints it as a 64-bit X register (%x2), and the upper half of a register
// holding a 32-bit operand is not guaranteed to be zero.
// The statement is volatile because it writes FPCR, a side effect that is not
// expressed through the operand list.
#define ASM_INSN_WRAP_FUNC_W_RES_WC_ARG(ASM) \
  [](__uint128_t arg, uint32_t fpcr) -> __uint128_t { \
    __uint128_t res; \
    asm volatile("msr fpcr, %x2\n\t" \
                 ASM "\n\t" \
                 "msr fpcr, xzr" \
                 : "=w"(res) \
                 : "w"(arg), "r"(static_cast<uint64_t>(fpcr))); \
    return res; \
  }
197
// Expands to a captureless lambda taking three 128-bit arguments and returning
// {result vector, FPSR value read right after ASM}, for instructions that both
// read and write their destination.
//
// FPSR is zeroed before ASM runs, so the returned value contains only what was
// set in FPSR while ASM executed.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = FPSR read-back (output, "r"; written only by the
//                            trailing mrs, ASM should not touch it),
//                       %2 = arg1, %3 = arg2 (inputs, "w" registers),
//                       %4 = arg3, tied to %0 by the matching constraint "0",
//                            so %0 already holds arg3 when ASM starts.
// The statement is volatile because it writes FPSR, a side effect that is not
// expressed through the operand list.
#define ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG(ASM) \
  [](__uint128_t arg1, __uint128_t arg2, __uint128_t arg3) -> std::tuple<__uint128_t, uint32_t> { \
    __uint128_t res; \
    uint64_t fpsr; \
    asm volatile("msr fpsr, xzr\n\t" \
                 ASM "\n\t" \
                 "mrs %1, fpsr" \
                 : "=w"(res), "=r"(fpsr) \
                 : "w"(arg1), "w"(arg2), "0"(arg3)); \
    return {res, static_cast<uint32_t>(fpsr)}; \
  }
209
// Expands to a captureless lambda `(__uint128_t arg1, __uint128_t arg2,
// uint32_t fpcr) -> __uint128_t` that runs ASM with FPCR set to `fpcr`.
//
// FPCR is written with `fpcr` before ASM and with zero afterwards; the value it
// held before the call is not restored.
// Operands seen by ASM: %0 = result (output, "w" register),
//                       %1 = arg1, %2 = arg2 (inputs, "w" registers),
//                       %3 = fpcr (input, "r" register, used by the wrapper).
// `fpcr` is widened to 64 bits before being handed to the asm: the wrapper
// prints it as a 64-bit X register (%x3), and the upper half of a register
// holding a 32-bit operand is not guaranteed to be zero.
// The statement is volatile because it writes FPCR, a side effect that is not
// expressed through the operand list.
#define ASM_INSN_WRAP_FUNC_W_RES_WWC_ARG(ASM) \
  [](__uint128_t arg1, __uint128_t arg2, uint32_t fpcr) -> __uint128_t { \
    __uint128_t res; \
    asm volatile("msr fpcr, %x3\n\t" \
                 ASM "\n\t" \
                 "msr fpcr, xzr" \
                 : "=w"(res) \
                 : "w"(arg1), "w"(arg2), "r"(static_cast<uint64_t>(fpcr))); \
    return res; \
  }
220
221 // clang-format on
222
223 #endif // BERBERIS_TESTS_INLINE_ASM_TESTS_UTILITY_H_
224