1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>
14
15 #include <gtest/gtest.h>
16
17 #include <fp16.h>
18
19 #include <xnnpack/AlignedAllocator.h>
20 #include <xnnpack/common.h>
21 #include <xnnpack/math-stubs.h>
22
23
// Number of float elements in the input and output buffers that every test
// in this file hands to a kernel in a single call.
constexpr int kBlockSize{1024};
25
26 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_math_f32_roundd__sse_addsub on a block filled with +0.0f and checks
// the first result bit-for-bit against std::floor.
TEST(ROUNDD__SSE_ADDSUB, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(samples.begin(), samples.end(), +0.0f);
  xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
  // Every element holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(samples[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
38
// Runs xnn_math_f32_roundd__sse_addsub on a block filled with -0.0f and checks
// the first result bit-for-bit against std::floor.
TEST(ROUNDD__SSE_ADDSUB, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fill with the float literal -0.0f. Passing the integer UINT32_C(0x80000000)
  // to std::fill would be value-converted to 2147483648.0f (bits 0x4F000000)
  // rather than reinterpreted as the negative-zero bit pattern.
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every element holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
50
// Sweeps the bit patterns 0x00000001..0x007FFFFF through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE_ADDSUB, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x00800000); block_start += kBlockSize) {
    // Clamping to 0x00000001 keeps the all-zero bit pattern out of the sweep.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x00000001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
68
// Sweeps the bit patterns 0x80000001..0x807FFFFF through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE_ADDSUB, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0x80800000); block_start += kBlockSize) {
    // Clamping to 0x80000001 keeps the 0x80000000 bit pattern out of the sweep.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x80000001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
86
// Sweeps the bit patterns 0x00800000 up to (excluding) 0x4B800000 through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00800000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
104
// Sweeps the bit patterns 0x80800000 up to (excluding) 0xCB800000 through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80800000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
122
// Sweeps the bit patterns 0x4B800000 up to (excluding) 0x7F800000 through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
140
// Sweeps the bit patterns 0xCB800000 up to (excluding) 0xFF800000 through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
158
// Runs xnn_math_f32_roundd__sse_addsub on a block filled with +infinity and
// checks the first result bit-for-bit against std::floor.
TEST(ROUNDD__SSE_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(samples.begin(), samples.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
  // Every element holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(samples[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
170
// Runs xnn_math_f32_roundd__sse_addsub on a block filled with -infinity and
// checks the first result bit-for-bit against std::floor.
TEST(ROUNDD__SSE_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(samples.begin(), samples.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
  // Every element holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(samples[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
182
// Sweeps the bit patterns 0x7FC00000..0x7FFFFFFF through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
200
// Sweeps the bit patterns 0x7FC00000..0x7FFFFFFF with the sign bit set through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // OR-ing in 0x80000000 sets the sign bit of each pattern.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(UINT32_C(0x80000000) | bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
218
// Sweeps the bit patterns 0x7F800001..0x7FBFFFFF through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// compares every result with std::floor while ignoring bit 22.
TEST(ROUNDD__SSE_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Clamping to 0x7F800001 keeps the 0x7F800000 bit pattern out of the sweep.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      // The 0xFFBFFFFF mask drops bit 22 from both sides of the comparison.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
236
// Sweeps the bit patterns 0x7F800001..0x7FBFFFFF with the sign bit set through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// compares every result with std::floor while ignoring bit 22.
TEST(ROUNDD__SSE_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Clamp away 0x7F800000 first, then set the sign bit.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      // The 0xFFBFFFFF mask drops bit 22 from both sides of the comparison.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
254
// Sweeps the bit patterns 0x7F800001..0x7FBFFFFF through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// requires every result to match std::floor exactly, with no bits masked out.
TEST(ROUNDD__SSE_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Clamping to 0x7F800001 keeps the 0x7F800000 bit pattern out of the sweep.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
272
// Sweeps the bit patterns 0x7F800001..0x7FBFFFFF with the sign bit set through
// xnn_math_f32_roundd__sse_addsub, one kBlockSize-element block at a time, and
// requires every result to match std::floor exactly, with no bits masked out.
TEST(ROUNDD__SSE_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Clamp away 0x7F800000 first, then set the sign bit.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
290 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
291
292 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_math_f32_roundd__sse2_cvt on a block filled with +0.0f and checks
// the first result bit-for-bit against std::floor.
TEST(ROUNDD__SSE2_CVT, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(samples.begin(), samples.end(), +0.0f);
  xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
  // Every element holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(samples[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
304
// Runs xnn_math_f32_roundd__sse2_cvt on a block filled with -0.0f and checks
// the first result bit-for-bit against std::floor.
TEST(ROUNDD__SSE2_CVT, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fill with the float literal -0.0f. Passing the integer UINT32_C(0x80000000)
  // to std::fill would be value-converted to 2147483648.0f (bits 0x4F000000)
  // rather than reinterpreted as the negative-zero bit pattern.
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every element holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
316
// Sweeps the bit patterns 0x00000001..0x007FFFFF through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE2_CVT, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x00800000); block_start += kBlockSize) {
    // Clamping to 0x00000001 keeps the all-zero bit pattern out of the sweep.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x00000001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
334
// Sweeps the bit patterns 0x80000001..0x807FFFFF through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE2_CVT, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0x80800000); block_start += kBlockSize) {
    // Clamping to 0x80000001 keeps the 0x80000000 bit pattern out of the sweep.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x80000001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
352
// Sweeps the bit patterns 0x00800000 up to (excluding) 0x4B800000 through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE2_CVT, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00800000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
370
// Sweeps the bit patterns 0x80800000 up to (excluding) 0xCB800000 through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE2_CVT, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80800000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
388
// Sweeps the bit patterns 0x4B800000 up to (excluding) 0x7F800000 through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE2_CVT, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
406
// Sweeps the bit patterns 0xCB800000 up to (excluding) 0xFF800000 through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE2_CVT, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
424
// Runs xnn_math_f32_roundd__sse2_cvt on a block filled with +infinity and
// checks the first result bit-for-bit against std::floor.
TEST(ROUNDD__SSE2_CVT, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(samples.begin(), samples.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
  // Every element holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(samples[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
436
// Runs xnn_math_f32_roundd__sse2_cvt on a block filled with -infinity and
// checks the first result bit-for-bit against std::floor.
TEST(ROUNDD__SSE2_CVT, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(samples.begin(), samples.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
  // Every element holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(samples[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
448
// Sweeps the bit patterns 0x7FC00000..0x7FFFFFFF through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE2_CVT, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
466
// Sweeps the bit patterns 0x7FC00000..0x7FFFFFFF with the sign bit set through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// compares every result bit-for-bit with std::floor.
TEST(ROUNDD__SSE2_CVT, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // OR-ing in 0x80000000 sets the sign bit of each pattern.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(UINT32_C(0x80000000) | bits);
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
484
// Sweeps the bit patterns 0x7F800001..0x7FBFFFFF through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// compares every result with std::floor while ignoring bit 22.
TEST(ROUNDD__SSE2_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Clamping to 0x7F800001 keeps the 0x7F800000 bit pattern out of the sweep.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      // The 0xFFBFFFFF mask drops bit 22 from both sides of the comparison.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
502
// Sweeps the bit patterns 0x7F800001..0x7FBFFFFF with the sign bit set through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// compares every result with std::floor while ignoring bit 22.
TEST(ROUNDD__SSE2_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Clamp away 0x7F800000 first, then set the sign bit.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      // The 0xFFBFFFFF mask drops bit 22 from both sides of the comparison.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
520
// Sweeps the bit patterns 0x7F800001..0x7FBFFFFF through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// requires every result to match std::floor exactly, with no bits masked out.
TEST(ROUNDD__SSE2_CVT, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Clamping to 0x7F800001 keeps the 0x7F800000 bit pattern out of the sweep.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
538
// Sweeps the bit patterns 0x7F800001..0x7FBFFFFF with the sign bit set through
// xnn_math_f32_roundd__sse2_cvt, one kBlockSize-element block at a time, and
// requires every result to match std::floor exactly, with no bits masked out.
TEST(ROUNDD__SSE2_CVT, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> samples(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Clamp away 0x7F800000 first, then set the sign bit.
    uint32_t bits = block_start;
    for (float& sample : samples) {
      sample = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits += 1;
    }
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), samples.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(samples[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
556 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
557
558 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_math_f32_roundd__sse41 over a block filled with zero and compares the first output with
// std::floor of the first input, bit for bit.
TEST(ROUNDD__SSE41, positive_zero) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  // The integer constant 0 is converted by value to the float +0.0f.
  std::fill(inputs.begin(), inputs.end(), UINT32_C(0x00000000));

  const size_t block_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

  // Every element of the block holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
570
// Runs xnn_math_f32_roundd__sse41 over a block filled with negative zero (bit pattern 0x80000000) and
// compares the first output with std::floor of the first input, bit for bit.
TEST(ROUNDD__SSE41, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Build -0.0f from its bit pattern. Handing UINT32_C(0x80000000) directly to std::fill converts the
  // integer by value, filling the block with 2147483648.0f instead of negative zero.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));
  xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every element of the block holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
582
// Sweeps the bit patterns 0x00000001..0x007FFFFF (positive subnormals; 0x00000000 is clamped up to
// 0x00000001) through xnn_math_f32_roundd__sse41 in blocks of kBlockSize and compares each output with
// std::floor of the same input, bit for bit.
TEST(ROUNDD__SSE41, positive_subnormal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x00000000); base < UINT32_C(0x00800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base; the clamp keeps 0x00000000 out of the block.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x00000001)));
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
600
// Sweeps the bit patterns 0x80000001..0x807FFFFF (negative subnormals; 0x80000000 is clamped up to
// 0x80000001) through xnn_math_f32_roundd__sse41 in blocks of kBlockSize and compares each output with
// std::floor of the same input, bit for bit.
TEST(ROUNDD__SSE41, negative_subnormal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x80000000); base < UINT32_C(0x80800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base; the clamp keeps 0x80000000 out of the block.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x80000001)));
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
618
// Sweeps every bit pattern in [0x00800000, 0x4B800000) -- positive normal numbers below 2^24 -- through
// xnn_math_f32_roundd__sse41 in blocks of kBlockSize and compares each output with std::floor of the same
// input, bit for bit.
TEST(ROUNDD__SSE41, positive_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x00800000); base < UINT32_C(0x4B800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
636
// Sweeps every bit pattern in [0x80800000, 0xCB800000) -- negative normal numbers of magnitude below
// 2^24 -- through xnn_math_f32_roundd__sse41 in blocks of kBlockSize and compares each output with
// std::floor of the same input, bit for bit.
TEST(ROUNDD__SSE41, negative_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x80800000); base < UINT32_C(0xCB800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
654
// Sweeps every bit pattern in [0x4B800000, 0x7F800000) -- finite positive values from 2^24 upward, all of
// which are already integers -- through xnn_math_f32_roundd__sse41 in blocks of kBlockSize and compares
// each output with std::floor of the same input, bit for bit.
TEST(ROUNDD__SSE41, positive_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x4B800000); base < UINT32_C(0x7F800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
672
// Sweeps every bit pattern in [0xCB800000, 0xFF800000) -- finite negative values of magnitude 2^24 and
// above, all of which are already integers -- through xnn_math_f32_roundd__sse41 in blocks of kBlockSize
// and compares each output with std::floor of the same input, bit for bit.
TEST(ROUNDD__SSE41, negative_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0xCB800000); base < UINT32_C(0xFF800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
690
// Runs xnn_math_f32_roundd__sse41 over a block filled with +infinity and compares the first output with
// std::floor of the first input, bit for bit.
TEST(ROUNDD__SSE41, positive_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());

  const size_t block_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

  // Every element of the block holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
702
// Runs xnn_math_f32_roundd__sse41 over a block filled with -infinity and compares the first output with
// std::floor of the first input, bit for bit.
TEST(ROUNDD__SSE41, negative_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());

  const size_t block_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

  // Every element of the block holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
714
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) -- positive quiet-NaN encodings -- through
// xnn_math_f32_roundd__sse41 in blocks of kBlockSize and compares each output with std::floor of the same
// input, bit for bit.
TEST(ROUNDD__SSE41, positive_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    // Consecutive bit patterns starting at base.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
732
// Sweeps the bit patterns [0x7FC00000, 0x80000000) with the sign bit (0x80000000) OR-ed in -- negative
// quiet-NaN encodings -- through xnn_math_f32_roundd__sse41 in blocks of kBlockSize and compares each
// output with std::floor of the same input, bit for bit.
TEST(ROUNDD__SSE41, negative_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    // Consecutive bit patterns starting at base, each with the sign bit set.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | bits);
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
750
// Sweeps the bit patterns 0x7F800001..0x7FBFFFFF (positive signaling-NaN encodings; 0x7F800000 is clamped
// up to 0x7F800001) through xnn_math_f32_roundd__sse41 in blocks of kBlockSize. Output and std::floor
// reference are compared after masking with 0xFFBFFFFF, so bit 22 is ignored.
TEST(ROUNDD__SSE41, positive_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    // Consecutive bit patterns starting at base; the clamp keeps 0x7F800000 out of the block.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
768
// Sweeps the bit patterns 0xFF800001..0xFFBFFFFF (negative signaling-NaN encodings) through
// xnn_math_f32_roundd__sse41 in blocks of kBlockSize. Output and std::floor reference are compared after
// masking with 0xFFBFFFFF, so bit 22 is ignored.
TEST(ROUNDD__SSE41, negative_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    // Clamp first (keeps 0x7F800000 out of the block), then set the sign bit.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
786
// Feeds the bit patterns 0x7F800001..0x7FBFFFFF (positive signaling-NaN encodings; 0x7F800000 is clamped
// up to 0x7F800001) through xnn_math_f32_roundd__sse41 one block at a time and requires every output to
// equal std::floor of the same input in all 32 bits.
TEST(ROUNDD__SSE41, positive_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    // Consecutive bit patterns starting at base; the clamp keeps 0x7F800000 out of the block.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
804
// Feeds the bit patterns 0xFF800001..0xFFBFFFFF (negative signaling-NaN encodings) through
// xnn_math_f32_roundd__sse41 one block at a time and requires every output to equal std::floor of the same
// input in all 32 bits.
TEST(ROUNDD__SSE41, negative_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    // Clamp first (keeps 0x7F800000 out of the block), then set the sign bit.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }

    xnn_math_f32_roundd__sse41(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
822 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
823
824 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs xnn_math_f32_roundd__neon_addsub over a block filled with zero and compares the first output with
// std::floor of the first input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, positive_zero) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  // The integer constant 0 is converted by value to the float +0.0f.
  std::fill(inputs.begin(), inputs.end(), UINT32_C(0x00000000));

  const size_t block_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

  // Every element of the block holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
836
// Runs xnn_math_f32_roundd__neon_addsub over a block filled with negative zero (bit pattern 0x80000000)
// and compares the first output with std::floor of the first input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Build -0.0f from its bit pattern. Handing UINT32_C(0x80000000) directly to std::fill converts the
  // integer by value, filling the block with 2147483648.0f instead of negative zero.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));
  xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every element of the block holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
848
// Sweeps the bit patterns 0x00000001..0x007FFFFF (positive subnormals; 0x00000000 is clamped up to
// 0x00000001) through xnn_math_f32_roundd__neon_addsub in blocks of kBlockSize and compares each output
// with std::floor of the same input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, positive_subnormal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x00000000); base < UINT32_C(0x00800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base; the clamp keeps 0x00000000 out of the block.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x00000001)));
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
866
// Sweeps the bit patterns 0x80000001..0x807FFFFF (negative subnormals; 0x80000000 is clamped up to
// 0x80000001) through xnn_math_f32_roundd__neon_addsub in blocks of kBlockSize and compares each output
// with std::floor of the same input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, negative_subnormal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x80000000); base < UINT32_C(0x80800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base; the clamp keeps 0x80000000 out of the block.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x80000001)));
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
884
// Sweeps every bit pattern in [0x00800000, 0x4B800000) -- positive normal numbers below 2^24 -- through
// xnn_math_f32_roundd__neon_addsub in blocks of kBlockSize and compares each output with std::floor of the
// same input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, positive_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x00800000); base < UINT32_C(0x4B800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
902
// Sweeps every bit pattern in [0x80800000, 0xCB800000) -- negative normal numbers of magnitude below
// 2^24 -- through xnn_math_f32_roundd__neon_addsub in blocks of kBlockSize and compares each output with
// std::floor of the same input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, negative_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x80800000); base < UINT32_C(0xCB800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
920
// Sweeps every bit pattern in [0x4B800000, 0x7F800000) -- finite positive values from 2^24 upward, all of
// which are already integers -- through xnn_math_f32_roundd__neon_addsub in blocks of kBlockSize and
// compares each output with std::floor of the same input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, positive_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x4B800000); base < UINT32_C(0x7F800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
938
// Sweeps every bit pattern in [0xCB800000, 0xFF800000) -- finite negative values of magnitude 2^24 and
// above, all of which are already integers -- through xnn_math_f32_roundd__neon_addsub in blocks of
// kBlockSize and compares each output with std::floor of the same input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, negative_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0xCB800000); base < UINT32_C(0xFF800000); base += kBlockSize) {
    // Consecutive bit patterns starting at base.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
956
// Runs xnn_math_f32_roundd__neon_addsub over a block filled with +infinity and compares the first output
// with std::floor of the first input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, positive_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());

  const size_t block_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

  // Every element of the block holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
968
// Runs xnn_math_f32_roundd__neon_addsub over a block filled with -infinity and compares the first output
// with std::floor of the first input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, negative_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());

  const size_t block_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

  // Every element of the block holds the same value, so only element 0 is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
980
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) -- positive quiet-NaN encodings -- through
// xnn_math_f32_roundd__neon_addsub in blocks of kBlockSize and compares each output with std::floor of the
// same input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, positive_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    // Consecutive bit patterns starting at base.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
998
// Sweeps the bit patterns [0x7FC00000, 0x80000000) with the sign bit (0x80000000) OR-ed in -- negative
// quiet-NaN encodings -- through xnn_math_f32_roundd__neon_addsub in blocks of kBlockSize and compares
// each output with std::floor of the same input, bit for bit.
TEST(ROUNDD__NEON_ADDSUB, negative_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    // Consecutive bit patterns starting at base, each with the sign bit set.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | bits);
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1016
// Sweeps the bit patterns 0x7F800001..0x7FBFFFFF (positive signaling-NaN encodings; 0x7F800000 is clamped
// up to 0x7F800001) through xnn_math_f32_roundd__neon_addsub in blocks of kBlockSize. Output and
// std::floor reference are compared after masking with 0xFFBFFFFF, so bit 22 is ignored.
TEST(ROUNDD__NEON_ADDSUB, positive_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    // Consecutive bit patterns starting at base; the clamp keeps 0x7F800000 out of the block.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1034
// Sweeps the bit patterns 0xFF800001..0xFFBFFFFF (negative signaling-NaN encodings) through
// xnn_math_f32_roundd__neon_addsub in blocks of kBlockSize. Output and std::floor reference are compared
// after masking with 0xFFBFFFFF, so bit 22 is ignored.
TEST(ROUNDD__NEON_ADDSUB, negative_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    // Clamp first (keeps 0x7F800000 out of the block), then set the sign bit.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1052
// Feeds the bit patterns 0x7F800001..0x7FBFFFFF (positive signaling-NaN encodings; 0x7F800000 is clamped
// up to 0x7F800001) through xnn_math_f32_roundd__neon_addsub one block at a time and requires every output
// to equal std::floor of the same input in all 32 bits.
TEST(ROUNDD__NEON_ADDSUB, positive_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    // Consecutive bit patterns starting at base; the clamp keeps 0x7F800000 out of the block.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1070
// Feeds the bit patterns 0xFF800001..0xFFBFFFFF (negative signaling-NaN encodings) through
// xnn_math_f32_roundd__neon_addsub one block at a time and requires every output to equal std::floor of
// the same input in all 32 bits.
TEST(ROUNDD__NEON_ADDSUB, negative_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);

  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    // Clamp first (keeps 0x7F800000 out of the block), then set the sign bit.
    uint32_t bits = base;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }

    xnn_math_f32_roundd__neon_addsub(block_bytes, inputs.data(), outputs.data());

    for (size_t i = 0; i < outputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1088 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1089
1090 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs a block of +0.0f through xnn_math_f32_roundd__neon_cvt and requires element 0 of the
// output to equal std::floor(+0.0f) bit-for-bit.
TEST(ROUNDD__NEON_CVT, positive_zero) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  // Every element holds the same value, so only the first one is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
1102
// Runs a block of -0.0f (bit pattern 0x80000000) through xnn_math_f32_roundd__neon_cvt and
// requires element 0 of the output to equal std::floor(-0.0f) bit-for-bit.
TEST(ROUNDD__NEON_CVT, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // The value has to be materialized from its bit pattern: handing the integer 0x80000000
  // directly to std::fill would value-convert it to 2147483648.0f rather than -0.0f.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));
  xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every element holds the same value, so only the first one is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
1114
// Sweeps the input bit patterns [0x00000001, 0x00800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neon_cvt and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEON_CVT, positive_subnormal) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x00800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // The clamp keeps the zero pattern 0x00000000 out of the batch.
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1132
// Sweeps the input bit patterns [0x80000001, 0x80800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neon_cvt and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEON_CVT, negative_subnormal) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0x80800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // The clamp keeps the negative-zero pattern 0x80000000 out of the batch.
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1150
// Sweeps the input bit patterns [0x00800000, 0x4B800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neon_cvt and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEON_CVT, positive_normal) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00800000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1168
// Sweeps the input bit patterns [0x80800000, 0xCB800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neon_cvt and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEON_CVT, negative_normal) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80800000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1186
// Sweeps the input bit patterns [0x4B800000, 0x7F800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neon_cvt and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEON_CVT, positive_integral) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1204
// Sweeps the input bit patterns [0xCB800000, 0xFF800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neon_cvt and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEON_CVT, negative_integral) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1222
// Runs a block of +infinity through xnn_math_f32_roundd__neon_cvt and requires element 0 of the
// output to equal std::floor(+infinity) bit-for-bit.
TEST(ROUNDD__NEON_CVT, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // +infinity is built from its bit pattern (0x7F800000) so that the test does not rely on
  // std::numeric_limits; <limits> is not among this file's direct includes.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x7F800000)));
  xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every element holds the same value, so only the first one is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
1234
// Runs a block of -infinity through xnn_math_f32_roundd__neon_cvt and requires element 0 of the
// output to equal std::floor(-infinity) bit-for-bit.
TEST(ROUNDD__NEON_CVT, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // -infinity is built from its bit pattern (0xFF800000) so that the test does not rely on
  // std::numeric_limits; <limits> is not among this file's direct includes.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0xFF800000)));
  xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every element holds the same value, so only the first one is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
1246
// Sweeps the NaN bit patterns [0x7FC00000, 0x80000000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neon_cvt and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEON_CVT, positive_qnan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1264
// Sweeps the NaN bit patterns [0x7FC00000, 0x80000000) with the sign bit set, in
// kBlockSize-element batches, through xnn_math_f32_roundd__neon_cvt and requires each result to
// equal std::floor of the same input bit-for-bit.
TEST(ROUNDD__NEON_CVT, negative_qnan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | (next_bits++));
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1282
// Sweeps the positive NaN bit patterns [0x7F800001, 0x7FC00000) in kBlockSize-element batches
// through xnn_math_f32_roundd__neon_cvt. Results are compared with std::floor of the same input
// under the mask 0xFFBFFFFF, i.e. bit 22 (the IEEE 754 quiet-NaN flag) is ignored.
TEST(ROUNDD__NEON_CVT, positive_snan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // The clamp keeps 0x7F800000 (+infinity) out of the batch.
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1300
// Sweeps the NaN bit patterns [0x7F800001, 0x7FC00000) with the sign bit set, in
// kBlockSize-element batches, through xnn_math_f32_roundd__neon_cvt. Results are compared with
// std::floor of the same input under the mask 0xFFBFFFFF, i.e. bit 22 (the IEEE 754 quiet-NaN
// flag) is ignored.
TEST(ROUNDD__NEON_CVT, negative_snan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // Clamp away 0x7F800000 (infinity) first, then OR in the sign bit.
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1318
// Sweeps the positive NaN bit patterns [0x7F800001, 0x7FC00000) in kBlockSize-element batches
// through xnn_math_f32_roundd__neon_cvt. All 32 bits of each result are compared with
// std::floor of the same input, so bit 22 of the returned NaN has to match as well.
TEST(ROUNDD__NEON_CVT, positive_snan_to_qnan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // The clamp keeps 0x7F800000 (+infinity) out of the batch.
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1336
// Sweeps the NaN bit patterns [0x7F800001, 0x7FC00000) with the sign bit set, in
// kBlockSize-element batches, through xnn_math_f32_roundd__neon_cvt. All 32 bits of each result
// are compared with std::floor of the same input, including bit 22.
TEST(ROUNDD__NEON_CVT, negative_snan_to_qnan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // Clamp away 0x7F800000 (infinity) first, then OR in the sign bit.
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__neon_cvt(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1354 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1355
1356 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs a block of +0.0f through xnn_math_f32_roundd__neonv8 and requires element 0 of the
// output to equal std::floor(+0.0f) bit-for-bit.
TEST(ROUNDD__NEONV8, positive_zero) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
  // Every element holds the same value, so only the first one is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
1368
// Runs a block of -0.0f (bit pattern 0x80000000) through xnn_math_f32_roundd__neonv8 and
// requires element 0 of the output to equal std::floor(-0.0f) bit-for-bit.
TEST(ROUNDD__NEONV8, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // The value has to be materialized from its bit pattern: handing the integer 0x80000000
  // directly to std::fill would value-convert it to 2147483648.0f rather than -0.0f.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));
  xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every element holds the same value, so only the first one is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
1380
// Sweeps the input bit patterns [0x00000001, 0x00800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neonv8 and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEONV8, positive_subnormal) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x00800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // The clamp keeps the zero pattern 0x00000000 out of the batch.
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1398
// Sweeps the input bit patterns [0x80000001, 0x80800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neonv8 and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEONV8, negative_subnormal) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0x80800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // The clamp keeps the negative-zero pattern 0x80000000 out of the batch.
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1416
// Sweeps the input bit patterns [0x00800000, 0x4B800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neonv8 and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEONV8, positive_normal) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00800000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1434
// Sweeps the input bit patterns [0x80800000, 0xCB800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neonv8 and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEONV8, negative_normal) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80800000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1452
// Sweeps the input bit patterns [0x4B800000, 0x7F800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neonv8 and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEONV8, positive_integral) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1470
// Sweeps the input bit patterns [0xCB800000, 0xFF800000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neonv8 and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEONV8, negative_integral) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1488
// Runs a block of +infinity through xnn_math_f32_roundd__neonv8 and requires element 0 of the
// output to equal std::floor(+infinity) bit-for-bit.
TEST(ROUNDD__NEONV8, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // +infinity is built from its bit pattern (0x7F800000) so that the test does not rely on
  // std::numeric_limits; <limits> is not among this file's direct includes.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x7F800000)));
  xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every element holds the same value, so only the first one is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
1500
// Runs a block of -infinity through xnn_math_f32_roundd__neonv8 and requires element 0 of the
// output to equal std::floor(-infinity) bit-for-bit.
TEST(ROUNDD__NEONV8, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // -infinity is built from its bit pattern (0xFF800000) so that the test does not rely on
  // std::numeric_limits; <limits> is not among this file's direct includes.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0xFF800000)));
  xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every element holds the same value, so only the first one is inspected.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
1512
// Sweeps the NaN bit patterns [0x7FC00000, 0x80000000) in kBlockSize-element batches through
// xnn_math_f32_roundd__neonv8 and requires each result to equal std::floor of the same input
// bit-for-bit.
TEST(ROUNDD__NEONV8, positive_qnan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1530
// Sweeps the NaN bit patterns [0x7FC00000, 0x80000000) with the sign bit set, in
// kBlockSize-element batches, through xnn_math_f32_roundd__neonv8 and requires each result to
// equal std::floor of the same input bit-for-bit.
TEST(ROUNDD__NEONV8, negative_qnan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | (next_bits++));
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1548
// Sweeps the positive NaN bit patterns [0x7F800001, 0x7FC00000) in kBlockSize-element batches
// through xnn_math_f32_roundd__neonv8. Results are compared with std::floor of the same input
// under the mask 0xFFBFFFFF, i.e. bit 22 (the IEEE 754 quiet-NaN flag) is ignored.
TEST(ROUNDD__NEONV8, positive_snan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // The clamp keeps 0x7F800000 (+infinity) out of the batch.
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1566
// Sweeps the NaN bit patterns [0x7F800001, 0x7FC00000) with the sign bit set, in
// kBlockSize-element batches, through xnn_math_f32_roundd__neonv8. Results are compared with
// std::floor of the same input under the mask 0xFFBFFFFF, i.e. bit 22 (the IEEE 754 quiet-NaN
// flag) is ignored.
TEST(ROUNDD__NEONV8, negative_snan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // Clamp away 0x7F800000 (infinity) first, then OR in the sign bit.
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1584
// Sweeps the positive NaN bit patterns [0x7F800001, 0x7FC00000) in kBlockSize-element batches
// through xnn_math_f32_roundd__neonv8. All 32 bits of each result are compared with std::floor
// of the same input, so bit 22 of the returned NaN has to match as well.
TEST(ROUNDD__NEONV8, positive_snan_to_qnan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // The clamp keeps 0x7F800000 (+infinity) out of the batch.
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1602
// Sweeps the NaN bit patterns [0x7F800001, 0x7FC00000) with the sign bit set, in
// kBlockSize-element batches, through xnn_math_f32_roundd__neonv8. All 32 bits of each result
// are compared with std::floor of the same input, including bit 22.
TEST(ROUNDD__NEONV8, negative_snan_to_qnan) {
  using Buffer = std::vector<float, AlignedAllocator<float, 64>>;
  Buffer inputs(kBlockSize);
  Buffer outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      // Clamp away 0x7F800000 (infinity) first, then OR in the sign bit.
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__neonv8(inputs.size() * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1620 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1621
1622 #if XNN_ARCH_WASMSIMD
// Runs the WAsm SIMD add/sub kernel on a block of +0.0f and checks the first output against
// std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_ADDSUB, positive_zero) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize, 0.0f);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
  // All inputs are identical, and only element 0 is verified.
  const uint32_t input_bits = fp32_to_bits(inputs[0]);
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
1634
// Runs the WAsm SIMD add/sub kernel on a block of -0.0f and checks the first output against
// std::floor bit-for-bit (so the sign of zero is part of the comparison).
TEST(ROUNDD__WASMSIMD_ADDSUB, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Build -0.0f from its bit pattern. Passing UINT32_C(0x80000000) directly to std::fill would
  // convert the integer by value and fill the block with 2147483648.0f rather than negative zero.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));
  xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // All inputs are identical, and only element 0 is verified.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
1646
// Sweeps bit patterns 0x00000001..0x007FFFFF (positive subnormals) through the WAsm SIMD add/sub
// kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_ADDSUB, positive_subnormal) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x00000000);
  const uint32_t end_bits = UINT32_C(0x00800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Consecutive bit patterns; the zero pattern is clamped up to the smallest subnormal.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x00000001)));
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1664
// Sweeps bit patterns 0x80000001..0x807FFFFF (negative subnormals) through the WAsm SIMD add/sub
// kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_ADDSUB, negative_subnormal) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x80000000);
  const uint32_t end_bits = UINT32_C(0x80800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Consecutive bit patterns; the negative-zero pattern is clamped up to the first subnormal.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x80000001)));
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1682
// Sweeps bit patterns 0x00800000..0x4B7FFFFF (positive normal values below 2**24) through the
// WAsm SIMD add/sub kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_ADDSUB, positive_normal) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x00800000);
  const uint32_t end_bits = UINT32_C(0x4B800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1700
// Sweeps bit patterns 0x80800000..0xCB7FFFFF (negative normal values above -2**24) through the
// WAsm SIMD add/sub kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_ADDSUB, negative_normal) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x80800000);
  const uint32_t end_bits = UINT32_C(0xCB800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1718
// Sweeps bit patterns 0x4B800000..0x7F7FFFFF (finite positive values of at least 2**24) through
// the WAsm SIMD add/sub kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_ADDSUB, positive_integral) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x4B800000);
  const uint32_t end_bits = UINT32_C(0x7F800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1736
// Sweeps bit patterns 0xCB800000..0xFF7FFFFF (finite negative values of magnitude at least 2**24)
// through the WAsm SIMD add/sub kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_ADDSUB, negative_integral) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0xCB800000);
  const uint32_t end_bits = UINT32_C(0xFF800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1754
// Runs the WAsm SIMD add/sub kernel on a block of +infinity and checks the first output against
// std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_ADDSUB, positive_infinity) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize, +std::numeric_limits<float>::infinity());
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
  // All inputs are identical, and only element 0 is verified.
  const uint32_t input_bits = fp32_to_bits(inputs[0]);
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
1766
// Runs the WAsm SIMD add/sub kernel on a block of -infinity and checks the first output against
// std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_ADDSUB, negative_infinity) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize, -std::numeric_limits<float>::infinity());
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
  // All inputs are identical, and only element 0 is verified.
  const uint32_t input_bits = fp32_to_bits(inputs[0]);
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
1778
// Sweeps bit patterns 0x7FC00000..0x7FFFFFFF (positive quiet NaNs) through the WAsm SIMD add/sub
// kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_ADDSUB, positive_qnan) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x7FC00000);
  const uint32_t end_bits = UINT32_C(0x80000000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1796
// Sweeps bit patterns 0xFFC00000..0xFFFFFFFF (negative quiet NaNs) through the WAsm SIMD add/sub
// kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_ADDSUB, negative_qnan) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t sign_bit = UINT32_C(0x80000000);
  const uint32_t first_bits = UINT32_C(0x7FC00000);
  const uint32_t end_bits = UINT32_C(0x80000000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Enumerate magnitudes over the positive range, then set the sign bit on each pattern.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(sign_bit | bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1814
// Sweeps bit patterns 0x7F800001..0x7FBFFFFF (positive signaling NaNs) through the WAsm SIMD
// add/sub kernel. Both sides are masked with 0xFFBFFFFF, so bit 22 is ignored in the comparison
// against std::floor.
TEST(ROUNDD__WASMSIMD_ADDSUB, positive_snan) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x7F800000);
  const uint32_t end_bits = UINT32_C(0x7FC00000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Consecutive bit patterns; 0x7F800000 (+infinity) is clamped up to the first NaN pattern.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1832
// Sweeps bit patterns 0xFF800001..0xFFBFFFFF (negative signaling NaNs) through the WAsm SIMD
// add/sub kernel. Both sides are masked with 0xFFBFFFFF, so bit 22 is ignored in the comparison
// against std::floor.
TEST(ROUNDD__WASMSIMD_ADDSUB, negative_snan) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t sign_bit = UINT32_C(0x80000000);
  const uint32_t first_bits = UINT32_C(0x7F800000);
  const uint32_t end_bits = UINT32_C(0x7FC00000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Enumerate magnitudes (skipping the infinity pattern via the clamp), then set the sign bit.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(sign_bit | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1850
// Sweeps bit patterns 0x7F800001..0x7FBFFFFF (positive signaling NaNs) through the WAsm SIMD
// add/sub kernel and requires every output to match std::floor bit-for-bit, with no masking.
TEST(ROUNDD__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x7F800000);
  const uint32_t end_bits = UINT32_C(0x7FC00000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Consecutive bit patterns; 0x7F800000 (+infinity) is clamped up to the first NaN pattern.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1868
// Sweeps bit patterns 0xFF800001..0xFFBFFFFF (negative signaling NaNs) through the WAsm SIMD
// add/sub kernel and requires every output to match std::floor bit-for-bit, with no masking.
TEST(ROUNDD__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t sign_bit = UINT32_C(0x80000000);
  const uint32_t first_bits = UINT32_C(0x7F800000);
  const uint32_t end_bits = UINT32_C(0x7FC00000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Enumerate magnitudes (skipping the infinity pattern via the clamp), then set the sign bit.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(sign_bit | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1886 #endif // XNN_ARCH_WASMSIMD
1887
1888 #if XNN_ARCH_WASMSIMD
// Runs the WAsm SIMD conversion-based kernel on a block of +0.0f and checks the first output
// against std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_CVT, positive_zero) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize, 0.0f);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
  // All inputs are identical, and only element 0 is verified.
  const uint32_t input_bits = fp32_to_bits(inputs[0]);
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
1900
// Runs the WAsm SIMD conversion-based kernel on a block of -0.0f and checks the first output
// against std::floor bit-for-bit (so the sign of zero is part of the comparison).
TEST(ROUNDD__WASMSIMD_CVT, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Build -0.0f from its bit pattern. Passing UINT32_C(0x80000000) directly to std::fill would
  // convert the integer by value and fill the block with 2147483648.0f rather than negative zero.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));
  xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // All inputs are identical, and only element 0 is verified.
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
1912
// Sweeps bit patterns 0x00000001..0x007FFFFF (positive subnormals) through the WAsm SIMD
// conversion-based kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_CVT, positive_subnormal) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x00000000);
  const uint32_t end_bits = UINT32_C(0x00800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Consecutive bit patterns; the zero pattern is clamped up to the smallest subnormal.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x00000001)));
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1930
// Sweeps bit patterns 0x80000001..0x807FFFFF (negative subnormals) through the WAsm SIMD
// conversion-based kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_CVT, negative_subnormal) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x80000000);
  const uint32_t end_bits = UINT32_C(0x80800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Consecutive bit patterns; the negative-zero pattern is clamped up to the first subnormal.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x80000001)));
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1948
// Sweeps bit patterns 0x00800000..0x4B7FFFFF (positive normal values below 2**24) through the
// WAsm SIMD conversion-based kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_CVT, positive_normal) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x00800000);
  const uint32_t end_bits = UINT32_C(0x4B800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1966
// Sweeps bit patterns 0x80800000..0xCB7FFFFF (negative normal values above -2**24) through the
// WAsm SIMD conversion-based kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_CVT, negative_normal) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x80800000);
  const uint32_t end_bits = UINT32_C(0xCB800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1984
// Sweeps bit patterns 0x4B800000..0x7F7FFFFF (finite positive values of at least 2**24) through
// the WAsm SIMD conversion-based kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_CVT, positive_integral) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x4B800000);
  const uint32_t end_bits = UINT32_C(0x7F800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2002
// Sweeps bit patterns 0xCB800000..0xFF7FFFFF (finite negative values of magnitude at least 2**24)
// through the WAsm SIMD conversion-based kernel and requires every output to match std::floor
// bit-for-bit.
TEST(ROUNDD__WASMSIMD_CVT, negative_integral) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0xCB800000);
  const uint32_t end_bits = UINT32_C(0xFF800000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2020
// Runs the WAsm SIMD conversion-based kernel on a block of +infinity and checks the first output
// against std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_CVT, positive_infinity) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize, +std::numeric_limits<float>::infinity());
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
  // All inputs are identical, and only element 0 is verified.
  const uint32_t input_bits = fp32_to_bits(inputs[0]);
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
2032
// Runs the WAsm SIMD conversion-based kernel on a block of -infinity and checks the first output
// against std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_CVT, negative_infinity) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize, -std::numeric_limits<float>::infinity());
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
  // All inputs are identical, and only element 0 is verified.
  const uint32_t input_bits = fp32_to_bits(inputs[0]);
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
2044
// Sweeps bit patterns 0x7FC00000..0x7FFFFFFF (positive quiet NaNs) through the WAsm SIMD
// conversion-based kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_CVT, positive_qnan) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x7FC00000);
  const uint32_t end_bits = UINT32_C(0x80000000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2062
// Sweeps bit patterns 0xFFC00000..0xFFFFFFFF (negative quiet NaNs) through the WAsm SIMD
// conversion-based kernel and requires every output to match std::floor bit-for-bit.
TEST(ROUNDD__WASMSIMD_CVT, negative_qnan) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t sign_bit = UINT32_C(0x80000000);
  const uint32_t first_bits = UINT32_C(0x7FC00000);
  const uint32_t end_bits = UINT32_C(0x80000000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Enumerate magnitudes over the positive range, then set the sign bit on each pattern.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(sign_bit | bits);
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2080
// Sweeps bit patterns 0x7F800001..0x7FBFFFFF (positive signaling NaNs) through the WAsm SIMD
// conversion-based kernel. Both sides are masked with 0xFFBFFFFF, so bit 22 is ignored in the
// comparison against std::floor.
TEST(ROUNDD__WASMSIMD_CVT, positive_snan) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x7F800000);
  const uint32_t end_bits = UINT32_C(0x7FC00000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Consecutive bit patterns; 0x7F800000 (+infinity) is clamped up to the first NaN pattern.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2098
// Sweeps bit patterns 0xFF800001..0xFFBFFFFF (negative signaling NaNs) through the WAsm SIMD
// conversion-based kernel. Both sides are masked with 0xFFBFFFFF, so bit 22 is ignored in the
// comparison against std::floor.
TEST(ROUNDD__WASMSIMD_CVT, negative_snan) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t sign_bit = UINT32_C(0x80000000);
  const uint32_t first_bits = UINT32_C(0x7F800000);
  const uint32_t end_bits = UINT32_C(0x7FC00000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Enumerate magnitudes (skipping the infinity pattern via the clamp), then set the sign bit.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(sign_bit | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2116
// Sweeps bit patterns 0x7F800001..0x7FBFFFFF (positive signaling NaNs) through the WAsm SIMD
// conversion-based kernel and requires every output to match std::floor bit-for-bit, with no
// masking.
TEST(ROUNDD__WASMSIMD_CVT, positive_snan_to_qnan) {
  using AlignedBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedBuffer inputs(kBlockSize);
  AlignedBuffer outputs(kBlockSize);
  const size_t batch_bytes = kBlockSize * sizeof(float);
  const uint32_t first_bits = UINT32_C(0x7F800000);
  const uint32_t end_bits = UINT32_C(0x7FC00000);
  for (uint32_t block_bits = first_bits; block_bits < end_bits; block_bits += kBlockSize) {
    // Consecutive bit patterns; 0x7F800000 (+infinity) is clamped up to the first NaN pattern.
    uint32_t bits = block_bits;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      ++bits;
    }
    xnn_math_f32_roundd__wasmsimd_cvt(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t input_bits = fp32_to_bits(inputs[i]);
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2134
// Sweeps the negative NaN bit patterns 0xFF800001..0xFFBFFFFF (bit 22 clear) through
// xnn_math_f32_roundd__wasmsimd_cvt. The comparison is exact, so every output bit,
// including bit 22, has to match what std::floor returns for the same input.
TEST(ROUNDD__WASMSIMD_CVT, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Consecutive magnitudes (the infinity pattern is bumped up to the first NaN), then the sign bit is set.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      const uint32_t magnitude_bits = std::max<uint32_t>(next_bits, UINT32_C(0x7F800001));
      input = fp32_from_bits(UINT32_C(0x80000000) | magnitude_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2152 #endif // XNN_ARCH_WASMSIMD
2153
// Runs xnn_math_f32_roundd__scalar_addsub on a block of +0.0f values and checks that the first
// output is bit-identical to std::floor(+0.0f). Only element 0 is inspected.
TEST(ROUNDD__SCALAR_ADDSUB, positive_zero) {
  // Every input element is +0.0f (bit pattern 0x00000000).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, 0.0f);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
2165
// Runs xnn_math_f32_roundd__scalar_addsub on a block of -0.0f values and checks that the first
// output is bit-identical to std::floor(-0.0f). Only element 0 is inspected.
TEST(ROUNDD__SCALAR_ADDSUB, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // The input must be built from its bit pattern. Handing the integer 0x80000000 to std::fill
  // would convert it by value to 2147483648.0f, and negative zero would never be tested.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));
  xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
2177
// Sweeps every positive subnormal bit pattern (0x00000001..0x007FFFFF) through
// xnn_math_f32_roundd__scalar_addsub and requires each output to be bit-identical to std::floor.
TEST(ROUNDD__SCALAR_ADDSUB, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x00000000); block_base < UINT32_C(0x00800000); block_base += kBlockSize) {
    // Consecutive bit patterns; 0x00000000 (+0.0f) is bumped up to the smallest subnormal.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits, UINT32_C(0x00000001)));
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2195
// Sweeps every negative subnormal bit pattern (0x80000001..0x807FFFFF) through
// xnn_math_f32_roundd__scalar_addsub and requires each output to be bit-identical to std::floor.
TEST(ROUNDD__SCALAR_ADDSUB, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x80000000); block_base < UINT32_C(0x80800000); block_base += kBlockSize) {
    // Consecutive bit patterns; 0x80000000 (-0.0f) is bumped up to the smallest-magnitude subnormal.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits, UINT32_C(0x80000001)));
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2213
// Sweeps every bit pattern in [0x00800000, 0x4B800000), i.e. positive normal values below 2^24,
// through xnn_math_f32_roundd__scalar_addsub and requires bit-exact agreement with std::floor.
TEST(ROUNDD__SCALAR_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x00800000); block_base < UINT32_C(0x4B800000); block_base += kBlockSize) {
    // Fill the block with consecutive bit patterns starting at block_base.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2231
// Sweeps every bit pattern in [0x80800000, 0xCB800000), i.e. negative normal values with
// magnitude below 2^24, through xnn_math_f32_roundd__scalar_addsub and requires bit-exact
// agreement with std::floor.
TEST(ROUNDD__SCALAR_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x80800000); block_base < UINT32_C(0xCB800000); block_base += kBlockSize) {
    // Fill the block with consecutive bit patterns starting at block_base.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2249
// Sweeps every bit pattern in [0x4B800000, 0x7F800000), i.e. finite positive values from 2^24
// upwards, through xnn_math_f32_roundd__scalar_addsub and requires bit-exact agreement with
// std::floor.
TEST(ROUNDD__SCALAR_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x4B800000); block_base < UINT32_C(0x7F800000); block_base += kBlockSize) {
    // Fill the block with consecutive bit patterns starting at block_base.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2267
// Sweeps every bit pattern in [0xCB800000, 0xFF800000), i.e. finite negative values with
// magnitude of at least 2^24, through xnn_math_f32_roundd__scalar_addsub and requires bit-exact
// agreement with std::floor.
TEST(ROUNDD__SCALAR_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0xCB800000); block_base < UINT32_C(0xFF800000); block_base += kBlockSize) {
    // Fill the block with consecutive bit patterns starting at block_base.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2285
// Runs xnn_math_f32_roundd__scalar_addsub on a block of +infinity values and checks that the
// first output is bit-identical to std::floor(+infinity). Only element 0 is inspected.
TEST(ROUNDD__SCALAR_ADDSUB, positive_infinity) {
  const float positive_infinity = +std::numeric_limits<float>::infinity();
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, positive_infinity);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
2297
// Runs xnn_math_f32_roundd__scalar_addsub on a block of -infinity values and checks that the
// first output is bit-identical to std::floor(-infinity). Only element 0 is inspected.
TEST(ROUNDD__SCALAR_ADDSUB, negative_infinity) {
  const float negative_infinity = -std::numeric_limits<float>::infinity();
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, negative_infinity);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
2309
// Sweeps every positive NaN bit pattern with bit 22 set (0x7FC00000..0x7FFFFFFF) through
// xnn_math_f32_roundd__scalar_addsub and requires bit-exact agreement with std::floor.
TEST(ROUNDD__SCALAR_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7FC00000); block_base < UINT32_C(0x80000000); block_base += kBlockSize) {
    // Fill the block with consecutive bit patterns starting at block_base.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2327
// Sweeps every negative NaN bit pattern with bit 22 set (0xFFC00000..0xFFFFFFFF) through
// xnn_math_f32_roundd__scalar_addsub and requires bit-exact agreement with std::floor.
TEST(ROUNDD__SCALAR_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7FC00000); block_base < UINT32_C(0x80000000); block_base += kBlockSize) {
    // Consecutive magnitude patterns starting at block_base, each with the sign bit set.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2345
// Sweeps the positive NaN bit patterns 0x7F800001..0x7FBFFFFF (bit 22 clear) through
// xnn_math_f32_roundd__scalar_addsub. Output and std::floor reference are compared with
// bit 22 (0x00400000) masked out, so that bit may differ between them.
TEST(ROUNDD__SCALAR_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Consecutive bit patterns; 0x7F800000 (+infinity) is bumped up to the first NaN pattern.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits, UINT32_C(0x7F800001)));
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2363
// Sweeps the negative NaN bit patterns 0xFF800001..0xFFBFFFFF (bit 22 clear) through
// xnn_math_f32_roundd__scalar_addsub. Output and std::floor reference are compared with
// bit 22 (0x00400000) masked out, so that bit may differ between them.
TEST(ROUNDD__SCALAR_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Consecutive magnitudes (the infinity pattern is bumped up to the first NaN), then the sign bit is set.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      const uint32_t magnitude_bits = std::max<uint32_t>(next_bits, UINT32_C(0x7F800001));
      input = fp32_from_bits(UINT32_C(0x80000000) | magnitude_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2381
// Sweeps the positive NaN bit patterns 0x7F800001..0x7FBFFFFF (bit 22 clear) through
// xnn_math_f32_roundd__scalar_addsub. The comparison is exact, so every output bit,
// including bit 22, has to match what std::floor returns for the same input.
TEST(ROUNDD__SCALAR_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Consecutive bit patterns; 0x7F800000 (+infinity) is bumped up to the first NaN pattern.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits, UINT32_C(0x7F800001)));
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2399
// Sweeps the negative NaN bit patterns 0xFF800001..0xFFBFFFFF (bit 22 clear) through
// xnn_math_f32_roundd__scalar_addsub. The comparison is exact, so every output bit,
// including bit 22, has to match what std::floor returns for the same input.
TEST(ROUNDD__SCALAR_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Consecutive magnitudes (the infinity pattern is bumped up to the first NaN), then the sign bit is set.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      const uint32_t magnitude_bits = std::max<uint32_t>(next_bits, UINT32_C(0x7F800001));
      input = fp32_from_bits(UINT32_C(0x80000000) | magnitude_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2417
// Runs xnn_math_f32_roundd__scalar_cvt on a block of +0.0f values and checks that the first
// output is bit-identical to std::floor(+0.0f). Only element 0 is inspected.
TEST(ROUNDD__SCALAR_CVT, positive_zero) {
  // Every input element is +0.0f (bit pattern 0x00000000).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, 0.0f);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
2429
// Runs xnn_math_f32_roundd__scalar_cvt on a block of -0.0f values and checks that the first
// output is bit-identical to std::floor(-0.0f). Only element 0 is inspected.
TEST(ROUNDD__SCALAR_CVT, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // The input must be built from its bit pattern. Handing the integer 0x80000000 to std::fill
  // would convert it by value to 2147483648.0f, and negative zero would never be tested.
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));
  xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
2441
// Sweeps every positive subnormal bit pattern (0x00000001..0x007FFFFF) through
// xnn_math_f32_roundd__scalar_cvt and requires each output to be bit-identical to std::floor.
TEST(ROUNDD__SCALAR_CVT, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x00000000); block_base < UINT32_C(0x00800000); block_base += kBlockSize) {
    // Consecutive bit patterns; 0x00000000 (+0.0f) is bumped up to the smallest subnormal.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits, UINT32_C(0x00000001)));
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2459
// Sweeps every negative subnormal bit pattern (0x80000001..0x807FFFFF) through
// xnn_math_f32_roundd__scalar_cvt and requires each output to be bit-identical to std::floor.
TEST(ROUNDD__SCALAR_CVT, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x80000000); block_base < UINT32_C(0x80800000); block_base += kBlockSize) {
    // Consecutive bit patterns; 0x80000000 (-0.0f) is bumped up to the smallest-magnitude subnormal.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits, UINT32_C(0x80000001)));
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2477
// Sweeps every bit pattern in [0x00800000, 0x4B800000), i.e. positive normal values below 2^24,
// through xnn_math_f32_roundd__scalar_cvt and requires bit-exact agreement with std::floor.
TEST(ROUNDD__SCALAR_CVT, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x00800000); block_base < UINT32_C(0x4B800000); block_base += kBlockSize) {
    // Fill the block with consecutive bit patterns starting at block_base.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2495
// Sweeps every bit pattern in [0x80800000, 0xCB800000), i.e. negative normal values with
// magnitude below 2^24, through xnn_math_f32_roundd__scalar_cvt and requires bit-exact
// agreement with std::floor.
TEST(ROUNDD__SCALAR_CVT, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x80800000); block_base < UINT32_C(0xCB800000); block_base += kBlockSize) {
    // Fill the block with consecutive bit patterns starting at block_base.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2513
// Sweeps every bit pattern in [0x4B800000, 0x7F800000), i.e. finite positive values from 2^24
// upwards, through xnn_math_f32_roundd__scalar_cvt and requires bit-exact agreement with
// std::floor.
TEST(ROUNDD__SCALAR_CVT, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x4B800000); block_base < UINT32_C(0x7F800000); block_base += kBlockSize) {
    // Fill the block with consecutive bit patterns starting at block_base.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2531
// Sweeps every bit pattern in [0xCB800000, 0xFF800000), i.e. finite negative values with
// magnitude of at least 2^24, through xnn_math_f32_roundd__scalar_cvt and requires bit-exact
// agreement with std::floor.
TEST(ROUNDD__SCALAR_CVT, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0xCB800000); block_base < UINT32_C(0xFF800000); block_base += kBlockSize) {
    // Fill the block with consecutive bit patterns starting at block_base.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2549
// Runs xnn_math_f32_roundd__scalar_cvt on a block of +infinity values and checks that the
// first output is bit-identical to std::floor(+infinity). Only element 0 is inspected.
TEST(ROUNDD__SCALAR_CVT, positive_infinity) {
  const float positive_infinity = +std::numeric_limits<float>::infinity();
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, positive_infinity);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
2561
// Runs xnn_math_f32_roundd__scalar_cvt on a block of -infinity values and checks that the
// first output is bit-identical to std::floor(-infinity). Only element 0 is inspected.
TEST(ROUNDD__SCALAR_CVT, negative_infinity) {
  const float negative_infinity = -std::numeric_limits<float>::infinity();
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, negative_infinity);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
2573
// Sweeps every positive NaN bit pattern with bit 22 set (0x7FC00000..0x7FFFFFFF) through
// xnn_math_f32_roundd__scalar_cvt and requires bit-exact agreement with std::floor.
TEST(ROUNDD__SCALAR_CVT, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7FC00000); block_base < UINT32_C(0x80000000); block_base += kBlockSize) {
    // Fill the block with consecutive bit patterns starting at block_base.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2591
// Sweeps every negative NaN bit pattern with bit 22 set (0xFFC00000..0xFFFFFFFF) through
// xnn_math_f32_roundd__scalar_cvt and requires bit-exact agreement with std::floor.
TEST(ROUNDD__SCALAR_CVT, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7FC00000); block_base < UINT32_C(0x80000000); block_base += kBlockSize) {
    // Consecutive magnitude patterns starting at block_base, each with the sign bit set.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | next_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2609
// Sweeps the positive NaN bit patterns 0x7F800001..0x7FBFFFFF (bit 22 clear) through
// xnn_math_f32_roundd__scalar_cvt. Output and std::floor reference are compared with
// bit 22 (0x00400000) masked out, so that bit may differ between them.
TEST(ROUNDD__SCALAR_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Consecutive bit patterns; 0x7F800000 (+infinity) is bumped up to the first NaN pattern.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits, UINT32_C(0x7F800001)));
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2627
// Sweeps the negative NaN bit patterns 0xFF800001..0xFFBFFFFF (bit 22 clear) through
// xnn_math_f32_roundd__scalar_cvt. Output and std::floor reference are compared with
// bit 22 (0x00400000) masked out, so that bit may differ between them.
TEST(ROUNDD__SCALAR_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Consecutive magnitudes (the infinity pattern is bumped up to the first NaN), then the sign bit is set.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      const uint32_t magnitude_bits = std::max<uint32_t>(next_bits, UINT32_C(0x7F800001));
      input = fp32_from_bits(UINT32_C(0x80000000) | magnitude_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2645
// Sweeps the positive NaN bit patterns 0x7F800001..0x7FBFFFFF (bit 22 clear) through
// xnn_math_f32_roundd__scalar_cvt. The comparison is exact, so every output bit,
// including bit 22, has to match what std::floor returns for the same input.
TEST(ROUNDD__SCALAR_CVT, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Consecutive bit patterns; 0x7F800000 (+infinity) is bumped up to the first NaN pattern.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits, UINT32_C(0x7F800001)));
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2663
// Sweeps the negative NaN bit patterns 0xFF800001..0xFFBFFFFF (bit 22 clear) through
// xnn_math_f32_roundd__scalar_cvt. The comparison is exact, so every output bit,
// including bit 22, has to match what std::floor returns for the same input.
TEST(ROUNDD__SCALAR_CVT, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Consecutive magnitudes (the infinity pattern is bumped up to the first NaN), then the sign bit is set.
    uint32_t next_bits = block_base;
    for (float& input : inputs) {
      const uint32_t magnitude_bits = std::max<uint32_t>(next_bits, UINT32_C(0x7F800001));
      input = fp32_from_bits(UINT32_C(0x80000000) | magnitude_bits);
      next_bits += 1;
    }

    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (size_t i = 0; i < inputs.size(); i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
2681