// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/math-stubs.h>


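// Number of float elements processed per call to a kernel under test; each
// test sweeps IEEE 754 bit patterns in blocks of this size.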
constexpr int kBlockSize = 1024;


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

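    // exp(-0.0f) must be exactly 1.0f; all inputs are identical, so only
    // outputs[0] is checked.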
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, positive_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
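    // Sweep bit patterns from 0xC2AEAC50 (~ -87.34f, near ln(2^-126)) down to
    // 0xFF800000 (-infinity); over this whole range the kernel is expected to
    // saturate to exactly +0.0f.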
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
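    // Bit patterns 0x7F800001..0x7FFFFFFF enumerate every positive NaN
    // encoding; the kernel must propagate NaN to the output.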
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
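    // Same NaN payloads as above, clamped first and then OR-ed with the sign
    // bit so that every negative NaN encoding is covered.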
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, positive_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(EXPMINUS__NEONFMA_RR2_P5, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, positive_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
      }
      xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPMINUS__AVX2_RR2_P5, negative_zero) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__AVX2_RR2_P5, positive_zero) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__AVX2_RR2_P5, negative_saturation) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__AVX2_RR2_P5, positive_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__AVX2_RR2_P5, negative_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
      }
      xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
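  // No TEST_REQUIRES check below: SSE2 is assumed to be part of the baseline
  // target for these x86 builds.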
  TEST(EXPMINUS__SSE2_RR2_P5, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__SSE2_RR2_P5, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__SSE2_RR2_P5, negative_saturation) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__SSE2_RR2_P5, positive_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__SSE2_RR2_P5, negative_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
      }
      xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}


TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}


TEST(EXPMINUS__SCALAR_RR2_P5, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_P5, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_P5, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_P5, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_P5, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i) | UINT32_C(0x80000000));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}