1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2020-2022 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 /**
19 * @brief Unit tests for the vectorized SIMD functionality.
20 */
21
22 #include <limits>
23
24 #include "gtest/gtest.h"
25
26 #include "../astcenc_internal.h"
27 #include "../astcenc_vecmathlib.h"
28
29 namespace astcenc
30 {
31
32 // Misc utility tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
33
round_down(unsigned int x)34 static unsigned int round_down(unsigned int x)
35 {
36 unsigned int remainder = x % ASTCENC_SIMD_WIDTH;
37 return x - remainder;
38 }
39
round_up(unsigned int x)40 static unsigned int round_up(unsigned int x)
41 {
42 unsigned int remainder = x % ASTCENC_SIMD_WIDTH;
43 if (!remainder)
44 {
45 return x;
46 }
47
48 return x - remainder + ASTCENC_SIMD_WIDTH;
49 }
50
51 /** @brief Test VLA loop limit round down. */
TEST(misc,RoundDownVLA)52 TEST(misc, RoundDownVLA)
53 {
54 // Static ones which are valid for all VLA widths
55 EXPECT_EQ(round_down_to_simd_multiple_vla(0), 0u);
56 EXPECT_EQ(round_down_to_simd_multiple_vla(8), 8u);
57 EXPECT_EQ(round_down_to_simd_multiple_vla(16), 16u);
58
59 // Variable ones which depend on VLA width
60 EXPECT_EQ(round_down_to_simd_multiple_vla(3), round_down(3));
61 EXPECT_EQ(round_down_to_simd_multiple_vla(5), round_down(5));
62 EXPECT_EQ(round_down_to_simd_multiple_vla(7), round_down(7));
63 EXPECT_EQ(round_down_to_simd_multiple_vla(231), round_down(231));
64 }
65
66 /** @brief Test VLA loop limit round up. */
TEST(misc,RoundUpVLA)67 TEST(misc, RoundUpVLA)
68 {
69 // Static ones which are valid for all VLA widths
70 EXPECT_EQ(round_up_to_simd_multiple_vla(0), 0u);
71 EXPECT_EQ(round_up_to_simd_multiple_vla(8), 8u);
72 EXPECT_EQ(round_up_to_simd_multiple_vla(16), 16u);
73
74 // Variable ones which depend on VLA width
75 EXPECT_EQ(round_up_to_simd_multiple_vla(3), round_up(3));
76 EXPECT_EQ(round_up_to_simd_multiple_vla(5), round_up(5));
77 EXPECT_EQ(round_up_to_simd_multiple_vla(7), round_up(7));
78 EXPECT_EQ(round_up_to_simd_multiple_vla(231), round_up(231));
79 }
80
81 #if ASTCENC_SIMD_WIDTH == 1
82
83 // VLA (1-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
84
85 /** @brief Test VLA change_sign. */
TEST(vfloat,ChangeSign)86 TEST(vfloat, ChangeSign)
87 {
88 vfloat a0(-1.0f);
89 vfloat b0(-1.0f);
90 vfloat r0 = change_sign(a0, b0);
91 EXPECT_EQ(r0.lane<0>(), 1.0f);
92
93 vfloat a1( 1.0f);
94 vfloat b1(-1.0f);
95 vfloat r1 = change_sign(a1, b1);
96 EXPECT_EQ(r1.lane<0>(), -1.0f);
97
98 vfloat a2(-3.12f);
99 vfloat b2( 3.12f);
100 vfloat r2 = change_sign(a2, b2);
101 EXPECT_EQ(r2.lane<0>(), -3.12f);
102
103 vfloat a3( 3.12f);
104 vfloat b3( 3.12f);
105 vfloat r3 = change_sign(a3, b3);
106 EXPECT_EQ(r3.lane<0>(), 3.12f);
107 }
108
109 /** @brief Test VLA atan. */
TEST(vfloat,Atan)110 TEST(vfloat, Atan)
111 {
112 vfloat a0(-0.15f);
113 vfloat r0 = atan(a0);
114 EXPECT_NEAR(r0.lane<0>(), -0.149061f, 0.005f);
115
116 vfloat a1(0.0f);
117 vfloat r1 = atan(a1);
118 EXPECT_NEAR(r1.lane<0>(), 0.000000f, 0.005f);
119
120 vfloat a2(0.9f);
121 vfloat r2 = atan(a2);
122 EXPECT_NEAR(r2.lane<0>(), 0.733616f, 0.005f);
123
124 vfloat a3(2.1f);
125 vfloat r3 = atan(a3);
126 EXPECT_NEAR(r3.lane<0>(), 1.123040f, 0.005f);
127 }
128
129 /** @brief Test VLA atan2. */
TEST(vfloat,Atan2)130 TEST(vfloat, Atan2)
131 {
132 vfloat a0(-0.15f);
133 vfloat b0( 1.15f);
134 vfloat r0 = atan2(a0, b0);
135 EXPECT_NEAR(r0.lane<0>(), -0.129816f, 0.005f);
136
137 vfloat a1( 0.0f);
138 vfloat b1(-3.0f);
139 vfloat r1 = atan2(a1, b1);
140 EXPECT_NEAR(r1.lane<0>(), 3.141592f, 0.005f);
141
142 vfloat a2( 0.9f);
143 vfloat b2(-0.9f);
144 vfloat r2 = atan2(a2, b2);
145 EXPECT_NEAR(r2.lane<0>(), 2.360342f, 0.005f);
146
147 vfloat a3( 2.1f);
148 vfloat b3( 1.1f);
149 vfloat r3 = atan2(a3, b3);
150 EXPECT_NEAR(r3.lane<0>(), 1.084357f, 0.005f);
151 }
152
153 #elif ASTCENC_SIMD_WIDTH == 4
154
155 // VLA (4-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
156
157 /** @brief Test VLA change_sign. */
TEST(vfloat,ChangeSign)158 TEST(vfloat, ChangeSign)
159 {
160 vfloat a(-1.0f, 1.0f, -3.12f, 3.12f);
161 vfloat b(-1.0f, -1.0f, 3.12f, 3.12f);
162 vfloat r = change_sign(a, b);
163 EXPECT_EQ(r.lane<0>(), 1.0f);
164 EXPECT_EQ(r.lane<1>(), -1.0f);
165 EXPECT_EQ(r.lane<2>(), -3.12f);
166 EXPECT_EQ(r.lane<3>(), 3.12f);
167 }
168
169 /** @brief Test VLA atan. */
TEST(vfloat,Atan)170 TEST(vfloat, Atan)
171 {
172 vfloat a(-0.15f, 0.0f, 0.9f, 2.1f);
173 vfloat r = atan(a);
174 EXPECT_NEAR(r.lane<0>(), -0.149061f, 0.005f);
175 EXPECT_NEAR(r.lane<1>(), 0.000000f, 0.005f);
176 EXPECT_NEAR(r.lane<2>(), 0.733616f, 0.005f);
177 EXPECT_NEAR(r.lane<3>(), 1.123040f, 0.005f);
178 }
179
180 /** @brief Test VLA atan2. */
TEST(vfloat,Atan2)181 TEST(vfloat, Atan2)
182 {
183 vfloat a(-0.15f, 0.0f, 0.9f, 2.1f);
184 vfloat b(1.15f, -3.0f, -0.9f, 1.1f);
185 vfloat r = atan2(a, b);
186 EXPECT_NEAR(r.lane<0>(), -0.129816f, 0.005f);
187 EXPECT_NEAR(r.lane<1>(), 3.141592f, 0.005f);
188 EXPECT_NEAR(r.lane<2>(), 2.360342f, 0.005f);
189 EXPECT_NEAR(r.lane<3>(), 1.084357f, 0.005f);
190 }
191
192 #elif ASTCENC_SIMD_WIDTH == 8
193
194 // VLA (8-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
195
196 /** @brief Test VLA change_sign. */
TEST(vfloat,ChangeSign)197 TEST(vfloat, ChangeSign)
198 {
199 vfloat a(-1.0f, 1.0f, -3.12f, 3.12f, -1.0f, 1.0f, -3.12f, 3.12f);
200 vfloat b(-1.0f, -1.0f, 3.12f, 3.12f, -1.0f, -1.0f, 3.12f, 3.12f);
201 vfloat r = change_sign(a, b);
202 EXPECT_EQ(r.lane<0>(), 1.0f);
203 EXPECT_EQ(r.lane<1>(), -1.0f);
204 EXPECT_EQ(r.lane<2>(), -3.12f);
205 EXPECT_EQ(r.lane<3>(), 3.12f);
206 EXPECT_EQ(r.lane<4>(), 1.0f);
207 EXPECT_EQ(r.lane<5>(), -1.0f);
208 EXPECT_EQ(r.lane<6>(), -3.12f);
209 EXPECT_EQ(r.lane<7>(), 3.12f);
210 }
211
212 /** @brief Test VLA atan. */
TEST(vfloat,Atan)213 TEST(vfloat, Atan)
214 {
215 vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f);
216 vfloat r = atan(a);
217 EXPECT_NEAR(r.lane<0>(), -0.149061f, 0.005f);
218 EXPECT_NEAR(r.lane<1>(), 0.000000f, 0.005f);
219 EXPECT_NEAR(r.lane<2>(), 0.733616f, 0.005f);
220 EXPECT_NEAR(r.lane<3>(), 1.123040f, 0.005f);
221 EXPECT_NEAR(r.lane<4>(), -0.149061f, 0.005f);
222 EXPECT_NEAR(r.lane<5>(), 0.000000f, 0.005f);
223 EXPECT_NEAR(r.lane<6>(), 0.733616f, 0.005f);
224 EXPECT_NEAR(r.lane<7>(), 1.123040f, 0.005f);
225 }
226
227 /** @brief Test VLA atan2. */
TEST(vfloat,Atan2)228 TEST(vfloat, Atan2)
229 {
230 vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f);
231 vfloat b(1.15f, -3.0f, -0.9f, 1.1f, 1.15f, -3.0f, -0.9f, 1.1f);
232 vfloat r = atan2(a, b);
233 EXPECT_NEAR(r.lane<0>(), -0.129816f, 0.005f);
234 EXPECT_NEAR(r.lane<1>(), 3.141592f, 0.005f);
235 EXPECT_NEAR(r.lane<2>(), 2.360342f, 0.005f);
236 EXPECT_NEAR(r.lane<3>(), 1.084357f, 0.005f);
237 EXPECT_NEAR(r.lane<4>(), -0.129816f, 0.005f);
238 EXPECT_NEAR(r.lane<5>(), 3.141592f, 0.005f);
239 EXPECT_NEAR(r.lane<6>(), 2.360342f, 0.005f);
240 EXPECT_NEAR(r.lane<7>(), 1.084357f, 0.005f);
241 }
242
243 #endif
244
// Quiet NaN used to exercise NaN handling in the tests
static const float qnan { std::numeric_limits<float>::quiet_NaN() };

// Shared read-only input tables; entry i holds the value i. They are 32-byte
// aligned so that element 0 can be used for aligned loads, and hold nine
// entries so that loads starting at element 1 stay in bounds.
alignas(32) static const float f32_data[9] = {
    0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f
};

alignas(32) static const int s32_data[9] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8
};

alignas(32) static const uint8_t u8_data[9] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8
};
258
259 // VFLOAT4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
260
261 /** @brief Test unaligned vfloat4 data load. */
TEST(vfloat4,UnalignedLoad)262 TEST(vfloat4, UnalignedLoad)
263 {
264 vfloat4 a(&(f32_data[1]));
265 EXPECT_EQ(a.lane<0>(), 1.0f);
266 EXPECT_EQ(a.lane<1>(), 2.0f);
267 EXPECT_EQ(a.lane<2>(), 3.0f);
268 EXPECT_EQ(a.lane<3>(), 4.0f);
269 }
270
271 /** @brief Test scalar duplicated vfloat4 load. */
TEST(vfloat4,ScalarDupLoad)272 TEST(vfloat4, ScalarDupLoad)
273 {
274 vfloat4 a(1.1f);
275 EXPECT_EQ(a.lane<0>(), 1.1f);
276 EXPECT_EQ(a.lane<1>(), 1.1f);
277 EXPECT_EQ(a.lane<2>(), 1.1f);
278 EXPECT_EQ(a.lane<3>(), 1.1f);
279 }
280
281 /** @brief Test scalar vfloat4 load. */
TEST(vfloat4,ScalarLoad)282 TEST(vfloat4, ScalarLoad)
283 {
284 vfloat4 a(1.1f, 2.2f, 3.3f, 4.4f);
285 EXPECT_EQ(a.lane<0>(), 1.1f);
286 EXPECT_EQ(a.lane<1>(), 2.2f);
287 EXPECT_EQ(a.lane<2>(), 3.3f);
288 EXPECT_EQ(a.lane<3>(), 4.4f);
289 }
290
291 /** @brief Test copy vfloat4 load. */
TEST(vfloat4,CopyLoad)292 TEST(vfloat4, CopyLoad)
293 {
294 vfloat4 s(1.1f, 2.2f, 3.3f, 4.4f);
295 vfloat4 a(s.m);
296 EXPECT_EQ(a.lane<0>(), 1.1f);
297 EXPECT_EQ(a.lane<1>(), 2.2f);
298 EXPECT_EQ(a.lane<2>(), 3.3f);
299 EXPECT_EQ(a.lane<3>(), 4.4f);
300 }
301
302 /** @brief Test vfloat4 scalar lane set. */
TEST(vfloat4,SetLane)303 TEST(vfloat4, SetLane)
304 {
305 vfloat4 a(0.0f);
306
307 a.set_lane<0>(1.0f);
308 EXPECT_EQ(a.lane<0>(), 1.0f);
309 EXPECT_EQ(a.lane<1>(), 0.0f);
310 EXPECT_EQ(a.lane<2>(), 0.0f);
311 EXPECT_EQ(a.lane<3>(), 0.0f);
312
313 a.set_lane<1>(2.0f);
314 EXPECT_EQ(a.lane<0>(), 1.0f);
315 EXPECT_EQ(a.lane<1>(), 2.0f);
316 EXPECT_EQ(a.lane<2>(), 0.0f);
317 EXPECT_EQ(a.lane<3>(), 0.0f);
318
319 a.set_lane<2>(3.0f);
320 EXPECT_EQ(a.lane<0>(), 1.0f);
321 EXPECT_EQ(a.lane<1>(), 2.0f);
322 EXPECT_EQ(a.lane<2>(), 3.0f);
323 EXPECT_EQ(a.lane<3>(), 0.0f);
324
325 a.set_lane<3>(4.0f);
326 EXPECT_EQ(a.lane<0>(), 1.0f);
327 EXPECT_EQ(a.lane<1>(), 2.0f);
328 EXPECT_EQ(a.lane<2>(), 3.0f);
329 EXPECT_EQ(a.lane<3>(), 4.0f);
330 }
331
332 /** @brief Test vfloat4 zero. */
TEST(vfloat4,Zero)333 TEST(vfloat4, Zero)
334 {
335 vfloat4 a = vfloat4::zero();
336 EXPECT_EQ(a.lane<0>(), 0.0f);
337 EXPECT_EQ(a.lane<1>(), 0.0f);
338 EXPECT_EQ(a.lane<2>(), 0.0f);
339 EXPECT_EQ(a.lane<3>(), 0.0f);
340 }
341
342 /** @brief Test vfloat4 load1. */
TEST(vfloat4,Load1)343 TEST(vfloat4, Load1)
344 {
345 float s = 3.14f;
346 vfloat4 a = vfloat4::load1(&s);
347 EXPECT_EQ(a.lane<0>(), 3.14f);
348 EXPECT_EQ(a.lane<1>(), 3.14f);
349 EXPECT_EQ(a.lane<2>(), 3.14f);
350 EXPECT_EQ(a.lane<3>(), 3.14f);
351 }
352
353 /** @brief Test vfloat4 loada. */
TEST(vfloat4,Loada)354 TEST(vfloat4, Loada)
355 {
356 vfloat4 a = vfloat4::loada(&(f32_data[0]));
357 EXPECT_EQ(a.lane<0>(), 0.0f);
358 EXPECT_EQ(a.lane<1>(), 1.0f);
359 EXPECT_EQ(a.lane<2>(), 2.0f);
360 EXPECT_EQ(a.lane<3>(), 3.0f);
361 }
362
363 /** @brief Test vfloat4 lane_id. */
TEST(vfloat4,LaneID)364 TEST(vfloat4, LaneID)
365 {
366 vfloat4 a = vfloat4::lane_id();
367 EXPECT_EQ(a.lane<0>(), 0.0f);
368 EXPECT_EQ(a.lane<1>(), 1.0f);
369 EXPECT_EQ(a.lane<2>(), 2.0f);
370 EXPECT_EQ(a.lane<3>(), 3.0f);
371 }
372
373 /** @brief Test vfloat4 swz to float4. */
TEST(vfloat4,swz4)374 TEST(vfloat4, swz4)
375 {
376 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
377 vfloat4 r = a.swz<0, 3, 2, 1>();
378 EXPECT_EQ(r.lane<0>(), 1.0f);
379 EXPECT_EQ(r.lane<1>(), 4.0f);
380 EXPECT_EQ(r.lane<2>(), 3.0f);
381 EXPECT_EQ(r.lane<3>(), 2.0f);
382
383 r = a.swz<3, 1, 1, 0>();
384 EXPECT_EQ(r.lane<0>(), 4.0f);
385 EXPECT_EQ(r.lane<1>(), 2.0f);
386 EXPECT_EQ(r.lane<2>(), 2.0f);
387 EXPECT_EQ(r.lane<3>(), 1.0f);
388 }
389
390 /** @brief Test vfloat4 swz to float3. */
TEST(vfloat4,swz3)391 TEST(vfloat4, swz3)
392 {
393 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
394 vfloat4 r = a.swz<0, 3, 2>();
395 EXPECT_EQ(r.lane<0>(), 1.0f);
396 EXPECT_EQ(r.lane<1>(), 4.0f);
397 EXPECT_EQ(r.lane<2>(), 3.0f);
398 EXPECT_EQ(r.lane<3>(), 0.0f);
399
400 r = a.swz<3, 1, 1>();
401 EXPECT_EQ(r.lane<0>(), 4.0f);
402 EXPECT_EQ(r.lane<1>(), 2.0f);
403 EXPECT_EQ(r.lane<2>(), 2.0f);
404 EXPECT_EQ(r.lane<3>(), 0.0f);
405 }
406
407 /** @brief Test vfloat4 swz to float2. */
TEST(vfloat4,swz2)408 TEST(vfloat4, swz2)
409 {
410 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
411 vfloat4 r = a.swz<0, 3>();
412 EXPECT_EQ(r.lane<0>(), 1.0f);
413 EXPECT_EQ(r.lane<1>(), 4.0f);
414
415 r = a.swz<2, 1>();
416 EXPECT_EQ(r.lane<0>(), 3.0f);
417 EXPECT_EQ(r.lane<1>(), 2.0f);
418 }
419
420 /** @brief Test vfloat4 add. */
TEST(vfloat4,vadd)421 TEST(vfloat4, vadd)
422 {
423 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
424 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
425 a = a + b;
426 EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f);
427 EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f);
428 EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f);
429 EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f);
430 }
431
432 /** @brief Test vfloat4 self-add. */
TEST(vfloat4,vselfadd1)433 TEST(vfloat4, vselfadd1)
434 {
435 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
436 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
437
438 // Test increment by another variable
439 a += b;
440 EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f);
441 EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f);
442 EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f);
443 EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f);
444
445 // Test increment by an expression
446 a += b + b;
447 EXPECT_NEAR(a.lane<0>(), 1.0f + 0.3f, 0.001f);
448 EXPECT_NEAR(a.lane<1>(), 2.0f + 0.6f, 0.001f);
449 EXPECT_NEAR(a.lane<2>(), 3.0f + 0.9f, 0.001f);
450 EXPECT_NEAR(a.lane<3>(), 4.0f + 1.2f, 0.001f);
451 }
452
453 /** @brief Test vfloat4 sub. */
TEST(vfloat4,vsub)454 TEST(vfloat4, vsub)
455 {
456 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
457 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
458 a = a - b;
459 EXPECT_EQ(a.lane<0>(), 1.0f - 0.1f);
460 EXPECT_EQ(a.lane<1>(), 2.0f - 0.2f);
461 EXPECT_EQ(a.lane<2>(), 3.0f - 0.3f);
462 EXPECT_EQ(a.lane<3>(), 4.0f - 0.4f);
463 }
464
465 /** @brief Test vfloat4 mul. */
TEST(vfloat4,vmul)466 TEST(vfloat4, vmul)
467 {
468 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
469 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
470 a = a * b;
471 EXPECT_EQ(a.lane<0>(), 1.0f * 0.1f);
472 EXPECT_EQ(a.lane<1>(), 2.0f * 0.2f);
473 EXPECT_EQ(a.lane<2>(), 3.0f * 0.3f);
474 EXPECT_EQ(a.lane<3>(), 4.0f * 0.4f);
475 }
476
477 /** @brief Test vfloat4 mul. */
TEST(vfloat4,vsmul)478 TEST(vfloat4, vsmul)
479 {
480 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
481 float b = 3.14f;
482 a = a * b;
483 EXPECT_EQ(a.lane<0>(), 1.0f * 3.14f);
484 EXPECT_EQ(a.lane<1>(), 2.0f * 3.14f);
485 EXPECT_EQ(a.lane<2>(), 3.0f * 3.14f);
486 EXPECT_EQ(a.lane<3>(), 4.0f * 3.14f);
487 }
488
489 /** @brief Test vfloat4 mul. */
TEST(vfloat4,svmul)490 TEST(vfloat4, svmul)
491 {
492 float a = 3.14f;
493 vfloat4 b(1.0f, 2.0f, 3.0f, 4.0f);
494 b = a * b;
495 EXPECT_EQ(b.lane<0>(), 3.14f * 1.0f);
496 EXPECT_EQ(b.lane<1>(), 3.14f * 2.0f);
497 EXPECT_EQ(b.lane<2>(), 3.14f * 3.0f);
498 EXPECT_EQ(b.lane<3>(), 3.14f * 4.0f);
499 }
500
501 /** @brief Test vfloat4 div. */
TEST(vfloat4,vdiv)502 TEST(vfloat4, vdiv)
503 {
504 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
505 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
506 a = a / b;
507 EXPECT_EQ(a.lane<0>(), 1.0f / 0.1f);
508 EXPECT_EQ(a.lane<1>(), 2.0f / 0.2f);
509 EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f);
510 EXPECT_EQ(a.lane<3>(), 4.0f / 0.4f);
511 }
512
513 /** @brief Test vfloat4 div. */
TEST(vfloat4,vsdiv)514 TEST(vfloat4, vsdiv)
515 {
516 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
517 float b = 0.3f;
518 a = a / b;
519 EXPECT_EQ(a.lane<0>(), 1.0f / 0.3f);
520 EXPECT_EQ(a.lane<1>(), 2.0f / 0.3f);
521 EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f);
522 EXPECT_EQ(a.lane<3>(), 4.0f / 0.3f);
523 }
524
525 /** @brief Test vfloat4 div. */
TEST(vfloat4,svdiv)526 TEST(vfloat4, svdiv)
527 {
528 float a = 3.0f;
529 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
530 b = a / b;
531 EXPECT_EQ(b.lane<0>(), 3.0f / 0.1f);
532 EXPECT_EQ(b.lane<1>(), 3.0f / 0.2f);
533 EXPECT_EQ(b.lane<2>(), 3.0f / 0.3f);
534 EXPECT_EQ(b.lane<3>(), 3.0f / 0.4f);
535 }
536
537 /** @brief Test vfloat4 ceq. */
TEST(vfloat4,ceq)538 TEST(vfloat4, ceq)
539 {
540 vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
541 vfloat4 b1(0.1f, 0.2f, 0.3f, 0.4f);
542 vmask4 r1 = a1 == b1;
543 EXPECT_EQ(0u, mask(r1));
544 EXPECT_EQ(false, any(r1));
545 EXPECT_EQ(false, all(r1));
546
547 vfloat4 a2(1.0f, 2.0f, 3.0f, 4.0f);
548 vfloat4 b2(1.0f, 0.2f, 0.3f, 0.4f);
549 vmask4 r2 = a2 == b2;
550 EXPECT_EQ(0x1u, mask(r2));
551 EXPECT_EQ(true, any(r2));
552 EXPECT_EQ(false, all(r2));
553
554 vfloat4 a3(1.0f, 2.0f, 3.0f, 4.0f);
555 vfloat4 b3(1.0f, 0.2f, 3.0f, 0.4f);
556 vmask4 r3 = a3 == b3;
557 EXPECT_EQ(0x5u, mask(r3));
558 EXPECT_EQ(true, any(r3));
559 EXPECT_EQ(false, all(r3));
560
561 vfloat4 a4(1.0f, 2.0f, 3.0f, 4.0f);
562 vmask4 r4 = a4 == a4;
563 EXPECT_EQ(0xFu, mask(r4));
564 EXPECT_EQ(true, any(r4));
565 EXPECT_EQ(true, all(r4));
566 }
567
568 /** @brief Test vfloat4 cne. */
TEST(vfloat4,cne)569 TEST(vfloat4, cne)
570 {
571 vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
572 vfloat4 b1(0.1f, 0.2f, 0.3f, 0.4f);
573 vmask4 r1 = a1 != b1;
574 EXPECT_EQ(0xFu, mask(r1));
575 EXPECT_EQ(true, any(r1));
576 EXPECT_EQ(true, all(r1));
577
578 vfloat4 a2(1.0f, 2.0f, 3.0f, 4.0f);
579 vfloat4 b2(1.0f, 0.2f, 0.3f, 0.4f);
580 vmask4 r2 = a2 != b2;
581 EXPECT_EQ(0xEu, mask(r2));
582 EXPECT_EQ(true, any(r2));
583 EXPECT_EQ(false, all(r2));
584
585 vfloat4 a3(1.0f, 2.0f, 3.0f, 4.0f);
586 vfloat4 b3(1.0f, 0.2f, 3.0f, 0.4f);
587 vmask4 r3 = a3 != b3;
588 EXPECT_EQ(0xAu, mask(r3));
589 EXPECT_EQ(true, any(r3));
590 EXPECT_EQ(false, all(r3));
591
592 vfloat4 a4(1.0f, 2.0f, 3.0f, 4.0f);
593 vmask4 r4 = a4 != a4;
594 EXPECT_EQ(0u, mask(r4));
595 EXPECT_EQ(false, any(r4));
596 EXPECT_EQ(false, all(r4));
597 }
598
599 /** @brief Test vfloat4 clt. */
TEST(vfloat4,clt)600 TEST(vfloat4, clt)
601 {
602 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
603 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
604 vmask4 r = a < b;
605 EXPECT_EQ(0xAu, mask(r));
606 }
607
608 /** @brief Test vfloat4 cle. */
TEST(vfloat4,cle)609 TEST(vfloat4, cle)
610 {
611 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
612 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
613 vmask4 r = a <= b;
614 EXPECT_EQ(0xEu, mask(r));
615 }
616
617 /** @brief Test vfloat4 cgt. */
TEST(vfloat4,cgt)618 TEST(vfloat4, cgt)
619 {
620 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
621 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
622 vmask4 r = a > b;
623 EXPECT_EQ(0x1u, mask(r));
624 }
625
626 /** @brief Test vfloat4 cge. */
TEST(vfloat4,cge)627 TEST(vfloat4, cge)
628 {
629 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
630 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
631 vmask4 r = a >= b;
632 EXPECT_EQ(0x5u, mask(r));
633 }
634
635 /** @brief Test vfloat4 min. */
TEST(vfloat4,min)636 TEST(vfloat4, min)
637 {
638 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
639 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
640 vfloat4 r = min(a, b);
641 EXPECT_EQ(r.lane<0>(), 0.9f);
642 EXPECT_EQ(r.lane<1>(), 2.0f);
643 EXPECT_EQ(r.lane<2>(), 3.0f);
644 EXPECT_EQ(r.lane<3>(), 4.0f);
645
646 float c = 0.3f;
647 r = min(a, c);
648 EXPECT_EQ(r.lane<0>(), 0.3f);
649 EXPECT_EQ(r.lane<1>(), 0.3f);
650 EXPECT_EQ(r.lane<2>(), 0.3f);
651 EXPECT_EQ(r.lane<3>(), 0.3f);
652
653 float d = 1.5f;
654 r = min(a, d);
655 EXPECT_EQ(r.lane<0>(), 1.0f);
656 EXPECT_EQ(r.lane<1>(), 1.5f);
657 EXPECT_EQ(r.lane<2>(), 1.5f);
658 EXPECT_EQ(r.lane<3>(), 1.5f);
659 }
660
661 /** @brief Test vfloat4 max. */
TEST(vfloat4,max)662 TEST(vfloat4, max)
663 {
664 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
665 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
666 vfloat4 r = max(a, b);
667 EXPECT_EQ(r.lane<0>(), 1.0f);
668 EXPECT_EQ(r.lane<1>(), 2.1f);
669 EXPECT_EQ(r.lane<2>(), 3.0f);
670 EXPECT_EQ(r.lane<3>(), 4.1f);
671
672 float c = 4.3f;
673 r = max(a, c);
674 EXPECT_EQ(r.lane<0>(), 4.3f);
675 EXPECT_EQ(r.lane<1>(), 4.3f);
676 EXPECT_EQ(r.lane<2>(), 4.3f);
677 EXPECT_EQ(r.lane<3>(), 4.3f);
678
679 float d = 1.5f;
680 r = max(a, d);
681 EXPECT_EQ(r.lane<0>(), 1.5f);
682 EXPECT_EQ(r.lane<1>(), 2.0f);
683 EXPECT_EQ(r.lane<2>(), 3.0f);
684 EXPECT_EQ(r.lane<3>(), 4.0f);
685 }
686
687 /** @brief Test vfloat4 clamp. */
TEST(vfloat4,clamp)688 TEST(vfloat4, clamp)
689 {
690 vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
691 vfloat4 r1 = clamp(2.1f, 3.0f, a1);
692 EXPECT_EQ(r1.lane<0>(), 2.1f);
693 EXPECT_EQ(r1.lane<1>(), 2.1f);
694 EXPECT_EQ(r1.lane<2>(), 3.0f);
695 EXPECT_EQ(r1.lane<3>(), 3.0f);
696
697 vfloat4 a2(1.0f, 2.0f, qnan, 4.0f);
698 vfloat4 r2 = clamp(2.1f, 3.0f, a2);
699 EXPECT_EQ(r2.lane<0>(), 2.1f);
700 EXPECT_EQ(r2.lane<1>(), 2.1f);
701 EXPECT_EQ(r2.lane<2>(), 2.1f);
702 EXPECT_EQ(r2.lane<3>(), 3.0f);
703 }
704
705 /** @brief Test vfloat4 clampz. */
TEST(vfloat4,clampz)706 TEST(vfloat4, clampz)
707 {
708 vfloat4 a1(-1.0f, 0.0f, 0.1f, 4.0f);
709 vfloat4 r1 = clampz(3.0f, a1);
710 EXPECT_EQ(r1.lane<0>(), 0.0f);
711 EXPECT_EQ(r1.lane<1>(), 0.0f);
712 EXPECT_EQ(r1.lane<2>(), 0.1f);
713 EXPECT_EQ(r1.lane<3>(), 3.0f);
714
715 vfloat4 a2(-1.0f, 0.0f, qnan, 4.0f);
716 vfloat4 r2 = clampz(3.0f, a2);
717 EXPECT_EQ(r2.lane<0>(), 0.0f);
718 EXPECT_EQ(r2.lane<1>(), 0.0f);
719 EXPECT_EQ(r2.lane<2>(), 0.0f);
720 EXPECT_EQ(r2.lane<3>(), 3.0f);
721 }
722
723 /** @brief Test vfloat4 clampz. */
TEST(vfloat4,clampzo)724 TEST(vfloat4, clampzo)
725 {
726 vfloat4 a1(-1.0f, 0.0f, 0.1f, 4.0f);
727 vfloat4 r1 = clampzo(a1);
728 EXPECT_EQ(r1.lane<0>(), 0.0f);
729 EXPECT_EQ(r1.lane<1>(), 0.0f);
730 EXPECT_EQ(r1.lane<2>(), 0.1f);
731 EXPECT_EQ(r1.lane<3>(), 1.0f);
732
733 vfloat4 a2(-1.0f, 0.0f, qnan, 4.0f);
734 vfloat4 r2 = clampzo(a2);
735 EXPECT_EQ(r2.lane<0>(), 0.0f);
736 EXPECT_EQ(r2.lane<1>(), 0.0f);
737 EXPECT_EQ(r2.lane<2>(), 0.0f);
738 EXPECT_EQ(r2.lane<3>(), 1.0f);
739 }
740
741 /** @brief Test vfloat4 abs. */
TEST(vfloat4,abs)742 TEST(vfloat4, abs)
743 {
744 vfloat4 a(-1.0f, 0.0f, 0.1f, 4.0f);
745 vfloat4 r = abs(a);
746 EXPECT_EQ(r.lane<0>(), 1.0f);
747 EXPECT_EQ(r.lane<1>(), 0.0f);
748 EXPECT_EQ(r.lane<2>(), 0.1f);
749 EXPECT_EQ(r.lane<3>(), 4.0f);
750 }
751
752 /** @brief Test vfloat4 round. */
TEST(vfloat4,round)753 TEST(vfloat4, round)
754 {
755 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
756 vfloat4 r1 = round(a1);
757 EXPECT_EQ(r1.lane<0>(), 1.0f);
758 EXPECT_EQ(r1.lane<1>(), 2.0f);
759 EXPECT_EQ(r1.lane<2>(), 2.0f);
760 EXPECT_EQ(r1.lane<3>(), 4.0f);
761
762 vfloat4 a2(-2.5f, -2.5f, -3.5f, -3.5f);
763 vfloat4 r2 = round(a2);
764 EXPECT_EQ(r2.lane<0>(), -2.0f);
765 EXPECT_EQ(r2.lane<2>(), -4.0f);
766 }
767
768 /** @brief Test vfloat4 hmin. */
TEST(vfloat4,hmin)769 TEST(vfloat4, hmin)
770 {
771 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
772 vfloat4 r1 = hmin(a1);
773 EXPECT_EQ(r1.lane<0>(), 1.1f);
774 EXPECT_EQ(r1.lane<1>(), 1.1f);
775 EXPECT_EQ(r1.lane<2>(), 1.1f);
776 EXPECT_EQ(r1.lane<3>(), 1.1f);
777
778 vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
779 vfloat4 r2 = hmin(a2);
780 EXPECT_EQ(r2.lane<0>(), 0.2f);
781 EXPECT_EQ(r2.lane<1>(), 0.2f);
782 EXPECT_EQ(r2.lane<2>(), 0.2f);
783 EXPECT_EQ(r2.lane<3>(), 0.2f);
784 }
785
786 /** @brief Test vfloat4 hmin_s. */
TEST(vfloat4,hmin_s)787 TEST(vfloat4, hmin_s)
788 {
789 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
790 float r1 = hmin_s(a1);
791 EXPECT_EQ(r1, 1.1f);
792
793 vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
794 float r2 = hmin_s(a2);
795 EXPECT_EQ(r2, 0.2f);
796 }
797
798 /** @brief Test vfloat4 hmin_rgb_s. */
TEST(vfloat4,hmin_rgb_s)799 TEST(vfloat4, hmin_rgb_s)
800 {
801 vfloat4 a1(1.1f, 1.5f, 1.6f, 0.2f);
802 float r1 = hmin_rgb_s(a1);
803 EXPECT_EQ(r1, 1.1f);
804
805 vfloat4 a2(1.5f, 0.9f, 1.6f, 1.2f);
806 float r2 = hmin_rgb_s(a2);
807 EXPECT_EQ(r2, 0.9f);
808 }
809
810 /** @brief Test vfloat4 hmax. */
TEST(vfloat4,hmax)811 TEST(vfloat4, hmax)
812 {
813 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
814 vfloat4 r1 = hmax(a1);
815 EXPECT_EQ(r1.lane<0>(), 4.0f);
816 EXPECT_EQ(r1.lane<1>(), 4.0f);
817 EXPECT_EQ(r1.lane<2>(), 4.0f);
818 EXPECT_EQ(r1.lane<3>(), 4.0f);
819
820 vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
821 vfloat4 r2 = hmax(a2);
822 EXPECT_EQ(r2.lane<0>(), 1.6f);
823 EXPECT_EQ(r2.lane<1>(), 1.6f);
824 EXPECT_EQ(r2.lane<2>(), 1.6f);
825 EXPECT_EQ(r2.lane<3>(), 1.6f);
826 }
827
828 /** @brief Test vfloat4 hmax_s. */
TEST(vfloat4,hmax_s)829 TEST(vfloat4, hmax_s)
830 {
831 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
832 float r1 = hmax_s(a1);
833 EXPECT_EQ(r1, 4.0f);
834
835 vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
836 float r2 = hmax_s(a2);
837 EXPECT_EQ(r2, 1.6f);
838 }
839
840 /** @brief Test vfloat4 hadd_s. */
TEST(vfloat4,hadd_s)841 TEST(vfloat4, hadd_s)
842 {
843 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
844 float sum = 1.1f + 1.5f + 1.6f + 4.0f;
845 float r = hadd_s(a1);
846 EXPECT_NEAR(r, sum, 0.005f);
847 }
848
849 /** @brief Test vfloat4 hadd_rgb_s. */
TEST(vfloat4,hadd_rgb_s)850 TEST(vfloat4, hadd_rgb_s)
851 {
852 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
853 float sum = 1.1f + 1.5f + 1.6f;
854 float r = hadd_rgb_s(a1);
855 EXPECT_NEAR(r, sum, 0.005f);
856 }
857
858 /** @brief Test vfloat4 sqrt. */
TEST(vfloat4,sqrt)859 TEST(vfloat4, sqrt)
860 {
861 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
862 vfloat4 r = sqrt(a);
863 EXPECT_EQ(r.lane<0>(), std::sqrt(1.0f));
864 EXPECT_EQ(r.lane<1>(), std::sqrt(2.0f));
865 EXPECT_EQ(r.lane<2>(), std::sqrt(3.0f));
866 EXPECT_EQ(r.lane<3>(), std::sqrt(4.0f));
867 }
868
869 /** @brief Test vfloat4 select. */
TEST(vfloat4,select)870 TEST(vfloat4, select)
871 {
872 vfloat4 m1(1.0f, 1.0f, 1.0f, 1.0f);
873 vfloat4 m2(1.0f, 2.0f, 1.0f, 2.0f);
874 vmask4 cond = m1 == m2;
875
876 vfloat4 a(1.0f, 3.0f, 3.0f, 1.0f);
877 vfloat4 b(4.0f, 2.0f, 2.0f, 4.0f);
878
879 // Select in one direction
880 vfloat4 r1 = select(a, b, cond);
881 EXPECT_EQ(r1.lane<0>(), 4.0f);
882 EXPECT_EQ(r1.lane<1>(), 3.0f);
883 EXPECT_EQ(r1.lane<2>(), 2.0f);
884 EXPECT_EQ(r1.lane<3>(), 1.0f);
885
886 // Select in the other
887 vfloat4 r2 = select(b, a, cond);
888 EXPECT_EQ(r2.lane<0>(), 1.0f);
889 EXPECT_EQ(r2.lane<1>(), 2.0f);
890 EXPECT_EQ(r2.lane<2>(), 3.0f);
891 EXPECT_EQ(r2.lane<3>(), 4.0f);
892 }
893
894 /** @brief Test vfloat4 select MSB only. */
TEST(vfloat4,select_msb)895 TEST(vfloat4, select_msb)
896 {
897 int msb_set = static_cast<int>(0x80000000);
898 vint4 msb(msb_set, 0, msb_set, 0);
899 vmask4 cond(msb.m);
900
901 vfloat4 a(1.0f, 3.0f, 3.0f, 1.0f);
902 vfloat4 b(4.0f, 2.0f, 2.0f, 4.0f);
903
904 // Select in one direction
905 vfloat4 r1 = select_msb(a, b, cond);
906 EXPECT_EQ(r1.lane<0>(), 4.0f);
907 EXPECT_EQ(r1.lane<1>(), 3.0f);
908 EXPECT_EQ(r1.lane<2>(), 2.0f);
909 EXPECT_EQ(r1.lane<3>(), 1.0f);
910
911 // Select in the other
912 vfloat4 r2 = select_msb(b, a, cond);
913 EXPECT_EQ(r2.lane<0>(), 1.0f);
914 EXPECT_EQ(r2.lane<1>(), 2.0f);
915 EXPECT_EQ(r2.lane<2>(), 3.0f);
916 EXPECT_EQ(r2.lane<3>(), 4.0f);
917 }
918
919 /** @brief Test vfloat4 gatherf. */
TEST(vfloat4,gatherf)920 TEST(vfloat4, gatherf)
921 {
922 vint4 indices(0, 4, 3, 2);
923 vfloat4 r = gatherf(f32_data, indices);
924 EXPECT_EQ(r.lane<0>(), 0.0f);
925 EXPECT_EQ(r.lane<1>(), 4.0f);
926 EXPECT_EQ(r.lane<2>(), 3.0f);
927 EXPECT_EQ(r.lane<3>(), 2.0f);
928 }
929
930 /** @brief Test vfloat4 storea. */
TEST(vfloat4,storea)931 TEST(vfloat4, storea)
932 {
933 alignas(16) float out[4];
934 vfloat4 a(f32_data);
935 storea(a, out);
936 EXPECT_EQ(out[0], 0.0f);
937 EXPECT_EQ(out[1], 1.0f);
938 EXPECT_EQ(out[2], 2.0f);
939 EXPECT_EQ(out[3], 3.0f);
940 }
941
942 /** @brief Test vfloat4 store. */
TEST(vfloat4,store)943 TEST(vfloat4, store)
944 {
945 alignas(16) float out[5];
946 vfloat4 a(f32_data);
947 store(a, &(out[1]));
948 EXPECT_EQ(out[1], 0.0f);
949 EXPECT_EQ(out[2], 1.0f);
950 EXPECT_EQ(out[3], 2.0f);
951 EXPECT_EQ(out[4], 3.0f);
952 }
953
954 /** @brief Test vfloat4 dot. */
TEST(vfloat4,dot)955 TEST(vfloat4, dot)
956 {
957 vfloat4 a1(1.0f, 2.0f, 4.0f, 8.0f);
958 vfloat4 b1(1.0f, 0.5f, 0.25f, 0.125f);
959 vfloat4 r1 = dot(a1, b1);
960 EXPECT_EQ(r1.lane<0>(), 4.0f);
961 EXPECT_EQ(r1.lane<1>(), 4.0f);
962 EXPECT_EQ(r1.lane<2>(), 4.0f);
963 EXPECT_EQ(r1.lane<3>(), 4.0f);
964
965 // These values will fail to add to the same value if reassociated
966 float l0 = 141.2540435791015625f;
967 float l1 = 5345345.5000000000000000f;
968 float l2 = 234234.7031250000000000f;
969 float l3 = 124353454080.0000000000000000f;
970
971 vfloat4 a2(1.0f, 1.0f, 1.0f, 1.0f);
972 vfloat4 b2(l0, l1, l2, l3);
973 vfloat4 r2 = dot(a2, b2);
974
975 // Test that reassociation causes a failure with the numbers we chose
976 EXPECT_FALSE(any(r2 == vfloat4(l0 + l1 + l2 + l3)));
977
978 // Test that the sum works, for the association pattern we want used
979 EXPECT_TRUE(all(r2 == vfloat4((l0 + l2) + (l1 + l3))));
980 }
981
982 /** @brief Test vfloat4 dot_s. */
TEST(vfloat4,dot_s)983 TEST(vfloat4, dot_s)
984 {
985 vfloat4 a1(1.0f, 2.0f, 4.0f, 8.0f);
986 vfloat4 b1(1.0f, 0.5f, 0.25f, 0.125f);
987 float r1 = dot_s(a1, b1);
988 EXPECT_EQ(r1, 4.0f);
989
990 // These values will fail to add to the same value if reassociated
991 float l0 = 141.2540435791015625f;
992 float l1 = 5345345.5000000000000000f;
993 float l2 = 234234.7031250000000000f;
994 float l3 = 124353454080.0000000000000000f;
995
996 vfloat4 a2(1.0f, 1.0f, 1.0f, 1.0f);
997 vfloat4 b2(l0, l1, l2, l3);
998 float r2 = dot_s(a2, b2);
999
1000 // Test that reassociation causes a failure with the numbers we chose
1001 EXPECT_NE(r2, l0 + l1 + l2 + l3);
1002
1003 // Test that the sum works, for the association pattern we want used
1004 EXPECT_EQ(r2, (l0 + l2) + (l1 + l3));
1005 }
1006
1007 /** @brief Test vfloat4 dot3. */
TEST(vfloat4,dot3)1008 TEST(vfloat4, dot3)
1009 {
1010 vfloat4 a(1.0f, 2.0f, 4.0f, 8.0f);
1011 vfloat4 b(1.0f, 0.5f, 0.25f, 0.125f);
1012 vfloat4 r = dot3(a, b);
1013 EXPECT_EQ(r.lane<0>(), 3.0f);
1014 EXPECT_EQ(r.lane<1>(), 3.0f);
1015 EXPECT_EQ(r.lane<2>(), 3.0f);
1016 EXPECT_EQ(r.lane<3>(), 0.0f);
1017 }
1018
1019 /** @brief Test vfloat4 dot3_s. */
TEST(vfloat4,dot3_s)1020 TEST(vfloat4, dot3_s)
1021 {
1022 vfloat4 a(1.0f, 2.0f, 4.0f, 8.0f);
1023 vfloat4 b(1.0f, 0.5f, 0.25f, 0.125f);
1024 float r = dot3_s(a, b);
1025 EXPECT_EQ(r, 3.0f);
1026 }
1027
1028 /** @brief Test vfloat4 normalize. */
TEST(vfloat4,normalize)1029 TEST(vfloat4, normalize)
1030 {
1031 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
1032 vfloat4 r = normalize(a);
1033 EXPECT_NEAR(r.lane<0>(), 1.0f / astc::sqrt(30.0f), 0.0005f);
1034 EXPECT_NEAR(r.lane<1>(), 2.0f / astc::sqrt(30.0f), 0.0005f);
1035 EXPECT_NEAR(r.lane<2>(), 3.0f / astc::sqrt(30.0f), 0.0005f);
1036 EXPECT_NEAR(r.lane<3>(), 4.0f / astc::sqrt(30.0f), 0.0005f);
1037 }
1038
1039 /** @brief Test vfloat4 normalize_safe. */
TEST(vfloat4,normalize_safe)1040 TEST(vfloat4, normalize_safe)
1041 {
1042 vfloat4 s(-1.0f, -1.0f, -1.0f, -1.0f);
1043
1044 vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
1045 vfloat4 r1 = normalize_safe(a1, s);
1046 EXPECT_NEAR(r1.lane<0>(), 1.0f / astc::sqrt(30.0f), 0.0005f);
1047 EXPECT_NEAR(r1.lane<1>(), 2.0f / astc::sqrt(30.0f), 0.0005f);
1048 EXPECT_NEAR(r1.lane<2>(), 3.0f / astc::sqrt(30.0f), 0.0005f);
1049 EXPECT_NEAR(r1.lane<3>(), 4.0f / astc::sqrt(30.0f), 0.0005f);
1050
1051 vfloat4 a2(0.0f, 0.0f, 0.0f, 0.0f);
1052 vfloat4 r2 = normalize_safe(a2, s);
1053 EXPECT_EQ(r2.lane<0>(), -1.0f);
1054 EXPECT_EQ(r2.lane<1>(), -1.0f);
1055 EXPECT_EQ(r2.lane<2>(), -1.0f);
1056 EXPECT_EQ(r2.lane<3>(), -1.0f);
1057 }
1058
1059 /** @brief Test vfloat4 float_to_int. */
TEST(vfloat4,float_to_int)1060 TEST(vfloat4, float_to_int)
1061 {
1062 vfloat4 a(1.1f, 1.5f, -1.6f, 4.0f);
1063 vint4 r = float_to_int(a);
1064 EXPECT_EQ(r.lane<0>(), 1);
1065 EXPECT_EQ(r.lane<1>(), 1);
1066 EXPECT_EQ(r.lane<2>(), -1);
1067 EXPECT_EQ(r.lane<3>(), 4);
1068 }
1069
1070 /** @brief Test vfloat4 round. */
TEST(vfloat4,float_to_int_rtn)1071 TEST(vfloat4, float_to_int_rtn)
1072 {
1073 vfloat4 a(1.1f, 1.5f, 1.6f, 4.0f);
1074 vint4 r = float_to_int_rtn(a);
1075 EXPECT_EQ(r.lane<0>(), 1);
1076 EXPECT_EQ(r.lane<1>(), 2);
1077 EXPECT_EQ(r.lane<2>(), 2);
1078 EXPECT_EQ(r.lane<3>(), 4);
1079 }
1080
1081 /** @brief Test vfloat4 round. */
TEST(vfloat4,int_to_float)1082 TEST(vfloat4, int_to_float)
1083 {
1084 vint4 a(1, 2, 3, 4);
1085 vfloat4 r = int_to_float(a);
1086 EXPECT_EQ(r.lane<0>(), 1.0f);
1087 EXPECT_EQ(r.lane<1>(), 2.0f);
1088 EXPECT_EQ(r.lane<2>(), 3.0f);
1089 EXPECT_EQ(r.lane<3>(), 4.0f);
1090 }
1091
1092 /** @brief Test vfloat4 float to fp16 conversion. */
TEST(vfloat4,float_to_float16)1093 TEST(vfloat4, float_to_float16)
1094 {
1095 vfloat4 a(1.5, 234.5, 345345.0, qnan);
1096 vint4 r = float_to_float16(a);
1097
1098 // Normal numbers
1099 EXPECT_EQ(r.lane<0>(), 0x3E00);
1100 EXPECT_EQ(r.lane<1>(), 0x5B54);
1101
1102 // Large numbers convert to infinity
1103 EXPECT_EQ(r.lane<2>(), 0x7C00);
1104
1105 // NaN must convert to any valid NaN encoding
1106 EXPECT_EQ((r.lane<3>() >> 10) & 0x1F, 0x1F); // Exponent must be all 1s
1107 EXPECT_NE(r.lane<3>() & (0x3FF), 0); // Mantissa must be non-zero
1108 }
1109
1110 /** @brief Test float to fp16 conversion. */
TEST(sfloat,float_to_float16)1111 TEST(sfloat, float_to_float16)
1112 {
1113 int r = float_to_float16(234.5);
1114 EXPECT_EQ(r, 0x5B54);
1115 }
1116
1117 /** @brief Test vfloat4 fp16 to float conversion. */
TEST(vfloat4,float16_to_float)1118 TEST(vfloat4, float16_to_float)
1119 { vint4 a(0x3E00, 0x5B54, 0x7C00, 0xFFFF);
1120 vfloat4 r = float16_to_float(a);
1121
1122 // Normal numbers
1123 EXPECT_EQ(r.lane<0>(), 1.5);
1124 EXPECT_EQ(r.lane<1>(), 234.5);
1125
1126 // Infinities must be preserved
1127 EXPECT_NE(std::isinf(r.lane<2>()), 0);
1128
1129 // NaNs must be preserved
1130 EXPECT_NE(std::isnan(r.lane<3>()), 0);
1131 }
1132
1133 /** @brief Test fp16 to float conversion. */
TEST(sfloat,float16_to_float)1134 TEST(sfloat, float16_to_float)
1135 {
1136 float r = float16_to_float(0x5B54);
1137 EXPECT_EQ(r, 234.5);
1138 }
1139
1140 // VINT4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1141
1142 /** @brief Test unaligned vint4 data load. */
TEST(vint4,UnalignedLoad)1143 TEST(vint4, UnalignedLoad)
1144 {
1145 vint4 a(&(s32_data[1]));
1146 EXPECT_EQ(a.lane<0>(), 1);
1147 EXPECT_EQ(a.lane<1>(), 2);
1148 EXPECT_EQ(a.lane<2>(), 3);
1149 EXPECT_EQ(a.lane<3>(), 4);
1150 }
1151
1152 /** @brief Test unaligned vint4 data load. */
TEST(vint4,UnalignedLoad8)1153 TEST(vint4, UnalignedLoad8)
1154 {
1155 vint4 a(&(u8_data[1]));
1156 EXPECT_EQ(a.lane<0>(), 1);
1157 EXPECT_EQ(a.lane<1>(), 2);
1158 EXPECT_EQ(a.lane<2>(), 3);
1159 EXPECT_EQ(a.lane<3>(), 4);
1160 }
1161
1162 /** @brief Test scalar duplicated vint4 load. */
TEST(vint4,ScalarDupLoad)1163 TEST(vint4, ScalarDupLoad)
1164 {
1165 vint4 a(42);
1166 EXPECT_EQ(a.lane<0>(), 42);
1167 EXPECT_EQ(a.lane<1>(), 42);
1168 EXPECT_EQ(a.lane<2>(), 42);
1169 EXPECT_EQ(a.lane<3>(), 42);
1170 }
1171
1172 /** @brief Test scalar vint4 load. */
TEST(vint4,ScalarLoad)1173 TEST(vint4, ScalarLoad)
1174 {
1175 vint4 a(11, 22, 33, 44);
1176 EXPECT_EQ(a.lane<0>(), 11);
1177 EXPECT_EQ(a.lane<1>(), 22);
1178 EXPECT_EQ(a.lane<2>(), 33);
1179 EXPECT_EQ(a.lane<3>(), 44);
1180 }
1181
1182 /** @brief Test copy vint4 load. */
TEST(vint4,CopyLoad)1183 TEST(vint4, CopyLoad)
1184 {
1185 vint4 s(11, 22, 33, 44);
1186 vint4 a(s.m);
1187 EXPECT_EQ(a.lane<0>(), 11);
1188 EXPECT_EQ(a.lane<1>(), 22);
1189 EXPECT_EQ(a.lane<2>(), 33);
1190 EXPECT_EQ(a.lane<3>(), 44);
1191 }
1192
1193 /** @brief Test vint4 scalar lane set. */
TEST(int4,SetLane)1194 TEST(int4, SetLane)
1195 {
1196 vint4 a(0);
1197
1198 a.set_lane<0>(1);
1199 EXPECT_EQ(a.lane<0>(), 1);
1200 EXPECT_EQ(a.lane<1>(), 0);
1201 EXPECT_EQ(a.lane<2>(), 0);
1202 EXPECT_EQ(a.lane<3>(), 0);
1203
1204 a.set_lane<1>(2);
1205 EXPECT_EQ(a.lane<0>(), 1);
1206 EXPECT_EQ(a.lane<1>(), 2);
1207 EXPECT_EQ(a.lane<2>(), 0);
1208 EXPECT_EQ(a.lane<3>(), 0);
1209
1210 a.set_lane<2>(3);
1211 EXPECT_EQ(a.lane<0>(), 1);
1212 EXPECT_EQ(a.lane<1>(), 2);
1213 EXPECT_EQ(a.lane<2>(), 3);
1214 EXPECT_EQ(a.lane<3>(), 0);
1215
1216 a.set_lane<3>(4);
1217 EXPECT_EQ(a.lane<0>(), 1);
1218 EXPECT_EQ(a.lane<1>(), 2);
1219 EXPECT_EQ(a.lane<2>(), 3);
1220 EXPECT_EQ(a.lane<3>(), 4);
1221 }
1222
1223 /** @brief Test vint4 zero. */
TEST(vint4,Zero)1224 TEST(vint4, Zero)
1225 {
1226 vint4 a = vint4::zero();
1227 EXPECT_EQ(a.lane<0>(), 0);
1228 EXPECT_EQ(a.lane<1>(), 0);
1229 EXPECT_EQ(a.lane<2>(), 0);
1230 EXPECT_EQ(a.lane<3>(), 0);
1231 }
1232
1233 /** @brief Test vint4 load1. */
TEST(vint4,Load1)1234 TEST(vint4, Load1)
1235 {
1236 int s = 42;
1237 vint4 a = vint4::load1(&s);
1238 EXPECT_EQ(a.lane<0>(), 42);
1239 EXPECT_EQ(a.lane<1>(), 42);
1240 EXPECT_EQ(a.lane<2>(), 42);
1241 EXPECT_EQ(a.lane<3>(), 42);
1242 }
1243
1244 /** @brief Test vint4 loada. */
TEST(vint4,Loada)1245 TEST(vint4, Loada)
1246 {
1247 vint4 a = vint4::loada(&(s32_data[0]));
1248 EXPECT_EQ(a.lane<0>(), 0);
1249 EXPECT_EQ(a.lane<1>(), 1);
1250 EXPECT_EQ(a.lane<2>(), 2);
1251 EXPECT_EQ(a.lane<3>(), 3);
1252 }
1253
1254 /** @brief Test vint4 lane_id. */
TEST(vint4,LaneID)1255 TEST(vint4, LaneID)
1256 {
1257 vint4 a = vint4::lane_id();
1258 EXPECT_EQ(a.lane<0>(), 0);
1259 EXPECT_EQ(a.lane<1>(), 1);
1260 EXPECT_EQ(a.lane<2>(), 2);
1261 EXPECT_EQ(a.lane<3>(), 3);
1262 }
1263
1264 /** @brief Test vint4 add. */
TEST(vint4,vadd)1265 TEST(vint4, vadd)
1266 {
1267 vint4 a(1, 2, 3, 4);
1268 vint4 b(2, 3, 4, 5);
1269 a = a + b;
1270 EXPECT_EQ(a.lane<0>(), 1 + 2);
1271 EXPECT_EQ(a.lane<1>(), 2 + 3);
1272 EXPECT_EQ(a.lane<2>(), 3 + 4);
1273 EXPECT_EQ(a.lane<3>(), 4 + 5);
1274 }
1275
1276 /** @brief Test vint4 self-add. */
TEST(vint4,vselfadd)1277 TEST(vint4, vselfadd)
1278 {
1279 vint4 a(1, 2, 3, 4);
1280 vint4 b(2, 3, 4, 5);
1281 a += b;
1282
1283 EXPECT_EQ(a.lane<0>(), 1 + 2);
1284 EXPECT_EQ(a.lane<1>(), 2 + 3);
1285 EXPECT_EQ(a.lane<2>(), 3 + 4);
1286 EXPECT_EQ(a.lane<3>(), 4 + 5);
1287 }
1288
1289 /** @brief Test vint4 add. */
TEST(vint4,vsadd)1290 TEST(vint4, vsadd)
1291 {
1292 vint4 a(1, 2, 3, 4);
1293 int b = 5;
1294 a = a + b;
1295 EXPECT_EQ(a.lane<0>(), 1 + 5);
1296 EXPECT_EQ(a.lane<1>(), 2 + 5);
1297 EXPECT_EQ(a.lane<2>(), 3 + 5);
1298 EXPECT_EQ(a.lane<3>(), 4 + 5);
1299 }
1300
1301 /** @brief Test vint4 sub. */
TEST(vint4,vsub)1302 TEST(vint4, vsub)
1303 {
1304 vint4 a(1, 2, 4, 4);
1305 vint4 b(2, 3, 3, 5);
1306 a = a - b;
1307 EXPECT_EQ(a.lane<0>(), 1 - 2);
1308 EXPECT_EQ(a.lane<1>(), 2 - 3);
1309 EXPECT_EQ(a.lane<2>(), 4 - 3);
1310 EXPECT_EQ(a.lane<3>(), 4 - 5);
1311 }
1312
1313 /** @brief Test vint4 sub. */
TEST(vint4,vssub)1314 TEST(vint4, vssub)
1315 {
1316 vint4 a(1, 2, 4, 4);
1317 int b = 5;
1318 a = a - b;
1319 EXPECT_EQ(a.lane<0>(), 1 - 5);
1320 EXPECT_EQ(a.lane<1>(), 2 - 5);
1321 EXPECT_EQ(a.lane<2>(), 4 - 5);
1322 EXPECT_EQ(a.lane<3>(), 4 - 5);
1323 }
1324
1325 /** @brief Test vint4 mul. */
TEST(vint4,vmul)1326 TEST(vint4, vmul)
1327 {
1328 vint4 a(1, 2, 4, 4);
1329 vint4 b(2, 3, 3, 5);
1330 a = a * b;
1331 EXPECT_EQ(a.lane<0>(), 1 * 2);
1332 EXPECT_EQ(a.lane<1>(), 2 * 3);
1333 EXPECT_EQ(a.lane<2>(), 4 * 3);
1334 EXPECT_EQ(a.lane<3>(), 4 * 5);
1335 }
1336
1337 /** @brief Test vint4 mul. */
TEST(vint4,vsmul)1338 TEST(vint4, vsmul)
1339 {
1340 vint4 a(1, 2, 4, 4);
1341 a = a * 3;
1342 EXPECT_EQ(a.lane<0>(), 1 * 3);
1343 EXPECT_EQ(a.lane<1>(), 2 * 3);
1344 EXPECT_EQ(a.lane<2>(), 4 * 3);
1345 EXPECT_EQ(a.lane<3>(), 4 * 3);
1346
1347 vint4 b(1, 2, -4, 4);
1348 b = b * -3;
1349 EXPECT_EQ(b.lane<0>(), 1 * -3);
1350 EXPECT_EQ(b.lane<1>(), 2 * -3);
1351 EXPECT_EQ(b.lane<2>(), -4 * -3);
1352 EXPECT_EQ(b.lane<3>(), 4 * -3);
1353 }
1354
1355 /** @brief Test vint4 bitwise invert. */
TEST(vint4,bit_invert)1356 TEST(vint4, bit_invert)
1357 {
1358 vint4 a(-1, 0, 1, 2);
1359 a = ~a;
1360 EXPECT_EQ(a.lane<0>(), ~-1);
1361 EXPECT_EQ(a.lane<1>(), ~0);
1362 EXPECT_EQ(a.lane<2>(), ~1);
1363 EXPECT_EQ(a.lane<3>(), ~2);
1364 }
1365
1366 /** @brief Test vint4 bitwise or. */
TEST(vint4,bit_vor)1367 TEST(vint4, bit_vor)
1368 {
1369 vint4 a(1, 2, 3, 4);
1370 vint4 b(2, 3, 4, 5);
1371 a = a | b;
1372 EXPECT_EQ(a.lane<0>(), 3);
1373 EXPECT_EQ(a.lane<1>(), 3);
1374 EXPECT_EQ(a.lane<2>(), 7);
1375 EXPECT_EQ(a.lane<3>(), 5);
1376 }
1377
/** @brief Test vint4 vector | scalar bitwise or. */
TEST(vint4, bit_vsor)
{
	vint4 vec(1, 2, 3, 4);
	int bits = 2;
	vint4 ored = vec | bits;

	EXPECT_EQ(ored.lane<0>(), 3);
	EXPECT_EQ(ored.lane<1>(), 2);
	EXPECT_EQ(ored.lane<2>(), 3);
	EXPECT_EQ(ored.lane<3>(), 6);
}
1388
1389 /** @brief Test vint4 bitwise and. */
TEST(vint4,bit_vand)1390 TEST(vint4, bit_vand)
1391 {
1392 vint4 a(1, 2, 3, 4);
1393 vint4 b(2, 3, 4, 5);
1394 a = a & b;
1395 EXPECT_EQ(a.lane<0>(), 0);
1396 EXPECT_EQ(a.lane<1>(), 2);
1397 EXPECT_EQ(a.lane<2>(), 0);
1398 EXPECT_EQ(a.lane<3>(), 4);
1399 }
1400
1401 /** @brief Test vint4 bitwise and. */
TEST(vint4,bit_vsand)1402 TEST(vint4, bit_vsand)
1403 {
1404 vint4 a(1, 2, 3, 4);
1405 int b = 2;
1406 a = a & b;
1407 EXPECT_EQ(a.lane<0>(), 0);
1408 EXPECT_EQ(a.lane<1>(), 2);
1409 EXPECT_EQ(a.lane<2>(), 2);
1410 EXPECT_EQ(a.lane<3>(), 0);
1411 }
1412
1413 /** @brief Test vint4 bitwise xor. */
TEST(vint4,bit_vxor)1414 TEST(vint4, bit_vxor)
1415 {
1416 vint4 a(1, 2, 3, 4);
1417 vint4 b(2, 3, 4, 5);
1418 a = a ^ b;
1419 EXPECT_EQ(a.lane<0>(), 3);
1420 EXPECT_EQ(a.lane<1>(), 1);
1421 EXPECT_EQ(a.lane<2>(), 7);
1422 EXPECT_EQ(a.lane<3>(), 1);
1423 }
1424
1425 /** @brief Test vint4 bitwise xor. */
TEST(vint4,bit_vsxor)1426 TEST(vint4, bit_vsxor)
1427 {
1428 vint4 a(1, 2, 3, 4);
1429 int b = 2;
1430 a = a ^ b;
1431 EXPECT_EQ(a.lane<0>(), 3);
1432 EXPECT_EQ(a.lane<1>(), 0);
1433 EXPECT_EQ(a.lane<2>(), 1);
1434 EXPECT_EQ(a.lane<3>(), 6);
1435 }
1436
1437 /** @brief Test vint4 ceq. */
TEST(vint4,ceq)1438 TEST(vint4, ceq)
1439 {
1440 vint4 a1(1, 2, 3, 4);
1441 vint4 b1(0, 1, 2, 3);
1442 vmask4 r1 = a1 == b1;
1443 EXPECT_EQ(0u, mask(r1));
1444 EXPECT_EQ(false, any(r1));
1445 EXPECT_EQ(false, all(r1));
1446
1447 vint4 a2(1, 2, 3, 4);
1448 vint4 b2(1, 0, 0, 0);
1449 vmask4 r2 = a2 == b2;
1450 EXPECT_EQ(0x1u, mask(r2));
1451 EXPECT_EQ(true, any(r2));
1452 EXPECT_EQ(false, all(r2));
1453
1454 vint4 a3(1, 2, 3, 4);
1455 vint4 b3(1, 0, 3, 0);
1456 vmask4 r3 = a3 == b3;
1457 EXPECT_EQ(0x5u, mask(r3));
1458 EXPECT_EQ(true, any(r3));
1459 EXPECT_EQ(false, all(r3));
1460
1461 vint4 a4(1, 2, 3, 4);
1462 vmask4 r4 = a4 == a4;
1463 EXPECT_EQ(0xFu, mask(r4));
1464 EXPECT_EQ(true, any(r4));
1465 EXPECT_EQ(true, all(r4));
1466 }
1467
1468 /** @brief Test vint4 cne. */
TEST(vint4,cne)1469 TEST(vint4, cne)
1470 {
1471 vint4 a1(1, 2, 3, 4);
1472 vint4 b1(0, 1, 2, 3);
1473 vmask4 r1 = a1 != b1;
1474 EXPECT_EQ(0xFu, mask(r1));
1475 EXPECT_EQ(true, any(r1));
1476 EXPECT_EQ(true, all(r1));
1477
1478 vint4 a2(1, 2, 3, 4);
1479 vint4 b2(1, 0, 0, 0);
1480 vmask4 r2 = a2 != b2;
1481 EXPECT_EQ(0xEu, mask(r2));
1482 EXPECT_EQ(true, any(r2));
1483 EXPECT_EQ(false, all(r2));
1484
1485 vint4 a3(1, 2, 3, 4);
1486 vint4 b3(1, 0, 3, 0);
1487 vmask4 r3 = a3 != b3;
1488 EXPECT_EQ(0xAu, mask(r3));
1489 EXPECT_EQ(true, any(r3));
1490 EXPECT_EQ(false, all(r3));
1491
1492 vint4 a4(1, 2, 3, 4);
1493 vmask4 r4 = a4 != a4;
1494 EXPECT_EQ(0u, mask(r4));
1495 EXPECT_EQ(false, any(r4));
1496 EXPECT_EQ(false, all(r4));
1497 }
1498
1499 /** @brief Test vint4 clt. */
TEST(vint4,clt)1500 TEST(vint4, clt)
1501 {
1502 vint4 a(1, 2, 3, 4);
1503 vint4 b(0, 3, 3, 5);
1504 vmask4 r = a < b;
1505 EXPECT_EQ(0xAu, mask(r));
1506 }
1507
1508 /** @brief Test vint4 cgt. */
TEST(vint4,cle)1509 TEST(vint4, cle)
1510 {
1511 vint4 a(1, 2, 3, 4);
1512 vint4 b(0, 3, 3, 5);
1513 vmask4 r = a > b;
1514 EXPECT_EQ(0x1u, mask(r));
1515 }
1516
1517 /** @brief Test vint4 lsl. */
TEST(vint4,lsl)1518 TEST(vint4, lsl)
1519 {
1520 vint4 a(1, 2, 4, 4);
1521 a = lsl<0>(a);
1522 EXPECT_EQ(a.lane<0>(), 1);
1523 EXPECT_EQ(a.lane<1>(), 2);
1524 EXPECT_EQ(a.lane<2>(), 4);
1525 EXPECT_EQ(a.lane<3>(), 4);
1526
1527 a = lsl<1>(a);
1528 EXPECT_EQ(a.lane<0>(), 2);
1529 EXPECT_EQ(a.lane<1>(), 4);
1530 EXPECT_EQ(a.lane<2>(), 8);
1531 EXPECT_EQ(a.lane<3>(), 8);
1532
1533 a = lsl<2>(a);
1534 EXPECT_EQ(a.lane<0>(), 8);
1535 EXPECT_EQ(a.lane<1>(), 16);
1536 EXPECT_EQ(a.lane<2>(), 32);
1537 EXPECT_EQ(a.lane<3>(), 32);
1538 }
1539
1540 /** @brief Test vint4 lsr. */
TEST(vint4,lsr)1541 TEST(vint4, lsr)
1542 {
1543 vint4 a(1, 2, 4, -4);
1544 a = lsr<0>(a);
1545 EXPECT_EQ(a.lane<0>(), 1);
1546 EXPECT_EQ(a.lane<1>(), 2);
1547 EXPECT_EQ(a.lane<2>(), 4);
1548 EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC));
1549
1550 a = lsr<1>(a);
1551 EXPECT_EQ(a.lane<0>(), 0);
1552 EXPECT_EQ(a.lane<1>(), 1);
1553 EXPECT_EQ(a.lane<2>(), 2);
1554 EXPECT_EQ(a.lane<3>(), 0x7FFFFFFE);
1555
1556 a = lsr<2>(a);
1557 EXPECT_EQ(a.lane<0>(), 0);
1558 EXPECT_EQ(a.lane<1>(), 0);
1559 EXPECT_EQ(a.lane<2>(), 0);
1560 EXPECT_EQ(a.lane<3>(), 0x1FFFFFFF);
1561 }
1562
1563 /** @brief Test vint4 asr. */
TEST(vint4,asr)1564 TEST(vint4, asr)
1565 {
1566 vint4 a(1, 2, 4, -4);
1567 a = asr<0>(a);
1568 EXPECT_EQ(a.lane<0>(), 1);
1569 EXPECT_EQ(a.lane<1>(), 2);
1570 EXPECT_EQ(a.lane<2>(), 4);
1571 EXPECT_EQ(a.lane<3>(), -4);
1572
1573 a = asr<1>(a);
1574 EXPECT_EQ(a.lane<0>(), 0);
1575 EXPECT_EQ(a.lane<1>(), 1);
1576 EXPECT_EQ(a.lane<2>(), 2);
1577 EXPECT_EQ(a.lane<3>(), -2);
1578
1579 // Note - quirk of asr is that you will get "stuck" at -1
1580 a = asr<2>(a);
1581 EXPECT_EQ(a.lane<0>(), 0);
1582 EXPECT_EQ(a.lane<1>(), 0);
1583 EXPECT_EQ(a.lane<2>(), 0);
1584 EXPECT_EQ(a.lane<3>(), -1);
1585 }
1586
1587 /** @brief Test vint4 min. */
TEST(vint4,min)1588 TEST(vint4, min)
1589 {
1590 vint4 a(1, 2, 3, 4);
1591 vint4 b(0, 3, 3, 5);
1592 vint4 r = min(a, b);
1593 EXPECT_EQ(r.lane<0>(), 0);
1594 EXPECT_EQ(r.lane<1>(), 2);
1595 EXPECT_EQ(r.lane<2>(), 3);
1596 EXPECT_EQ(r.lane<3>(), 4);
1597 }
1598
1599 /** @brief Test vint4 max. */
TEST(vint4,max)1600 TEST(vint4, max)
1601 {
1602 vint4 a(1, 2, 3, 4);
1603 vint4 b(0, 3, 3, 5);
1604 vint4 r = max(a, b);
1605 EXPECT_EQ(r.lane<0>(), 1);
1606 EXPECT_EQ(r.lane<1>(), 3);
1607 EXPECT_EQ(r.lane<2>(), 3);
1608 EXPECT_EQ(r.lane<3>(), 5);
1609 }
1610
1611 /** @brief Test vint4 clamp. */
TEST(vint4,clamp)1612 TEST(vint4, clamp)
1613 {
1614 vint4 a(1, 2, 3, 4);
1615 vint4 r = clamp(2, 3, a);
1616 EXPECT_EQ(r.lane<0>(), 2);
1617 EXPECT_EQ(r.lane<1>(), 2);
1618 EXPECT_EQ(r.lane<2>(), 3);
1619 EXPECT_EQ(r.lane<3>(), 3);
1620 }
1621
1622 /** @brief Test vint4 hmin. */
TEST(vint4,hmin)1623 TEST(vint4, hmin)
1624 {
1625 vint4 a1(1, 2, 1, 2);
1626 vint4 r1 = hmin(a1);
1627 EXPECT_EQ(r1.lane<0>(), 1);
1628 EXPECT_EQ(r1.lane<1>(), 1);
1629 EXPECT_EQ(r1.lane<2>(), 1);
1630 EXPECT_EQ(r1.lane<3>(), 1);
1631
1632 vint4 a2(1, 2, -1, 5);
1633 vint4 r2 = hmin(a2);
1634 EXPECT_EQ(r2.lane<0>(), -1);
1635 EXPECT_EQ(r2.lane<1>(), -1);
1636 EXPECT_EQ(r2.lane<2>(), -1);
1637 EXPECT_EQ(r2.lane<3>(), -1);
1638 }
1639
1640 /** @brief Test vint4 hmax. */
TEST(vint4,hmax)1641 TEST(vint4, hmax)
1642 {
1643 vint4 a1(1, 3, 1, 2);
1644 vint4 r1 = hmax(a1);
1645 EXPECT_EQ(r1.lane<0>(), 3);
1646 EXPECT_EQ(r1.lane<1>(), 3);
1647 EXPECT_EQ(r1.lane<2>(), 3);
1648 EXPECT_EQ(r1.lane<3>(), 3);
1649
1650 vint4 a2(1, 2, -1, 5);
1651 vint4 r2 = hmax(a2);
1652 EXPECT_EQ(r2.lane<0>(), 5);
1653 EXPECT_EQ(r2.lane<1>(), 5);
1654 EXPECT_EQ(r2.lane<2>(), 5);
1655 EXPECT_EQ(r2.lane<3>(), 5);
1656 }
1657
1658 /** @brief Test vint4 hadd_s. */
TEST(vint4,hadd_s)1659 TEST(vint4, hadd_s)
1660 {
1661 vint4 a1(1, 3, 5, 7);
1662 int r1 = hadd_s(a1);
1663 EXPECT_EQ(r1, 16);
1664
1665 vint4 a2(1, 2, -1, 5);
1666 int r2 = hadd_s(a2);
1667 EXPECT_EQ(r2, 7);
1668 }
1669
1670 /** @brief Test vint4 hadd_rgb_s. */
TEST(vint4,hadd_rgb_s)1671 TEST(vint4, hadd_rgb_s)
1672 {
1673 vint4 a1(1, 3, 5, 7);
1674 int r1 = hadd_rgb_s(a1);
1675 EXPECT_EQ(r1, 9);
1676
1677 vint4 a2(1, 2, -1, 5);
1678 int r2 = hadd_rgb_s(a2);
1679 EXPECT_EQ(r2, 2);
1680 }
1681
1682 /** @brief Test vint4 clz. */
TEST(vint4,clz)1683 TEST(vint4, clz)
1684 {
1685 int msb_set = static_cast<int>(0x80000000);
1686 vint4 a1(msb_set, 0x40000000, 0x20000000, 0x10000000);
1687 vint4 r1 = clz(a1);
1688 EXPECT_EQ(r1.lane<0>(), 0);
1689 EXPECT_EQ(r1.lane<1>(), 1);
1690 EXPECT_EQ(r1.lane<2>(), 2);
1691 EXPECT_EQ(r1.lane<3>(), 3);
1692
1693 vint4 a2(0x0, 0x1, 0x2, 0x4);
1694 vint4 r2 = clz(a2);
1695 EXPECT_EQ(r2.lane<0>(), 32);
1696 EXPECT_EQ(r2.lane<1>(), 31);
1697 EXPECT_EQ(r2.lane<2>(), 30);
1698 EXPECT_EQ(r2.lane<3>(), 29);
1699 }
1700
1701 /** @brief Test vint4 two_to_the_n. */
TEST(vint4,two_to_the_n)1702 TEST(vint4, two_to_the_n)
1703 {
1704 vint4 a1(0, 1, 2, 3);
1705 vint4 r1 = two_to_the_n(a1);
1706 EXPECT_EQ(r1.lane<0>(), 1 << 0);
1707 EXPECT_EQ(r1.lane<1>(), 1 << 1);
1708 EXPECT_EQ(r1.lane<2>(), 1 << 2);
1709 EXPECT_EQ(r1.lane<3>(), 1 << 3);
1710
1711 vint4 a2(27, 28, 29, 30);
1712 vint4 r2 = two_to_the_n(a2);
1713 EXPECT_EQ(r2.lane<0>(), 1 << 27);
1714 EXPECT_EQ(r2.lane<1>(), 1 << 28);
1715 EXPECT_EQ(r2.lane<2>(), 1 << 29);
1716 EXPECT_EQ(r2.lane<3>(), 1 << 30);
1717
1718 // Shifts higher than 30 are not allowed as it overflows the int type;
1719 // and results in implementation-defined behavior because of how we
1720 // generate the shifted result in two_to_the_n().
1721 // - Shift by 31 shifts into sign bit
1722 // - Shift by 32 shifts off the end
1723 }
1724
1725 /** @brief Test vint4 storea. */
TEST(vint4,storea)1726 TEST(vint4, storea)
1727 {
1728 alignas(16) int out[4];
1729 vint4 a(s32_data);
1730 storea(a, out);
1731 EXPECT_EQ(out[0], 0);
1732 EXPECT_EQ(out[1], 1);
1733 EXPECT_EQ(out[2], 2);
1734 EXPECT_EQ(out[3], 3);
1735 }
1736
1737 /** @brief Test vint4 store. */
TEST(vint4,store)1738 TEST(vint4, store)
1739 {
1740 alignas(16) int out[5];
1741 vint4 a(s32_data);
1742 store(a, &(out[1]));
1743 EXPECT_EQ(out[1], 0);
1744 EXPECT_EQ(out[2], 1);
1745 EXPECT_EQ(out[3], 2);
1746 EXPECT_EQ(out[4], 3);
1747 }
1748
1749 /** @brief Test vint4 store_nbytes. */
TEST(vint4,store_nbytes)1750 TEST(vint4, store_nbytes)
1751 {
1752 alignas(16) int out;
1753 vint4 a(42, 314, 75, 90);
1754 store_nbytes(a, reinterpret_cast<uint8_t*>(&out));
1755 EXPECT_EQ(out, 42);
1756 }
1757
1758 /** @brief Test vint8 store_lanes_masked. */
TEST(vint4,store_lanes_masked)1759 TEST(vint4, store_lanes_masked)
1760 {
1761 int resulta[4] { 0 };
1762
1763 // Store nothing
1764 vmask4 mask1 = vint4(0) == vint4(1);
1765 vint4 data1 = vint4(1);
1766
1767 store_lanes_masked(resulta, data1, mask1);
1768 vint4 result1v(resulta);
1769 vint4 expect1v = vint4::zero();
1770 EXPECT_TRUE(all(result1v == expect1v));
1771
1772 // Store half
1773 vmask4 mask2 = vint4(1, 1, 0, 0) == vint4(1);
1774 vint4 data2 = vint4(2);
1775
1776 store_lanes_masked(resulta, data2, mask2);
1777 vint4 result2v(resulta);
1778 vint4 expect2v = vint4(2, 2, 0, 0);
1779 EXPECT_TRUE(all(result2v == expect2v));
1780
1781 // Store all
1782 vmask4 mask3 = vint4(1) == vint4(1);
1783 vint4 data3 = vint4(3);
1784
1785 store_lanes_masked(resulta, data3, mask3);
1786 vint4 result3v(resulta);
1787 vint4 expect3v = vint4(3);
1788 EXPECT_TRUE(all(result3v == expect3v));
1789 }
1790
1791 /** @brief Test vint8 store_lanes_masked to unaligned address. */
TEST(vint4,store_lanes_masked_unaligned)1792 TEST(vint4, store_lanes_masked_unaligned)
1793 {
1794 int8_t resulta[17] { 0 };
1795
1796 // Store nothing
1797 vmask4 mask1 = vint4(0) == vint4(1);
1798 vint4 data1 = vint4(1);
1799
1800 store_lanes_masked(reinterpret_cast<int*>(resulta + 1), data1, mask1);
1801 vint4 result1v(reinterpret_cast<int*>(resulta + 1));
1802 vint4 expect1v = vint4::zero();
1803 EXPECT_TRUE(all(result1v == expect1v));
1804
1805 // Store half
1806 vmask4 mask2 = vint4(1, 1, 0, 0) == vint4(1);
1807 vint4 data2 = vint4(2);
1808
1809 store_lanes_masked(reinterpret_cast<int*>(resulta + 1), data2, mask2);
1810 vint4 result2v(reinterpret_cast<int*>(resulta + 1));
1811 vint4 expect2v = vint4(2, 2, 0, 0);
1812 EXPECT_TRUE(all(result2v == expect2v));
1813
1814 // Store all
1815 vmask4 mask3 = vint4(1) == vint4(1);
1816 vint4 data3 = vint4(3);
1817
1818 store_lanes_masked(reinterpret_cast<int*>(resulta + 1), data3, mask3);
1819 vint4 result3v(reinterpret_cast<int*>(resulta + 1));
1820 vint4 expect3v = vint4(3);
1821 EXPECT_TRUE(all(result3v == expect3v));
1822 }
1823
1824 /** @brief Test vint4 gatheri. */
TEST(vint4,gatheri)1825 TEST(vint4, gatheri)
1826 {
1827 vint4 indices(0, 4, 3, 2);
1828 vint4 r = gatheri(s32_data, indices);
1829 EXPECT_EQ(r.lane<0>(), 0);
1830 EXPECT_EQ(r.lane<1>(), 4);
1831 EXPECT_EQ(r.lane<2>(), 3);
1832 EXPECT_EQ(r.lane<3>(), 2);
1833 }
1834
1835 /** @brief Test vint4 pack_low_bytes. */
TEST(vint4,pack_low_bytes)1836 TEST(vint4, pack_low_bytes)
1837 {
1838 vint4 a(1, 2, 3, 4);
1839 vint4 r = pack_low_bytes(a);
1840 EXPECT_EQ(r.lane<0>(), (4 << 24) | (3 << 16) | (2 << 8) | (1 << 0));
1841 }
1842
1843 /** @brief Test vint4 select. */
TEST(vint4,select)1844 TEST(vint4, select)
1845 {
1846 vint4 m1(1, 1, 1, 1);
1847 vint4 m2(1, 2, 1, 2);
1848 vmask4 cond = m1 == m2;
1849
1850 vint4 a(1, 3, 3, 1);
1851 vint4 b(4, 2, 2, 4);
1852
1853 vint4 r1 = select(a, b, cond);
1854 EXPECT_EQ(r1.lane<0>(), 4);
1855 EXPECT_EQ(r1.lane<1>(), 3);
1856 EXPECT_EQ(r1.lane<2>(), 2);
1857 EXPECT_EQ(r1.lane<3>(), 1);
1858
1859 vint4 r2 = select(b, a, cond);
1860 EXPECT_EQ(r2.lane<0>(), 1);
1861 EXPECT_EQ(r2.lane<1>(), 2);
1862 EXPECT_EQ(r2.lane<2>(), 3);
1863 EXPECT_EQ(r2.lane<3>(), 4);
1864 }
1865
1866 // VMASK4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1867 /** @brief Test vmask4 scalar literal constructor. */
TEST(vmask4,scalar_literal_construct)1868 TEST(vmask4, scalar_literal_construct)
1869 {
1870 vfloat4 m1a(0.0f, 0.0f, 0.0f, 0.0f);
1871 vfloat4 m1b(1.0f, 1.0f, 1.0f, 1.0f);
1872 vmask4 m1(true);
1873
1874 vfloat4 r = select(m1a, m1b, m1);
1875
1876 EXPECT_EQ(r.lane<0>(), 1.0f);
1877 EXPECT_EQ(r.lane<1>(), 1.0f);
1878 EXPECT_EQ(r.lane<2>(), 1.0f);
1879 EXPECT_EQ(r.lane<3>(), 1.0f);
1880
1881 r = select(m1b, m1a, m1);
1882
1883 EXPECT_EQ(r.lane<0>(), 0.0f);
1884 EXPECT_EQ(r.lane<1>(), 0.0f);
1885 EXPECT_EQ(r.lane<2>(), 0.0f);
1886 EXPECT_EQ(r.lane<3>(), 0.0f);
1887 }
1888
1889 /** @brief Test vmask4 literal constructor. */
TEST(vmask4,literal_construct)1890 TEST(vmask4, literal_construct)
1891 {
1892 vfloat4 m1a(0.0f, 0.0f, 0.0f, 0.0f);
1893 vfloat4 m1b(1.0f, 1.0f, 1.0f, 1.0f);
1894 vmask4 m1(true, false, true, false);
1895
1896 vfloat4 r = select(m1a, m1b, m1);
1897
1898 EXPECT_EQ(r.lane<0>(), 1.0f);
1899 EXPECT_EQ(r.lane<1>(), 0.0f);
1900 EXPECT_EQ(r.lane<2>(), 1.0f);
1901 EXPECT_EQ(r.lane<3>(), 0.0f);
1902 }
1903
1904 /** @brief Test vmask4 or. */
TEST(vmask4,or)1905 TEST(vmask4, or)
1906 {
1907 vfloat4 m1a(0, 1, 0, 1);
1908 vfloat4 m1b(1, 1, 1, 1);
1909 vmask4 m1 = m1a == m1b;
1910
1911 vfloat4 m2a(1, 1, 0, 0);
1912 vfloat4 m2b(1, 1, 1, 1);
1913 vmask4 m2 = m2a == m2b;
1914
1915 vmask4 r = m1 | m2;
1916 EXPECT_EQ(mask(r), 0xBu);
1917 }
1918
1919 /** @brief Test vmask4 and. */
TEST(vmask4,and)1920 TEST(vmask4, and)
1921 {
1922 vfloat4 m1a(0, 1, 0, 1);
1923 vfloat4 m1b(1, 1, 1, 1);
1924 vmask4 m1 = m1a == m1b;
1925
1926 vfloat4 m2a(1, 1, 0, 0);
1927 vfloat4 m2b(1, 1, 1, 1);
1928 vmask4 m2 = m2a == m2b;
1929
1930 vmask4 r = m1 & m2;
1931 EXPECT_EQ(mask(r), 0x2u);
1932 }
1933
1934 /** @brief Test vmask4 xor. */
TEST(vmask4,xor)1935 TEST(vmask4, xor)
1936 {
1937 vfloat4 m1a(0, 1, 0, 1);
1938 vfloat4 m1b(1, 1, 1, 1);
1939 vmask4 m1 = m1a == m1b;
1940
1941 vfloat4 m2a(1, 1, 0, 0);
1942 vfloat4 m2b(1, 1, 1, 1);
1943 vmask4 m2 = m2a == m2b;
1944
1945 vmask4 r = m1 ^ m2;
1946 EXPECT_EQ(mask(r), 0x9u);
1947 }
1948
1949 /** @brief Test vmask4 not. */
TEST(vmask4,not)1950 TEST(vmask4, not)
1951 {
1952 vfloat4 m1a(0, 1, 0, 1);
1953 vfloat4 m1b(1, 1, 1, 1);
1954 vmask4 m1 = m1a == m1b;
1955 vmask4 r = ~m1;
1956 EXPECT_EQ(mask(r), 0x5u);
1957 }
1958
1959 /** @brief Test vint4 table permute. */
TEST(vint4,vtable_8bt_32bi_32entry)1960 TEST(vint4, vtable_8bt_32bi_32entry)
1961 {
1962 vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
1963 vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
1964
1965 vint4 table0p, table1p;
1966 vtable_prepare(table0, table1, table0p, table1p);
1967
1968 vint4 index(0, 7, 4, 31);
1969
1970 vint4 result = vtable_8bt_32bi(table0p, table1p, index);
1971
1972 EXPECT_EQ(result.lane<0>(), 3);
1973 EXPECT_EQ(result.lane<1>(), 4);
1974 EXPECT_EQ(result.lane<2>(), 7);
1975 EXPECT_EQ(result.lane<3>(), 28);
1976 }
1977
1978 /** @brief Test vint4 table permute. */
TEST(vint4,vtable_8bt_32bi_64entry)1979 TEST(vint4, vtable_8bt_32bi_64entry)
1980 {
1981 vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
1982 vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
1983 vint4 table2(0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f);
1984 vint4 table3(0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f);
1985
1986 vint4 table0p, table1p, table2p, table3p;
1987 vtable_prepare(table0, table1, table2, table3, table0p, table1p, table2p, table3p);
1988
1989 vint4 index(0, 7, 38, 63);
1990
1991 vint4 result = vtable_8bt_32bi(table0p, table1p, table2p, table3p, index);
1992
1993 EXPECT_EQ(result.lane<0>(), 3);
1994 EXPECT_EQ(result.lane<1>(), 4);
1995 EXPECT_EQ(result.lane<2>(), 37);
1996 EXPECT_EQ(result.lane<3>(), 60);
1997 }
1998
1999 /** @brief Test vint4 rgba byte interleave. */
TEST(vint4,interleave_rgba8)2000 TEST(vint4, interleave_rgba8)
2001 {
2002 vint4 r(0x01, 0x11, 0x21, 0x31);
2003 vint4 g(0x02, 0x12, 0x22, 0x32);
2004 vint4 b(0x03, 0x13, 0x23, 0x33);
2005 vint4 a(0x04, 0x14, 0x24, 0x34);
2006
2007 vint4 result = interleave_rgba8(r, g, b, a);
2008
2009 EXPECT_EQ(result.lane<0>(), 0x04030201);
2010 EXPECT_EQ(result.lane<1>(), 0x14131211);
2011 EXPECT_EQ(result.lane<2>(), 0x24232221);
2012 EXPECT_EQ(result.lane<3>(), 0x34333231);
2013 }
2014
2015 # if ASTCENC_SIMD_WIDTH == 8
2016
2017 // VFLOAT8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2018
2019 /** @brief Test unaligned vfloat8 data load. */
TEST(vfloat8,UnalignedLoad)2020 TEST(vfloat8, UnalignedLoad)
2021 {
2022 vfloat8 a(&(f32_data[1]));
2023 EXPECT_EQ(a.lane<0>(), 1.0f);
2024 EXPECT_EQ(a.lane<1>(), 2.0f);
2025 EXPECT_EQ(a.lane<2>(), 3.0f);
2026 EXPECT_EQ(a.lane<3>(), 4.0f);
2027 EXPECT_EQ(a.lane<4>(), 5.0f);
2028 EXPECT_EQ(a.lane<5>(), 6.0f);
2029 EXPECT_EQ(a.lane<6>(), 7.0f);
2030 EXPECT_EQ(a.lane<7>(), 8.0f);
2031 }
2032
2033 /** @brief Test scalar duplicated vfloat8 load. */
TEST(vfloat8,ScalarDupLoad)2034 TEST(vfloat8, ScalarDupLoad)
2035 {
2036 vfloat8 a(1.1f);
2037 EXPECT_EQ(a.lane<0>(), 1.1f);
2038 EXPECT_EQ(a.lane<1>(), 1.1f);
2039 EXPECT_EQ(a.lane<2>(), 1.1f);
2040 EXPECT_EQ(a.lane<3>(), 1.1f);
2041 EXPECT_EQ(a.lane<4>(), 1.1f);
2042 EXPECT_EQ(a.lane<5>(), 1.1f);
2043 EXPECT_EQ(a.lane<6>(), 1.1f);
2044 EXPECT_EQ(a.lane<7>(), 1.1f);
2045 }
2046
2047 /** @brief Test scalar vfloat8 load. */
TEST(vfloat8,ScalarLoad)2048 TEST(vfloat8, ScalarLoad)
2049 {
2050 vfloat8 a(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
2051 EXPECT_EQ(a.lane<0>(), 1.1f);
2052 EXPECT_EQ(a.lane<1>(), 2.2f);
2053 EXPECT_EQ(a.lane<2>(), 3.3f);
2054 EXPECT_EQ(a.lane<3>(), 4.4f);
2055 EXPECT_EQ(a.lane<4>(), 5.5f);
2056 EXPECT_EQ(a.lane<5>(), 6.6f);
2057 EXPECT_EQ(a.lane<6>(), 7.7f);
2058 EXPECT_EQ(a.lane<7>(), 8.8f);
2059 }
2060
2061 /** @brief Test copy vfloat8 load. */
TEST(vfloat8,CopyLoad)2062 TEST(vfloat8, CopyLoad)
2063 {
2064 vfloat8 s(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
2065 vfloat8 a(s.m);
2066 EXPECT_EQ(a.lane<0>(), 1.1f);
2067 EXPECT_EQ(a.lane<1>(), 2.2f);
2068 EXPECT_EQ(a.lane<2>(), 3.3f);
2069 EXPECT_EQ(a.lane<3>(), 4.4f);
2070 EXPECT_EQ(a.lane<4>(), 5.5f);
2071 EXPECT_EQ(a.lane<5>(), 6.6f);
2072 EXPECT_EQ(a.lane<6>(), 7.7f);
2073 EXPECT_EQ(a.lane<7>(), 8.8f);
2074 }
2075
2076 /** @brief Test vfloat8 zero. */
TEST(vfloat8,Zero)2077 TEST(vfloat8, Zero)
2078 {
2079 vfloat8 a = vfloat8::zero();
2080 EXPECT_EQ(a.lane<0>(), 0.0f);
2081 EXPECT_EQ(a.lane<1>(), 0.0f);
2082 EXPECT_EQ(a.lane<2>(), 0.0f);
2083 EXPECT_EQ(a.lane<3>(), 0.0f);
2084 EXPECT_EQ(a.lane<4>(), 0.0f);
2085 EXPECT_EQ(a.lane<5>(), 0.0f);
2086 EXPECT_EQ(a.lane<6>(), 0.0f);
2087 EXPECT_EQ(a.lane<7>(), 0.0f);
2088 }
2089
2090 /** @brief Test vfloat8 load1. */
TEST(vfloat8,Load1)2091 TEST(vfloat8, Load1)
2092 {
2093 float s = 3.14f;
2094 vfloat8 a = vfloat8::load1(&s);
2095 EXPECT_EQ(a.lane<0>(), 3.14f);
2096 EXPECT_EQ(a.lane<1>(), 3.14f);
2097 EXPECT_EQ(a.lane<2>(), 3.14f);
2098 EXPECT_EQ(a.lane<3>(), 3.14f);
2099 EXPECT_EQ(a.lane<4>(), 3.14f);
2100 EXPECT_EQ(a.lane<5>(), 3.14f);
2101 EXPECT_EQ(a.lane<6>(), 3.14f);
2102 EXPECT_EQ(a.lane<7>(), 3.14f);
2103 }
2104
2105 /** @brief Test vfloat8 loada. */
TEST(vfloat8,Loada)2106 TEST(vfloat8, Loada)
2107 {
2108 vfloat8 a = vfloat8::loada(&(f32_data[0]));
2109 EXPECT_EQ(a.lane<0>(), 0.0f);
2110 EXPECT_EQ(a.lane<1>(), 1.0f);
2111 EXPECT_EQ(a.lane<2>(), 2.0f);
2112 EXPECT_EQ(a.lane<3>(), 3.0f);
2113 EXPECT_EQ(a.lane<4>(), 4.0f);
2114 EXPECT_EQ(a.lane<5>(), 5.0f);
2115 EXPECT_EQ(a.lane<6>(), 6.0f);
2116 EXPECT_EQ(a.lane<7>(), 7.0f);
2117 }
2118
2119 /** @brief Test vfloat8 lane_id. */
TEST(vfloat8,LaneID)2120 TEST(vfloat8, LaneID)
2121 {
2122 vfloat8 a = vfloat8::lane_id();
2123 EXPECT_EQ(a.lane<0>(), 0.0f);
2124 EXPECT_EQ(a.lane<1>(), 1.0f);
2125 EXPECT_EQ(a.lane<2>(), 2.0f);
2126 EXPECT_EQ(a.lane<3>(), 3.0f);
2127 EXPECT_EQ(a.lane<4>(), 4.0f);
2128 EXPECT_EQ(a.lane<5>(), 5.0f);
2129 EXPECT_EQ(a.lane<6>(), 6.0f);
2130 EXPECT_EQ(a.lane<7>(), 7.0f);
2131 }
2132
2133 /** @brief Test vfloat8 add. */
TEST(vfloat8,vadd)2134 TEST(vfloat8, vadd)
2135 {
2136 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2137 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2138 a = a + b;
2139 EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f);
2140 EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f);
2141 EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f);
2142 EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f);
2143 EXPECT_EQ(a.lane<4>(), 5.0f + 0.5f);
2144 EXPECT_EQ(a.lane<5>(), 6.0f + 0.6f);
2145 EXPECT_EQ(a.lane<6>(), 7.0f + 0.7f);
2146 EXPECT_EQ(a.lane<7>(), 8.0f + 0.8f);
2147 }
2148
2149 /** @brief Test vfloat8 sub. */
TEST(vfloat8,vsub)2150 TEST(vfloat8, vsub)
2151 {
2152 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2153 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2154 a = a - b;
2155 EXPECT_EQ(a.lane<0>(), 1.0f - 0.1f);
2156 EXPECT_EQ(a.lane<1>(), 2.0f - 0.2f);
2157 EXPECT_EQ(a.lane<2>(), 3.0f - 0.3f);
2158 EXPECT_EQ(a.lane<3>(), 4.0f - 0.4f);
2159 EXPECT_EQ(a.lane<4>(), 5.0f - 0.5f);
2160 EXPECT_EQ(a.lane<5>(), 6.0f - 0.6f);
2161 EXPECT_EQ(a.lane<6>(), 7.0f - 0.7f);
2162 EXPECT_EQ(a.lane<7>(), 8.0f - 0.8f);
2163 }
2164
2165 /** @brief Test vfloat8 mul. */
TEST(vfloat8,vmul)2166 TEST(vfloat8, vmul)
2167 {
2168 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2169 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2170 a = a * b;
2171 EXPECT_EQ(a.lane<0>(), 1.0f * 0.1f);
2172 EXPECT_EQ(a.lane<1>(), 2.0f * 0.2f);
2173 EXPECT_EQ(a.lane<2>(), 3.0f * 0.3f);
2174 EXPECT_EQ(a.lane<3>(), 4.0f * 0.4f);
2175 EXPECT_EQ(a.lane<4>(), 5.0f * 0.5f);
2176 EXPECT_EQ(a.lane<5>(), 6.0f * 0.6f);
2177 EXPECT_EQ(a.lane<6>(), 7.0f * 0.7f);
2178 EXPECT_EQ(a.lane<7>(), 8.0f * 0.8f);
2179 }
2180
2181 /** @brief Test vfloat8 mul. */
TEST(vfloat8,vsmul)2182 TEST(vfloat8, vsmul)
2183 {
2184 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2185 float b = 3.14f;
2186 a = a * b;
2187 EXPECT_EQ(a.lane<0>(), 1.0f * 3.14f);
2188 EXPECT_EQ(a.lane<1>(), 2.0f * 3.14f);
2189 EXPECT_EQ(a.lane<2>(), 3.0f * 3.14f);
2190 EXPECT_EQ(a.lane<3>(), 4.0f * 3.14f);
2191 EXPECT_EQ(a.lane<4>(), 5.0f * 3.14f);
2192 EXPECT_EQ(a.lane<5>(), 6.0f * 3.14f);
2193 EXPECT_EQ(a.lane<6>(), 7.0f * 3.14f);
2194 EXPECT_EQ(a.lane<7>(), 8.0f * 3.14f);
2195 }
2196
2197 /** @brief Test vfloat8 mul. */
TEST(vfloat8,svmul)2198 TEST(vfloat8, svmul)
2199 {
2200 float a = 3.14f;
2201 vfloat8 b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2202 b = a * b;
2203 EXPECT_EQ(b.lane<0>(), 3.14f * 1.0f);
2204 EXPECT_EQ(b.lane<1>(), 3.14f * 2.0f);
2205 EXPECT_EQ(b.lane<2>(), 3.14f * 3.0f);
2206 EXPECT_EQ(b.lane<3>(), 3.14f * 4.0f);
2207 EXPECT_EQ(b.lane<4>(), 3.14f * 5.0f);
2208 EXPECT_EQ(b.lane<5>(), 3.14f * 6.0f);
2209 EXPECT_EQ(b.lane<6>(), 3.14f * 7.0f);
2210 EXPECT_EQ(b.lane<7>(), 3.14f * 8.0f);
2211 }
2212
2213 /** @brief Test vfloat8 div. */
TEST(vfloat8,vdiv)2214 TEST(vfloat8, vdiv)
2215 {
2216 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2217 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2218 a = a / b;
2219 EXPECT_EQ(a.lane<0>(), 1.0f / 0.1f);
2220 EXPECT_EQ(a.lane<1>(), 2.0f / 0.2f);
2221 EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f);
2222 EXPECT_EQ(a.lane<3>(), 4.0f / 0.4f);
2223 EXPECT_EQ(a.lane<4>(), 5.0f / 0.5f);
2224 EXPECT_EQ(a.lane<5>(), 6.0f / 0.6f);
2225 EXPECT_EQ(a.lane<6>(), 7.0f / 0.7f);
2226 EXPECT_EQ(a.lane<7>(), 8.0f / 0.8f);
2227 }
2228
2229 /** @brief Test vfloat8 div. */
TEST(vfloat8,vsdiv)2230 TEST(vfloat8, vsdiv)
2231 {
2232 vfloat8 a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2233 float b = 3.14f;
2234 vfloat8 r = a / b;
2235
2236 EXPECT_EQ(r.lane<0>(), 0.1f / 3.14f);
2237 EXPECT_EQ(r.lane<1>(), 0.2f / 3.14f);
2238 EXPECT_EQ(r.lane<2>(), 0.3f / 3.14f);
2239 EXPECT_EQ(r.lane<3>(), 0.4f / 3.14f);
2240 EXPECT_EQ(r.lane<4>(), 0.5f / 3.14f);
2241 EXPECT_EQ(r.lane<5>(), 0.6f / 3.14f);
2242 EXPECT_EQ(r.lane<6>(), 0.7f / 3.14f);
2243 EXPECT_EQ(r.lane<7>(), 0.8f / 3.14f);
2244 }
2245
2246 /** @brief Test vfloat8 div. */
TEST(vfloat8,svdiv)2247 TEST(vfloat8, svdiv)
2248 {
2249 float a = 3.14f;
2250 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2251 vfloat8 r = a / b;
2252
2253 EXPECT_EQ(r.lane<0>(), 3.14f / 0.1f);
2254 EXPECT_EQ(r.lane<1>(), 3.14f / 0.2f);
2255 EXPECT_EQ(r.lane<2>(), 3.14f / 0.3f);
2256 EXPECT_EQ(r.lane<3>(), 3.14f / 0.4f);
2257 EXPECT_EQ(r.lane<4>(), 3.14f / 0.5f);
2258 EXPECT_EQ(r.lane<5>(), 3.14f / 0.6f);
2259 EXPECT_EQ(r.lane<6>(), 3.14f / 0.7f);
2260 EXPECT_EQ(r.lane<7>(), 3.14f / 0.8f);
2261 }
2262
2263 /** @brief Test vfloat8 ceq. */
TEST(vfloat8,ceq)2264 TEST(vfloat8, ceq)
2265 {
2266 vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2267 vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2268 vmask8 r1 = a1 == b1;
2269 EXPECT_EQ(0u, mask(r1));
2270 EXPECT_EQ(false, any(r1));
2271 EXPECT_EQ(false, all(r1));
2272
2273 vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2274 vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2275 vmask8 r2 = a2 == b2;
2276 EXPECT_EQ(0x1u, mask(r2));
2277 EXPECT_EQ(true, any(r2));
2278 EXPECT_EQ(false, all(r2));
2279
2280 vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2281 vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2282 vmask8 r3 = a3 == b3;
2283 EXPECT_EQ(0x5u, mask(r3));
2284 EXPECT_EQ(true, any(r3));
2285 EXPECT_EQ(false, all(r3));
2286
2287 vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2288 vmask8 r4 = a4 == a4;
2289 EXPECT_EQ(0xFFu, mask(r4));
2290 EXPECT_EQ(true, any(r4));
2291 EXPECT_EQ(true, all(r4));
2292 }
2293
2294 /** @brief Test vfloat8 cne. */
TEST(vfloat8,cne)2295 TEST(vfloat8, cne)
2296 {
2297 vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2298 vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2299 vmask8 r1 = a1 != b1;
2300 EXPECT_EQ(0xFFu, mask(r1));
2301 EXPECT_EQ(true, any(r1));
2302 EXPECT_EQ(true, all(r1));
2303
2304 vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2305 vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2306 vmask8 r2 = a2 != b2;
2307 EXPECT_EQ(0xFEu, mask(r2));
2308 EXPECT_EQ(true, any(r2));
2309 EXPECT_EQ(false, all(r2));
2310
2311 vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2312 vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2313 vmask8 r3 = a3 != b3;
2314 EXPECT_EQ(0xFAu, mask(r3));
2315 EXPECT_EQ(true, any(r3));
2316 EXPECT_EQ(false, all(r3));
2317
2318 vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2319 vmask8 r4 = a4 != a4;
2320 EXPECT_EQ(0u, mask(r4));
2321 EXPECT_EQ(false, any(r4));
2322 EXPECT_EQ(false, all(r4));
2323 }
2324
2325 /** @brief Test vfloat8 clt. */
TEST(vfloat8,clt)2326 TEST(vfloat8, clt)
2327 {
2328 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2329 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2330 vmask8 r = a < b;
2331 EXPECT_EQ(0xAAu, mask(r));
2332 }
2333
2334 /** @brief Test vfloat8 cle. */
TEST(vfloat8,cle)2335 TEST(vfloat8, cle)
2336 {
2337 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2338 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2339 vmask8 r = a <= b;
2340 EXPECT_EQ(0xEEu, mask(r));
2341 }
2342
2343 /** @brief Test vfloat8 cgt. */
TEST(vfloat8,cgt)2344 TEST(vfloat8, cgt)
2345 {
2346 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2347 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2348 vmask8 r = a > b;
2349 EXPECT_EQ(0x11u, mask(r));
2350 }
2351
2352 /** @brief Test vfloat8 cge. */
TEST(vfloat8,cge)2353 TEST(vfloat8, cge)
2354 {
2355 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2356 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2357 vmask8 r = a >= b;
2358 EXPECT_EQ(0x55u, mask(r));
2359 }
2360
2361 /** @brief Test vfloat8 min. */
TEST(vfloat8,min)2362 TEST(vfloat8, min)
2363 {
2364 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2365 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2366 vfloat8 r = min(a, b);
2367 EXPECT_EQ(r.lane<0>(), 0.9f);
2368 EXPECT_EQ(r.lane<1>(), 2.0f);
2369 EXPECT_EQ(r.lane<2>(), 3.0f);
2370 EXPECT_EQ(r.lane<3>(), 4.0f);
2371 EXPECT_EQ(r.lane<4>(), 0.9f);
2372 EXPECT_EQ(r.lane<5>(), 2.0f);
2373 EXPECT_EQ(r.lane<6>(), 3.0f);
2374 EXPECT_EQ(r.lane<7>(), 4.0f);
2375 }
2376
2377 /** @brief Test vfloat8 max. */
TEST(vfloat8,max)2378 TEST(vfloat8, max)
2379 {
2380 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2381 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2382 vfloat8 r = max(a, b);
2383 EXPECT_EQ(r.lane<0>(), 1.0f);
2384 EXPECT_EQ(r.lane<1>(), 2.1f);
2385 EXPECT_EQ(r.lane<2>(), 3.0f);
2386 EXPECT_EQ(r.lane<3>(), 4.1f);
2387 EXPECT_EQ(r.lane<4>(), 1.0f);
2388 EXPECT_EQ(r.lane<5>(), 2.1f);
2389 EXPECT_EQ(r.lane<6>(), 3.0f);
2390 EXPECT_EQ(r.lane<7>(), 4.1f);
2391 }
2392
2393 /** @brief Test vfloat8 clamp. */
TEST(vfloat8,clamp)2394 TEST(vfloat8, clamp)
2395 {
2396 vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2397 vfloat8 r1 = clamp(2.1f, 3.0f, a1);
2398 EXPECT_EQ(r1.lane<0>(), 2.1f);
2399 EXPECT_EQ(r1.lane<1>(), 2.1f);
2400 EXPECT_EQ(r1.lane<2>(), 3.0f);
2401 EXPECT_EQ(r1.lane<3>(), 3.0f);
2402 EXPECT_EQ(r1.lane<4>(), 2.1f);
2403 EXPECT_EQ(r1.lane<5>(), 2.1f);
2404 EXPECT_EQ(r1.lane<6>(), 3.0f);
2405 EXPECT_EQ(r1.lane<7>(), 3.0f);
2406
2407 vfloat8 a2(1.0f, 2.0f, qnan, 4.0f, 1.0f, 2.0f, qnan, 4.0f);
2408 vfloat8 r2 = clamp(2.1f, 3.0f, a2);
2409 EXPECT_EQ(r2.lane<0>(), 2.1f);
2410 EXPECT_EQ(r2.lane<1>(), 2.1f);
2411 EXPECT_EQ(r2.lane<2>(), 2.1f);
2412 EXPECT_EQ(r2.lane<3>(), 3.0f);
2413 EXPECT_EQ(r2.lane<4>(), 2.1f);
2414 EXPECT_EQ(r2.lane<5>(), 2.1f);
2415 EXPECT_EQ(r2.lane<6>(), 2.1f);
2416 EXPECT_EQ(r2.lane<7>(), 3.0f);
2417 }
2418
2419 /** @brief Test vfloat8 clampz. */
TEST(vfloat8,clampz)2420 TEST(vfloat8, clampz)
2421 {
2422 vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2423 vfloat8 r1 = clampz(3.0f, a1);
2424 EXPECT_EQ(r1.lane<0>(), 0.0f);
2425 EXPECT_EQ(r1.lane<1>(), 0.0f);
2426 EXPECT_EQ(r1.lane<2>(), 0.1f);
2427 EXPECT_EQ(r1.lane<3>(), 3.0f);
2428 EXPECT_EQ(r1.lane<4>(), 0.0f);
2429 EXPECT_EQ(r1.lane<5>(), 0.0f);
2430 EXPECT_EQ(r1.lane<6>(), 0.1f);
2431 EXPECT_EQ(r1.lane<7>(), 3.0f);
2432
2433 vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f);
2434 vfloat8 r2 = clampz(3.0f, a2);
2435 EXPECT_EQ(r2.lane<0>(), 0.0f);
2436 EXPECT_EQ(r2.lane<1>(), 0.0f);
2437 EXPECT_EQ(r2.lane<2>(), 0.0f);
2438 EXPECT_EQ(r2.lane<3>(), 3.0f);
2439 EXPECT_EQ(r2.lane<4>(), 0.0f);
2440 EXPECT_EQ(r2.lane<5>(), 0.0f);
2441 EXPECT_EQ(r2.lane<6>(), 0.0f);
2442 EXPECT_EQ(r2.lane<7>(), 3.0f);
2443 }
2444
2445 /** @brief Test vfloat8 clampz. */
TEST(vfloat8,clampzo)2446 TEST(vfloat8, clampzo)
2447 {
2448 vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2449 vfloat8 r1 = clampzo(a1);
2450 EXPECT_EQ(r1.lane<0>(), 0.0f);
2451 EXPECT_EQ(r1.lane<1>(), 0.0f);
2452 EXPECT_EQ(r1.lane<2>(), 0.1f);
2453 EXPECT_EQ(r1.lane<3>(), 1.0f);
2454 EXPECT_EQ(r1.lane<4>(), 0.0f);
2455 EXPECT_EQ(r1.lane<5>(), 0.0f);
2456 EXPECT_EQ(r1.lane<6>(), 0.1f);
2457 EXPECT_EQ(r1.lane<7>(), 1.0f);
2458
2459 vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f);
2460 vfloat8 r2 = clampzo(a2);
2461 EXPECT_EQ(r2.lane<0>(), 0.0f);
2462 EXPECT_EQ(r2.lane<1>(), 0.0f);
2463 EXPECT_EQ(r2.lane<2>(), 0.0f);
2464 EXPECT_EQ(r2.lane<3>(), 1.0f);
2465 EXPECT_EQ(r2.lane<4>(), 0.0f);
2466 EXPECT_EQ(r2.lane<5>(), 0.0f);
2467 EXPECT_EQ(r2.lane<6>(), 0.0f);
2468 EXPECT_EQ(r2.lane<7>(), 1.0f);
2469 }
2470
2471 /** @brief Test vfloat8 abs. */
TEST(vfloat8,abs)2472 TEST(vfloat8, abs)
2473 {
2474 vfloat8 a(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2475 vfloat8 r = abs(a);
2476 EXPECT_EQ(r.lane<0>(), 1.0f);
2477 EXPECT_EQ(r.lane<1>(), 0.0f);
2478 EXPECT_EQ(r.lane<2>(), 0.1f);
2479 EXPECT_EQ(r.lane<3>(), 4.0f);
2480 EXPECT_EQ(r.lane<4>(), 1.0f);
2481 EXPECT_EQ(r.lane<5>(), 0.0f);
2482 EXPECT_EQ(r.lane<6>(), 0.1f);
2483 EXPECT_EQ(r.lane<7>(), 4.0f);
2484 }
2485
2486 /** @brief Test vfloat8 round. */
TEST(vfloat8,round)2487 TEST(vfloat8, round)
2488 {
2489 vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2490 vfloat8 r = round(a);
2491 EXPECT_EQ(r.lane<0>(), 1.0f);
2492 EXPECT_EQ(r.lane<1>(), 2.0f);
2493 EXPECT_EQ(r.lane<2>(), 2.0f);
2494 EXPECT_EQ(r.lane<3>(), 4.0f);
2495 EXPECT_EQ(r.lane<4>(), 1.0f);
2496 EXPECT_EQ(r.lane<5>(), 2.0f);
2497 EXPECT_EQ(r.lane<6>(), 2.0f);
2498 EXPECT_EQ(r.lane<7>(), 4.0f);
2499 }
2500
2501 /** @brief Test vfloat8 hmin. */
TEST(vfloat8,hmin)2502 TEST(vfloat8, hmin)
2503 {
2504 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2505 vfloat8 r1 = hmin(a1);
2506 EXPECT_EQ(r1.lane<0>(), 1.1f);
2507 EXPECT_EQ(r1.lane<1>(), 1.1f);
2508 EXPECT_EQ(r1.lane<2>(), 1.1f);
2509 EXPECT_EQ(r1.lane<3>(), 1.1f);
2510 EXPECT_EQ(r1.lane<4>(), 1.1f);
2511 EXPECT_EQ(r1.lane<5>(), 1.1f);
2512 EXPECT_EQ(r1.lane<6>(), 1.1f);
2513 EXPECT_EQ(r1.lane<7>(), 1.1f);
2514
2515 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2516 vfloat8 r2 = hmin(a2);
2517 EXPECT_EQ(r2.lane<0>(), 0.2f);
2518 EXPECT_EQ(r2.lane<1>(), 0.2f);
2519 EXPECT_EQ(r2.lane<2>(), 0.2f);
2520 EXPECT_EQ(r2.lane<3>(), 0.2f);
2521 EXPECT_EQ(r2.lane<4>(), 0.2f);
2522 EXPECT_EQ(r2.lane<5>(), 0.2f);
2523 EXPECT_EQ(r2.lane<6>(), 0.2f);
2524 EXPECT_EQ(r2.lane<7>(), 0.2f);
2525 }
2526
2527 /** @brief Test vfloat8 hmin_s. */
TEST(vfloat8,hmin_s)2528 TEST(vfloat8, hmin_s)
2529 {
2530 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2531 float r1 = hmin_s(a1);
2532 EXPECT_EQ(r1, 1.1f);
2533
2534 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2535 float r2 = hmin_s(a2);
2536 EXPECT_EQ(r2, 0.2f);
2537 }
2538
2539 /** @brief Test vfloat8 hmax. */
TEST(vfloat8,hmax)2540 TEST(vfloat8, hmax)
2541 {
2542 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2543 vfloat8 r1 = hmax(a1);
2544 EXPECT_EQ(r1.lane<0>(), 4.0f);
2545 EXPECT_EQ(r1.lane<1>(), 4.0f);
2546 EXPECT_EQ(r1.lane<2>(), 4.0f);
2547 EXPECT_EQ(r1.lane<3>(), 4.0f);
2548 EXPECT_EQ(r1.lane<4>(), 4.0f);
2549 EXPECT_EQ(r1.lane<5>(), 4.0f);
2550 EXPECT_EQ(r1.lane<6>(), 4.0f);
2551 EXPECT_EQ(r1.lane<7>(), 4.0f);
2552
2553 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2554 vfloat8 r2 = hmax(a2);
2555 EXPECT_EQ(r2.lane<0>(), 1.6f);
2556 EXPECT_EQ(r2.lane<1>(), 1.6f);
2557 EXPECT_EQ(r2.lane<2>(), 1.6f);
2558 EXPECT_EQ(r2.lane<3>(), 1.6f);
2559 EXPECT_EQ(r2.lane<4>(), 1.6f);
2560 EXPECT_EQ(r2.lane<5>(), 1.6f);
2561 EXPECT_EQ(r2.lane<6>(), 1.6f);
2562 EXPECT_EQ(r2.lane<7>(), 1.6f);
2563 }
2564
2565 /** @brief Test vfloat8 hmax_s. */
TEST(vfloat8,hmax_s)2566 TEST(vfloat8, hmax_s)
2567 {
2568 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2569 float r1 = hmax_s(a1);
2570 EXPECT_EQ(r1, 4.0f);
2571
2572 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2573 float r2 = hmax_s(a2);
2574 EXPECT_EQ(r2, 1.6f);
2575 }
2576
2577 /** @brief Test vfloat8 hadd_s. */
TEST(vfloat8,hadd_s)2578 TEST(vfloat8, hadd_s)
2579 {
2580 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2581 float sum = 1.1f + 1.5f + 1.6f + 4.0f + 1.1f + 1.5f + 1.6f + 4.0f;
2582 float r = hadd_s(a1);
2583 EXPECT_NEAR(r, sum, 0.005f);
2584 }
2585
2586 /** @brief Test vfloat8 sqrt. */
TEST(vfloat8,sqrt)2587 TEST(vfloat8, sqrt)
2588 {
2589 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2590 vfloat8 r = sqrt(a);
2591 EXPECT_EQ(r.lane<0>(), std::sqrt(1.0f));
2592 EXPECT_EQ(r.lane<1>(), std::sqrt(2.0f));
2593 EXPECT_EQ(r.lane<2>(), std::sqrt(3.0f));
2594 EXPECT_EQ(r.lane<3>(), std::sqrt(4.0f));
2595 EXPECT_EQ(r.lane<4>(), std::sqrt(1.0f));
2596 EXPECT_EQ(r.lane<5>(), std::sqrt(2.0f));
2597 EXPECT_EQ(r.lane<6>(), std::sqrt(3.0f));
2598 EXPECT_EQ(r.lane<7>(), std::sqrt(4.0f));
2599 }
2600
2601 /** @brief Test vfloat8 select. */
TEST(vfloat8,select)2602 TEST(vfloat8, select)
2603 {
2604 vfloat8 m1(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);
2605 vfloat8 m2(1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f);
2606 vmask8 cond = m1 == m2;
2607
2608 vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0);
2609 vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0);
2610
2611 // Select in one direction
2612 vfloat8 r1 = select(a, b, cond);
2613 EXPECT_EQ(r1.lane<0>(), 4.0f);
2614 EXPECT_EQ(r1.lane<1>(), 3.0f);
2615 EXPECT_EQ(r1.lane<2>(), 2.0f);
2616 EXPECT_EQ(r1.lane<3>(), 1.0f);
2617 EXPECT_EQ(r1.lane<4>(), 4.0f);
2618 EXPECT_EQ(r1.lane<5>(), 3.0f);
2619 EXPECT_EQ(r1.lane<6>(), 2.0f);
2620 EXPECT_EQ(r1.lane<7>(), 1.0f);
2621
2622 // Select in the other
2623 vfloat8 r2 = select(b, a, cond);
2624 EXPECT_EQ(r2.lane<0>(), 1.0f);
2625 EXPECT_EQ(r2.lane<1>(), 2.0f);
2626 EXPECT_EQ(r2.lane<2>(), 3.0f);
2627 EXPECT_EQ(r2.lane<3>(), 4.0f);
2628 EXPECT_EQ(r2.lane<4>(), 1.0f);
2629 EXPECT_EQ(r2.lane<5>(), 2.0f);
2630 EXPECT_EQ(r2.lane<6>(), 3.0f);
2631 EXPECT_EQ(r2.lane<7>(), 4.0f);
2632 }
2633
2634 /** @brief Test vfloat8 select MSB only. */
TEST(vfloat8,select_msb)2635 TEST(vfloat8, select_msb)
2636 {
2637 int msb_set = static_cast<int>(0x80000000);
2638 vint8 msb(msb_set, 0, msb_set, 0, msb_set, 0, msb_set, 0);
2639 vmask8 cond(msb.m);
2640
2641 vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0f);
2642 vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0f);
2643
2644 // Select in one direction
2645 vfloat8 r1 = select(a, b, cond);
2646 EXPECT_EQ(r1.lane<0>(), 4.0f);
2647 EXPECT_EQ(r1.lane<1>(), 3.0f);
2648 EXPECT_EQ(r1.lane<2>(), 2.0f);
2649 EXPECT_EQ(r1.lane<3>(), 1.0f);
2650 EXPECT_EQ(r1.lane<4>(), 4.0f);
2651 EXPECT_EQ(r1.lane<5>(), 3.0f);
2652 EXPECT_EQ(r1.lane<6>(), 2.0f);
2653 EXPECT_EQ(r1.lane<7>(), 1.0f);
2654
2655 // Select in the other
2656 vfloat8 r2 = select(b, a, cond);
2657 EXPECT_EQ(r2.lane<0>(), 1.0f);
2658 EXPECT_EQ(r2.lane<1>(), 2.0f);
2659 EXPECT_EQ(r2.lane<2>(), 3.0f);
2660 EXPECT_EQ(r2.lane<3>(), 4.0f);
2661 EXPECT_EQ(r2.lane<4>(), 1.0f);
2662 EXPECT_EQ(r2.lane<5>(), 2.0f);
2663 EXPECT_EQ(r2.lane<6>(), 3.0f);
2664 EXPECT_EQ(r2.lane<7>(), 4.0f);
2665 }
2666
2667 /** @brief Test vfloat8 gatherf. */
TEST(vfloat8,gatherf)2668 TEST(vfloat8, gatherf)
2669 {
2670 vint8 indices(0, 4, 3, 2, 7, 4, 3, 2);
2671 vfloat8 r = gatherf(f32_data, indices);
2672 EXPECT_EQ(r.lane<0>(), 0.0f);
2673 EXPECT_EQ(r.lane<1>(), 4.0f);
2674 EXPECT_EQ(r.lane<2>(), 3.0f);
2675 EXPECT_EQ(r.lane<3>(), 2.0f);
2676 EXPECT_EQ(r.lane<4>(), 7.0f);
2677 EXPECT_EQ(r.lane<5>(), 4.0f);
2678 EXPECT_EQ(r.lane<6>(), 3.0f);
2679 EXPECT_EQ(r.lane<7>(), 2.0f);
2680 }
2681
2682 /** @brief Test vfloat8 store. */
TEST(vfloat8,store)2683 TEST(vfloat8, store)
2684 {
2685 alignas(32) float out[9];
2686 vfloat8 a(f32_data);
2687 store(a, &(out[1]));
2688 EXPECT_EQ(out[1], 0.0f);
2689 EXPECT_EQ(out[2], 1.0f);
2690 EXPECT_EQ(out[3], 2.0f);
2691 EXPECT_EQ(out[4], 3.0f);
2692 EXPECT_EQ(out[5], 4.0f);
2693 EXPECT_EQ(out[6], 5.0f);
2694 EXPECT_EQ(out[7], 6.0f);
2695 EXPECT_EQ(out[8], 7.0f);
2696 }
2697
2698 /** @brief Test vfloat8 storea. */
TEST(vfloat8,storea)2699 TEST(vfloat8, storea)
2700 {
2701 alignas(32) float out[9];
2702 vfloat8 a(f32_data);
2703 store(a, out);
2704 EXPECT_EQ(out[0], 0.0f);
2705 EXPECT_EQ(out[1], 1.0f);
2706 EXPECT_EQ(out[2], 2.0f);
2707 EXPECT_EQ(out[3], 3.0f);
2708 EXPECT_EQ(out[4], 4.0f);
2709 EXPECT_EQ(out[5], 5.0f);
2710 EXPECT_EQ(out[6], 6.0f);
2711 EXPECT_EQ(out[7], 7.0f);
2712 }
2713
2714 /** @brief Test vfloat8 float_to_int. */
TEST(vfloat8,float_to_int)2715 TEST(vfloat8, float_to_int)
2716 {
2717 vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2718 vint8 r = float_to_int(a);
2719 EXPECT_EQ(r.lane<0>(), 1);
2720 EXPECT_EQ(r.lane<1>(), 1);
2721 EXPECT_EQ(r.lane<2>(), 1);
2722 EXPECT_EQ(r.lane<3>(), 4);
2723 EXPECT_EQ(r.lane<4>(), 1);
2724 EXPECT_EQ(r.lane<5>(), 1);
2725 EXPECT_EQ(r.lane<6>(), 1);
2726 EXPECT_EQ(r.lane<7>(), 4);
2727 }
2728
2729 // vint8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2730
2731 /** @brief Test unaligned vint8 data load. */
TEST(vint8,UnalignedLoad)2732 TEST(vint8, UnalignedLoad)
2733 {
2734 vint8 a(&(s32_data[1]));
2735 EXPECT_EQ(a.lane<0>(), 1);
2736 EXPECT_EQ(a.lane<1>(), 2);
2737 EXPECT_EQ(a.lane<2>(), 3);
2738 EXPECT_EQ(a.lane<3>(), 4);
2739 EXPECT_EQ(a.lane<4>(), 5);
2740 EXPECT_EQ(a.lane<5>(), 6);
2741 EXPECT_EQ(a.lane<6>(), 7);
2742 EXPECT_EQ(a.lane<7>(), 8);
2743 }
2744
2745 /** @brief Test unaligned vint8 data load. */
TEST(vint8,UnalignedLoad8)2746 TEST(vint8, UnalignedLoad8)
2747 {
2748 vint8 a(&(u8_data[1]));
2749 EXPECT_EQ(a.lane<0>(), 1);
2750 EXPECT_EQ(a.lane<1>(), 2);
2751 EXPECT_EQ(a.lane<2>(), 3);
2752 EXPECT_EQ(a.lane<3>(), 4);
2753 EXPECT_EQ(a.lane<4>(), 5);
2754 EXPECT_EQ(a.lane<5>(), 6);
2755 EXPECT_EQ(a.lane<6>(), 7);
2756 EXPECT_EQ(a.lane<7>(), 8);
2757 }
2758
2759 /** @brief Test scalar duplicated vint8 load. */
TEST(vint8,ScalarDupLoad)2760 TEST(vint8, ScalarDupLoad)
2761 {
2762 vint8 a(42);
2763 EXPECT_EQ(a.lane<0>(), 42);
2764 EXPECT_EQ(a.lane<1>(), 42);
2765 EXPECT_EQ(a.lane<2>(), 42);
2766 EXPECT_EQ(a.lane<3>(), 42);
2767 EXPECT_EQ(a.lane<4>(), 42);
2768 EXPECT_EQ(a.lane<5>(), 42);
2769 EXPECT_EQ(a.lane<6>(), 42);
2770 EXPECT_EQ(a.lane<7>(), 42);
2771 }
2772
2773 /** @brief Test scalar vint8 load. */
TEST(vint8,ScalarLoad)2774 TEST(vint8, ScalarLoad)
2775 {
2776 vint8 a(11, 22, 33, 44, 55, 66, 77, 88);
2777 EXPECT_EQ(a.lane<0>(), 11);
2778 EXPECT_EQ(a.lane<1>(), 22);
2779 EXPECT_EQ(a.lane<2>(), 33);
2780 EXPECT_EQ(a.lane<3>(), 44);
2781 EXPECT_EQ(a.lane<4>(), 55);
2782 EXPECT_EQ(a.lane<5>(), 66);
2783 EXPECT_EQ(a.lane<6>(), 77);
2784 EXPECT_EQ(a.lane<7>(), 88);
2785 }
2786
2787 /** @brief Test copy vint8 load. */
TEST(vint8,CopyLoad)2788 TEST(vint8, CopyLoad)
2789 {
2790 vint8 s(11, 22, 33, 44, 55, 66, 77, 88);
2791 vint8 a(s.m);
2792 EXPECT_EQ(a.lane<0>(), 11);
2793 EXPECT_EQ(a.lane<1>(), 22);
2794 EXPECT_EQ(a.lane<2>(), 33);
2795 EXPECT_EQ(a.lane<3>(), 44);
2796 EXPECT_EQ(a.lane<4>(), 55);
2797 EXPECT_EQ(a.lane<5>(), 66);
2798 EXPECT_EQ(a.lane<6>(), 77);
2799 EXPECT_EQ(a.lane<7>(), 88);
2800 }
2801
2802 /** @brief Test vint8 zero. */
TEST(vint8,Zero)2803 TEST(vint8, Zero)
2804 {
2805 vint8 a = vint8::zero();
2806 EXPECT_EQ(a.lane<0>(), 0);
2807 EXPECT_EQ(a.lane<1>(), 0);
2808 EXPECT_EQ(a.lane<2>(), 0);
2809 EXPECT_EQ(a.lane<3>(), 0);
2810 EXPECT_EQ(a.lane<4>(), 0);
2811 EXPECT_EQ(a.lane<5>(), 0);
2812 EXPECT_EQ(a.lane<6>(), 0);
2813 EXPECT_EQ(a.lane<7>(), 0);
2814 }
2815
2816 /** @brief Test vint8 load1. */
TEST(vint8,Load1)2817 TEST(vint8, Load1)
2818 {
2819 int s = 42;
2820 vint8 a = vint8::load1(&s);
2821 EXPECT_EQ(a.lane<0>(), 42);
2822 EXPECT_EQ(a.lane<1>(), 42);
2823 EXPECT_EQ(a.lane<2>(), 42);
2824 EXPECT_EQ(a.lane<3>(), 42);
2825 EXPECT_EQ(a.lane<4>(), 42);
2826 EXPECT_EQ(a.lane<5>(), 42);
2827 EXPECT_EQ(a.lane<6>(), 42);
2828 EXPECT_EQ(a.lane<7>(), 42);
2829 }
2830
2831 /** @brief Test vint8 loada. */
TEST(vint8,Loada)2832 TEST(vint8, Loada)
2833 {
2834 vint8 a = vint8::loada(&(s32_data[0]));
2835 EXPECT_EQ(a.lane<0>(), 0);
2836 EXPECT_EQ(a.lane<1>(), 1);
2837 EXPECT_EQ(a.lane<2>(), 2);
2838 EXPECT_EQ(a.lane<3>(), 3);
2839 EXPECT_EQ(a.lane<4>(), 4);
2840 EXPECT_EQ(a.lane<5>(), 5);
2841 EXPECT_EQ(a.lane<6>(), 6);
2842 EXPECT_EQ(a.lane<7>(), 7);
2843 }
2844
2845 /** @brief Test vint8 lane_id. */
TEST(vint8,LaneID)2846 TEST(vint8, LaneID)
2847 {
2848 vint8 a = vint8::lane_id();
2849 EXPECT_EQ(a.lane<0>(), 0);
2850 EXPECT_EQ(a.lane<1>(), 1);
2851 EXPECT_EQ(a.lane<2>(), 2);
2852 EXPECT_EQ(a.lane<3>(), 3);
2853 EXPECT_EQ(a.lane<4>(), 4);
2854 EXPECT_EQ(a.lane<5>(), 5);
2855 EXPECT_EQ(a.lane<6>(), 6);
2856 EXPECT_EQ(a.lane<7>(), 7);
2857 }
2858
2859 /** @brief Test vint8 add. */
TEST(vint8,vadd)2860 TEST(vint8, vadd)
2861 {
2862 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2863 vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2864 a = a + b;
2865 EXPECT_EQ(a.lane<0>(), 1 + 2);
2866 EXPECT_EQ(a.lane<1>(), 2 + 3);
2867 EXPECT_EQ(a.lane<2>(), 3 + 4);
2868 EXPECT_EQ(a.lane<3>(), 4 + 5);
2869 EXPECT_EQ(a.lane<4>(), 1 + 2);
2870 EXPECT_EQ(a.lane<5>(), 2 + 3);
2871 EXPECT_EQ(a.lane<6>(), 3 + 4);
2872 EXPECT_EQ(a.lane<7>(), 4 + 5);
2873 }
2874
2875
2876 /** @brief Test vint8 self-add. */
TEST(vint8,vselfadd1)2877 TEST(vint8, vselfadd1)
2878 {
2879 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2880 vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2881 a += b;
2882
2883 EXPECT_EQ(a.lane<0>(), 1 + 2);
2884 EXPECT_EQ(a.lane<1>(), 2 + 3);
2885 EXPECT_EQ(a.lane<2>(), 3 + 4);
2886 EXPECT_EQ(a.lane<3>(), 4 + 5);
2887 EXPECT_EQ(a.lane<4>(), 1 + 2);
2888 EXPECT_EQ(a.lane<5>(), 2 + 3);
2889 EXPECT_EQ(a.lane<6>(), 3 + 4);
2890 EXPECT_EQ(a.lane<7>(), 4 + 5);
2891 }
2892
2893 /** @brief Test vint8 sub. */
TEST(vint8,vsub)2894 TEST(vint8, vsub)
2895 {
2896 vint8 a(1, 2, 4, 4, 1, 2, 4, 4);
2897 vint8 b(2, 3, 3, 5, 2, 3, 3, 5);
2898 a = a - b;
2899 EXPECT_EQ(a.lane<0>(), 1 - 2);
2900 EXPECT_EQ(a.lane<1>(), 2 - 3);
2901 EXPECT_EQ(a.lane<2>(), 4 - 3);
2902 EXPECT_EQ(a.lane<3>(), 4 - 5);
2903 EXPECT_EQ(a.lane<4>(), 1 - 2);
2904 EXPECT_EQ(a.lane<5>(), 2 - 3);
2905 EXPECT_EQ(a.lane<6>(), 4 - 3);
2906 EXPECT_EQ(a.lane<7>(), 4 - 5);
2907 }
2908
2909 /** @brief Test vint8 mul. */
TEST(vint8,vmul)2910 TEST(vint8, vmul)
2911 {
2912 vint8 a(1, 2, 4, 4, 1, 2, 4, 4);
2913 vint8 b(2, 3, 3, 5, 2, 3, 3, 5);
2914 a = a * b;
2915 EXPECT_EQ(a.lane<0>(), 1 * 2);
2916 EXPECT_EQ(a.lane<1>(), 2 * 3);
2917 EXPECT_EQ(a.lane<2>(), 4 * 3);
2918 EXPECT_EQ(a.lane<3>(), 4 * 5);
2919 EXPECT_EQ(a.lane<4>(), 1 * 2);
2920 EXPECT_EQ(a.lane<5>(), 2 * 3);
2921 EXPECT_EQ(a.lane<6>(), 4 * 3);
2922 EXPECT_EQ(a.lane<7>(), 4 * 5);
2923 }
2924
2925 /** @brief Test vint8 bitwise invert. */
TEST(vint8,bit_invert)2926 TEST(vint8, bit_invert)
2927 {
2928 vint8 a(-1, 0, 1, 2, -1, 0, 1, 2);
2929 a = ~a;
2930 EXPECT_EQ(a.lane<0>(), ~-1);
2931 EXPECT_EQ(a.lane<1>(), ~0);
2932 EXPECT_EQ(a.lane<2>(), ~1);
2933 EXPECT_EQ(a.lane<3>(), ~2);
2934 EXPECT_EQ(a.lane<4>(), ~-1);
2935 EXPECT_EQ(a.lane<5>(), ~0);
2936 EXPECT_EQ(a.lane<6>(), ~1);
2937 EXPECT_EQ(a.lane<7>(), ~2);
2938 }
2939
2940 /** @brief Test vint8 bitwise or. */
TEST(vint8,bit_vor)2941 TEST(vint8, bit_vor)
2942 {
2943 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2944 vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2945 a = a | b;
2946 EXPECT_EQ(a.lane<0>(), 3);
2947 EXPECT_EQ(a.lane<1>(), 3);
2948 EXPECT_EQ(a.lane<2>(), 7);
2949 EXPECT_EQ(a.lane<3>(), 5);
2950 EXPECT_EQ(a.lane<4>(), 3);
2951 EXPECT_EQ(a.lane<5>(), 3);
2952 EXPECT_EQ(a.lane<6>(), 7);
2953 EXPECT_EQ(a.lane<7>(), 5);
2954 }
2955
2956 /** @brief Test vint8 bitwise and. */
TEST(vint8,bit_vand)2957 TEST(vint8, bit_vand)
2958 {
2959 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2960 vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2961 a = a & b;
2962 EXPECT_EQ(a.lane<0>(), 0);
2963 EXPECT_EQ(a.lane<1>(), 2);
2964 EXPECT_EQ(a.lane<2>(), 0);
2965 EXPECT_EQ(a.lane<3>(), 4);
2966 EXPECT_EQ(a.lane<4>(), 0);
2967 EXPECT_EQ(a.lane<5>(), 2);
2968 EXPECT_EQ(a.lane<6>(), 0);
2969 EXPECT_EQ(a.lane<7>(), 4);
2970 }
2971
2972 /** @brief Test vint8 bitwise xor. */
TEST(vint8,bit_vxor)2973 TEST(vint8, bit_vxor)
2974 {
2975 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2976 vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2977 a = a ^ b;
2978 EXPECT_EQ(a.lane<0>(), 3);
2979 EXPECT_EQ(a.lane<1>(), 1);
2980 EXPECT_EQ(a.lane<2>(), 7);
2981 EXPECT_EQ(a.lane<3>(), 1);
2982 EXPECT_EQ(a.lane<4>(), 3);
2983 EXPECT_EQ(a.lane<5>(), 1);
2984 EXPECT_EQ(a.lane<6>(), 7);
2985 EXPECT_EQ(a.lane<7>(), 1);
2986 }
2987
2988 /** @brief Test vint8 ceq. */
TEST(vint8,ceq)2989 TEST(vint8, ceq)
2990 {
2991 vint8 a1(1, 2, 3, 4, 1, 2, 3, 4);
2992 vint8 b1(0, 1, 2, 3, 0, 1, 2, 3);
2993 vmask8 r1 = a1 == b1;
2994 EXPECT_EQ(0u, mask(r1));
2995 EXPECT_EQ(false, any(r1));
2996 EXPECT_EQ(false, all(r1));
2997
2998 vint8 a2(1, 2, 3, 4, 1, 2, 3, 4);
2999 vint8 b2(1, 0, 0, 0, 1, 0, 0, 0);
3000 vmask8 r2 = a2 == b2;
3001 EXPECT_EQ(0x11u, mask(r2));
3002 EXPECT_EQ(true, any(r2));
3003 EXPECT_EQ(false, all(r2));
3004
3005 vint8 a3(1, 2, 3, 4, 1, 2, 3, 4);
3006 vint8 b3(1, 0, 3, 0, 1, 0, 3, 0);
3007 vmask8 r3 = a3 == b3;
3008 EXPECT_EQ(0x55u, mask(r3));
3009 EXPECT_EQ(true, any(r3));
3010 EXPECT_EQ(false, all(r3));
3011
3012 vint8 a4(1, 2, 3, 4, 1, 2, 3, 4);
3013 vmask8 r4 = a4 == a4;
3014 EXPECT_EQ(0xFFu, mask(r4));
3015 EXPECT_EQ(true, any(r4));
3016 EXPECT_EQ(true, all(r4));
3017 }
3018
3019 /** @brief Test vint8 cne. */
TEST(vint8,cne)3020 TEST(vint8, cne)
3021 {
3022 vint8 a1(1, 2, 3, 4, 1, 2, 3, 4);
3023 vint8 b1(0, 1, 2, 3, 0, 1, 2, 3);
3024 vmask8 r1 = a1 != b1;
3025 EXPECT_EQ(0xFFu, mask(r1));
3026 EXPECT_EQ(true, any(r1));
3027 EXPECT_EQ(true, all(r1));
3028
3029 vint8 a2(1, 2, 3, 4, 1, 2, 3, 4);
3030 vint8 b2(1, 0, 0, 0, 1, 0, 0, 0);
3031 vmask8 r2 = a2 != b2;
3032 EXPECT_EQ(0xEEu, mask(r2));
3033 EXPECT_EQ(true, any(r2));
3034 EXPECT_EQ(false, all(r2));
3035
3036 vint8 a3(1, 2, 3, 4, 1, 2, 3, 4);
3037 vint8 b3(1, 0, 3, 0, 1, 0, 3, 0);
3038 vmask8 r3 = a3 != b3;
3039 EXPECT_EQ(0xAAu, mask(r3));
3040 EXPECT_EQ(true, any(r3));
3041 EXPECT_EQ(false, all(r3));
3042
3043 vint8 a4(1, 2, 3, 4, 1, 2, 3, 4);
3044 vmask8 r4 = a4 != a4;
3045 EXPECT_EQ(0u, mask(r4));
3046 EXPECT_EQ(false, any(r4));
3047 EXPECT_EQ(false, all(r4));
3048 }
3049
3050 /** @brief Test vint8 clt. */
TEST(vint8,clt)3051 TEST(vint8, clt)
3052 {
3053 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3054 vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3055 vmask8 r = a < b;
3056 EXPECT_EQ(0xAAu, mask(r));
3057 }
3058
3059 /** @brief Test vint8 cgt. */
TEST(vint8,cgt)3060 TEST(vint8, cgt)
3061 {
3062 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3063 vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3064 vmask8 r = a > b;
3065 EXPECT_EQ(0x11u, mask(r));
3066 }
3067
3068 /** @brief Test vint8 min. */
TEST(vint8,min)3069 TEST(vint8, min)
3070 {
3071 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3072 vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3073 vint8 r = min(a, b);
3074 EXPECT_EQ(r.lane<0>(), 0);
3075 EXPECT_EQ(r.lane<1>(), 2);
3076 EXPECT_EQ(r.lane<2>(), 3);
3077 EXPECT_EQ(r.lane<3>(), 4);
3078 EXPECT_EQ(r.lane<4>(), 0);
3079 EXPECT_EQ(r.lane<5>(), 2);
3080 EXPECT_EQ(r.lane<6>(), 3);
3081 EXPECT_EQ(r.lane<7>(), 4);
3082 }
3083
3084 /** @brief Test vint8 max. */
TEST(vint8,max)3085 TEST(vint8, max)
3086 {
3087 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3088 vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3089 vint8 r = max(a, b);
3090 EXPECT_EQ(r.lane<0>(), 1);
3091 EXPECT_EQ(r.lane<1>(), 3);
3092 EXPECT_EQ(r.lane<2>(), 3);
3093 EXPECT_EQ(r.lane<3>(), 5);
3094 EXPECT_EQ(r.lane<4>(), 1);
3095 EXPECT_EQ(r.lane<5>(), 3);
3096 EXPECT_EQ(r.lane<6>(), 3);
3097 EXPECT_EQ(r.lane<7>(), 5);
3098 }
3099
3100 /** @brief Test vint8 lsl. */
TEST(vint8,lsl)3101 TEST(vint8, lsl)
3102 {
3103 vint8 a(1, 2, 4, -4, 1, 2, 4, -4);
3104 a = lsl<0>(a);
3105 EXPECT_EQ(a.lane<0>(), 1);
3106 EXPECT_EQ(a.lane<1>(), 2);
3107 EXPECT_EQ(a.lane<2>(), 4);
3108 EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC));
3109 EXPECT_EQ(a.lane<4>(), 1);
3110 EXPECT_EQ(a.lane<5>(), 2);
3111 EXPECT_EQ(a.lane<6>(), 4);
3112 EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFFC));
3113
3114
3115 a = lsl<1>(a);
3116 EXPECT_EQ(a.lane<0>(), 2);
3117 EXPECT_EQ(a.lane<1>(), 4);
3118 EXPECT_EQ(a.lane<2>(), 8);
3119 EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFF8));
3120 EXPECT_EQ(a.lane<4>(), 2);
3121 EXPECT_EQ(a.lane<5>(), 4);
3122 EXPECT_EQ(a.lane<6>(), 8);
3123 EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFF8));
3124
3125 a = lsl<2>(a);
3126 EXPECT_EQ(a.lane<0>(), 8);
3127 EXPECT_EQ(a.lane<1>(), 16);
3128 EXPECT_EQ(a.lane<2>(), 32);
3129 EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFE0));
3130 EXPECT_EQ(a.lane<4>(), 8);
3131 EXPECT_EQ(a.lane<5>(), 16);
3132 EXPECT_EQ(a.lane<6>(), 32);
3133 EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFE0));
3134 }
3135
3136 /** @brief Test vint8 lsr. */
TEST(vint8,lsr)3137 TEST(vint8, lsr)
3138 {
3139 vint8 a(1, 2, 4, -4, 1, 2, 4, -4);
3140 a = lsr<0>(a);
3141 EXPECT_EQ(a.lane<0>(), 1);
3142 EXPECT_EQ(a.lane<1>(), 2);
3143 EXPECT_EQ(a.lane<2>(), 4);
3144 EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC));
3145 EXPECT_EQ(a.lane<4>(), 1);
3146 EXPECT_EQ(a.lane<5>(), 2);
3147 EXPECT_EQ(a.lane<6>(), 4);
3148 EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFFC));
3149
3150
3151 a = lsr<1>(a);
3152 EXPECT_EQ(a.lane<0>(), 0);
3153 EXPECT_EQ(a.lane<1>(), 1);
3154 EXPECT_EQ(a.lane<2>(), 2);
3155 EXPECT_EQ(a.lane<3>(), 0x7FFFFFFE);
3156 EXPECT_EQ(a.lane<4>(), 0);
3157 EXPECT_EQ(a.lane<5>(), 1);
3158 EXPECT_EQ(a.lane<6>(), 2);
3159 EXPECT_EQ(a.lane<7>(), 0x7FFFFFFE);
3160
3161 a = lsr<2>(a);
3162 EXPECT_EQ(a.lane<0>(), 0);
3163 EXPECT_EQ(a.lane<1>(), 0);
3164 EXPECT_EQ(a.lane<2>(), 0);
3165 EXPECT_EQ(a.lane<3>(), 0x1FFFFFFF);
3166 EXPECT_EQ(a.lane<4>(), 0);
3167 EXPECT_EQ(a.lane<5>(), 0);
3168 EXPECT_EQ(a.lane<6>(), 0);
3169 EXPECT_EQ(a.lane<7>(), 0x1FFFFFFF);
3170 }
3171
3172 /** @brief Test vint8 asr. */
TEST(vint8,asr)3173 TEST(vint8, asr)
3174 {
3175 vint8 a(1, 2, 4, -4, 1, 2, 4, -4);
3176 a = asr<0>(a);
3177 EXPECT_EQ(a.lane<0>(), 1);
3178 EXPECT_EQ(a.lane<1>(), 2);
3179 EXPECT_EQ(a.lane<2>(), 4);
3180 EXPECT_EQ(a.lane<3>(), -4);
3181 EXPECT_EQ(a.lane<4>(), 1);
3182 EXPECT_EQ(a.lane<5>(), 2);
3183 EXPECT_EQ(a.lane<6>(), 4);
3184 EXPECT_EQ(a.lane<7>(), -4);
3185
3186 a = asr<1>(a);
3187 EXPECT_EQ(a.lane<0>(), 0);
3188 EXPECT_EQ(a.lane<1>(), 1);
3189 EXPECT_EQ(a.lane<2>(), 2);
3190 EXPECT_EQ(a.lane<3>(), -2);
3191 EXPECT_EQ(a.lane<4>(), 0);
3192 EXPECT_EQ(a.lane<5>(), 1);
3193 EXPECT_EQ(a.lane<6>(), 2);
3194 EXPECT_EQ(a.lane<7>(), -2);
3195
3196 // Note - quirk of asr is that you will get "stuck" at -1
3197 a = asr<2>(a);
3198 EXPECT_EQ(a.lane<0>(), 0);
3199 EXPECT_EQ(a.lane<1>(), 0);
3200 EXPECT_EQ(a.lane<2>(), 0);
3201 EXPECT_EQ(a.lane<3>(), -1);
3202 EXPECT_EQ(a.lane<4>(), 0);
3203 EXPECT_EQ(a.lane<5>(), 0);
3204 EXPECT_EQ(a.lane<6>(), 0);
3205 EXPECT_EQ(a.lane<7>(), -1);
3206 }
3207
3208 /** @brief Test vint8 hmin. */
TEST(vint8,hmin)3209 TEST(vint8, hmin)
3210 {
3211 vint8 a1(1, 2, 1, 2, 1, 2, 1, 2);
3212 vint8 r1 = hmin(a1);
3213 EXPECT_EQ(r1.lane<0>(), 1);
3214 EXPECT_EQ(r1.lane<1>(), 1);
3215 EXPECT_EQ(r1.lane<2>(), 1);
3216 EXPECT_EQ(r1.lane<3>(), 1);
3217 EXPECT_EQ(r1.lane<4>(), 1);
3218 EXPECT_EQ(r1.lane<5>(), 1);
3219 EXPECT_EQ(r1.lane<6>(), 1);
3220 EXPECT_EQ(r1.lane<7>(), 1);
3221
3222 vint8 a2(1, 2, -1, 5, 1, 2, -1, 5);
3223 vint8 r2 = hmin(a2);
3224 EXPECT_EQ(r2.lane<0>(), -1);
3225 EXPECT_EQ(r2.lane<1>(), -1);
3226 EXPECT_EQ(r2.lane<2>(), -1);
3227 EXPECT_EQ(r2.lane<3>(), -1);
3228 EXPECT_EQ(r2.lane<4>(), -1);
3229 EXPECT_EQ(r2.lane<5>(), -1);
3230 EXPECT_EQ(r2.lane<6>(), -1);
3231 EXPECT_EQ(r2.lane<7>(), -1);
3232 }
3233
3234 /** @brief Test vint8 hmax. */
TEST(vint8,hmax)3235 TEST(vint8, hmax)
3236 {
3237 vint8 a1(1, 2, 1, 2, 1, 3, 1, 2);
3238 vint8 r1 = hmax(a1);
3239 EXPECT_EQ(r1.lane<0>(), 3);
3240 EXPECT_EQ(r1.lane<1>(), 3);
3241 EXPECT_EQ(r1.lane<2>(), 3);
3242 EXPECT_EQ(r1.lane<3>(), 3);
3243 EXPECT_EQ(r1.lane<4>(), 3);
3244 EXPECT_EQ(r1.lane<5>(), 3);
3245 EXPECT_EQ(r1.lane<6>(), 3);
3246 EXPECT_EQ(r1.lane<7>(), 3);
3247
3248 vint8 a2(1, 2, -1, 5, 1, 2, -1, 5);
3249 vint8 r2 = hmax(a2);
3250 EXPECT_EQ(r2.lane<0>(), 5);
3251 EXPECT_EQ(r2.lane<1>(), 5);
3252 EXPECT_EQ(r2.lane<2>(), 5);
3253 EXPECT_EQ(r2.lane<3>(), 5);
3254 EXPECT_EQ(r2.lane<4>(), 5);
3255 EXPECT_EQ(r2.lane<5>(), 5);
3256 EXPECT_EQ(r2.lane<6>(), 5);
3257 EXPECT_EQ(r2.lane<7>(), 5);
3258 }
3259
3260 /** @brief Test vint8 storea. */
TEST(vint8, storea)
{
    // Destination is explicitly 32-byte aligned for the aligned store
    alignas(32) int out[8];

    vint8 a(s32_data);
    storea(a, out);

    // The round trip is expected to yield 0..7 in lane order
    for (int i = 0; i < 8; i++)
    {
        EXPECT_EQ(out[i], i);
    }
}
3275
3276 /** @brief Test vint8 store. */
TEST(vint8, store)
{
    // The array is 32-byte aligned, so writing from element 1 onwards means
    // the store target is offset from that alignment by one int
    alignas(32) int out[9];

    vint8 a(s32_data);
    store(a, out + 1);

    // The stored lanes are expected to be 0..7, shifted up by one element
    for (int i = 0; i < 8; i++)
    {
        EXPECT_EQ(out[i + 1], i);
    }
}
3291
3292 /** @brief Test vint8 store_nbytes. */
TEST(vint8, store_nbytes)
{
    // Destination has room for exactly two ints; only those two are checked
    alignas(32) int out[2];
    uint8_t* dst = reinterpret_cast<uint8_t*>(&out);

    vint8 a(42, 314, 75, 90, 42, 314, 75, 90);
    store_nbytes(a, dst);

    // The first two lanes of the vector must land in the output
    EXPECT_EQ(out[0], 42);
    EXPECT_EQ(out[1], 314);
}
3301
3302 /** @brief Test vint8 store_lanes_masked. */
TEST(vint8, store_lanes_masked)
{
    // Destination starts zeroed; the stages below run in order and each one
    // builds on whatever the previous stage left in memory
    int dest[8] { 0 };

    // Stage 1: no lane enabled, so the destination must stay all zero
    {
        vmask8 none = vint8(0) == vint8(1);
        vint8 ones = vint8(1);
        store_lanes_masked(dest, ones, none);

        vint8 readback(dest);
        vint8 expected = vint8::zero();
        EXPECT_TRUE(all(readback == expected));
    }

    // Stage 2: only the low four lanes enabled, the rest keep their old value
    {
        vmask8 low_half = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1);
        vint8 twos = vint8(2);
        store_lanes_masked(dest, twos, low_half);

        vint8 readback(dest);
        vint8 expected = vint8(2, 2, 2, 2, 0, 0, 0, 0);
        EXPECT_TRUE(all(readback == expected));
    }

    // Stage 3: every lane enabled, so the whole destination is overwritten
    {
        vmask8 every = vint8(1) == vint8(1);
        vint8 threes = vint8(3);
        store_lanes_masked(dest, threes, every);

        vint8 readback(dest);
        vint8 expected = vint8(3);
        EXPECT_TRUE(all(readback == expected));
    }
}
3334
3335 /** @brief Test vint8 store_lanes_masked to unaligned address. */
TEST(vint8, store_lanes_masked_unaligned)
{
    // Pin the byte buffer to a 32-byte boundary so that "buffer + 1" is
    // guaranteed to be misaligned for both int and full-vector accesses.
    // Without the alignas the offset address could, depending on stack
    // layout, happen to be aligned and the test would not exercise the
    // unaligned path it is named for.
    alignas(32) int8_t resulta[33] { 0 };

    // One byte past the aligned base; 32 bytes of payload still fit in the
    // 33 byte buffer
    int* unaligned = reinterpret_cast<int*>(resulta + 1);

    // Store nothing: destination must stay all zero
    vmask8 mask1 = vint8(0) == vint8(1);
    vint8 data1 = vint8(1);

    store_lanes_masked(unaligned, data1, mask1);
    vint8 result1v(unaligned);
    vint8 expect1v = vint8::zero();
    EXPECT_TRUE(all(result1v == expect1v));

    // Store half: only the low four lanes are overwritten
    vmask8 mask2 = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1);
    vint8 data2 = vint8(2);

    store_lanes_masked(unaligned, data2, mask2);
    vint8 result2v(unaligned);
    vint8 expect2v = vint8(2, 2, 2, 2, 0, 0, 0, 0);
    EXPECT_TRUE(all(result2v == expect2v));

    // Store all: every lane is overwritten
    vmask8 mask3 = vint8(1) == vint8(1);
    vint8 data3 = vint8(3);

    store_lanes_masked(unaligned, data3, mask3);
    vint8 result3v(unaligned);
    vint8 expect3v = vint8(3);
    EXPECT_TRUE(all(result3v == expect3v));
}
3367
3368 /** @brief Test vint8 gatheri. */
TEST(vint8, gatheri)
{
    // Out-of-order and repeated indices into the shared s32_data table
    vint8 lookup(0, 4, 3, 2, 7, 4, 3, 2);
    vint8 gathered = gatheri(s32_data, lookup);

    // Each gathered lane is expected to equal the index that selected it
    EXPECT_EQ(gathered.lane<0>(), 0);
    EXPECT_EQ(gathered.lane<1>(), 4);
    EXPECT_EQ(gathered.lane<2>(), 3);
    EXPECT_EQ(gathered.lane<3>(), 2);
    EXPECT_EQ(gathered.lane<4>(), 7);
    EXPECT_EQ(gathered.lane<5>(), 4);
    EXPECT_EQ(gathered.lane<6>(), 3);
    EXPECT_EQ(gathered.lane<7>(), 2);
}
3382
3383 /** @brief Test vint8 pack_low_bytes. */
TEST(vint8, pack_low_bytes)
{
    vint8 a(1, 2, 3, 4, 2, 3, 4, 5);
    vint8 packed = pack_low_bytes(a);

    // Lanes 0-3 are expected packed into result lane 0 and lanes 4-7 into
    // result lane 1, with the lowest source lane in the least significant byte
    const int expect_lo = (4 << 24) | (3 << 16) | (2 << 8) | (1 << 0);
    const int expect_hi = (5 << 24) | (4 << 16) | (3 << 8) | (2 << 0);

    EXPECT_EQ(packed.lane<0>(), expect_lo);
    EXPECT_EQ(packed.lane<1>(), expect_hi);
}
3391
3392 /** @brief Test vint8 select. */
TEST(vint8, select)
{
    // Build a mask that is set in the even lanes (where both inputs hold 1)
    vint8 all_ones(1, 1, 1, 1, 1, 1, 1, 1);
    vint8 one_two(1, 2, 1, 2, 1, 2, 1, 2);
    vmask8 even_lanes = all_ones == one_two;

    vint8 a(1, 3, 3, 1, 1, 3, 3, 1);
    vint8 b(4, 2, 2, 4, 4, 2, 2, 4);

    // Lanes with the mask set take the second operand, others take the first
    vint8 pick_ab = select(a, b, even_lanes);
    EXPECT_EQ(pick_ab.lane<0>(), 4);
    EXPECT_EQ(pick_ab.lane<1>(), 3);
    EXPECT_EQ(pick_ab.lane<2>(), 2);
    EXPECT_EQ(pick_ab.lane<3>(), 1);
    EXPECT_EQ(pick_ab.lane<4>(), 4);
    EXPECT_EQ(pick_ab.lane<5>(), 3);
    EXPECT_EQ(pick_ab.lane<6>(), 2);
    EXPECT_EQ(pick_ab.lane<7>(), 1);

    // Same mask with the operands swapped gives the complementary pattern
    vint8 pick_ba = select(b, a, even_lanes);
    EXPECT_EQ(pick_ba.lane<0>(), 1);
    EXPECT_EQ(pick_ba.lane<1>(), 2);
    EXPECT_EQ(pick_ba.lane<2>(), 3);
    EXPECT_EQ(pick_ba.lane<3>(), 4);
    EXPECT_EQ(pick_ba.lane<4>(), 1);
    EXPECT_EQ(pick_ba.lane<5>(), 2);
    EXPECT_EQ(pick_ba.lane<6>(), 3);
    EXPECT_EQ(pick_ba.lane<7>(), 4);
}
3422
3423 // vmask8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3424
3425 /** @brief Test vmask8 scalar literal constructor. */
TEST(vmask8, scalar_literal_construct)
{
    vfloat8 zeros(0.0f);
    vfloat8 ones(1.0f);

    // A mask built from "true" must choose the second operand in every lane
    vmask8 all_set(true);
    vfloat8 picked_set = select(zeros, ones, all_set);
    EXPECT_TRUE(all(picked_set == ones));

    // A mask built from "false" must choose the second operand in no lane
    vmask8 none_set(false);
    vfloat8 picked_clear = select(zeros, ones, none_set);
    EXPECT_FALSE(any(picked_clear == ones));
}
3441
3442 /** @brief Test vmask8 or. */
TEST(vmask8, or)
{
    vfloat8 ones(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);

    // Left mask: set in lanes 1, 3, 5, 7
    vfloat8 odd_pattern(0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f);
    vmask8 lhs = odd_pattern == ones;

    // Right mask: set in lanes 0, 1, 4, 5
    vfloat8 pair_pattern(1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
    vmask8 rhs = pair_pattern == ones;

    // Union of the two lane sets; the expected value has bit i set for lane i
    vmask8 combined = lhs | rhs;
    EXPECT_EQ(mask(combined), 0xBBu);
}
3456
3457 /** @brief Test vmask8 and. */
TEST(vmask8, and)
{
    vfloat8 ones(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);

    // Left mask: set in lanes 1, 3, 5, 7
    vfloat8 odd_pattern(0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f);
    vmask8 lhs = odd_pattern == ones;

    // Right mask: set in lanes 0, 1, 4, 5
    vfloat8 pair_pattern(1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
    vmask8 rhs = pair_pattern == ones;

    // Intersection of the two lane sets: only lanes 1 and 5 survive
    vmask8 combined = lhs & rhs;
    EXPECT_EQ(mask(combined), 0x22u);
}
3471
3472 /** @brief Test vmask8 xor. */
TEST(vmask8, xor)
{
    vfloat8 ones(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);

    // Left mask: set in lanes 1, 3, 5, 7
    vfloat8 odd_pattern(0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f);
    vmask8 lhs = odd_pattern == ones;

    // Right mask: set in lanes 0, 1, 4, 5
    vfloat8 pair_pattern(1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
    vmask8 rhs = pair_pattern == ones;

    // Lanes set in exactly one of the two masks: 0, 3, 4, 7
    vmask8 combined = lhs ^ rhs;
    EXPECT_EQ(mask(combined), 0x99u);
}
3486
3487 /** @brief Test vmask8 not. */
TEST(vmask8, not)
{
    // Source mask: set in lanes 1, 3, 5, 7
    vfloat8 odd_pattern(0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f);
    vfloat8 ones(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);
    vmask8 source = odd_pattern == ones;

    // Inversion must leave exactly the even lanes set
    vmask8 inverted = ~source;
    EXPECT_EQ(mask(inverted), 0x55u);
}
3496
3497 /** @brief Test vint8 table permute. */
TEST(vint8, vtable_8bt_32bi_32entry)
{
    // 32 byte-sized table entries supplied as two 4-lane vectors. The
    // expected results below correspond to entry 0 being the least
    // significant byte of the first word (0x03).
    vint4 raw_lo(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
    vint4 raw_hi(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);

    // Convert the raw tables into the form the lookup expects
    vint8 prepared_lo;
    vint8 prepared_hi;
    vtable_prepare(raw_lo, raw_hi, prepared_lo, prepared_hi);

    // Indices cover both halves of the table, including the last entry (31)
    vint8 lookup(0, 7, 4, 15, 16, 20, 23, 31);

    vint8 fetched = vtable_8bt_32bi(prepared_lo, prepared_hi, lookup);

    EXPECT_EQ(fetched.lane<0>(),  3);
    EXPECT_EQ(fetched.lane<1>(),  4);
    EXPECT_EQ(fetched.lane<2>(),  7);
    EXPECT_EQ(fetched.lane<3>(), 12);
    EXPECT_EQ(fetched.lane<4>(), 19);
    EXPECT_EQ(fetched.lane<5>(), 23);
    EXPECT_EQ(fetched.lane<6>(), 20);
    EXPECT_EQ(fetched.lane<7>(), 28);
}
3519
/** @brief Test vint8 table permute with a 64 entry table. */
TEST(vint8, vtable_8bt_32bi_64entry)
{
    // 64 byte-sized table entries supplied as four 4-lane vectors. The
    // expected results below correspond to entry 0 being the least
    // significant byte of the first word (0x03).
    vint4 raw0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
    vint4 raw1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
    vint4 raw2(0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f);
    vint4 raw3(0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f);

    // Convert the raw tables into the form the lookup expects
    vint8 prepared0;
    vint8 prepared1;
    vint8 prepared2;
    vint8 prepared3;
    vtable_prepare(raw0, raw1, raw2, raw3, prepared0, prepared1, prepared2, prepared3);

    // Indices reach into three of the four tables, including the last entry (63)
    vint8 lookup(0, 7, 4, 15, 16, 20, 38, 63);

    vint8 fetched = vtable_8bt_32bi(prepared0, prepared1, prepared2, prepared3, lookup);

    EXPECT_EQ(fetched.lane<0>(),  3);
    EXPECT_EQ(fetched.lane<1>(),  4);
    EXPECT_EQ(fetched.lane<2>(),  7);
    EXPECT_EQ(fetched.lane<3>(), 12);
    EXPECT_EQ(fetched.lane<4>(), 19);
    EXPECT_EQ(fetched.lane<5>(), 23);
    EXPECT_EQ(fetched.lane<6>(), 37);
    EXPECT_EQ(fetched.lane<7>(), 60);
}
3544
3545 #endif
3546
3547 }
3548