1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2020-2022 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Unit tests for the vectorized SIMD functionality.
20  */
21 
22 #include <limits>
23 
24 #include "gtest/gtest.h"
25 
26 #include "../astcenc_internal.h"
27 #include "../astcenc_vecmathlib.h"
28 
29 namespace astcenc
30 {
31 
32 // Misc utility tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
33 
round_down(unsigned int x)34 static unsigned int round_down(unsigned int x)
35 {
36 	unsigned int remainder = x % ASTCENC_SIMD_WIDTH;
37 	return x - remainder;
38 }
39 
round_up(unsigned int x)40 static unsigned int round_up(unsigned int x)
41 {
42 	unsigned int remainder = x % ASTCENC_SIMD_WIDTH;
43 	if (!remainder)
44 	{
45 		return x;
46 	}
47 
48 	return x - remainder + ASTCENC_SIMD_WIDTH;
49 }
50 
51 /** @brief Test VLA loop limit round down. */
TEST(misc,RoundDownVLA)52 TEST(misc, RoundDownVLA)
53 {
54 	// Static ones which are valid for all VLA widths
55 	EXPECT_EQ(round_down_to_simd_multiple_vla(0),  0u);
56 	EXPECT_EQ(round_down_to_simd_multiple_vla(8),  8u);
57 	EXPECT_EQ(round_down_to_simd_multiple_vla(16), 16u);
58 
59 	// Variable ones which depend on VLA width
60 	EXPECT_EQ(round_down_to_simd_multiple_vla(3),   round_down(3));
61 	EXPECT_EQ(round_down_to_simd_multiple_vla(5),   round_down(5));
62 	EXPECT_EQ(round_down_to_simd_multiple_vla(7),   round_down(7));
63 	EXPECT_EQ(round_down_to_simd_multiple_vla(231), round_down(231));
64 }
65 
66 /** @brief Test VLA loop limit round up. */
TEST(misc,RoundUpVLA)67 TEST(misc, RoundUpVLA)
68 {
69 	// Static ones which are valid for all VLA widths
70 	EXPECT_EQ(round_up_to_simd_multiple_vla(0),  0u);
71 	EXPECT_EQ(round_up_to_simd_multiple_vla(8),  8u);
72 	EXPECT_EQ(round_up_to_simd_multiple_vla(16), 16u);
73 
74 	// Variable ones which depend on VLA width
75 	EXPECT_EQ(round_up_to_simd_multiple_vla(3),   round_up(3));
76 	EXPECT_EQ(round_up_to_simd_multiple_vla(5),   round_up(5));
77 	EXPECT_EQ(round_up_to_simd_multiple_vla(7),   round_up(7));
78 	EXPECT_EQ(round_up_to_simd_multiple_vla(231), round_up(231));
79 }
80 
81 #if ASTCENC_SIMD_WIDTH == 1
82 
83 // VLA (1-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
84 
85 /** @brief Test VLA change_sign. */
TEST(vfloat,ChangeSign)86 TEST(vfloat, ChangeSign)
87 {
88 	vfloat a0(-1.0f);
89 	vfloat b0(-1.0f);
90 	vfloat r0 = change_sign(a0, b0);
91 	EXPECT_EQ(r0.lane<0>(), 1.0f);
92 
93 	vfloat a1( 1.0f);
94 	vfloat b1(-1.0f);
95 	vfloat r1 = change_sign(a1, b1);
96 	EXPECT_EQ(r1.lane<0>(), -1.0f);
97 
98 	vfloat a2(-3.12f);
99 	vfloat b2( 3.12f);
100 	vfloat r2 = change_sign(a2, b2);
101 	EXPECT_EQ(r2.lane<0>(), -3.12f);
102 
103 	vfloat a3( 3.12f);
104 	vfloat b3( 3.12f);
105 	vfloat r3 = change_sign(a3, b3);
106 	EXPECT_EQ(r3.lane<0>(), 3.12f);
107 }
108 
109 /** @brief Test VLA atan. */
TEST(vfloat,Atan)110 TEST(vfloat, Atan)
111 {
112 	vfloat a0(-0.15f);
113 	vfloat r0 = atan(a0);
114 	EXPECT_NEAR(r0.lane<0>(), -0.149061f, 0.005f);
115 
116 	vfloat a1(0.0f);
117 	vfloat r1 = atan(a1);
118 	EXPECT_NEAR(r1.lane<0>(),  0.000000f, 0.005f);
119 
120 	vfloat a2(0.9f);
121 	vfloat r2 = atan(a2);
122 	EXPECT_NEAR(r2.lane<0>(),  0.733616f, 0.005f);
123 
124 	vfloat a3(2.1f);
125 	vfloat r3 = atan(a3);
126 	EXPECT_NEAR(r3.lane<0>(),  1.123040f, 0.005f);
127 }
128 
129 /** @brief Test VLA atan2. */
TEST(vfloat,Atan2)130 TEST(vfloat, Atan2)
131 {
132 	vfloat a0(-0.15f);
133 	vfloat b0( 1.15f);
134 	vfloat r0 = atan2(a0, b0);
135 	EXPECT_NEAR(r0.lane<0>(), -0.129816f, 0.005f);
136 
137 	vfloat a1( 0.0f);
138 	vfloat b1(-3.0f);
139 	vfloat r1 = atan2(a1, b1);
140 	EXPECT_NEAR(r1.lane<0>(),  3.141592f, 0.005f);
141 
142 	vfloat a2( 0.9f);
143 	vfloat b2(-0.9f);
144 	vfloat r2 = atan2(a2, b2);
145 	EXPECT_NEAR(r2.lane<0>(),  2.360342f, 0.005f);
146 
147 	vfloat a3( 2.1f);
148 	vfloat b3( 1.1f);
149 	vfloat r3 = atan2(a3, b3);
150 	EXPECT_NEAR(r3.lane<0>(),  1.084357f, 0.005f);
151 }
152 
153 #elif ASTCENC_SIMD_WIDTH == 4
154 
155 // VLA (4-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
156 
157 /** @brief Test VLA change_sign. */
TEST(vfloat,ChangeSign)158 TEST(vfloat, ChangeSign)
159 {
160 	vfloat a(-1.0f,  1.0f, -3.12f, 3.12f);
161 	vfloat b(-1.0f, -1.0f,  3.12f, 3.12f);
162 	vfloat r = change_sign(a, b);
163 	EXPECT_EQ(r.lane<0>(),  1.0f);
164 	EXPECT_EQ(r.lane<1>(), -1.0f);
165 	EXPECT_EQ(r.lane<2>(), -3.12f);
166 	EXPECT_EQ(r.lane<3>(),  3.12f);
167 }
168 
169 /** @brief Test VLA atan. */
TEST(vfloat,Atan)170 TEST(vfloat, Atan)
171 {
172 	vfloat a(-0.15f, 0.0f, 0.9f, 2.1f);
173 	vfloat r = atan(a);
174 	EXPECT_NEAR(r.lane<0>(), -0.149061f, 0.005f);
175 	EXPECT_NEAR(r.lane<1>(),  0.000000f, 0.005f);
176 	EXPECT_NEAR(r.lane<2>(),  0.733616f, 0.005f);
177 	EXPECT_NEAR(r.lane<3>(),  1.123040f, 0.005f);
178 }
179 
180 /** @brief Test VLA atan2. */
TEST(vfloat,Atan2)181 TEST(vfloat, Atan2)
182 {
183 	vfloat a(-0.15f, 0.0f, 0.9f, 2.1f);
184 	vfloat b(1.15f, -3.0f, -0.9f, 1.1f);
185 	vfloat r = atan2(a, b);
186 	EXPECT_NEAR(r.lane<0>(), -0.129816f, 0.005f);
187 	EXPECT_NEAR(r.lane<1>(),  3.141592f, 0.005f);
188 	EXPECT_NEAR(r.lane<2>(),  2.360342f, 0.005f);
189 	EXPECT_NEAR(r.lane<3>(),  1.084357f, 0.005f);
190 }
191 
192 #elif ASTCENC_SIMD_WIDTH == 8
193 
194 // VLA (8-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
195 
196 /** @brief Test VLA change_sign. */
TEST(vfloat,ChangeSign)197 TEST(vfloat, ChangeSign)
198 {
199 	vfloat a(-1.0f,  1.0f, -3.12f, 3.12f, -1.0f,  1.0f, -3.12f, 3.12f);
200 	vfloat b(-1.0f, -1.0f,  3.12f, 3.12f, -1.0f, -1.0f,  3.12f, 3.12f);
201 	vfloat r = change_sign(a, b);
202 	EXPECT_EQ(r.lane<0>(),  1.0f);
203 	EXPECT_EQ(r.lane<1>(), -1.0f);
204 	EXPECT_EQ(r.lane<2>(), -3.12f);
205 	EXPECT_EQ(r.lane<3>(),  3.12f);
206 	EXPECT_EQ(r.lane<4>(),  1.0f);
207 	EXPECT_EQ(r.lane<5>(), -1.0f);
208 	EXPECT_EQ(r.lane<6>(), -3.12f);
209 	EXPECT_EQ(r.lane<7>(),  3.12f);
210 }
211 
212 /** @brief Test VLA atan. */
TEST(vfloat,Atan)213 TEST(vfloat, Atan)
214 {
215 	vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f);
216 	vfloat r = atan(a);
217 	EXPECT_NEAR(r.lane<0>(), -0.149061f, 0.005f);
218 	EXPECT_NEAR(r.lane<1>(),  0.000000f, 0.005f);
219 	EXPECT_NEAR(r.lane<2>(),  0.733616f, 0.005f);
220 	EXPECT_NEAR(r.lane<3>(),  1.123040f, 0.005f);
221 	EXPECT_NEAR(r.lane<4>(), -0.149061f, 0.005f);
222 	EXPECT_NEAR(r.lane<5>(),  0.000000f, 0.005f);
223 	EXPECT_NEAR(r.lane<6>(),  0.733616f, 0.005f);
224 	EXPECT_NEAR(r.lane<7>(),  1.123040f, 0.005f);
225 }
226 
227 /** @brief Test VLA atan2. */
TEST(vfloat,Atan2)228 TEST(vfloat, Atan2)
229 {
230 	vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f);
231 	vfloat b(1.15f, -3.0f, -0.9f, 1.1f, 1.15f, -3.0f, -0.9f, 1.1f);
232 	vfloat r = atan2(a, b);
233 	EXPECT_NEAR(r.lane<0>(), -0.129816f, 0.005f);
234 	EXPECT_NEAR(r.lane<1>(),  3.141592f, 0.005f);
235 	EXPECT_NEAR(r.lane<2>(),  2.360342f, 0.005f);
236 	EXPECT_NEAR(r.lane<3>(),  1.084357f, 0.005f);
237 	EXPECT_NEAR(r.lane<4>(), -0.129816f, 0.005f);
238 	EXPECT_NEAR(r.lane<5>(),  3.141592f, 0.005f);
239 	EXPECT_NEAR(r.lane<6>(),  2.360342f, 0.005f);
240 	EXPECT_NEAR(r.lane<7>(),  1.084357f, 0.005f);
241 }
242 
243 #endif
244 
/** @brief Quiet NaN used by tests that feed a NaN lane into an operation. */
static const float qnan = std::numeric_limits<float>::quiet_NaN();

/** @brief 32-byte aligned float table holding 0.0 to 8.0; element i has value i. */
alignas(32) static const float f32_data[9] {
	0.0f, 1.0f, 2.0f,
	3.0f, 4.0f, 5.0f,
	6.0f, 7.0f, 8.0f
};

/** @brief 32-byte aligned signed integer table holding 0 to 8; element i has value i. */
alignas(32) static const int s32_data[9] {
	0, 1, 2,
	3, 4, 5,
	6, 7, 8
};

/** @brief 32-byte aligned byte table holding 0 to 8; element i has value i. */
alignas(32) static const uint8_t u8_data[9] {
	0, 1, 2,
	3, 4, 5,
	6, 7, 8
};
258 
259 // VFLOAT4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
260 
261 /** @brief Test unaligned vfloat4 data load. */
TEST(vfloat4,UnalignedLoad)262 TEST(vfloat4, UnalignedLoad)
263 {
264 	vfloat4 a(&(f32_data[1]));
265 	EXPECT_EQ(a.lane<0>(), 1.0f);
266 	EXPECT_EQ(a.lane<1>(), 2.0f);
267 	EXPECT_EQ(a.lane<2>(), 3.0f);
268 	EXPECT_EQ(a.lane<3>(), 4.0f);
269 }
270 
271 /** @brief Test scalar duplicated vfloat4 load. */
TEST(vfloat4,ScalarDupLoad)272 TEST(vfloat4, ScalarDupLoad)
273 {
274 	vfloat4 a(1.1f);
275 	EXPECT_EQ(a.lane<0>(), 1.1f);
276 	EXPECT_EQ(a.lane<1>(), 1.1f);
277 	EXPECT_EQ(a.lane<2>(), 1.1f);
278 	EXPECT_EQ(a.lane<3>(), 1.1f);
279 }
280 
281 /** @brief Test scalar vfloat4 load. */
TEST(vfloat4,ScalarLoad)282 TEST(vfloat4, ScalarLoad)
283 {
284 	vfloat4 a(1.1f, 2.2f, 3.3f, 4.4f);
285 	EXPECT_EQ(a.lane<0>(), 1.1f);
286 	EXPECT_EQ(a.lane<1>(), 2.2f);
287 	EXPECT_EQ(a.lane<2>(), 3.3f);
288 	EXPECT_EQ(a.lane<3>(), 4.4f);
289 }
290 
291 /** @brief Test copy vfloat4 load. */
TEST(vfloat4,CopyLoad)292 TEST(vfloat4, CopyLoad)
293 {
294 	vfloat4 s(1.1f, 2.2f, 3.3f, 4.4f);
295 	vfloat4 a(s.m);
296 	EXPECT_EQ(a.lane<0>(), 1.1f);
297 	EXPECT_EQ(a.lane<1>(), 2.2f);
298 	EXPECT_EQ(a.lane<2>(), 3.3f);
299 	EXPECT_EQ(a.lane<3>(), 4.4f);
300 }
301 
302 /** @brief Test vfloat4 scalar lane set. */
TEST(vfloat4,SetLane)303 TEST(vfloat4, SetLane)
304 {
305 	vfloat4 a(0.0f);
306 
307 	a.set_lane<0>(1.0f);
308 	EXPECT_EQ(a.lane<0>(), 1.0f);
309 	EXPECT_EQ(a.lane<1>(), 0.0f);
310 	EXPECT_EQ(a.lane<2>(), 0.0f);
311 	EXPECT_EQ(a.lane<3>(), 0.0f);
312 
313 	a.set_lane<1>(2.0f);
314 	EXPECT_EQ(a.lane<0>(), 1.0f);
315 	EXPECT_EQ(a.lane<1>(), 2.0f);
316 	EXPECT_EQ(a.lane<2>(), 0.0f);
317 	EXPECT_EQ(a.lane<3>(), 0.0f);
318 
319 	a.set_lane<2>(3.0f);
320 	EXPECT_EQ(a.lane<0>(), 1.0f);
321 	EXPECT_EQ(a.lane<1>(), 2.0f);
322 	EXPECT_EQ(a.lane<2>(), 3.0f);
323 	EXPECT_EQ(a.lane<3>(), 0.0f);
324 
325 	a.set_lane<3>(4.0f);
326 	EXPECT_EQ(a.lane<0>(), 1.0f);
327 	EXPECT_EQ(a.lane<1>(), 2.0f);
328 	EXPECT_EQ(a.lane<2>(), 3.0f);
329 	EXPECT_EQ(a.lane<3>(), 4.0f);
330 }
331 
332 /** @brief Test vfloat4 zero. */
TEST(vfloat4,Zero)333 TEST(vfloat4, Zero)
334 {
335 	vfloat4 a = vfloat4::zero();
336 	EXPECT_EQ(a.lane<0>(), 0.0f);
337 	EXPECT_EQ(a.lane<1>(), 0.0f);
338 	EXPECT_EQ(a.lane<2>(), 0.0f);
339 	EXPECT_EQ(a.lane<3>(), 0.0f);
340 }
341 
342 /** @brief Test vfloat4 load1. */
TEST(vfloat4,Load1)343 TEST(vfloat4, Load1)
344 {
345 	float s = 3.14f;
346 	vfloat4 a = vfloat4::load1(&s);
347 	EXPECT_EQ(a.lane<0>(), 3.14f);
348 	EXPECT_EQ(a.lane<1>(), 3.14f);
349 	EXPECT_EQ(a.lane<2>(), 3.14f);
350 	EXPECT_EQ(a.lane<3>(), 3.14f);
351 }
352 
353 /** @brief Test vfloat4 loada. */
TEST(vfloat4,Loada)354 TEST(vfloat4, Loada)
355 {
356 	vfloat4 a = vfloat4::loada(&(f32_data[0]));
357 	EXPECT_EQ(a.lane<0>(), 0.0f);
358 	EXPECT_EQ(a.lane<1>(), 1.0f);
359 	EXPECT_EQ(a.lane<2>(), 2.0f);
360 	EXPECT_EQ(a.lane<3>(), 3.0f);
361 }
362 
363 /** @brief Test vfloat4 lane_id. */
TEST(vfloat4,LaneID)364 TEST(vfloat4, LaneID)
365 {
366 	vfloat4 a = vfloat4::lane_id();
367 	EXPECT_EQ(a.lane<0>(), 0.0f);
368 	EXPECT_EQ(a.lane<1>(), 1.0f);
369 	EXPECT_EQ(a.lane<2>(), 2.0f);
370 	EXPECT_EQ(a.lane<3>(), 3.0f);
371 }
372 
373 /** @brief Test vfloat4 swz to float4. */
TEST(vfloat4,swz4)374 TEST(vfloat4, swz4)
375 {
376 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
377 	vfloat4 r = a.swz<0, 3, 2, 1>();
378 	EXPECT_EQ(r.lane<0>(), 1.0f);
379 	EXPECT_EQ(r.lane<1>(), 4.0f);
380 	EXPECT_EQ(r.lane<2>(), 3.0f);
381 	EXPECT_EQ(r.lane<3>(), 2.0f);
382 
383 	r = a.swz<3, 1, 1, 0>();
384 	EXPECT_EQ(r.lane<0>(), 4.0f);
385 	EXPECT_EQ(r.lane<1>(), 2.0f);
386 	EXPECT_EQ(r.lane<2>(), 2.0f);
387 	EXPECT_EQ(r.lane<3>(), 1.0f);
388 }
389 
390 /** @brief Test vfloat4 swz to float3. */
TEST(vfloat4,swz3)391 TEST(vfloat4, swz3)
392 {
393 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
394 	vfloat4 r = a.swz<0, 3, 2>();
395 	EXPECT_EQ(r.lane<0>(), 1.0f);
396 	EXPECT_EQ(r.lane<1>(), 4.0f);
397 	EXPECT_EQ(r.lane<2>(), 3.0f);
398 	EXPECT_EQ(r.lane<3>(), 0.0f);
399 
400 	r = a.swz<3, 1, 1>();
401 	EXPECT_EQ(r.lane<0>(), 4.0f);
402 	EXPECT_EQ(r.lane<1>(), 2.0f);
403 	EXPECT_EQ(r.lane<2>(), 2.0f);
404 	EXPECT_EQ(r.lane<3>(), 0.0f);
405 }
406 
407 /** @brief Test vfloat4 swz to float2. */
TEST(vfloat4,swz2)408 TEST(vfloat4, swz2)
409 {
410 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
411 	vfloat4 r = a.swz<0, 3>();
412 	EXPECT_EQ(r.lane<0>(), 1.0f);
413 	EXPECT_EQ(r.lane<1>(), 4.0f);
414 
415 	r = a.swz<2, 1>();
416 	EXPECT_EQ(r.lane<0>(), 3.0f);
417 	EXPECT_EQ(r.lane<1>(), 2.0f);
418 }
419 
420 /** @brief Test vfloat4 add. */
TEST(vfloat4,vadd)421 TEST(vfloat4, vadd)
422 {
423 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
424 	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
425 	a = a + b;
426 	EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f);
427 	EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f);
428 	EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f);
429 	EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f);
430 }
431 
432 /** @brief Test vfloat4 self-add. */
TEST(vfloat4,vselfadd1)433 TEST(vfloat4, vselfadd1)
434 {
435 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
436 	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
437 
438 	// Test increment by another variable
439 	a += b;
440 	EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f);
441 	EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f);
442 	EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f);
443 	EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f);
444 
445 	// Test increment by an expression
446 	a += b + b;
447 	EXPECT_NEAR(a.lane<0>(), 1.0f + 0.3f, 0.001f);
448 	EXPECT_NEAR(a.lane<1>(), 2.0f + 0.6f, 0.001f);
449 	EXPECT_NEAR(a.lane<2>(), 3.0f + 0.9f, 0.001f);
450 	EXPECT_NEAR(a.lane<3>(), 4.0f + 1.2f, 0.001f);
451 }
452 
453 /** @brief Test vfloat4 sub. */
TEST(vfloat4,vsub)454 TEST(vfloat4, vsub)
455 {
456 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
457 	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
458 	a = a - b;
459 	EXPECT_EQ(a.lane<0>(), 1.0f - 0.1f);
460 	EXPECT_EQ(a.lane<1>(), 2.0f - 0.2f);
461 	EXPECT_EQ(a.lane<2>(), 3.0f - 0.3f);
462 	EXPECT_EQ(a.lane<3>(), 4.0f - 0.4f);
463 }
464 
465 /** @brief Test vfloat4 mul. */
TEST(vfloat4,vmul)466 TEST(vfloat4, vmul)
467 {
468 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
469 	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
470 	a = a * b;
471 	EXPECT_EQ(a.lane<0>(), 1.0f * 0.1f);
472 	EXPECT_EQ(a.lane<1>(), 2.0f * 0.2f);
473 	EXPECT_EQ(a.lane<2>(), 3.0f * 0.3f);
474 	EXPECT_EQ(a.lane<3>(), 4.0f * 0.4f);
475 }
476 
477 /** @brief Test vfloat4 mul. */
TEST(vfloat4,vsmul)478 TEST(vfloat4, vsmul)
479 {
480 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
481 	float b = 3.14f;
482 	a = a * b;
483 	EXPECT_EQ(a.lane<0>(), 1.0f * 3.14f);
484 	EXPECT_EQ(a.lane<1>(), 2.0f * 3.14f);
485 	EXPECT_EQ(a.lane<2>(), 3.0f * 3.14f);
486 	EXPECT_EQ(a.lane<3>(), 4.0f * 3.14f);
487 }
488 
489 /** @brief Test vfloat4 mul. */
TEST(vfloat4,svmul)490 TEST(vfloat4, svmul)
491 {
492 	float a = 3.14f;
493 	vfloat4 b(1.0f, 2.0f, 3.0f, 4.0f);
494 	b = a * b;
495 	EXPECT_EQ(b.lane<0>(), 3.14f * 1.0f);
496 	EXPECT_EQ(b.lane<1>(), 3.14f * 2.0f);
497 	EXPECT_EQ(b.lane<2>(), 3.14f * 3.0f);
498 	EXPECT_EQ(b.lane<3>(), 3.14f * 4.0f);
499 }
500 
501 /** @brief Test vfloat4 div. */
TEST(vfloat4,vdiv)502 TEST(vfloat4, vdiv)
503 {
504 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
505 	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
506 	a = a / b;
507 	EXPECT_EQ(a.lane<0>(), 1.0f / 0.1f);
508 	EXPECT_EQ(a.lane<1>(), 2.0f / 0.2f);
509 	EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f);
510 	EXPECT_EQ(a.lane<3>(), 4.0f / 0.4f);
511 }
512 
513 /** @brief Test vfloat4 div. */
TEST(vfloat4,vsdiv)514 TEST(vfloat4, vsdiv)
515 {
516 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
517 	float b = 0.3f;
518 	a = a / b;
519 	EXPECT_EQ(a.lane<0>(), 1.0f / 0.3f);
520 	EXPECT_EQ(a.lane<1>(), 2.0f / 0.3f);
521 	EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f);
522 	EXPECT_EQ(a.lane<3>(), 4.0f / 0.3f);
523 }
524 
525 /** @brief Test vfloat4 div. */
TEST(vfloat4,svdiv)526 TEST(vfloat4, svdiv)
527 {
528 	float a = 3.0f;
529 	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
530 	b = a / b;
531 	EXPECT_EQ(b.lane<0>(), 3.0f / 0.1f);
532 	EXPECT_EQ(b.lane<1>(), 3.0f / 0.2f);
533 	EXPECT_EQ(b.lane<2>(), 3.0f / 0.3f);
534 	EXPECT_EQ(b.lane<3>(), 3.0f / 0.4f);
535 }
536 
537 /** @brief Test vfloat4 ceq. */
TEST(vfloat4,ceq)538 TEST(vfloat4, ceq)
539 {
540 	vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
541 	vfloat4 b1(0.1f, 0.2f, 0.3f, 0.4f);
542 	vmask4 r1 = a1 == b1;
543 	EXPECT_EQ(0u, mask(r1));
544 	EXPECT_EQ(false, any(r1));
545 	EXPECT_EQ(false, all(r1));
546 
547 	vfloat4 a2(1.0f, 2.0f, 3.0f, 4.0f);
548 	vfloat4 b2(1.0f, 0.2f, 0.3f, 0.4f);
549 	vmask4 r2 = a2 == b2;
550 	EXPECT_EQ(0x1u, mask(r2));
551 	EXPECT_EQ(true, any(r2));
552 	EXPECT_EQ(false, all(r2));
553 
554 	vfloat4 a3(1.0f, 2.0f, 3.0f, 4.0f);
555 	vfloat4 b3(1.0f, 0.2f, 3.0f, 0.4f);
556 	vmask4 r3 = a3 == b3;
557 	EXPECT_EQ(0x5u, mask(r3));
558 	EXPECT_EQ(true, any(r3));
559 	EXPECT_EQ(false, all(r3));
560 
561 	vfloat4 a4(1.0f, 2.0f, 3.0f, 4.0f);
562 	vmask4 r4 = a4 == a4;
563 	EXPECT_EQ(0xFu, mask(r4));
564 	EXPECT_EQ(true, any(r4));
565 	EXPECT_EQ(true, all(r4));
566 }
567 
568 /** @brief Test vfloat4 cne. */
TEST(vfloat4,cne)569 TEST(vfloat4, cne)
570 {
571 	vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
572 	vfloat4 b1(0.1f, 0.2f, 0.3f, 0.4f);
573 	vmask4 r1 = a1 != b1;
574 	EXPECT_EQ(0xFu, mask(r1));
575 	EXPECT_EQ(true, any(r1));
576 	EXPECT_EQ(true, all(r1));
577 
578 	vfloat4 a2(1.0f, 2.0f, 3.0f, 4.0f);
579 	vfloat4 b2(1.0f, 0.2f, 0.3f, 0.4f);
580 	vmask4 r2 = a2 != b2;
581 	EXPECT_EQ(0xEu, mask(r2));
582 	EXPECT_EQ(true, any(r2));
583 	EXPECT_EQ(false, all(r2));
584 
585 	vfloat4 a3(1.0f, 2.0f, 3.0f, 4.0f);
586 	vfloat4 b3(1.0f, 0.2f, 3.0f, 0.4f);
587 	vmask4 r3 = a3 != b3;
588 	EXPECT_EQ(0xAu, mask(r3));
589 	EXPECT_EQ(true, any(r3));
590 	EXPECT_EQ(false, all(r3));
591 
592 	vfloat4 a4(1.0f, 2.0f, 3.0f, 4.0f);
593 	vmask4 r4 = a4 != a4;
594 	EXPECT_EQ(0u, mask(r4));
595 	EXPECT_EQ(false, any(r4));
596 	EXPECT_EQ(false, all(r4));
597 }
598 
599 /** @brief Test vfloat4 clt. */
TEST(vfloat4,clt)600 TEST(vfloat4, clt)
601 {
602 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
603 	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
604 	vmask4 r = a < b;
605 	EXPECT_EQ(0xAu, mask(r));
606 }
607 
608 /** @brief Test vfloat4 cle. */
TEST(vfloat4,cle)609 TEST(vfloat4, cle)
610 {
611 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
612 	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
613 	vmask4 r = a <= b;
614 	EXPECT_EQ(0xEu, mask(r));
615 }
616 
617 /** @brief Test vfloat4 cgt. */
TEST(vfloat4,cgt)618 TEST(vfloat4, cgt)
619 {
620 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
621 	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
622 	vmask4 r = a > b;
623 	EXPECT_EQ(0x1u, mask(r));
624 }
625 
626 /** @brief Test vfloat4 cge. */
TEST(vfloat4,cge)627 TEST(vfloat4, cge)
628 {
629 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
630 	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
631 	vmask4 r = a >= b;
632 	EXPECT_EQ(0x5u, mask(r));
633 }
634 
635 /** @brief Test vfloat4 min. */
TEST(vfloat4,min)636 TEST(vfloat4, min)
637 {
638 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
639 	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
640 	vfloat4 r = min(a, b);
641 	EXPECT_EQ(r.lane<0>(), 0.9f);
642 	EXPECT_EQ(r.lane<1>(), 2.0f);
643 	EXPECT_EQ(r.lane<2>(), 3.0f);
644 	EXPECT_EQ(r.lane<3>(), 4.0f);
645 
646 	float c = 0.3f;
647 	r = min(a, c);
648 	EXPECT_EQ(r.lane<0>(), 0.3f);
649 	EXPECT_EQ(r.lane<1>(), 0.3f);
650 	EXPECT_EQ(r.lane<2>(), 0.3f);
651 	EXPECT_EQ(r.lane<3>(), 0.3f);
652 
653 	float d = 1.5f;
654 	r = min(a, d);
655 	EXPECT_EQ(r.lane<0>(), 1.0f);
656 	EXPECT_EQ(r.lane<1>(), 1.5f);
657 	EXPECT_EQ(r.lane<2>(), 1.5f);
658 	EXPECT_EQ(r.lane<3>(), 1.5f);
659 }
660 
661 /** @brief Test vfloat4 max. */
TEST(vfloat4,max)662 TEST(vfloat4, max)
663 {
664 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
665 	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
666 	vfloat4 r = max(a, b);
667 	EXPECT_EQ(r.lane<0>(), 1.0f);
668 	EXPECT_EQ(r.lane<1>(), 2.1f);
669 	EXPECT_EQ(r.lane<2>(), 3.0f);
670 	EXPECT_EQ(r.lane<3>(), 4.1f);
671 
672 	float c = 4.3f;
673 	r = max(a, c);
674 	EXPECT_EQ(r.lane<0>(), 4.3f);
675 	EXPECT_EQ(r.lane<1>(), 4.3f);
676 	EXPECT_EQ(r.lane<2>(), 4.3f);
677 	EXPECT_EQ(r.lane<3>(), 4.3f);
678 
679 	float d = 1.5f;
680 	r = max(a, d);
681 	EXPECT_EQ(r.lane<0>(), 1.5f);
682 	EXPECT_EQ(r.lane<1>(), 2.0f);
683 	EXPECT_EQ(r.lane<2>(), 3.0f);
684 	EXPECT_EQ(r.lane<3>(), 4.0f);
685 }
686 
687 /** @brief Test vfloat4 clamp. */
TEST(vfloat4,clamp)688 TEST(vfloat4, clamp)
689 {
690 	vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
691 	vfloat4 r1 = clamp(2.1f, 3.0f, a1);
692 	EXPECT_EQ(r1.lane<0>(), 2.1f);
693 	EXPECT_EQ(r1.lane<1>(), 2.1f);
694 	EXPECT_EQ(r1.lane<2>(), 3.0f);
695 	EXPECT_EQ(r1.lane<3>(), 3.0f);
696 
697 	vfloat4 a2(1.0f, 2.0f, qnan, 4.0f);
698 	vfloat4 r2 = clamp(2.1f, 3.0f, a2);
699 	EXPECT_EQ(r2.lane<0>(), 2.1f);
700 	EXPECT_EQ(r2.lane<1>(), 2.1f);
701 	EXPECT_EQ(r2.lane<2>(), 2.1f);
702 	EXPECT_EQ(r2.lane<3>(), 3.0f);
703 }
704 
705 /** @brief Test vfloat4 clampz. */
TEST(vfloat4,clampz)706 TEST(vfloat4, clampz)
707 {
708 	vfloat4 a1(-1.0f, 0.0f, 0.1f, 4.0f);
709 	vfloat4 r1 = clampz(3.0f, a1);
710 	EXPECT_EQ(r1.lane<0>(), 0.0f);
711 	EXPECT_EQ(r1.lane<1>(), 0.0f);
712 	EXPECT_EQ(r1.lane<2>(), 0.1f);
713 	EXPECT_EQ(r1.lane<3>(), 3.0f);
714 
715 	vfloat4 a2(-1.0f, 0.0f, qnan, 4.0f);
716 	vfloat4 r2 = clampz(3.0f, a2);
717 	EXPECT_EQ(r2.lane<0>(), 0.0f);
718 	EXPECT_EQ(r2.lane<1>(), 0.0f);
719 	EXPECT_EQ(r2.lane<2>(), 0.0f);
720 	EXPECT_EQ(r2.lane<3>(), 3.0f);
721 }
722 
723 /** @brief Test vfloat4 clampz. */
TEST(vfloat4,clampzo)724 TEST(vfloat4, clampzo)
725 {
726 	vfloat4 a1(-1.0f, 0.0f, 0.1f, 4.0f);
727 	vfloat4 r1 = clampzo(a1);
728 	EXPECT_EQ(r1.lane<0>(), 0.0f);
729 	EXPECT_EQ(r1.lane<1>(), 0.0f);
730 	EXPECT_EQ(r1.lane<2>(), 0.1f);
731 	EXPECT_EQ(r1.lane<3>(), 1.0f);
732 
733 	vfloat4 a2(-1.0f, 0.0f, qnan, 4.0f);
734 	vfloat4 r2 = clampzo(a2);
735 	EXPECT_EQ(r2.lane<0>(), 0.0f);
736 	EXPECT_EQ(r2.lane<1>(), 0.0f);
737 	EXPECT_EQ(r2.lane<2>(), 0.0f);
738 	EXPECT_EQ(r2.lane<3>(), 1.0f);
739 }
740 
741 /** @brief Test vfloat4 abs. */
TEST(vfloat4,abs)742 TEST(vfloat4, abs)
743 {
744 	vfloat4 a(-1.0f, 0.0f, 0.1f, 4.0f);
745 	vfloat4 r = abs(a);
746 	EXPECT_EQ(r.lane<0>(), 1.0f);
747 	EXPECT_EQ(r.lane<1>(), 0.0f);
748 	EXPECT_EQ(r.lane<2>(), 0.1f);
749 	EXPECT_EQ(r.lane<3>(), 4.0f);
750 }
751 
752 /** @brief Test vfloat4 round. */
TEST(vfloat4,round)753 TEST(vfloat4, round)
754 {
755 	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
756 	vfloat4 r1 = round(a1);
757 	EXPECT_EQ(r1.lane<0>(), 1.0f);
758 	EXPECT_EQ(r1.lane<1>(), 2.0f);
759 	EXPECT_EQ(r1.lane<2>(), 2.0f);
760 	EXPECT_EQ(r1.lane<3>(), 4.0f);
761 
762 	vfloat4 a2(-2.5f, -2.5f, -3.5f, -3.5f);
763 	vfloat4 r2 = round(a2);
764 	EXPECT_EQ(r2.lane<0>(), -2.0f);
765 	EXPECT_EQ(r2.lane<2>(), -4.0f);
766 }
767 
768 /** @brief Test vfloat4 hmin. */
TEST(vfloat4,hmin)769 TEST(vfloat4, hmin)
770 {
771 	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
772 	vfloat4 r1 = hmin(a1);
773 	EXPECT_EQ(r1.lane<0>(), 1.1f);
774 	EXPECT_EQ(r1.lane<1>(), 1.1f);
775 	EXPECT_EQ(r1.lane<2>(), 1.1f);
776 	EXPECT_EQ(r1.lane<3>(), 1.1f);
777 
778 	vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
779 	vfloat4 r2 = hmin(a2);
780 	EXPECT_EQ(r2.lane<0>(), 0.2f);
781 	EXPECT_EQ(r2.lane<1>(), 0.2f);
782 	EXPECT_EQ(r2.lane<2>(), 0.2f);
783 	EXPECT_EQ(r2.lane<3>(), 0.2f);
784 }
785 
786 /** @brief Test vfloat4 hmin_s. */
TEST(vfloat4,hmin_s)787 TEST(vfloat4, hmin_s)
788 {
789 	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
790 	float r1 = hmin_s(a1);
791 	EXPECT_EQ(r1, 1.1f);
792 
793 	vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
794 	float r2 = hmin_s(a2);
795 	EXPECT_EQ(r2, 0.2f);
796 }
797 
798 /** @brief Test vfloat4 hmin_rgb_s. */
TEST(vfloat4,hmin_rgb_s)799 TEST(vfloat4, hmin_rgb_s)
800 {
801 	vfloat4 a1(1.1f, 1.5f, 1.6f, 0.2f);
802 	float r1 = hmin_rgb_s(a1);
803 	EXPECT_EQ(r1, 1.1f);
804 
805 	vfloat4 a2(1.5f, 0.9f, 1.6f, 1.2f);
806 	float r2 = hmin_rgb_s(a2);
807 	EXPECT_EQ(r2, 0.9f);
808 }
809 
810 /** @brief Test vfloat4 hmax. */
TEST(vfloat4,hmax)811 TEST(vfloat4, hmax)
812 {
813 	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
814 	vfloat4 r1 = hmax(a1);
815 	EXPECT_EQ(r1.lane<0>(), 4.0f);
816 	EXPECT_EQ(r1.lane<1>(), 4.0f);
817 	EXPECT_EQ(r1.lane<2>(), 4.0f);
818 	EXPECT_EQ(r1.lane<3>(), 4.0f);
819 
820 	vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
821 	vfloat4 r2 = hmax(a2);
822 	EXPECT_EQ(r2.lane<0>(), 1.6f);
823 	EXPECT_EQ(r2.lane<1>(), 1.6f);
824 	EXPECT_EQ(r2.lane<2>(), 1.6f);
825 	EXPECT_EQ(r2.lane<3>(), 1.6f);
826 }
827 
828 /** @brief Test vfloat4 hmax_s. */
TEST(vfloat4,hmax_s)829 TEST(vfloat4, hmax_s)
830 {
831 	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
832 	float r1 = hmax_s(a1);
833 	EXPECT_EQ(r1, 4.0f);
834 
835 	vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
836 	float r2 = hmax_s(a2);
837 	EXPECT_EQ(r2, 1.6f);
838 }
839 
840 /** @brief Test vfloat4 hadd_s. */
TEST(vfloat4,hadd_s)841 TEST(vfloat4, hadd_s)
842 {
843 	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
844 	float sum = 1.1f + 1.5f + 1.6f + 4.0f;
845 	float r = hadd_s(a1);
846 	EXPECT_NEAR(r, sum, 0.005f);
847 }
848 
849 /** @brief Test vfloat4 hadd_rgb_s. */
TEST(vfloat4,hadd_rgb_s)850 TEST(vfloat4, hadd_rgb_s)
851 {
852 	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
853 	float sum = 1.1f + 1.5f + 1.6f;
854 	float r = hadd_rgb_s(a1);
855 	EXPECT_NEAR(r, sum, 0.005f);
856 }
857 
858 /** @brief Test vfloat4 sqrt. */
TEST(vfloat4,sqrt)859 TEST(vfloat4, sqrt)
860 {
861 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
862 	vfloat4 r = sqrt(a);
863 	EXPECT_EQ(r.lane<0>(), std::sqrt(1.0f));
864 	EXPECT_EQ(r.lane<1>(), std::sqrt(2.0f));
865 	EXPECT_EQ(r.lane<2>(), std::sqrt(3.0f));
866 	EXPECT_EQ(r.lane<3>(), std::sqrt(4.0f));
867 }
868 
869 /** @brief Test vfloat4 select. */
TEST(vfloat4,select)870 TEST(vfloat4, select)
871 {
872 	vfloat4 m1(1.0f, 1.0f, 1.0f, 1.0f);
873 	vfloat4 m2(1.0f, 2.0f, 1.0f, 2.0f);
874 	vmask4 cond = m1 == m2;
875 
876 	vfloat4 a(1.0f, 3.0f, 3.0f, 1.0f);
877 	vfloat4 b(4.0f, 2.0f, 2.0f, 4.0f);
878 
879 	// Select in one direction
880 	vfloat4 r1 = select(a, b, cond);
881 	EXPECT_EQ(r1.lane<0>(), 4.0f);
882 	EXPECT_EQ(r1.lane<1>(), 3.0f);
883 	EXPECT_EQ(r1.lane<2>(), 2.0f);
884 	EXPECT_EQ(r1.lane<3>(), 1.0f);
885 
886 	// Select in the other
887 	vfloat4 r2 = select(b, a, cond);
888 	EXPECT_EQ(r2.lane<0>(), 1.0f);
889 	EXPECT_EQ(r2.lane<1>(), 2.0f);
890 	EXPECT_EQ(r2.lane<2>(), 3.0f);
891 	EXPECT_EQ(r2.lane<3>(), 4.0f);
892 }
893 
894 /** @brief Test vfloat4 select MSB only. */
TEST(vfloat4,select_msb)895 TEST(vfloat4, select_msb)
896 {
897 	int msb_set = static_cast<int>(0x80000000);
898 	vint4 msb(msb_set, 0, msb_set, 0);
899 	vmask4 cond(msb.m);
900 
901 	vfloat4 a(1.0f, 3.0f, 3.0f, 1.0f);
902 	vfloat4 b(4.0f, 2.0f, 2.0f, 4.0f);
903 
904 	// Select in one direction
905 	vfloat4 r1 = select_msb(a, b, cond);
906 	EXPECT_EQ(r1.lane<0>(), 4.0f);
907 	EXPECT_EQ(r1.lane<1>(), 3.0f);
908 	EXPECT_EQ(r1.lane<2>(), 2.0f);
909 	EXPECT_EQ(r1.lane<3>(), 1.0f);
910 
911 	// Select in the other
912 	vfloat4 r2 = select_msb(b, a, cond);
913 	EXPECT_EQ(r2.lane<0>(), 1.0f);
914 	EXPECT_EQ(r2.lane<1>(), 2.0f);
915 	EXPECT_EQ(r2.lane<2>(), 3.0f);
916 	EXPECT_EQ(r2.lane<3>(), 4.0f);
917 }
918 
919 /** @brief Test vfloat4 gatherf. */
TEST(vfloat4,gatherf)920 TEST(vfloat4, gatherf)
921 {
922 	vint4 indices(0, 4, 3, 2);
923 	vfloat4 r = gatherf(f32_data, indices);
924 	EXPECT_EQ(r.lane<0>(), 0.0f);
925 	EXPECT_EQ(r.lane<1>(), 4.0f);
926 	EXPECT_EQ(r.lane<2>(), 3.0f);
927 	EXPECT_EQ(r.lane<3>(), 2.0f);
928 }
929 
930 /** @brief Test vfloat4 storea. */
TEST(vfloat4,storea)931 TEST(vfloat4, storea)
932 {
933 	alignas(16) float out[4];
934 	vfloat4 a(f32_data);
935 	storea(a, out);
936 	EXPECT_EQ(out[0], 0.0f);
937 	EXPECT_EQ(out[1], 1.0f);
938 	EXPECT_EQ(out[2], 2.0f);
939 	EXPECT_EQ(out[3], 3.0f);
940 }
941 
942 /** @brief Test vfloat4 store. */
TEST(vfloat4,store)943 TEST(vfloat4, store)
944 {
945 	alignas(16) float out[5];
946 	vfloat4 a(f32_data);
947 	store(a, &(out[1]));
948 	EXPECT_EQ(out[1], 0.0f);
949 	EXPECT_EQ(out[2], 1.0f);
950 	EXPECT_EQ(out[3], 2.0f);
951 	EXPECT_EQ(out[4], 3.0f);
952 }
953 
954 /** @brief Test vfloat4 dot. */
TEST(vfloat4,dot)955 TEST(vfloat4, dot)
956 {
957 	vfloat4 a1(1.0f, 2.0f, 4.0f, 8.0f);
958 	vfloat4 b1(1.0f, 0.5f, 0.25f, 0.125f);
959 	vfloat4 r1 = dot(a1, b1);
960 	EXPECT_EQ(r1.lane<0>(), 4.0f);
961 	EXPECT_EQ(r1.lane<1>(), 4.0f);
962 	EXPECT_EQ(r1.lane<2>(), 4.0f);
963 	EXPECT_EQ(r1.lane<3>(), 4.0f);
964 
965 	// These values will fail to add to the same value if reassociated
966 	float l0 =          141.2540435791015625f;
967 	float l1 =      5345345.5000000000000000f;
968 	float l2 =       234234.7031250000000000f;
969 	float l3 = 124353454080.0000000000000000f;
970 
971 	vfloat4 a2(1.0f, 1.0f, 1.0f, 1.0f);
972 	vfloat4 b2(l0, l1, l2, l3);
973 	vfloat4 r2 = dot(a2, b2);
974 
975 	// Test that reassociation causes a failure with the numbers we chose
976 	EXPECT_FALSE(any(r2 == vfloat4(l0 + l1 + l2 + l3)));
977 
978 	// Test that the sum works, for the association pattern we want used
979 	EXPECT_TRUE(all(r2 == vfloat4((l0 + l2) + (l1 + l3))));
980 }
981 
982 /** @brief Test vfloat4 dot_s. */
TEST(vfloat4,dot_s)983 TEST(vfloat4, dot_s)
984 {
985 	vfloat4 a1(1.0f, 2.0f, 4.0f, 8.0f);
986 	vfloat4 b1(1.0f, 0.5f, 0.25f, 0.125f);
987 	float r1 = dot_s(a1, b1);
988 	EXPECT_EQ(r1, 4.0f);
989 
990 	// These values will fail to add to the same value if reassociated
991 	float l0 =          141.2540435791015625f;
992 	float l1 =      5345345.5000000000000000f;
993 	float l2 =       234234.7031250000000000f;
994 	float l3 = 124353454080.0000000000000000f;
995 
996 	vfloat4 a2(1.0f, 1.0f, 1.0f, 1.0f);
997 	vfloat4 b2(l0, l1, l2, l3);
998 	float r2 = dot_s(a2, b2);
999 
1000 	// Test that reassociation causes a failure with the numbers we chose
1001 	EXPECT_NE(r2, l0 + l1 + l2 + l3);
1002 
1003 	// Test that the sum works, for the association pattern we want used
1004 	EXPECT_EQ(r2, (l0 + l2) + (l1 + l3));
1005 }
1006 
1007 /** @brief Test vfloat4 dot3. */
TEST(vfloat4,dot3)1008 TEST(vfloat4, dot3)
1009 {
1010 	vfloat4 a(1.0f, 2.0f, 4.0f, 8.0f);
1011 	vfloat4 b(1.0f, 0.5f, 0.25f, 0.125f);
1012 	vfloat4 r = dot3(a, b);
1013 	EXPECT_EQ(r.lane<0>(), 3.0f);
1014 	EXPECT_EQ(r.lane<1>(), 3.0f);
1015 	EXPECT_EQ(r.lane<2>(), 3.0f);
1016 	EXPECT_EQ(r.lane<3>(), 0.0f);
1017 }
1018 
1019 /** @brief Test vfloat4 dot3_s. */
TEST(vfloat4,dot3_s)1020 TEST(vfloat4, dot3_s)
1021 {
1022 	vfloat4 a(1.0f, 2.0f, 4.0f, 8.0f);
1023 	vfloat4 b(1.0f, 0.5f, 0.25f, 0.125f);
1024 	float r = dot3_s(a, b);
1025 	EXPECT_EQ(r, 3.0f);
1026 }
1027 
1028 /** @brief Test vfloat4 normalize. */
TEST(vfloat4,normalize)1029 TEST(vfloat4, normalize)
1030 {
1031 	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
1032 	vfloat4 r = normalize(a);
1033 	EXPECT_NEAR(r.lane<0>(), 1.0f / astc::sqrt(30.0f), 0.0005f);
1034 	EXPECT_NEAR(r.lane<1>(), 2.0f / astc::sqrt(30.0f), 0.0005f);
1035 	EXPECT_NEAR(r.lane<2>(), 3.0f / astc::sqrt(30.0f), 0.0005f);
1036 	EXPECT_NEAR(r.lane<3>(), 4.0f / astc::sqrt(30.0f), 0.0005f);
1037 }
1038 
1039 /** @brief Test vfloat4 normalize_safe. */
TEST(vfloat4,normalize_safe)1040 TEST(vfloat4, normalize_safe)
1041 {
1042 	vfloat4 s(-1.0f, -1.0f, -1.0f, -1.0f);
1043 
1044 	vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
1045 	vfloat4 r1 = normalize_safe(a1, s);
1046 	EXPECT_NEAR(r1.lane<0>(), 1.0f / astc::sqrt(30.0f), 0.0005f);
1047 	EXPECT_NEAR(r1.lane<1>(), 2.0f / astc::sqrt(30.0f), 0.0005f);
1048 	EXPECT_NEAR(r1.lane<2>(), 3.0f / astc::sqrt(30.0f), 0.0005f);
1049 	EXPECT_NEAR(r1.lane<3>(), 4.0f / astc::sqrt(30.0f), 0.0005f);
1050 
1051 	vfloat4 a2(0.0f, 0.0f, 0.0f, 0.0f);
1052 	vfloat4 r2 = normalize_safe(a2, s);
1053 	EXPECT_EQ(r2.lane<0>(), -1.0f);
1054 	EXPECT_EQ(r2.lane<1>(), -1.0f);
1055 	EXPECT_EQ(r2.lane<2>(), -1.0f);
1056 	EXPECT_EQ(r2.lane<3>(), -1.0f);
1057 }
1058 
1059 /** @brief Test vfloat4 float_to_int. */
TEST(vfloat4, float_to_int)
{
	vfloat4 input(1.1f, 1.5f, -1.6f, 4.0f);

	vint4 converted = float_to_int(input);

	// Expected values drop the fractional part for both signs
	EXPECT_EQ(converted.lane<0>(), 1);
	EXPECT_EQ(converted.lane<1>(), 1);
	EXPECT_EQ(converted.lane<2>(), -1);
	EXPECT_EQ(converted.lane<3>(), 4);
}
1069 
/** @brief Test vfloat4 float_to_int_rtn (round-to-nearest conversion). */
TEST(vfloat4, float_to_int_rtn)
{
	vfloat4 input(1.1f, 1.5f, 1.6f, 4.0f);

	vint4 rounded = float_to_int_rtn(input);

	// Expected values round to the nearest integer; 1.5 is expected to give 2
	EXPECT_EQ(rounded.lane<0>(), 1);
	EXPECT_EQ(rounded.lane<1>(), 2);
	EXPECT_EQ(rounded.lane<2>(), 2);
	EXPECT_EQ(rounded.lane<3>(), 4);
}
1080 
/** @brief Test vint4 to vfloat4 conversion with int_to_float. */
TEST(vfloat4, int_to_float)
{
	vint4 input(1, 2, 3, 4);

	vfloat4 converted = int_to_float(input);

	// Small integers must convert to the identical float value
	EXPECT_EQ(converted.lane<0>(), 1.0f);
	EXPECT_EQ(converted.lane<1>(), 2.0f);
	EXPECT_EQ(converted.lane<2>(), 3.0f);
	EXPECT_EQ(converted.lane<3>(), 4.0f);
}
1091 
1092 /** @brief Test vfloat4 float to fp16 conversion. */
TEST(vfloat4, float_to_float16)
{
	// Lanes: two normal values, one value too large for FP16, and a NaN
	vfloat4 input(1.5f, 234.5f, 345345.0f, qnan);
	vint4 half_bits = float_to_float16(input);

	// Normal numbers map to their exact FP16 bit patterns
	EXPECT_EQ(half_bits.lane<0>(), 0x3E00);
	EXPECT_EQ(half_bits.lane<1>(), 0x5B54);

	// Values beyond the FP16 range become +infinity
	EXPECT_EQ(half_bits.lane<2>(), 0x7C00);

	// Any NaN encoding is acceptable: all-ones exponent, non-zero mantissa
	int nan_bits = half_bits.lane<3>();
	EXPECT_EQ((nan_bits >> 10) & 0x1F, 0x1F);
	EXPECT_NE(nan_bits & (0x3FF), 0);
}
1109 
1110 /** @brief Test float to fp16 conversion. */
TEST(sfloat, float_to_float16)
{
	// 234.5 is exactly representable in FP16 as 0x5B54
	int half_bits = float_to_float16(234.5f);

	EXPECT_EQ(half_bits, 0x5B54);
}
1116 
1117 /** @brief Test vfloat4 fp16 to float conversion. */
TEST(vfloat4, float16_to_float)
{
	// Lanes: FP16 encodings of 1.5, 234.5, +infinity, and a NaN pattern
	vint4 a(0x3E00, 0x5B54, 0x7C00, 0xFFFF);
	vfloat4 r = float16_to_float(a);

	// Normal numbers
	EXPECT_EQ(r.lane<0>(), 1.5f);
	EXPECT_EQ(r.lane<1>(), 234.5f);

	// Infinities must be preserved
	EXPECT_TRUE(std::isinf(r.lane<2>()));

	// NaNs must be preserved
	EXPECT_TRUE(std::isnan(r.lane<3>()));
}
1132 
1133 /** @brief Test fp16 to float conversion. */
TEST(sfloat, float16_to_float)
{
	// 0x5B54 is the FP16 encoding of 234.5
	float widened = float16_to_float(0x5B54);

	EXPECT_EQ(widened, 234.5f);
}
1139 
1140 // VINT4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1141 
1142 /** @brief Test unaligned vint4 data load. */
TEST(vint4, UnalignedLoad)
{
	// Start one element into the array so the load address is offset
	vint4 loaded(&(s32_data[1]));

	EXPECT_EQ(loaded.lane<0>(), 1);
	EXPECT_EQ(loaded.lane<1>(), 2);
	EXPECT_EQ(loaded.lane<2>(), 3);
	EXPECT_EQ(loaded.lane<3>(), 4);
}
1151 
/** @brief Test unaligned vint4 data load from 8-bit source data. */
TEST(vint4, UnalignedLoad8)
{
	// Start one byte into the array; each lane is expected to hold one byte
	vint4 loaded(&(u8_data[1]));

	EXPECT_EQ(loaded.lane<0>(), 1);
	EXPECT_EQ(loaded.lane<1>(), 2);
	EXPECT_EQ(loaded.lane<2>(), 3);
	EXPECT_EQ(loaded.lane<3>(), 4);
}
1161 
1162 /** @brief Test scalar duplicated vint4 load. */
TEST(vint4, ScalarDupLoad)
{
	// Single-scalar construction must replicate the value into every lane
	vint4 splat(42);

	EXPECT_EQ(splat.lane<0>(), 42);
	EXPECT_EQ(splat.lane<1>(), 42);
	EXPECT_EQ(splat.lane<2>(), 42);
	EXPECT_EQ(splat.lane<3>(), 42);
}
1171 
1172 /** @brief Test scalar vint4 load. */
TEST(vint4, ScalarLoad)
{
	// Four-scalar construction must place arguments in lane order
	vint4 lanes(11, 22, 33, 44);

	EXPECT_EQ(lanes.lane<0>(), 11);
	EXPECT_EQ(lanes.lane<1>(), 22);
	EXPECT_EQ(lanes.lane<2>(), 33);
	EXPECT_EQ(lanes.lane<3>(), 44);
}
1181 
1182 /** @brief Test copy vint4 load. */
TEST(vint4, CopyLoad)
{
	vint4 source(11, 22, 33, 44);

	// Construct from the underlying register member of another vector
	vint4 copy(source.m);

	EXPECT_EQ(copy.lane<0>(), 11);
	EXPECT_EQ(copy.lane<1>(), 22);
	EXPECT_EQ(copy.lane<2>(), 33);
	EXPECT_EQ(copy.lane<3>(), 44);
}
1192 
1193 /** @brief Test vint4 scalar lane set. */
TEST(int4, SetLane)
{
	// NOTE(review): suite is named "int4" while sibling tests use "vint4";
	// left unchanged here so existing test filters keep working.
	vint4 vec(0);

	// After each write only the lanes set so far may be non-zero
	vec.set_lane<0>(1);
	EXPECT_EQ(vec.lane<0>(), 1);
	EXPECT_EQ(vec.lane<1>(), 0);
	EXPECT_EQ(vec.lane<2>(), 0);
	EXPECT_EQ(vec.lane<3>(), 0);

	vec.set_lane<1>(2);
	EXPECT_EQ(vec.lane<0>(), 1);
	EXPECT_EQ(vec.lane<1>(), 2);
	EXPECT_EQ(vec.lane<2>(), 0);
	EXPECT_EQ(vec.lane<3>(), 0);

	vec.set_lane<2>(3);
	EXPECT_EQ(vec.lane<0>(), 1);
	EXPECT_EQ(vec.lane<1>(), 2);
	EXPECT_EQ(vec.lane<2>(), 3);
	EXPECT_EQ(vec.lane<3>(), 0);

	vec.set_lane<3>(4);
	EXPECT_EQ(vec.lane<0>(), 1);
	EXPECT_EQ(vec.lane<1>(), 2);
	EXPECT_EQ(vec.lane<2>(), 3);
	EXPECT_EQ(vec.lane<3>(), 4);
}
1222 
1223 /** @brief Test vint4 zero. */
TEST(vint4, Zero)
{
	vint4 zeroed = vint4::zero();

	// Every lane of the factory result must be zero
	EXPECT_EQ(zeroed.lane<0>(), 0);
	EXPECT_EQ(zeroed.lane<1>(), 0);
	EXPECT_EQ(zeroed.lane<2>(), 0);
	EXPECT_EQ(zeroed.lane<3>(), 0);
}
1232 
1233 /** @brief Test vint4 load1. */
TEST(vint4, Load1)
{
	int value = 42;

	// One scalar read from memory is expected in all four lanes
	vint4 splat = vint4::load1(&value);

	EXPECT_EQ(splat.lane<0>(), 42);
	EXPECT_EQ(splat.lane<1>(), 42);
	EXPECT_EQ(splat.lane<2>(), 42);
	EXPECT_EQ(splat.lane<3>(), 42);
}
1243 
1244 /** @brief Test vint4 loada. */
TEST(vint4, Loada)
{
	// Load from the start of the shared test array
	vint4 loaded = vint4::loada(&(s32_data[0]));

	EXPECT_EQ(loaded.lane<0>(), 0);
	EXPECT_EQ(loaded.lane<1>(), 1);
	EXPECT_EQ(loaded.lane<2>(), 2);
	EXPECT_EQ(loaded.lane<3>(), 3);
}
1253 
1254 /** @brief Test vint4 lane_id. */
TEST(vint4, LaneID)
{
	vint4 ids = vint4::lane_id();

	// Each lane must contain its own index
	EXPECT_EQ(ids.lane<0>(), 0);
	EXPECT_EQ(ids.lane<1>(), 1);
	EXPECT_EQ(ids.lane<2>(), 2);
	EXPECT_EQ(ids.lane<3>(), 3);
}
1263 
1264 /** @brief Test vint4 add. */
TEST(vint4, vadd)
{
	vint4 lhs(1, 2, 3, 4);
	vint4 rhs(2, 3, 4, 5);

	// Lane-wise vector + vector, written back over the left operand
	lhs = lhs + rhs;

	EXPECT_EQ(lhs.lane<0>(), 1 + 2);
	EXPECT_EQ(lhs.lane<1>(), 2 + 3);
	EXPECT_EQ(lhs.lane<2>(), 3 + 4);
	EXPECT_EQ(lhs.lane<3>(), 4 + 5);
}
1275 
1276 /** @brief Test vint4 self-add. */
TEST(vint4, vselfadd)
{
	vint4 accum(1, 2, 3, 4);
	vint4 delta(2, 3, 4, 5);

	// Compound assignment form of the lane-wise add
	accum += delta;

	EXPECT_EQ(accum.lane<0>(), 1 + 2);
	EXPECT_EQ(accum.lane<1>(), 2 + 3);
	EXPECT_EQ(accum.lane<2>(), 3 + 4);
	EXPECT_EQ(accum.lane<3>(), 4 + 5);
}
1288 
/** @brief Test vint4 add with a scalar operand. */
TEST(vint4, vsadd)
{
	vint4 vec(1, 2, 3, 4);
	int scalar = 5;

	// The scalar operand is added to every lane
	vec = vec + scalar;

	EXPECT_EQ(vec.lane<0>(), 1 + 5);
	EXPECT_EQ(vec.lane<1>(), 2 + 5);
	EXPECT_EQ(vec.lane<2>(), 3 + 5);
	EXPECT_EQ(vec.lane<3>(), 4 + 5);
}
1300 
1301 /** @brief Test vint4 sub. */
TEST(vint4, vsub)
{
	vint4 lhs(1, 2, 4, 4);
	vint4 rhs(2, 3, 3, 5);

	// Lane-wise vector - vector; inputs give both signs of result
	lhs = lhs - rhs;

	EXPECT_EQ(lhs.lane<0>(), 1 - 2);
	EXPECT_EQ(lhs.lane<1>(), 2 - 3);
	EXPECT_EQ(lhs.lane<2>(), 4 - 3);
	EXPECT_EQ(lhs.lane<3>(), 4 - 5);
}
1312 
/** @brief Test vint4 sub with a scalar operand. */
TEST(vint4, vssub)
{
	vint4 vec(1, 2, 4, 4);
	int scalar = 5;

	// The scalar operand is subtracted from every lane
	vec = vec - scalar;

	EXPECT_EQ(vec.lane<0>(), 1 - 5);
	EXPECT_EQ(vec.lane<1>(), 2 - 5);
	EXPECT_EQ(vec.lane<2>(), 4 - 5);
	EXPECT_EQ(vec.lane<3>(), 4 - 5);
}
1324 
1325 /** @brief Test vint4 mul. */
TEST(vint4, vmul)
{
	vint4 lhs(1, 2, 4, 4);
	vint4 rhs(2, 3, 3, 5);

	// Lane-wise vector * vector
	lhs = lhs * rhs;

	EXPECT_EQ(lhs.lane<0>(), 1 * 2);
	EXPECT_EQ(lhs.lane<1>(), 2 * 3);
	EXPECT_EQ(lhs.lane<2>(), 4 * 3);
	EXPECT_EQ(lhs.lane<3>(), 4 * 5);
}
1336 
/** @brief Test vint4 mul with a scalar operand. */
TEST(vint4, vsmul)
{
	// Positive scalar multiplier
	vint4 pos_case(1, 2, 4, 4);
	pos_case = pos_case * 3;
	EXPECT_EQ(pos_case.lane<0>(), 1 * 3);
	EXPECT_EQ(pos_case.lane<1>(), 2 * 3);
	EXPECT_EQ(pos_case.lane<2>(), 4 * 3);
	EXPECT_EQ(pos_case.lane<3>(), 4 * 3);

	// Negative scalar multiplier, including one negative lane
	vint4 neg_case(1, 2, -4, 4);
	neg_case = neg_case * -3;
	EXPECT_EQ(neg_case.lane<0>(), 1 * -3);
	EXPECT_EQ(neg_case.lane<1>(), 2 * -3);
	EXPECT_EQ(neg_case.lane<2>(), -4 * -3);
	EXPECT_EQ(neg_case.lane<3>(), 4 * -3);
}
1354 
1355 /** @brief Test vint4 bitwise invert. */
TEST(vint4, bit_invert)
{
	vint4 bits(-1, 0, 1, 2);

	// Compare against the scalar bitwise complement of each input lane
	bits = ~bits;

	EXPECT_EQ(bits.lane<0>(), ~-1);
	EXPECT_EQ(bits.lane<1>(), ~0);
	EXPECT_EQ(bits.lane<2>(), ~1);
	EXPECT_EQ(bits.lane<3>(), ~2);
}
1365 
1366 /** @brief Test vint4 bitwise or. */
TEST(vint4, bit_vor)
{
	vint4 lhs(1, 2, 3, 4);
	vint4 rhs(2, 3, 4, 5);

	// Lane-wise vector | vector
	lhs = lhs | rhs;

	EXPECT_EQ(lhs.lane<0>(), 3);
	EXPECT_EQ(lhs.lane<1>(), 3);
	EXPECT_EQ(lhs.lane<2>(), 7);
	EXPECT_EQ(lhs.lane<3>(), 5);
}
1377 
/** @brief Test vint4 bitwise or with a scalar operand. */
TEST(vint4, bit_vsor)
{
	vint4 vec(1, 2, 3, 4);
	int scalar = 2;

	// The scalar operand is or-ed into every lane
	vec = vec | scalar;

	EXPECT_EQ(vec.lane<0>(), 3);
	EXPECT_EQ(vec.lane<1>(), 2);
	EXPECT_EQ(vec.lane<2>(), 3);
	EXPECT_EQ(vec.lane<3>(), 6);
}
1388 
1389 /** @brief Test vint4 bitwise and. */
TEST(vint4, bit_vand)
{
	vint4 lhs(1, 2, 3, 4);
	vint4 rhs(2, 3, 4, 5);

	// Lane-wise vector & vector
	lhs = lhs & rhs;

	EXPECT_EQ(lhs.lane<0>(), 0);
	EXPECT_EQ(lhs.lane<1>(), 2);
	EXPECT_EQ(lhs.lane<2>(), 0);
	EXPECT_EQ(lhs.lane<3>(), 4);
}
1400 
/** @brief Test vint4 bitwise and with a scalar operand. */
TEST(vint4, bit_vsand)
{
	vint4 vec(1, 2, 3, 4);
	int scalar = 2;

	// Every lane is and-ed with the scalar operand
	vec = vec & scalar;

	EXPECT_EQ(vec.lane<0>(), 0);
	EXPECT_EQ(vec.lane<1>(), 2);
	EXPECT_EQ(vec.lane<2>(), 2);
	EXPECT_EQ(vec.lane<3>(), 0);
}
1412 
1413 /** @brief Test vint4 bitwise xor. */
TEST(vint4, bit_vxor)
{
	vint4 lhs(1, 2, 3, 4);
	vint4 rhs(2, 3, 4, 5);

	// Lane-wise vector ^ vector
	lhs = lhs ^ rhs;

	EXPECT_EQ(lhs.lane<0>(), 3);
	EXPECT_EQ(lhs.lane<1>(), 1);
	EXPECT_EQ(lhs.lane<2>(), 7);
	EXPECT_EQ(lhs.lane<3>(), 1);
}
1424 
/** @brief Test vint4 bitwise xor with a scalar operand. */
TEST(vint4, bit_vsxor)
{
	vint4 vec(1, 2, 3, 4);
	int scalar = 2;

	// Every lane is xor-ed with the scalar operand
	vec = vec ^ scalar;

	EXPECT_EQ(vec.lane<0>(), 3);
	EXPECT_EQ(vec.lane<1>(), 0);
	EXPECT_EQ(vec.lane<2>(), 1);
	EXPECT_EQ(vec.lane<3>(), 6);
}
1436 
1437 /** @brief Test vint4 ceq. */
TEST(vint4, ceq)
{
	// No lanes equal
	vint4 lhs1(1, 2, 3, 4);
	vint4 rhs1(0, 1, 2, 3);
	vmask4 eq1 = lhs1 == rhs1;
	EXPECT_EQ(0u, mask(eq1));
	EXPECT_EQ(false, any(eq1));
	EXPECT_EQ(false, all(eq1));

	// Only lane 0 equal
	vint4 lhs2(1, 2, 3, 4);
	vint4 rhs2(1, 0, 0, 0);
	vmask4 eq2 = lhs2 == rhs2;
	EXPECT_EQ(0x1u, mask(eq2));
	EXPECT_EQ(true, any(eq2));
	EXPECT_EQ(false, all(eq2));

	// Lanes 0 and 2 equal
	vint4 lhs3(1, 2, 3, 4);
	vint4 rhs3(1, 0, 3, 0);
	vmask4 eq3 = lhs3 == rhs3;
	EXPECT_EQ(0x5u, mask(eq3));
	EXPECT_EQ(true, any(eq3));
	EXPECT_EQ(false, all(eq3));

	// Vector compared with itself: every lane equal
	vint4 self4(1, 2, 3, 4);
	vmask4 eq4 = self4 == self4;
	EXPECT_EQ(0xFu, mask(eq4));
	EXPECT_EQ(true, any(eq4));
	EXPECT_EQ(true, all(eq4));
}
1467 
1468 /** @brief Test vint4 cne. */
TEST(vint4, cne)
{
	// Every lane differs
	vint4 lhs1(1, 2, 3, 4);
	vint4 rhs1(0, 1, 2, 3);
	vmask4 ne1 = lhs1 != rhs1;
	EXPECT_EQ(0xFu, mask(ne1));
	EXPECT_EQ(true, any(ne1));
	EXPECT_EQ(true, all(ne1));

	// Lanes 1, 2 and 3 differ
	vint4 lhs2(1, 2, 3, 4);
	vint4 rhs2(1, 0, 0, 0);
	vmask4 ne2 = lhs2 != rhs2;
	EXPECT_EQ(0xEu, mask(ne2));
	EXPECT_EQ(true, any(ne2));
	EXPECT_EQ(false, all(ne2));

	// Lanes 1 and 3 differ
	vint4 lhs3(1, 2, 3, 4);
	vint4 rhs3(1, 0, 3, 0);
	vmask4 ne3 = lhs3 != rhs3;
	EXPECT_EQ(0xAu, mask(ne3));
	EXPECT_EQ(true, any(ne3));
	EXPECT_EQ(false, all(ne3));

	// Vector compared with itself: no lane differs
	vint4 self4(1, 2, 3, 4);
	vmask4 ne4 = self4 != self4;
	EXPECT_EQ(0u, mask(ne4));
	EXPECT_EQ(false, any(ne4));
	EXPECT_EQ(false, all(ne4));
}
1498 
1499 /** @brief Test vint4 clt. */
TEST(vint4, clt)
{
	vint4 lhs(1, 2, 3, 4);
	vint4 rhs(0, 3, 3, 5);

	// Strictly-less holds in lanes 1 and 3 only; the equal lane 2 is excluded
	vmask4 is_less = lhs < rhs;

	EXPECT_EQ(0xAu, mask(is_less));
}
1507 
1508 /** @brief Test vint4 cgt. */
TEST(vint4, cle)
{
	// NOTE(review): the test is named "cle" but the operator exercised is
	// greater-than; name left unchanged so existing test filters keep working.
	vint4 lhs(1, 2, 3, 4);
	vint4 rhs(0, 3, 3, 5);

	// Strictly-greater holds in lane 0 only; the equal lane 2 is excluded
	vmask4 is_greater = lhs > rhs;

	EXPECT_EQ(0x1u, mask(is_greater));
}
1516 
1517 /** @brief Test vint4 lsl. */
TEST(vint4, lsl)
{
	vint4 shifted(1, 2, 4, 4);

	// Shift by zero leaves the value untouched
	shifted = lsl<0>(shifted);
	EXPECT_EQ(shifted.lane<0>(), 1);
	EXPECT_EQ(shifted.lane<1>(), 2);
	EXPECT_EQ(shifted.lane<2>(), 4);
	EXPECT_EQ(shifted.lane<3>(), 4);

	// Shifts are applied cumulatively to the same vector: now << 1 in total
	shifted = lsl<1>(shifted);
	EXPECT_EQ(shifted.lane<0>(), 2);
	EXPECT_EQ(shifted.lane<1>(), 4);
	EXPECT_EQ(shifted.lane<2>(), 8);
	EXPECT_EQ(shifted.lane<3>(), 8);

	// A further << 2 gives << 3 in total relative to the initial values
	shifted = lsl<2>(shifted);
	EXPECT_EQ(shifted.lane<0>(), 8);
	EXPECT_EQ(shifted.lane<1>(), 16);
	EXPECT_EQ(shifted.lane<2>(), 32);
	EXPECT_EQ(shifted.lane<3>(), 32);
}
1539 
1540 /** @brief Test vint4 lsr. */
TEST(vint4, lsr)
{
	// Lane 3 is negative so that zero-fill of the top bits is observable
	vint4 shifted(1, 2, 4, -4);

	// Shift by zero leaves the bit pattern untouched
	shifted = lsr<0>(shifted);
	EXPECT_EQ(shifted.lane<0>(), 1);
	EXPECT_EQ(shifted.lane<1>(), 2);
	EXPECT_EQ(shifted.lane<2>(), 4);
	EXPECT_EQ(shifted.lane<3>(), static_cast<int>(0xFFFFFFFC));

	// Shifts are cumulative; the sign bit is replaced with zero
	shifted = lsr<1>(shifted);
	EXPECT_EQ(shifted.lane<0>(), 0);
	EXPECT_EQ(shifted.lane<1>(), 1);
	EXPECT_EQ(shifted.lane<2>(), 2);
	EXPECT_EQ(shifted.lane<3>(), 0x7FFFFFFE);

	shifted = lsr<2>(shifted);
	EXPECT_EQ(shifted.lane<0>(), 0);
	EXPECT_EQ(shifted.lane<1>(), 0);
	EXPECT_EQ(shifted.lane<2>(), 0);
	EXPECT_EQ(shifted.lane<3>(), 0x1FFFFFFF);
}
1562 
1563 /** @brief Test vint4 asr. */
TEST(vint4, asr)
{
	// Lane 3 is negative so that sign preservation is observable
	vint4 shifted(1, 2, 4, -4);

	// Shift by zero leaves the value untouched
	shifted = asr<0>(shifted);
	EXPECT_EQ(shifted.lane<0>(), 1);
	EXPECT_EQ(shifted.lane<1>(), 2);
	EXPECT_EQ(shifted.lane<2>(), 4);
	EXPECT_EQ(shifted.lane<3>(), -4);

	// Shifts are cumulative; the negative lane stays negative
	shifted = asr<1>(shifted);
	EXPECT_EQ(shifted.lane<0>(), 0);
	EXPECT_EQ(shifted.lane<1>(), 1);
	EXPECT_EQ(shifted.lane<2>(), 2);
	EXPECT_EQ(shifted.lane<3>(), -2);

	// Quirk of asr: a negative value never reaches zero, it sticks at -1
	shifted = asr<2>(shifted);
	EXPECT_EQ(shifted.lane<0>(), 0);
	EXPECT_EQ(shifted.lane<1>(), 0);
	EXPECT_EQ(shifted.lane<2>(), 0);
	EXPECT_EQ(shifted.lane<3>(), -1);
}
1586 
1587 /** @brief Test vint4 min. */
TEST(vint4, min)
{
	// Lanes cover: rhs smaller, lhs smaller, equal, lhs smaller
	vint4 lhs(1, 2, 3, 4);
	vint4 rhs(0, 3, 3, 5);

	vint4 lowest = min(lhs, rhs);

	EXPECT_EQ(lowest.lane<0>(), 0);
	EXPECT_EQ(lowest.lane<1>(), 2);
	EXPECT_EQ(lowest.lane<2>(), 3);
	EXPECT_EQ(lowest.lane<3>(), 4);
}
1598 
1599 /** @brief Test vint4 max. */
TEST(vint4, max)
{
	// Lanes cover: lhs larger, rhs larger, equal, rhs larger
	vint4 lhs(1, 2, 3, 4);
	vint4 rhs(0, 3, 3, 5);

	vint4 highest = max(lhs, rhs);

	EXPECT_EQ(highest.lane<0>(), 1);
	EXPECT_EQ(highest.lane<1>(), 3);
	EXPECT_EQ(highest.lane<2>(), 3);
	EXPECT_EQ(highest.lane<3>(), 5);
}
1610 
1611 /** @brief Test vint4 clamp. */
TEST(vint4, clamp)
{
	// Inputs sit below, on, on, and above the [2, 3] clamp range
	vint4 input(1, 2, 3, 4);

	vint4 bounded = clamp(2, 3, input);

	EXPECT_EQ(bounded.lane<0>(), 2);
	EXPECT_EQ(bounded.lane<1>(), 2);
	EXPECT_EQ(bounded.lane<2>(), 3);
	EXPECT_EQ(bounded.lane<3>(), 3);
}
1621 
1622 /** @brief Test vint4 hmin. */
TEST(vint4, hmin)
{
	// Minimum appears in more than one lane
	vint4 input1(1, 2, 1, 2);
	vint4 low1 = hmin(input1);
	EXPECT_EQ(low1.lane<0>(), 1);
	EXPECT_EQ(low1.lane<1>(), 1);
	EXPECT_EQ(low1.lane<2>(), 1);
	EXPECT_EQ(low1.lane<3>(), 1);

	// Minimum is a single negative lane; result is replicated to all lanes
	vint4 input2(1, 2, -1, 5);
	vint4 low2 = hmin(input2);
	EXPECT_EQ(low2.lane<0>(), -1);
	EXPECT_EQ(low2.lane<1>(), -1);
	EXPECT_EQ(low2.lane<2>(), -1);
	EXPECT_EQ(low2.lane<3>(), -1);
}
1639 
1640 /** @brief Test vint4 hmax. */
TEST(vint4, hmax)
{
	// Maximum sits in lane 1; result is replicated to all lanes
	vint4 input1(1, 3, 1, 2);
	vint4 high1 = hmax(input1);
	EXPECT_EQ(high1.lane<0>(), 3);
	EXPECT_EQ(high1.lane<1>(), 3);
	EXPECT_EQ(high1.lane<2>(), 3);
	EXPECT_EQ(high1.lane<3>(), 3);

	// Maximum sits in lane 3, with a negative value elsewhere
	vint4 input2(1, 2, -1, 5);
	vint4 high2 = hmax(input2);
	EXPECT_EQ(high2.lane<0>(), 5);
	EXPECT_EQ(high2.lane<1>(), 5);
	EXPECT_EQ(high2.lane<2>(), 5);
	EXPECT_EQ(high2.lane<3>(), 5);
}
1657 
1658 /** @brief Test vint4 hadd_s. */
TEST(vint4, hadd_s)
{
	// All-positive lanes: 1 + 3 + 5 + 7
	vint4 input1(1, 3, 5, 7);
	int total1 = hadd_s(input1);
	EXPECT_EQ(total1, 16);

	// Mixed-sign lanes: 1 + 2 - 1 + 5
	vint4 input2(1, 2, -1, 5);
	int total2 = hadd_s(input2);
	EXPECT_EQ(total2, 7);
}
1669 
1670 /** @brief Test vint4 hadd_rgb_s. */
TEST(vint4, hadd_rgb_s)
{
	// Only the first three lanes are summed: 1 + 3 + 5
	vint4 input1(1, 3, 5, 7);
	int total1 = hadd_rgb_s(input1);
	EXPECT_EQ(total1, 9);

	// Mixed-sign lanes: 1 + 2 - 1, lane 3 ignored
	vint4 input2(1, 2, -1, 5);
	int total2 = hadd_rgb_s(input2);
	EXPECT_EQ(total2, 2);
}
1681 
1682 /** @brief Test vint4 clz. */
TEST(vint4, clz)
{
	// Highest set bit at positions 31, 30, 29, 28
	int top_bit = static_cast<int>(0x80000000);
	vint4 high_bits(top_bit, 0x40000000, 0x20000000, 0x10000000);
	vint4 count_high = clz(high_bits);
	EXPECT_EQ(count_high.lane<0>(), 0);
	EXPECT_EQ(count_high.lane<1>(), 1);
	EXPECT_EQ(count_high.lane<2>(), 2);
	EXPECT_EQ(count_high.lane<3>(), 3);

	// Zero input (expected count 32) and highest set bit at positions 0, 1, 2
	vint4 low_bits(0x0, 0x1, 0x2, 0x4);
	vint4 count_low = clz(low_bits);
	EXPECT_EQ(count_low.lane<0>(), 32);
	EXPECT_EQ(count_low.lane<1>(), 31);
	EXPECT_EQ(count_low.lane<2>(), 30);
	EXPECT_EQ(count_low.lane<3>(), 29);
}
1700 
1701 /** @brief Test vint4 two_to_the_n. */
TEST(vint4, two_to_the_n)
{
	// Smallest exponents
	vint4 small_exp(0, 1, 2, 3);
	vint4 small_pow = two_to_the_n(small_exp);
	EXPECT_EQ(small_pow.lane<0>(), 1 << 0);
	EXPECT_EQ(small_pow.lane<1>(), 1 << 1);
	EXPECT_EQ(small_pow.lane<2>(), 1 << 2);
	EXPECT_EQ(small_pow.lane<3>(), 1 << 3);

	// Largest exponents that are tested
	vint4 large_exp(27, 28, 29, 30);
	vint4 large_pow = two_to_the_n(large_exp);
	EXPECT_EQ(large_pow.lane<0>(), 1 << 27);
	EXPECT_EQ(large_pow.lane<1>(), 1 << 28);
	EXPECT_EQ(large_pow.lane<2>(), 1 << 29);
	EXPECT_EQ(large_pow.lane<3>(), 1 << 30);

	// Exponents above 30 are deliberately not tested. They overflow the int
	// type and give implementation-defined behavior because of how the
	// shifted result is generated in two_to_the_n():
	// -  Shift by 31 shifts into sign bit
	// -  Shift by 32 shifts off the end
}
1724 
1725 /** @brief Test vint4 storea. */
TEST(vint4, storea)
{
	// Destination is explicitly 16-byte aligned for the aligned store
	alignas(16) int dest[4];
	vint4 data(s32_data);

	storea(data, dest);

	EXPECT_EQ(dest[0], 0);
	EXPECT_EQ(dest[1], 1);
	EXPECT_EQ(dest[2], 2);
	EXPECT_EQ(dest[3], 3);
}
1736 
1737 /** @brief Test vint4 store. */
TEST(vint4, store)
{
	// Array is aligned, so writing from element 1 gives an offset address
	alignas(16) int dest[5];
	vint4 data(s32_data);

	store(data, dest + 1);

	EXPECT_EQ(dest[1], 0);
	EXPECT_EQ(dest[2], 1);
	EXPECT_EQ(dest[3], 2);
	EXPECT_EQ(dest[4], 3);
}
1748 
1749 /** @brief Test vint4 store_nbytes. */
TEST(vint4, store_nbytes)
{
	alignas(16) int dest;
	vint4 data(42, 314, 75, 90);

	// Destination holds one int; it is expected to receive lane 0
	uint8_t* dest_bytes = reinterpret_cast<uint8_t*>(&dest);
	store_nbytes(data, dest_bytes);

	EXPECT_EQ(dest, 42);
}
1757 
/** @brief Test vint4 store_lanes_masked. */
TEST(vint4, store_lanes_masked)
{
	// The same destination is reused, so each stage builds on the previous one
	int dest[4] { 0 };

	// Stage 1: empty mask, destination must remain all zero
	vmask4 none_set = vint4(0) == vint4(1);
	vint4 ones = vint4(1);

	store_lanes_masked(dest, ones, none_set);
	vint4 after_none(dest);
	vint4 expect_none = vint4::zero();
	EXPECT_TRUE(all(after_none == expect_none));

	// Stage 2: lanes 0 and 1 enabled, lanes 2 and 3 keep their old contents
	vmask4 half_set = vint4(1, 1, 0, 0) == vint4(1);
	vint4 twos = vint4(2);

	store_lanes_masked(dest, twos, half_set);
	vint4 after_half(dest);
	vint4 expect_half = vint4(2, 2, 0, 0);
	EXPECT_TRUE(all(after_half == expect_half));

	// Stage 3: full mask, every lane is overwritten
	vmask4 all_set = vint4(1) == vint4(1);
	vint4 threes = vint4(3);

	store_lanes_masked(dest, threes, all_set);
	vint4 after_all(dest);
	vint4 expect_all = vint4(3);
	EXPECT_TRUE(all(after_all == expect_all));
}
1790 
/** @brief Test vint4 store_lanes_masked to unaligned address. */
TEST(vint4, store_lanes_masked_unaligned)
{
	// 17 bytes: one byte of offset followed by room for four ints
	int8_t buffer[17] { 0 };

	// Stage 1: empty mask, destination must remain all zero
	vmask4 none_set = vint4(0) == vint4(1);
	vint4 ones = vint4(1);

	store_lanes_masked(reinterpret_cast<int*>(buffer + 1), ones, none_set);
	vint4 after_none(reinterpret_cast<int*>(buffer + 1));
	vint4 expect_none = vint4::zero();
	EXPECT_TRUE(all(after_none == expect_none));

	// Stage 2: lanes 0 and 1 enabled, lanes 2 and 3 keep their old contents
	vmask4 half_set = vint4(1, 1, 0, 0) == vint4(1);
	vint4 twos = vint4(2);

	store_lanes_masked(reinterpret_cast<int*>(buffer + 1), twos, half_set);
	vint4 after_half(reinterpret_cast<int*>(buffer + 1));
	vint4 expect_half = vint4(2, 2, 0, 0);
	EXPECT_TRUE(all(after_half == expect_half));

	// Stage 3: full mask, every lane is overwritten
	vmask4 all_set = vint4(1) == vint4(1);
	vint4 threes = vint4(3);

	store_lanes_masked(reinterpret_cast<int*>(buffer + 1), threes, all_set);
	vint4 after_all(reinterpret_cast<int*>(buffer + 1));
	vint4 expect_all = vint4(3);
	EXPECT_TRUE(all(after_all == expect_all));
}
1823 
1824 /** @brief Test vint4 gatheri. */
TEST(vint4, gatheri)
{
	// Out-of-order indices; expected values equal the indices used
	vint4 lookup(0, 4, 3, 2);

	vint4 gathered = gatheri(s32_data, lookup);

	EXPECT_EQ(gathered.lane<0>(), 0);
	EXPECT_EQ(gathered.lane<1>(), 4);
	EXPECT_EQ(gathered.lane<2>(), 3);
	EXPECT_EQ(gathered.lane<3>(), 2);
}
1834 
1835 /** @brief Test vint4 pack_low_bytes. */
TEST(vint4, pack_low_bytes)
{
	vint4 input(1, 2, 3, 4);

	vint4 packed = pack_low_bytes(input);

	// Lane N of the input is expected in byte N of result lane 0
	EXPECT_EQ(packed.lane<0>(), (4 << 24) | (3 << 16) | (2  << 8) | (1 << 0));
}
1842 
1843 /** @brief Test vint4 select. */
TEST(vint4, select)
{
	// Build a mask that is set in lanes 0 and 2 only
	vint4 ones(1, 1, 1, 1);
	vint4 mixed(1, 2, 1, 2);
	vmask4 lanes_match = ones == mixed;

	vint4 first(1, 3, 3, 1);
	vint4 second(4, 2, 2, 4);

	// Set lanes take the second operand, clear lanes take the first
	vint4 picked = select(first, second, lanes_match);
	EXPECT_EQ(picked.lane<0>(), 4);
	EXPECT_EQ(picked.lane<1>(), 3);
	EXPECT_EQ(picked.lane<2>(), 2);
	EXPECT_EQ(picked.lane<3>(), 1);

	// Swapping the operands swaps which source each lane comes from
	vint4 swapped = select(second, first, lanes_match);
	EXPECT_EQ(swapped.lane<0>(), 1);
	EXPECT_EQ(swapped.lane<1>(), 2);
	EXPECT_EQ(swapped.lane<2>(), 3);
	EXPECT_EQ(swapped.lane<3>(), 4);
}
1865 
1866 // VMASK4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1867 /** @brief Test vmask4 scalar literal constructor. */
TEST(vmask4, scalar_literal_construct)
{
	vfloat4 zeros(0.0f, 0.0f, 0.0f, 0.0f);
	vfloat4 ones(1.0f, 1.0f, 1.0f, 1.0f);

	// Single-bool construction is expected to set every lane of the mask
	vmask4 all_set(true);

	// With all lanes set, select must return its second operand everywhere
	vfloat4 picked = select(zeros, ones, all_set);

	EXPECT_EQ(picked.lane<0>(), 1.0f);
	EXPECT_EQ(picked.lane<1>(), 1.0f);
	EXPECT_EQ(picked.lane<2>(), 1.0f);
	EXPECT_EQ(picked.lane<3>(), 1.0f);

	// Same check with the operands swapped
	picked = select(ones, zeros, all_set);

	EXPECT_EQ(picked.lane<0>(), 0.0f);
	EXPECT_EQ(picked.lane<1>(), 0.0f);
	EXPECT_EQ(picked.lane<2>(), 0.0f);
	EXPECT_EQ(picked.lane<3>(), 0.0f);
}
1888 
1889 /** @brief Test vmask4 literal constructor. */
TEST(vmask4, literal_construct)
{
	vfloat4 zeros(0.0f, 0.0f, 0.0f, 0.0f);
	vfloat4 ones(1.0f, 1.0f, 1.0f, 1.0f);

	// Per-lane bool construction: lanes 0 and 2 set, lanes 1 and 3 clear
	vmask4 alternating(true, false, true, false);

	// Set lanes pick from the second operand, clear lanes from the first
	vfloat4 picked = select(zeros, ones, alternating);

	EXPECT_EQ(picked.lane<0>(), 1.0f);
	EXPECT_EQ(picked.lane<1>(), 0.0f);
	EXPECT_EQ(picked.lane<2>(), 1.0f);
	EXPECT_EQ(picked.lane<3>(), 0.0f);
}
1903 
1904 /** @brief Test vmask4 or. */
TEST(vmask4, or)
{
	// First mask: lanes 1 and 3 set (0xA)
	vfloat4 lhs_values(0, 1, 0, 1);
	vfloat4 lhs_ref(1, 1, 1, 1);
	vmask4 lhs_mask = lhs_values == lhs_ref;

	// Second mask: lanes 0 and 1 set (0x3)
	vfloat4 rhs_values(1, 1, 0, 0);
	vfloat4 rhs_ref(1, 1, 1, 1);
	vmask4 rhs_mask = rhs_values == rhs_ref;

	vmask4 combined = lhs_mask | rhs_mask;
	EXPECT_EQ(mask(combined), 0xBu);
}
1918 
1919 /** @brief Test vmask4 and. */
TEST(vmask4, and)
{
	// First mask: lanes 1 and 3 set (0xA)
	vfloat4 lhs_values(0, 1, 0, 1);
	vfloat4 lhs_ref(1, 1, 1, 1);
	vmask4 lhs_mask = lhs_values == lhs_ref;

	// Second mask: lanes 0 and 1 set (0x3)
	vfloat4 rhs_values(1, 1, 0, 0);
	vfloat4 rhs_ref(1, 1, 1, 1);
	vmask4 rhs_mask = rhs_values == rhs_ref;

	vmask4 combined = lhs_mask & rhs_mask;
	EXPECT_EQ(mask(combined), 0x2u);
}
1933 
1934 /** @brief Test vmask4 xor. */
TEST(vmask4, xor)
{
	// First mask: lanes 1 and 3 set (0xA)
	vfloat4 lhs_values(0, 1, 0, 1);
	vfloat4 lhs_ref(1, 1, 1, 1);
	vmask4 lhs_mask = lhs_values == lhs_ref;

	// Second mask: lanes 0 and 1 set (0x3)
	vfloat4 rhs_values(1, 1, 0, 0);
	vfloat4 rhs_ref(1, 1, 1, 1);
	vmask4 rhs_mask = rhs_values == rhs_ref;

	vmask4 combined = lhs_mask ^ rhs_mask;
	EXPECT_EQ(mask(combined), 0x9u);
}
1948 
1949 /** @brief Test vmask4 not. */
TEST(vmask4, not)
{
	// Source mask: lanes 1 and 3 set (0xA)
	vfloat4 values(0, 1, 0, 1);
	vfloat4 ref(1, 1, 1, 1);
	vmask4 source = values == ref;

	// Inversion must leave only lanes 0 and 2 set
	vmask4 inverted = ~source;
	EXPECT_EQ(mask(inverted), 0x5u);
}
1958 
1959 /** @brief Test vint4 table permute. */
TEST(vint4, vtable_8bt_32bi_32entry)
{
	// 32 one-byte table entries packed four per lane across two vectors
	vint4 raw0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
	vint4 raw1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);

	// Convert the raw tables into the form the lookup expects
	vint4 prep0, prep1;
	vtable_prepare(raw0, raw1, prep0, prep1);

	// Indices cover the first entry, both halves of lane 1, and the last entry
	vint4 lookup(0, 7, 4, 31);

	vint4 fetched = vtable_8bt_32bi(prep0, prep1, lookup);

	EXPECT_EQ(fetched.lane<0>(),  3);
	EXPECT_EQ(fetched.lane<1>(),  4);
	EXPECT_EQ(fetched.lane<2>(),  7);
	EXPECT_EQ(fetched.lane<3>(), 28);
}
1977 
1978 /** @brief Test vint4 table permute. */
TEST(vint4, vtable_8bt_32bi_64entry)
{
	// 64 one-byte table entries packed four per lane across four vectors
	vint4 raw0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
	vint4 raw1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
	vint4 raw2(0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f);
	vint4 raw3(0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f);

	// Convert the raw tables into the form the lookup expects
	vint4 prep0, prep1, prep2, prep3;
	vtable_prepare(raw0, raw1, raw2, raw3, prep0, prep1, prep2, prep3);

	// Indices reach into the first, third, and fourth source vectors
	vint4 lookup(0, 7, 38, 63);

	vint4 fetched = vtable_8bt_32bi(prep0, prep1, prep2, prep3, lookup);

	EXPECT_EQ(fetched.lane<0>(),  3);
	EXPECT_EQ(fetched.lane<1>(),  4);
	EXPECT_EQ(fetched.lane<2>(), 37);
	EXPECT_EQ(fetched.lane<3>(), 60);
}
1998 
1999 /** @brief Test vint4 rgba byte interleave. */
TEST(vint4, interleave_rgba8)
{
	// High nibble identifies the pixel, low nibble identifies the channel
	vint4 red(0x01, 0x11, 0x21, 0x31);
	vint4 green(0x02, 0x12, 0x22, 0x32);
	vint4 blue(0x03, 0x13, 0x23, 0x33);
	vint4 alpha(0x04, 0x14, 0x24, 0x34);

	vint4 packed = interleave_rgba8(red, green, blue, alpha);

	// Each output lane holds one pixel, red in the lowest byte
	EXPECT_EQ(packed.lane<0>(), 0x04030201);
	EXPECT_EQ(packed.lane<1>(), 0x14131211);
	EXPECT_EQ(packed.lane<2>(), 0x24232221);
	EXPECT_EQ(packed.lane<3>(), 0x34333231);
}
2014 
2015 # if ASTCENC_SIMD_WIDTH == 8
2016 
2017 // VFLOAT8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2018 
2019 /** @brief Test unaligned vfloat8 data load. */
TEST(vfloat8, UnalignedLoad)
{
	// Start one element into the array so the load address is offset
	vfloat8 loaded(&(f32_data[1]));

	EXPECT_EQ(loaded.lane<0>(), 1.0f);
	EXPECT_EQ(loaded.lane<1>(), 2.0f);
	EXPECT_EQ(loaded.lane<2>(), 3.0f);
	EXPECT_EQ(loaded.lane<3>(), 4.0f);
	EXPECT_EQ(loaded.lane<4>(), 5.0f);
	EXPECT_EQ(loaded.lane<5>(), 6.0f);
	EXPECT_EQ(loaded.lane<6>(), 7.0f);
	EXPECT_EQ(loaded.lane<7>(), 8.0f);
}
2032 
2033 /** @brief Test scalar duplicated vfloat8 load. */
TEST(vfloat8, ScalarDupLoad)
{
	// Single-scalar construction must replicate the value into every lane
	vfloat8 splat(1.1f);

	EXPECT_EQ(splat.lane<0>(), 1.1f);
	EXPECT_EQ(splat.lane<1>(), 1.1f);
	EXPECT_EQ(splat.lane<2>(), 1.1f);
	EXPECT_EQ(splat.lane<3>(), 1.1f);
	EXPECT_EQ(splat.lane<4>(), 1.1f);
	EXPECT_EQ(splat.lane<5>(), 1.1f);
	EXPECT_EQ(splat.lane<6>(), 1.1f);
	EXPECT_EQ(splat.lane<7>(), 1.1f);
}
2046 
2047 /** @brief Test scalar vfloat8 load. */
TEST(vfloat8, ScalarLoad)
{
	// Eight-scalar construction must place arguments in lane order
	vfloat8 lanes(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);

	EXPECT_EQ(lanes.lane<0>(), 1.1f);
	EXPECT_EQ(lanes.lane<1>(), 2.2f);
	EXPECT_EQ(lanes.lane<2>(), 3.3f);
	EXPECT_EQ(lanes.lane<3>(), 4.4f);
	EXPECT_EQ(lanes.lane<4>(), 5.5f);
	EXPECT_EQ(lanes.lane<5>(), 6.6f);
	EXPECT_EQ(lanes.lane<6>(), 7.7f);
	EXPECT_EQ(lanes.lane<7>(), 8.8f);
}
2060 
2061 /** @brief Test copy vfloat8 load. */
TEST(vfloat8, CopyLoad)
{
	vfloat8 source(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);

	// Construct from the underlying register member of another vector
	vfloat8 copy(source.m);

	EXPECT_EQ(copy.lane<0>(), 1.1f);
	EXPECT_EQ(copy.lane<1>(), 2.2f);
	EXPECT_EQ(copy.lane<2>(), 3.3f);
	EXPECT_EQ(copy.lane<3>(), 4.4f);
	EXPECT_EQ(copy.lane<4>(), 5.5f);
	EXPECT_EQ(copy.lane<5>(), 6.6f);
	EXPECT_EQ(copy.lane<6>(), 7.7f);
	EXPECT_EQ(copy.lane<7>(), 8.8f);
}
2075 
2076 /** @brief Test vfloat8 zero. */
TEST(vfloat8, Zero)
{
	vfloat8 zeroed = vfloat8::zero();

	// Every lane of the factory result must be zero
	EXPECT_EQ(zeroed.lane<0>(), 0.0f);
	EXPECT_EQ(zeroed.lane<1>(), 0.0f);
	EXPECT_EQ(zeroed.lane<2>(), 0.0f);
	EXPECT_EQ(zeroed.lane<3>(), 0.0f);
	EXPECT_EQ(zeroed.lane<4>(), 0.0f);
	EXPECT_EQ(zeroed.lane<5>(), 0.0f);
	EXPECT_EQ(zeroed.lane<6>(), 0.0f);
	EXPECT_EQ(zeroed.lane<7>(), 0.0f);
}
2089 
2090 /** @brief Test vfloat8 load1. */
TEST(vfloat8, Load1)
{
	float value = 3.14f;

	// One scalar read from memory is expected in all eight lanes
	vfloat8 splat = vfloat8::load1(&value);

	EXPECT_EQ(splat.lane<0>(), 3.14f);
	EXPECT_EQ(splat.lane<1>(), 3.14f);
	EXPECT_EQ(splat.lane<2>(), 3.14f);
	EXPECT_EQ(splat.lane<3>(), 3.14f);
	EXPECT_EQ(splat.lane<4>(), 3.14f);
	EXPECT_EQ(splat.lane<5>(), 3.14f);
	EXPECT_EQ(splat.lane<6>(), 3.14f);
	EXPECT_EQ(splat.lane<7>(), 3.14f);
}
2104 
2105 /** @brief Test vfloat8 loada. */
TEST(vfloat8, Loada)
{
	// Load from the start of the shared test array
	vfloat8 loaded = vfloat8::loada(&(f32_data[0]));

	EXPECT_EQ(loaded.lane<0>(), 0.0f);
	EXPECT_EQ(loaded.lane<1>(), 1.0f);
	EXPECT_EQ(loaded.lane<2>(), 2.0f);
	EXPECT_EQ(loaded.lane<3>(), 3.0f);
	EXPECT_EQ(loaded.lane<4>(), 4.0f);
	EXPECT_EQ(loaded.lane<5>(), 5.0f);
	EXPECT_EQ(loaded.lane<6>(), 6.0f);
	EXPECT_EQ(loaded.lane<7>(), 7.0f);
}
2118 
2119 /** @brief Test vfloat8 lane_id. */
TEST(vfloat8, LaneID)
{
	vfloat8 ids = vfloat8::lane_id();

	// Each lane must contain its own index as a float
	EXPECT_EQ(ids.lane<0>(), 0.0f);
	EXPECT_EQ(ids.lane<1>(), 1.0f);
	EXPECT_EQ(ids.lane<2>(), 2.0f);
	EXPECT_EQ(ids.lane<3>(), 3.0f);
	EXPECT_EQ(ids.lane<4>(), 4.0f);
	EXPECT_EQ(ids.lane<5>(), 5.0f);
	EXPECT_EQ(ids.lane<6>(), 6.0f);
	EXPECT_EQ(ids.lane<7>(), 7.0f);
}
2132 
2133 /** @brief Test vfloat8 add. */
TEST(vfloat8, vadd)
{
	vfloat8 lhs(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
	vfloat8 rhs(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);

	// Lane-wise add; expected values use the matching scalar float expression
	lhs = lhs + rhs;

	EXPECT_EQ(lhs.lane<0>(), 1.0f + 0.1f);
	EXPECT_EQ(lhs.lane<1>(), 2.0f + 0.2f);
	EXPECT_EQ(lhs.lane<2>(), 3.0f + 0.3f);
	EXPECT_EQ(lhs.lane<3>(), 4.0f + 0.4f);
	EXPECT_EQ(lhs.lane<4>(), 5.0f + 0.5f);
	EXPECT_EQ(lhs.lane<5>(), 6.0f + 0.6f);
	EXPECT_EQ(lhs.lane<6>(), 7.0f + 0.7f);
	EXPECT_EQ(lhs.lane<7>(), 8.0f + 0.8f);
}
2148 
2149 /** @brief Test vfloat8 sub. */
TEST(vfloat8, vsub)
{
	vfloat8 lhs(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
	vfloat8 rhs(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);

	// Lane-wise sub; expected values use the matching scalar float expression
	lhs = lhs - rhs;

	EXPECT_EQ(lhs.lane<0>(), 1.0f - 0.1f);
	EXPECT_EQ(lhs.lane<1>(), 2.0f - 0.2f);
	EXPECT_EQ(lhs.lane<2>(), 3.0f - 0.3f);
	EXPECT_EQ(lhs.lane<3>(), 4.0f - 0.4f);
	EXPECT_EQ(lhs.lane<4>(), 5.0f - 0.5f);
	EXPECT_EQ(lhs.lane<5>(), 6.0f - 0.6f);
	EXPECT_EQ(lhs.lane<6>(), 7.0f - 0.7f);
	EXPECT_EQ(lhs.lane<7>(), 8.0f - 0.8f);
}
2164 
2165 /** @brief Test vfloat8 mul. */
TEST(vfloat8, vmul)
{
	vfloat8 lhs(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
	vfloat8 rhs(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);

	// Lane-wise mul; expected values use the matching scalar float expression
	lhs = lhs * rhs;

	EXPECT_EQ(lhs.lane<0>(), 1.0f * 0.1f);
	EXPECT_EQ(lhs.lane<1>(), 2.0f * 0.2f);
	EXPECT_EQ(lhs.lane<2>(), 3.0f * 0.3f);
	EXPECT_EQ(lhs.lane<3>(), 4.0f * 0.4f);
	EXPECT_EQ(lhs.lane<4>(), 5.0f * 0.5f);
	EXPECT_EQ(lhs.lane<5>(), 6.0f * 0.6f);
	EXPECT_EQ(lhs.lane<6>(), 7.0f * 0.7f);
	EXPECT_EQ(lhs.lane<7>(), 8.0f * 0.8f);
}
2180 
2181 /** @brief Test vfloat8 mul. */
TEST(vfloat8,vsmul)2182 TEST(vfloat8, vsmul)
2183 {
2184 	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2185 	float b = 3.14f;
2186 	a = a * b;
2187 	EXPECT_EQ(a.lane<0>(), 1.0f * 3.14f);
2188 	EXPECT_EQ(a.lane<1>(), 2.0f * 3.14f);
2189 	EXPECT_EQ(a.lane<2>(), 3.0f * 3.14f);
2190 	EXPECT_EQ(a.lane<3>(), 4.0f * 3.14f);
2191 	EXPECT_EQ(a.lane<4>(), 5.0f * 3.14f);
2192 	EXPECT_EQ(a.lane<5>(), 6.0f * 3.14f);
2193 	EXPECT_EQ(a.lane<6>(), 7.0f * 3.14f);
2194 	EXPECT_EQ(a.lane<7>(), 8.0f * 3.14f);
2195 }
2196 
2197 /** @brief Test vfloat8 mul. */
TEST(vfloat8,svmul)2198 TEST(vfloat8, svmul)
2199 {
2200 	float a = 3.14f;
2201 	vfloat8 b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2202 	b = a * b;
2203 	EXPECT_EQ(b.lane<0>(), 3.14f * 1.0f);
2204 	EXPECT_EQ(b.lane<1>(), 3.14f * 2.0f);
2205 	EXPECT_EQ(b.lane<2>(), 3.14f * 3.0f);
2206 	EXPECT_EQ(b.lane<3>(), 3.14f * 4.0f);
2207 	EXPECT_EQ(b.lane<4>(), 3.14f * 5.0f);
2208 	EXPECT_EQ(b.lane<5>(), 3.14f * 6.0f);
2209 	EXPECT_EQ(b.lane<6>(), 3.14f * 7.0f);
2210 	EXPECT_EQ(b.lane<7>(), 3.14f * 8.0f);
2211 }
2212 
2213 /** @brief Test vfloat8 div. */
TEST(vfloat8,vdiv)2214 TEST(vfloat8, vdiv)
2215 {
2216 	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2217 	vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2218 	a = a / b;
2219 	EXPECT_EQ(a.lane<0>(), 1.0f / 0.1f);
2220 	EXPECT_EQ(a.lane<1>(), 2.0f / 0.2f);
2221 	EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f);
2222 	EXPECT_EQ(a.lane<3>(), 4.0f / 0.4f);
2223 	EXPECT_EQ(a.lane<4>(), 5.0f / 0.5f);
2224 	EXPECT_EQ(a.lane<5>(), 6.0f / 0.6f);
2225 	EXPECT_EQ(a.lane<6>(), 7.0f / 0.7f);
2226 	EXPECT_EQ(a.lane<7>(), 8.0f / 0.8f);
2227 }
2228 
2229 /** @brief Test vfloat8 div. */
TEST(vfloat8,vsdiv)2230 TEST(vfloat8, vsdiv)
2231 {
2232 	vfloat8 a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2233 	float b = 3.14f;
2234 	vfloat8 r = a / b;
2235 
2236 	EXPECT_EQ(r.lane<0>(), 0.1f / 3.14f);
2237 	EXPECT_EQ(r.lane<1>(), 0.2f / 3.14f);
2238 	EXPECT_EQ(r.lane<2>(), 0.3f / 3.14f);
2239 	EXPECT_EQ(r.lane<3>(), 0.4f / 3.14f);
2240 	EXPECT_EQ(r.lane<4>(), 0.5f / 3.14f);
2241 	EXPECT_EQ(r.lane<5>(), 0.6f / 3.14f);
2242 	EXPECT_EQ(r.lane<6>(), 0.7f / 3.14f);
2243 	EXPECT_EQ(r.lane<7>(), 0.8f / 3.14f);
2244 }
2245 
2246 /** @brief Test vfloat8 div. */
TEST(vfloat8,svdiv)2247 TEST(vfloat8, svdiv)
2248 {
2249 	float a = 3.14f;
2250 	vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2251 	vfloat8 r = a / b;
2252 
2253 	EXPECT_EQ(r.lane<0>(), 3.14f / 0.1f);
2254 	EXPECT_EQ(r.lane<1>(), 3.14f / 0.2f);
2255 	EXPECT_EQ(r.lane<2>(), 3.14f / 0.3f);
2256 	EXPECT_EQ(r.lane<3>(), 3.14f / 0.4f);
2257 	EXPECT_EQ(r.lane<4>(), 3.14f / 0.5f);
2258 	EXPECT_EQ(r.lane<5>(), 3.14f / 0.6f);
2259 	EXPECT_EQ(r.lane<6>(), 3.14f / 0.7f);
2260 	EXPECT_EQ(r.lane<7>(), 3.14f / 0.8f);
2261 }
2262 
2263 /** @brief Test vfloat8 ceq. */
TEST(vfloat8,ceq)2264 TEST(vfloat8, ceq)
2265 {
2266 	vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2267 	vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2268 	vmask8 r1 = a1 == b1;
2269 	EXPECT_EQ(0u, mask(r1));
2270 	EXPECT_EQ(false, any(r1));
2271 	EXPECT_EQ(false, all(r1));
2272 
2273 	vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2274 	vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2275 	vmask8 r2 = a2 == b2;
2276 	EXPECT_EQ(0x1u, mask(r2));
2277 	EXPECT_EQ(true, any(r2));
2278 	EXPECT_EQ(false, all(r2));
2279 
2280 	vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2281 	vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2282 	vmask8 r3 = a3 == b3;
2283 	EXPECT_EQ(0x5u, mask(r3));
2284 	EXPECT_EQ(true, any(r3));
2285 	EXPECT_EQ(false, all(r3));
2286 
2287 	vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2288 	vmask8 r4 = a4 == a4;
2289 	EXPECT_EQ(0xFFu, mask(r4));
2290 	EXPECT_EQ(true, any(r4));
2291 	EXPECT_EQ(true, all(r4));
2292 }
2293 
2294 /** @brief Test vfloat8 cne. */
TEST(vfloat8,cne)2295 TEST(vfloat8, cne)
2296 {
2297 	vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2298 	vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2299 	vmask8 r1 = a1 != b1;
2300 	EXPECT_EQ(0xFFu, mask(r1));
2301 	EXPECT_EQ(true, any(r1));
2302 	EXPECT_EQ(true, all(r1));
2303 
2304 	vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2305 	vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2306 	vmask8 r2 = a2 != b2;
2307 	EXPECT_EQ(0xFEu, mask(r2));
2308 	EXPECT_EQ(true, any(r2));
2309 	EXPECT_EQ(false, all(r2));
2310 
2311 	vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2312 	vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2313 	vmask8 r3 = a3 != b3;
2314 	EXPECT_EQ(0xFAu, mask(r3));
2315 	EXPECT_EQ(true, any(r3));
2316 	EXPECT_EQ(false, all(r3));
2317 
2318 	vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2319 	vmask8 r4 = a4 != a4;
2320 	EXPECT_EQ(0u, mask(r4));
2321 	EXPECT_EQ(false, any(r4));
2322 	EXPECT_EQ(false, all(r4));
2323 }
2324 
2325 /** @brief Test vfloat8 clt. */
TEST(vfloat8,clt)2326 TEST(vfloat8, clt)
2327 {
2328 	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2329 	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2330 	vmask8 r = a < b;
2331 	EXPECT_EQ(0xAAu, mask(r));
2332 }
2333 
2334 /** @brief Test vfloat8 cle. */
TEST(vfloat8,cle)2335 TEST(vfloat8, cle)
2336 {
2337 	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2338 	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2339 	vmask8 r = a <= b;
2340 	EXPECT_EQ(0xEEu, mask(r));
2341 }
2342 
2343 /** @brief Test vfloat8 cgt. */
TEST(vfloat8,cgt)2344 TEST(vfloat8, cgt)
2345 {
2346 	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2347 	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2348 	vmask8 r = a > b;
2349 	EXPECT_EQ(0x11u, mask(r));
2350 }
2351 
2352 /** @brief Test vfloat8 cge. */
TEST(vfloat8,cge)2353 TEST(vfloat8, cge)
2354 {
2355 	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2356 	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2357 	vmask8 r = a >= b;
2358 	EXPECT_EQ(0x55u, mask(r));
2359 }
2360 
2361 /** @brief Test vfloat8 min. */
TEST(vfloat8,min)2362 TEST(vfloat8, min)
2363 {
2364 	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2365 	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2366 	vfloat8 r = min(a, b);
2367 	EXPECT_EQ(r.lane<0>(), 0.9f);
2368 	EXPECT_EQ(r.lane<1>(), 2.0f);
2369 	EXPECT_EQ(r.lane<2>(), 3.0f);
2370 	EXPECT_EQ(r.lane<3>(), 4.0f);
2371 	EXPECT_EQ(r.lane<4>(), 0.9f);
2372 	EXPECT_EQ(r.lane<5>(), 2.0f);
2373 	EXPECT_EQ(r.lane<6>(), 3.0f);
2374 	EXPECT_EQ(r.lane<7>(), 4.0f);
2375 }
2376 
2377 /** @brief Test vfloat8 max. */
TEST(vfloat8,max)2378 TEST(vfloat8, max)
2379 {
2380 	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2381 	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2382 	vfloat8 r = max(a, b);
2383 	EXPECT_EQ(r.lane<0>(), 1.0f);
2384 	EXPECT_EQ(r.lane<1>(), 2.1f);
2385 	EXPECT_EQ(r.lane<2>(), 3.0f);
2386 	EXPECT_EQ(r.lane<3>(), 4.1f);
2387 	EXPECT_EQ(r.lane<4>(), 1.0f);
2388 	EXPECT_EQ(r.lane<5>(), 2.1f);
2389 	EXPECT_EQ(r.lane<6>(), 3.0f);
2390 	EXPECT_EQ(r.lane<7>(), 4.1f);
2391 }
2392 
2393 /** @brief Test vfloat8 clamp. */
TEST(vfloat8,clamp)2394 TEST(vfloat8, clamp)
2395 {
2396 	vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2397 	vfloat8 r1 = clamp(2.1f, 3.0f, a1);
2398 	EXPECT_EQ(r1.lane<0>(), 2.1f);
2399 	EXPECT_EQ(r1.lane<1>(), 2.1f);
2400 	EXPECT_EQ(r1.lane<2>(), 3.0f);
2401 	EXPECT_EQ(r1.lane<3>(), 3.0f);
2402 	EXPECT_EQ(r1.lane<4>(), 2.1f);
2403 	EXPECT_EQ(r1.lane<5>(), 2.1f);
2404 	EXPECT_EQ(r1.lane<6>(), 3.0f);
2405 	EXPECT_EQ(r1.lane<7>(), 3.0f);
2406 
2407 	vfloat8 a2(1.0f, 2.0f, qnan, 4.0f, 1.0f, 2.0f, qnan, 4.0f);
2408 	vfloat8 r2 = clamp(2.1f, 3.0f, a2);
2409 	EXPECT_EQ(r2.lane<0>(), 2.1f);
2410 	EXPECT_EQ(r2.lane<1>(), 2.1f);
2411 	EXPECT_EQ(r2.lane<2>(), 2.1f);
2412 	EXPECT_EQ(r2.lane<3>(), 3.0f);
2413 	EXPECT_EQ(r2.lane<4>(), 2.1f);
2414 	EXPECT_EQ(r2.lane<5>(), 2.1f);
2415 	EXPECT_EQ(r2.lane<6>(), 2.1f);
2416 	EXPECT_EQ(r2.lane<7>(), 3.0f);
2417 }
2418 
2419 /** @brief Test vfloat8 clampz. */
TEST(vfloat8,clampz)2420 TEST(vfloat8, clampz)
2421 {
2422 	vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2423 	vfloat8 r1 = clampz(3.0f, a1);
2424 	EXPECT_EQ(r1.lane<0>(), 0.0f);
2425 	EXPECT_EQ(r1.lane<1>(), 0.0f);
2426 	EXPECT_EQ(r1.lane<2>(), 0.1f);
2427 	EXPECT_EQ(r1.lane<3>(), 3.0f);
2428 	EXPECT_EQ(r1.lane<4>(), 0.0f);
2429 	EXPECT_EQ(r1.lane<5>(), 0.0f);
2430 	EXPECT_EQ(r1.lane<6>(), 0.1f);
2431 	EXPECT_EQ(r1.lane<7>(), 3.0f);
2432 
2433 	vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f);
2434 	vfloat8 r2 = clampz(3.0f, a2);
2435 	EXPECT_EQ(r2.lane<0>(), 0.0f);
2436 	EXPECT_EQ(r2.lane<1>(), 0.0f);
2437 	EXPECT_EQ(r2.lane<2>(), 0.0f);
2438 	EXPECT_EQ(r2.lane<3>(), 3.0f);
2439 	EXPECT_EQ(r2.lane<4>(), 0.0f);
2440 	EXPECT_EQ(r2.lane<5>(), 0.0f);
2441 	EXPECT_EQ(r2.lane<6>(), 0.0f);
2442 	EXPECT_EQ(r2.lane<7>(), 3.0f);
2443 }
2444 
2445 /** @brief Test vfloat8 clampz. */
TEST(vfloat8,clampzo)2446 TEST(vfloat8, clampzo)
2447 {
2448 	vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2449 	vfloat8 r1 = clampzo(a1);
2450 	EXPECT_EQ(r1.lane<0>(), 0.0f);
2451 	EXPECT_EQ(r1.lane<1>(), 0.0f);
2452 	EXPECT_EQ(r1.lane<2>(), 0.1f);
2453 	EXPECT_EQ(r1.lane<3>(), 1.0f);
2454 	EXPECT_EQ(r1.lane<4>(), 0.0f);
2455 	EXPECT_EQ(r1.lane<5>(), 0.0f);
2456 	EXPECT_EQ(r1.lane<6>(), 0.1f);
2457 	EXPECT_EQ(r1.lane<7>(), 1.0f);
2458 
2459 	vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f);
2460 	vfloat8 r2 = clampzo(a2);
2461 	EXPECT_EQ(r2.lane<0>(), 0.0f);
2462 	EXPECT_EQ(r2.lane<1>(), 0.0f);
2463 	EXPECT_EQ(r2.lane<2>(), 0.0f);
2464 	EXPECT_EQ(r2.lane<3>(), 1.0f);
2465 	EXPECT_EQ(r2.lane<4>(), 0.0f);
2466 	EXPECT_EQ(r2.lane<5>(), 0.0f);
2467 	EXPECT_EQ(r2.lane<6>(), 0.0f);
2468 	EXPECT_EQ(r2.lane<7>(), 1.0f);
2469 }
2470 
2471 /** @brief Test vfloat8 abs. */
TEST(vfloat8,abs)2472 TEST(vfloat8, abs)
2473 {
2474 	vfloat8 a(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2475 	vfloat8 r = abs(a);
2476 	EXPECT_EQ(r.lane<0>(), 1.0f);
2477 	EXPECT_EQ(r.lane<1>(), 0.0f);
2478 	EXPECT_EQ(r.lane<2>(), 0.1f);
2479 	EXPECT_EQ(r.lane<3>(), 4.0f);
2480 	EXPECT_EQ(r.lane<4>(), 1.0f);
2481 	EXPECT_EQ(r.lane<5>(), 0.0f);
2482 	EXPECT_EQ(r.lane<6>(), 0.1f);
2483 	EXPECT_EQ(r.lane<7>(), 4.0f);
2484 }
2485 
2486 /** @brief Test vfloat8 round. */
TEST(vfloat8,round)2487 TEST(vfloat8, round)
2488 {
2489 	vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2490 	vfloat8 r = round(a);
2491 	EXPECT_EQ(r.lane<0>(), 1.0f);
2492 	EXPECT_EQ(r.lane<1>(), 2.0f);
2493 	EXPECT_EQ(r.lane<2>(), 2.0f);
2494 	EXPECT_EQ(r.lane<3>(), 4.0f);
2495 	EXPECT_EQ(r.lane<4>(), 1.0f);
2496 	EXPECT_EQ(r.lane<5>(), 2.0f);
2497 	EXPECT_EQ(r.lane<6>(), 2.0f);
2498 	EXPECT_EQ(r.lane<7>(), 4.0f);
2499 }
2500 
2501 /** @brief Test vfloat8 hmin. */
TEST(vfloat8,hmin)2502 TEST(vfloat8, hmin)
2503 {
2504 	vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2505 	vfloat8 r1 = hmin(a1);
2506 	EXPECT_EQ(r1.lane<0>(), 1.1f);
2507 	EXPECT_EQ(r1.lane<1>(), 1.1f);
2508 	EXPECT_EQ(r1.lane<2>(), 1.1f);
2509 	EXPECT_EQ(r1.lane<3>(), 1.1f);
2510 	EXPECT_EQ(r1.lane<4>(), 1.1f);
2511 	EXPECT_EQ(r1.lane<5>(), 1.1f);
2512 	EXPECT_EQ(r1.lane<6>(), 1.1f);
2513 	EXPECT_EQ(r1.lane<7>(), 1.1f);
2514 
2515 	vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2516 	vfloat8 r2 = hmin(a2);
2517 	EXPECT_EQ(r2.lane<0>(), 0.2f);
2518 	EXPECT_EQ(r2.lane<1>(), 0.2f);
2519 	EXPECT_EQ(r2.lane<2>(), 0.2f);
2520 	EXPECT_EQ(r2.lane<3>(), 0.2f);
2521 	EXPECT_EQ(r2.lane<4>(), 0.2f);
2522 	EXPECT_EQ(r2.lane<5>(), 0.2f);
2523 	EXPECT_EQ(r2.lane<6>(), 0.2f);
2524 	EXPECT_EQ(r2.lane<7>(), 0.2f);
2525 }
2526 
2527 /** @brief Test vfloat8 hmin_s. */
TEST(vfloat8,hmin_s)2528 TEST(vfloat8, hmin_s)
2529 {
2530 	vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2531 	float r1 = hmin_s(a1);
2532 	EXPECT_EQ(r1, 1.1f);
2533 
2534 	vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2535 	float r2 = hmin_s(a2);
2536 	EXPECT_EQ(r2, 0.2f);
2537 }
2538 
2539 /** @brief Test vfloat8 hmax. */
TEST(vfloat8,hmax)2540 TEST(vfloat8, hmax)
2541 {
2542 	vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2543 	vfloat8 r1 = hmax(a1);
2544 	EXPECT_EQ(r1.lane<0>(), 4.0f);
2545 	EXPECT_EQ(r1.lane<1>(), 4.0f);
2546 	EXPECT_EQ(r1.lane<2>(), 4.0f);
2547 	EXPECT_EQ(r1.lane<3>(), 4.0f);
2548 	EXPECT_EQ(r1.lane<4>(), 4.0f);
2549 	EXPECT_EQ(r1.lane<5>(), 4.0f);
2550 	EXPECT_EQ(r1.lane<6>(), 4.0f);
2551 	EXPECT_EQ(r1.lane<7>(), 4.0f);
2552 
2553 	vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2554 	vfloat8 r2 = hmax(a2);
2555 	EXPECT_EQ(r2.lane<0>(), 1.6f);
2556 	EXPECT_EQ(r2.lane<1>(), 1.6f);
2557 	EXPECT_EQ(r2.lane<2>(), 1.6f);
2558 	EXPECT_EQ(r2.lane<3>(), 1.6f);
2559 	EXPECT_EQ(r2.lane<4>(), 1.6f);
2560 	EXPECT_EQ(r2.lane<5>(), 1.6f);
2561 	EXPECT_EQ(r2.lane<6>(), 1.6f);
2562 	EXPECT_EQ(r2.lane<7>(), 1.6f);
2563 }
2564 
2565 /** @brief Test vfloat8 hmax_s. */
TEST(vfloat8,hmax_s)2566 TEST(vfloat8, hmax_s)
2567 {
2568 	vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2569 	float r1 = hmax_s(a1);
2570 	EXPECT_EQ(r1, 4.0f);
2571 
2572 	vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2573 	float r2 = hmax_s(a2);
2574 	EXPECT_EQ(r2, 1.6f);
2575 }
2576 
2577 /** @brief Test vfloat8 hadd_s. */
TEST(vfloat8,hadd_s)2578 TEST(vfloat8, hadd_s)
2579 {
2580 	vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2581 	float sum = 1.1f + 1.5f + 1.6f + 4.0f + 1.1f + 1.5f + 1.6f + 4.0f;
2582 	float r = hadd_s(a1);
2583 	EXPECT_NEAR(r, sum, 0.005f);
2584 }
2585 
2586 /** @brief Test vfloat8 sqrt. */
TEST(vfloat8,sqrt)2587 TEST(vfloat8, sqrt)
2588 {
2589 	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2590 	vfloat8 r = sqrt(a);
2591 	EXPECT_EQ(r.lane<0>(), std::sqrt(1.0f));
2592 	EXPECT_EQ(r.lane<1>(), std::sqrt(2.0f));
2593 	EXPECT_EQ(r.lane<2>(), std::sqrt(3.0f));
2594 	EXPECT_EQ(r.lane<3>(), std::sqrt(4.0f));
2595 	EXPECT_EQ(r.lane<4>(), std::sqrt(1.0f));
2596 	EXPECT_EQ(r.lane<5>(), std::sqrt(2.0f));
2597 	EXPECT_EQ(r.lane<6>(), std::sqrt(3.0f));
2598 	EXPECT_EQ(r.lane<7>(), std::sqrt(4.0f));
2599 }
2600 
2601 /** @brief Test vfloat8 select. */
TEST(vfloat8,select)2602 TEST(vfloat8, select)
2603 {
2604 	vfloat8 m1(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);
2605 	vfloat8 m2(1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f);
2606 	vmask8 cond = m1 == m2;
2607 
2608 	vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0);
2609 	vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0);
2610 
2611 	// Select in one direction
2612 	vfloat8 r1 = select(a, b, cond);
2613 	EXPECT_EQ(r1.lane<0>(), 4.0f);
2614 	EXPECT_EQ(r1.lane<1>(), 3.0f);
2615 	EXPECT_EQ(r1.lane<2>(), 2.0f);
2616 	EXPECT_EQ(r1.lane<3>(), 1.0f);
2617 	EXPECT_EQ(r1.lane<4>(), 4.0f);
2618 	EXPECT_EQ(r1.lane<5>(), 3.0f);
2619 	EXPECT_EQ(r1.lane<6>(), 2.0f);
2620 	EXPECT_EQ(r1.lane<7>(), 1.0f);
2621 
2622 	// Select in the other
2623 	vfloat8 r2 = select(b, a, cond);
2624 	EXPECT_EQ(r2.lane<0>(), 1.0f);
2625 	EXPECT_EQ(r2.lane<1>(), 2.0f);
2626 	EXPECT_EQ(r2.lane<2>(), 3.0f);
2627 	EXPECT_EQ(r2.lane<3>(), 4.0f);
2628 	EXPECT_EQ(r2.lane<4>(), 1.0f);
2629 	EXPECT_EQ(r2.lane<5>(), 2.0f);
2630 	EXPECT_EQ(r2.lane<6>(), 3.0f);
2631 	EXPECT_EQ(r2.lane<7>(), 4.0f);
2632 }
2633 
2634 /** @brief Test vfloat8 select MSB only. */
TEST(vfloat8,select_msb)2635 TEST(vfloat8, select_msb)
2636 {
2637 	int msb_set = static_cast<int>(0x80000000);
2638 	vint8 msb(msb_set, 0, msb_set, 0, msb_set, 0, msb_set, 0);
2639 	vmask8 cond(msb.m);
2640 
2641 	vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0f);
2642 	vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0f);
2643 
2644 	// Select in one direction
2645 	vfloat8 r1 = select(a, b, cond);
2646 	EXPECT_EQ(r1.lane<0>(), 4.0f);
2647 	EXPECT_EQ(r1.lane<1>(), 3.0f);
2648 	EXPECT_EQ(r1.lane<2>(), 2.0f);
2649 	EXPECT_EQ(r1.lane<3>(), 1.0f);
2650 	EXPECT_EQ(r1.lane<4>(), 4.0f);
2651 	EXPECT_EQ(r1.lane<5>(), 3.0f);
2652 	EXPECT_EQ(r1.lane<6>(), 2.0f);
2653 	EXPECT_EQ(r1.lane<7>(), 1.0f);
2654 
2655 	// Select in the other
2656 	vfloat8 r2 = select(b, a, cond);
2657 	EXPECT_EQ(r2.lane<0>(), 1.0f);
2658 	EXPECT_EQ(r2.lane<1>(), 2.0f);
2659 	EXPECT_EQ(r2.lane<2>(), 3.0f);
2660 	EXPECT_EQ(r2.lane<3>(), 4.0f);
2661 	EXPECT_EQ(r2.lane<4>(), 1.0f);
2662 	EXPECT_EQ(r2.lane<5>(), 2.0f);
2663 	EXPECT_EQ(r2.lane<6>(), 3.0f);
2664 	EXPECT_EQ(r2.lane<7>(), 4.0f);
2665 }
2666 
2667 /** @brief Test vfloat8 gatherf. */
TEST(vfloat8,gatherf)2668 TEST(vfloat8, gatherf)
2669 {
2670 	vint8 indices(0, 4, 3, 2, 7, 4, 3, 2);
2671 	vfloat8 r = gatherf(f32_data, indices);
2672 	EXPECT_EQ(r.lane<0>(), 0.0f);
2673 	EXPECT_EQ(r.lane<1>(), 4.0f);
2674 	EXPECT_EQ(r.lane<2>(), 3.0f);
2675 	EXPECT_EQ(r.lane<3>(), 2.0f);
2676 	EXPECT_EQ(r.lane<4>(), 7.0f);
2677 	EXPECT_EQ(r.lane<5>(), 4.0f);
2678 	EXPECT_EQ(r.lane<6>(), 3.0f);
2679 	EXPECT_EQ(r.lane<7>(), 2.0f);
2680 }
2681 
2682 /** @brief Test vfloat8 store. */
TEST(vfloat8,store)2683 TEST(vfloat8, store)
2684 {
2685 	alignas(32) float out[9];
2686 	vfloat8 a(f32_data);
2687 	store(a, &(out[1]));
2688 	EXPECT_EQ(out[1], 0.0f);
2689 	EXPECT_EQ(out[2], 1.0f);
2690 	EXPECT_EQ(out[3], 2.0f);
2691 	EXPECT_EQ(out[4], 3.0f);
2692 	EXPECT_EQ(out[5], 4.0f);
2693 	EXPECT_EQ(out[6], 5.0f);
2694 	EXPECT_EQ(out[7], 6.0f);
2695 	EXPECT_EQ(out[8], 7.0f);
2696 }
2697 
2698 /** @brief Test vfloat8 storea. */
TEST(vfloat8,storea)2699 TEST(vfloat8, storea)
2700 {
2701 	alignas(32) float out[9];
2702 	vfloat8 a(f32_data);
2703 	store(a, out);
2704 	EXPECT_EQ(out[0], 0.0f);
2705 	EXPECT_EQ(out[1], 1.0f);
2706 	EXPECT_EQ(out[2], 2.0f);
2707 	EXPECT_EQ(out[3], 3.0f);
2708 	EXPECT_EQ(out[4], 4.0f);
2709 	EXPECT_EQ(out[5], 5.0f);
2710 	EXPECT_EQ(out[6], 6.0f);
2711 	EXPECT_EQ(out[7], 7.0f);
2712 }
2713 
2714 /** @brief Test vfloat8 float_to_int. */
TEST(vfloat8,float_to_int)2715 TEST(vfloat8, float_to_int)
2716 {
2717 	vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2718 	vint8 r = float_to_int(a);
2719 	EXPECT_EQ(r.lane<0>(), 1);
2720 	EXPECT_EQ(r.lane<1>(), 1);
2721 	EXPECT_EQ(r.lane<2>(), 1);
2722 	EXPECT_EQ(r.lane<3>(), 4);
2723 	EXPECT_EQ(r.lane<4>(), 1);
2724 	EXPECT_EQ(r.lane<5>(), 1);
2725 	EXPECT_EQ(r.lane<6>(), 1);
2726 	EXPECT_EQ(r.lane<7>(), 4);
2727 }
2728 
2729 // vint8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2730 
2731 /** @brief Test unaligned vint8 data load. */
TEST(vint8,UnalignedLoad)2732 TEST(vint8, UnalignedLoad)
2733 {
2734 	vint8 a(&(s32_data[1]));
2735 	EXPECT_EQ(a.lane<0>(), 1);
2736 	EXPECT_EQ(a.lane<1>(), 2);
2737 	EXPECT_EQ(a.lane<2>(), 3);
2738 	EXPECT_EQ(a.lane<3>(), 4);
2739 	EXPECT_EQ(a.lane<4>(), 5);
2740 	EXPECT_EQ(a.lane<5>(), 6);
2741 	EXPECT_EQ(a.lane<6>(), 7);
2742 	EXPECT_EQ(a.lane<7>(), 8);
2743 }
2744 
2745 /** @brief Test unaligned vint8 data load. */
TEST(vint8,UnalignedLoad8)2746 TEST(vint8, UnalignedLoad8)
2747 {
2748 	vint8 a(&(u8_data[1]));
2749 	EXPECT_EQ(a.lane<0>(), 1);
2750 	EXPECT_EQ(a.lane<1>(), 2);
2751 	EXPECT_EQ(a.lane<2>(), 3);
2752 	EXPECT_EQ(a.lane<3>(), 4);
2753 	EXPECT_EQ(a.lane<4>(), 5);
2754 	EXPECT_EQ(a.lane<5>(), 6);
2755 	EXPECT_EQ(a.lane<6>(), 7);
2756 	EXPECT_EQ(a.lane<7>(), 8);
2757 }
2758 
2759 /** @brief Test scalar duplicated vint8 load. */
TEST(vint8,ScalarDupLoad)2760 TEST(vint8, ScalarDupLoad)
2761 {
2762 	vint8 a(42);
2763 	EXPECT_EQ(a.lane<0>(), 42);
2764 	EXPECT_EQ(a.lane<1>(), 42);
2765 	EXPECT_EQ(a.lane<2>(), 42);
2766 	EXPECT_EQ(a.lane<3>(), 42);
2767 	EXPECT_EQ(a.lane<4>(), 42);
2768 	EXPECT_EQ(a.lane<5>(), 42);
2769 	EXPECT_EQ(a.lane<6>(), 42);
2770 	EXPECT_EQ(a.lane<7>(), 42);
2771 }
2772 
2773 /** @brief Test scalar vint8 load. */
TEST(vint8,ScalarLoad)2774 TEST(vint8, ScalarLoad)
2775 {
2776 	vint8 a(11, 22, 33, 44, 55, 66, 77, 88);
2777 	EXPECT_EQ(a.lane<0>(), 11);
2778 	EXPECT_EQ(a.lane<1>(), 22);
2779 	EXPECT_EQ(a.lane<2>(), 33);
2780 	EXPECT_EQ(a.lane<3>(), 44);
2781 	EXPECT_EQ(a.lane<4>(), 55);
2782 	EXPECT_EQ(a.lane<5>(), 66);
2783 	EXPECT_EQ(a.lane<6>(), 77);
2784 	EXPECT_EQ(a.lane<7>(), 88);
2785 }
2786 
2787 /** @brief Test copy vint8 load. */
TEST(vint8,CopyLoad)2788 TEST(vint8, CopyLoad)
2789 {
2790 	vint8 s(11, 22, 33, 44, 55, 66, 77, 88);
2791 	vint8 a(s.m);
2792 	EXPECT_EQ(a.lane<0>(), 11);
2793 	EXPECT_EQ(a.lane<1>(), 22);
2794 	EXPECT_EQ(a.lane<2>(), 33);
2795 	EXPECT_EQ(a.lane<3>(), 44);
2796 	EXPECT_EQ(a.lane<4>(), 55);
2797 	EXPECT_EQ(a.lane<5>(), 66);
2798 	EXPECT_EQ(a.lane<6>(), 77);
2799 	EXPECT_EQ(a.lane<7>(), 88);
2800 }
2801 
2802 /** @brief Test vint8 zero. */
TEST(vint8,Zero)2803 TEST(vint8, Zero)
2804 {
2805 	vint8 a = vint8::zero();
2806 	EXPECT_EQ(a.lane<0>(), 0);
2807 	EXPECT_EQ(a.lane<1>(), 0);
2808 	EXPECT_EQ(a.lane<2>(), 0);
2809 	EXPECT_EQ(a.lane<3>(), 0);
2810 	EXPECT_EQ(a.lane<4>(), 0);
2811 	EXPECT_EQ(a.lane<5>(), 0);
2812 	EXPECT_EQ(a.lane<6>(), 0);
2813 	EXPECT_EQ(a.lane<7>(), 0);
2814 }
2815 
2816 /** @brief Test vint8 load1. */
TEST(vint8,Load1)2817 TEST(vint8, Load1)
2818 {
2819 	int s = 42;
2820 	vint8 a = vint8::load1(&s);
2821 	EXPECT_EQ(a.lane<0>(), 42);
2822 	EXPECT_EQ(a.lane<1>(), 42);
2823 	EXPECT_EQ(a.lane<2>(), 42);
2824 	EXPECT_EQ(a.lane<3>(), 42);
2825 	EXPECT_EQ(a.lane<4>(), 42);
2826 	EXPECT_EQ(a.lane<5>(), 42);
2827 	EXPECT_EQ(a.lane<6>(), 42);
2828 	EXPECT_EQ(a.lane<7>(), 42);
2829 }
2830 
2831 /** @brief Test vint8 loada. */
TEST(vint8,Loada)2832 TEST(vint8, Loada)
2833 {
2834 	vint8 a = vint8::loada(&(s32_data[0]));
2835 	EXPECT_EQ(a.lane<0>(), 0);
2836 	EXPECT_EQ(a.lane<1>(), 1);
2837 	EXPECT_EQ(a.lane<2>(), 2);
2838 	EXPECT_EQ(a.lane<3>(), 3);
2839 	EXPECT_EQ(a.lane<4>(), 4);
2840 	EXPECT_EQ(a.lane<5>(), 5);
2841 	EXPECT_EQ(a.lane<6>(), 6);
2842 	EXPECT_EQ(a.lane<7>(), 7);
2843 }
2844 
2845 /** @brief Test vint8 lane_id. */
TEST(vint8,LaneID)2846 TEST(vint8, LaneID)
2847 {
2848 	vint8 a = vint8::lane_id();
2849 	EXPECT_EQ(a.lane<0>(), 0);
2850 	EXPECT_EQ(a.lane<1>(), 1);
2851 	EXPECT_EQ(a.lane<2>(), 2);
2852 	EXPECT_EQ(a.lane<3>(), 3);
2853 	EXPECT_EQ(a.lane<4>(), 4);
2854 	EXPECT_EQ(a.lane<5>(), 5);
2855 	EXPECT_EQ(a.lane<6>(), 6);
2856 	EXPECT_EQ(a.lane<7>(), 7);
2857 }
2858 
2859 /** @brief Test vint8 add. */
TEST(vint8,vadd)2860 TEST(vint8, vadd)
2861 {
2862 	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2863 	vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2864 	a = a + b;
2865 	EXPECT_EQ(a.lane<0>(), 1 + 2);
2866 	EXPECT_EQ(a.lane<1>(), 2 + 3);
2867 	EXPECT_EQ(a.lane<2>(), 3 + 4);
2868 	EXPECT_EQ(a.lane<3>(), 4 + 5);
2869 	EXPECT_EQ(a.lane<4>(), 1 + 2);
2870 	EXPECT_EQ(a.lane<5>(), 2 + 3);
2871 	EXPECT_EQ(a.lane<6>(), 3 + 4);
2872 	EXPECT_EQ(a.lane<7>(), 4 + 5);
2873 }
2874 
2875 
2876 /** @brief Test vint8 self-add. */
TEST(vint8,vselfadd1)2877 TEST(vint8, vselfadd1)
2878 {
2879 	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2880 	vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2881 	a += b;
2882 
2883 	EXPECT_EQ(a.lane<0>(), 1 + 2);
2884 	EXPECT_EQ(a.lane<1>(), 2 + 3);
2885 	EXPECT_EQ(a.lane<2>(), 3 + 4);
2886 	EXPECT_EQ(a.lane<3>(), 4 + 5);
2887 	EXPECT_EQ(a.lane<4>(), 1 + 2);
2888 	EXPECT_EQ(a.lane<5>(), 2 + 3);
2889 	EXPECT_EQ(a.lane<6>(), 3 + 4);
2890 	EXPECT_EQ(a.lane<7>(), 4 + 5);
2891 }
2892 
2893 /** @brief Test vint8 sub. */
TEST(vint8,vsub)2894 TEST(vint8, vsub)
2895 {
2896 	vint8 a(1, 2, 4, 4, 1, 2, 4, 4);
2897 	vint8 b(2, 3, 3, 5, 2, 3, 3, 5);
2898 	a = a - b;
2899 	EXPECT_EQ(a.lane<0>(), 1 - 2);
2900 	EXPECT_EQ(a.lane<1>(), 2 - 3);
2901 	EXPECT_EQ(a.lane<2>(), 4 - 3);
2902 	EXPECT_EQ(a.lane<3>(), 4 - 5);
2903 	EXPECT_EQ(a.lane<4>(), 1 - 2);
2904 	EXPECT_EQ(a.lane<5>(), 2 - 3);
2905 	EXPECT_EQ(a.lane<6>(), 4 - 3);
2906 	EXPECT_EQ(a.lane<7>(), 4 - 5);
2907 }
2908 
2909 /** @brief Test vint8 mul. */
TEST(vint8,vmul)2910 TEST(vint8, vmul)
2911 {
2912 	vint8 a(1, 2, 4, 4, 1, 2, 4, 4);
2913 	vint8 b(2, 3, 3, 5, 2, 3, 3, 5);
2914 	a = a * b;
2915 	EXPECT_EQ(a.lane<0>(), 1 * 2);
2916 	EXPECT_EQ(a.lane<1>(), 2 * 3);
2917 	EXPECT_EQ(a.lane<2>(), 4 * 3);
2918 	EXPECT_EQ(a.lane<3>(), 4 * 5);
2919 	EXPECT_EQ(a.lane<4>(), 1 * 2);
2920 	EXPECT_EQ(a.lane<5>(), 2 * 3);
2921 	EXPECT_EQ(a.lane<6>(), 4 * 3);
2922 	EXPECT_EQ(a.lane<7>(), 4 * 5);
2923 }
2924 
2925 /** @brief Test vint8 bitwise invert. */
TEST(vint8,bit_invert)2926 TEST(vint8, bit_invert)
2927 {
2928 	vint8 a(-1, 0, 1, 2, -1, 0, 1, 2);
2929 	a = ~a;
2930 	EXPECT_EQ(a.lane<0>(), ~-1);
2931 	EXPECT_EQ(a.lane<1>(), ~0);
2932 	EXPECT_EQ(a.lane<2>(), ~1);
2933 	EXPECT_EQ(a.lane<3>(), ~2);
2934 	EXPECT_EQ(a.lane<4>(), ~-1);
2935 	EXPECT_EQ(a.lane<5>(), ~0);
2936 	EXPECT_EQ(a.lane<6>(), ~1);
2937 	EXPECT_EQ(a.lane<7>(), ~2);
2938 }
2939 
2940 /** @brief Test vint8 bitwise or. */
TEST(vint8,bit_vor)2941 TEST(vint8, bit_vor)
2942 {
2943 	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2944 	vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2945 	a = a | b;
2946 	EXPECT_EQ(a.lane<0>(), 3);
2947 	EXPECT_EQ(a.lane<1>(), 3);
2948 	EXPECT_EQ(a.lane<2>(), 7);
2949 	EXPECT_EQ(a.lane<3>(), 5);
2950 	EXPECT_EQ(a.lane<4>(), 3);
2951 	EXPECT_EQ(a.lane<5>(), 3);
2952 	EXPECT_EQ(a.lane<6>(), 7);
2953 	EXPECT_EQ(a.lane<7>(), 5);
2954 }
2955 
2956 /** @brief Test vint8 bitwise and. */
TEST(vint8,bit_vand)2957 TEST(vint8, bit_vand)
2958 {
2959 	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2960 	vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2961 	a = a & b;
2962 	EXPECT_EQ(a.lane<0>(), 0);
2963 	EXPECT_EQ(a.lane<1>(), 2);
2964 	EXPECT_EQ(a.lane<2>(), 0);
2965 	EXPECT_EQ(a.lane<3>(), 4);
2966 	EXPECT_EQ(a.lane<4>(), 0);
2967 	EXPECT_EQ(a.lane<5>(), 2);
2968 	EXPECT_EQ(a.lane<6>(), 0);
2969 	EXPECT_EQ(a.lane<7>(), 4);
2970 }
2971 
2972 /** @brief Test vint8 bitwise xor. */
TEST(vint8,bit_vxor)2973 TEST(vint8, bit_vxor)
2974 {
2975 	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2976 	vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2977 	a = a ^ b;
2978 	EXPECT_EQ(a.lane<0>(), 3);
2979 	EXPECT_EQ(a.lane<1>(), 1);
2980 	EXPECT_EQ(a.lane<2>(), 7);
2981 	EXPECT_EQ(a.lane<3>(), 1);
2982 	EXPECT_EQ(a.lane<4>(), 3);
2983 	EXPECT_EQ(a.lane<5>(), 1);
2984 	EXPECT_EQ(a.lane<6>(), 7);
2985 	EXPECT_EQ(a.lane<7>(), 1);
2986 }
2987 
2988 /** @brief Test vint8 ceq. */
TEST(vint8,ceq)2989 TEST(vint8, ceq)
2990 {
2991 	vint8 a1(1, 2, 3, 4, 1, 2, 3, 4);
2992 	vint8 b1(0, 1, 2, 3, 0, 1, 2, 3);
2993 	vmask8 r1 = a1 == b1;
2994 	EXPECT_EQ(0u, mask(r1));
2995 	EXPECT_EQ(false, any(r1));
2996 	EXPECT_EQ(false, all(r1));
2997 
2998 	vint8 a2(1, 2, 3, 4, 1, 2, 3, 4);
2999 	vint8 b2(1, 0, 0, 0, 1, 0, 0, 0);
3000 	vmask8 r2 = a2 == b2;
3001 	EXPECT_EQ(0x11u, mask(r2));
3002 	EXPECT_EQ(true, any(r2));
3003 	EXPECT_EQ(false, all(r2));
3004 
3005 	vint8 a3(1, 2, 3, 4, 1, 2, 3, 4);
3006 	vint8 b3(1, 0, 3, 0, 1, 0, 3, 0);
3007 	vmask8 r3 = a3 == b3;
3008 	EXPECT_EQ(0x55u, mask(r3));
3009 	EXPECT_EQ(true, any(r3));
3010 	EXPECT_EQ(false, all(r3));
3011 
3012 	vint8 a4(1, 2, 3, 4, 1, 2, 3, 4);
3013 	vmask8 r4 = a4 == a4;
3014 	EXPECT_EQ(0xFFu, mask(r4));
3015 	EXPECT_EQ(true, any(r4));
3016 	EXPECT_EQ(true, all(r4));
3017 }
3018 
3019 /** @brief Test vint8 cne. */
TEST(vint8,cne)3020 TEST(vint8, cne)
3021 {
3022 	vint8 a1(1, 2, 3, 4, 1, 2, 3, 4);
3023 	vint8 b1(0, 1, 2, 3, 0, 1, 2, 3);
3024 	vmask8 r1 = a1 != b1;
3025 	EXPECT_EQ(0xFFu, mask(r1));
3026 	EXPECT_EQ(true, any(r1));
3027 	EXPECT_EQ(true, all(r1));
3028 
3029 	vint8 a2(1, 2, 3, 4, 1, 2, 3, 4);
3030 	vint8 b2(1, 0, 0, 0, 1, 0, 0, 0);
3031 	vmask8 r2 = a2 != b2;
3032 	EXPECT_EQ(0xEEu, mask(r2));
3033 	EXPECT_EQ(true, any(r2));
3034 	EXPECT_EQ(false, all(r2));
3035 
3036 	vint8 a3(1, 2, 3, 4, 1, 2, 3, 4);
3037 	vint8 b3(1, 0, 3, 0, 1, 0, 3, 0);
3038 	vmask8 r3 = a3 != b3;
3039 	EXPECT_EQ(0xAAu, mask(r3));
3040 	EXPECT_EQ(true, any(r3));
3041 	EXPECT_EQ(false, all(r3));
3042 
3043 	vint8 a4(1, 2, 3, 4, 1, 2, 3, 4);
3044 	vmask8 r4 = a4 != a4;
3045 	EXPECT_EQ(0u, mask(r4));
3046 	EXPECT_EQ(false, any(r4));
3047 	EXPECT_EQ(false, all(r4));
3048 }
3049 
3050 /** @brief Test vint8 clt. */
TEST(vint8,clt)3051 TEST(vint8, clt)
3052 {
3053 	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3054 	vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3055 	vmask8 r = a < b;
3056 	EXPECT_EQ(0xAAu, mask(r));
3057 }
3058 
3059 /** @brief Test vint8 cgt. */
TEST(vint8,cgt)3060 TEST(vint8, cgt)
3061 {
3062 	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3063 	vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3064 	vmask8 r = a > b;
3065 	EXPECT_EQ(0x11u, mask(r));
3066 }
3067 
3068 /** @brief Test vint8 min. */
TEST(vint8,min)3069 TEST(vint8, min)
3070 {
3071 	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3072 	vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3073 	vint8 r = min(a, b);
3074 	EXPECT_EQ(r.lane<0>(), 0);
3075 	EXPECT_EQ(r.lane<1>(), 2);
3076 	EXPECT_EQ(r.lane<2>(), 3);
3077 	EXPECT_EQ(r.lane<3>(), 4);
3078 	EXPECT_EQ(r.lane<4>(), 0);
3079 	EXPECT_EQ(r.lane<5>(), 2);
3080 	EXPECT_EQ(r.lane<6>(), 3);
3081 	EXPECT_EQ(r.lane<7>(), 4);
3082 }
3083 
3084 /** @brief Test vint8 max. */
TEST(vint8,max)3085 TEST(vint8, max)
3086 {
3087 	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3088 	vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3089 	vint8 r = max(a, b);
3090 	EXPECT_EQ(r.lane<0>(), 1);
3091 	EXPECT_EQ(r.lane<1>(), 3);
3092 	EXPECT_EQ(r.lane<2>(), 3);
3093 	EXPECT_EQ(r.lane<3>(), 5);
3094 	EXPECT_EQ(r.lane<4>(), 1);
3095 	EXPECT_EQ(r.lane<5>(), 3);
3096 	EXPECT_EQ(r.lane<6>(), 3);
3097 	EXPECT_EQ(r.lane<7>(), 5);
3098 }
3099 
3100 /** @brief Test vint8 lsl. */
TEST(vint8,lsl)3101 TEST(vint8, lsl)
3102 {
3103 	vint8 a(1, 2, 4, -4, 1, 2, 4, -4);
3104 	a = lsl<0>(a);
3105 	EXPECT_EQ(a.lane<0>(), 1);
3106 	EXPECT_EQ(a.lane<1>(), 2);
3107 	EXPECT_EQ(a.lane<2>(), 4);
3108 	EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC));
3109 	EXPECT_EQ(a.lane<4>(), 1);
3110 	EXPECT_EQ(a.lane<5>(), 2);
3111 	EXPECT_EQ(a.lane<6>(), 4);
3112 	EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFFC));
3113 
3114 
3115 	a = lsl<1>(a);
3116 	EXPECT_EQ(a.lane<0>(), 2);
3117 	EXPECT_EQ(a.lane<1>(), 4);
3118 	EXPECT_EQ(a.lane<2>(), 8);
3119 	EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFF8));
3120 	EXPECT_EQ(a.lane<4>(), 2);
3121 	EXPECT_EQ(a.lane<5>(), 4);
3122 	EXPECT_EQ(a.lane<6>(), 8);
3123 	EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFF8));
3124 
3125 	a = lsl<2>(a);
3126 	EXPECT_EQ(a.lane<0>(), 8);
3127 	EXPECT_EQ(a.lane<1>(), 16);
3128 	EXPECT_EQ(a.lane<2>(), 32);
3129 	EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFE0));
3130 	EXPECT_EQ(a.lane<4>(), 8);
3131 	EXPECT_EQ(a.lane<5>(), 16);
3132 	EXPECT_EQ(a.lane<6>(), 32);
3133 	EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFE0));
3134 }
3135 
3136 /** @brief Test vint8 lsr. */
TEST(vint8,lsr)3137 TEST(vint8, lsr)
3138 {
3139 	vint8 a(1, 2, 4, -4, 1, 2, 4, -4);
3140 	a = lsr<0>(a);
3141 	EXPECT_EQ(a.lane<0>(), 1);
3142 	EXPECT_EQ(a.lane<1>(), 2);
3143 	EXPECT_EQ(a.lane<2>(), 4);
3144 	EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC));
3145 	EXPECT_EQ(a.lane<4>(), 1);
3146 	EXPECT_EQ(a.lane<5>(), 2);
3147 	EXPECT_EQ(a.lane<6>(), 4);
3148 	EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFFC));
3149 
3150 
3151 	a = lsr<1>(a);
3152 	EXPECT_EQ(a.lane<0>(), 0);
3153 	EXPECT_EQ(a.lane<1>(), 1);
3154 	EXPECT_EQ(a.lane<2>(), 2);
3155 	EXPECT_EQ(a.lane<3>(), 0x7FFFFFFE);
3156 	EXPECT_EQ(a.lane<4>(), 0);
3157 	EXPECT_EQ(a.lane<5>(), 1);
3158 	EXPECT_EQ(a.lane<6>(), 2);
3159 	EXPECT_EQ(a.lane<7>(), 0x7FFFFFFE);
3160 
3161 	a = lsr<2>(a);
3162 	EXPECT_EQ(a.lane<0>(),  0);
3163 	EXPECT_EQ(a.lane<1>(),  0);
3164 	EXPECT_EQ(a.lane<2>(),  0);
3165 	EXPECT_EQ(a.lane<3>(),  0x1FFFFFFF);
3166 	EXPECT_EQ(a.lane<4>(),  0);
3167 	EXPECT_EQ(a.lane<5>(),  0);
3168 	EXPECT_EQ(a.lane<6>(),  0);
3169 	EXPECT_EQ(a.lane<7>(),  0x1FFFFFFF);
3170 }
3171 
3172 /** @brief Test vint8 asr. */
TEST(vint8,asr)3173 TEST(vint8, asr)
3174 {
3175 	vint8 a(1, 2, 4, -4, 1, 2, 4, -4);
3176 	a = asr<0>(a);
3177 	EXPECT_EQ(a.lane<0>(),  1);
3178 	EXPECT_EQ(a.lane<1>(),  2);
3179 	EXPECT_EQ(a.lane<2>(),  4);
3180 	EXPECT_EQ(a.lane<3>(), -4);
3181 	EXPECT_EQ(a.lane<4>(),  1);
3182 	EXPECT_EQ(a.lane<5>(),  2);
3183 	EXPECT_EQ(a.lane<6>(),  4);
3184 	EXPECT_EQ(a.lane<7>(), -4);
3185 
3186 	a = asr<1>(a);
3187 	EXPECT_EQ(a.lane<0>(),  0);
3188 	EXPECT_EQ(a.lane<1>(),  1);
3189 	EXPECT_EQ(a.lane<2>(),  2);
3190 	EXPECT_EQ(a.lane<3>(), -2);
3191 	EXPECT_EQ(a.lane<4>(),  0);
3192 	EXPECT_EQ(a.lane<5>(),  1);
3193 	EXPECT_EQ(a.lane<6>(),  2);
3194 	EXPECT_EQ(a.lane<7>(), -2);
3195 
3196 	// Note - quirk of asr is that you will get "stuck" at -1
3197 	a = asr<2>(a);
3198 	EXPECT_EQ(a.lane<0>(),  0);
3199 	EXPECT_EQ(a.lane<1>(),  0);
3200 	EXPECT_EQ(a.lane<2>(),  0);
3201 	EXPECT_EQ(a.lane<3>(), -1);
3202 	EXPECT_EQ(a.lane<4>(),  0);
3203 	EXPECT_EQ(a.lane<5>(),  0);
3204 	EXPECT_EQ(a.lane<6>(),  0);
3205 	EXPECT_EQ(a.lane<7>(), -1);
3206 }
3207 
3208 /** @brief Test vint8 hmin. */
TEST(vint8,hmin)3209 TEST(vint8, hmin)
3210 {
3211 	vint8 a1(1, 2, 1, 2, 1, 2, 1, 2);
3212 	vint8 r1 = hmin(a1);
3213 	EXPECT_EQ(r1.lane<0>(), 1);
3214 	EXPECT_EQ(r1.lane<1>(), 1);
3215 	EXPECT_EQ(r1.lane<2>(), 1);
3216 	EXPECT_EQ(r1.lane<3>(), 1);
3217 	EXPECT_EQ(r1.lane<4>(), 1);
3218 	EXPECT_EQ(r1.lane<5>(), 1);
3219 	EXPECT_EQ(r1.lane<6>(), 1);
3220 	EXPECT_EQ(r1.lane<7>(), 1);
3221 
3222 	vint8 a2(1, 2, -1, 5, 1, 2, -1, 5);
3223 	vint8 r2 = hmin(a2);
3224 	EXPECT_EQ(r2.lane<0>(), -1);
3225 	EXPECT_EQ(r2.lane<1>(), -1);
3226 	EXPECT_EQ(r2.lane<2>(), -1);
3227 	EXPECT_EQ(r2.lane<3>(), -1);
3228 	EXPECT_EQ(r2.lane<4>(), -1);
3229 	EXPECT_EQ(r2.lane<5>(), -1);
3230 	EXPECT_EQ(r2.lane<6>(), -1);
3231 	EXPECT_EQ(r2.lane<7>(), -1);
3232 }
3233 
3234 /** @brief Test vint8 hmax. */
TEST(vint8,hmax)3235 TEST(vint8, hmax)
3236 {
3237 	vint8 a1(1, 2, 1, 2, 1, 3, 1, 2);
3238 	vint8 r1 = hmax(a1);
3239 	EXPECT_EQ(r1.lane<0>(), 3);
3240 	EXPECT_EQ(r1.lane<1>(), 3);
3241 	EXPECT_EQ(r1.lane<2>(), 3);
3242 	EXPECT_EQ(r1.lane<3>(), 3);
3243 	EXPECT_EQ(r1.lane<4>(), 3);
3244 	EXPECT_EQ(r1.lane<5>(), 3);
3245 	EXPECT_EQ(r1.lane<6>(), 3);
3246 	EXPECT_EQ(r1.lane<7>(), 3);
3247 
3248 	vint8 a2(1, 2, -1, 5, 1, 2, -1, 5);
3249 	vint8 r2 = hmax(a2);
3250 	EXPECT_EQ(r2.lane<0>(), 5);
3251 	EXPECT_EQ(r2.lane<1>(), 5);
3252 	EXPECT_EQ(r2.lane<2>(), 5);
3253 	EXPECT_EQ(r2.lane<3>(), 5);
3254 	EXPECT_EQ(r2.lane<4>(), 5);
3255 	EXPECT_EQ(r2.lane<5>(), 5);
3256 	EXPECT_EQ(r2.lane<6>(), 5);
3257 	EXPECT_EQ(r2.lane<7>(), 5);
3258 }
3259 
3260 /** @brief Test vint8 storea. */
TEST(vint8,storea)3261 TEST(vint8, storea)
3262 {
3263 	alignas(32) int out[8];
3264 	vint8 a(s32_data);
3265 	storea(a, out);
3266 	EXPECT_EQ(out[0], 0);
3267 	EXPECT_EQ(out[1], 1);
3268 	EXPECT_EQ(out[2], 2);
3269 	EXPECT_EQ(out[3], 3);
3270 	EXPECT_EQ(out[4], 4);
3271 	EXPECT_EQ(out[5], 5);
3272 	EXPECT_EQ(out[6], 6);
3273 	EXPECT_EQ(out[7], 7);
3274 }
3275 
3276 /** @brief Test vint8 store. */
TEST(vint8,store)3277 TEST(vint8, store)
3278 {
3279 	alignas(32) int out[9];
3280 	vint8 a(s32_data);
3281 	store(a, out + 1);
3282 	EXPECT_EQ(out[1], 0);
3283 	EXPECT_EQ(out[2], 1);
3284 	EXPECT_EQ(out[3], 2);
3285 	EXPECT_EQ(out[4], 3);
3286 	EXPECT_EQ(out[5], 4);
3287 	EXPECT_EQ(out[6], 5);
3288 	EXPECT_EQ(out[7], 6);
3289 	EXPECT_EQ(out[8], 7);
3290 }
3291 
3292 /** @brief Test vint8 store_nbytes. */
TEST(vint8,store_nbytes)3293 TEST(vint8, store_nbytes)
3294 {
3295 	alignas(32) int out[2];
3296 	vint8 a(42, 314, 75, 90, 42, 314, 75, 90);
3297 	store_nbytes(a, reinterpret_cast<uint8_t*>(&out));
3298 	EXPECT_EQ(out[0], 42);
3299 	EXPECT_EQ(out[1], 314);
3300 }
3301 
3302 /** @brief Test vint8 store_lanes_masked. */
TEST(vint8,store_lanes_masked)3303 TEST(vint8, store_lanes_masked)
3304 {
3305 	int resulta[8] { 0 };
3306 
3307 	// Store nothing
3308 	vmask8 mask1 = vint8(0) == vint8(1);
3309 	vint8 data1 = vint8(1);
3310 
3311 	store_lanes_masked(resulta, data1, mask1);
3312 	vint8 result1v(resulta);
3313 	vint8 expect1v = vint8::zero();
3314 	EXPECT_TRUE(all(result1v == expect1v));
3315 
3316 	// Store half
3317 	vmask8 mask2 = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1);
3318 	vint8 data2 = vint8(2);
3319 
3320 	store_lanes_masked(resulta, data2, mask2);
3321 	vint8 result2v(resulta);
3322 	vint8 expect2v = vint8(2, 2, 2, 2, 0, 0, 0, 0);
3323 	EXPECT_TRUE(all(result2v == expect2v));
3324 
3325 	// Store all
3326 	vmask8 mask3 = vint8(1) == vint8(1);
3327 	vint8 data3 = vint8(3);
3328 
3329 	store_lanes_masked(resulta, data3, mask3);
3330 	vint8 result3v(resulta);
3331 	vint8 expect3v = vint8(3);
3332 	EXPECT_TRUE(all(result3v == expect3v));
3333 }
3334 
3335 /** @brief Test vint8 store_lanes_masked to unaligned address. */
TEST(vint8,store_lanes_masked_unaligned)3336 TEST(vint8, store_lanes_masked_unaligned)
3337 {
3338 	int8_t resulta[33] { 0 };
3339 
3340 	// Store nothing
3341 	vmask8 mask1 = vint8(0) == vint8(1);
3342 	vint8 data1 = vint8(1);
3343 
3344 	store_lanes_masked(reinterpret_cast<int*>(resulta + 1), data1, mask1);
3345 	vint8 result1v(reinterpret_cast<int*>(resulta + 1));
3346 	vint8 expect1v = vint8::zero();
3347 	EXPECT_TRUE(all(result1v == expect1v));
3348 
3349 	// Store half
3350 	vmask8 mask2 = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1);
3351 	vint8 data2 = vint8(2);
3352 
3353 	store_lanes_masked(reinterpret_cast<int*>(resulta + 1), data2, mask2);
3354 	vint8 result2v(reinterpret_cast<int*>(resulta + 1));
3355 	vint8 expect2v = vint8(2, 2, 2, 2, 0, 0, 0, 0);
3356 	EXPECT_TRUE(all(result2v == expect2v));
3357 
3358 	// Store all
3359 	vmask8 mask3 = vint8(1) == vint8(1);
3360 	vint8 data3 = vint8(3);
3361 
3362 	store_lanes_masked(reinterpret_cast<int*>(resulta + 1), data3, mask3);
3363 	vint8 result3v(reinterpret_cast<int*>(resulta + 1));
3364 	vint8 expect3v = vint8(3);
3365 	EXPECT_TRUE(all(result3v == expect3v));
3366 }
3367 
3368 /** @brief Test vint8 gatheri. */
TEST(vint8,gatheri)3369 TEST(vint8, gatheri)
3370 {
3371 	vint8 indices(0, 4, 3, 2, 7, 4, 3, 2);
3372 	vint8 r = gatheri(s32_data, indices);
3373 	EXPECT_EQ(r.lane<0>(), 0);
3374 	EXPECT_EQ(r.lane<1>(), 4);
3375 	EXPECT_EQ(r.lane<2>(), 3);
3376 	EXPECT_EQ(r.lane<3>(), 2);
3377 	EXPECT_EQ(r.lane<4>(), 7);
3378 	EXPECT_EQ(r.lane<5>(), 4);
3379 	EXPECT_EQ(r.lane<6>(), 3);
3380 	EXPECT_EQ(r.lane<7>(), 2);
3381 }
3382 
3383 /** @brief Test vint8 pack_low_bytes. */
TEST(vint8,pack_low_bytes)3384 TEST(vint8, pack_low_bytes)
3385 {
3386 	vint8 a(1, 2, 3, 4, 2, 3, 4, 5);
3387 	vint8 r = pack_low_bytes(a);
3388 	EXPECT_EQ(r.lane<0>(), (4 << 24) | (3 << 16) | (2  << 8) | (1 << 0));
3389 	EXPECT_EQ(r.lane<1>(), (5 << 24) | (4 << 16) | (3  << 8) | (2 << 0));
3390 }
3391 
3392 /** @brief Test vint8 select. */
TEST(vint8,select)3393 TEST(vint8, select)
3394 {
3395 	vint8 m1(1, 1, 1, 1, 1, 1, 1, 1);
3396 	vint8 m2(1, 2, 1, 2, 1, 2, 1, 2);
3397 	vmask8 cond = m1 == m2;
3398 
3399 	vint8 a(1, 3, 3, 1, 1, 3, 3, 1);
3400 	vint8 b(4, 2, 2, 4, 4, 2, 2, 4);
3401 
3402 	vint8 r1 = select(a, b, cond);
3403 	EXPECT_EQ(r1.lane<0>(), 4);
3404 	EXPECT_EQ(r1.lane<1>(), 3);
3405 	EXPECT_EQ(r1.lane<2>(), 2);
3406 	EXPECT_EQ(r1.lane<3>(), 1);
3407 	EXPECT_EQ(r1.lane<4>(), 4);
3408 	EXPECT_EQ(r1.lane<5>(), 3);
3409 	EXPECT_EQ(r1.lane<6>(), 2);
3410 	EXPECT_EQ(r1.lane<7>(), 1);
3411 
3412 	vint8 r2 = select(b, a, cond);
3413 	EXPECT_EQ(r2.lane<0>(), 1);
3414 	EXPECT_EQ(r2.lane<1>(), 2);
3415 	EXPECT_EQ(r2.lane<2>(), 3);
3416 	EXPECT_EQ(r2.lane<3>(), 4);
3417 	EXPECT_EQ(r2.lane<4>(), 1);
3418 	EXPECT_EQ(r2.lane<5>(), 2);
3419 	EXPECT_EQ(r2.lane<6>(), 3);
3420 	EXPECT_EQ(r2.lane<7>(), 4);
3421 }
3422 
3423 // vmask8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3424 
3425 /** @brief Test vmask8 scalar literal constructor. */
TEST(vmask8,scalar_literal_construct)3426 TEST(vmask8, scalar_literal_construct)
3427 {
3428 	vfloat8 ma(0.0f);
3429 	vfloat8 mb(1.0f);
3430 
3431 	vmask8 m1(true);
3432 	vfloat8 r1 = select(ma, mb, m1);
3433 	vmask8 rm1 = r1 == mb;
3434 	EXPECT_EQ(all(rm1), true);
3435 
3436 	vmask8 m2(false);
3437 	vfloat8 r2 = select(ma, mb, m2);
3438 	vmask8 rm2 = r2 == mb;
3439 	EXPECT_EQ(any(rm2), false);
3440 }
3441 
3442 /** @brief Test vmask8 or. */
TEST(vmask8,or)3443 TEST(vmask8, or)
3444 {
3445 	vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3446 	vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3447 	vmask8 m1 = m1a == m1b;
3448 
3449 	vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0);
3450 	vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1);
3451 	vmask8 m2 = m2a == m2b;
3452 
3453 	vmask8 r = m1 | m2;
3454 	EXPECT_EQ(mask(r), 0xBBu);
3455 }
3456 
3457 /** @brief Test vmask8 and. */
TEST(vmask8,and)3458 TEST(vmask8, and)
3459 {
3460 	vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3461 	vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3462 	vmask8 m1 = m1a == m1b;
3463 
3464 	vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0);
3465 	vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1);
3466 	vmask8 m2 = m2a == m2b;
3467 
3468 	vmask8 r = m1 & m2;
3469 	EXPECT_EQ(mask(r), 0x22u);
3470 }
3471 
3472 /** @brief Test vmask8 xor. */
TEST(vmask8,xor)3473 TEST(vmask8, xor)
3474 {
3475 	vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3476 	vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3477 	vmask8 m1 = m1a == m1b;
3478 
3479 	vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0);
3480 	vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1);
3481 	vmask8 m2 = m2a == m2b;
3482 
3483 	vmask8 r = m1 ^ m2;
3484 	EXPECT_EQ(mask(r), 0x99u);
3485 }
3486 
3487 /** @brief Test vmask8 not. */
TEST(vmask8,not)3488 TEST(vmask8, not)
3489 {
3490 	vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3491 	vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3492 	vmask8 m1 = m1a == m1b;
3493 	vmask8 r = ~m1;
3494 	EXPECT_EQ(mask(r), 0x55u);
3495 }
3496 
3497 /** @brief Test vint8 table permute. */
TEST(vint8,vtable_8bt_32bi_32entry)3498 TEST(vint8, vtable_8bt_32bi_32entry)
3499 {
3500 	vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
3501 	vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
3502 
3503 	vint8 table0p, table1p;
3504 	vtable_prepare(table0, table1, table0p, table1p);
3505 
3506 	vint8 index(0, 7, 4, 15, 16, 20, 23, 31);
3507 
3508 	vint8 result = vtable_8bt_32bi(table0p, table1p, index);
3509 
3510 	EXPECT_EQ(result.lane<0>(),  3);
3511 	EXPECT_EQ(result.lane<1>(),  4);
3512 	EXPECT_EQ(result.lane<2>(),  7);
3513 	EXPECT_EQ(result.lane<3>(), 12);
3514 	EXPECT_EQ(result.lane<4>(), 19);
3515 	EXPECT_EQ(result.lane<5>(), 23);
3516 	EXPECT_EQ(result.lane<6>(), 20);
3517 	EXPECT_EQ(result.lane<7>(), 28);
3518 }
3519 
3520 /** @brief Test vint4 table permute. */
TEST(vint8,vtable_8bt_32bi_64entry)3521 TEST(vint8, vtable_8bt_32bi_64entry)
3522 {
3523 	vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
3524 	vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
3525 	vint4 table2(0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f);
3526 	vint4 table3(0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f);
3527 
3528 	vint8 table0p, table1p, table2p, table3p;
3529 	vtable_prepare(table0, table1, table2, table3, table0p, table1p, table2p, table3p);
3530 
3531 	vint8 index(0, 7, 4, 15, 16, 20, 38, 63);
3532 
3533 	vint8 result = vtable_8bt_32bi(table0p, table1p, table2p, table3p, index);
3534 
3535 	EXPECT_EQ(result.lane<0>(),  3);
3536 	EXPECT_EQ(result.lane<1>(),  4);
3537 	EXPECT_EQ(result.lane<2>(),  7);
3538 	EXPECT_EQ(result.lane<3>(), 12);
3539 	EXPECT_EQ(result.lane<4>(), 19);
3540 	EXPECT_EQ(result.lane<5>(), 23);
3541 	EXPECT_EQ(result.lane<6>(), 37);
3542 	EXPECT_EQ(result.lane<7>(), 60);
3543 }
3544 
3545 #endif
3546 
3547 }
3548