1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "gtest/gtest.h"
18 
19 #include "xmmintrin.h"
20 
21 #include <array>
22 #include <cstdint>
23 #include <tuple>
24 
25 #include "berberis/base/bit_util.h"
26 #include "berberis/intrinsics/vector_intrinsics.h"
27 
28 namespace berberis::intrinsics {
29 
30 namespace {
31 
// Checks that VectorBroadcast and its VectorBroadcastForTests counterpart produce equal results
// when instantiated with the extreme values of the 8-, 16-, 32- and 64-bit integer types.
TEST(VectorIntrinsics, VectorBroadcast) {
  using Int8Limits = std::numeric_limits<int8_t>;
  using UInt8Limits = std::numeric_limits<uint8_t>;
  using Int16Limits = std::numeric_limits<int16_t>;
  using UInt16Limits = std::numeric_limits<uint16_t>;
  using Int32Limits = std::numeric_limits<int32_t>;
  using UInt32Limits = std::numeric_limits<uint32_t>;
  using Int64Limits = std::numeric_limits<int64_t>;
  using UInt64Limits = std::numeric_limits<uint64_t>;

  // 8-bit elements.
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int8Limits::min()}>(),
            VectorBroadcast<Wrapping{Int8Limits::min()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int8Limits::max()}>(),
            VectorBroadcast<Wrapping{Int8Limits::max()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{UInt8Limits::max()}>(),
            VectorBroadcast<Wrapping{UInt8Limits::max()}>());

  // 16-bit elements.
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int16Limits::min()}>(),
            VectorBroadcast<Wrapping{Int16Limits::min()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int16Limits::max()}>(),
            VectorBroadcast<Wrapping{Int16Limits::max()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{UInt16Limits::max()}>(),
            VectorBroadcast<Wrapping{UInt16Limits::max()}>());

  // 32-bit elements.
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int32Limits::min()}>(),
            VectorBroadcast<Wrapping{Int32Limits::min()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int32Limits::max()}>(),
            VectorBroadcast<Wrapping{Int32Limits::max()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{UInt32Limits::max()}>(),
            VectorBroadcast<Wrapping{UInt32Limits::max()}>());

  // 64-bit elements.
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int64Limits::min()}>(),
            VectorBroadcast<Wrapping{Int64Limits::min()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int64Limits::max()}>(),
            VectorBroadcast<Wrapping{Int64Limits::max()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{UInt64Limits::max()}>(),
            VectorBroadcast<Wrapping{UInt64Limits::max()}>());
}
58 
// Compares MakeBitmaskFromVl against MakeBitmaskFromVlForTests for every vl in [0, 128).
TEST(VectorIntrinsics, MakeBitmaskFromVl) {
  constexpr size_t kVlEnd = 128;
  for (size_t length = 0; length != kVlEnd; ++length) {
    const auto expected = MakeBitmaskFromVlForTests(length);
    const auto actual = MakeBitmaskFromVl(length);
    ASSERT_EQ(expected, actual);
  }
}
64 
// For each bitmask in [0, 131071): checks BitMaskToSimdMask<Int8> against its ForTests
// counterpart, then feeds the produced SIMD mask to both SimdMaskToBitMask<Int8> variants and
// checks that they agree as well.
TEST(VectorIntrinsics, Make8bitMaskFromBitmask) {
  constexpr size_t kBitmaskEnd = 131071;
  for (size_t bitmask = 0; bitmask < kBitmaskEnd; ++bitmask) {
    const auto converted = BitMaskToSimdMask<Int8>(bitmask);
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int8>(bitmask), converted);
    const auto [vector_mask] = converted;
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int8>(vector_mask), SimdMaskToBitMask<Int8>(vector_mask));
  }
}
72 
// For each bitmask in [0, 511): checks BitMaskToSimdMask<Int16> against its ForTests
// counterpart, then feeds the produced SIMD mask to both SimdMaskToBitMask<Int16> variants and
// checks that they agree as well.
TEST(VectorIntrinsics, Make16bitMaskFromBitmask) {
  constexpr size_t kBitmaskEnd = 511;
  for (size_t bitmask = 0; bitmask < kBitmaskEnd; ++bitmask) {
    const auto converted = BitMaskToSimdMask<Int16>(bitmask);
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int16>(bitmask), converted);
    const auto [vector_mask] = converted;
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int16>(vector_mask),
              SimdMaskToBitMask<Int16>(vector_mask));
  }
}
80 
// For each bitmask in [0, 31): checks BitMaskToSimdMask<Int32> against its ForTests
// counterpart, then feeds the produced SIMD mask to both SimdMaskToBitMask<Int32> variants and
// checks that they agree as well.
TEST(VectorIntrinsics, Make32bitMaskFromBitmask) {
  constexpr size_t kBitmaskEnd = 31;
  for (size_t bitmask = 0; bitmask < kBitmaskEnd; ++bitmask) {
    const auto converted = BitMaskToSimdMask<Int32>(bitmask);
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int32>(bitmask), converted);
    const auto [vector_mask] = converted;
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int32>(vector_mask),
              SimdMaskToBitMask<Int32>(vector_mask));
  }
}
88 
// For each bitmask in [0, 7): checks BitMaskToSimdMask<Int64> against its ForTests
// counterpart, then feeds the produced SIMD mask to both SimdMaskToBitMask<Int64> variants and
// checks that they agree as well.
TEST(VectorIntrinsics, Make64bitMaskFromBitmask) {
  constexpr size_t kBitmaskEnd = 7;
  for (size_t bitmask = 0; bitmask < kBitmaskEnd; ++bitmask) {
    const auto converted = BitMaskToSimdMask<Int64>(bitmask);
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int64>(bitmask), converted);
    const auto [vector_mask] = converted;
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int64>(vector_mask),
              SimdMaskToBitMask<Int64>(vector_mask));
  }
}
96 template <typename ElementType>
TestVidv()97 void TestVidv() {
98   for (size_t index = 0; index < 8; ++index) {
99     ASSERT_EQ(VidvForTests<ElementType>(index), Vidv<ElementType>(index));
100   }
101 }
// Runs TestVidv for every signed and unsigned 8/16/32/64-bit element type.
TEST(VectorIntrinsics, Vidv) {
  // The comma fold evaluates left to right, so the helper is invoked in the order listed below.
  const auto run_for_types = []<typename... ElementTypes>() {
    (TestVidv<ElementTypes>(), ...);
  };
  run_for_types.template operator()<Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64>();
}
// Easily recognizable bit pattern for target register.
// Every byte of the 128-bit value is 0x55. The tests in this file pass it as the first argument
// of VectorMasking (presumably the previous destination contents - confirm against the
// intrinsic's declaration) and expect 0x55 bytes in elements that are checked as "undisturbed".
constexpr __m128i kUndisturbedResult = {0x5555'5555'5555'5555, 0x5555'5555'5555'5555};
114 
115 template <auto kElement>
TestVectorMaskedElementTo()116 void TestVectorMaskedElementTo() {
117   size_t max_mask = sizeof(kElement) == sizeof(uint8_t)    ? 131071
118                     : sizeof(kElement) == sizeof(uint16_t) ? 511
119                     : sizeof(kElement) == sizeof(uint32_t) ? 31
120                                                            : 7;
121   for (size_t mask = 0; mask < max_mask; ++mask) {
122     const SIMD128Register src = kUndisturbedResult;
123     const auto [simd_mask] = BitMaskToSimdMask<decltype(kElement)>(mask);
124     ASSERT_EQ(VectorMaskedElementToForTests<kElement>(simd_mask, src),
125               VectorMaskedElementTo<kElement>(simd_mask, src));
126   }
127 }
128 
// Runs TestVectorMaskedElementTo with the minimum and then the maximum value of each signed and
// unsigned 8/16/32/64-bit integer type.
TEST(VectorIntrinsics, VectorMaskedElementTo) {
  // For each type the helper is instantiated with min() first and max() second; the comma fold
  // walks the type list left to right.
  const auto run_for_limits = []<typename... IntTypes>() {
    ((TestVectorMaskedElementTo<std::numeric_limits<IntTypes>::min()>(),
      TestVectorMaskedElementTo<std::numeric_limits<IntTypes>::max()>()),
     ...);
  };
  run_for_limits.template operator()<int8_t,
                                     uint8_t,
                                     int16_t,
                                     uint16_t,
                                     int32_t,
                                     uint32_t,
                                     int64_t,
                                     uint64_t>();
}
147 
// Adds each `arg2` vector to an all-ones vector with Vaddvv and checks the sum after it has been
// passed through VectorMasking with vstart = 0 and vl = 16, once without a mask argument and once
// with the mask RawInt16{0xffff}.
TEST(VectorIntrinsics, Vaddvv) {
  const auto Check = []<typename ElementType>(
                         auto vadd,
                         SIMD128Register arg2,
                         [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    // The intrinsic result and the expected tuple are shared by both assertions.
    const auto sum = std::get<0>(vadd(__m128i{-1, -1}, arg2));
    const std::tuple expected{result_to_check};
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
                  kUndisturbedResult, sum, 0, 16)),
              expected);
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, sum, 0, 16, RawInt16{0xffff})),
              expected);
  };
  Check(Vaddvv<UInt8>,
        __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
        __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Check(Vaddvv<UInt8>,
        __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
        __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  Check(Vaddvv<UInt16>,
        __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
        __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Check(Vaddvv<UInt16>,
        __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
        __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  Check(Vaddvv<UInt32>,
        __v4su{0, 1, 0, 1},
        __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Check(Vaddvv<UInt32>,
        __v4su{1, 0, 1, 0},
        __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  Check(Vaddvv<UInt64>, __v2du{0, 1}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
  Check(Vaddvv<UInt64>, __v2du{1, 0}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
}
187 
// Adds the scalar UInt8{1} to each `arg1` vector with Vaddvx and checks the sum after it has been
// passed through VectorMasking with vstart = 0 and vl = 16, once without a mask argument and once
// with the mask RawInt16{0xffff}.
TEST(VectorIntrinsics, Vaddvx) {
  const auto Check = []<typename ElementType>(
                         auto vadd,
                         SIMD128Register arg1,
                         [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    // The intrinsic result and the expected tuple are shared by both assertions.
    const auto sum = std::get<0>(vadd(arg1, UInt8{1}));
    const std::tuple expected{result_to_check};
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
                  kUndisturbedResult, sum, 0, 16)),
              expected);
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, sum, 0, 16, RawInt16{0xffff})),
              expected);
  };
  Check(Vaddvx<UInt8>,
        __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
        __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Check(Vaddvx<UInt8>,
        __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
        __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  Check(Vaddvx<UInt16>,
        __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
        __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Check(Vaddvx<UInt16>,
        __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
        __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  Check(Vaddvx<UInt32>,
        __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
        __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Check(Vaddvx<UInt32>,
        __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
        __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  Check(Vaddvx<UInt64>,
        __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
        __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
  Check(Vaddvx<UInt64>,
        __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
        __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
}
228 
TEST(VectorIntrinsics,VlArgForVv)229 TEST(VectorIntrinsics, VlArgForVv) {
230   auto Verify = []<typename ElementType>(
231                     auto Vaddvv,
232                     SIMD128Register arg2,
233                     [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic,
234                     [[gnu::vector_size(16),
235                       gnu::may_alias]] ElementType result_to_check_undisturbed) {
236     constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
237     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
238                   kUndisturbedResult, std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)), 0, kHalfLen)),
239               std::tuple{result_to_check_agnostic});
240     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kUndisturbed>(
241                   kUndisturbedResult, std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)), 0, kHalfLen)),
242               std::tuple{result_to_check_undisturbed});
243     ASSERT_EQ(
244         (VectorMasking<Wrapping<ElementType>,
245                        TailProcessing::kAgnostic,
246                        InactiveProcessing::kAgnostic>(kUndisturbedResult,
247                                                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
248                                                       0,
249                                                       kHalfLen,
250                                                       RawInt16{0xffff})),
251         std::tuple{result_to_check_agnostic});
252     ASSERT_EQ(
253         (VectorMasking<Wrapping<ElementType>,
254                        TailProcessing::kAgnostic,
255                        InactiveProcessing::kUndisturbed>(kUndisturbedResult,
256                                                          std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
257                                                          0,
258                                                          kHalfLen,
259                                                          RawInt16{0xffff})),
260         std::tuple{result_to_check_agnostic});
261     ASSERT_EQ(
262         (VectorMasking<Wrapping<ElementType>,
263                        TailProcessing::kUndisturbed,
264                        InactiveProcessing::kAgnostic>(kUndisturbedResult,
265                                                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
266                                                       0,
267                                                       kHalfLen,
268                                                       RawInt16{0xffff})),
269         std::tuple{result_to_check_undisturbed});
270     ASSERT_EQ(
271         (VectorMasking<Wrapping<ElementType>,
272                        TailProcessing::kUndisturbed,
273                        InactiveProcessing::kUndisturbed>(kUndisturbedResult,
274                                                          std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
275                                                          0,
276                                                          kHalfLen,
277                                                          RawInt16{0xffff})),
278         std::tuple{result_to_check_undisturbed});
279   };
280   Verify(Vaddvv<UInt8>,
281          __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
282          __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
283          __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
284   Verify(Vaddvv<UInt8>,
285          __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
286          __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
287          __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
288   Verify(Vaddvv<UInt16>,
289          __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
290          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
291          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
292   Verify(Vaddvv<UInt16>,
293          __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
294          __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
295          __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
296   Verify(Vaddvv<UInt32>,
297          __v4su{0, 1, 0, 1},
298          __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
299          __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555});
300   Verify(Vaddvv<UInt32>,
301          __v4su{1, 0, 1, 0},
302          __v4su{0x0000'0000, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
303          __v4su{0x0000'0000, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
304   Verify(Vaddvv<UInt64>,
305          __v2du{0, 1},
306          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
307          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555});
308   Verify(Vaddvv<UInt64>,
309          __v2du{1, 0},
310          __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff},
311          __v2du{0x0000'0000'0000'0000, 0x5555'5555'5555'5555});
312 }
313 
TEST(VectorIntrinsics,VlArgForVx)314 TEST(VectorIntrinsics, VlArgForVx) {
315   auto Verify = []<typename ElementType>(
316                     auto Vaddvx,
317                     SIMD128Register arg1,
318                     [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic,
319                     [[gnu::vector_size(16),
320                       gnu::may_alias]] ElementType result_to_check_undisturbed) {
321     constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
322     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
323                   kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 0, kHalfLen)),
324               std::tuple{result_to_check_agnostic});
325     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kUndisturbed>(
326                   kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 0, kHalfLen)),
327               std::tuple{result_to_check_undisturbed});
328     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
329                              TailProcessing::kAgnostic,
330                              InactiveProcessing::kAgnostic>(kUndisturbedResult,
331                                                             std::get<0>(Vaddvx(arg1, UInt8{1})),
332                                                             0,
333                                                             kHalfLen,
334                                                             RawInt16{0xffff})),
335               std::tuple{result_to_check_agnostic});
336     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
337                              TailProcessing::kAgnostic,
338                              InactiveProcessing::kUndisturbed>(kUndisturbedResult,
339                                                                std::get<0>(Vaddvx(arg1, UInt8{1})),
340                                                                0,
341                                                                kHalfLen,
342                                                                RawInt16{0xffff})),
343               std::tuple{result_to_check_agnostic});
344     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
345                              TailProcessing::kUndisturbed,
346                              InactiveProcessing::kAgnostic>(kUndisturbedResult,
347                                                             std::get<0>(Vaddvx(arg1, UInt8{1})),
348                                                             0,
349                                                             kHalfLen,
350                                                             RawInt16{0xffff})),
351               std::tuple{result_to_check_undisturbed});
352     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
353                              TailProcessing::kUndisturbed,
354                              InactiveProcessing::kUndisturbed>(kUndisturbedResult,
355                                                                std::get<0>(Vaddvx(arg1, UInt8{1})),
356                                                                0,
357                                                                kHalfLen,
358                                                                RawInt16{0xffff})),
359               std::tuple{result_to_check_undisturbed});
360   };
361   Verify(Vaddvx<UInt8>,
362          __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
363          __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
364          __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
365   Verify(Vaddvx<UInt8>,
366          __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
367          __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
368          __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
369   Verify(Vaddvx<UInt16>,
370          __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
371          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
372          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
373   Verify(Vaddvx<UInt16>,
374          __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
375          __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
376          __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
377   Verify(Vaddvx<UInt32>,
378          __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
379          __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
380          __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555});
381   Verify(Vaddvx<UInt32>,
382          __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
383          __v4su{0x0000'0000, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
384          __v4su{0x0000'0000, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
385   Verify(Vaddvx<UInt64>,
386          __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
387          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
388          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555});
389   Verify(Vaddvx<UInt64>,
390          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
391          __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff},
392          __v2du{0x0000'0000'0000'0000, 0x5555'5555'5555'5555});
393 }
394 
TEST(VectorIntrinsics,VmaskArgForVvv)395 TEST(VectorIntrinsics, VmaskArgForVvv) {
396   auto Verify = []<typename ElementType>(
397                     auto Vaddvv,
398                     SIMD128Register arg2,
399                     [[gnu::vector_size(16),
400                       gnu::may_alias]] ElementType result_to_check_agnostic_agnostic,
401                     [[gnu::vector_size(16),
402                       gnu::may_alias]] ElementType result_to_check_agnostic_undisturbed,
403                     [[gnu::vector_size(16),
404                       gnu::may_alias]] ElementType result_to_check_undisturbed_agnostic,
405                     [[gnu::vector_size(16),
406                       gnu::may_alias]] ElementType result_to_check_undisturbed_undisturbed) {
407     constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
408     ASSERT_EQ(
409         (VectorMasking<Wrapping<ElementType>,
410                        TailProcessing::kAgnostic,
411                        InactiveProcessing::kAgnostic>(kUndisturbedResult,
412                                                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
413                                                       0,
414                                                       kHalfLen,
415                                                       RawInt16{0xfdda})),
416         std::tuple{result_to_check_agnostic_agnostic});
417     ASSERT_EQ(
418         (VectorMasking<Wrapping<ElementType>,
419                        TailProcessing::kAgnostic,
420                        InactiveProcessing::kUndisturbed>(kUndisturbedResult,
421                                                          std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
422                                                          0,
423                                                          kHalfLen,
424                                                          RawInt16{0xfdda})),
425         std::tuple{result_to_check_agnostic_undisturbed});
426     ASSERT_EQ(
427         (VectorMasking<Wrapping<ElementType>,
428                        TailProcessing::kUndisturbed,
429                        InactiveProcessing::kAgnostic>(kUndisturbedResult,
430                                                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
431                                                       0,
432                                                       kHalfLen,
433                                                       RawInt16{0xfdda})),
434         std::tuple{result_to_check_undisturbed_agnostic});
435     ASSERT_EQ(
436         (VectorMasking<Wrapping<ElementType>,
437                        TailProcessing::kUndisturbed,
438                        InactiveProcessing::kUndisturbed>(kUndisturbedResult,
439                                                          std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
440                                                          0,
441                                                          kHalfLen,
442                                                          RawInt16{0xfdda})),
443         std::tuple{result_to_check_undisturbed_undisturbed});
444   };
445   Verify(
446       Vaddvv<UInt8>,
447       __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
448       __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
449       __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
450       __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
451       __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
452   Verify(
453       Vaddvv<UInt8>,
454       __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
455       __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
456       __v16qu{0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
457       __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
458       __v16qu{
459           0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
460   Verify(Vaddvv<UInt16>,
461          __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
462          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
463          __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
464          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555},
465          __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
466   Verify(Vaddvv<UInt16>,
467          __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
468          __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
469          __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
470          __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555},
471          __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
472   Verify(Vaddvv<UInt32>,
473          __v4su{0, 1, 0, 1},
474          __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
475          __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
476          __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555},
477          __v4su{0x5555'5555, 0x0000'0000, 0x5555'5555, 0x5555'5555});
478   Verify(Vaddvv<UInt32>,
479          __v4su{1, 0, 1, 0},
480          __v4su{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
481          __v4su{0x5555'5555, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
482          __v4su{0xffff'ffff, 0xffff'ffff, 0x5555'5555, 0x5555'5555},
483          __v4su{0x5555'5555, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
484   Verify(Vaddvv<UInt64>,
485          __v2du{0, 1},
486          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
487          __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
488          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
489          __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
490   Verify(Vaddvv<UInt64>,
491          __v2du{1, 0},
492          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
493          __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
494          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
495          __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
496 }
497 
TEST(VectorIntrinsics,VmaskArgForVvx)498 TEST(VectorIntrinsics, VmaskArgForVvx) {
499   auto Verify = []<typename ElementType>(
500                     auto Vaddvx,
501                     SIMD128Register arg1,
502                     [[gnu::vector_size(16),
503                       gnu::may_alias]] ElementType result_to_check_agnostic_agnostic,
504                     [[gnu::vector_size(16),
505                       gnu::may_alias]] ElementType result_to_check_agnostic_undisturbed,
506                     [[gnu::vector_size(16),
507                       gnu::may_alias]] ElementType result_to_check_undisturbed_agnostic,
508                     [[gnu::vector_size(16),
509                       gnu::may_alias]] ElementType result_to_check_undisturbed_undisturbed) {
510     constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
511     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
512                              TailProcessing::kAgnostic,
513                              InactiveProcessing::kAgnostic>(kUndisturbedResult,
514                                                             std::get<0>(Vaddvx(arg1, UInt8{1})),
515                                                             0,
516                                                             kHalfLen,
517                                                             RawInt16{0xfdda})),
518               std::tuple{result_to_check_agnostic_agnostic});
519     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
520                              TailProcessing::kAgnostic,
521                              InactiveProcessing::kUndisturbed>(kUndisturbedResult,
522                                                                std::get<0>(Vaddvx(arg1, UInt8{1})),
523                                                                0,
524                                                                kHalfLen,
525                                                                RawInt16{0xfdda})),
526               std::tuple{result_to_check_agnostic_undisturbed});
527     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
528                              TailProcessing::kUndisturbed,
529                              InactiveProcessing::kAgnostic>(kUndisturbedResult,
530                                                             std::get<0>(Vaddvx(arg1, UInt8{1})),
531                                                             0,
532                                                             kHalfLen,
533                                                             RawInt16{0xfdda})),
534               std::tuple{result_to_check_undisturbed_agnostic});
535     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
536                              TailProcessing::kUndisturbed,
537                              InactiveProcessing::kUndisturbed>(kUndisturbedResult,
538                                                                std::get<0>(Vaddvx(arg1, UInt8{1})),
539                                                                0,
540                                                                kHalfLen,
541                                                                RawInt16{0xfdda})),
542               std::tuple{result_to_check_undisturbed_undisturbed});
543   };
544   Verify(
545       Vaddvx<UInt8>,
546       __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
547       __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
548       __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
549       __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
550       __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
551   Verify(
552       Vaddvx<UInt8>,
553       __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
554       __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
555       __v16qu{0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
556       __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
557       __v16qu{
558           0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
559   Verify(Vaddvx<UInt16>,
560          __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
561          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
562          __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
563          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555},
564          __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
565   Verify(Vaddvx<UInt16>,
566          __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
567          __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
568          __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
569          __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555},
570          __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
571   Verify(Vaddvx<UInt32>,
572          __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
573          __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
574          __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
575          __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555},
576          __v4su{0x5555'5555, 0x0000'0000, 0x5555'5555, 0x5555'5555});
577   Verify(Vaddvx<UInt32>,
578          __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
579          __v4su{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
580          __v4su{0x5555'5555, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
581          __v4su{0xffff'ffff, 0xffff'ffff, 0x5555'5555, 0x5555'5555},
582          __v4su{0x5555'5555, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
583   Verify(Vaddvx<UInt64>,
584          __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
585          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
586          __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
587          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
588          __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
589   Verify(Vaddvx<UInt64>,
590          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
591          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
592          __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
593          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
594          __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
595 }
596 
TEST(VectorIntrinsics,VstartArgVv)597 TEST(VectorIntrinsics, VstartArgVv) {
598   auto Verify = []<typename ElementType>(
599                     auto Vaddvv,
600                     SIMD128Register arg2,
601                     [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
602     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
603                   kUndisturbedResult, std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)), 1, 16)),
604               std::tuple{result_to_check});
605     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kUndisturbed>(
606                   kUndisturbedResult, std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)), 1, 16)),
607               std::tuple{result_to_check});
608     ASSERT_EQ(
609         (VectorMasking<Wrapping<ElementType>,
610                        TailProcessing::kAgnostic,
611                        InactiveProcessing::kAgnostic>(kUndisturbedResult,
612                                                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
613                                                       1,
614                                                       16,
615                                                       RawInt16{0xffff})),
616         std::tuple{result_to_check});
617     ASSERT_EQ(
618         (VectorMasking<Wrapping<ElementType>,
619                        TailProcessing::kAgnostic,
620                        InactiveProcessing::kUndisturbed>(kUndisturbedResult,
621                                                          std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
622                                                          1,
623                                                          16,
624                                                          RawInt16{0xffff})),
625         std::tuple{result_to_check});
626     ASSERT_EQ(
627         (VectorMasking<Wrapping<ElementType>,
628                        TailProcessing::kUndisturbed,
629                        InactiveProcessing::kAgnostic>(kUndisturbedResult,
630                                                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
631                                                       1,
632                                                       16,
633                                                       RawInt16{0xffff})),
634         std::tuple{result_to_check});
635     ASSERT_EQ(
636         (VectorMasking<Wrapping<ElementType>,
637                        TailProcessing::kUndisturbed,
638                        InactiveProcessing::kUndisturbed>(kUndisturbedResult,
639                                                          std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
640                                                          1,
641                                                          16,
642                                                          RawInt16{0xffff})),
643         std::tuple{result_to_check});
644   };
645   Verify(Vaddvv<UInt8>,
646          __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
647          __v16qu{0x55, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
648   Verify(Vaddvv<UInt8>,
649          __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
650          __v16qu{0x55, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
651   Verify(Vaddvv<UInt16>,
652          __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
653          __v8hu{0x5555, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
654   Verify(Vaddvv<UInt16>,
655          __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
656          __v8hu{0x5555, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
657   Verify(Vaddvv<UInt32>,
658          __v4su{0, 1, 0, 1},
659          __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
660   Verify(Vaddvv<UInt32>,
661          __v4su{1, 0, 1, 0},
662          __v4su{0x5555'5555, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
663   Verify(Vaddvv<UInt64>, __v2du{0, 1}, __v2du{0x5555'5555'5555'5555, 0x0000'0000'0000'0000});
664   Verify(Vaddvv<UInt64>, __v2du{1, 0}, __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff});
665 }
666 
TEST(VectorIntrinsics,VstartArgVx)667 TEST(VectorIntrinsics, VstartArgVx) {
668   auto Verify = []<typename ElementType>(
669                     auto Vaddvx,
670                     SIMD128Register arg1,
671                     [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
672     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
673                   kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16)),
674               std::tuple{result_to_check});
675     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kUndisturbed>(
676                   kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16)),
677               std::tuple{result_to_check});
678     ASSERT_EQ(
679         (VectorMasking<Wrapping<ElementType>,
680                        TailProcessing::kAgnostic,
681                        InactiveProcessing::kAgnostic>(
682             kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16, RawInt16{0xffff})),
683         std::tuple{result_to_check});
684     ASSERT_EQ(
685         (VectorMasking<Wrapping<ElementType>,
686                        TailProcessing::kAgnostic,
687                        InactiveProcessing::kUndisturbed>(
688             kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16, RawInt16{0xffff})),
689         std::tuple{result_to_check});
690     ASSERT_EQ(
691         (VectorMasking<Wrapping<ElementType>,
692                        TailProcessing::kUndisturbed,
693                        InactiveProcessing::kAgnostic>(
694             kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16, RawInt16{0xffff})),
695         std::tuple{result_to_check});
696     ASSERT_EQ(
697         (VectorMasking<Wrapping<ElementType>,
698                        TailProcessing::kUndisturbed,
699                        InactiveProcessing::kUndisturbed>(
700             kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16, RawInt16{0xffff})),
701         std::tuple{result_to_check});
702   };
703   Verify(Vaddvx<UInt8>,
704          __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
705          __v16qu{0x55, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
706   Verify(Vaddvx<UInt8>,
707          __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
708          __v16qu{0x55, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
709   Verify(Vaddvx<UInt16>,
710          __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
711          __v8hu{0x5555, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
712   Verify(Vaddvx<UInt16>,
713          __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
714          __v8hu{0x5555, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
715   Verify(Vaddvx<UInt32>,
716          __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
717          __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
718   Verify(Vaddvx<UInt32>,
719          __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
720          __v4su{0x5555'5555, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
721   Verify(Vaddvx<UInt64>,
722          __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
723          __v2du{0x5555'5555'5555'5555, 0x0000'0000'0000'0000});
724   Verify(Vaddvx<UInt64>,
725          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
726          __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff});
727 }
728 
// Checks vector-vector subtraction for every element width. Each Verify call computes 0 - arg2
// and compares it, after VectorMasking with vstart == 0 and vl == 16, against result_to_check.
// Both alternating input patterns ({0, 1, ...} and {1, 0, ...}) are covered for each width so
// that the wrap-around result lands on even as well as odd element positions.
TEST(VectorIntrinsics, Vsubvv) {
  auto Verify = []<typename ElementType>(
                    auto Vsubvv,
                    SIMD128Register arg2,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    // Variant without an explicit mask argument.
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
                  kUndisturbedResult, std::get<0>(Vsubvv(__m128i{0, 0}, arg2)), 0, 16)),
              std::tuple{result_to_check});
    // Variant with an explicit mask argument that has all bits set.
    ASSERT_EQ(
        (VectorMasking<Wrapping<ElementType>,
                       TailProcessing::kAgnostic,
                       InactiveProcessing::kAgnostic>(
            kUndisturbedResult, std::get<0>(Vsubvv(__m128i{0, 0}, arg2)), 0, 16, RawInt16{0xffff})),
        std::tuple{result_to_check});
  };
  Verify(Vsubvv<UInt8>,
         __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  Verify(Vsubvv<UInt8>,
         __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Verify(Vsubvv<UInt16>,
         __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  Verify(Vsubvv<UInt16>,
         __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Verify(Vsubvv<UInt32>,
         __v4su{0, 1, 0, 1},
         __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  // Mirror pattern for 32-bit elements, matching the coverage of the other element widths.
  Verify(Vsubvv<UInt32>,
         __v4su{1, 0, 1, 0},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Verify(Vsubvv<UInt64>, __v2du{0, 1}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
  Verify(Vsubvv<UInt64>, __v2du{1, 0}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
}
762 
// Checks vector-scalar subtraction for every element width. Each Verify call computes arg1 - 1
// and compares it, after VectorMasking with vstart == 0 and vl == 16, against result_to_check.
TEST(VectorIntrinsics, Vsubvx) {
  auto Verify = []<typename ElementType>(
                    auto Vsubvx,
                    SIMD128Register arg1,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    using Element = Wrapping<ElementType>;
    // The difference does not depend on the masking policy, so it is computed a single time.
    const SIMD128Register difference = std::get<0>(Vsubvx(arg1, UInt8{1}));
    const std::tuple expected{result_to_check};

    // Variant without an explicit mask argument.
    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic>(
                  kUndisturbedResult, difference, 0, 16)),
              expected);
    // Variant with an explicit mask argument that has all bits set.
    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic, InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, difference, 0, 16, RawInt16{0xffff})),
              expected);
  };

  Verify(Vsubvx<UInt8>,
         __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  Verify(Vsubvx<UInt8>,
         __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Verify(Vsubvx<UInt16>,
         __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  Verify(Vsubvx<UInt16>,
         __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Verify(Vsubvx<UInt32>,
         __v4su{1, 0, 1, 0},
         __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  Verify(Vsubvx<UInt32>,
         __v4su{0, 1, 0, 1},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Verify(Vsubvx<UInt64>, __v2du{1, 0}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
  Verify(Vsubvx<UInt64>, __v2du{0, 1}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
}
799 
800 }  // namespace
801 
802 }  // namespace berberis::intrinsics
803