1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "gtest/gtest.h"
18
19 #include "xmmintrin.h"
20
21 #include <array>
22 #include <cstdint>
23 #include <tuple>
24
25 #include "berberis/base/bit_util.h"
26 #include "berberis/intrinsics/vector_intrinsics.h"
27
28 namespace berberis::intrinsics {
29
30 namespace {
31
// Compares VectorBroadcast against VectorBroadcastForTests for the extreme values of the 8-, 16-,
// 32- and 64-bit integer types: min and max of each signed type, max of each unsigned type.
TEST(VectorIntrinsics, VectorBroadcast) {
  // 8-bit elements.
  constexpr auto kInt8Min = Wrapping{std::numeric_limits<int8_t>::min()};
  constexpr auto kInt8Max = Wrapping{std::numeric_limits<int8_t>::max()};
  constexpr auto kUInt8Max = Wrapping{std::numeric_limits<uint8_t>::max()};
  ASSERT_EQ(VectorBroadcastForTests<kInt8Min>(), VectorBroadcast<kInt8Min>());
  ASSERT_EQ(VectorBroadcastForTests<kInt8Max>(), VectorBroadcast<kInt8Max>());
  ASSERT_EQ(VectorBroadcastForTests<kUInt8Max>(), VectorBroadcast<kUInt8Max>());

  // 16-bit elements.
  constexpr auto kInt16Min = Wrapping{std::numeric_limits<int16_t>::min()};
  constexpr auto kInt16Max = Wrapping{std::numeric_limits<int16_t>::max()};
  constexpr auto kUInt16Max = Wrapping{std::numeric_limits<uint16_t>::max()};
  ASSERT_EQ(VectorBroadcastForTests<kInt16Min>(), VectorBroadcast<kInt16Min>());
  ASSERT_EQ(VectorBroadcastForTests<kInt16Max>(), VectorBroadcast<kInt16Max>());
  ASSERT_EQ(VectorBroadcastForTests<kUInt16Max>(), VectorBroadcast<kUInt16Max>());

  // 32-bit elements.
  constexpr auto kInt32Min = Wrapping{std::numeric_limits<int32_t>::min()};
  constexpr auto kInt32Max = Wrapping{std::numeric_limits<int32_t>::max()};
  constexpr auto kUInt32Max = Wrapping{std::numeric_limits<uint32_t>::max()};
  ASSERT_EQ(VectorBroadcastForTests<kInt32Min>(), VectorBroadcast<kInt32Min>());
  ASSERT_EQ(VectorBroadcastForTests<kInt32Max>(), VectorBroadcast<kInt32Max>());
  ASSERT_EQ(VectorBroadcastForTests<kUInt32Max>(), VectorBroadcast<kUInt32Max>());

  // 64-bit elements.
  constexpr auto kInt64Min = Wrapping{std::numeric_limits<int64_t>::min()};
  constexpr auto kInt64Max = Wrapping{std::numeric_limits<int64_t>::max()};
  constexpr auto kUInt64Max = Wrapping{std::numeric_limits<uint64_t>::max()};
  ASSERT_EQ(VectorBroadcastForTests<kInt64Min>(), VectorBroadcast<kInt64Min>());
  ASSERT_EQ(VectorBroadcastForTests<kInt64Max>(), VectorBroadcast<kInt64Max>());
  ASSERT_EQ(VectorBroadcastForTests<kUInt64Max>(), VectorBroadcast<kUInt64Max>());
}
58
// Compares MakeBitmaskFromVl against MakeBitmaskFromVlForTests for every vl in [0, 128).
TEST(VectorIntrinsics, MakeBitmaskFromVl) {
  constexpr size_t kVlLimit = 128;
  for (size_t vector_length = 0; vector_length != kVlLimit; ++vector_length) {
    ASSERT_EQ(MakeBitmaskFromVlForTests(vector_length), MakeBitmaskFromVl(vector_length));
  }
}
64
// For 8-bit elements and every bit mask in [0, 131071): checks that BitMaskToSimdMask matches
// BitMaskToSimdMaskForTests, then feeds the resulting SIMD mask to SimdMaskToBitMask and checks
// that it matches SimdMaskToBitMaskForTests.
TEST(VectorIntrinsics, Make8bitMaskFromBitmask) {
  constexpr size_t kMaskLimit = 131071;
  for (size_t bits = 0; bits != kMaskLimit; ++bits) {
    const auto expanded = BitMaskToSimdMask<Int8>(bits);
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int8>(bits), expanded);
    const auto [simd_mask] = expanded;
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int8>(simd_mask), SimdMaskToBitMask<Int8>(simd_mask));
  }
}
72
// For 16-bit elements and every bit mask in [0, 511): checks that BitMaskToSimdMask matches
// BitMaskToSimdMaskForTests, then feeds the resulting SIMD mask to SimdMaskToBitMask and checks
// that it matches SimdMaskToBitMaskForTests.
TEST(VectorIntrinsics, Make16bitMaskFromBitmask) {
  constexpr size_t kMaskLimit = 511;
  for (size_t bits = 0; bits != kMaskLimit; ++bits) {
    const auto expanded = BitMaskToSimdMask<Int16>(bits);
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int16>(bits), expanded);
    const auto [simd_mask] = expanded;
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int16>(simd_mask), SimdMaskToBitMask<Int16>(simd_mask));
  }
}
80
// For 32-bit elements and every bit mask in [0, 31): checks that BitMaskToSimdMask matches
// BitMaskToSimdMaskForTests, then feeds the resulting SIMD mask to SimdMaskToBitMask and checks
// that it matches SimdMaskToBitMaskForTests.
TEST(VectorIntrinsics, Make32bitMaskFromBitmask) {
  constexpr size_t kMaskLimit = 31;
  for (size_t bits = 0; bits != kMaskLimit; ++bits) {
    const auto expanded = BitMaskToSimdMask<Int32>(bits);
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int32>(bits), expanded);
    const auto [simd_mask] = expanded;
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int32>(simd_mask), SimdMaskToBitMask<Int32>(simd_mask));
  }
}
88
// For 64-bit elements and every bit mask in [0, 7): checks that BitMaskToSimdMask matches
// BitMaskToSimdMaskForTests, then feeds the resulting SIMD mask to SimdMaskToBitMask and checks
// that it matches SimdMaskToBitMaskForTests.
TEST(VectorIntrinsics, Make64bitMaskFromBitmask) {
  constexpr size_t kMaskLimit = 7;
  for (size_t bits = 0; bits != kMaskLimit; ++bits) {
    const auto expanded = BitMaskToSimdMask<Int64>(bits);
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int64>(bits), expanded);
    const auto [simd_mask] = expanded;
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int64>(simd_mask), SimdMaskToBitMask<Int64>(simd_mask));
  }
}
96 template <typename ElementType>
TestVidv()97 void TestVidv() {
98 for (size_t index = 0; index < 8; ++index) {
99 ASSERT_EQ(VidvForTests<ElementType>(index), Vidv<ElementType>(index));
100 }
101 }
// Runs the Vidv/VidvForTests comparison for every signed and unsigned element type.
//
// The assertions live in the shared TestVidv helper, so a failure message on its own does not
// say which instantiation failed. Each call therefore runs under its own scoped trace naming the
// element type; the braces keep one type's trace from leaking into the next call.
TEST(VectorIntrinsics, Vidv) {
  {
    SCOPED_TRACE("ElementType = Int8");
    TestVidv<Int8>();
  }
  {
    SCOPED_TRACE("ElementType = Int16");
    TestVidv<Int16>();
  }
  {
    SCOPED_TRACE("ElementType = Int32");
    TestVidv<Int32>();
  }
  {
    SCOPED_TRACE("ElementType = Int64");
    TestVidv<Int64>();
  }
  {
    SCOPED_TRACE("ElementType = UInt8");
    TestVidv<UInt8>();
  }
  {
    SCOPED_TRACE("ElementType = UInt16");
    TestVidv<UInt16>();
  }
  {
    SCOPED_TRACE("ElementType = UInt32");
    TestVidv<UInt32>();
  }
  {
    SCOPED_TRACE("ElementType = UInt64");
    TestVidv<UInt64>();
  }
}
112 // Easily recognizable bit pattern for target register.
113 constexpr __m128i kUndisturbedResult = {0x5555'5555'5555'5555, 0x5555'5555'5555'5555};
114
115 template <auto kElement>
TestVectorMaskedElementTo()116 void TestVectorMaskedElementTo() {
117 size_t max_mask = sizeof(kElement) == sizeof(uint8_t) ? 131071
118 : sizeof(kElement) == sizeof(uint16_t) ? 511
119 : sizeof(kElement) == sizeof(uint32_t) ? 31
120 : 7;
121 for (size_t mask = 0; mask < max_mask; ++mask) {
122 const SIMD128Register src = kUndisturbedResult;
123 const auto [simd_mask] = BitMaskToSimdMask<decltype(kElement)>(mask);
124 ASSERT_EQ(VectorMaskedElementToForTests<kElement>(simd_mask, src),
125 VectorMaskedElementTo<kElement>(simd_mask, src));
126 }
127 }
128
// Runs TestVectorMaskedElementTo with the minimum and then the maximum value of each signed and
// unsigned 8-, 16-, 32- and 64-bit integer type.
TEST(VectorIntrinsics, VectorMaskedElementTo) {
  // The argument is only a type tag; its value is never read.
  auto check_extremes = [](auto type_tag) {
    using Limits = std::numeric_limits<decltype(type_tag)>;
    TestVectorMaskedElementTo<Limits::min()>();
    TestVectorMaskedElementTo<Limits::max()>();
  };
  check_extremes(int8_t{});
  check_extremes(uint8_t{});
  check_extremes(int16_t{});
  check_extremes(uint16_t{});
  check_extremes(int32_t{});
  check_extremes(uint32_t{});
  check_extremes(int64_t{});
  check_extremes(uint64_t{});
}
147
// Vector + vector addition with every element processed (vstart = 0, vl = 16, all-ones mask).
// The first operand has all bits set, so adding 0 yields all ones and adding 1 wraps to 0.
TEST(VectorIntrinsics, Vaddvv) {
  auto ExpectSum = []<typename ElementType>(
                       auto add,
                       SIMD128Register addend,
                       [[gnu::vector_size(16), gnu::may_alias]] ElementType expected) {
    using Element = Wrapping<ElementType>;
    // The addition does not depend on the masking policy, so compute it once.
    const auto sum = std::get<0>(add(__m128i{-1, -1}, addend));

    // Unmasked form.
    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic>(kUndisturbedResult, sum, 0, 16)),
              std::tuple{expected});
    // Masked form with every mask bit set.
    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic, InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, sum, 0, 16, RawInt16{0xffff})),
              std::tuple{expected});
  };

  ExpectSum(Vaddvv<UInt8>,
            __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
            __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  ExpectSum(Vaddvv<UInt8>,
            __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
            __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  ExpectSum(Vaddvv<UInt16>,
            __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
            __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  ExpectSum(Vaddvv<UInt16>,
            __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
            __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  ExpectSum(Vaddvv<UInt32>,
            __v4su{0, 1, 0, 1},
            __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  ExpectSum(Vaddvv<UInt32>,
            __v4su{1, 0, 1, 0},
            __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  ExpectSum(Vaddvv<UInt64>, __v2du{0, 1}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
  ExpectSum(Vaddvv<UInt64>, __v2du{1, 0}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
}
187
// Vector + scalar addition with every element processed (vstart = 0, vl = 16, all-ones mask).
// The scalar is always 1, so max - 1 becomes max and max wraps to 0.
TEST(VectorIntrinsics, Vaddvx) {
  auto ExpectSum = []<typename ElementType>(
                       auto add,
                       SIMD128Register vector_operand,
                       [[gnu::vector_size(16), gnu::may_alias]] ElementType expected) {
    using Element = Wrapping<ElementType>;
    // The addition does not depend on the masking policy, so compute it once.
    const auto sum = std::get<0>(add(vector_operand, UInt8{1}));

    // Unmasked form.
    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic>(kUndisturbedResult, sum, 0, 16)),
              std::tuple{expected});
    // Masked form with every mask bit set.
    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic, InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, sum, 0, 16, RawInt16{0xffff})),
              std::tuple{expected});
  };

  ExpectSum(
      Vaddvx<UInt8>,
      __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
      __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  ExpectSum(
      Vaddvx<UInt8>,
      __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
      __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  ExpectSum(Vaddvx<UInt16>,
            __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
            __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  ExpectSum(Vaddvx<UInt16>,
            __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
            __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  ExpectSum(Vaddvx<UInt32>,
            __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
            __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  ExpectSum(Vaddvx<UInt32>,
            __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
            __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  ExpectSum(Vaddvx<UInt64>,
            __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
            __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
  ExpectSum(Vaddvx<UInt64>,
            __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
            __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
}
228
// Vector + vector addition where vl covers only the first half of the elements.
// Per the expected data: processed elements hold the sum, tail elements are all ones under the
// agnostic tail policy and keep the 0x55 pattern of kUndisturbedResult under the undisturbed one.
// The mask is all ones, so the inactive-element policy does not affect the result.
TEST(VectorIntrinsics, VlArgForVv) {
  auto ExpectHalf =
      []<typename ElementType>(
          auto add,
          SIMD128Register addend,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_tail_agnostic,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_tail_undisturbed) {
        using Element = Wrapping<ElementType>;
        // Half of the elements of this type that fit in a SIMD128Register.
        constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
        // The addition does not depend on the masking policy, so compute it once.
        const auto sum = std::get<0>(add(__m128i{-1, -1}, addend));

        // Unmasked forms.
        ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic>(
                      kUndisturbedResult, sum, 0, kHalfLen)),
                  std::tuple{expected_tail_agnostic});
        ASSERT_EQ((VectorMasking<Element, TailProcessing::kUndisturbed>(
                      kUndisturbedResult, sum, 0, kHalfLen)),
                  std::tuple{expected_tail_undisturbed});

        // Masked forms: tail agnostic, then tail undisturbed, each with both inactive policies.
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xffff})),
                  std::tuple{expected_tail_agnostic});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xffff})),
                  std::tuple{expected_tail_agnostic});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xffff})),
                  std::tuple{expected_tail_undisturbed});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xffff})),
                  std::tuple{expected_tail_undisturbed});
      };

  ExpectHalf(
      Vaddvv<UInt8>,
      __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
      __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  ExpectHalf(
      Vaddvv<UInt8>,
      __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
      __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  ExpectHalf(Vaddvv<UInt16>,
             __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
             __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
             __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
  ExpectHalf(Vaddvv<UInt16>,
             __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
             __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
             __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
  ExpectHalf(Vaddvv<UInt32>,
             __v4su{0, 1, 0, 1},
             __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
             __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555});
  ExpectHalf(Vaddvv<UInt32>,
             __v4su{1, 0, 1, 0},
             __v4su{0x0000'0000, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
             __v4su{0x0000'0000, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
  ExpectHalf(Vaddvv<UInt64>,
             __v2du{0, 1},
             __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
             __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555});
  ExpectHalf(Vaddvv<UInt64>,
             __v2du{1, 0},
             __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff},
             __v2du{0x0000'0000'0000'0000, 0x5555'5555'5555'5555});
}
313
// Vector + scalar addition (scalar is 1) where vl covers only the first half of the elements.
// Per the expected data: processed elements hold the sum, tail elements are all ones under the
// agnostic tail policy and keep the 0x55 pattern of kUndisturbedResult under the undisturbed one.
// The mask is all ones, so the inactive-element policy does not affect the result.
TEST(VectorIntrinsics, VlArgForVx) {
  auto ExpectHalf =
      []<typename ElementType>(
          auto add,
          SIMD128Register vector_operand,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_tail_agnostic,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_tail_undisturbed) {
        using Element = Wrapping<ElementType>;
        // Half of the elements of this type that fit in a SIMD128Register.
        constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
        // The addition does not depend on the masking policy, so compute it once.
        const auto sum = std::get<0>(add(vector_operand, UInt8{1}));

        // Unmasked forms.
        ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic>(
                      kUndisturbedResult, sum, 0, kHalfLen)),
                  std::tuple{expected_tail_agnostic});
        ASSERT_EQ((VectorMasking<Element, TailProcessing::kUndisturbed>(
                      kUndisturbedResult, sum, 0, kHalfLen)),
                  std::tuple{expected_tail_undisturbed});

        // Masked forms: tail agnostic, then tail undisturbed, each with both inactive policies.
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xffff})),
                  std::tuple{expected_tail_agnostic});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xffff})),
                  std::tuple{expected_tail_agnostic});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xffff})),
                  std::tuple{expected_tail_undisturbed});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xffff})),
                  std::tuple{expected_tail_undisturbed});
      };

  ExpectHalf(
      Vaddvx<UInt8>,
      __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
      __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  ExpectHalf(
      Vaddvx<UInt8>,
      __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
      __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  ExpectHalf(Vaddvx<UInt16>,
             __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
             __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
             __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
  ExpectHalf(Vaddvx<UInt16>,
             __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
             __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
             __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
  ExpectHalf(Vaddvx<UInt32>,
             __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
             __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
             __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555});
  ExpectHalf(Vaddvx<UInt32>,
             __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
             __v4su{0x0000'0000, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
             __v4su{0x0000'0000, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
  ExpectHalf(Vaddvx<UInt64>,
             __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
             __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
             __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555});
  ExpectHalf(Vaddvx<UInt64>,
             __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
             __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff},
             __v2du{0x0000'0000'0000'0000, 0x5555'5555'5555'5555});
}
394
// Vector + vector addition with a partial mask (0xfdda: bits 0, 2, 5 and 9 are clear) and vl
// covering the first half of the elements. All four tail/inactive policy combinations are checked.
// Per the expected data, elements whose mask bit is clear are all ones under the agnostic inactive
// policy and keep the 0x55 pattern of kUndisturbedResult under the undisturbed one; the tail
// follows the tail policy in the same way.
TEST(VectorIntrinsics, VmaskArgForVvv) {
  // The expected_<tail>_<inactive> parameters name the tail policy first, the inactive second.
  auto ExpectMasked =
      []<typename ElementType>(
          auto add,
          SIMD128Register addend,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_agnostic_agnostic,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_agnostic_undisturbed,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_undisturbed_agnostic,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_undisturbed_undisturbed) {
        using Element = Wrapping<ElementType>;
        // Half of the elements of this type that fit in a SIMD128Register.
        constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
        // The addition does not depend on the masking policy, so compute it once.
        const auto sum = std::get<0>(add(__m128i{-1, -1}, addend));

        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{expected_agnostic_agnostic});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{expected_agnostic_undisturbed});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{expected_undisturbed_agnostic});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{expected_undisturbed_undisturbed});
      };

  ExpectMasked(
      Vaddvv<UInt8>,
      __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
      __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
      __v16qu{
          0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  ExpectMasked(
      Vaddvv<UInt8>,
      __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
      __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
      __v16qu{
          0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  ExpectMasked(Vaddvv<UInt16>,
               __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
               __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
               __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
               __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555},
               __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
  ExpectMasked(Vaddvv<UInt16>,
               __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
               __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
               __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
               __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555},
               __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
  ExpectMasked(Vaddvv<UInt32>,
               __v4su{0, 1, 0, 1},
               __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
               __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
               __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555},
               __v4su{0x5555'5555, 0x0000'0000, 0x5555'5555, 0x5555'5555});
  ExpectMasked(Vaddvv<UInt32>,
               __v4su{1, 0, 1, 0},
               __v4su{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
               __v4su{0x5555'5555, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
               __v4su{0xffff'ffff, 0xffff'ffff, 0x5555'5555, 0x5555'5555},
               __v4su{0x5555'5555, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
  ExpectMasked(Vaddvv<UInt64>,
               __v2du{0, 1},
               __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
               __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
               __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
               __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
  ExpectMasked(Vaddvv<UInt64>,
               __v2du{1, 0},
               __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
               __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
               __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
               __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
}
497
// Vector + scalar addition (scalar is 1) with a partial mask (0xfdda: bits 0, 2, 5 and 9 are
// clear) and vl covering the first half of the elements. All four tail/inactive policy
// combinations are checked. Per the expected data, elements whose mask bit is clear are all ones
// under the agnostic inactive policy and keep the 0x55 pattern of kUndisturbedResult under the
// undisturbed one; the tail follows the tail policy in the same way.
TEST(VectorIntrinsics, VmaskArgForVvx) {
  // The expected_<tail>_<inactive> parameters name the tail policy first, the inactive second.
  auto ExpectMasked =
      []<typename ElementType>(
          auto add,
          SIMD128Register vector_operand,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_agnostic_agnostic,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_agnostic_undisturbed,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_undisturbed_agnostic,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_undisturbed_undisturbed) {
        using Element = Wrapping<ElementType>;
        // Half of the elements of this type that fit in a SIMD128Register.
        constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
        // The addition does not depend on the masking policy, so compute it once.
        const auto sum = std::get<0>(add(vector_operand, UInt8{1}));

        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{expected_agnostic_agnostic});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{expected_agnostic_undisturbed});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{expected_undisturbed_agnostic});
        ASSERT_EQ((VectorMasking<Element,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, sum, 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{expected_undisturbed_undisturbed});
      };

  ExpectMasked(
      Vaddvx<UInt8>,
      __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
      __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
      __v16qu{
          0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  ExpectMasked(
      Vaddvx<UInt8>,
      __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
      __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
      __v16qu{
          0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  ExpectMasked(Vaddvx<UInt16>,
               __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
               __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
               __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
               __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555},
               __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
  ExpectMasked(Vaddvx<UInt16>,
               __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
               __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
               __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
               __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555},
               __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
  ExpectMasked(Vaddvx<UInt32>,
               __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
               __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
               __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
               __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555},
               __v4su{0x5555'5555, 0x0000'0000, 0x5555'5555, 0x5555'5555});
  ExpectMasked(Vaddvx<UInt32>,
               __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
               __v4su{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
               __v4su{0x5555'5555, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
               __v4su{0xffff'ffff, 0xffff'ffff, 0x5555'5555, 0x5555'5555},
               __v4su{0x5555'5555, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
  ExpectMasked(Vaddvx<UInt64>,
               __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
               __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
               __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
               __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
               __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
  ExpectMasked(Vaddvx<UInt64>,
               __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
               __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
               __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
               __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
               __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
}
596
// Vector + vector addition with vstart = 1 and vl = 16. Per the expected data, element 0 keeps
// the 0x55 pattern of kUndisturbedResult under every tail/inactive policy combination, and the
// remaining elements hold the sum.
TEST(VectorIntrinsics, VstartArgVv) {
  auto ExpectSkipFirst = []<typename ElementType>(
                             auto add,
                             SIMD128Register addend,
                             [[gnu::vector_size(16), gnu::may_alias]] ElementType expected) {
    using Element = Wrapping<ElementType>;
    // The addition does not depend on the masking policy, so compute it once.
    const auto sum = std::get<0>(add(__m128i{-1, -1}, addend));

    // Unmasked forms.
    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic>(kUndisturbedResult, sum, 1, 16)),
              std::tuple{expected});
    ASSERT_EQ(
        (VectorMasking<Element, TailProcessing::kUndisturbed>(kUndisturbedResult, sum, 1, 16)),
        std::tuple{expected});

    // Masked forms with every mask bit set.
    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic, InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, sum, 1, 16, RawInt16{0xffff})),
              std::tuple{expected});
    ASSERT_EQ((VectorMasking<Element,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, sum, 1, 16, RawInt16{0xffff})),
              std::tuple{expected});
    ASSERT_EQ((VectorMasking<Element,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, sum, 1, 16, RawInt16{0xffff})),
              std::tuple{expected});
    ASSERT_EQ((VectorMasking<Element,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, sum, 1, 16, RawInt16{0xffff})),
              std::tuple{expected});
  };

  ExpectSkipFirst(Vaddvv<UInt8>,
                  __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
                  __v16qu{0x55, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  ExpectSkipFirst(Vaddvv<UInt8>,
                  __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
                  __v16qu{0x55, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  ExpectSkipFirst(Vaddvv<UInt16>,
                  __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
                  __v8hu{0x5555, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  ExpectSkipFirst(Vaddvv<UInt16>,
                  __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
                  __v8hu{0x5555, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  ExpectSkipFirst(Vaddvv<UInt32>,
                  __v4su{0, 1, 0, 1},
                  __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  ExpectSkipFirst(Vaddvv<UInt32>,
                  __v4su{1, 0, 1, 0},
                  __v4su{0x5555'5555, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  ExpectSkipFirst(
      Vaddvv<UInt64>, __v2du{0, 1}, __v2du{0x5555'5555'5555'5555, 0x0000'0000'0000'0000});
  ExpectSkipFirst(
      Vaddvv<UInt64>, __v2du{1, 0}, __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff});
}
666
// Vector + scalar addition (scalar is 1) with vstart = 1 and vl = 16. Per the expected data,
// element 0 keeps the 0x55 pattern of kUndisturbedResult under every tail/inactive policy
// combination, and the remaining elements hold the sum.
TEST(VectorIntrinsics, VstartArgVx) {
  auto ExpectSkipFirst = []<typename ElementType>(
                             auto add,
                             SIMD128Register vector_operand,
                             [[gnu::vector_size(16), gnu::may_alias]] ElementType expected) {
    using Element = Wrapping<ElementType>;
    // The addition does not depend on the masking policy, so compute it once.
    const auto sum = std::get<0>(add(vector_operand, UInt8{1}));

    // Unmasked forms.
    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic>(kUndisturbedResult, sum, 1, 16)),
              std::tuple{expected});
    ASSERT_EQ(
        (VectorMasking<Element, TailProcessing::kUndisturbed>(kUndisturbedResult, sum, 1, 16)),
        std::tuple{expected});

    // Masked forms with every mask bit set.
    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic, InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, sum, 1, 16, RawInt16{0xffff})),
              std::tuple{expected});
    ASSERT_EQ((VectorMasking<Element,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, sum, 1, 16, RawInt16{0xffff})),
              std::tuple{expected});
    ASSERT_EQ((VectorMasking<Element,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, sum, 1, 16, RawInt16{0xffff})),
              std::tuple{expected});
    ASSERT_EQ((VectorMasking<Element,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, sum, 1, 16, RawInt16{0xffff})),
              std::tuple{expected});
  };

  ExpectSkipFirst(
      Vaddvx<UInt8>,
      __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
      __v16qu{0x55, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  ExpectSkipFirst(
      Vaddvx<UInt8>,
      __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
      __v16qu{0x55, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  ExpectSkipFirst(Vaddvx<UInt16>,
                  __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
                  __v8hu{0x5555, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  ExpectSkipFirst(Vaddvx<UInt16>,
                  __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
                  __v8hu{0x5555, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  ExpectSkipFirst(Vaddvx<UInt32>,
                  __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
                  __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  ExpectSkipFirst(Vaddvx<UInt32>,
                  __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
                  __v4su{0x5555'5555, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  ExpectSkipFirst(Vaddvx<UInt64>,
                  __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
                  __v2du{0x5555'5555'5555'5555, 0x0000'0000'0000'0000});
  ExpectSkipFirst(Vaddvx<UInt64>,
                  __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
                  __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff});
}
728
// Verifies Vsubvv by subtracting arg2 from an all-zero vector, i.e. computing 0 - arg2 with
// wrap-around in every element, for 8-, 16-, 32- and 64-bit unsigned element types. Each element
// width is checked with both alternating input patterns ({0, 1, ...} and {1, 0, ...}).
TEST(VectorIntrinsics, Vsubvv) {
  // Runs the given Vsubvv instantiation on (zero vector, arg2) and feeds the result through
  // VectorMasking (trailing arguments 0 and 16), once without a mask argument and once with a
  // mask that has all 16 bits set. Both outcomes must equal result_to_check.
  auto Verify = []<typename ElementType>(
                    auto Vsubvv,
                    SIMD128Register arg2,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
                  kUndisturbedResult, std::get<0>(Vsubvv(__m128i{0, 0}, arg2)), 0, 16)),
              std::tuple{result_to_check});
    ASSERT_EQ(
        (VectorMasking<Wrapping<ElementType>,
                       TailProcessing::kAgnostic,
                       InactiveProcessing::kAgnostic>(
            kUndisturbedResult, std::get<0>(Vsubvv(__m128i{0, 0}, arg2)), 0, 16, RawInt16{0xffff})),
        std::tuple{result_to_check});
  };
  Verify(Vsubvv<UInt8>,
         __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  Verify(Vsubvv<UInt8>,
         __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Verify(Vsubvv<UInt16>,
         __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  Verify(Vsubvv<UInt16>,
         __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Verify(Vsubvv<UInt32>,
         __v4su{0, 1, 0, 1},
         __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  // Mirrored 32-bit pattern, matching the coverage of the other element widths.
  Verify(Vsubvv<UInt32>,
         __v4su{1, 0, 1, 0},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Verify(Vsubvv<UInt64>, __v2du{0, 1}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
  Verify(Vsubvv<UInt64>, __v2du{1, 0}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
}
762
// Verifies Vsubvx by subtracting the scalar 1 from every element of the input vector, with
// wrap-around (0 - 1 yields the all-ones value), for 8-, 16-, 32- and 64-bit unsigned elements.
TEST(VectorIntrinsics, Vsubvx) {
  // Computes vsubvx_fn(minuend, 1) once, then checks it against expected_vector through
  // VectorMasking both without a mask argument and with a mask that has all 16 bits set.
  auto CheckDifference = []<typename ElementType>(
                             auto vsubvx_fn,
                             SIMD128Register minuend,
                             [[gnu::vector_size(16), gnu::may_alias]] ElementType expected_vector) {
    using Element = Wrapping<ElementType>;
    const RawInt16 all_bits_mask{0xffff};
    const auto difference = std::get<0>(vsubvx_fn(minuend, UInt8{1}));
    const std::tuple expected{expected_vector};

    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic>(
                  kUndisturbedResult, difference, 0, 16)),
              expected);
    ASSERT_EQ((VectorMasking<Element, TailProcessing::kAgnostic, InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, difference, 0, 16, all_bits_mask)),
              expected);
  };

  // 8-bit elements.
  CheckDifference(Vsubvx<UInt8>,
                  __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
                  __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  CheckDifference(Vsubvx<UInt8>,
                  __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
                  __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});

  // 16-bit elements.
  CheckDifference(Vsubvx<UInt16>,
                  __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
                  __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  CheckDifference(Vsubvx<UInt16>,
                  __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
                  __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});

  // 32-bit elements.
  CheckDifference(Vsubvx<UInt32>,
                  __v4su{1, 0, 1, 0},
                  __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  CheckDifference(Vsubvx<UInt32>,
                  __v4su{0, 1, 0, 1},
                  __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});

  // 64-bit elements.
  CheckDifference(
      Vsubvx<UInt64>, __v2du{1, 0}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
  CheckDifference(
      Vsubvx<UInt64>, __v2du{0, 1}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
}
799
800 } // namespace
801
802 } // namespace berberis::intrinsics
803