1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "gtest/gtest.h"
18
19 #include <cstdint>
20 #include <initializer_list>
21 #include <limits>
22
23 #include "utility.h"
24
25 namespace {
26
TEST(Arm64InsnTest, UnsignedBitfieldMoveNoShift) {
  // With immr=0 and imms=63, ubfm selects the entire 64-bit field, so the
  // instruction degenerates into a plain register-to-register copy.
  uint64_t input = 0x3952247371907021ULL;
  uint64_t result;

  asm("ubfm %0, %1, #0, #63" : "=r"(result) : "r"(input));

  ASSERT_EQ(result, 0x3952247371907021ULL);
}
35
TEST(Arm64InsnTest, BitfieldLeftInsertion) {
  // With immr (40) > imms (15), bfm acts as a bitfield insert: the low
  // imms+1 = 16 bits of the source land at destination bit 64-immr = 24,
  // while all other destination bits are preserved.
  uint64_t src = 0x389522868478abcdULL;
  uint64_t dst = 0x1101044682325271ULL;

  asm("bfm %0, %1, #40, #15" : "=r"(dst) : "r"(src), "0"(dst));

  ASSERT_EQ(dst, 0x110104abcd325271ULL);
}
44
TEST(Arm64InsnTest, BitfieldRightInsertion) {
  // With immr (4) <= imms (39), bfm copies source bits immr..imms (36 bits)
  // into the low bits of the destination, keeping the destination's upper
  // bits untouched.
  uint64_t src = 0x3276561809377344ULL;
  uint64_t dst = 0x1668039626579787ULL;

  asm("bfm %0, %1, #4, #39" : "=r"(dst) : "r"(src), "0"(dst));

  ASSERT_EQ(dst, 0x1668039180937734ULL);
}
53
TEST(Arm64InsnTest, MoveImmToFp32) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // For example, we decompose imm8 = 0b01001111 into:
  //
  // - sign = 0 (positive)
  // - upper exponent = 1
  // - lower exponent = 00
  // - mantissa = 1111
  //
  // This immediate corresponds to 32-bit floating point value:
  //
  //   0 011111 00 1111 0000000000000000000
  //   | |      |  |    |
  //   | |      |  |    +- 19 zeros
  //   | |      |  +------ mantissa
  //   | |      +--------- lower exponent
  //   | +---------------- upper exponent (custom extended to 6 bits)
  //   +------------------ sign
  //
  // Thus we have:
  //
  //   1.11110000... * 2^(124-127) = 0.2421875
  //
  // where 1.11110000... is in binary.
  //
  // See VFPExpandImm in the ARM Architecture Manual for details.
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000 -> 2.0f
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x40000000U, 0U));

  // imm8 = 0b00001111 -> 3.875f
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x40780000U, 0U));

  // imm8 = 0b00110000 -> 16.0f
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x41800000U, 0U));

  // imm8 = 0b00111111 -> 31.0f
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x41f80000U, 0U));

  // imm8 = 0b01000000 -> 0.125f
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3e000000U, 0U));

  // imm8 = 0b01001111 -> 0.2421875f (the worked example above)
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3e780000U, 0U));

  // imm8 = 0b01110000 -> 1.0f
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3f800000U, 0U));

  // imm8 = 0b01111111 -> 1.9375f
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3ff80000U, 0U));

  // imm8 = 0b10000000 -> -2.0f
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc0000000U, 0U));

  // imm8 = 0b10001111 -> -3.875f
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc0780000U, 0U));

  // imm8 = 0b10110000 -> -16.0f
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc1800000U, 0U));

  // imm8 = 0b10111111 -> -31.0f
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc1f80000U, 0U));

  // imm8 = 0b11000000 -> -0.125f
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbe000000U, 0U));

  // imm8 = 0b11001111 -> -0.2421875f
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbe780000U, 0U));

  // imm8 = 0b11110000 -> -1.0f
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbf800000U, 0U));

  // imm8 = 0b11111111 -> -1.9375f
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbff80000U, 0U));
}
160
TEST(Arm64InsnTest, MoveImmToFp64) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // For example, we decompose imm8 = 0b01001111 into:
  //
  // - sign = 0 (positive)
  // - upper exponent = 1
  // - lower exponent = 00
  // - mantissa = 1111
  //
  // This immediate corresponds to 64-bit floating point value:
  //
  //   0 011111111 00 1111 000000000000000000000000000000000000000000000000
  //   | |         |  |    |
  //   | |         |  |    +- 48 zeros
  //   | |         |  +------ mantissa
  //   | |         +--------- lower exponent
  //   | +------------------- upper exponent (custom extended to 9 bits)
  //   +--------------------- sign
  //
  // Thus we have:
  //
  //   1.11110000... * 2^(1020-1023) = 0.2421875
  //
  // where 1.11110000... is in binary.
  //
  // See VFPExpandImm in the ARM Architecture Manual for details.
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000 -> 2.0
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000000000000ULL, 0U));

  // imm8 = 0b00001111 -> 3.875
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x400f000000000000ULL, 0U));

  // imm8 = 0b00110000 -> 16.0
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4030000000000000ULL, 0U));

  // imm8 = 0b00111111 -> 31.0
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x403f000000000000ULL, 0U));

  // imm8 = 0b01000000 -> 0.125
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3fc0000000000000ULL, 0U));

  // imm8 = 0b01001111 -> 0.2421875 (the worked example above)
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3fcf000000000000ULL, 0U));

  // imm8 = 0b01110000 -> 1.0
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3ff0000000000000ULL, 0U));

  // imm8 = 0b01111111 -> 1.9375
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3fff000000000000ULL, 0U));

  // imm8 = 0b10000000 -> -2.0
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc000000000000000ULL, 0U));

  // imm8 = 0b10001111 -> -3.875
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc00f000000000000ULL, 0U));

  // imm8 = 0b10110000 -> -16.0
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc030000000000000ULL, 0U));

  // imm8 = 0b10111111 -> -31.0
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc03f000000000000ULL, 0U));

  // imm8 = 0b11000000 -> -0.125
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbfc0000000000000ULL, 0U));

  // imm8 = 0b11001111 -> -0.2421875
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbfcf000000000000ULL, 0U));

  // imm8 = 0b11110000 -> -1.0
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbff0000000000000ULL, 0U));

  // imm8 = 0b11111111 -> -1.9375
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbfff000000000000ULL, 0U));
}
267
TEST(Arm64InsnTest, MoveImmToF32x4) {
  // The tests below verify that the vector form of fmov replicates the
  // expanded immediate into all four 32-bit lanes. The 8-bit immediate field
  // consists of the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.
  // See VFPExpandImm in the ARM Architecture Manual for details.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000040000000ULL, 0x4000000040000000ULL));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x4078000040780000ULL, 0x4078000040780000ULL));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4180000041800000ULL, 0x4180000041800000ULL));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x41f8000041f80000ULL, 0x41f8000041f80000ULL));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3e0000003e000000ULL, 0x3e0000003e000000ULL));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3e7800003e780000ULL, 0x3e7800003e780000ULL));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3f8000003f800000ULL, 0x3f8000003f800000ULL));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3ff800003ff80000ULL, 0x3ff800003ff80000ULL));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc0000000c0000000ULL, 0xc0000000c0000000ULL));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc0780000c0780000ULL, 0xc0780000c0780000ULL));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc1800000c1800000ULL, 0xc1800000c1800000ULL));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc1f80000c1f80000ULL, 0xc1f80000c1f80000ULL));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbe000000be000000ULL, 0xbe000000be000000ULL));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbe780000be780000ULL, 0xbe780000be780000ULL));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbf800000bf800000ULL, 0xbf800000bf800000ULL));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbff80000bff80000ULL, 0xbff80000bff80000ULL));
}
349
TEST(Arm64InsnTest, MoveImmToF64x2) {
  // The tests below verify that the vector form of fmov replicates the
  // expanded immediate into both 64-bit lanes. The 8-bit immediate field
  // consists of the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.
  // See VFPExpandImm in the ARM Architecture Manual for details.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000000000000ULL, 0x4000000000000000ULL));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x400f000000000000ULL, 0x400f000000000000ULL));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4030000000000000ULL, 0x4030000000000000ULL));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x403f000000000000ULL, 0x403f000000000000ULL));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3fc0000000000000ULL, 0x3fc0000000000000ULL));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3fcf000000000000ULL, 0x3fcf000000000000ULL));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3ff0000000000000ULL, 0x3ff0000000000000ULL));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3fff000000000000ULL, 0x3fff000000000000ULL));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc000000000000000ULL, 0xc000000000000000ULL));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc00f000000000000ULL, 0xc00f000000000000ULL));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc030000000000000ULL, 0xc030000000000000ULL));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc03f000000000000ULL, 0xc03f000000000000ULL));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbfc0000000000000ULL, 0xbfc0000000000000ULL));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbfcf000000000000ULL, 0xbfcf000000000000ULL));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbff0000000000000ULL, 0xbff0000000000000ULL));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbfff000000000000ULL, 0xbfff000000000000ULL));
}
431
TEST(Arm64InsnTest, MoveFpRegToReg) {
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);
  uint64_t result = 0xffffeeeeddddccccULL;

  // fmov from the upper 64-bit lane of the SIMD register.
  asm("fmov %0, %1.d[1]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x3333cccc4444ddddULL);

  // fmov from the lower 64-bit lane.
  asm("fmov %0, %d1" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x1111aaaa2222bbbbULL);

  // fmov from the lowest 32 bits; the upper half of the GPR is cleared.
  asm("fmov %w0, %s1" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x2222bbbbULL);
}
448
TEST(Arm64InsnTest, MoveRegToFpReg) {
  uint64_t gpr = 0xffffeeeeddddccccULL;
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // fmov into the upper 64-bit lane; the lower lane is preserved.
  asm("fmov %0.d[1], %1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaa2222bbbbULL, 0xffffeeeeddddccccULL));

  // fmov into the lower 64-bit lane; the upper lane is zeroed.
  asm("fmov %d0, %1" : "=w"(vec) : "r"(gpr));
  ASSERT_EQ(vec, MakeUInt128(0xffffeeeeddddccccULL, 0x0));

  // fmov into the lowest 32 bits; everything above is zeroed.
  asm("fmov %s0, %w1" : "=w"(vec) : "r"(gpr));
  ASSERT_EQ(vec, MakeUInt128(0xddddccccULL, 0x0));
}
465
TEST(Arm64InsnTest, MoveFpRegToFpReg) {
  __uint128_t result;

  // Scalar double move copies 64 bits and zeroes the rest of the register.
  __uint128_t double_input =
      MakeUInt128(0x402e9eb851eb851fULL, 0xdeadbeefaabbccddULL);  // 15.31 in double
  asm("fmov %d0, %d1" : "=w"(result) : "w"(double_input));
  ASSERT_EQ(result, MakeUInt128(0x402e9eb851eb851fULL, 0ULL));

  // Scalar single move copies 32 bits and zeroes the rest of the register.
  __uint128_t single_input =
      MakeUInt128(0xaabbccdd40e51eb8ULL, 0x0011223344556677ULL);  // 7.16 in float
  asm("fmov %s0, %s1" : "=w"(result) : "w"(single_input));
  ASSERT_EQ(result, MakeUInt128(0x40e51eb8ULL, 0ULL));
}
479
TEST(Arm64InsnTest, InsertRegPartIntoSimd128) {
  uint64_t gpr = 0xffffeeeeddddccccULL;
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // Insert the low byte of the GPR into byte lane 3; other lanes survive.
  asm("mov %0.b[3], %w1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaacc22bbbbULL, 0x3333cccc4444ddddULL));

  // Insert the full GPR into doubleword lane 1; the low lane survives.
  asm("mov %0.d[1], %1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaacc22bbbbULL, 0xffffeeeeddddccccULL));
}
492
TEST(Arm64InsnTest, DuplicateRegIntoSimd128) {
  // dup replicates the low byte of the GPR into all sixteen byte lanes.
  constexpr auto AsmDup = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("dup %0.16b, %w1");
  ASSERT_EQ(AsmDup(0xabU), MakeUInt128(0xababababababababULL, 0xababababababababULL));
}
497
TEST(Arm64InsnTest, MoveSimd128ElemToRegSigned) {
  uint64_t result = 0;
  __uint128_t vec = MakeUInt128(0x9796959493929190ULL, 0x9f9e9d9c9b9a99ULL);

  // smov sign-extends the selected lane into the GPR.

  // Word lanes into a 64-bit GPR.
  asm("smov %0, %1.s[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xffffffff93929190ULL);

  asm("smov %0, %1.s[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xffffffff9c9b9a99ULL);

  // Halfword lanes into a 32-bit GPR (upper 32 bits cleared).
  asm("smov %w0, %1.h[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x00000000ffff9190ULL);

  asm("smov %w0, %1.h[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x00000000ffff9594ULL);

  // Byte lanes into a 32-bit GPR (upper 32 bits cleared).
  asm("smov %w0, %1.b[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x00000000ffffff90ULL);

  asm("smov %w0, %1.b[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x00000000ffffff92ULL);
}
523
TEST(Arm64InsnTest, MoveSimd128ElemToRegUnsigned) {
  uint64_t result = 0;
  __uint128_t vec = MakeUInt128(0xaaaabbbbcccceeeeULL, 0xffff000011112222ULL);

  // umov zero-extends the selected lane into the GPR.

  // Doubleword lanes.
  asm("umov %0, %1.d[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xaaaabbbbcccceeeeULL);

  asm("umov %0, %1.d[1]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xffff000011112222ULL);

  // Word lanes.
  asm("umov %w0, %1.s[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xcccceeeeULL);

  asm("umov %w0, %1.s[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x11112222ULL);

  // Halfword lanes.
  asm("umov %w0, %1.h[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xeeeeULL);

  asm("umov %w0, %1.h[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xbbbbULL);

  // Byte lanes.
  asm("umov %w0, %1.b[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xeeULL);

  asm("umov %w0, %1.b[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xccULL);
}
556
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4) {
  // smlal (by element): widening signed multiply-accumulate; per the
  // W_RES_WW0 wrapper, the third operand pre-loads the destination register.
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.h[1]")(
      MakeUInt128(0x9463229563989898ULL, 0x9358211674562701ULL),
      MakeUInt128(0x0218356462201349ULL, 0x6715188190973038ULL),
      MakeUInt128(0x1198004973407239ULL, 0x6103685406643193ULL));
  ASSERT_EQ(result, MakeUInt128(0x37c4a3494b9db539ULL, 0x37c3dab413a58e33ULL));
}
565
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper) {
  // smlal2: same as smlal but sources the upper four halfword lanes.
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[1]")(
      MakeUInt128(0x9478221818528624ULL, 0x0851400666044332ULL),
      MakeUInt128(0x5888569867054315ULL, 0x4706965747458550ULL),
      MakeUInt128(0x3323233421073015ULL, 0x4594051655379068ULL));
  ASSERT_EQ(result, MakeUInt128(0x5c30bd483c119e0fULL, 0x48ecc5ab6efb3a86ULL));
}
574
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper2) {
  // smlal2 with the highest element index (h[7]) to exercise index decoding.
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[7]")(
      MakeUInt128(0x9968262824727064ULL, 0x1336222178923903ULL),
      MakeUInt128(0x1760854289437339ULL, 0x3561889165125042ULL),
      MakeUInt128(0x4404008952719837ULL, 0x8738648058472689ULL));
  ASSERT_EQ(result, MakeUInt128(0x5d27e9db5e54d15aULL, 0x8b39d9f65f64ea0aULL));
}
583
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4) {
  // smlsl (by element): widening signed multiply-subtract; the third operand
  // seeds the destination register (W_RES_WW0 wrapper).
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.4s, %1.4h, %2.h[1]")(
      MakeUInt128(0x9143447886360410ULL, 0x3182350736502778ULL),
      MakeUInt128(0x5908975782727313ULL, 0x0504889398900992ULL),
      MakeUInt128(0x3913503373250855ULL, 0x9826558670892426ULL));
  ASSERT_EQ(result, MakeUInt128(0xfd58202775231935ULL, 0x61d69fb0921db6b6ULL));
}
592
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4Upper) {
  // smlsl2: same as smlsl but sources the upper four halfword lanes.
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.4s, %1.8h, %2.h[1]")(
      MakeUInt128(0x9320199199688285ULL, 0x1718395366913452ULL),
      MakeUInt128(0x2244470804592396ULL, 0x6028171565515656ULL),
      MakeUInt128(0x6611135982311225ULL, 0x0628905854914509ULL));
  ASSERT_EQ(result, MakeUInt128(0x645326f0814d99a3ULL, 0x05c4290053980b2eULL));
}
601
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4) {
  // umlal (by element): widening unsigned multiply-accumulate; the third
  // operand seeds the destination register (W_RES_WW0 wrapper).
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.4s, %1.4h, %2.h[1]")(
      MakeUInt128(0x9027601834840306ULL, 0x8113818551059797ULL),
      MakeUInt128(0x0566400750942608ULL, 0x7885735796037324ULL),
      MakeUInt128(0x5141467867036880ULL, 0x9880609716425849ULL));
  ASSERT_EQ(result, MakeUInt128(0x61c8e2c867f707f8ULL, 0xc5dfe72334816629ULL));
}
610
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4Upper) {
  // umlal2: same as umlal but sources the upper four halfword lanes.
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.4s, %1.8h, %2.h[1]")(
      MakeUInt128(0x9454236828860613ULL, 0x4084148637767009ULL),
      MakeUInt128(0x6120715124914043ULL, 0x0272538607648236ULL),
      MakeUInt128(0x3414334623518975ULL, 0x7664521641376796ULL));
  ASSERT_EQ(result, MakeUInt128(0x3c00351c3352428eULL, 0x7f9b6cda4425df7cULL));
}
619
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4) {
  // umlsl (by element): widening unsigned multiply-subtract; the third
  // operand seeds the destination register (W_RES_WW0 wrapper).
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.4s, %1.4h, %2.h[1]")(
      MakeUInt128(0x9128009282525619ULL, 0x0205263016391147ULL),
      MakeUInt128(0x7247331485739107ULL, 0x7758744253876117ULL),
      MakeUInt128(0x4657867116941477ULL, 0x6421441111263583ULL));
  ASSERT_EQ(result, MakeUInt128(0x0268619be9b26a3cULL, 0x1876471910da19edULL));
}
628
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4Upper) {
  // umlsl2: same as umlsl but sources the upper four halfword lanes.
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.4s, %1.8h, %2.h[1]")(
      MakeUInt128(0x9420757136275167ULL, 0x4573189189456283ULL),
      MakeUInt128(0x5257044133543758ULL, 0x5753426986994725ULL),
      MakeUInt128(0x4703165661399199ULL, 0x9682628247270641ULL));
  ASSERT_EQ(result, MakeUInt128(0x2b7d4cb24d79259dULL, 0x8895afc6423a13adULL));
}
637
TEST(Arm64InsnTest, AsmConvertI32F32) {
  // scvtf: signed 32-bit GPR to single-precision float.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1");
  ASSERT_EQ(AsmScvtf(21), MakeUInt128(0x41a80000U, 0U));  // 21.0f
}
642
TEST(Arm64InsnTest, AsmConvertU32F32) {
  // ucvtf: unsigned 32-bit GPR to single-precision float.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1");

  ASSERT_EQ(AsmUcvtf(29), MakeUInt128(0x41e80000U, 0U));  // 29.0f

  // The topmost bit must not be treated as a sign bit.
  ASSERT_EQ(AsmUcvtf(1U << 31), MakeUInt128(0x4f000000U, 0U));  // 2^31 as float
}
651
TEST(Arm64InsnTest, AsmConvertU32F32FromSimdReg) {
  // ucvtf (scalar SIMD source): unsigned 32-bit lane to single-precision.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1");

  ASSERT_EQ(AsmUcvtf(28), MakeUInt128(0x41e00000U, 0U));  // 28.0f

  // The topmost bit must not be treated as a sign bit.
  ASSERT_EQ(AsmUcvtf(1U << 31), MakeUInt128(0x4f000000U, 0U));  // 2^31 as float
}
660
TEST(Arm64InsnTest, AsmConvertI32F64) {
  // scvtf: signed 32-bit GPR to double-precision float.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1");
  ASSERT_EQ(AsmScvtf(21), MakeUInt128(0x4035000000000000ULL, 0U));  // 21.0
}
665
TEST(Arm64InsnTest, AsmConvertU32F64) {
  // ucvtf: unsigned 32-bit GPR to double-precision float.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1");

  ASSERT_EQ(AsmUcvtf(18), MakeUInt128(0x4032000000000000ULL, 0U));  // 18.0

  // The topmost bit must not be treated as a sign bit.
  ASSERT_EQ(AsmUcvtf(1U << 31), MakeUInt128(0x41e0000000000000ULL, 0U));  // 2^31
}
674
TEST(Arm64InsnTest, AsmConvertI64F32) {
  // scvtf: signed 64-bit GPR to single-precision float.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1");
  ASSERT_EQ(AsmScvtf(11), MakeUInt128(0x41300000U, 0U));  // 11.0f
}
679
TEST(Arm64InsnTest, AsmConvertU64F32) {
  // ucvtf: unsigned 64-bit GPR to single-precision float.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1");

  ASSERT_EQ(AsmUcvtf(3), MakeUInt128(0x40400000U, 0U));  // 3.0f

  // The topmost bit must not be treated as a sign bit.
  ASSERT_EQ(AsmUcvtf(1ULL << 63), MakeUInt128(0x5f000000U, 0U));  // 2^63 as float
}
688
TEST(Arm64InsnTest, AsmConvertI64F64) {
  // scvtf: signed 64-bit GPR to double-precision float.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1");
  ASSERT_EQ(AsmScvtf(137), MakeUInt128(0x4061200000000000ULL, 0U));  // 137.0
}
693
TEST(Arm64InsnTest, AsmConvertI32F32FromSimdReg) {
  // scvtf (scalar SIMD source): signed 32-bit lane to single-precision.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1");
  ASSERT_EQ(AsmScvtf(1109), MakeUInt128(0x448aa000ULL, 0U));  // 1109.0f
}
698
TEST(Arm64InsnTest, AsmConvertI64F64FromSimdReg) {
  // scvtf (scalar SIMD source): signed 64-bit lane to double-precision.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1");
  ASSERT_EQ(AsmScvtf(123), MakeUInt128(0x405ec00000000000ULL, 0U));  // 123.0
}
703
TEST(Arm64InsnTest, AsmConvertI32x4F32x4) {
  // Vector scvtf: each signed 32-bit lane becomes a single-precision float.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s");
  __uint128_t lanes = MakeUInt128(0x0000003500000014ULL, 0x0000005400000009ULL);
  // 20, 53, 84, 9 -> 20.0f, 53.0f, 84.0f, 9.0f per lane.
  ASSERT_EQ(AsmScvtf(lanes), MakeUInt128(0x4254000041a00000ULL, 0x42a8000041100000ULL));
}
709
TEST(Arm64InsnTest, AsmConvertI64x2F64x2) {
  // Vector scvtf: each signed 64-bit lane becomes a double-precision float.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d");
  __uint128_t lanes = MakeUInt128(static_cast<int64_t>(-9), 17U);
  // -9 and 17 -> -9.0 and 17.0 per lane.
  ASSERT_EQ(AsmScvtf(lanes), MakeUInt128(0xc022000000000000ULL, 0x4031000000000000ULL));
}
715
TEST(Arm64InsnTest, AsmConvertU32x4F32x4) {
  // Vector ucvtf: each unsigned 32-bit lane becomes a single-precision
  // float; the lane with the top bit set checks unsigned interpretation.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s");
  __uint128_t lanes = MakeUInt128(0x8000000000000019ULL, 0x0000005800000010ULL);
  ASSERT_EQ(AsmUcvtf(lanes), MakeUInt128(0x4f00000041c80000ULL, 0x42b0000041800000ULL));
}
721
TEST(Arm64InsnTest, AsmConvertU64x2F64x2) {
  // Vector ucvtf: each unsigned 64-bit lane becomes a double-precision
  // float; the 2^63 lane checks unsigned interpretation.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d");
  __uint128_t lanes = MakeUInt128(1ULL << 63, 29U);
  ASSERT_EQ(AsmUcvtf(lanes), MakeUInt128(0x43e0000000000000ULL, 0x403d000000000000ULL));
}
727
TEST(Arm64InsnTest, AsmConvertU64F64) {
  // ucvtf: unsigned 64-bit GPR to double-precision float.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1");

  ASSERT_EQ(AsmUcvtf(49), MakeUInt128(0x4048800000000000ULL, 0U));  // 49.0

  // The topmost bit must not be treated as a sign bit.
  ASSERT_EQ(AsmUcvtf(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));  // 2^63
}
736
TEST(Arm64InsnTest, AsmConvertU64F64FromSimdReg) {
  // ucvtf (scalar SIMD source): unsigned 64-bit lane to double-precision.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1");

  ASSERT_EQ(AsmUcvtf(47), MakeUInt128(0x4047800000000000ULL, 0U));  // 47.0

  // The topmost bit must not be treated as a sign bit.
  ASSERT_EQ(AsmUcvtf(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));  // 2^63
}
745
TEST(Arm64InsnTest, AsmConvertLiterals) {
  // Verify that the compiler encodes the floating-point literals used in the
  // conversion tests below exactly as expected. The values straddle the
  // .25/.5/.75 boundaries so the rounding-mode tests below are meaningful.

  // IEEE-754 single-precision (binary32) encodings.
  ASSERT_EQ(bit_cast<uint32_t>(-7.50f), 0xc0f00000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.75f), 0xc0d80000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.50f), 0xc0d00000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.25f), 0xc0c80000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.25f), 0x40c80000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.50f), 0x40d00000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.75f), 0x40d80000U);
  ASSERT_EQ(bit_cast<uint32_t>(7.50f), 0x40f00000U);

  // IEEE-754 double-precision (binary64) encodings.
  ASSERT_EQ(bit_cast<uint64_t>(-7.50), 0xc01e000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.75), 0xc01b000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.50), 0xc01a000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.25), 0xc019000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.25), 0x4019000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.50), 0x401a000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.75), 0x401b000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(7.50), 0x401e000000000000ULL);
}
767
768 template <typename IntType, typename FuncType>
TestConvertF32ToInt(FuncType AsmFunc,std::initializer_list<int> expected)769 void TestConvertF32ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
770 // Note that bit_cast isn't a constexpr.
771 static const uint32_t kConvertF32ToIntInputs[] = {
772 bit_cast<uint32_t>(-7.50f),
773 bit_cast<uint32_t>(-6.75f),
774 bit_cast<uint32_t>(-6.50f),
775 bit_cast<uint32_t>(-6.25f),
776 bit_cast<uint32_t>(6.25f),
777 bit_cast<uint32_t>(6.50f),
778 bit_cast<uint32_t>(6.75f),
779 bit_cast<uint32_t>(7.50f),
780 };
781
782 const size_t kConvertF32ToIntInputsSize = sizeof(kConvertF32ToIntInputs) / sizeof(uint32_t);
783 ASSERT_EQ(kConvertF32ToIntInputsSize, expected.size());
784
785 auto expected_it = expected.begin();
786 for (size_t input_it = 0; input_it < kConvertF32ToIntInputsSize; input_it++) {
787 ASSERT_EQ(AsmFunc(kConvertF32ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
788 }
789 }
790
// FCVT* scalar tests: float32 (S register) -> 32-bit general register, one
// signed/unsigned pair per rounding mode.  Expected values line up with the
// shared input vector in TestConvertF32ToInt:
//   {-7.50, -6.75, -6.50, -6.25, 6.25, 6.50, 6.75, 7.50}.
// The unsigned variants (fcvtau/fcvtmu/fcvtnu/fcvtpu/fcvtzu) saturate
// negative inputs to 0.

// fcvtas/fcvtau: round to nearest, ties away from zero (-7.5 -> -8, 6.5 -> 7).
TEST(Arm64InsnTest, AsmConvertF32I32TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

// fcvtms/fcvtmu: round toward minus infinity (-6.25 -> -7, 7.5 -> 7).
TEST(Arm64InsnTest, AsmConvertF32I32NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

// fcvtns/fcvtnu: round to nearest, ties to even (-6.5 -> -6, 7.5 -> 8).
TEST(Arm64InsnTest, AsmConvertF32I32TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

// fcvtps/fcvtpu: round toward plus infinity (-7.5 -> -7, 6.25 -> 7).
TEST(Arm64InsnTest, AsmConvertF32I32PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

// fcvtzs/fcvtzu: round toward zero, i.e. truncate (-7.5 -> -7, 7.5 -> 7).
TEST(Arm64InsnTest, AsmConvertF32I32Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
840
// FCVT* scalar tests: float32 (S register) -> 64-bit general register (%x0).
// Same rounding-mode matrix and expected values as the 32-bit destination
// tests above; TestConvertF32ToInt's static_cast<uint64_t> sign-extends the
// negative expectations to 64 bits.
TEST(Arm64InsnTest, AsmConvertF32I64TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U64NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I64TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U64Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
890
891 template <typename IntType, typename FuncType>
TestConvertF64ToInt(FuncType AsmFunc,std::initializer_list<int> expected)892 void TestConvertF64ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
893 // Note that bit_cast isn't a constexpr.
894 static const uint64_t kConvertF64ToIntInputs[] = {
895 bit_cast<uint64_t>(-7.50),
896 bit_cast<uint64_t>(-6.75),
897 bit_cast<uint64_t>(-6.50),
898 bit_cast<uint64_t>(-6.25),
899 bit_cast<uint64_t>(6.25),
900 bit_cast<uint64_t>(6.50),
901 bit_cast<uint64_t>(6.75),
902 bit_cast<uint64_t>(7.50),
903 };
904
905 const size_t kConvertF64ToIntInputsSize = sizeof(kConvertF64ToIntInputs) / sizeof(uint64_t);
906 ASSERT_EQ(kConvertF64ToIntInputsSize, expected.size());
907
908 auto expected_it = expected.begin();
909 for (size_t input_it = 0; input_it < kConvertF64ToIntInputsSize; input_it++) {
910 ASSERT_EQ(AsmFunc(kConvertF64ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
911 }
912 }
913
// FCVT* scalar tests: float64 (D register) -> 32-bit general register.
// Expected values line up with the input vector in TestConvertF64ToInt:
//   {-7.50, -6.75, -6.50, -6.25, 6.25, 6.50, 6.75, 7.50}.
// Rounding modes: as/au = ties away, ms/mu = toward -inf, ns/nu = ties to
// even, ps/pu = toward +inf, zs/zu = toward zero.  Unsigned variants saturate
// negative inputs to 0.
TEST(Arm64InsnTest, AsmConvertF64I32TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U32NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I32TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U32Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
963
// FCVT* scalar tests: float64 (D register) -> 64-bit general register (%x0).
// Same rounding-mode matrix and expected values as the 32-bit destination
// tests above.
TEST(Arm64InsnTest, AsmConvertF64I64TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I64TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1013
// FCVT* scalar tests with a SIMD register destination (%s0 instead of a
// general register).  Same rounding-mode matrix and expected values as the
// GPR-destination float32 tests above.
TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarNegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarNegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarPosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarPosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarTruncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTruncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1063
// FCVT* scalar tests, float64 with a SIMD register destination (%d0).  Same
// rounding-mode matrix and expected values as the GPR-destination float64
// tests above.
TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarNegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarNegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarPosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarPosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarTruncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTruncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1113
// FCVT* vector tests: 4x float32 -> 4x int32 (".4s" arrangement).  arg1 holds
// the four negative inputs, arg2 the four positive ones; results are checked
// as packed 32-bit lanes (e.g. lane value 0xfffffff8 == -8).  Rounding-mode
// semantics match the scalar tests above; unsigned variants flush negative
// inputs to 0.
TEST(Arm64InsnTest, AsmConvertF32I32x4TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtas(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffff9ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtas(arg2), MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtau(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtau(arg2), MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtms(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffff9fffffff9ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtms(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtmu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtmu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtns(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtns(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtnu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtnu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtps(arg1), MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtps(arg2), MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtpu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtpu(arg2), MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1193
// FCVT* vector tests: 2x float64 -> 2x int64 (".2d" arrangement).  The eight
// test inputs are spread across four two-lane vectors; rounding-mode
// semantics match the scalar tests above.
// NOTE(review): the "x4" suffix in these test names looks like a copy/paste
// from the ".4s" tests above -- the vectors here hold two lanes ("x2");
// consider renaming.
TEST(Arm64InsnTest, AsmConvertF64I64x4TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtas(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtas(arg2), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtas(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtas(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtau(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtau(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtau(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtau(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtms(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtms(arg2), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtms(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtms(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtmu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtmu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtmu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtmu(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtns(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtns(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtns(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtns(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtnu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtnu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtnu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtnu(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtps(arg1), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtps(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtps(arg3), MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtps(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtpu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtpu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtpu(arg3), MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtpu(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtzs(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtzs(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtzu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtzu(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1313
TEST(Arm64InsnTest, AsmConvertX32F32Scalar) {
  // SCVTF (scalar, fixed-point): signed int32 in a GPR with 7 fraction bits
  // -> float32, i.e. result = arg / 2^7.
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1, #7");

  // 0x610 / 128 = 12.125 -> 0x41420000.
  ASSERT_EQ(AsmConvertX32F32(0x610), MakeUInt128(0x41420000ULL, 0U));

  // 1U << 31 is INT32_MIN for scvtf: -2^31 / 2^7 = -16777216.0 -> 0xcb800000.
  ASSERT_EQ(AsmConvertX32F32(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}
1321
TEST(Arm64InsnTest, AsmConvertX32F64Scalar) {
  // scvtf (fixed-point): signed 32-bit GPR -> float64 with 8 fractional bits.
  constexpr auto AsmConvertX32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1, #8");

  // 0x487 / 2^8 = 4.52734375.
  ASSERT_EQ(AsmConvertX32F64(0x487), MakeUInt128(0x40121c0000000000ULL, 0U));

  // Use 1U << 31 for the INT32_MIN bit pattern: `1 << 31` overflows a signed
  // int, which is undefined behavior in C++ (the companion test
  // AsmConvertX32F32Scalar already uses the unsigned form). The bits passed
  // to %w1 are identical, so the expected result is unchanged.
  ASSERT_EQ(AsmConvertX32F64(1U << 31), MakeUInt128(0xc160000000000000ULL, 0U));
}
1329
TEST(Arm64InsnTest, AsmConvertX32F32) {
  // scvtf (fixed-point, SIMD register form): signed 32-bit -> float32 with 7
  // fractional bits.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1, #7");

  // 0x123 / 2^7 = 2.2734375f.
  ASSERT_EQ(Scvtf(0x123), MakeUInt128(0x40118000ULL, 0U));
  ASSERT_EQ(Scvtf(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}
1337
TEST(Arm64InsnTest, AsmConvertX32x4F32x4) {
  // scvtf (vector, fixed-point): four signed int32 lanes -> four float32
  // lanes with 11 fractional bits.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s, #11");
  ASSERT_EQ(Scvtf(MakeUInt128(0x80000000ffff9852ULL, 0x0000110200001254ULL)),
            MakeUInt128(0xc9800000c14f5c00ULL, 0x400810004012a000ULL));
}
1343
TEST(Arm64InsnTest, AsmConvertUX32F32Scalar) {
  // ucvtf (fixed-point): unsigned 32-bit GPR -> float32 with 7 fractional
  // bits.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1, #7");

  // 0x857 / 2^7 = 16.6796875f.
  ASSERT_EQ(Ucvtf(0x857), MakeUInt128(0x41857000ULL, 0U));
  // Top bit set is treated as a large unsigned value, not a negative one.
  ASSERT_EQ(Ucvtf(1U << 31), MakeUInt128(0x4b800000ULL, 0U));

  // The default rounding mode (FPRounding_TIEEVEN) applies when the input has
  // more significant bits than a float32 mantissa can hold.
  ASSERT_EQ(Ucvtf(0x80000080), MakeUInt128(0x4b800000ULL, 0U));
  ASSERT_EQ(Ucvtf(0x800000c0), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(Ucvtf(0x80000140), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(Ucvtf(0x80000180), MakeUInt128(0x4b800002ULL, 0U));
}
1357
TEST(Arm64InsnTest, AsmConvertUX32F64Scalar) {
  // ucvtf (fixed-point): unsigned 32-bit GPR -> float64 with 8 fractional
  // bits.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1, #8");

  // 0x361 / 2^8 = 3.37890625.
  ASSERT_EQ(Ucvtf(0x361), MakeUInt128(0x400b080000000000ULL, 0U));
  ASSERT_EQ(Ucvtf(1U << 31), MakeUInt128(0x4160000000000000ULL, 0U));
}
1365
TEST(Arm64InsnTest, AsmConvertUX32F32) {
  // ucvtf (fixed-point, SIMD register form): unsigned 32-bit -> float32 with
  // 7 fractional bits.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1, #7");

  // 0x456 / 2^7 = 8.671875f.
  ASSERT_EQ(Ucvtf(0x456), MakeUInt128(0x410ac000ULL, 0U));
  ASSERT_EQ(Ucvtf(1U << 31), MakeUInt128(0x4b800000ULL, 0U));
}
1373
TEST(Arm64InsnTest, AsmConvertUX32x4F32x4) {
  // ucvtf (vector, fixed-point): four unsigned int32 lanes -> four float32
  // lanes with 11 fractional bits.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s, #11");
  ASSERT_EQ(Ucvtf(MakeUInt128(0x8000000000008023ULL, 0x0000201800001956ULL)),
            MakeUInt128(0x4980000041802300ULL, 0x40806000404ab000ULL));
}
1379
TEST(Arm64InsnTest, AsmConvertX64F32Scalar) {
  // scvtf (fixed-point): signed 64-bit GPR -> float32 with 10 fractional
  // bits. 0x2234 / 2^10 = 8.55078125f.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1, #10");
  ASSERT_EQ(Scvtf(0x2234), MakeUInt128(0x4108d000ULL, 0U));
}
1385
TEST(Arm64InsnTest, AsmConvertX64F64Scalar) {
  // scvtf (fixed-point): signed 64-bit GPR -> float64 with 10 fractional
  // bits. 0x1324 / 2^10 = 4.78515625.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1, #10");
  ASSERT_EQ(Scvtf(0x1324), MakeUInt128(0x4013240000000000ULL, 0U));
}
1391
TEST(Arm64InsnTest, AsmConvertUX64F32Scalar) {
  // ucvtf (fixed-point): unsigned 64-bit GPR -> float32 with 10 fractional
  // bits. 0x5763 / 2^10 = 21.8466796875f (rounded to float).
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1, #10");
  ASSERT_EQ(Ucvtf(0x5763), MakeUInt128(0x41aec600ULL, 0U));
}
1397
TEST(Arm64InsnTest, AsmConvertUX64F64Scalar) {
  // ucvtf (fixed-point): unsigned 64-bit GPR -> float64 with 10 fractional
  // bits. 0x2217 / 2^10 = 8.5224609375.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1, #10");
  ASSERT_EQ(Ucvtf(0x2217), MakeUInt128(0x40210b8000000000ULL, 0U));
}
1403
TEST(Arm64InsnTest, AsmConvertX64F64) {
  // scvtf (fixed-point, SIMD register form): signed 64-bit -> float64 with 12
  // fractional bits.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1, #12");

  // 0x723 / 2^12 = 0.446044921875.
  ASSERT_EQ(Scvtf(0x723), MakeUInt128(0x3fdc8c0000000000ULL, 0U));
  // INT64_MIN bit pattern converts to a negative value.
  ASSERT_EQ(Scvtf(1ULL << 63), MakeUInt128(0xc320000000000000ULL, 0U));
}
1411
TEST(Arm64InsnTest, AsmConvertUX64F64) {
  // ucvtf (fixed-point, SIMD register form): unsigned 64-bit -> float64 with
  // 12 fractional bits.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #12");

  // 0x416 / 2^12 = 0.25537109375.
  ASSERT_EQ(Ucvtf(0x416), MakeUInt128(0x3fd0580000000000ULL, 0U));
  // Top bit set is a large positive value under unsigned conversion.
  ASSERT_EQ(Ucvtf(1ULL << 63), MakeUInt128(0x4320000000000000ULL, 0U));
}
1419
TEST(Arm64InsnTest, AsmConvertUX64F64With64BitFraction) {
  // ucvtf with the maximum fractional-bit count (#64): the whole input is
  // fraction, so 2^63 converts to 0.5.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #64");
  ASSERT_EQ(Ucvtf(1ULL << 63), MakeUInt128(0x3fe0'0000'0000'0000ULL, 0U));
}
1425
TEST(Arm64InsnTest, AsmConvertX64x2F64x2) {
  // scvtf (vector, fixed-point): two signed int64 lanes -> two float64 lanes
  // with 12 fractional bits.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d, #12");
  ASSERT_EQ(Scvtf(MakeUInt128(1ULL << 63, 0x8086U)),
            MakeUInt128(0xc320000000000000ULL, 0x402010c000000000ULL));
}
1431
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2) {
  // ucvtf (vector, fixed-point): two unsigned int64 lanes -> two float64
  // lanes with 12 fractional bits.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #12");
  ASSERT_EQ(Ucvtf(MakeUInt128(1ULL << 63, 0x6809U)),
            MakeUInt128(0x4320000000000000ULL, 0x401a024000000000ULL));
}
1437
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2With64BitFraction) {
  // ucvtf (vector) with the maximum fractional-bit count (#64): each lane is
  // interpreted as a pure fraction in [0, 1).
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #64");
  __uint128_t lanes = MakeUInt128(0x7874'211c'b7aa'f597ULL, 0x2c0f'5504'd25e'f673ULL);
  ASSERT_EQ(Ucvtf(lanes), MakeUInt128(0x3fde'1d08'472d'eabdULL, 0x3fc6'07aa'8269'2f7bULL));
}
1444
TEST(Arm64InsnTest, AsmConvertF32X32Scalar) {
  // fcvtzs (fixed-point): float32 -> signed 32-bit GPR with 16 fractional
  // bits, rounding toward zero.
  constexpr auto Fcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #16");

  ASSERT_EQ(Fcvtzs(0x4091eb85U),  // 4.56f
            MakeUInt128(0x00048f5cU, 0U));
  ASSERT_EQ(Fcvtzs(0xc0d80000U),  // -6.75f
            MakeUInt128(0xfff94000U, 0U));
  // The default NaN converts to zero.
  ASSERT_EQ(Fcvtzs(kDefaultNaN32), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}
1455
TEST(Arm64InsnTest, AsmConvertF32UX32Scalar) {
  // NOTE(review): the test name says "UX32" (unsigned destination) but the
  // instruction under test is fcvtzs (signed convert), and the expected value
  // for the negative input below (0xfff2c000) matches the *signed* fixed-point
  // conversion rather than the 0 that fcvtzu would produce. Confirm whether
  // fcvtzu was intended or whether the test should be renamed.
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #16");
  uint32_t arg1 = 0x41223d71U;  // 10.14 in float
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x000a23d7U, 0U));

  uint32_t arg2 = 0xc1540000U;  // -13.25 in float
  ASSERT_EQ(AsmConvertF32UX32(arg2), MakeUInt128(0xfff2c000U, 0U));

  // The default NaN converts to zero.
  ASSERT_EQ(AsmConvertF32UX32(kDefaultNaN32), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}
1466
TEST(Arm64InsnTest, AsmConvertF32UX32With31FractionalBits) {
  // Converts float32 to fixed-point with 31 fractional bits: 0.25 * 2^31 =
  // 0x20000000.
  // NOTE(review): the name says "UX32" but the instruction is fcvtzs
  // (signed); for this non-negative input the result is the same either way.
  // Confirm which opcode was intended.
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #31");
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x20000000U, 0U));
}
1472
TEST(Arm64InsnTest, AsmConvertF64X32Scalar) {
  // fcvtzs (fixed-point): float64 -> signed 32-bit GPR with 16 fractional
  // bits, rounding toward zero.
  constexpr auto Fcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1, #16");

  ASSERT_EQ(Fcvtzs(0x401e8f5c28f5c28fULL),  // 7.46 in double
            MakeUInt128(0x0007a3d7U, 0U));
  ASSERT_EQ(Fcvtzs(0xc040200000000000ULL),  // -32.44 in double
            MakeUInt128(0xffdfc000U, 0U));
}
1481
TEST(Arm64InsnTest, AsmConvertF32X64Scalar) {
  // fcvtzs (fixed-point): float32 -> signed 64-bit GPR with 16 fractional
  // bits, rounding toward zero.
  constexpr auto Fcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1, #16");

  // 7.50 * 2^16 = 0x78000.
  ASSERT_EQ(Fcvtzs(bit_cast<uint32_t>(7.50f)), MakeUInt128(0x0000000000078000ULL, 0ULL));
  // -6.50 * 2^16 as a signed 64-bit value.
  ASSERT_EQ(Fcvtzs(bit_cast<uint32_t>(-6.50f)), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}
1490
TEST(Arm64InsnTest, AsmConvertF32UX64With63FractionalBits) {
  // Converts float32 to 64-bit fixed-point with 63 fractional bits:
  // 0.25 * 2^63 = 0x2000000000000000.
  // NOTE(review): the name says "UX64" but the instruction is fcvtzs
  // (signed); for this non-negative input the result is the same either way.
  // Confirm which opcode was intended.
  constexpr auto AsmConvertF32UX64 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1, #63");
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX64(arg1), MakeUInt128(0x20000000'00000000ULL, 0U));
}
1496
TEST(Arm64InsnTest, AsmConvertF64X64Scalar) {
  // fcvtzs (fixed-point): float64 -> signed 64-bit GPR with 16 fractional
  // bits, rounding toward zero.
  constexpr auto Fcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1, #16");

  // 7.50 * 2^16 = 0x78000.
  ASSERT_EQ(Fcvtzs(bit_cast<uint64_t>(7.50)), MakeUInt128(0x0000000000078000ULL, 0ULL));
  // -6.50 * 2^16 as a signed 64-bit value.
  ASSERT_EQ(Fcvtzs(bit_cast<uint64_t>(-6.50)), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}
1505
TEST(Arm64InsnTest, AsmConvertF32X32x4) {
  // fcvtzs (vector, fixed-point): four float32 lanes -> four signed int32
  // lanes with 2 fractional bits, rounding toward zero.
  constexpr auto Fcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s, #2");
  ASSERT_EQ(Fcvtzs(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f)),
            MakeUInt128(0x00000000ffffffeaULL, 0x0000001a00000000ULL));
}
1511
TEST(Arm64InsnTest, AsmConvertF64UX32Scalar) {
  // fcvtzu (fixed-point): float64 -> unsigned 32-bit GPR with 16 fractional
  // bits; negative inputs clamp to 0.
  constexpr auto Fcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1, #16");

  ASSERT_EQ(Fcvtzu(0x4020947ae147ae14ULL),  // 8.29 in double
            MakeUInt128(0x00084a3dU, 0U));
  ASSERT_EQ(Fcvtzu(0xc023666666666666ULL),  // -9.70 in double
            MakeUInt128(0U, 0U));
}
1520
TEST(Arm64InsnTest, AsmConvertF32UX64Scalar) {
  // fcvtzu (fixed-point): float32 -> unsigned 64-bit GPR with 16 fractional
  // bits; negative inputs clamp to 0.
  constexpr auto Fcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1, #16");
  // 7.50 * 2^16 = 0x78000.
  ASSERT_EQ(Fcvtzu(bit_cast<uint32_t>(7.50f)), MakeUInt128(0x0000000000078000ULL, 0ULL));
  ASSERT_EQ(Fcvtzu(bit_cast<uint32_t>(-6.50f)), 0ULL);
}
1528
TEST(Arm64InsnTest, AsmConvertF64UX64Scalar) {
  // fcvtzu (fixed-point): float64 -> unsigned 64-bit GPR with 16 fractional
  // bits; negative inputs clamp to 0.
  constexpr auto Fcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #16");

  // 7.50 * 2^16 = 0x78000.
  ASSERT_EQ(Fcvtzu(bit_cast<uint64_t>(7.50)), MakeUInt128(0x0000000000078000ULL, 0ULL));
  ASSERT_EQ(Fcvtzu(bit_cast<uint64_t>(-6.50)), MakeUInt128(0ULL, 0ULL));
}
1537
TEST(Arm64InsnTest, AsmConvertF64UX64ScalarWith64BitFraction) {
  // fcvtzu with the maximum fractional-bit count (#64): 0.625 * 2^64 =
  // 0xa000000000000000.
  constexpr auto Fcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #64");
  ASSERT_EQ(Fcvtzu(bit_cast<uint64_t>(0.625)), MakeUInt128(0xa000'0000'0000'0000ULL, 0ULL));
}
1543
TEST(Arm64InsnTest, AsmConvertF32UX32x4) {
  // fcvtzu (vector, fixed-point): four float32 lanes -> four unsigned int32
  // lanes with 2 fractional bits; negative lanes clamp to 0.
  // (Local renamed from AsmFcvtzs: the instruction under test is fcvtzu.)
  constexpr auto Fcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s, #2");
  ASSERT_EQ(Fcvtzu(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f)),
            MakeUInt128(0x0000000000000000ULL, 0x0000001a00000000ULL));
}
1549
TEST(Arm64InsnTest, Fp32ConditionalSelect) {
  // fcsel picks the first FP operand when the condition holds and the second
  // otherwise; the preceding cmp sets the flags it reads.
  uint64_t cmp_lhs = 3;
  uint64_t cmp_rhs = 7;
  uint64_t on_true = 0xfedcba9876543210ULL;
  uint64_t on_false = 0x0123456789abcdefULL;
  __uint128_t res;

  // 3 != 7, so "eq" fails and the second operand's low word is selected.
  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, eq"
      : "=w"(res)
      : "r"(cmp_lhs), "r"(cmp_rhs), "w"(on_true), "w"(on_false));
  ASSERT_EQ(res, MakeUInt128(0x89abcdefULL, 0U));

  // "ne" holds, so the first operand's low word is selected.
  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, ne"
      : "=w"(res)
      : "r"(cmp_lhs), "r"(cmp_rhs), "w"(on_true), "w"(on_false));
  ASSERT_EQ(res, MakeUInt128(0x76543210ULL, 0U));
}
1569
TEST(Arm64InsnTest, Fp64ConditionalSelect) {
  // Same as Fp32ConditionalSelect but selecting whole 64-bit FP registers.
  uint64_t cmp_lhs = 8;
  uint64_t cmp_rhs = 3;
  uint64_t on_true = 0xfedcba9876543210ULL;
  uint64_t on_false = 0x0123456789abcdefULL;
  __uint128_t res;

  // 8 != 3, so "eq" fails and the second operand is selected.
  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, eq"
      : "=w"(res)
      : "r"(cmp_lhs), "r"(cmp_rhs), "w"(on_true), "w"(on_false));
  ASSERT_EQ(res, MakeUInt128(0x0123456789abcdefULL, 0U));

  // "ne" holds, so the first operand is selected.
  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, ne"
      : "=w"(res)
      : "r"(cmp_lhs), "r"(cmp_rhs), "w"(on_true), "w"(on_false));
  ASSERT_EQ(res, MakeUInt128(0xfedcba9876543210ULL, 0U));
}
1589
TEST(Arm64InsnTest, RoundUpFp32) {
  // frintp (scalar float32): round toward positive infinity. The upper half
  // of the input register holds junk that must not leak into the result.
  uint64_t input = 0xdeadbeef402df854ULL;  // low word: 2.7182817f
  __uint128_t rounded = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %s0, %s1")(input);
  ASSERT_EQ(rounded, MakeUInt128(0x40400000ULL, 0U));  // 3.0f
}
1596
TEST(Arm64InsnTest, RoundUpFp64) {
  // frintp (scalar float64): round toward positive infinity.
  uint64_t input = 0x4005BF0A8B145769ULL;  // 2.7182817 in double
  __uint128_t rounded = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %d0, %d1")(input);
  ASSERT_EQ(rounded, MakeUInt128(0x4008000000000000ULL, 0U));  // 3.0
}
1603
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp64) {
  // frinta (scalar float64): round to the nearest integral value, with exact
  // halves rounding away from zero (FPRounding_TIEAWAY).
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %d0, %d1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01E000000000000ULL), MakeUInt128(0xc020000000000000ULL, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc01B000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01A000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc019000000000000ULL), MakeUInt128(0xc018000000000000ULL, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x4019000000000000ULL), MakeUInt128(0x4018000000000000ULL, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401A000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x401B000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401E000000000000ULL), MakeUInt128(0x4020000000000000ULL, 0U));

  // -0.49999999999999994 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xBFDFFFFFFFFFFFFF), MakeUInt128(0x8000000000000000U, 0U));

  // A number too large to have fractional precision, should not change upon rounding with tie-away
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(1.0e100)), MakeUInt128(bit_cast<uint64_t>(1.0e100), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-1.0e100)), MakeUInt128(bit_cast<uint64_t>(-1.0e100), 0U));
}
1638
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp32) {
  // frinta (scalar float32): round to the nearest integral value, with exact
  // halves rounding away from zero (FPRounding_TIEAWAY).
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %s0, %s1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0f00000U), MakeUInt128(0xc1000000U, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc0d80000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0d00000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc0c80000U), MakeUInt128(0xc0c00000U, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x40c80000U), MakeUInt128(0x40c00000U, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40d00000U), MakeUInt128(0x40e00000U, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x40d80000U), MakeUInt128(0x40e00000U, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40f00000U), MakeUInt128(0x41000000U, 0U));

  // -0.49999997019767761 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xbeffffff), MakeUInt128(0x80000000U, 0U));
}
1669
TEST(Arm64InsnTest, RoundToIntDownwardFp64) {
  // frintm (scalar float64): round toward negative infinity.
  constexpr auto Frintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %d0, %d1");

  ASSERT_EQ(Frintm(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));  // 7.7 -> 7.0
  ASSERT_EQ(Frintm(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));  // 7.1 -> 7.0
  ASSERT_EQ(Frintm(0xc01c666666666666ULL), MakeUInt128(0xc020000000000000, 0U));  // -7.1 -> -8.0
  ASSERT_EQ(Frintm(0xc01f99999999999aULL), MakeUInt128(0xc020000000000000, 0U));  // -7.9 -> -8.0
  // Signed zeros pass through with the sign preserved.
  ASSERT_EQ(Frintm(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));
  ASSERT_EQ(Frintm(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1691
TEST(Arm64InsnTest, RoundToIntDownwardFp32) {
  // frintm (scalar float32): round toward negative infinity.
  constexpr auto Frintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %s0, %s1");

  ASSERT_EQ(Frintm(0x40f66666), 0x40e00000);  // 7.7 -> 7.0
  ASSERT_EQ(Frintm(0x40e33333), 0x40e00000);  // 7.1 -> 7.0
  ASSERT_EQ(Frintm(0xc0e33333), 0xc1000000);  // -7.1 -> -8.0
  ASSERT_EQ(Frintm(0xc0fccccd), 0xc1000000);  // -7.9 -> -8.0
  // Signed zeros pass through with the sign preserved.
  ASSERT_EQ(Frintm(0x00000000), 0x00000000);
  ASSERT_EQ(Frintm(0x80000000), 0x80000000);
}
1713
TEST(Arm64InsnTest, RoundToIntNearestFp64) {
  // frintn (scalar float64): round to nearest integral, ties to even.
  constexpr auto Frintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %d0, %d1");

  // Exact halves round to the even neighbor.
  ASSERT_EQ(Frintn(0x401e000000000000ULL), MakeUInt128(0x4020000000000000, 0U));  // 7.5 -> 8.0
  ASSERT_EQ(Frintn(0x4021000000000000), MakeUInt128(0x4020000000000000, 0U));     // 8.5 -> 8.0
  ASSERT_EQ(Frintn(0x401c666666666666), MakeUInt128(0x401c000000000000, 0U));     // 7.1 -> 7.0
  ASSERT_EQ(Frintn(0x401f99999999999a), MakeUInt128(0x4020000000000000, 0U));     // 7.9 -> 8.0
  ASSERT_EQ(Frintn(0xc01e000000000000), MakeUInt128(0xc020000000000000, 0U));     // -7.5 -> -8.0
  ASSERT_EQ(Frintn(0xc021000000000000), MakeUInt128(0xc020000000000000, 0U));     // -8.5 -> -8.0
  ASSERT_EQ(Frintn(0xc01c666666666666), MakeUInt128(0xc01c000000000000, 0U));     // -7.1 -> -7.0
  ASSERT_EQ(Frintn(0xc01f99999999999a), MakeUInt128(0xc020000000000000, 0U));     // -7.9 -> -8.0
  // Signed zeros pass through with the sign preserved.
  ASSERT_EQ(Frintn(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));
  ASSERT_EQ(Frintn(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1747
TEST(Arm64InsnTest, RoundToIntToNearestFp32) {
  // frintn (scalar float32): round to nearest integral, ties to even.
  constexpr auto Frintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %s0, %s1");

  // Exact halves round to the even neighbor.
  ASSERT_EQ(Frintn(0x40f00000), 0x41000000);  // 7.5 -> 8.0
  ASSERT_EQ(Frintn(0x41080000), 0x41000000);  // 8.5 -> 8.0
  ASSERT_EQ(Frintn(0x40e33333), 0x40e00000);  // 7.1 -> 7.0
  ASSERT_EQ(Frintn(0x40fccccd), 0x41000000);  // 7.9 -> 8.0
  ASSERT_EQ(Frintn(0xc0f00000), 0xc1000000);  // -7.5 -> -8.0
  ASSERT_EQ(Frintn(0xc1080000), 0xc1000000);  // -8.5 -> -8.0
  ASSERT_EQ(Frintn(0xc0e33333), 0xc0e00000);  // -7.1 -> -7.0
  ASSERT_EQ(Frintn(0xc0fccccd), 0xc1000000);  // -7.9 -> -8.0
  // Signed zeros pass through with the sign preserved.
  ASSERT_EQ(Frintn(0x00000000), 0x00000000);
  ASSERT_EQ(Frintn(0x80000000), 0x80000000);
}
1781
TEST(Arm64InsnTest, RoundToIntTowardZeroFp64) {
  // frintz (scalar float64): round toward zero (truncation).
  constexpr auto Frintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %d0, %d1");

  ASSERT_EQ(Frintz(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));  // 7.7 -> 7.0
  ASSERT_EQ(Frintz(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));  // 7.1 -> 7.0
  ASSERT_EQ(Frintz(0xc01c666666666666ULL), MakeUInt128(0xc01c000000000000, 0U));  // -7.1 -> -7.0
  ASSERT_EQ(Frintz(0xc01f99999999999aULL), MakeUInt128(0xc01c000000000000, 0U));  // -7.9 -> -7.0
  // Signed zeros pass through with the sign preserved.
  ASSERT_EQ(Frintz(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));
  ASSERT_EQ(Frintz(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1803
TEST(Arm64InsnTest, RoundToIntTowardZeroFp32) {
  // frintz (scalar float32): round toward zero (truncation).
  constexpr auto Frintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %s0, %s1");

  ASSERT_EQ(Frintz(0x40f66666), 0x40e00000);  // 7.7 -> 7.0
  ASSERT_EQ(Frintz(0x40e33333), 0x40e00000);  // 7.1 -> 7.0
  ASSERT_EQ(Frintz(0xc0e33333), 0xc0e00000);  // -7.1 -> -7.0
  ASSERT_EQ(Frintz(0xc0fccccd), 0xc0e00000);  // -7.9 -> -7.0
  // Signed zeros pass through with the sign preserved.
  ASSERT_EQ(Frintz(0x00000000), 0x00000000);
  ASSERT_EQ(Frintz(0x80000000), 0x80000000);
}
1825
TEST(Arm64InsnTest, AsmConvertF32x4TieAway) {
  // frinta (vector float32x4): round to nearest integral, ties away from
  // zero.
  constexpr auto Frinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.4s, %1.4s");
  ASSERT_EQ(Frinta(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeF32x4(-8.00f, -7.00f, -7.00f, -6.00f));
  ASSERT_EQ(Frinta(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeF32x4(6.00f, 7.00f, 7.00f, 8.00f));
}
1833
TEST(Arm64InsnTest, AsmConvertF32x4NegInf) {
  // frintm (vector float32x4): round toward negative infinity.
  constexpr auto Frintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.4s, %1.4s");
  ASSERT_EQ(Frintm(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  ASSERT_EQ(Frintm(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
1841
TEST(Arm64InsnTest, AsmConvertF32x4TieEven) {
  // frintn (vector float32x4): round to nearest integral, ties to even.
  constexpr auto Frintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.4s, %1.4s");
  ASSERT_EQ(Frintn(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  ASSERT_EQ(Frintn(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
}
1849
TEST(Arm64InsnTest, AsmConvertF32x4PosInf) {
  // frintp (vector float32x4): round toward positive infinity.
  constexpr auto Frintp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.4s, %1.4s");
  ASSERT_EQ(Frintp(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(Frintp(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
}
1857
TEST(Arm64InsnTest, AsmConvertF32x4Truncate) {
  // frintz (vector float32x4): round toward zero (truncation).
  constexpr auto Frintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.4s, %1.4s");
  ASSERT_EQ(Frintz(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(Frintz(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
1865
TEST(Arm64InsnTest, AsmConvertF64x4TieAway) {
  // frinta (vector float64x2): round to nearest integral, ties away from
  // zero.
  constexpr auto Frinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.2d, %1.2d");
  ASSERT_EQ(Frinta(MakeF64x2(-7.50, -6.75)), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(Frinta(MakeF64x2(-6.50, -6.25)), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(Frinta(MakeF64x2(6.25, 6.50)), MakeF64x2(6.00, 7.00));
  ASSERT_EQ(Frinta(MakeF64x2(6.75, 7.50)), MakeF64x2(7.00, 8.00));
}
1877
TEST(Arm64InsnTest, AsmConvertF64x4NegInf) {
  // frintm (vector float64x2): round toward negative infinity.
  constexpr auto Frintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.2d, %1.2d");
  ASSERT_EQ(Frintm(MakeF64x2(-7.50, -6.75)), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(Frintm(MakeF64x2(-6.50, -6.25)), MakeF64x2(-7.00, -7.00));
  ASSERT_EQ(Frintm(MakeF64x2(6.25, 6.50)), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(Frintm(MakeF64x2(6.75, 7.50)), MakeF64x2(6.00, 7.00));
}
1889
TEST(Arm64InsnTest, AsmConvertF64x4TieEven) {
  // frintn (vector float64x2): round to nearest integral, ties to even.
  constexpr auto Frintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.2d, %1.2d");
  ASSERT_EQ(Frintn(MakeF64x2(-7.50, -6.75)), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(Frintn(MakeF64x2(-6.50, -6.25)), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(Frintn(MakeF64x2(6.25, 6.50)), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(Frintn(MakeF64x2(6.75, 7.50)), MakeF64x2(7.00, 8.00));
}
1901
TEST(Arm64InsnTest, AsmConvertF64x4PosInf) {
  // frintp (vector float64x2): round toward positive infinity.
  constexpr auto Frintp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.2d, %1.2d");
  ASSERT_EQ(Frintp(MakeF64x2(-7.50, -6.75)), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(Frintp(MakeF64x2(-6.50, -6.25)), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(Frintp(MakeF64x2(6.25, 6.50)), MakeF64x2(7.00, 7.00));
  ASSERT_EQ(Frintp(MakeF64x2(6.75, 7.50)), MakeF64x2(7.00, 8.00));
}
1913
TEST(Arm64InsnTest, AsmConvertF64x4Truncate) {
  // frintz (vector float64x2): round toward zero (truncation).
  constexpr auto Frintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.2d, %1.2d");
  ASSERT_EQ(Frintz(MakeF64x2(-7.50, -6.75)), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(Frintz(MakeF64x2(-6.50, -6.25)), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(Frintz(MakeF64x2(6.25, 6.50)), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(Frintz(MakeF64x2(6.75, 7.50)), MakeF64x2(6.00, 7.00));
}
1925
TEST(Arm64InsnTest, AsmRoundCurrentModeF32) {
  // frinti (scalar float32) rounds to an integral value using the rounding
  // mode currently selected in FPCR. The WC-arg wrapper takes the FPCR value
  // as a second argument — presumably it installs it before executing the
  // instruction (defined in utility.h; confirm there). Each group of eight
  // asserts below exercises one FPCR RMode with the same set of inputs.
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %s0, %s1");
  // RMode = round to nearest, ties to even.
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(8.00f));
  // RMode = round toward negative infinity.
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(7.00f));
  // RMode = round toward positive infinity.
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(8.00f));
  // RMode = round toward zero.
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeZero), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeZero), bit_cast<uint32_t>(7.00f));
}
1961
// Verifies FRINTI on a scalar double: rounds to integral using the rounding
// mode currently selected in FPCR (supplied via the wrapper's second
// argument).  Covers ties-to-even, toward -inf, toward +inf and toward-zero,
// including the .50 tie cases.
TEST(Arm64InsnTest, AsmRoundCurrentModeF64) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %d0, %d1");
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(8.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModePosInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModePosInf), bit_cast<uint64_t>(8.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeZero), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeZero), bit_cast<uint64_t>(7.00));
}
1997
// Verifies the vector form of FRINTI (4 x F32 lanes): every lane is rounded
// to integral using the FPCR-selected rounding mode.
TEST(Arm64InsnTest, AsmRoundCurrentModeF32x4) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrinti(arg1, kFpcrRModeTieEven), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinti(arg2, kFpcrRModeTieEven), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  __uint128_t arg3 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrinti(arg3, kFpcrRModeNegInf), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  __uint128_t arg4 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinti(arg4, kFpcrRModeNegInf), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  __uint128_t arg5 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrinti(arg5, kFpcrRModePosInf), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg6 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinti(arg6, kFpcrRModePosInf), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  __uint128_t arg7 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrinti(arg7, kFpcrRModeZero), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg8 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinti(arg8, kFpcrRModeZero), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2017
// Verifies the vector form of FRINTI (2 x F64 lanes) under all four FPCR
// rounding modes.
TEST(Arm64InsnTest, AsmRoundCurrentModeF64x2) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrinti(arg1, kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrinti(arg2, kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrinti(arg3, kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrinti(arg4, kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  __uint128_t arg5 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrinti(arg5, kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  __uint128_t arg6 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrinti(arg6, kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  __uint128_t arg7 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrinti(arg7, kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  __uint128_t arg8 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrinti(arg8, kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  __uint128_t arg9 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrinti(arg9, kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  __uint128_t arg10 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrinti(arg10, kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  __uint128_t arg11 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrinti(arg11, kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  __uint128_t arg12 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrinti(arg12, kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  __uint128_t arg13 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrinti(arg13, kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  __uint128_t arg14 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrinti(arg14, kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  __uint128_t arg15 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrinti(arg15, kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  __uint128_t arg16 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrinti(arg16, kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2053
// Verifies FRINTX on a scalar float: rounds to integral using the
// FPCR-selected rounding mode.  The rounded results here are identical to
// FRINTI's; only the inexact-exception behavior differs (not observed here).
TEST(Arm64InsnTest, AsmRoundExactF32) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %s0, %s1");
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(8.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(8.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeZero), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeZero), bit_cast<uint32_t>(7.00f));
}
2089
// Verifies FRINTX on a scalar double under all four FPCR rounding modes;
// expected values mirror the FRINTI test above.
TEST(Arm64InsnTest, AsmRoundExactF64) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %d0, %d1");
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(8.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModePosInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModePosInf), bit_cast<uint64_t>(8.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeZero), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeZero), bit_cast<uint64_t>(7.00));
}
2125
// Verifies the vector form of FRINTX (4 x F32 lanes) under all four FPCR
// rounding modes.
TEST(Arm64InsnTest, AsmRoundExactF32x4) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintx(arg1, kFpcrRModeTieEven), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintx(arg2, kFpcrRModeTieEven), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  __uint128_t arg3 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintx(arg3, kFpcrRModeNegInf), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  __uint128_t arg4 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintx(arg4, kFpcrRModeNegInf), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  __uint128_t arg5 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintx(arg5, kFpcrRModePosInf), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg6 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintx(arg6, kFpcrRModePosInf), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  __uint128_t arg7 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintx(arg7, kFpcrRModeZero), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg8 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintx(arg8, kFpcrRModeZero), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2145
// Verifies the vector form of FRINTX (2 x F64 lanes) under all four FPCR
// rounding modes.
TEST(Arm64InsnTest, AsmRoundExactF64x2) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintx(arg1, kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintx(arg2, kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintx(arg3, kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintx(arg4, kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  __uint128_t arg5 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintx(arg5, kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  __uint128_t arg6 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintx(arg6, kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  __uint128_t arg7 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintx(arg7, kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  __uint128_t arg8 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintx(arg8, kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  __uint128_t arg9 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintx(arg9, kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  __uint128_t arg10 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintx(arg10, kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  __uint128_t arg11 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintx(arg11, kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  __uint128_t arg12 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintx(arg12, kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  __uint128_t arg13 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintx(arg13, kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  __uint128_t arg14 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintx(arg14, kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  __uint128_t arg15 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintx(arg15, kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  __uint128_t arg16 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintx(arg16, kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2181
Fp32Compare(uint64_t arg1,uint64_t arg2)2182 uint64_t Fp32Compare(uint64_t arg1, uint64_t arg2) {
2183 uint64_t res;
2184 asm("fcmp %s1, %s2\n\t"
2185 "mrs %x0, nzcv"
2186 : "=r"(res)
2187 : "w"(arg1), "w"(arg2));
2188 return res;
2189 }
2190
Fp64Compare(uint64_t arg1,uint64_t arg2)2191 uint64_t Fp64Compare(uint64_t arg1, uint64_t arg2) {
2192 uint64_t res;
2193 asm("fcmp %d1, %d2\n\t"
2194 "mrs %x0, nzcv"
2195 : "=r"(res)
2196 : "w"(arg1), "w"(arg2));
2197 return res;
2198 }
2199
MakeNZCV(uint64_t nzcv)2200 constexpr uint64_t MakeNZCV(uint64_t nzcv) {
2201 return nzcv << 28;
2202 }
2203
// FCMP flag outcomes for floats: unordered (NaN) -> C|V, equal -> Z|C,
// less-than -> N, greater-than -> C.
TEST(Arm64InsnTest, Fp32Compare) {
  // NaN and 1.83
  ASSERT_EQ(Fp32Compare(0x7fc00000ULL, 0x3fea3d71ULL), MakeNZCV(0b0011));

  // 6.31 == 6.31
  ASSERT_EQ(Fp32Compare(0x40c9eb85ULL, 0x40c9eb85ULL), MakeNZCV(0b0110));

  // 1.23 < 2.34
  ASSERT_EQ(Fp32Compare(0x3f9d70a4ULL, 0x4015c28fULL), MakeNZCV(0b1000));

  // 5.25 > 2.94
  ASSERT_EQ(Fp32Compare(0x40a80000ULL, 0x403c28f6ULL), MakeNZCV(0b0010));
}
2217
// FCMP against the #0.0 immediate form for floats; flag outcomes match the
// register-register FCMP test above.
TEST(Arm64InsnTest, Fp32CompareZero) {
  constexpr auto Fp32CompareZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %s1, #0.0\n\t"
      "mrs %x0, nzcv");

  // NaN and 0.00
  ASSERT_EQ(Fp32CompareZero(0x7fa00000ULL), MakeNZCV(0b0011));

  // 0.00 == 0.00
  ASSERT_EQ(Fp32CompareZero(0x00000000ULL), MakeNZCV(0b0110));

  // -2.67 < 0.00
  ASSERT_EQ(Fp32CompareZero(0xc02ae148ULL), MakeNZCV(0b1000));

  // 1.56 > 0.00
  ASSERT_EQ(Fp32CompareZero(0x3fc7ae14ULL), MakeNZCV(0b0010));
}
2235
// FCMP flag outcomes for doubles: unordered (NaN) -> C|V, equal -> Z|C,
// less-than -> N, greater-than -> C.
TEST(Arm64InsnTest, Fp64Compare) {
  // NaN and 1.19
  ASSERT_EQ(Fp64Compare(0x7ff8000000000000ULL, 0x3ff30a3d70a3d70aULL), MakeNZCV(0b0011));

  // 8.42 == 8.42
  ASSERT_EQ(Fp64Compare(0x4020d70a3d70a3d7ULL, 0x4020d70a3d70a3d7ULL), MakeNZCV(0b0110));

  // 0.50 < 1.00
  ASSERT_EQ(Fp64Compare(0x3fe0000000000000ULL, 0x3ff0000000000000ULL), MakeNZCV(0b1000));

  // 7.38 > 1.54
  ASSERT_EQ(Fp64Compare(0x401d851eb851eb85ULL, 0x3ff8a3d70a3d70a4ULL), MakeNZCV(0b0010));
}
2249
// FCMP against the #0.0 immediate form for doubles.
TEST(Arm64InsnTest, Fp64CompareZero) {
  constexpr auto Fp64CompareZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %d1, #0.0\n\t"
      "mrs %x0, nzcv");

  // NaN and 0.00
  ASSERT_EQ(Fp64CompareZero(0x7ff4000000000000ULL), MakeNZCV(0b0011));

  // 0.00 == 0.00
  ASSERT_EQ(Fp64CompareZero(0x0000000000000000ULL), MakeNZCV(0b0110));

  // -7.23 < 0.00
  ASSERT_EQ(Fp64CompareZero(0xc01ceb851eb851ecULL), MakeNZCV(0b1000));

  // 5.39 > 0.00
  ASSERT_EQ(Fp64CompareZero(0x40158f5c28f5c28fULL), MakeNZCV(0b0010));
}
2267
Fp32CompareIfEqualOrSetAllFlags(float arg1,float arg2,uint64_t nzcv)2268 uint64_t Fp32CompareIfEqualOrSetAllFlags(float arg1, float arg2, uint64_t nzcv) {
2269 asm("msr nzcv, %x0\n\t"
2270 "fccmp %s2, %s3, #15, eq\n\t"
2271 "mrs %x0, nzcv\n\t"
2272 : "=r"(nzcv)
2273 : "0"(nzcv), "w"(arg1), "w"(arg2));
2274 return nzcv;
2275 }
2276
// Verifies FCCMP on floats: with Z set on entry (EQ holds) the real FCMP
// result is produced; with Z clear the #15 immediate (all flags) is returned.
TEST(Arm64InsnTest, Fp32ConditionalCompare) {
  // Comparison is performed.
  constexpr uint64_t kEqual = MakeNZCV(0b0100);
  constexpr float kNan = std::numeric_limits<float>::quiet_NaN();
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kEqual), MakeNZCV(0b0110));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kEqual), MakeNZCV(0b1000));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kEqual), MakeNZCV(0b0010));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kNan, 1.0f, kEqual), MakeNZCV(0b0011));
  // Comparison is not performed; alt-nzcv is returned.
  constexpr uint64_t kNotEqual = MakeNZCV(0b0000);
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kNan, 1.0f, kNotEqual), MakeNZCV(0b1111));
}
2292
Fp64CompareIfEqualOrSetAllFlags(double arg1,double arg2,uint64_t nzcv)2293 uint64_t Fp64CompareIfEqualOrSetAllFlags(double arg1, double arg2, uint64_t nzcv) {
2294 asm("msr nzcv, %x0\n\t"
2295 "fccmp %d2, %d3, #15, eq\n\t"
2296 "mrs %x0, nzcv\n\t"
2297 : "=r"(nzcv)
2298 : "0"(nzcv), "w"(arg1), "w"(arg2));
2299 return nzcv;
2300 }
2301
// Verifies FCCMP on doubles: with Z set on entry (EQ holds) the real FCMP
// result is produced; with Z clear the #15 immediate (all flags) is returned.
TEST(Arm64InsnTest, Fp64ConditionalCompare) {
  // Comparison is performed.
  constexpr uint64_t kEqual = MakeNZCV(0b0100);
  constexpr double kNan = std::numeric_limits<double>::quiet_NaN();
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kEqual), MakeNZCV(0b0110));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kEqual), MakeNZCV(0b1000));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kEqual), MakeNZCV(0b0010));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kEqual), MakeNZCV(0b0011));
  // Comparison is not performed; alt-nzcv is returned.
  constexpr uint64_t kNotEqual = MakeNZCV(0b0000);
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  // Fixed: previously passed the float literal 1.0f in this double-precision
  // test; use 1.0 for consistency with the parallel kEqual assertion above.
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kNotEqual), MakeNZCV(0b1111));
}
2317
// FCVT widening: a single-precision value converts to the double-precision
// encoding of the same value.
TEST(Arm64InsnTest, ConvertFp32ToFp64) {
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %s1");
  uint64_t input = 0x40cd70a4ULL;  // 6.42 in float
  ASSERT_EQ(AsmFcvt(input), MakeUInt128(0x4019ae1480000000ULL, 0U));  // 6.42 in double
}
2323
// FCVT narrowing: a double-precision value rounds to the nearest
// single-precision encoding.
TEST(Arm64InsnTest, ConvertFp64ToFp32) {
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %d1");
  uint64_t input = 0x401a0a3d70a3d70aULL;  // 6.51 in double
  ASSERT_EQ(AsmFcvt(input), MakeUInt128(0x40d051ecULL, 0U));
}
2329
// FCVT from single to half precision for values exactly representable in FP16.
TEST(Arm64InsnTest, ConvertFp32ToFp16) {
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %s1");
  const struct {
    float input;
    uint64_t fp16_bits;
  } kCases[] = {{2.5f, 0x4100U}, {4.5f, 0x4480U}, {8.5f, 0x4840U}, {16.5f, 0x4c20U}};
  for (const auto& test_case : kCases) {
    EXPECT_EQ(AsmFcvt(bit_cast<uint32_t>(test_case.input)), MakeUInt128(test_case.fp16_bits, 0U));
  }
}
2337
// FCVT from half to single precision: 0x4100 encodes 2.5 in FP16.
TEST(Arm64InsnTest, ConvertFp16ToFp32) {
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %h1");
  uint64_t fp16_bits = 0x4100U;
  ASSERT_EQ(AsmFcvt(fp16_bits), bit_cast<uint32_t>(2.5f));
}
2343
// FCVT from double to half precision: 2.5 encodes as 0x4100 in FP16.
TEST(Arm64InsnTest, ConvertFp64ToFp16) {
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %d1");
  ASSERT_EQ(AsmFcvt(bit_cast<uint64_t>(2.5)), MakeUInt128(0x4100U, 0U));
}
2349
// FCVT from half to double precision: 0x4100 encodes 2.5 in FP16.
TEST(Arm64InsnTest, ConvertFp16ToFp64) {
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %h1");
  uint64_t fp16_bits = 0x4100U;
  ASSERT_EQ(AsmFcvt(fp16_bits), bit_cast<uint64_t>(2.5));
}
2355
// FCVTN narrows 2 x F64 into the low 2 x F32 lanes and zeroes the upper half.
TEST(Arm64InsnTest, ConvertToNarrowF64F32x2) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.2s, %1.2d");
  constexpr double kDblInf = std::numeric_limits<double>::infinity();
  constexpr float kFltInf = std::numeric_limits<float>::infinity();
  ASSERT_EQ(AsmFcvtn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
  // Overflow or inf arguments result in inf.
  ASSERT_EQ(AsmFcvtn(MakeF64x2(std::numeric_limits<double>::max(), kDblInf)),
            MakeF32x4(kFltInf, kFltInf, 0.0f, 0.0f));
  ASSERT_EQ(AsmFcvtn(MakeF64x2(std::numeric_limits<double>::lowest(), -kDblInf)),
            MakeF32x4(-kFltInf, -kFltInf, 0.0f, 0.0f));
}
2375
// FCVTN2 writes the narrowed pair into the upper 2 x F32 lanes while keeping
// the destination's lower half intact.
TEST(Arm64InsnTest, ConvertToNarrowF64F32x2Upper) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.4s, %1.2d");
  __uint128_t wide_pair = MakeF64x2(2.0, 3.0);
  __uint128_t dest = MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f);
  ASSERT_EQ(AsmFcvtn(wide_pair, dest), MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
}
2382
// FCVTXN (narrow with round-to-odd): finite overflow saturates to the largest
// finite float, while +/-inf convert to +/-inf.
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32) {
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %s0, %d1");
  using DblLim = std::numeric_limits<double>;
  using FltLim = std::numeric_limits<float>;
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(2.0)), bit_cast<uint32_t>(2.0f));
  // Overflow is saturated.
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(DblLim::max())), bit_cast<uint32_t>(FltLim::max()));
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(DblLim::lowest())), bit_cast<uint32_t>(FltLim::lowest()));
  // inf is converted to inf.
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(DblLim::infinity())),
            bit_cast<uint32_t>(FltLim::infinity()));
  // -inf is converted to -inf.
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(-DblLim::infinity())),
            bit_cast<uint32_t>(-FltLim::infinity()));
}
2398
// Vector FCVTXN: 2 x F64 narrow into the low 2 x F32 lanes; upper half zeroed.
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2) {
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %0.2s, %1.2d");
  ASSERT_EQ(AsmFcvtxn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
}
2404
// FCVTXN2 places the narrowed pair in the upper 2 x F32 lanes and preserves
// the destination's lower half.
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2Upper) {
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtxn2 %0.4s, %1.2d");
  __uint128_t wide_pair = MakeF64x2(2.0, 3.0);
  __uint128_t dest = MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f);
  ASSERT_EQ(AsmFcvtxn(wide_pair, dest), MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
}
2411
// FCVTL widens the lower 2 x F32 lanes into 2 x F64; the upper lanes of the
// source are ignored.
TEST(Arm64InsnTest, ConvertToWiderF32F64x2Lower) {
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.2d, %1.2s");
  ASSERT_EQ(AsmFcvtl(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(2.0, 3.0));
}
2417
// FCVTL2 widens the upper 2 x F32 lanes into 2 x F64.
TEST(Arm64InsnTest, ConvertToWiderF32F64x2Upper) {
  constexpr auto AsmFcvtl2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.2d, %1.4s");
  ASSERT_EQ(AsmFcvtl2(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(4.0, 5.0));
}
2423
// FCVTL widens 4 x F16 from the lower half into 4 x F32.
TEST(Arm64InsnTest, ConvertToWiderF16F32x4Lower) {
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.4s, %1.4h");
  // 4xF16 (2.5, 4.5, 8.5, 16.5) packed in the lower half.
  __uint128_t packed_fp16 = MakeUInt128(0x4c20'4840'4480'4100ULL, 0);
  ASSERT_EQ(AsmFcvtl(packed_fp16), MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}
2430
// FCVTL2 widens 4 x F16 from the upper half into 4 x F32.
TEST(Arm64InsnTest, ConvertToWiderF16F32x4Upper) {
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.4s, %1.8h");
  // 4xF16 (2.5, 4.5, 8.5, 16.5) packed in the upper half.
  __uint128_t packed_fp16 = MakeUInt128(0, 0x4c20'4840'4480'4100ULL);
  ASSERT_EQ(AsmFcvtl(packed_fp16), MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}
2437
// FCVTN narrows 4 x F32 into 4 x F16 packed in the lower half of the result.
TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Lower) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.4h, %1.4s");
  __uint128_t floats = MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f);
  // 4xF16 in the lower half.
  ASSERT_EQ(AsmFcvtn(floats), MakeUInt128(0x4c20'4840'4480'4100ULL, 0));
}
2444
// FCVTN2 packs the narrowed 4 x F16 into the upper half and preserves the
// destination's lower half.
TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Upper) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.8h, %1.4s");
  __uint128_t floats = MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f);
  __uint128_t dest = MakeF32x4(3.0f, 5.0f, 7.0f, 11.0f);
  // 4xF16 in the upper half, lower half preserved.
  ASSERT_EQ(AsmFcvtn(floats, dest), MakeUInt128(uint64_t(dest), 0x4c20'4840'4480'4100ULL));
}
2452
// FABS on a scalar float clears the sign bit.
TEST(Arm64InsnTest, AbsF32) {
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %s0, %s1");
  uint32_t input = 0xc1273333U;  // -10.45 in float
  ASSERT_EQ(AsmFabs(input), MakeUInt128(0x41273333ULL, 0U));  // 10.45 in float
}
2458
// FABS on a scalar double clears the sign bit.
TEST(Arm64InsnTest, AbsF64) {
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %d0, %d1");
  uint64_t input = 0xc03de8f5c28f5c29ULL;  // -29.91 in double
  ASSERT_EQ(AsmFabs(input), MakeUInt128(0x403de8f5c28f5c29ULL, 0U));  // 29.91 in double
}
2464
// Vector FABS clears the sign bit in every F32 lane, including -0.0.
TEST(Arm64InsnTest, AbsF32x4) {
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.4s, %1.4s");
  ASSERT_EQ(AsmFabs(MakeF32x4(-0.0f, 0.0f, 3.0f, -7.0f)), MakeF32x4(0.0f, 0.0f, 3.0f, 7.0f));
}
2470
// Vector FABS clears the sign bit in both F64 lanes.
TEST(Arm64InsnTest, AbsF64x2) {
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.2d, %1.2d");
  ASSERT_EQ(AsmFabs(MakeF64x2(-0.0, 3.0)), MakeF64x2(0.0, 3.0));
}
2476
// FABD computes the absolute difference of two scalar floats.
TEST(Arm64InsnTest, AbdF32) {
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %s0, %s1, %s2");
  uint32_t lhs = 0x4181851fU;  // 16.19 in float
  uint32_t rhs = 0x41211eb8U;  // 10.06 in float
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeUInt128(0x40c3d70cULL, 0U));  // 6.12 in float
}
2483
// FABD computes the absolute difference of two scalar doubles.
TEST(Arm64InsnTest, AbdF64) {
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %d0, %d1, %d2");
  uint64_t lhs = 0x403828f5c28f5c29U;  // 24.16 in double
  uint64_t rhs = 0x4027d70a3d70a3d7U;  // 11.92 in double
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeUInt128(0x40287ae147ae147bULL, 0U));  // 12.24 in double
}
2491
// Vector FABD: per-lane absolute difference of 4 x F32.
TEST(Arm64InsnTest, AbdF32x4) {
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.0f, 5.0f, -3.0f, -2.0f);
  __uint128_t rhs = MakeF32x4(-1.0f, 2.0f, -5.0f, 3.0f);
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeF32x4(2.0f, 3.0f, 2.0f, 5.0f));
}
2499
// Vector FABD: per-lane absolute difference of 2 x F64.
TEST(Arm64InsnTest, AbdF64x2) {
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(5.0, -2.0);
  __uint128_t rhs = MakeF64x2(4.0, 3.0);
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeF64x2(1.0, 5.0));
}
2507
// FNEG flips the sign bit of a scalar float.
TEST(Arm64InsnTest, NegF32) {
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %s0, %s1");
  uint32_t input = 0x40eeb852U;  // 7.46 in float
  ASSERT_EQ(AsmFneg(input), MakeUInt128(0xc0eeb852ULL, 0U));  // -7.46 in float
}
2513
// FNEG flips the sign bit of a scalar double.
TEST(Arm64InsnTest, NegF64) {
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %d0, %d1");
  uint64_t input = 0x4054b28f5c28f5c3ULL;  // 82.79 in double
  ASSERT_EQ(AsmFneg(input), MakeUInt128(0xc054b28f5c28f5c3ULL, 0U));  // -82.79 in double
}
2519
TEST(Arm64InsnTest, NegF32x4) {
  // FNEG (vector) negates every lane, including signed zeros.
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.4s, %1.4s");
  ASSERT_EQ(AsmFneg(MakeF32x4(-0.0f, 0.0f, 1.0f, -3.0f)), MakeF32x4(0.0f, -0.0f, -1.0f, 3.0f));
}
2525
TEST(Arm64InsnTest, NegF64x2) {
  // FNEG (vector, 2x F64): +0.0 becomes -0.0.
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.2d, %1.2d");
  ASSERT_EQ(AsmFneg(MakeF64x2(0.0, 3.0)), MakeF64x2(-0.0, -3.0));
}
2531
TEST(Arm64InsnTest, SqrtF32) {
  // FSQRT Sd, Sn.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %s0, %s1")(0x41f3cac1U /* 30.474f */);
  ASSERT_EQ(res, MakeUInt128(0x40b0a683ULL, 0U));  // sqrt(30.474f) == 5.5203261f
}
2537
TEST(Arm64InsnTest, SqrtF64) {
  // FSQRT Dd, Dn.
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %d0, %d1")(0x403d466666666666ULL /* 29.275 */);
  ASSERT_EQ(res, MakeUInt128(0x4015a47e3392efb8ULL, 0U));  // sqrt(29.275) == 5.41...
}
2543
TEST(Arm64InsnTest, SqrtF32x4) {
  // FSQRT (vector) over perfect squares gives exact roots.
  constexpr auto AsmSqrt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %0.4s, %1.4s");
  ASSERT_EQ(AsmSqrt(MakeF32x4(0.0f, 1.0f, 4.0f, 9.0f)), MakeF32x4(0.0f, 1.0f, 2.0f, 3.0f));
}
2549
TEST(Arm64InsnTest, RecipEstimateF32) {
  // FRECPE Sd, Sn: reciprocal estimate. The results are close to, but not
  // exactly, 1/x; the expected constants are the precise estimates the
  // instruction produces for these inputs.
  constexpr auto AsmFrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %s0, %s1");
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(0.25f)), bit_cast<uint32_t>(3.9921875f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(0.50f)), bit_cast<uint32_t>(1.99609375f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(2.00f)), bit_cast<uint32_t>(0.4990234375f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(4.00f)), bit_cast<uint32_t>(0.24951171875f));
}
2557
TEST(Arm64InsnTest, RecipEstimateF32x4) {
  // FRECPE (vector): per-lane reciprocal estimate (same expected estimates as
  // the scalar RecipEstimateF32 test).
  constexpr auto AsmFrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %0.4s, %1.4s");
  ASSERT_EQ(AsmFrecpe(MakeF32x4(0.25f, 0.50f, 2.00f, 4.00f)),
            MakeF32x4(3.9921875f, 1.99609375f, 0.4990234375f, 0.24951171875f));
}
2563
TEST(Arm64InsnTest, RecipStepF32) {
  // FRECPS Sd, Sn, Sm computes 2.0 - a*b (one Newton-Raphson reciprocal step);
  // all four cases below match that formula exactly.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %s0, %s1, %s2");
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(1.50f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.25f));
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(2.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.00f));
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.25f)),
            bit_cast<uint32_t>(1.25f));
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(0.50f));
}
2575
TEST(Arm64InsnTest, RecipStepF64) {
  // FRECPS Dd, Dn, Dm computes 2.0 - a*b, double precision.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %d0, %d1, %d2");
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(1.50), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.25));
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(2.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.00));
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.25)),
            bit_cast<uint64_t>(1.25));
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(0.50));
}
2587
TEST(Arm64InsnTest, RecipStepF32x4) {
  // FRECPS (vector): 2.0 - a*b per lane; same cases as the scalar test.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFrecps(MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f),
                      MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f)),
            MakeF32x4(1.25f, 1.00f, 1.25f, 0.50f));
}
2595
TEST(Arm64InsnTest, RecipStepF64x2) {
  // FRECPS (vector, 2x F64): 2.0 - a*b per lane.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFrecps(MakeF64x2(1.50, 2.00), MakeF64x2(0.50, 0.50)), MakeF64x2(1.25, 1.00));
  ASSERT_EQ(AsmFrecps(MakeF64x2(3.00, 3.00), MakeF64x2(0.25, 0.50)), MakeF64x2(1.25, 0.50));
}
2605
TEST(Arm64InsnTest, RecipSqrtEstimateF32) {
  // FRSQRTE Sd, Sn: reciprocal square-root estimate. Results are close to,
  // but not exactly, 1/sqrt(x); the constants are the precise estimates the
  // instruction produces for these inputs.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %s0, %s1");
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(2.0f)), bit_cast<uint32_t>(0.705078125f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(3.0f)), bit_cast<uint32_t>(0.576171875f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(4.0f)), bit_cast<uint32_t>(0.4990234375f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(5.0f)), bit_cast<uint32_t>(0.4462890625f));
}
2613
TEST(Arm64InsnTest, RecipSqrtEstimateF32x4) {
  // FRSQRTE (vector): per-lane 1/sqrt(x) estimate; same expectations as the
  // scalar RecipSqrtEstimateF32 test.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.4s, %1.4s");
  ASSERT_EQ(AsmFrsqrte(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)),
            MakeF32x4(0.705078125f, 0.576171875f, 0.4990234375f, 0.4462890625f));
}
2620
TEST(Arm64InsnTest, RecipSqrtEstimateF64) {
  // FRSQRTE Dd, Dn: double-precision 1/sqrt(x) estimate; same estimate
  // values as the single-precision variant for these inputs.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %d0, %d1");
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(2.0)), bit_cast<uint64_t>(0.705078125));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(3.0)), bit_cast<uint64_t>(0.576171875));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(4.0)), bit_cast<uint64_t>(0.4990234375));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(5.0)), bit_cast<uint64_t>(0.4462890625));
}
2628
TEST(Arm64InsnTest, RecipSqrtEstimateF64x2) {
  // FRSQRTE (vector, 2x F64): per-lane 1/sqrt(x) estimate.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.2d, %1.2d");
  ASSERT_EQ(AsmFrsqrte(MakeF64x2(2.0, 3.0)),
            MakeUInt128(bit_cast<uint64_t>(0.705078125), bit_cast<uint64_t>(0.576171875)));
}
2635
TEST(Arm64InsnTest, RecipSqrtStepF32) {
  // FRSQRTS Sd, Sn, Sm computes (3.0 - a*b) / 2.0 (one Newton-Raphson
  // reciprocal-sqrt step); all four cases match that formula exactly.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %s0, %s1, %s2");
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(1.50f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.125f));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(2.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.000f));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.25f)),
            bit_cast<uint32_t>(1.125f));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(0.750f));
}
2647
TEST(Arm64InsnTest, RecipSqrtStepF64) {
  // FRSQRTS Dd, Dn, Dm computes (3.0 - a*b) / 2.0, double precision.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %d0, %d1, %d2");
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(1.50), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.125));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(2.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.000));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.25)),
            bit_cast<uint64_t>(1.125));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(0.750));
}
2659
TEST(Arm64InsnTest, RecipSqrtStepF32x4) {
  // FRSQRTS (vector): (3.0 - a*b) / 2.0 per lane; same cases as the scalar test.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFrsqrts(MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f),
                       MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f)),
            MakeF32x4(1.125f, 1.000f, 1.125f, 0.750f));
}
2667
TEST(Arm64InsnTest, RecipSqrtStepF64x2) {
  // FRSQRTS (vector, 2x F64): (3.0 - a*b) / 2.0 per lane.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFrsqrts(MakeF64x2(1.50, 2.00), MakeF64x2(0.50, 0.50)), MakeF64x2(1.125, 1.000));
  ASSERT_EQ(AsmFrsqrts(MakeF64x2(3.00, 3.00), MakeF64x2(0.25, 0.50)), MakeF64x2(1.125, 0.750));
}
2677
TEST(Arm64InsnTest, AddFp32) {
  // FADD Sd, Sn, Sm: 6.68f + 4.98f == 11.66f.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %s0, %s1, %s2");
  ASSERT_EQ(AsmFadd(0x40d5c28fULL /* 6.68f */, 0x409f5c29ULL /* 4.98f */),
            MakeUInt128(0x413a8f5cULL, 0U));  // 11.66f
}
2684
TEST(Arm64InsnTest, AddFp64) {
  // FADD Dd, Dn, Dm: 8.30 + 4.17 == 12.47.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %d0, %d1, %d2");
  ASSERT_EQ(AsmFadd(0x402099999999999aULL /* 8.30 */, 0x4010ae147ae147aeULL /* 4.17 */),
            MakeUInt128(0x4028f0a3d70a3d71ULL, 0U));  // 12.47
}
2691
TEST(Arm64InsnTest, AddF32x4) {
  // FADD (vector): per-lane addition of 4x F32.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFadd(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f)),
            MakeF32x4(3.0f, 3.0f, -1.0f, 5.0f));
}
2698
TEST(Arm64InsnTest, AddF64x2) {
  // FADD (vector): per-lane addition of 2x F64.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFadd(MakeF64x2(3.0, 5.0), MakeF64x2(-4.0, 2.0)), MakeF64x2(-1.0, 7.0));
}
2705
TEST(Arm64InsnTest, AddPairwiseF32x2) {
  // FADDP Sd, Vn.2S: sums the two low lanes; the upper 64 bits are ignored.
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("faddp %s0, %1.2s");
  ASSERT_EQ(AsmFaddp(MakeF32x4(1.0f, 2.0f, 4.0f, 8.0f)), bit_cast<uint32_t>(3.0f));
}
2711
TEST(Arm64InsnTest, AddPairwiseF32x4) {
  // FADDP (vector): adds adjacent lane pairs of the concatenated operands:
  // result = [a0+a1, a2+a3, b0+b1, b2+b3].
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("faddp %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFaddp(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f)),
            MakeF32x4(-1.0f, 7.0f, 7.0f, -3.0f));
}
2718
TEST(Arm64InsnTest, SubFp32) {
  // FSUB Sd, Sn, Sm: 9.96f - 3.13f == 6.83f.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %s0, %s1, %s2");
  ASSERT_EQ(AsmFsub(0x411f5c29ULL /* 9.96f */, 0x404851ecULL /* 3.13f */),
            MakeUInt128(0x40da8f5cULL, 0U));  // 6.83f
}
2725
TEST(Arm64InsnTest, SubFp64) {
  // FSUB Dd, Dn, Dm: 7.72 - 5.35 == 2.37.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %d0, %d1, %d2");
  ASSERT_EQ(AsmFsub(0x401ee147ae147ae1ULL /* 7.72 */, 0x4015666666666666ULL /* 5.35 */),
            MakeUInt128(0x4002f5c28f5c28f6ULL, 0U));  // 2.37
}
2732
TEST(Arm64InsnTest, SubF32x4) {
  // FSUB (vector): per-lane subtraction of 4x F32.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFsub(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f)),
            MakeF32x4(-9.0f, 1.0f, 15.0f, -5.0f));
}
2739
TEST(Arm64InsnTest, SubF64x2) {
  // FSUB (vector): per-lane subtraction of 2x F64.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFsub(MakeF64x2(3.0, 5.0), MakeF64x2(-4.0, 2.0)), MakeF64x2(7.0, 3.0));
}
2746
TEST(Arm64InsnTest, MaxFp32) {
  // FMAX Sd, Sn, Sm: returns the larger operand; a NaN on either side
  // produces the default NaN.
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %s0, %s1, %s2");
  const uint32_t two = bit_cast<uint32_t>(2.0f);
  const uint32_t three = bit_cast<uint32_t>(3.0f);

  ASSERT_EQ(AsmFmax(two, three), MakeU32x4(three, 0, 0, 0));
  ASSERT_EQ(AsmFmax(kDefaultNaN32, three), kDefaultNaN32);
  ASSERT_EQ(AsmFmax(three, kDefaultNaN32), kDefaultNaN32);
}
2756
TEST(Arm64InsnTest, MaxFp64) {
  // FMAX Dd, Dn, Dm: returns the larger operand; a NaN on either side
  // produces the default NaN.
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t three = bit_cast<uint64_t>(3.0);

  ASSERT_EQ(AsmFmax(two, three), MakeUInt128(three, 0U));
  ASSERT_EQ(AsmFmax(kDefaultNaN64, three), kDefaultNaN64);
  ASSERT_EQ(AsmFmax(three, kDefaultNaN64), kDefaultNaN64);
}
2766
TEST(Arm64InsnTest, MaxF32x4) {
  // FMAX (vector): per-lane maximum; +0.0 wins over -0.0 (lane 0).
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmax(arg1, arg2), MakeF32x4(0.0f, 2.0f, 3.0f, -3.0f));

  // A NaN in either operand's lane yields the default NaN in that lane.
  __uint128_t arg3 = MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, -4.0f);
  __uint128_t arg4 = MakeF32x4(0.0f, 1.0f, -3.0f, bit_cast<float>(kDefaultNaN32));
  ASSERT_EQ(AsmFmax(arg3, arg4),
            MakeF32x4(0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, bit_cast<float>(kDefaultNaN32)));
}
2778
TEST(Arm64InsnTest, MaxF64x2) {
  // FMAX (vector, 2x F64): per-lane maximum; +0.0 wins over -0.0 (lane 0).
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-0.0, 3.0);
  __uint128_t arg2 = MakeF64x2(0.0, -3.0);
  ASSERT_EQ(AsmFmax(arg1, arg2), MakeF64x2(0.0, 3.0));

  // A NaN in either operand's lane yields the default NaN in that lane.
  __uint128_t arg3 = MakeF64x2(bit_cast<double>(kDefaultNaN64), 3.0);
  __uint128_t arg4 = MakeF64x2(1.0, bit_cast<double>(kDefaultNaN64));
  ASSERT_EQ(AsmFmax(arg3, arg4),
            MakeF64x2(bit_cast<double>(kDefaultNaN64), bit_cast<double>(kDefaultNaN64)));
}
2790
TEST(Arm64InsnTest, MaxNumberFp32) {
  // FMAXNM Sd, Sn, Sm: like FMAX, but a quiet NaN operand loses to a number.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);
  // BUG FIX: this was `uint64_t ... bit_cast<uint64_t>(-2.0)`. The %s (32-bit)
  // instruction only reads the low 32 bits, and the low half of the double
  // -2.0 is all zeroes, so the "negative operand" cases actually tested +0.0f
  // and passed vacuously. Use the 32-bit pattern of -2.0f, matching the
  // MinNumberFp32 test.
  uint32_t fp_arg_minus_two = bit_cast<uint32_t>(-2.0f);

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_three, 0, 0, 0));

  // A quiet NaN on either side loses to the numeric operand.
  ASSERT_EQ(AsmFmaxnm(fp_arg_two, kQuietNaN32), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(fp_arg_minus_two, kQuietNaN32), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32, fp_arg_two), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32, fp_arg_minus_two), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
}
2804
TEST(Arm64InsnTest, MaxNumberFp64) {
  // FMAXNM Dd, Dn, Dm: like FMAX, but a quiet NaN operand loses to a number.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %d0, %d1, %d2");
  uint64_t fp_arg_two = bit_cast<uint64_t>(2.0);
  uint64_t fp_arg_three = bit_cast<uint64_t>(3.0);
  uint64_t fp_arg_minus_two = bit_cast<uint64_t>(-2.0);

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, fp_arg_three), MakeUInt128(fp_arg_three, 0U));

  // A quiet NaN on either side loses to the numeric operand.
  ASSERT_EQ(AsmFmaxnm(fp_arg_two, kQuietNaN64), MakeUInt128(fp_arg_two, 0U));
  ASSERT_EQ(AsmFmaxnm(fp_arg_minus_two, kQuietNaN64), MakeUInt128(fp_arg_minus_two, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64, fp_arg_two), MakeUInt128(fp_arg_two, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64, fp_arg_minus_two), MakeUInt128(fp_arg_minus_two, 0));
}
2818
TEST(Arm64InsnTest, MinNumberFp32) {
  // FMINNM Sd, Sn, Sm: like FMIN, but a quiet NaN operand loses to a number.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);
  uint32_t fp_arg_minus_two = bit_cast<uint32_t>(-2.0f);

  ASSERT_EQ(AsmFminnm(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_two, 0, 0, 0));

  // A quiet NaN on either side loses to the numeric operand.
  ASSERT_EQ(AsmFminnm(fp_arg_two, kQuietNaN32), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(fp_arg_minus_two, kQuietNaN32), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32, fp_arg_two), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32, fp_arg_minus_two), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
}
2832
TEST(Arm64InsnTest, MinNumberFp64) {
  // FMINNM Dd, Dn, Dm: like FMIN, but a quiet NaN operand loses to a number.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %d0, %d1, %d2");
  uint64_t fp_arg_two = bit_cast<uint64_t>(2.0);
  uint64_t fp_arg_three = bit_cast<uint64_t>(3.0);
  uint64_t fp_arg_minus_two = bit_cast<uint64_t>(-2.0);

  ASSERT_EQ(AsmFminnm(fp_arg_two, fp_arg_three), MakeUInt128(fp_arg_two, 0U));

  // A quiet NaN on either side loses to the numeric operand.
  ASSERT_EQ(AsmFminnm(fp_arg_two, kQuietNaN64), MakeUInt128(fp_arg_two, 0U));
  ASSERT_EQ(AsmFminnm(fp_arg_minus_two, kQuietNaN64), MakeUInt128(fp_arg_minus_two, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64, fp_arg_two), MakeUInt128(fp_arg_two, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64, fp_arg_minus_two), MakeUInt128(fp_arg_minus_two, 0));
}
2846
TEST(Arm64InsnTest, MaxNumberF32x4) {
  // FMAXNM (vector): per-lane maximum where quiet NaNs lose to numbers.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-1.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(2.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmaxnm(arg1, arg2), MakeF32x4(2.0f, 2.0f, 3.0f, -3.0f));

  // A quiet NaN in either operand's lane loses to the numeric lane.
  __uint128_t arg3 =
      MakeU32x4(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f), kQuietNaN32, kQuietNaN32);
  __uint128_t arg4 =
      MakeU32x4(kQuietNaN32, kQuietNaN32, bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFmaxnm(arg3, arg4), MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));

  // NOTE(review): the original test also built signaling-NaN inputs
  // (kSignalingNaN32_1) here but never asserted on them, leaving two unused
  // locals and no coverage. They were removed as dead code. A future
  // assertion must use a quieted-NaN expectation: unlike quiet NaNs,
  // a signaling NaN operand makes FMAXNM return a (quieted) NaN rather than
  // the numeric operand.
}
2864
TEST(Arm64InsnTest, MaxNumberF64x2) {
  // FMAXNM (vector, 2x F64): quiet NaN lanes lose to numeric lanes.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFmaxnm(MakeF64x2(-1.0, -4.0), MakeF64x2(2.0, -3.0)), MakeF64x2(2.0, -3.0));

  __uint128_t nan_in_high = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64);
  __uint128_t nan_in_low = MakeUInt128(kQuietNaN64, bit_cast<uint64_t>(-1.0));
  ASSERT_EQ(AsmFmaxnm(nan_in_high, nan_in_low), MakeF64x2(1.0, -1.0));
}
2875
TEST(Arm64InsnTest, MinNumberF32x4) {
  // FMINNM (vector): per-lane minimum where quiet NaNs lose to numbers;
  // -0.0 wins over +0.0 (lane 0).
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(-0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFminnm(arg1, arg2), MakeF32x4(-0.0f, 1.0f, -3.0f, -4.0f));

  // A quiet NaN in either operand's lane loses to the numeric lane.
  __uint128_t arg3 =
      MakeU32x4(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f), kQuietNaN32, kQuietNaN32);
  __uint128_t arg4 =
      MakeU32x4(kQuietNaN32, kQuietNaN32, bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f));
  __uint128_t res = AsmFminnm(arg3, arg4);
  ASSERT_EQ(res, MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));
}
2889
TEST(Arm64InsnTest, MinNumberF64x2) {
  // FMINNM (vector, 2x F64): quiet NaN lanes lose to numeric lanes;
  // -0.0 wins over +0.0 (lane 0).
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(0.0, 3.0);
  __uint128_t arg2 = MakeF64x2(-0.0, -3.0);
  ASSERT_EQ(AsmFminnm(arg1, arg2), MakeF64x2(-0.0, -3.0));

  __uint128_t arg3 = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64);
  __uint128_t arg4 = MakeUInt128(kQuietNaN64, bit_cast<uint64_t>(-1.0));
  __uint128_t res = AsmFminnm(arg3, arg4);
  ASSERT_EQ(res, MakeF64x2(1.0, -1.0));
}
2901
TEST(Arm64InsnTest, MinFp32) {
  // FMIN Sd, Sn, Sm: returns the smaller operand; a NaN on either side
  // produces the default NaN.
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %s0, %s1, %s2");
  const uint32_t two = bit_cast<uint32_t>(2.0f);
  const uint32_t three = bit_cast<uint32_t>(3.0f);

  ASSERT_EQ(AsmFmin(two, three), MakeU32x4(two, 0, 0, 0));
  ASSERT_EQ(AsmFmin(kDefaultNaN32, three), kDefaultNaN32);
  ASSERT_EQ(AsmFmin(three, kDefaultNaN32), kDefaultNaN32);
}
2911
TEST(Arm64InsnTest, MinFp64) {
  // FMIN Dd, Dn, Dm: returns the smaller operand; a NaN on either side
  // produces the default NaN.
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t three = bit_cast<uint64_t>(3.0);

  ASSERT_EQ(AsmFmin(two, three), MakeUInt128(two, 0U));
  ASSERT_EQ(AsmFmin(kDefaultNaN64, three), kDefaultNaN64);
  ASSERT_EQ(AsmFmin(three, kDefaultNaN64), kDefaultNaN64);
}
2921
TEST(Arm64InsnTest, MinF32x4) {
  // FMIN (vector): per-lane minimum; -0.0 wins over +0.0 (lane 0).
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(-0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmin(arg1, arg2), MakeF32x4(-0.0f, 1.0f, -3.0f, -4.0f));

  // A NaN in either operand's lane yields the default NaN in that lane.
  __uint128_t arg3 = MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, -4.0f);
  __uint128_t arg4 = MakeF32x4(0.0f, 1.0f, -3.0f, bit_cast<float>(kDefaultNaN32));
  ASSERT_EQ(
      AsmFmin(arg3, arg4),
      MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), -3.0f, bit_cast<float>(kDefaultNaN32)));
}
2934
TEST(Arm64InsnTest, MinF64x2) {
  // FMIN (vector, 2x F64): per-lane minimum; -0.0 wins over +0.0 (lane 0).
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(0.0, 3.0);
  __uint128_t arg2 = MakeF64x2(-0.0, -3.0);
  ASSERT_EQ(AsmFmin(arg1, arg2), MakeF64x2(-0.0, -3.0));

  // A NaN in either operand's lane yields the default NaN in that lane.
  __uint128_t arg3 = MakeF64x2(bit_cast<double>(kDefaultNaN64), 3.0);
  __uint128_t arg4 = MakeF64x2(1.0, bit_cast<double>(kDefaultNaN64));
  ASSERT_EQ(AsmFmin(arg3, arg4),
            MakeF64x2(bit_cast<double>(kDefaultNaN64), bit_cast<double>(kDefaultNaN64)));
}
2946
TEST(Arm64InsnTest, MaxPairwiseF32Scalar) {
  // FMAXP Sd, Vn.2S: maximum of the two low lanes; a NaN lane produces the
  // default NaN.
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxp %s0, %1.2s");
  ASSERT_EQ(AsmFmaxp(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f)), bit_cast<uint32_t>(2.0f));
  ASSERT_EQ(AsmFmaxp(MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, -0.0f)),
            kDefaultNaN32);
}
2955
TEST(Arm64InsnTest, MaxPairwiseF32x4) {
  // FMAXP (vector): maximum of adjacent lane pairs of the concatenated
  // operands: result = [max(a0,a1), max(a2,a3), max(b0,b1), max(b2,b3)].
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxp %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxp(arg1, arg2), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));

  // A NaN in a pair yields the default NaN for that pair's result lane.
  __uint128_t arg3 =
      MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, bit_cast<float>(kDefaultNaN32));
  __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxp(arg3, arg4),
            MakeF32x4(bit_cast<float>(kDefaultNaN32), bit_cast<float>(kDefaultNaN32), 6.0f, 5.0f));
}
2968
TEST(Arm64InsnTest, MinPairwiseF32Scalar) {
  // FMINP Sd, Vn.2S: minimum of the two low lanes; a NaN lane produces the
  // default NaN.
  constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminp %s0, %1.2s");
  ASSERT_EQ(AsmFminp(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f)), bit_cast<uint32_t>(-3.0f));
  ASSERT_EQ(AsmFminp(MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, -0.0f)),
            kDefaultNaN32);
}
2977
TEST(Arm64InsnTest, MinPairwiseF32x4) {
  // FMINP (vector): minimum of adjacent lane pairs of the concatenated
  // operands: result = [min(a0,a1), min(a2,a3), min(b0,b1), min(b2,b3)].
  constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminp %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminp(arg1, arg2), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));

  // A NaN in a pair yields the default NaN for that pair's result lane.
  __uint128_t arg3 =
      MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, bit_cast<float>(kDefaultNaN32));
  __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminp(arg3, arg4),
            MakeF32x4(bit_cast<float>(kDefaultNaN32), bit_cast<float>(kDefaultNaN32), 1.0f, -8.0f));
}
2990
TEST(Arm64InsnTest, MaxPairwiseNumberF32Scalar) {
  // FMAXNMP Sd, Vn.2S: maximum of the two low lanes; a quiet NaN lane loses
  // to the numeric lane.
  constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmp %s0, %1.2s");
  ASSERT_EQ(AsmFmaxnmp(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f)), bit_cast<uint32_t>(2.0f));
  ASSERT_EQ(AsmFmaxnmp(MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, -0.0f)),
            bit_cast<uint32_t>(2.0f));
}
2999
TEST(Arm64InsnTest, MaxPairwiseNumberF32x4) {
  // FMAXNMP (vector): pairwise maximum of the concatenated operands where a
  // quiet NaN in a pair loses to the numeric lane.
  constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnmp %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxnmp(arg1, arg2), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));

  // NaN lanes are ignored, so the result matches the all-numeric case above.
  __uint128_t arg3 =
      MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32));
  __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxnmp(arg3, arg4), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));
}
3011
TEST(Arm64InsnTest, MinPairwiseNumberF32Scalar) {
  // FMINNMP Sd, Vn.2S: minimum of the two low lanes; a quiet NaN lane loses
  // to the numeric lane.
  constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmp %s0, %1.2s");
  ASSERT_EQ(AsmFminnmp(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f)), bit_cast<uint32_t>(-3.0f));
  ASSERT_EQ(AsmFminnmp(MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, -0.0f)),
            bit_cast<uint32_t>(2.0f));
}
3020
TEST(Arm64InsnTest, MinPairwiseNumberF32x4) {
  // FMINNMP (vector): pairwise minimum of the concatenated operands where a
  // quiet NaN in a pair loses to the numeric lane.
  constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnmp %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminnmp(arg1, arg2), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));

  // With a NaN in each of the first two pairs, the numeric partner wins.
  __uint128_t arg3 =
      MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32));
  __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminnmp(arg3, arg4), MakeF32x4(2.0f, 7.0f, 1.0f, -8.0f));
}
3032
TEST(Arm64InsnTest, MaxAcrossF32x4) {
  // FMAXV Sd, Vn.4S: maximum across all four lanes; any NaN lane produces
  // the default NaN.
  constexpr auto AsmFmaxv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxv %s0, %1.4s");
  ASSERT_EQ(AsmFmaxv(MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f)), bit_cast<uint32_t>(3.0f));
  ASSERT_EQ(AsmFmaxv(MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32), -4.0f)),
            kDefaultNaN32);
}
3041
TEST(Arm64InsnTest, MinAcrossF32x4) {
  // FMINV Sd, Vn.4S: minimum across all four lanes; any NaN lane produces
  // the default NaN.
  constexpr auto AsmFminv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminv %s0, %1.4s");
  ASSERT_EQ(AsmFminv(MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f)), bit_cast<uint32_t>(-4.0f));
  ASSERT_EQ(AsmFminv(MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32), -4.0f)),
            kDefaultNaN32);
}
3050
TEST(Arm64InsnTest, MaxNumberAcrossF32x4) {
  // FMAXNMV Sd, Vn.4S: maximum across all four lanes; quiet NaN lanes are
  // ignored in favor of the numeric lanes.
  constexpr auto AsmFmaxnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmv %s0, %1.4s");
  ASSERT_EQ(AsmFmaxnmv(MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f)), bit_cast<uint32_t>(3.0f));
  ASSERT_EQ(AsmFmaxnmv(MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32), 3.0f, -4.0f)),
            bit_cast<uint32_t>(3.0f));
}
3059
TEST(Arm64InsnTest, MinNumberAcrossF32x4) {
  // FMINNMV Sd, Vn.4S: minimum across all four lanes; quiet NaN lanes are
  // ignored in favor of the numeric lanes.
  constexpr auto AsmFminnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmv %s0, %1.4s");
  ASSERT_EQ(AsmFminnmv(MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f)), bit_cast<uint32_t>(-4.0f));
  ASSERT_EQ(AsmFminnmv(MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32), 3.0f, -4.0f)),
            bit_cast<uint32_t>(-4.0f));
}
3068
TEST(Arm64InsnTest, MulFp32) {
  // FMUL Sd, Sn, Sm: 5.05f * 6.84f == 34.542f.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %s2");
  ASSERT_EQ(AsmFmul(0x40a1999aULL /* 5.05f */, 0x40dae148ULL /* 6.84f */),
            MakeUInt128(0x420a2b03ULL, 0U));  // 34.542f
}
3075
TEST(Arm64InsnTest, MulFp64) {
  // FMUL Dd, Dn, Dm: 9.21 * 8.39 == 77.2719.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %d2");
  ASSERT_EQ(AsmFmul(0x40226b851eb851ecULL /* 9.21 */, 0x4020c7ae147ae148ULL /* 8.39 */),
            MakeUInt128(0x40535166cf41f214ULL, 0U));  // 77.2719
}
3082
TEST(Arm64InsnTest, MulF32x4) {
  // FMUL (vector): per-lane multiplication of 4x F32.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFmul(MakeF32x4(1.0f, -2.0f, 3.0f, -4.0f), MakeF32x4(-3.0f, -1.0f, 4.0f, 1.0f)),
            MakeF32x4(-3.0f, 2.0f, 12.0f, -4.0f));
}
3089
TEST(Arm64InsnTest, MulF64x2) {
  // FMUL (vector): per-lane multiplication of 2x F64.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFmul(MakeF64x2(-4.0, 2.0), MakeF64x2(2.0, 3.0)), MakeF64x2(-8.0, 6.0));
}
3096
TEST(Arm64InsnTest, MulF32x4ByScalar) {
  // FMUL (by element): every lane of the first operand is multiplied by
  // lane 3 of the second operand (9.0f).
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.s[3]");
  ASSERT_EQ(AsmFmul(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f), MakeF32x4(6.0f, 7.0f, 8.0f, 9.0f)),
            MakeF32x4(18.0f, 27.0f, 36.0f, 45.0f));
}
3103
TEST(Arm64InsnTest, MulF64x2ByScalar) {
  // FMUL (by element): both lanes of the first operand are multiplied by
  // lane 1 of the second operand (4.0).
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.d[1]");
  ASSERT_EQ(AsmFmul(MakeF64x2(2.0, 3.0), MakeF64x2(5.0, 4.0)), MakeF64x2(8.0, 12.0));
}
3110
TEST(Arm64InsnTest, MulF32IndexedElem) {
  // FMUL (scalar, by element): Sn * Vm.S[2], i.e. 2.0f * 17.0f == 34.0f.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %2.s[2]");
  ASSERT_EQ(AsmFmul(MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f), MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f)),
            bit_cast<uint32_t>(34.0f));
}
3117
TEST(Arm64InsnTest, MulF64IndexedElem) {
  // FMUL (scalar, by element): Dn * Vm.D[1], i.e. 2.0 * 4.0 == 8.0.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %2.d[1]");
  ASSERT_EQ(AsmFmul(MakeF64x2(2.0, 3.0), MakeF64x2(5.0, 4.0)), bit_cast<uint64_t>(8.0));
}
3124
TEST(Arm64InsnTest, MulExtendedF32) {
  // Scalar FMULX; for ordinary finite operands it behaves like FMUL
  // (2.0f * 11.0f = 22.0f). Only lane 0 of each input participates.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %s2");
  __uint128_t arg1 = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t arg2 = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(arg1, arg2), bit_cast<uint32_t>(22.0f));
}
3131
TEST(Arm64InsnTest, MulExtendedF32x4) {
  // Vector FMULX (4S): lane-wise products of the two inputs.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t arg2 = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(arg1, arg2), MakeF32x4(22.0f, 39.0f, 85.0f, 133.0f));
}
3138
TEST(Arm64InsnTest, MulExtendedF32IndexedElem) {
  // Scalar FMULX by element: lane 0 of arg1 times lane 2 of arg2
  // (2.0f * 17.0f = 34.0f).
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %2.s[2]");
  __uint128_t arg1 = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t arg2 = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(arg1, arg2), bit_cast<uint32_t>(34.0f));
}
3145
TEST(Arm64InsnTest, MulExtendedF64IndexedElem) {
  // Scalar FMULX by element: lane 0 of arg1 times lane 1 of arg2
  // (2.0 * 4.0 = 8.0).
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %d0, %d1, %2.d[1]");
  __uint128_t arg1 = MakeF64x2(2.0, 3.0);
  __uint128_t arg2 = MakeF64x2(5.0, 4.0);
  ASSERT_EQ(AsmFmulx(arg1, arg2), bit_cast<uint64_t>(8.0));
}
3152
TEST(Arm64InsnTest, MulExtendedF32x4IndexedElem) {
  // Vector FMULX by element: every lane of arg1 is multiplied by lane 2 of
  // arg2 (17.0f).
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.s[2]");
  __uint128_t arg1 = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t arg2 = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(arg1, arg2), MakeF32x4(34.0f, 51.0f, 85.0f, 119.0f));
}
3159
TEST(Arm64InsnTest, MulNegFp32) {
  // FNMUL (scalar, single): negated product, -(2.0f * 3.0f) = -6.0f.
  uint64_t fp_arg1 = bit_cast<uint32_t>(2.0f);
  uint64_t fp_arg2 = bit_cast<uint32_t>(3.0f);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %s0, %s1, %s2")(fp_arg1, fp_arg2);
  ASSERT_EQ(rd, MakeUInt128(bit_cast<uint32_t>(-6.0f), 0U));
}
3166
TEST(Arm64InsnTest, MulNegFp64) {
  // FNMUL (scalar, double): negated product, -(2.0 * 3.0) = -6.0.
  uint64_t fp_arg1 = bit_cast<uint64_t>(2.0);
  uint64_t fp_arg2 = bit_cast<uint64_t>(3.0);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %d0, %d1, %d2")(fp_arg1, fp_arg2);
  ASSERT_EQ(rd, MakeUInt128(bit_cast<uint64_t>(-6.0), 0U));
}
3173
TEST(Arm64InsnTest, DivFp32) {
  // Scalar single-precision FDIV, including a case whose quotient is denormal.
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %s0, %s1, %s2");

  uint32_t arg1 = 0x40c23d71U;  // 6.07 in float
  uint32_t arg2 = 0x401a3d71U;  // 2.41 in float
  ASSERT_EQ(AsmFdiv(arg1, arg2), MakeUInt128(0x402131edULL, 0U));  // 2.5186722 in float

  // Make sure that FDIV can produce a denormal result under the default FPCR,
  // where the FZ bit (flush-to-zero) is off.
  uint32_t arg3 = 0xa876eff9U;  // exponent (without offset) = -47
  uint32_t arg4 = 0xe7d86b60U;  // exponent (without offset) = 80
  ASSERT_EQ(AsmFdiv(arg3, arg4), MakeUInt128(0x0049065cULL, 0U));  // denormal
}
3187
TEST(Arm64InsnTest, DivFp64) {
  // Scalar double-precision FDIV with a correctly-rounded expected quotient.
  uint64_t fp_arg1 = 0x401e5c28f5c28f5cULL;  // 7.59 in double
  uint64_t fp_arg2 = 0x3ff28f5c28f5c28fULL;  // 1.16 in double
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %d0, %d1, %d2")(fp_arg1, fp_arg2);
  ASSERT_EQ(rd, MakeUInt128(0x401a2c234f72c235ULL, 0U));  // 6.5431034482758620995923593 in double
}
3194
TEST(Arm64InsnTest, DivFp32_FlagsWhenDivByZero) {
  // Scalar FDIV by zero must set the cumulative DZC (divide-by-zero) flag in
  // FPSR and must NOT set IOC (invalid operation).
  uint64_t fpsr;
  // volatile prevents the compiler from folding the division at compile time.
  volatile float dividend = 123.0f;
  volatile float divisor = 0.0f;
  float res;
  asm volatile(
      "msr fpsr, xzr\n\t"  // clear all cumulative FP exception flags first
      "fdiv %s1, %s2, %s3\n\t"
      "mrs %0, fpsr"  // read the flags produced by the division
      : "=r"(fpsr), "=w"(res)
      : "w"(dividend), "w"(divisor));
  ASSERT_TRUE((fpsr & kFpsrDzcBit) == (kFpsrDzcBit));

  // Previous bug caused IOC to be set upon scalar div by zero.
  ASSERT_TRUE((fpsr & kFpsrIocBit) == 0);
}
3211
TEST(Arm64InsnTest, DivFp64_FlagsWhenDivByZero) {
  // Double-precision variant: FDIV by zero sets DZC but not IOC in FPSR.
  uint64_t fpsr;
  double res;
  asm volatile(
      "msr fpsr, xzr\n\t"  // clear all cumulative FP exception flags first
      "fdiv %d1, %d2, %d3\n\t"
      "mrs %0, fpsr"  // read the flags produced by the division
      : "=r"(fpsr), "=w"(res)
      : "w"(123.0), "w"(0.0));
  ASSERT_TRUE((fpsr & kFpsrDzcBit) == (kFpsrDzcBit));

  // Previous bug caused IOC to be set upon scalar div by zero.
  ASSERT_TRUE((fpsr & kFpsrIocBit) == 0);
}
3226
TEST(Arm64InsnTest, DivFp32x4) {
  // Vector FDIV (4S): lane-wise division, plus a denormal-result case.
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.4s, %1.4s, %2.4s");

  // 16.39, 80.286, 41.16, 98.01
  __uint128_t arg1 = MakeUInt128(0x41831eb842a0926fULL, 0x4224a3d742c4051fULL);
  // 13.3, 45.45, 7.89, -2.63
  __uint128_t arg2 = MakeUInt128(0x4154cccd4235cccdULL, 0x40fc7ae1c02851ecULL);
  __uint128_t res1 = AsmFdiv(arg1, arg2);
  // 1.2323308, 1.7664686, 5.21673, -37.26616
  ASSERT_EQ(res1, MakeUInt128(0x3f9dbd043fe21ba5ULL, 0x40a6ef74c215108cULL));

  // Verify that fdiv produces a denormal result under the default FPCR.
  __uint128_t arg3 = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.eddff2p-47f);
  __uint128_t arg4 = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.b0d6c0p80f);
  __uint128_t res2 = AsmFdiv(arg3, arg4);
  __uint128_t expected2 = MakeF32x4(1.0f, 1.0f, 1.0f, 0x0.920cb8p-126f);
  ASSERT_EQ(res2, expected2);
}
3245
TEST(Arm64InsnTest, DivFp64x2) {
  // Vector FDIV (2D): lane-wise double-precision division.
  // Hex constants use lowercase digits, matching the convention used by the
  // rest of this file (the values are unchanged).
  // 6.23, 65.02
  __uint128_t arg1 = MakeUInt128(0x4018eb851eb851ecULL, 0x40504147ae147ae1ULL);
  // -7.54, 11.92
  __uint128_t arg2 = MakeUInt128(0xc01e28f5c28f5c29ULL, 0x4027d70a3d70a3d7ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.2d, %1.2d, %2.2d")(arg1, arg2);
  // -0.82625994695, 5.45469798658
  ASSERT_EQ(res, MakeUInt128(0xbfea70b8b3449564ULL, 0x4015d19c59579fc9ULL));
}
3255
TEST(Arm64InsnTest, MulAddFp32) {
  // FMADD (scalar, single): computes arg3 + arg1 * arg2 with a single
  // (fused) rounding of the final result.
  constexpr auto AsmFmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %s0, %s1, %s2, %s3");

  __uint128_t res1 =
      AsmFmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f));
  ASSERT_EQ(res1, MakeF32x4(11.0f, 0, 0, 0));

  __uint128_t res2 =
      AsmFmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f));
  ASSERT_EQ(res2, MakeF32x4(0, 0, 0, 0));

  // These tests verify that fmadd does not lose precision while doing the mult + add.
  __uint128_t res3 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                              bit_cast<uint32_t>(0x1.000002p0f),
                              bit_cast<uint32_t>(-0x1.p23f));
  ASSERT_EQ(res3, MakeF32x4(0x1.fffffcp-2f, 0, 0, 0));

  __uint128_t res4 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                              bit_cast<uint32_t>(0x1.000002p0f),
                              bit_cast<uint32_t>(-0x1.fffffep22f));
  ASSERT_EQ(res4, MakeF32x4(0x1.fffffep-1f, 0, 0, 0));

  __uint128_t res5 = AsmFmadd(bit_cast<uint32_t>(0x1.p23f),
                              bit_cast<uint32_t>(0x1.fffffep-1f),
                              bit_cast<uint32_t>(-0x1.000002p23f));
  ASSERT_EQ(res5, MakeF32x4(-0x1.80p0f, 0, 0, 0));
}
3283
TEST(Arm64InsnTest, MulAddFp64) {
  // FMADD computes arg3 + arg1 * arg2; FNMADD yields the negated fused result.
  uint64_t arg1 = 0x40323d70a3d70a3dULL;  // 18.24
  uint64_t arg2 = 0x40504147ae147ae1ULL;  // 65.02
  uint64_t arg3 = 0x4027d70a3d70a3d7ULL;  // 11.92
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3")(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x4092b78a0902de00ULL, 0U));  // 1197.8848
  __uint128_t res2 =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3")(arg1, arg2, arg3);
  ASSERT_EQ(res2, MakeUInt128(0xc092b78a0902de00ULL, 0U));  // -1197.8848
}
3294
TEST(Arm64InsnTest, MulAddFp64Precision) {
  // Verifies FMADD is fused: the product arg1 * arg2 is not rounded before
  // the addition, so the final result differs from a separate mul-then-add.
  uint64_t arg1 = bit_cast<uint64_t>(0x1.0p1023);
  uint64_t arg2 = bit_cast<uint64_t>(0x1.0p-1);
  uint64_t arg3 = bit_cast<uint64_t>(0x1.fffffffffffffp1022);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3")(arg1, arg2, arg3);
  ASSERT_EQ(res, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3302
TEST(Arm64InsnTest, NegMulAddFp32) {
  // FNMADD (scalar, single): computes -(arg3 + arg1 * arg2) with a single
  // (fused) rounding.
  constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %s0, %s1, %s2, %s3");

  __uint128_t res1 =
      AsmFnmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f));
  ASSERT_EQ(res1, MakeF32x4(-11.0f, 0, 0, 0));

  // No -0 (proper negation)
  __uint128_t res2 =
      AsmFnmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f));
  ASSERT_EQ(res2, MakeF32x4(0.0f, 0, 0, 0));

  // These tests verify that fnmadd does not lose precision while doing the mult + add.
  __uint128_t res3 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                               bit_cast<uint32_t>(0x1.000002p0f),
                               bit_cast<uint32_t>(-0x1.p23f));
  ASSERT_EQ(res3, MakeF32x4(-0x1.fffffcp-2f, 0, 0, 0));

  __uint128_t res4 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                               bit_cast<uint32_t>(0x1.000002p0f),
                               bit_cast<uint32_t>(-0x1.fffffep22f));
  ASSERT_EQ(res4, MakeF32x4(-0x1.fffffep-1f, 0, 0, 0));

  __uint128_t res5 = AsmFnmadd(bit_cast<uint32_t>(0x1.p23f),
                               bit_cast<uint32_t>(0x1.fffffep-1f),
                               bit_cast<uint32_t>(-0x1.000002p23f));
  ASSERT_EQ(res5, MakeF32x4(0x1.80p0f, 0, 0, 0));
}
3331
TEST(Arm64InsnTest, NegMulAddFp64) {
  // FNMADD (scalar, double): computes -(arg3 + arg1 * arg2).
  constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3");

  __uint128_t res1 =
      AsmFnmadd(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(res1, MakeF64x2(-11.0, 0));

  // Proper negation (no -0 in this case)
  __uint128_t res2 =
      AsmFnmadd(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(-5.0));
  ASSERT_EQ(res2, MakeF64x2(0.0, 0));
}
3344
TEST(Arm64InsnTest, NegMulSubFp64) {
  // FNMSUB (scalar, double): computes (arg1 * arg2) - arg3 with a single
  // (fused) rounding. The literal suffix on arg2 is normalized to ULL for
  // consistency with the rest of this file (same value).
  constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");

  __uint128_t res1 =
      AsmFnmsub(bit_cast<uint64_t>(-2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(res1, MakeF64x2(-11.0, 0));

  uint64_t arg1 = 0x40357ae147ae147bULL;  // 21.48
  uint64_t arg2 = 0x404ce3d70a3d70a4ULL;  // 57.78
  uint64_t arg3 = 0x405e29999999999aULL;  // 120.65
  __uint128_t res2 = AsmFnmsub(arg1, arg2, arg3);
  ASSERT_EQ(res2, MakeUInt128(0x409181db8bac710dULL, 0U));  // 1120.4644

  // Assert no -0 in this case
  __uint128_t res3 =
      AsmFnmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(res3, MakeF64x2(0.0, 0));
}
3363
TEST(Arm64InsnTest, NegMulSubFp64Precision) {
  // Verifies FNMSUB is fused: the product keeps full precision before the
  // subtraction; only the final result is rounded.
  constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");

  __uint128_t res = AsmFnmsub(bit_cast<uint64_t>(0x1.0p1023),
                              bit_cast<uint64_t>(0x1.0p-1),
                              bit_cast<uint64_t>(-0x1.fffffffffffffp1022));
  ASSERT_EQ(res, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3372
// Verifies FMLA (vector, 4S): lane-wise fused multiply-accumulate,
// acc + lhs * rhs.
TEST(Arm64InsnTest, MulAddF32x4) {
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.4s");
  const __uint128_t lhs = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  const __uint128_t rhs = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  const __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmla(lhs, rhs, acc), MakeF32x4(5.0f, 5.0f, 9.0f, 14.0f));
}
3380
// Verifies scalar FMLA (by element): accumulator lane 0 plus
// lhs lane 0 times rhs lane 2.
TEST(Arm64InsnTest, MulAddF32IndexedElem) {
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %s0, %s1, %2.s[2]");
  const __uint128_t lhs = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  const __uint128_t rhs = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  const __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  // 2 + (1 * 2)
  ASSERT_EQ(AsmFmla(lhs, rhs, acc), bit_cast<uint32_t>(4.0f));
}
3389
// Verifies scalar FMLA (by element, double): accumulator lane 0 plus
// lhs lane 0 times rhs lane 1.
TEST(Arm64InsnTest, MulAddF64IndexedElem) {
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %d0, %d1, %2.d[1]");
  const __uint128_t lhs = MakeF64x2(2.0, 3.0);
  const __uint128_t rhs = MakeF64x2(4.0, 5.0);
  const __uint128_t acc = MakeF64x2(6.0, 7.0);
  // 6 + (2 * 5)
  ASSERT_EQ(AsmFmla(lhs, rhs, acc), bit_cast<uint64_t>(16.0));
}
3398
// Verifies vector FMLA (by element): every accumulator lane gains the
// corresponding lhs lane times rhs lane 2 (2.0f).
TEST(Arm64InsnTest, MulAddF32x4IndexedElem) {
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.s[2]");
  const __uint128_t lhs = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  const __uint128_t rhs = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  const __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmla(lhs, rhs, acc), MakeF32x4(4.0f, 7.0f, 9.0f, 8.0f));
}
3406
TEST(Arm64InsnTest, MulSubFp32) {
  // FMSUB: arg3 - arg1 * arg2 = 3 - 10 = -7.
  // FNMSUB: arg1 * arg2 - arg3 = 10 - 3 = 7.
  uint32_t arg1 = bit_cast<uint32_t>(2.0f);
  uint32_t arg2 = bit_cast<uint32_t>(5.0f);
  uint32_t arg3 = bit_cast<uint32_t>(3.0f);
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %s0, %s1, %s2, %s3")(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(bit_cast<uint32_t>(-7.0f), 0U));
  __uint128_t res2 =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %s0, %s1, %s2, %s3")(arg1, arg2, arg3);
  ASSERT_EQ(res2, MakeUInt128(bit_cast<uint32_t>(7.0f), 0U));
}
3417
TEST(Arm64InsnTest, MulSubFp64) {
  // FMSUB (scalar, double): computes arg3 - arg1 * arg2 with a single
  // (fused) rounding. Locals renumbered consecutively (the original skipped
  // res2) and the `ull` suffix normalized to `ULL` — values unchanged.
  constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");

  uint64_t arg1 = 0x40357ae147ae147bULL;  // 21.48
  uint64_t arg2 = 0x404ce3d70a3d70a4ULL;  // 57.78
  uint64_t arg3 = 0x405e29999999999aULL;  // 120.65
  __uint128_t res1 = AsmFmsub(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xc09181db8bac710dULL, 0U));  // -1120.4644

  // Basic case
  __uint128_t res2 =
      AsmFmsub(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(-5.0));
  ASSERT_EQ(res2, MakeF64x2(-11.0, 0));

  // No -0 in this case (proper negation order)
  __uint128_t res3 =
      AsmFmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(res3, MakeF64x2(0.0, 0));
}
3437
// Verifies FMSUB keeps the intermediate product unrounded (fused
// multiply-subtract); only the final difference is rounded.
TEST(Arm64InsnTest, MulSubFp64Precision) {
  constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");
  const __uint128_t res = AsmFmsub(bit_cast<uint64_t>(-0x1.0p1023),
                                   bit_cast<uint64_t>(0x1.0p-1),
                                   bit_cast<uint64_t>(0x1.fffffffffffffp1022));
  ASSERT_EQ(res, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3445
// Verifies FMLS (vector, 4S): lane-wise fused multiply-subtract,
// acc - lhs * rhs.
TEST(Arm64InsnTest, MulSubF32x4) {
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.4s");
  const __uint128_t lhs = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  const __uint128_t rhs = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  const __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmls(lhs, rhs, acc), MakeF32x4(-1.0f, 1.0f, -7.0f, -10.0f));
}
3453
// Verifies scalar FMLS (by element): accumulator lane 0 minus
// lhs lane 0 times rhs lane 2.
TEST(Arm64InsnTest, MulSubF32IndexedElem) {
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %s0, %s1, %2.s[2]");
  const __uint128_t lhs = MakeF32x4(2.0f, 1.0f, 4.0f, 3.0f);
  const __uint128_t rhs = MakeF32x4(4.0f, 3.0f, 2.0f, 1.0f);
  const __uint128_t acc = MakeF32x4(8.0f, 3.0f, 1.0f, 2.0f);
  // 8 - (2 * 2)
  ASSERT_EQ(AsmFmls(lhs, rhs, acc), bit_cast<uint32_t>(4.0f));
}
3462
TEST(Arm64InsnTest, MulSubF64IndexedElem) {
  // Scalar FMLS (by element, double): acc lane 0 minus arg1 lane 0 times
  // arg2 lane 1.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %d0, %d1, %2.d[1]");
  __uint128_t arg1 = MakeF64x2(2.0, 5.0);
  __uint128_t arg2 = MakeF64x2(4.0, 1.0);
  // Fixed: the second element was written as the float literal 7.0f even
  // though MakeF64x2 takes doubles (7.0f converts to 7.0 exactly, so the
  // value is unchanged).
  __uint128_t arg3 = MakeF64x2(6.0, 7.0);
  // 6 - (2 * 1)
  ASSERT_EQ(AsmFmls(arg1, arg2, arg3), bit_cast<uint64_t>(4.0));
}
3471
// Verifies vector FMLS (by element): every accumulator lane loses the
// corresponding lhs lane times rhs lane 2 (2.0f).
TEST(Arm64InsnTest, MulSubF32x4IndexedElem) {
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.s[2]");
  const __uint128_t lhs = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  const __uint128_t rhs = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  const __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmls(lhs, rhs, acc), MakeF32x4(0.0f, -1.0f, -7.0f, -4.0f));
}
3479
TEST(Arm64InsnTest, CompareEqualF32) {
  // Scalar FCMEQ: result is all-ones on equality, zero otherwise;
  // comparisons involving NaN yield false.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmeq(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmeq(two, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmeq(kDefaultNaN32, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmeq(two, kDefaultNaN32), 0x00000000ULL);
}
3489
TEST(Arm64InsnTest, CompareEqualF32x4) {
  // Vector FCMEQ: per-lane all-ones/zero mask; only lane 1 (2.0f) is equal.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  __uint128_t res = AsmFcmeq(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
}
3497
TEST(Arm64InsnTest, CompareGreaterEqualF32) {
  // Scalar FCMGE: all-ones when arg1 >= arg2; NaN operands compare false.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmge(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(two, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(six, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(kDefaultNaN32, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(two, kDefaultNaN32), 0x00000000ULL);
}
3508
TEST(Arm64InsnTest, CompareGreaterEqualF32x4) {
  // Vector FCMGE: per-lane mask; lanes 1 (2>=2) and 2 (7>=-8) are true.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  __uint128_t res = AsmFcmge(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff00000000ULL, 0x00000000ffffffffULL));
}
3516
TEST(Arm64InsnTest, CompareGreaterF32) {
  // Scalar FCMGT: all-ones only when arg1 > arg2 (strict); NaN compares false.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmgt(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(two, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(six, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmgt(kDefaultNaN32, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(two, kDefaultNaN32), 0x00000000ULL);
}
3527
TEST(Arm64InsnTest, CompareGreaterF32x4) {
  // Vector FCMGT: per-lane strict greater-than; only lane 2 (7 > -8) is true.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  __uint128_t res = AsmFcmgt(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x00000000ffffffffULL));
}
3535
TEST(Arm64InsnTest, CompareEqualZeroF32) {
  // Scalar FCMEQ against immediate #0: all-ones only for a zero input.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %s0, %s1, #0");
  ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(4.0f)), 0x00000000ULL);
}
3541
TEST(Arm64InsnTest, CompareEqualZeroF32x4) {
  // Vector FCMEQ against #0: only lane 1 (0.0f) compares equal to zero.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %0.4s, %1.4s, #0");
  __uint128_t arg = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  __uint128_t res = AsmFcmeq(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
}
3548
TEST(Arm64InsnTest, CompareGreaterThanZeroF32) {
  // Scalar FCMGT against #0: strictly-positive inputs produce all-ones.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %s0, %s1, #0");
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(0.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
}
3555
TEST(Arm64InsnTest, CompareGreaterThanZeroF32x4) {
  // Vector FCMGT against #0: lanes 2 (7.0f) and 3 (1.0f) are positive.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %0.4s, %1.4s, #0");
  __uint128_t arg = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  __uint128_t res = AsmFcmgt(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
}
3562
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32) {
  // Scalar FCMGE against #0: zero and positive inputs produce all-ones.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %s0, %s1, #0");
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
}
3569
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32x4) {
  // Vector FCMGE against #0: all lanes except lane 0 (-3.0f) are >= 0.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %0.4s, %1.4s, #0");
  __uint128_t arg = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  __uint128_t res = AsmFcmge(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffff00000000ULL, 0xffffffffffffffffULL));
}
3576
TEST(Arm64InsnTest, CompareLessThanZeroF32) {
  // Scalar FCMLT against #0: only strictly-negative inputs produce all-ones.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %s0, %s1, #0");
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(0.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(1.0f)), 0x00000000ULL);
}
3583
TEST(Arm64InsnTest, CompareLessThanZeroF32x4) {
  // Vector FCMLT against #0: only lane 0 (-3.0f) is negative.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %0.4s, %1.4s, #0");
  __uint128_t arg = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  __uint128_t res = AsmFcmlt(arg);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
}
3590
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32) {
  // Scalar FCMLE against #0: negative and zero inputs produce all-ones.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %s0, %s1, #0");
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(1.0f)), 0x00000000ULL);
}
3597
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32x4) {
  // Vector FCMLE against #0: lanes 0 (-3.0f) and 1 (0.0f) are <= 0.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %0.4s, %1.4s, #0");
  __uint128_t arg = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  __uint128_t res = AsmFcmle(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3604
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32) {
  // Scalar FACGT: compares absolute values, all-ones when |arg1| > |arg2|.
  constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %s0, %s1, %s2");
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);
}
3611
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanOrEqualF32) {
  // Scalar FACGE: compares absolute values, all-ones when |arg1| >= |arg2|.
  constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %s0, %s1, %s2");
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);
}
3618
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32x4) {
  // Vector FACGT: per-lane |arg1| > |arg2|; lanes 0 (3>1) and 3 (4>2) hold.
  constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f);
  __uint128_t arg2 = MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f);
  ASSERT_EQ(AsmFacgt(arg1, arg2), MakeUInt128(0x00000000ffffffffULL, 0xffffffff00000000ULL));
}
3625
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanEqualF32x4) {
  // Vector FACGE: per-lane |arg1| >= |arg2|; only lane 2 (3 >= 7) fails.
  constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f);
  __uint128_t arg2 = MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f);
  ASSERT_EQ(AsmFacge(arg1, arg2), MakeUInt128(0xffffffffffffffffULL, 0xffffffff00000000ULL));
}
3632
TEST(Arm64InsnTest, CompareEqualF64) {
  // Scalar FCMEQ (double): all-ones on equality; NaN never compares equal.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %d0, %d1, %d2");
  uint64_t two = bit_cast<uint64_t>(2.0);
  uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmeq(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(two, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmeq(kDefaultNaN64, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(two, kDefaultNaN64), 0x0000000000000000ULL);
}
3642
TEST(Arm64InsnTest, CompareEqualF64x2) {
  // Vector FCMEQ (2D): per-lane mask; only the 2.0 == 2.0 lane is true.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-3.0, 2.0);
  __uint128_t arg2 = MakeF64x2(6.0, 2.0);
  __uint128_t res = AsmFcmeq(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  // Second pass: no lane equal (7 vs -8, -0 vs 5).
  arg1 = MakeF64x2(7.0, -0.0);
  arg2 = MakeF64x2(-8.0, 5.0);
  res = AsmFcmeq(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
3654
TEST(Arm64InsnTest, CompareGreaterEqualF64) {
  // Scalar FCMGE (double): all-ones when arg1 >= arg2; NaN compares false.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %d0, %d1, %d2");
  uint64_t two = bit_cast<uint64_t>(2.0);
  uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmge(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(two, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmge(six, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmge(kDefaultNaN64, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(two, kDefaultNaN64), 0x0000000000000000ULL);
}
3665
TEST(Arm64InsnTest, CompareGreaterEqualF64x2) {
  // Vector FCMGE (2D): per-lane mask for arg1 >= arg2.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-3.0, 2.0);
  __uint128_t arg2 = MakeF64x2(6.0, 2.0);
  __uint128_t res = AsmFcmge(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  // Second pass: 7 >= -8 holds, -0 >= 5 does not.
  arg1 = MakeF64x2(7.0, -0.0);
  arg2 = MakeF64x2(-8.0, 5.0);
  res = AsmFcmge(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3677
TEST(Arm64InsnTest, CompareGreaterF64) {
  // Scalar FCMGT (double): strict greater-than; NaN compares false.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %d0, %d1, %d2");
  uint64_t two = bit_cast<uint64_t>(2.0);
  uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmgt(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(two, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(six, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmgt(kDefaultNaN64, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(two, kDefaultNaN64), 0x0000000000000000ULL);
}
3688
TEST(Arm64InsnTest, CompareGreaterF64x2) {
  // Vector FCMGT (2D): per-lane strict greater-than mask.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-3.0, 2.0);
  __uint128_t arg2 = MakeF64x2(6.0, 2.0);
  __uint128_t res = AsmFcmgt(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  // Second pass: 7 > -8 holds, -0 > 5 does not.
  arg1 = MakeF64x2(7.0, -0.0);
  arg2 = MakeF64x2(-8.0, 5.0);
  res = AsmFcmgt(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3700
TEST(Arm64InsnTest, AndInt8x16) {
  // AND (vector, 16B): bitwise AND across the full 128-bit register.
  __uint128_t op1 = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  __uint128_t op2 = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0480842700030170ULL, 0x2000004100001060ULL));
}
3707
TEST(Arm64InsnTest, AndInt8x8) {
  // AND (vector, 8B): only the low 64 bits are ANDed; writing the 64-bit
  // form zeroes the upper half of the destination register.
  __uint128_t op1 = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  __uint128_t op2 = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0480842700030170ULL, 0));
}
3714
TEST(Arm64InsnTest, OrInt8x16) {
  // ORR (vector, 16B): bitwise OR across the full 128-bit register.
  __uint128_t op1 = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  __uint128_t op2 = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x44ffbb55ffbb7744ULL, 0x136a56c8136a56c8ULL));
}
3721
TEST(Arm64InsnTest, OrInt8x8) {
  // ORR (vector, 8B): low 64 bits ORed; the upper half is zeroed.
  __uint128_t op1 = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  __uint128_t op2 = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x44ffbb55ffbb7744ULL, 0));
}
3728
TEST(Arm64InsnTest, XorInt8x16) {
  // EOR (vector, 16B): bitwise XOR across the full 128-bit register.
  __uint128_t op1 = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  __uint128_t op2 = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x9269ff671cfe833bULL, 0xc6bd2031ce224768ULL));
}
3735
TEST(Arm64InsnTest, XorInt8x8) {
  // EOR (vector, 8B): low 64 bits XORed; the upper half is zeroed.
  __uint128_t op1 = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  __uint128_t op2 = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x9269ff671cfe833bULL, 0));
}
3742
TEST(Arm64InsnTest, AndNotInt8x16) {
  // BIC (vector, register, 16B): op1 AND NOT op2 across all 128 bits.
  __uint128_t op1 = MakeUInt128(0x0313783875288658ULL, 0x7533208381420617ULL);
  __uint128_t op2 = MakeUInt128(0x2327917860857843ULL, 0x8382796797668145ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0010680015288618ULL, 0x7431008000000612ULL));
}
3749
TEST(Arm64InsnTest, AndNotInt8x8) {
  // BIC (vector, register, 8B): op1 AND NOT op2 on the low 64 bits;
  // the upper half of the destination is zeroed.
  __uint128_t op1 = MakeUInt128(0x4861045432664821ULL, 0x2590360011330530ULL);
  __uint128_t op2 = MakeUInt128(0x5420199561121290ULL, 0x8572424541506959ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0841044012644821ULL, 0x0000000000000000ULL));
}
3756
TEST(Arm64InsnTest, AndNotInt16x4Imm) {
  __uint128_t res = MakeUInt128(0x9690314950191085ULL, 0x7598442391986291ULL);

  // BIC (vector, immediate): clears bits 0-1 of every 16-bit lane in the low
  // 64 bits; the 64-bit form zeroes the upper half of the register.
  asm("bic %0.4h, #0x3" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x9690314850181084ULL, 0x0000000000000000ULL));
}
3764
TEST(Arm64InsnTest, AndNotInt16x4ImmShiftedBy8) {
  __uint128_t res = MakeUInt128(0x8354056704038674ULL, 0x3513622224771589ULL);

  // BIC immediate with LSL #8: clears bits 0xa800 in every 16-bit lane.
  asm("bic %0.4h, #0xa8, lsl #8" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0354056704030674ULL, 0x0000000000000000ULL));
}
3772
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy8) {
  __uint128_t res = MakeUInt128(0x1842631298608099ULL, 0x8886874132604721ULL);

  // BIC immediate with LSL #8 on 32-bit lanes: clears bits 0x0000d300.
  asm("bic %0.2s, #0xd3, lsl #8" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x1842201298600099ULL, 0x0000000000000000ULL));
}
3780
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy16) {
  __uint128_t res = MakeUInt128(0x2947867242292465ULL, 0x4366800980676928ULL);

  // BIC immediate with LSL #16 on 32-bit lanes: clears bits 0x00220000.
  asm("bic %0.2s, #0x22, lsl #16" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x2945867242092465ULL, 0x0000000000000000ULL));
}
3788
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy24) {
  __uint128_t res = MakeUInt128(0x0706977942236250ULL, 0x8221688957383798ULL);

  // BIC immediate with LSL #24 on 32-bit lanes: clears bits 0x83000000.
  asm("bic %0.2s, #0x83, lsl #24" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0406977940236250ULL, 0x0000000000000000ULL));
}
3796
TEST(Arm64InsnTest, OrInt16x4Imm) {
  __uint128_t res = MakeUInt128(0x0841284886269456ULL, 0x0424196528502221ULL);

  // ORR (vector, immediate): sets bits 0 and 2 in every 16-bit lane of the
  // low 64 bits; upper half of the register is zeroed by the 64-bit form.
  asm("orr %0.4h, #0x5" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0845284d86279457ULL, 0x0000000000000000ULL));
}
3804
TEST(Arm64InsnTest, OrNotInt8x16) {
  // ORN (vector): first operand OR NOT second operand, all 128 bits.
  __uint128_t lhs = MakeUInt128(0x5428584447952658ULL, 0x6782105114135473ULL);
  __uint128_t rhs = MakeUInt128(0x3558764024749647ULL, 0x3263914199272604ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.16b, %1.16b, %2.16b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0xdeafd9ffdf9f6ff8ULL, 0xef9e7eff76dbddfbULL));
}
3811
TEST(Arm64InsnTest, OrNotInt8x8) {
  // ORN (vector, 8b form): OR-NOT of the low halves; upper half is cleared.
  __uint128_t lhs = MakeUInt128(0x3279178608578438ULL, 0x3827967976681454ULL);
  __uint128_t rhs = MakeUInt128(0x6838689427741559ULL, 0x9185592524595395ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.8b, %1.8b, %2.8b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0xb7ff97efd8dfeebeULL, 0x0000000000000000ULL));
}
3818
TEST(Arm64InsnTest, BitwiseSelectInt8x8) {
  __uint128_t op1 = MakeUInt128(0x2000568127145263ULL, 0x5608277857713427ULL);
  __uint128_t op2 = MakeUInt128(0x0792279689258923ULL, 0x5420199561121290ULL);
  __uint128_t op3 = MakeUInt128(0x8372978049951059ULL, 0x7317328160963185ULL);
  // BSL: the destination (preloaded with op3 by the W0 wrapper) acts as a bit
  // mask selecting op1 bits where set and op2 bits where clear.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bsl %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x0480369681349963ULL, 0x0000000000000000ULL));
}
3826
TEST(Arm64InsnTest, BitwiseInsertIfTrueInt8x8) {
  __uint128_t op1 = MakeUInt128(0x3678925903600113ULL, 0x3053054882046652ULL);
  __uint128_t op2 = MakeUInt128(0x9326117931051185ULL, 0x4807446237996274ULL);
  __uint128_t op3 = MakeUInt128(0x6430860213949463ULL, 0x9522473719070217ULL);
  // BIT: copies op1 bits into the destination (preloaded with op3) wherever
  // the corresponding bit of op2 is set.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bit %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x7630965b03908563ULL, 0x0000000000000000ULL));
}
3834
TEST(Arm64InsnTest, BitwiseInsertIfFalseInt8x8) {
  __uint128_t op1 = MakeUInt128(0x7067982148086513ULL, 0x2823066470938446ULL);
  __uint128_t op2 = MakeUInt128(0x5964462294895493ULL, 0x0381964428810975ULL);
  __uint128_t op3 = MakeUInt128(0x0348610454326648ULL, 0x2133936072602491ULL);
  // BIF: copies op1 bits into the destination (preloaded with op3) wherever
  // the corresponding bit of op2 is clear.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bif %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x2143d8015c006500ULL, 0x0000000000000000ULL));
}
3842
TEST(Arm64InsnTest, ArithmeticShiftRightInt64x1) {
  __uint128_t arg = MakeUInt128(0x9486015046652681ULL, 0x4398770516153170ULL);
  // SSHR (scalar): arithmetic shift right; the negative input sign-extends,
  // and the upper half of the destination register is cleared.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #39")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffff290c02ULL, 0x0000000000000000ULL));
}
3848
TEST(Arm64InsnTest, ArithmeticShiftRightBy64Int64x1) {
  __uint128_t arg = MakeUInt128(0x9176042601763387ULL, 0x0454990176143641ULL);
  // SSHR by the full element width (#64) fills the result with the sign bit.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #64")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3854
TEST(Arm64InsnTest, ArithmeticShiftRightInt64x2) {
  __uint128_t arg = MakeUInt128(0x7501116498327856ULL, 0x3531614516845769ULL);
  // SSHR (vector): independent arithmetic shift of each 64-bit lane.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %0.2d, %1.2d, #35")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000000ea0222cULL, 0x0000000006a62c28ULL));
}
3860
TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9667179643468760ULL, 0x0770479995378833ULL);
  __uint128_t arg2 = MakeUInt128(0x2557176908196030ULL, 0x9201824018842705ULL);
  // SSRA: arithmetic shift right of arg1, then accumulate into the
  // destination, which the W0 wrapper preloads with arg2.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #40")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2557176907afc747ULL, 0x0000000000000000ULL));
}
3867
TEST(Arm64InsnTest, ArithmeticShiftRightBy64AccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9223343657791601ULL, 0x2809317940171859ULL);
  __uint128_t arg2 = MakeUInt128(0x3498025249906698ULL, 0x4233017350358044ULL);
  // SSRA #64 on a negative arg1 accumulates -1 (all sign bits) into arg2.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #64")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3498025249906697ULL, 0x0000000000000000ULL));
}
3874
TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  __uint128_t arg2 = MakeUInt128(0x0101655256375678ULL, 0x5667227966198857ULL);
  // SSRA (vector): per 16-bit lane arithmetic shift of arg1 accumulated into
  // the destination preloaded with arg2.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00fa6556563a567dULL, 0x5669227b6611885cULL));
}
3881
TEST(Arm64InsnTest, ArithmeticRoundingShiftRightAccumulateInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9894671543578468ULL, 0x7886144458123145ULL);
  __uint128_t arg2 = MakeUInt128(0x1412147805734551ULL, 0x0500801908699603ULL);
  // SRSRA: signed rounding shift right (adds 1 << 11 before shifting by 12)
  // per 16-bit lane, accumulated into the destination preloaded with arg2.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x140c147e05774549ULL, 0x0508801a086f9606ULL));
}
3888
TEST(Arm64InsnTest, LogicalShiftRightInt64x1) {
  __uint128_t arg = MakeUInt128(0x9859771921805158ULL, 0x5321473926532515ULL);
  // USHR (scalar): logical (zero-filling) shift right of the low 64 bits.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #33")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000004c2cbb8cULL, 0x0000000000000000ULL));
}
3894
TEST(Arm64InsnTest, LogicalShiftRightBy64Int64x1) {
  __uint128_t arg = MakeUInt128(0x9474696134360928ULL, 0x6148494178501718ULL);
  // USHR by the full element width (#64) yields zero.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #64")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
3900
TEST(Arm64InsnTest, LogicalShiftRightInt64x2) {
  // USHR (vector): logical shift right applied to each 64-bit lane.
  __uint128_t input = MakeUInt128(0x3962657978771855ULL, 0x6084552965412665ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %0.2d, %1.2d, #33")(input);
  ASSERT_EQ(actual, MakeUInt128(0x000000001cb132bcULL, 0x0000000030422a94ULL));
}
3906
TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9004112453790153ULL, 0x3296615697052237ULL);
  __uint128_t arg2 = MakeUInt128(0x0499939532215362ULL, 0x2748476603613677ULL);
  // USRA: logical shift right of arg1 accumulated into the destination,
  // which the W0 wrapper preloads with arg2.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #40")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0499939532b15773ULL, 0x0000000000000000ULL));
}
3913
TEST(Arm64InsnTest, LogicalShiftRightBy64AccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9886592578662856ULL, 0x1249665523533829ULL);
  __uint128_t arg2 = MakeUInt128(0x3559152534784459ULL, 0x8183134112900199ULL);
  // USRA #64 shifts arg1 to zero, so the accumulator (arg2) is unchanged.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #64")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3559152534784459ULL, 0x0000000000000000ULL));
}
3920
TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9984345225161050ULL, 0x7027056235266012ULL);
  __uint128_t arg2 = MakeUInt128(0x4628654036036745ULL, 0x3286510570658748ULL);
  // USRA (vector): per 16-bit lane logical shift of arg1 accumulated into the
  // destination preloaded with arg2.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x4631654336056746ULL, 0x328d51057068874eULL));
}
3927
TEST(Arm64InsnTest, LogicalRoundingShiftRightAccumulateInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9843452251610507ULL, 0x0270562352660127ULL);
  __uint128_t arg2 = MakeUInt128(0x6286540360367453ULL, 0x2865105706587488ULL);
  // URSRA: unsigned rounding shift right (adds 1 << 11 before shifting by 12)
  // per 16-bit lane, accumulated into the destination preloaded with arg2.
  // The test previously used the signed variant (srsra), duplicating the
  // ArithmeticRounding... test above instead of covering the unsigned
  // instruction its name promises; lane 0x9843 now rounds to 0x000a rather
  // than sign-extending to 0xfffa, which changes the topmost low-half lane.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ursra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x62905407603b7453ULL, 0x2865105c065d7488ULL));
}
3934
TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x1) {
  __uint128_t arg = MakeUInt128(0x9323685785585581ULL, 0x9555604215625088ULL);
  // SRSHR: signed shift right with rounding (adds 1 << 39 before the shift).
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %d0, %d1, #40")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffff932368ULL, 0x0000000000000000ULL));
}
3940
TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x2) {
  __uint128_t arg = MakeUInt128(0x8714878398908107ULL, 0x4295309410605969ULL);
  // SRSHR (vector): rounding arithmetic shift applied to each 64-bit lane.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %0.2d, %1.2d, #36")(arg);
  ASSERT_EQ(res, MakeUInt128(0xfffffffff8714878ULL, 0x0000000004295309ULL));
}
3946
TEST(Arm64InsnTest, SignedRoundingShiftRightAccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9946016520577405ULL, 0x2942305360178031ULL);
  __uint128_t arg2 = MakeUInt128(0x3960188013782542ULL, 0x1927094767337191ULL);
  // SRSRA: rounding arithmetic shift of arg1 accumulated into the
  // destination, which the W0 wrapper preloads with arg2.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %d0, %d1, #33")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3960187fe01b25f5ULL, 0x0000000000000000ULL));
}
3953
TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x1) {
  __uint128_t arg = MakeUInt128(0x9713552208445285ULL, 0x2640081252027665ULL);
  // URSHR: unsigned shift right with rounding (adds 1 << 32 before the shift).
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %d0, %d1, #33")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000004b89aa91ULL, 0x0000000000000000ULL));
}
3959
TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x2) {
  __uint128_t arg = MakeUInt128(0x6653398573888786ULL, 0x6147629443414010ULL);
  // URSHR (vector): rounding logical shift applied to each 64-bit lane.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %0.2d, %1.2d, #34")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000001994ce61ULL, 0x000000001851d8a5ULL));
}
3965
TEST(Arm64InsnTest, UnsignedRoundingShiftRightAccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9616143204006381ULL, 0x3224658411111577ULL);
  __uint128_t arg2 = MakeUInt128(0x7184728147519983ULL, 0x5050478129771859ULL);
  // URSRA: rounding logical shift of arg1 accumulated into the destination,
  // which the W0 wrapper preloads with arg2.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ursra %d0, %d1, #33")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x71847281925ca39cULL, 0x0000000000000000ULL));
}
3972
TEST(Arm64InsnTest, ShiftLeftInt64x1) {
  __uint128_t arg = MakeUInt128(0x3903594664691623ULL, 0x5396809201394578ULL);
  // SHL (scalar): logical shift left of the low 64 bits; upper half cleared.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %d0, %d1, #35")(arg);
  ASSERT_EQ(res, MakeUInt128(0x2348b11800000000ULL, 0x0000000000000000ULL));
}
3978
TEST(Arm64InsnTest, ShiftLeftInt64x2) {
  __uint128_t arg = MakeUInt128(0x0750111649832785ULL, 0x6353161451684576ULL);
  // SHL (vector): independent shift left of each 64-bit lane.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.2d, %1.2d, #37")(arg);
  ASSERT_EQ(res, MakeUInt128(0x3064f0a000000000ULL, 0x2d08aec000000000ULL));
}
3984
TEST(Arm64InsnTest, ShiftLeftInt8x8) {
  __uint128_t arg = MakeUInt128(0x0402956047346131ULL, 0x1382638788975517ULL);
  // SHL (vector): per-byte shift left by 6 keeps only the low two bits of
  // each byte; upper half of the destination is cleared.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.8b, %1.8b, #6")(arg);
  ASSERT_EQ(res, MakeUInt128(0x00804000c0004040ULL, 0x0000000000000000ULL));
}
3990
TEST(Arm64InsnTest, ShiftRightInsertInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9112232618794059ULL, 0x9415540632701319ULL);
  __uint128_t arg2 = MakeUInt128(0x1537675115830432ULL, 0x0849872092028092ULL);
  // SRI: shifts arg1 right by 20 and inserts it into the destination
  // (preloaded with arg2), preserving the destination's top 20 bits.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %d0, %d1, #20")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1537691122326187ULL, 0x0000000000000000ULL));
}
3997
TEST(Arm64InsnTest, ShiftRightInsertInt64x2) {
  __uint128_t arg1 = MakeUInt128(0x7332335603484653ULL, 0x1873029302665964ULL);
  __uint128_t arg2 = MakeUInt128(0x5013718375428897ULL, 0x5579714499246540ULL);
  // SRI (vector): right-shift-insert per 64-bit lane; each lane of the
  // destination keeps its top 21 bits.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %0.2d, %1.2d, #21")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x50137399919ab01aULL, 0x557970c398149813ULL));
}
4004
TEST(Arm64InsnTest, ShiftLeftInsertInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x3763526969344354ULL, 0x4004730671988689ULL);
  __uint128_t arg2 = MakeUInt128(0x6369498567302175ULL, 0x2313252926537589ULL);
  // SLI: shifts arg1 left by 23 and inserts it into the destination
  // (preloaded with arg2), preserving the destination's low 23 bits.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %d0, %d1, #23")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x34b49a21aa302175ULL, 0x0000000000000000ULL));
}
4011
TEST(Arm64InsnTest, ShiftLeftInsertInt64x2) {
  __uint128_t arg1 = MakeUInt128(0x3270206902872323ULL, 0x3005386216347988ULL);
  __uint128_t arg2 = MakeUInt128(0x5094695472004795ULL, 0x2311201504329322ULL);
  // SLI (vector): left-shift-insert per 64-bit lane; each lane of the
  // destination keeps its low 21 bits.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %0.2d, %1.2d, #21")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0d2050e464604795ULL, 0x0c42c68f31129322ULL));
}
4018
TEST(Arm64InsnTest, ShiftLeftLongInt8x8) {
  __uint128_t arg = MakeUInt128(0x2650697620201995ULL, 0x5484126500053944ULL);
  // SHLL: widens each byte of the low half to 16 bits and shifts it left by
  // the element width (8).
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll %0.8h, %1.8b, #8")(arg);
  ASSERT_EQ(res, MakeUInt128(0x2000200019009500ULL, 0x2600500069007600ULL));
}
4024
TEST(Arm64InsnTest, ShiftLeftLongInt8x8Upper) {
  __uint128_t arg = MakeUInt128(0x9050429225978771ULL, 0x0667873840000616ULL);
  // SHLL2: same as SHLL but consumes the upper eight bytes of the source.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll2 %0.8h, %1.16b, #8")(arg);
  ASSERT_EQ(res, MakeUInt128(0x4000000006001600ULL, 0x0600670087003800ULL));
}
4030
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2) {
  __uint128_t arg = MakeUInt128(0x9075407923424023ULL, 0x0092590070173196ULL);
  // SSHLL: sign-extends each low 32-bit lane to 64 bits, then shifts left.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll %0.2d, %1.2s, #9")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000004684804600ULL, 0xffffff20ea80f200ULL));
}
4036
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2Upper) {
  __uint128_t arg = MakeUInt128(0x9382432227188515ULL, 0x9740547021482897ULL);
  // SSHLL2: same as SSHLL but consumes the upper two 32-bit lanes.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll2 %0.2d, %1.4s, #9")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000004290512e00ULL, 0xffffff2e80a8e000ULL));
}
4042
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2By0) {
  __uint128_t arg = MakeUInt128(0x9008777697763127ULL, 0x9572267265556259ULL);
  // SXTL is an alias for SSHLL for the shift count being zero.
  // Each low 32-bit lane is sign-extended to a 64-bit lane.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sxtl %0.2d, %1.2s")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffff97763127ULL, 0xffffffff90087776ULL));
}
4049
TEST(Arm64InsnTest, ShiftLeftLongInt32x2) {
  __uint128_t arg = MakeUInt128(0x9094334676851422ULL, 0x1447737939375170ULL);
  // USHLL: zero-extends each low 32-bit lane to 64 bits, then shifts left.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll %0.2d, %1.2s, #9")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000ed0a284400ULL, 0x0000012128668c00ULL));
}
4055
TEST(Arm64InsnTest, ShiftLeftLongInt32x2Upper) {
  __uint128_t arg = MakeUInt128(0x7096834080053559ULL, 0x8491754173818839ULL);
  // USHLL2: same as USHLL but consumes the upper two 32-bit lanes.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll2 %0.2d, %1.4s, #17")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000e70310720000ULL, 0x00010922ea820000ULL));
}
4061
TEST(Arm64InsnTest, ShiftLeftLongInt32x2By0) {
  __uint128_t arg = MakeUInt128(0x9945681506526530ULL, 0x5371829412703369ULL);
  // UXTL is an alias for USHLL for the shift count being zero.
  // Each low 32-bit lane is zero-extended to a 64-bit lane.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uxtl %0.2d, %1.2s")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000006526530ULL, 0x0000000099456815ULL));
}
4068
TEST(Arm64InsnTest, ShiftRightNarrowI16x8) {
  __uint128_t arg = MakeUInt128(0x9378541786109696ULL, 0x9202538865034577ULL);
  // SHRN: shifts each 16-bit lane right by 2 and narrows (truncates) it to a
  // byte; the result occupies the low half of the destination.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shrn %0.8b, %1.8h, #2")(arg);
  ASSERT_EQ(res, MakeUInt128(0x80e2405dde0584a5ULL, 0x0000000000000000ULL));
}
4074
TEST(Arm64InsnTest, ShiftRightNarrowI16x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t arg2 = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  // SHRN2: writes the narrowed lanes into the upper half of the destination,
  // leaving the preloaded lower half (arg2's low 64 bits) intact.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("shrn2 %0.16b, %1.8h, #2")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3879158299848645ULL, 0xd8988dc1de009890ULL));
}
4081
TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8) {
  __uint128_t arg = MakeUInt128(0x9303774688099929ULL, 0x6877582441047878ULL);
  // RSHRN: like SHRN but rounds (adds 1 << 1) before shifting each lane.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rshrn %0.8b, %1.8h, #2")(arg);
  ASSERT_EQ(res, MakeUInt128(0x1e09411ec1d2024aULL, 0x0000000000000000ULL));
}
4087
TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9314507607167064ULL, 0x3556827437743965ULL);
  __uint128_t arg2 = MakeUInt128(0x2103098604092717ULL, 0x0909512808630902ULL);
  // RSHRN2: rounded narrow into the upper half; the preloaded lower half
  // (arg2's low 64 bits) is preserved.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("rshrn2 %0.16b, %1.8h, #2")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2103098604092717ULL, 0x569ddd59c51ec619ULL));
}
4094
TEST(Arm64InsnTest, AddInt64x1) {
  // ADD (scalar): 64-bit addition of the low halves; the upper halves of the
  // inputs are ignored and the destination's upper half is cleared.
  __uint128_t lhs = MakeUInt128(0x0080000000000003ULL, 0xdeadbeef01234567ULL);
  __uint128_t rhs = MakeUInt128(0x0080000000000005ULL, 0x0123deadbeef4567ULL);
  __uint128_t sum = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %d0, %d1, %d2")(lhs, rhs);
  ASSERT_EQ(sum, MakeUInt128(0x0100000000000008ULL, 0x0ULL));
}
4101
TEST(Arm64InsnTest, AddInt32x4) {
  // ADD (vector, 4s form) sums four independent 32-bit lanes.  Every lane has
  // its sign bit set so the test catches any carry leaking between lanes.
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  __uint128_t sum = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.4s, %1.4s, %2.4s")(lhs, rhs);
  ASSERT_EQ(sum, MakeUInt128(0x0000000700000003ULL, 0x0000000f0000000bULL));
}
4110
TEST(Arm64InsnTest, AddInt32x2) {
  // ADD (vector, 2s form): only the two low 32-bit lanes are summed; the
  // upper half of the destination is cleared.
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  __uint128_t sum = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2s, %1.2s, %2.2s")(lhs, rhs);
  ASSERT_EQ(sum, MakeUInt128(0x0000000700000003ULL, 0));
}
4117
TEST(Arm64InsnTest, AddInt64x2) {
  // ADD (vector, 2d form): two independent 64-bit additions; any carry out
  // of a lane is discarded rather than propagated.
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  __uint128_t sum = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2d, %1.2d, %2.2d")(lhs, rhs);
  ASSERT_EQ(sum, MakeUInt128(0x0000000800000003ULL, 0x000000100000000bULL));
}
4124
TEST(Arm64InsnTest, SubInt64x1) {
  // SUB (scalar): 64-bit subtraction wrapping modulo 2^64; the upper half of
  // the destination is cleared.
  __uint128_t minuend = MakeUInt128(0x0000000000000002ULL, 0x0011223344556677ULL);
  __uint128_t subtrahend = MakeUInt128(0x0000000000000003ULL, 0x0123456789abcdefULL);
  __uint128_t diff = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %d0, %d1, %d2")(minuend, subtrahend);
  ASSERT_EQ(diff, MakeUInt128(0xffffffffffffffffULL, 0x0ULL));
}
4131
TEST(Arm64InsnTest, SubInt64x2) {
  // SUB (vector, 2d form): two independent wrapping 64-bit subtractions.
  constexpr auto AsmSub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeUInt128(0x6873115956286388ULL, 0x2353787593751957ULL);
  __uint128_t arg2 = MakeUInt128(0x7818577805321712ULL, 0x2680661300192787ULL);
  __uint128_t res = AsmSub(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xf05ab9e150f64c76ULL, 0xfcd31262935bf1d0ULL));
}
4139
TEST(Arm64InsnTest, SubInt16x4) {
  // SUB (vector, 4h form): four independent 16-bit lane subtractions.
  __uint128_t minuend = MakeUInt128(0x8888777766665555ULL, 0);
  __uint128_t subtrahend = MakeUInt128(0x1111222233334444ULL, 0);
  __uint128_t diff = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.4h, %1.4h, %2.4h")(minuend, subtrahend);
  ASSERT_EQ(diff, MakeUInt128(0x7777555533331111ULL, 0));
}
4146
TEST(Arm64InsnTest, MultiplyI8x8) {
  __uint128_t arg1 = MakeUInt128(0x5261365549781893ULL, 0x1297848216829989ULL);
  __uint128_t arg2 = MakeUInt128(0x4542858444795265ULL, 0x8678210511413547ULL);
  // MUL (vector): per-byte multiply keeping the low 8 bits of each product.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1a020ed464b8b0ffULL, 0x0000000000000000ULL));
}
4153
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8) {
  __uint128_t arg1 = MakeUInt128(0x5848406353422072ULL, 0x2258284886481584ULL);
  __uint128_t arg2 = MakeUInt128(0x7823986456596116ULL, 0x3548862305774564ULL);
  __uint128_t arg3 = MakeUInt128(0x8797108931456691ULL, 0x3686722874894056ULL);
  // MLA: per-byte multiply of arg1 and arg2, accumulated into the
  // destination, which the W0 wrapper preloads with arg3.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.8b, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xc76f10351337865dULL, 0x0000000000000000ULL));
}
4161
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x4143334547762416ULL, 0x8625189835694855ULL);
  __uint128_t arg2 = MakeUInt128(0x5346462080466842ULL, 0x5906949129331367ULL);
  __uint128_t arg3 = MakeUInt128(0x0355876402474964ULL, 0x7326391419927260ULL);
  // MLA (by element): multiplies every 16-bit lane of arg1 by lane 0 of arg2
  // and accumulates into the destination preloaded with arg3.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.4h, %1.4h, %2.h[0]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x0e9bc72e5eb38710ULL, 0x0000000000000000ULL));
}
4169
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElemPosition2) {
  __uint128_t arg1 = MakeUInt128(0x1431429809190659ULL, 0x2509372216964615ULL);
  __uint128_t arg2 = MakeUInt128(0x2686838689427741ULL, 0x5599185592524595ULL);
  __uint128_t arg3 = MakeUInt128(0x6099124608051243ULL, 0x8843904512441365ULL);
  // MLA (by element): uses 32-bit lane 2 of arg2 (from the upper half) as
  // the scalar multiplier.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.2s, %1.2s, %2.s[2]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x6ce7ccbedccdc110ULL, 0x0000000000000000ULL));
}
4177
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t arg2 = MakeUInt128(0x6549911988183479ULL, 0x7753566369807426ULL);
  __uint128_t arg3 = MakeUInt128(0x4524919217321721ULL, 0x4772350141441973ULL);
  // MLS (by element): multiplies every 16-bit lane of arg1 by lane 1 of arg2
  // and subtracts the products from the destination preloaded with arg3.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.4h, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xcefce99ad58a9ad9ULL, 0x0000000000000000ULL));
}
4185
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8) {
  __uint128_t arg1 = MakeUInt128(0x0635342207222582ULL, 0x8488648158456028ULL);
  __uint128_t arg2 = MakeUInt128(0x9864565961163548ULL, 0x8623057745649803ULL);
  __uint128_t arg3 = MakeUInt128(0x1089314566913686ULL, 0x7228748940560101ULL);
  // MLS: per-byte multiply of arg1 and arg2, subtracted from the destination
  // preloaded with arg3.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.8b, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x80d5b973bfa58df6ULL, 0x0000000000000000ULL));
}
4193
TEST(Arm64InsnTest, MultiplyI32x4IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x143334547762416ULL, 0x8625189835694855ULL);
  __uint128_t arg2 = MakeUInt128(0x627232791786085ULL, 0x7843838279679766ULL);
  // MUL (by element): multiplies every 32-bit lane of arg1 by lane 1 of arg2,
  // keeping the low 32 bits of each product.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.4s, %1.4s, %2.s[1]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xcec23e830d48815aULL, 0xd12b87288ae0a3f3ULL));
}
4200
TEST(Arm64InsnTest, PolynomialMultiplyU8x8) {
  __uint128_t arg1 = MakeUInt128(0x1862056476931257ULL, 0x0586356620185581ULL);
  __uint128_t arg2 = MakeUInt128(0x1668039626579787ULL, 0x7185560845529654ULL);
  // PMUL: carry-less (GF(2)) per-byte multiply keeping the low 8 bits.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmul %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xd0d00f18f4095e25ULL, 0x0000000000000000ULL));
}
4207
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8) {
  __uint128_t arg1 = MakeUInt128(0x1327656180937734ULL, 0x4403070746921120ULL);
  __uint128_t arg2 = MakeUInt128(0x9838952286847831ULL, 0x2355265821314495ULL);
  // PMULL: carry-less multiply of each low byte pair, widened to 16 bits.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.8h, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x43004bcc17e805f4ULL, 0x082807a835210ce2ULL));
}
4214
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x4439658253375438ULL, 0x8569094113031509ULL);
  __uint128_t arg2 = MakeUInt128(0x1865619673378623ULL, 0x6256125216320862ULL);
  // PMULL2: same as PMULL but consumes the upper eight byte pairs.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.8h, %1.16b, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x015a005600a80372ULL, 0x30ea1da6008214d2ULL));
}
4221
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2) {
  __uint128_t arg1 = MakeUInt128(0x1000100010001000ULL, 0xffffeeeeffffeeeeULL);
  __uint128_t arg2 = MakeUInt128(0x10001ULL, 0xffffeeeeffffeeeeULL);
  // PMULL (1q form): carry-less 64x64 -> 128 bit multiply of the low lanes.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.1q, %1.1d, %2.1d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1000ULL, 0x1000ULL));
}
4228
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2Upper) {
  __uint128_t arg1 = MakeUInt128(0xffffeeeeffffeeeeULL, 0x1000100010001000ULL);
  __uint128_t arg2 = MakeUInt128(0xffffeeeeffffeeeeULL, 0x10001ULL);
  // PMULL2 (1q form): carry-less 64x64 -> 128 bit multiply of the high lanes.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.1q, %1.2d, %2.2d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1000ULL, 0x1000ULL));
}
4235
TEST(Arm64InsnTest, PairwiseAddInt8x16) {
  // ADDP (vector): adds adjacent byte pairs across the concatenation of the
  // two operands (first operand supplies the low half of the result).
  __uint128_t lhs = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t rhs = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.16b, %1.16b, %2.16b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0xeda96521dd995511ULL, 0x1d1915110d090501ULL));
}
4242
TEST(Arm64InsnTest, PairwiseAddInt8x8) {
  // ADDP (8b form): pairwise byte sums over the two low halves; the upper
  // half of the destination is cleared.
  __uint128_t lhs = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t rhs = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.8b, %1.8b, %2.8b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x0d090501dd995511ULL, 0));
}
4249
TEST(Arm64InsnTest, PairwiseAddInt64x2) {
  // ADDP (2d form): each result lane is the sum of one operand's two lanes.
  __uint128_t lhs = MakeUInt128(1ULL, 2ULL);
  __uint128_t rhs = MakeUInt128(3ULL, 4ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.2d, %1.2d, %2.2d")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(3ULL, 7ULL));
}
4256
TEST(Arm64InsnTest, CompareEqualInt8x16) {
  // CMEQ (vector): each byte lane becomes all-ones when equal, zero otherwise.
  __uint128_t lhs = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t rhs = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  __uint128_t mask = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.16b, %1.16b, %2.16b")(lhs, rhs);
  ASSERT_EQ(mask, MakeUInt128(0xff0000ffff00ff00ULL, 0x0000ff00ff0000ffULL));
}
4263
TEST(Arm64InsnTest, CompareEqualInt8x8) {
  // CMEQ (8b form): byte-lane equality over the low halves only; the upper
  // half of the destination is cleared.
  __uint128_t lhs = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t rhs = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  __uint128_t mask = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.8b, %1.8b, %2.8b")(lhs, rhs);
  ASSERT_EQ(mask, MakeUInt128(0xff0000ffff00ff00ULL, 0));
}
4270
TEST(Arm64InsnTest, CompareEqualInt16x4) {
  // CMEQ (4h form): 16-bit lane equality yields 0xffff or 0x0000 per lane.
  __uint128_t lhs = MakeUInt128(0x4444333322221111ULL, 0);
  __uint128_t rhs = MakeUInt128(0x8888333300001111ULL, 0);
  __uint128_t mask = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.4h, %1.4h, %2.4h")(lhs, rhs);
  ASSERT_EQ(mask, MakeUInt128(0x0000ffff0000ffffULL, 0));
}
4277
TEST(Arm64InsnTest, CompareEqualInt64x1) {
  // CMEQ (scalar): all-ones when the low 64 bits match, zero otherwise; the
  // upper halves of the inputs are ignored.
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t arg2 = MakeUInt128(0x0665499119881834ULL, 0x7977535663698074ULL);
  __uint128_t arg3 = MakeUInt128(0x8297455570674983ULL, 0x1452491921732172ULL);
  ASSERT_EQ(AsmCmeq(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmeq(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4286
TEST(Arm64InsnTest,CompareEqualZeroInt64x1)4287 TEST(Arm64InsnTest, CompareEqualZeroInt64x1) {
4288 constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %d0, %d1, #0");
4289 __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
4290 __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x1746089232839170ULL);
4291 ASSERT_EQ(AsmCmeq(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
4292 ASSERT_EQ(AsmCmeq(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
4293 }
4294
TEST(Arm64InsnTest,CompareEqualZeroInt8x16)4295 TEST(Arm64InsnTest, CompareEqualZeroInt8x16) {
4296 __uint128_t op = MakeUInt128(0x0000555500332200ULL, 0x0000000077001100ULL);
4297 __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.16b, %1.16b, #0")(op);
4298 ASSERT_EQ(rd, MakeUInt128(0xffff0000ff0000ffULL, 0xffffffff00ff00ffULL));
4299 }
4300
TEST(Arm64InsnTest,CompareEqualZeroInt8x8)4301 TEST(Arm64InsnTest, CompareEqualZeroInt8x8) {
4302 __uint128_t op = MakeUInt128(0x001122330000aaaaULL, 0xdeadbeef0000cafeULL);
4303 __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.8b, %1.8b, #0")(op);
4304 ASSERT_EQ(rd, MakeUInt128(0xff000000ffff0000ULL, 0));
4305 }
4306
// CMGT (register): signed greater-than compare, scalar D form (low 64 bits).
TEST(Arm64InsnTest, CompareGreaterInt64x1) {
  constexpr auto AsmCmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1976668559233565ULL, 0x4639138363185745ULL);
  __uint128_t arg2 = MakeUInt128(0x3474940784884423ULL, 0x7721751543342603ULL);
  __uint128_t arg3 = MakeUInt128(0x1976668559233565ULL, 0x8183196376370761ULL);
  // arg4's low half has the sign bit set, so it is negative in the signed compare.
  __uint128_t arg4 = MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL);
  ASSERT_EQ(AsmCmgt(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg1, arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg1, arg4), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// CMGT against immediate zero, scalar D form: positive -> ones, zero and
// negative -> zeros.
TEST(Arm64InsnTest, CompareGreaterZeroInt64x1) {
  constexpr auto AsmCmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x6174599705674507ULL);
  __uint128_t arg3 = MakeUInt128(0x9592057668278967ULL, 0x7644531840404185ULL);
  ASSERT_EQ(AsmCmgt(arg1), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// Vector CMGT > 0 on signed bytes: 0x01..0x7f -> 0xff; 0x00 and 0x80..0xff -> 0x00.
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x16) {
  __uint128_t op = MakeUInt128(0x807fff00017efe02ULL, 0xff7f80000102fe02ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x00ff0000ffff00ffULL, 0x00ff0000ffff00ffULL));
}

// 64-bit form of the same compare; upper half zeroed.
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x00ff7f80017efe00ULL, 0x0000cafedeadbeefULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x0000ff00ffff0000ULL, 0));
}
4339
// CMGT (register): signed greater-than on eight 16-bit lanes.
TEST(Arm64InsnTest, CompareGreaterThanInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9789389001852956ULL, 0x9196780455448285ULL);
  // ULL suffix added for consistency with every other literal in this file.
  // The value is unchanged (it already fit in a signed 64-bit constant).
  __uint128_t arg2 = MakeUInt128(0x7269389081795897ULL, 0x5469399264218285ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffff0000ULL, 0x0000ffff00000000ULL));
}
4346
// CMGT with 32-bit lanes: 0x00000000 > 0xffffffff (-1 signed) -> all ones.
TEST(Arm64InsnTest, CompareGreaterThanInt32x4) {
  __uint128_t arg1 = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t arg2 = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.4s, %1.4s, %2.4s")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL));
}
4353
// CMLT (zero): signed less-than-zero, scalar D form; only a set sign bit
// yields all ones.
TEST(Arm64InsnTest, CompareLessZeroInt64x1) {
  constexpr auto AsmCmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x4784264567633881ULL, 0x8807565612168960ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x8955999911209916ULL);
  __uint128_t arg3 = MakeUInt128(0x9364610175685060ULL, 0x1671453543158148ULL);
  ASSERT_EQ(AsmCmlt(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmlt(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmlt(arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// Vector CMLT < 0 on signed bytes: only bytes >= 0x80 map to 0xff.
TEST(Arm64InsnTest, CompareLessThanZeroInt8x16) {
  __uint128_t op = MakeUInt128(0xff00017ffe020180ULL, 0x0001027e7ffeff80ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xff000000ff0000ffULL, 0x0000000000ffffffULL));
}

// 64-bit form; upper half zeroed.
TEST(Arm64InsnTest, CompareLessThanZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x0002017e7fff8000ULL, 0x001100220000ffffULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x0000000000ffff00ULL, 0));
}
4375
// CMGE (register): signed greater-than-or-equal, scalar D form.
TEST(Arm64InsnTest, CompareGreaterThanEqualInt64x1) {
  constexpr auto AsmCmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t arg2 = MakeUInt128(0x5890939568814856ULL, 0x0263224393726562ULL);
  // arg3's low half equals arg1's, exercising the "equal" case of CMGE.
  __uint128_t arg3 = MakeUInt128(0x1009391369138107ULL, 0x5511995818319637ULL);
  __uint128_t arg4 = MakeUInt128(0x9427141009391369ULL, 0x1381072581378135ULL);
  ASSERT_EQ(AsmCmge(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg1, arg4), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// CMGE against immediate zero, scalar D form: zero itself satisfies >= 0.
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt64x1) {
  constexpr auto AsmCmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x5562116715468484ULL, 0x7780394475697980ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x3548487562529875ULL);
  __uint128_t arg3 = MakeUInt128(0x9212366168902596ULL, 0x2730430679316531ULL);
  ASSERT_EQ(AsmCmge(arg1), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// Vector CMGE >= 0 on signed bytes: 0x00..0x7f -> 0xff; 0x80..0xff -> 0x00.
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x16) {
  __uint128_t op = MakeUInt128(0x00ff01027ffe8002ULL, 0x80fffe7f7e020100ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xff00ffffff0000ffULL, 0x000000ffffffffffULL));
}

// 64-bit form; upper half zeroed.
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x0001027f80feff00ULL, 0x0011223344556677ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffffff000000ffULL, 0));
}

// CMGE with 16-bit lanes.
TEST(Arm64InsnTest, CompareGreaterEqualInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4391962838870543ULL, 0x6777432242768091ULL);
  __uint128_t arg2 = MakeUInt128(0x4391838548318875ULL, 0x0142432208995068ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff0000ffffULL, 0xffffffffffff0000ULL));
}
4415
// CMLE (zero): signed less-than-or-equal-to-zero, scalar D form; zero and
// negative values both yield all ones.
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt64x1) {
  constexpr auto AsmCmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x3643296406335728ULL, 0x1070788758164043ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x5865720227637840ULL);
  __uint128_t arg3 = MakeUInt128(0x8694346828590066ULL, 0x6408063140777577ULL);
  ASSERT_EQ(AsmCmle(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmle(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmle(arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// Vector CMLE <= 0 on signed bytes: 0x00 and 0x80..0xff map to 0xff.
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x16) {
  __uint128_t op = MakeUInt128(0x80fffe7f7e020100ULL, 0x00ff01027ffe8002ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffff00000000ffULL, 0xffff000000ffff00ULL));
}
4431
// CMHI (register): UNSIGNED higher compare, scalar D form.
TEST(Arm64InsnTest, CompareHigherInt64x1) {
  constexpr auto AsmCmhi = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t arg2 = MakeUInt128(0x0759167297007850ULL, 0x5807171863810549ULL);
  // arg3's low half equals arg1's: equal is NOT higher.
  __uint128_t arg3 = MakeUInt128(0x1009391369138107ULL, 0x6026322439372656ULL);
  // arg4's low half has the top bit set: larger than arg1 when unsigned.
  __uint128_t arg4 = MakeUInt128(0x9087839523245323ULL, 0x7896029841669225ULL);
  ASSERT_EQ(AsmCmhi(arg1, arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhi(arg1, arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhi(arg1, arg4), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// CMHI with 16-bit lanes.
TEST(Arm64InsnTest, CompareHigherInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t arg2 = MakeUInt128(0x2057166778967764ULL, 0x4531840442045540ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff000000000000ULL, 0x0000ffff00000000ULL));
}

// CMHI with 32-bit lanes: 0xffffffff is the larger value when unsigned
// (contrast with CompareGreaterThanInt32x4 above, which is signed).
TEST(Arm64InsnTest, CompareHigherInt32x4) {
  __uint128_t arg1 = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t arg2 = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.4s, %1.4s, %2.4s")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL));
}

// CMHS (register): unsigned higher-or-same, scalar D form; the equal case
// (arg3) yields all ones, unlike CMHI.
TEST(Arm64InsnTest, CompareHigherSameInt64x1) {
  constexpr auto AsmCmhs = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x3529566139788848ULL, 0x6050978608595701ULL);
  __uint128_t arg2 = MakeUInt128(0x1769845875810446ULL, 0x6283998806006162ULL);
  __uint128_t arg3 = MakeUInt128(0x3529566139788848ULL, 0x9001852956919678ULL);
  __uint128_t arg4 = MakeUInt128(0x9628388705436777ULL, 0x4322427680913236ULL);
  ASSERT_EQ(AsmCmhs(arg1, arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhs(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhs(arg1, arg4), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// CMHS with 16-bit lanes.
TEST(Arm64InsnTest, CompareHigherSameInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4599705674507183ULL, 0x3206503455664403ULL);
  __uint128_t arg2 = MakeUInt128(0x4264705633881880ULL, 0x3206612168960504ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0xffff00000000ffffULL));
}
4474
// CMLE <= 0 on signed bytes, 64-bit form; upper half zeroed.
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x00fffe807f020100ULL, 0x00aabbccddeeff00ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffffff000000ffULL, 0));
}
4480
// CMTST (test bits): lane becomes all ones iff (op1 & op2) is non-zero.
// The first pair below has disjoint bit patterns, so the AND is zero.
TEST(Arm64InsnTest, TestInt64x1) {
  constexpr auto AsmCmtst = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0xaaaaaaaa55555555ULL, 0x7698385483188750ULL);
  __uint128_t arg2 = MakeUInt128(0x55555555aaaaaaaaULL, 0x1429389089950685ULL);
  __uint128_t arg3 = MakeUInt128(0xaa00aa0055005500ULL, 0x4530765116803337ULL);
  ASSERT_EQ(AsmCmtst(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmtst(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// CMTST with 16-bit lanes.
TEST(Arm64InsnTest, TestInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x5999911209916464ULL, 0x6441191856827700ULL);
  __uint128_t arg2 = MakeUInt128(0x6101756850601671ULL, 0x4535431581480105ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff0000ffffULL, 0xffffffff0000ffffULL));
}
4496
// EXT: extract a vector starting at byte offset #imm from the concatenation
// op2:op1 (lowest bytes come from op1).
TEST(Arm64InsnTest, ExtractVectorFromPair) {
  __uint128_t op1 = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  __uint128_t op2 = MakeUInt128(0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.16b, %1.16b, %2.16b, #8")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x8899aabbccddeeffULL, 0x0001020304050607ULL));
}

// 64-bit EXT: only the low 8 bytes of each source participate.
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidth) {
  __uint128_t op1 = MakeUInt128(0x8138268683868942ULL, 0x7741559918559252ULL);
  __uint128_t op2 = MakeUInt128(0x3622262609912460ULL, 0x8051243884390451ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #3")(op1, op2);
  ASSERT_EQ(res, MakeUInt128(0x9124608138268683ULL, 0x0000000000000000ULL));
}

// 64-bit EXT with the minimal non-zero offset.
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidthPosition1) {
  __uint128_t op1 = MakeUInt128(0x9471329621073404ULL, 0x3751895735961458ULL);
  __uint128_t op2 = MakeUInt128(0x9048010941214722ULL, 0x1317947647772622ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #1")(op1, op2);
  ASSERT_EQ(res, MakeUInt128(0x2294713296210734ULL, 0x0000000000000000ULL));
}
4517
// LD1 (multiple structures), single register, 8 bytes.
TEST(Arm64InsnTest, Load1OneI8x8) {
  static constexpr uint64_t arg = 0x8867915896904956ULL;
  __uint128_t res;
  asm("ld1 {%0.8b}, [%1]" : "=w"(res) : "r"(&arg) : "memory");
  ASSERT_EQ(res, arg);
}

// LD1 into three consecutive registers. v0-v2 are hardcoded because the
// register list must be consecutive; mov copies them to the outputs.
TEST(Arm64InsnTest, Load1ThreeI8x8) {
  static constexpr uint64_t arg[3] = {
      0x3415354584283376ULL, 0x4378111988556318ULL, 0x7777925372011667ULL};
  __uint128_t res[3];
  asm("ld1 {v0.8b-v2.8b}, [%3]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(arg)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
}

// LD1 into four consecutive registers (the maximum list length).
TEST(Arm64InsnTest, Load1FourI8x8) {
  static constexpr uint64_t arg[4] = {
      0x9523688483099930ULL,
      0x2757419916463841ULL,
      0x4270779887088742ULL,
      0x2927705389122717ULL,
  };
  __uint128_t res[4];
  asm("ld1 {v0.8b-v3.8b}, [%4]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b\n\t"
      "mov %3.16b, v3.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(arg)
      : "v0", "v1", "v2", "v3", "memory");
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
  ASSERT_EQ(res[3], static_cast<__uint128_t>(arg[3]));
}
4562
// ST1 (multiple structures), single register, 16 bytes.
TEST(Arm64InsnTest, Store1OneI8x16) {
  static constexpr __uint128_t arg = MakeUInt128(0x7642291583425006ULL, 0x7361245384916067ULL);
  __uint128_t res;
  asm("st1 {%0.16b}, [%1]" : : "w"(arg), "r"(&res) : "memory");
  ASSERT_EQ(res, arg);
}

// ST1 from three consecutive registers; v0-v2 hardcoded (see load tests above).
TEST(Arm64InsnTest, Store1ThreeI8x8) {
  static constexpr uint64_t arg[3] = {
      0x3086436111389069ULL, 0x4202790881431194ULL, 0x4879941715404210ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st1 {v0.8b-v2.8b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
}

// ST1 from four consecutive registers (maximum list length).
TEST(Arm64InsnTest, Store1FourI8x8) {
  static constexpr uint64_t arg[4] = {
      0x8954750448339314ULL, 0x6896307633966572ULL, 0x2672704339321674ULL, 0x5421824557062524ULL};
  uint64_t res[4];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "mov v3.16b, %3.16b\n\t"
      "st1 {v0.8b-v3.8b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v0", "v1", "v2", "v3", "memory");
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
  ASSERT_EQ(res[3], arg[3]);
}
4603
// LD1 with immediate post-index: loads two registers and advances the base
// address by #32.
TEST(Arm64InsnTest, Load1TwoPostIndex) {
  __uint128_t op0 = MakeUInt128(0x5499119881834797ULL, 0x0507922796892589ULL);
  __uint128_t op1 = MakeUInt128(0x0511854807446237ULL, 0x6691368672287489ULL);
  __uint128_t array[] = {
      op0,
      op1,
  };
  __uint128_t* addr = &array[0];
  __uint128_t res0 = 0;
  __uint128_t res1 = 0;

  // The "memory" below ensures that the array contents are up to date. Without it, the
  // compiler might decide to initialize the array after the asm statement.
  //
  // We hardcode SIMD registers v0 and v1 below because there is no other way to express
  // consecutive registers, which in turn requires the mov instructions to retrieve the
  // loaded values into res0 and res1.
  asm("ld1 {v0.16b, v1.16b}, [%2], #32\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res0), "=w"(res1), "+r"(addr)
      :
      : "v0", "v1", "memory");

  ASSERT_EQ(res0, op0);
  ASSERT_EQ(res1, op1);
  // Post-index writeback: addr must have advanced past both elements.
  ASSERT_EQ(addr, &array[2]);
}
4632
// LD1 with register post-index: the base is advanced by the value of the
// offset register (32 here), independent of the transfer size.
TEST(Arm64InsnTest, Load1OnePostIndexReg) {
  static constexpr __uint128_t arg = MakeUInt128(0x4884761005564018ULL, 0x2423921926950620ULL);
  __uint128_t res_val;
  uint64_t res_addr;
  asm("ld1 {%0.16b}, [%1], %2"
      : "=w"(res_val), "=r"(res_addr)
      : "r"(static_cast<uint64_t>(32U)), "1"(&arg)
      : "memory");
  ASSERT_EQ(res_val, arg);
  ASSERT_EQ(res_addr, reinterpret_cast<uint64_t>(&arg) + 32);
}
4644
// LD1 (single structure): load one 8-bit lane; all other lanes of the
// destination register are preserved.
TEST(Arm64InsnTest, LoadSingleInt8) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.b}[3], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  // Byte lane 3 replaced with the first memory byte (0x08, little-endian).
  ASSERT_EQ(reg_after, MakeUInt128(0x00112233'08'556677ULL, 0x8899aabbccddeeffULL));
}

// LD1 single 16-bit lane.
TEST(Arm64InsnTest, LoadSingleInt16) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.h}[2], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0000'0708'22223333ULL, 0x4444555566667777ULL));
}

// LD1 single 32-bit lane.
TEST(Arm64InsnTest, LoadSingleInt32) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.s}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0506070811111111ULL, 0x2222222233333333ULL));
}

// LD1 single 64-bit lane.
TEST(Arm64InsnTest, LoadSingleInt64) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.d}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0000000000000000ULL, 0x0102030405060708ULL));
}
4680
// ST1 (single structure): store one 8-bit lane; only that byte of memory
// is written.
TEST(Arm64InsnTest, StoreSingleInt8) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  asm("st1 {%1.b}[3], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  // Byte lane 3 of arg (0x05) lands in the lowest memory byte.
  ASSERT_EQ(mem_dest, MakeUInt128(0x00112233445566'05ULL, 0x8899aabbccddeeffULL));
}

// ST1 single 16-bit lane (lane 5 comes from the upper half of arg).
TEST(Arm64InsnTest, StoreSingleInt16) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  asm("st1 {%1.h}[5], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x000011112222'0d0eULL, 0x4444555566667777ULL));
}
4694
// ST1 (single structure): store 32-bit lane 2 (0x0d0e0f10); only the low
// four bytes of mem_dest are overwritten.
TEST(Arm64InsnTest, StoreSingleInt32) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  asm("st1 {%1.s}[2], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  // Digit separator regrouped onto the 32-bit boundary (was the misleading
  // 0x000000000'd0e0f10ULL); the value is identical.
  ASSERT_EQ(mem_dest, MakeUInt128(0x00000000'0d0e0f10ULL, 0x2222222233333333ULL));
}
4701
// ST1 single 64-bit lane (lane 1 is the upper half of arg).
TEST(Arm64InsnTest, StoreSingleInt64) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
  asm("st1 {%1.d}[1], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x090a0b0c0d0e0f10ULL, 0x1111111111111111ULL));
}
4708
// LD1 (single structure) with immediate post-index: loads byte lane 3 and
// advances the address by the transfer size (#1).
TEST(Arm64InsnTest, LoadSinglePostIndexImmInt8) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x5494167594605487ULL, 0x1172359464291058ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x5090995021495879ULL, 0x3112196135908315ULL);
  __uint128_t res;
  uint8_t* addr;
  asm("ld1 {%0.b}[3], [%1], #1" : "=w"(res), "=r"(addr) : "0"(arg1), "1"(&arg2) : "memory");
  ASSERT_EQ(res, MakeUInt128(0x5494167579605487ULL, 0x1172359464291058ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 1);
}

// LD1 (single structure) with register post-index: address advances by the
// offset register's value (17), not by the transfer size.
TEST(Arm64InsnTest, LoadSinglePostIndexRegInt16) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x0080587824107493ULL, 0x5751488997891173ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x9746129320351081ULL, 0x4327032514090304ULL);
  __uint128_t res;
  uint8_t* addr;
  asm("ld1 {%0.h}[7], [%1], %2"
      : "=w"(res), "=r"(addr)
      : "r"(static_cast<uint64_t>(17U)), "0"(arg1), "1"(&arg2)
      : "memory");
  ASSERT_EQ(res, MakeUInt128(0x0080587824107493ULL, 0x1081488997891173ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 17);
}
4731
// STR (SIMD, post-index) where base and data register numbers coincide.
TEST(Arm64InsnTest, StoreSimdPostIndex) {
  __uint128_t old_val = MakeUInt128(0x4939965143142980ULL, 0x9190659250937221ULL);
  __uint128_t new_val = MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL);
  __uint128_t* addr = &old_val;

  // Verify that the interpreter accepts "str q0, [x0], #8" where the register numbers are
  // the same, when the data register is one of the SIMD registers.
  asm("mov x0, %0\n\t"
      "mov v0.2D, %1.2D\n\t"
      "str q0, [x0], #8\n\t"
      "mov %0, x0"
      : "+r"(addr)
      : "w"(new_val)
      : "v0", "x0", "memory");

  ASSERT_EQ(old_val, MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL));
  ASSERT_EQ(reinterpret_cast<uintptr_t>(addr), reinterpret_cast<uintptr_t>(&old_val) + 8);
}

// STR XZR with pre-index writeback on SP: stores zero, then reads it back.
TEST(Arm64InsnTest, StoreZeroPostIndex1) {
  uint64_t res;
  asm("str xzr, [sp, #-16]!\n\t"
      "ldr %0, [sp, #0]\n\t"
      "add sp, sp, #16"
      : "=r"(res));
  ASSERT_EQ(res, 0);
}
4759
// STP of two Q registers with pre-index writeback on SP; the pair is read
// back with plain LDRs to verify both halves landed in order.
TEST(Arm64InsnTest, StoreZeroPostIndex2) {
  __uint128_t arg1 = MakeUInt128(0x9415573293820485ULL, 0x4212350817391254ULL);
  __uint128_t arg2 = MakeUInt128(0x9749819308714396ULL, 0x6151329420459193ULL);
  __uint128_t res1;
  __uint128_t res2;
  asm("mov v30.16b, %2.16b\n\t"
      "mov v31.16b, %3.16b\n\t"
      "stp q30, q31, [sp, #-32]!\n\t"
      "ldr %q0, [sp, #0]\n\t"
      "ldr %q1, [sp, #16]\n\t"
      "add sp, sp, #32"
      : "=w"(res1), "=w"(res2)
      : "w"(arg1), "w"(arg2)
      : "v30", "v31");

  ASSERT_EQ(res1, arg1);
  ASSERT_EQ(res2, arg2);
}
4778
// LD2 (multiple structures): de-interleaves byte pairs — even-indexed bytes
// go to the first register, odd-indexed bytes to the second.
TEST(Arm64InsnTest, Load2MultipleInt8x8) {
  static constexpr uint8_t mem[] = {0x02,
                                    0x16,
                                    0x91,
                                    0x83,
                                    0x37,
                                    0x23,
                                    0x68,
                                    0x03,
                                    0x99,
                                    0x02,
                                    0x79,
                                    0x31,
                                    0x60,
                                    0x64,
                                    0x20,
                                    0x43};
  __uint128_t res[2];
  asm("ld2 {v0.8b, v1.8b}, [%2]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2060799968379102ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x4364310203238316ULL, 0U));
}

// LD3: de-interleaves byte triples across three registers.
TEST(Arm64InsnTest, Load3MultipleInt8x8) {
  static constexpr uint8_t mem[] = {0x32, 0x87, 0x67, 0x03, 0x80, 0x92, 0x52, 0x16,
                                    0x79, 0x07, 0x57, 0x12, 0x04, 0x06, 0x12, 0x37,
                                    0x59, 0x63, 0x27, 0x68, 0x56, 0x74, 0x84, 0x50};
  __uint128_t res[3];
  asm("ld3 {v7.8b-v9.8b}, [%3]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v7", "v8", "v9", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7427370407520332ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x8468590657168087ULL, 0U));
  ASSERT_EQ(res[2], MakeUInt128(0x5056631212799267ULL, 0U));
}

// LD4 with 16-bit lanes; the register list v30-v1 wraps around past v31.
TEST(Arm64InsnTest, Load4MultipleInt16x8) {
  static constexpr uint16_t mem[] = {
      0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883, 0x2752, 0x2499, 0x6059, 0x8697, 0x4759,
      0x8823, 0x2991, 0x6263, 0x5459, 0x7332, 0x4445, 0x1637, 0x5533, 0x4377, 0x4929, 0x2899,
      0x0581, 0x1757, 0x9881, 0x5078, 0x1468, 0x5262, 0x1332, 0x5247, 0x3837, 0x6511};
  __uint128_t res[4];
  asm("ld4 {v30.8h-v1.8h}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2991605932252069ULL, 0x1332988149294445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6263869738836535ULL, 0x5247507828991637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x5459475927523863ULL, 0x3837146805815533ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332882324999644ULL, 0x6511526217574377ULL));
}
4843
// LD1R: load one element and replicate it to every lane.
TEST(Arm64InsnTest, Load1ReplicateInt8x8) {
  static constexpr uint8_t mem = 0x81U;
  __uint128_t res;
  asm("ld1r {%0.8b}, [%1]" : "=w"(res) : "r"(&mem) : "memory");
  ASSERT_EQ(res, MakeUInt128(0x8181818181818181ULL, 0U));
}

// LD2R: load two consecutive elements, replicating each into its own register.
TEST(Arm64InsnTest, Load2ReplicateInt16x8) {
  static constexpr uint16_t mem[] = {0x7904, 0x8715};
  __uint128_t res[2];
  asm("ld2r {v6.8h, v7.8h}, [%2]\n\t"
      "mov %0.16b, v6.16b\n\t"
      "mov %1.16b, v7.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v6", "v7", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7904790479047904ULL, 0x7904790479047904ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8715871587158715ULL, 0x8715871587158715ULL));
}

// LD3R with a register list that wraps past v31.
TEST(Arm64InsnTest, Load3ReplicateInt32x4) {
  static constexpr uint32_t mem[] = {0x78713710U, 0x60510637U, 0x95558588U};
  __uint128_t res[3];
  asm("ld3r {v30.4s-v0.4s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7871371078713710ULL, 0x7871371078713710ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6051063760510637ULL, 0x6051063760510637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x9555858895558588ULL, 0x9555858895558588ULL));
}

// LD4R with 64-bit lanes and a wrapping register list.
TEST(Arm64InsnTest, Load4ReplicateInt64x2) {
  static constexpr uint64_t mem[] = {
      0x8150781468526213ULL, 0x3252473837651192ULL, 0x9901561091897779ULL, 0x2200870579339646ULL};
  __uint128_t res[4];
  asm("ld4r {v29.2d-v0.2d}, [%4]\n\t"
      "mov %0.16b, v29.16b\n\t"
      "mov %1.16b, v30.16b\n\t"
      "mov %2.16b, v31.16b\n\t"
      "mov %3.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v29", "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(mem[0], mem[0]));
  ASSERT_EQ(res[1], MakeUInt128(mem[1], mem[1]));
  ASSERT_EQ(res[2], MakeUInt128(mem[2], mem[2]));
  ASSERT_EQ(res[3], MakeUInt128(mem[3], mem[3]));
}
4896
TEST(Arm64InsnTest, LoadPairNonTemporarlInt64) {
  // Verifies LDNP (load pair with a non-temporal hint) into SIMD d registers.
  // NOTE(review): "Temporarl" in the test name is a typo ("Temporal"), kept
  // as-is so existing --gtest_filter patterns keep matching.
  static constexpr uint64_t mem[] = {0x3843601737474215ULL, 0x2476085152099016ULL};
  __uint128_t res[2];
  // Each %d operand receives one 64-bit value; the upper 64 bits of the
  // 128-bit destination registers are zeroed by the load.
  asm("ldnp %d0, %d1, [%2]" : "=w"(res[0]), "=w"(res[1]) : "r"(mem) : "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x3843601737474215ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x2476085152099016ULL, 0U));
}
4904
TEST(Arm64InsnTest, MoviVector2S) {
  // movi .2s zero-extends the 8-bit immediate into each 32-bit lane.
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0xe4")();
  ASSERT_EQ(actual, MakeUInt128(0x000000e4000000e4ULL, 0x0000000000000000ULL));
}
4909
TEST(Arm64InsnTest, MoviVector2D) {
  // movi .2d replicates the immediate into both 64-bit lanes.
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2d, #0xff")();
  ASSERT_EQ(actual, MakeUInt128(0x00000000000000ffULL, 0x00000000000000ffULL));
}
4914
TEST(Arm64InsnTest, MoviVector8B) {
  // movi .8b fills each byte of the lower half; the upper half stays zero.
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.8b, #0xda")();
  ASSERT_EQ(actual, MakeUInt128(0xdadadadadadadadaULL, 0x0000000000000000ULL));
}
4919
TEST(Arm64InsnTest, MoviVector4HShiftBy8) {
  // movi .4h with "lsl #8" places the immediate in the high byte of each lane.
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.4h, #0xd1, lsl #8")();
  ASSERT_EQ(actual, MakeUInt128(0xd100d100d100d100ULL, 0x0000000000000000ULL));
}
4924
TEST(Arm64InsnTest, MoviVector2SShiftBy16) {
  // movi with the shifting-ones ("msl") modifier fills the shifted-in bits
  // with ones rather than zeros.
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0x37, msl #16")();
  ASSERT_EQ(actual, MakeUInt128(0x0037ffff0037ffffULL, 0x0000000000000000ULL));
}
4929
TEST(Arm64InsnTest, MvniVector4H) {
  // mvni writes the bitwise inverse of the expanded immediate (~0x00bc).
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.4h, #0xbc")();
  ASSERT_EQ(actual, MakeUInt128(0xff43ff43ff43ff43ULL, 0x0000000000000000ULL));
}
4934
TEST(Arm64InsnTest, MvniVector2SShiftBy8) {
  // mvni with lsl #8: inverse of 0x00002400 in each 32-bit lane.
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x24, lsl #8")();
  ASSERT_EQ(actual, MakeUInt128(0xffffdbffffffdbffULL, 0x0000000000000000ULL));
}
4939
TEST(Arm64InsnTest, MvniVector2SShiftBy16) {
  // mvni with msl #16: inverse of 0x0025ffff in each 32-bit lane.
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x25, msl #16")();
  ASSERT_EQ(actual, MakeUInt128(0xffda0000ffda0000ULL, 0x0000000000000000ULL));
}
4944
TEST(Arm64InsnTest, LoadSimdRegPlusReg) {
  // LDR Qt with register-offset addressing: base + 16 selects element [1].
  __uint128_t data[] = {
      MakeUInt128(0x6517980694113528ULL, 0x0131470130478164ULL),
      MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL),
  };
  uint64_t disp = 16;
  __uint128_t actual;

  asm("ldr %q0, [%1, %2]" : "=w"(actual) : "r"(data), "r"(disp) : "memory");

  ASSERT_EQ(actual, MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL));
}
4957
TEST(Arm64InsnTest, ExtractNarrowI16x8ToI8x8) {
  // XTN keeps the low byte of each 16-bit lane and writes the lower half.
  __uint128_t operand = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.8b, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x113355772367abefULL, 0x0ULL));
}
4963
TEST(Arm64InsnTest, ExtractNarrowI32x4ToI16x4) {
  // XTN keeps the low 16 bits of each 32-bit lane.
  __uint128_t operand = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.4h, %1.4s")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x223366774567cdefULL, 0x0ULL));
}
4969
TEST(Arm64InsnTest, ExtractNarrowI64x2ToI32x2) {
  // XTN keeps the low 32 bits of each 64-bit lane.
  __uint128_t operand = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.2s, %1.2d")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x4455667789abcdefULL, 0x0ULL));
}
4975
TEST(Arm64InsnTest, ExtractNarrow2Int16x8ToInt8x16) {
  // XTN2 narrows into the upper half while leaving the lower half intact.
  __uint128_t narrowed = MakeUInt128(0x1844396582533754ULL, 0x3885690941130315ULL);
  __uint128_t preserved = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("xtn2 %0.16b, %1.8h")(narrowed, preserved);
  ASSERT_EQ(actual, MakeUInt128(0x6121865619673378ULL, 0x8509131544655354ULL));
}
4982
TEST(Arm64InsnTest, LoadLiteralSimd) {
  // LDR (literal, SIMD) is exercised by an external assembly routine because
  // the literal must live in .rodata. The literal placed in .text would
  // trigger a segfault.
  uint64_t actual = get_fp64_literal();
  ASSERT_EQ(actual, 0x0123456789abcdefULL);
}
4989
TEST(Arm64InsnTest, AbsInt64x1) {
  // Scalar ABS: |-3| = 3; the upper 64 bits of the input are ignored.
  __uint128_t operand = MakeUInt128(0xfffffffffffffffdULL, 0xdeadbeef01234567ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %d0, %d1")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
}
4995
TEST(Arm64InsnTest, AbsInt8x8) {
  // Per-byte ABS; note 0x80 (INT8_MIN) maps to itself.
  __uint128_t operand = MakeUInt128(0x0001027e7f8081ffULL, 0x0123456789abcdefULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %0.8b, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0001027e7f807f01ULL, 0x0ULL));
}
5001
TEST(Arm64InsnTest, UseV31) {
  // Checks that the highest-numbered SIMD register (v31) can be written and
  // read back correctly; movi .2d with an all-ones immediate fills both lanes.
  __uint128_t res;

  asm("movi v31.2d, #0xffffffffffffffff\n\t"
      "mov %0.16b, v31.16b"
      : "=w"(res)
      :
      : "v31");

  ASSERT_EQ(res, MakeUInt128(~0ULL, ~0ULL));
}
5013
TEST(Arm64InsnTest, AddHighNarrowInt16x8) {
  // ADDHN keeps the high byte of each 16-bit lane sum.
  __uint128_t lhs = MakeUInt128(0x2296617119637792ULL, 0x1337575114959501ULL);
  __uint128_t rhs = MakeUInt128(0x0941214722131794ULL, 0x7647772622414254ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addhn %0.8b, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x89ce36d72b823b8fULL, 0x0ULL));
}
5020
TEST(Arm64InsnTest, AddHighNarrowUpperInt16x8) {
  // ADDHN2 writes the narrowed sums to the upper half, keeping the lower half.
  __uint128_t lhs = MakeUInt128(0x6561809377344403ULL, 0x0707469211201913ULL);
  __uint128_t rhs = MakeUInt128(0x6095752706957220ULL, 0x9175671167229109ULL);
  __uint128_t preserved = MakeUInt128(0x5797877185560845ULL, 0x5296541266540853ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("addhn2 %0.16b, %1.8h, %2.8h")(lhs, rhs, preserved);
  ASSERT_EQ(actual, MakeUInt128(0x5797877185560845ULL, 0x98ad78aac5f57db6ULL));
}
5029
TEST(Arm64InsnTest, SubHighNarrowInt16x8) {
  // SUBHN keeps the high byte of each 16-bit lane difference.
  __uint128_t lhs = MakeUInt128(0x4978189312978482ULL, 0x1682998948722658ULL);
  __uint128_t rhs = MakeUInt128(0x1210835791513698ULL, 0x8209144421006751ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("subhn %0.8b, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x948527bf3795814dULL, 0x0ULL));
}
5036
TEST(Arm64InsnTest, SubHighNarrowUpperInt16x8) {
  // SUBHN2 writes the narrowed differences to the upper half only.
  __uint128_t lhs = MakeUInt128(0x5324944166803962ULL, 0x6579787718556084ULL);
  __uint128_t rhs = MakeUInt128(0x1066587969981635ULL, 0x7473638405257145ULL);
  __uint128_t preserved = MakeUInt128(0x3142980919065925ULL, 0x0937221696461515ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("subhn2 %0.16b, %1.8h, %2.8h")(lhs, rhs, preserved);
  ASSERT_EQ(actual, MakeUInt128(0x3142980919065925ULL, 0xf11413ef423bfc23ULL));
}
5045
TEST(Arm64InsnTest, RoundingAddHighNarrowInt16x8) {
  // RADDHN rounds (adds 0x80) before taking the high byte of each lane sum.
  __uint128_t lhs = MakeUInt128(0x8039626579787718ULL, 0x5560845529654126ULL);
  __uint128_t rhs = MakeUInt128(0x3440171274947042ULL, 0x0562230538994561ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("raddhn %0.8b, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x5ba76287b479eee7ULL, 0x0000000000000000ULL));
}
5052
TEST(Arm64InsnTest, RoundingSubHighNarrowInt16x8) {
  // RSUBHN rounds before taking the high byte of each lane difference.
  __uint128_t lhs = MakeUInt128(0x3063432858785698ULL, 0x3052358089330657ULL);
  __uint128_t rhs = MakeUInt128(0x0216471550979259ULL, 0x2309907965473761ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("rsubhn %0.8b, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x0da524cf2efc08c4ULL, 0x0000000000000000ULL));
}
5059
TEST(Arm64InsnTest, ScalarPairwiseAddInt8x2) {
  // Scalar ADDP sums the two 64-bit lanes of the source into one d register.
  // NOTE(review): despite the test name, this operates on 64-bit lanes
  // (%d0 / .2d), not 8-bit ones.
  __uint128_t operand = MakeUInt128(0x6257591633303910ULL, 0x7225383742182140ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addp %d0, %1.2d")(operand);
  ASSERT_EQ(actual, MakeUInt128(0xd47c914d75485a50ULL, 0x0000000000000000ULL));
}
5065
TEST(Arm64InsnTest, AddAcrossInt8x8) {
  // ADDV sums all eight bytes of the lower half (result wraps modulo 256).
  __uint128_t operand = MakeUInt128(0x0681216028764962ULL, 0x8674460477464915ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addv %b0, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x51ULL, 0x0ULL));
}
5071
TEST(Arm64InsnTest, SignedAddLongAcrossInt16x8) {
  // SADDLV sign-extends each 16-bit lane and sums into a 32-bit result.
  __uint128_t operand = MakeUInt128(0x9699557377273756ULL, 0x6761552711392258ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlv %s0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000018aa2ULL, 0x0000000000000000ULL));
}
5077
TEST(Arm64InsnTest, UnsignedAddLongAcrossInt16x8) {
  // UADDLV zero-extends each 16-bit lane and sums into a 32-bit result.
  __uint128_t operand = MakeUInt128(0x7986396522961312ULL, 0x8017826797172898ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlv %s0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x000000000002aac0ULL, 0x0000000000000000ULL));
}
5083
TEST(Arm64InsnTest, SignedMaximumAcrossInt16x8) {
  // SMAXV: the largest signed 16-bit lane wins.
  __uint128_t operand = MakeUInt128(0x8482065967379473ULL, 0x1680864156456505ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("smaxv %h0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000006737ULL, 0x0000000000000000ULL));
}
5089
TEST(Arm64InsnTest, SignedMinimumAcrossInt16x8) {
  // SMINV: the smallest signed 16-bit lane (0x9699 is negative) wins.
  __uint128_t operand = MakeUInt128(0x6772530431825197ULL, 0x5791679296996504ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sminv %h0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000009699ULL, 0x0000000000000000ULL));
}
5095
TEST(Arm64InsnTest, UnsignedMaximumAcrossInt16x8) {
  // UMAXV: the largest unsigned 16-bit lane wins.
  __uint128_t operand = MakeUInt128(0x6500378070466126ULL, 0x4706021457505793ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("umaxv %h0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000007046ULL, 0x0000000000000000ULL));
}
5101
TEST(Arm64InsnTest, UnsignedMinimumAcrossInt16x8) {
  // UMINV: the smallest unsigned 16-bit lane wins.
  __uint128_t operand = MakeUInt128(0x5223572397395128ULL, 0x8181640597859142ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uminv %h0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000005128ULL, 0x0000000000000000ULL));
}
5107
TEST(Arm64InsnTest, CountLeadingZerosI8x8) {
  // CLZ per byte on the lower 64 bits.
  __uint128_t operand = MakeUInt128(0x1452635608277857ULL, 0x7134275778960917ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("clz %0.8b, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0301010104020101ULL, 0x0000000000000000ULL));
}
5113
TEST(Arm64InsnTest, CountLeadingSignBitsI8x8) {
  // CLS per byte: counts sign-bit copies after the sign bit itself.
  __uint128_t operand = MakeUInt128(0x8925892354201995ULL, 0x6112129021960864ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cls %0.8b, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0001000100010200ULL, 0x0000000000000000ULL));
}
5119
TEST(Arm64InsnTest, Cnt) {
  // CNT computes a per-byte population count across all 16 bytes.
  __uint128_t operand = MakeUInt128(0x9835484875625298ULL, 0x7524238730775595ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cnt %0.16b, %1.16b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0304020205030303ULL, 0x0502030402060404ULL));
}
5125
TEST(Arm64InsnTest, SimdScalarMove) {
  // mov %b0, %1.b[5] extracts byte 5 (0x34) and zeroes the rest.
  __uint128_t operand = MakeUInt128(0x1433345477624168ULL, 0x6251898356948556ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("mov %b0, %1.b[5]")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000000034ULL, 0x0000000000000000ULL));
}
5131
TEST(Arm64InsnTest, SimdVectorElemDuplicate) {
  // DUP broadcasts byte 5 (0x64) to all eight lanes of the lower half.
  __uint128_t operand = MakeUInt128(0x3021647155097925ULL, 0x9230990796547376ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.8b, %1.b[5]")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x6464646464646464ULL, 0x0000000000000000ULL));
}
5137
TEST(Arm64InsnTest, SimdVectorElemDuplicateInt16AtIndex7) {
  // DUP broadcasts halfword 7 (0x7726, the topmost of the upper half).
  __uint128_t operand = MakeUInt128(0x2582262052248940ULL, 0x7726719478268482ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.4h, %1.h[7]")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x7726772677267726ULL, 0x0000000000000000ULL));
}
5143
TEST(Arm64InsnTest, SimdVectorElemInsert) {
  // mov %0.s[2], %1.s[1] copies one 32-bit lane, leaving the rest untouched.
  __uint128_t source = MakeUInt128(0x7120844335732654ULL, 0x8938239119325974ULL);
  __uint128_t destination = MakeUInt128(0x7656180937734440ULL, 0x3070746921120191ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("mov %0.s[2], %1.s[1]")(source, destination);
  ASSERT_EQ(actual, MakeUInt128(0x7656180937734440ULL, 0x3070746971208443ULL));
}
5150
TEST(Arm64InsnTest, NegateInt64x1) {
  // Scalar NEG on d registers. INT64_MIN negates to itself in two's complement.
  constexpr auto Negate = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %d0, %d1");

  __uint128_t ordinary = MakeUInt128(0x8389522868478312ULL, 0x3552658213144957ULL);
  ASSERT_EQ(Negate(ordinary), MakeUInt128(0x7c76add797b87ceeULL, 0x0000000000000000ULL));

  __uint128_t int64_min = MakeUInt128(1ULL << 63, 0U);
  ASSERT_EQ(Negate(int64_min), MakeUInt128(1ULL << 63, 0U));
}
5159
TEST(Arm64InsnTest, NegateInt16x8) {
  // NEG of each 16-bit lane across the full vector.
  __uint128_t operand = MakeUInt128(0x4411010446823252ULL, 0x7162010526522721ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %0.8h, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0xbbeffefcb97ecdaeULL, 0x8e9efefbd9aed8dfULL));
}
5165
TEST(Arm64InsnTest, NotI8x8) {
  // NOT inverts every bit of the lower 64 bits; the upper half is zeroed.
  __uint128_t operand = MakeUInt128(0x6205647693125705ULL, 0x8635662018558100ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("not %0.8b, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x9dfa9b896ceda8faULL, 0x0000000000000000ULL));
}
5171
TEST(Arm64InsnTest, RbitInt8x8) {
  // RBIT reverses the bit order within each byte.
  __uint128_t operand = MakeUInt128(0x4713296210734043ULL, 0x7518957359614589ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rbit %0.8b, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0xe2c8944608ce02c2ULL, 0x0000000000000000ULL));
}
5177
TEST(Arm64InsnTest, Rev16Int8x16) {
  // REV16 swaps the two bytes inside each 16-bit chunk.
  __uint128_t operand = MakeUInt128(0x9904801094121472ULL, 0x2131794764777262ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev16 %0.16b, %1.16b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0499108012947214ULL, 0x3121477977646272ULL));
}
5183
TEST(Arm64InsnTest, Rev32Int16x8) {
  // REV32 swaps the two halfwords inside each 32-bit chunk.
  __uint128_t operand = MakeUInt128(0x8662237172159160ULL, 0x7716692547487389ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev32 %0.8h, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x2371866291607215ULL, 0x6925771673894748ULL));
}
5189
TEST(Arm64InsnTest, Rev64Int32x4) {
  // REV64 swaps the two words inside each 64-bit chunk.
  __uint128_t operand = MakeUInt128(0x5306736096571209ULL, 0x1807638327166416ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev64 %0.4s, %1.4s")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x9657120953067360ULL, 0x2716641618076383ULL));
}
5195
TEST(Arm64InsnTest, TblInt8x8) {
  // Single-table TBL producing an 8-byte result; out-of-range indices yield 0.
  __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t indices = MakeUInt128(0x0104011509120605ULL, 0x0315080907091312ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.8b, {%1.16b}, %2.8b")(table, indices);
  ASSERT_EQ(actual, MakeUInt128(0x1144110099006655ULL, 0x0000000000000000ULL));
}
5202
TEST(Arm64InsnTest, TblInt8x16) {
  // Single-table TBL producing a full 16-byte result.
  __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t indices = MakeUInt128(0x0905060808010408ULL, 0x0506000206030202ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.16b, {%1.16b}, %2.16b")(table, indices);
  ASSERT_EQ(actual, MakeUInt128(0x9955668888114488ULL, 0x5566002266332222ULL));
}
5209
TEST(Arm64InsnTest, Tbl2Int8x16) {
  // Two-table TBL: indices 0-31 select from the concatenation {v31, v0};
  // out-of-range indices produce zero bytes.
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  __uint128_t res;

  // Hardcode v31 and v0 so that the TBL instruction gets consecutive
  // (mod 32) registers; both are clobbered so no operand lands in them.
  asm("mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "tbl %0.16b, {v31.16b, v0.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3)
      : "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x22005500002299ffULL, 0x8777003398000033ULL));
}
5226
TEST(Arm64InsnTest, Tbl3Int8x16) {
  // Three-table TBL: indices 0-47 select from the concatenation
  // {v30, v31, v0}; out-of-range indices produce zero bytes.
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, and v0 so that the TBL instruction gets consecutive
  // (mod 32) registers. They must also be the clobbers: the previous clobber
  // list named v1/v2 (never touched here) instead of v30/v31, which the movs
  // overwrite, so the compiler could have allocated an input or the output
  // into v30/v31 and had it corrupted before use.
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "tbl %0.16b, {v30.16b-v0.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4)
      : "v30", "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x778760000090ff00ULL, 0x0060980000103244ULL));
}
5245
TEST(Arm64InsnTest, Tbl4Int8x16) {
  // Four-table TBL: indices 0-63 select from the concatenation
  // {v30, v31, v0, v1}; out-of-range indices produce zero bytes.
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, v0, and v1 so that the TBL instruction gets
  // consecutive (mod 32) registers; all four are clobbered so no operand
  // lands in them.
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "mov v1.16b, %4.16b\n\t"
      "tbl %0.16b, {v30.16b-v1.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5)
      : "v30", "v31", "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x778760009f90ff5fULL, 0x5f60980000103244ULL));
}
5266
TEST(Arm64InsnTest, TbxInt8x16) {
  // TBX: like TBL, but out-of-range indices leave the destination byte as-is.
  __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t indices = MakeUInt128(0x0915061808010408ULL, 0x0516000206031202ULL);
  __uint128_t fallback = MakeUInt128(0x6668559233565463ULL, 0x9138363185745698ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("tbx %0.16b, {%1.16b}, %2.16b")(table, indices, fallback);
  ASSERT_EQ(actual, MakeUInt128(0x9968669288114488ULL, 0x5538002266335622ULL));
}
5275
TEST(Arm64InsnTest, Tbx2Int8x16) {
  // Two-table TBX: indices 0-31 select from {v0, v1}; out-of-range indices
  // keep the corresponding byte of the pre-loaded destination (res).
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  __uint128_t res = MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL);

  // Hardcode v0 and v1 so that the TBX instruction gets consecutive
  // registers; the "0"(res) input ties the initial destination value to %0.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "0"(res)
      : "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x22945584002299ffULL, 0x8777153398000333ULL));
}
5292
TEST(Arm64InsnTest, Tbx3Int8x16) {
  // Three-table TBX: indices 0-47 select from {v0, v1, v2}; out-of-range
  // indices keep the corresponding byte of the pre-loaded destination (res).
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res = MakeUInt128(0x0136776310849135ULL, 0x1615642269847507ULL);

  // Hardcode v0, v1, and v2 so that the TBX instruction gets consecutive
  // registers; the "0"(res) input ties the initial destination value to %0.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b, v2.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "0"(res)
      : "v0", "v1", "v2");

  ASSERT_EQ(res, MakeUInt128(0x778760631090ff35ULL, 0x1660980069103244ULL));
}
5311
TEST(Arm64InsnTest, Tbx4Int8x16) {
  // Four-table TBX: indices 0-63 select from {v0, v1, v2, v3}; out-of-range
  // indices keep the corresponding byte of the pre-loaded destination (res).
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res = MakeUInt128(0x5818319637637076ULL, 0x1799191920357958ULL);

  // Hardcode v0, v1, v2, and v3 so that the TBX instruction gets consecutive
  // registers; the "0"(res) input ties the initial destination value to %0.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "mov v3.16b, %4.16b\n\t"
      "tbx %0.16b, {v0.16b-v3.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5), "0"(res)
      : "v0", "v1", "v2", "v3");

  ASSERT_EQ(res, MakeUInt128(0x778760969f90ff5fULL, 0x5f60980020103244ULL));
}
5332
TEST(Arm64InsnTest, Trn1Int8x8) {
  // TRN1 interleaves the even-indexed bytes of the two sources.
  __uint128_t lhs = MakeUInt128(0x2075916729700785ULL, 0x0580717186381054ULL);
  __uint128_t rhs = MakeUInt128(0x2786099055690013ULL, 0x4137182368370991ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn1 %0.8b, %1.8b, %2.8b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x8675906769701385ULL, 0x0000000000000000ULL));
}
5339
TEST(Arm64InsnTest, Trn2Int16x8) {
  // TRN2 interleaves the odd-indexed halfwords of the two sources.
  __uint128_t lhs = MakeUInt128(0x6685592335654639ULL, 0x1383631857456981ULL);
  __uint128_t rhs = MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn2 %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x7494668588443565ULL, 0x2175138334265745ULL));
}
5346
TEST(Arm64InsnTest, Uzp1Int8x8) {
  // UZP1 gathers the even-indexed bytes from the concatenated sources.
  __uint128_t lhs = MakeUInt128(0x4954893139394489ULL, 0x9216125525597701ULL);
  __uint128_t rhs = MakeUInt128(0x2783467926101995ULL, 0x5852247172201777ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp1 %0.8b, %1.8b, %2.8b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x8379109554313989ULL, 0x0000000000000000ULL));
}
5353
TEST(Arm64InsnTest, Uzp2Int16x8) {
  // UZP2 gathers the odd-indexed halfwords from the concatenated sources.
  __uint128_t lhs = MakeUInt128(0x6745642390585850ULL, 0x2167190313952629ULL);
  __uint128_t rhs = MakeUInt128(0x3620129476918749ULL, 0x7519101147231528ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp2 %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x2167139567459058ULL, 0x7519472336207691ULL));
}
5360
TEST(Arm64InsnTest, Zip2Int64x2) {
  // ZIP2 .2d takes the high 64-bit lane of each source. The test previously
  // executed "uzp2" (duplicating Uzp2Int16x8's coverage); for the .2d
  // arrangement ZIP2 and UZP2 are equivalent, so the expected values hold
  // and the test now exercises the instruction its name promises.
  __uint128_t arg1 = MakeUInt128(0x1494271410093913ULL, 0x6913810725813781ULL);
  __uint128_t arg2 = MakeUInt128(0x3578940055995001ULL, 0x8354251184172136ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip2 %0.2d, %1.2d, %2.2d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x6913810725813781ULL, 0x8354251184172136ULL));
}
5367
TEST(Arm64InsnTest, Zip1Int8x8) {
  // ZIP1 interleaves the low bytes of the two sources.
  __uint128_t lhs = MakeUInt128(0x7499235630254947ULL, 0x8024901141952123ULL);
  __uint128_t rhs = MakeUInt128(0x3331239480494707ULL, 0x9119153267343028ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.8b, %1.8b, %2.8b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x8030492547490747ULL, 0x0000000000000000ULL));
}
5374
TEST(Arm64InsnTest, Zip1Int64x2) {
  // ZIP1 .2d takes the low 64-bit lane of each source.
  __uint128_t lhs = MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL);
  __uint128_t rhs = MakeUInt128(0x0551199581831963ULL, 0x7637076179919192ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.2d, %1.2d, %2.2d")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x9243530136776310ULL, 0x0551199581831963ULL));
}
5381
TEST(Arm64InsnTest, Zip2Int16x8) {
  // ZIP2 interleaves the high halfwords of the two sources.
  __uint128_t lhs = MakeUInt128(0x5831832713142517ULL, 0x0296923488962766ULL);
  __uint128_t rhs = MakeUInt128(0x2934595889706953ULL, 0x6534940603402166ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip2 %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x0340889621662766ULL, 0x6534029694069234ULL));
}
5388
TEST(Arm64InsnTest, SignedMaxInt16x8) {
  // SMAX: per-lane signed maximum.
  __uint128_t lhs = MakeUInt128(0x9901573466102371ULL, 0x2235478911292547ULL);
  __uint128_t rhs = MakeUInt128(0x4922157650450812ULL, 0x0677173571202718ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smax %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x4922573466102371ULL, 0x2235478971202718ULL));
}
5395
TEST(Arm64InsnTest, SignedMinInt16x8) {
  // SMIN: per-lane signed minimum.
  __uint128_t lhs = MakeUInt128(0x7820385653909910ULL, 0x4775941413215432ULL);
  __uint128_t rhs = MakeUInt128(0x0084531214065935ULL, 0x8090412711359200ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smin %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x0084385614069910ULL, 0x8090941411359200ULL));
}
5402
TEST(Arm64InsnTest, SignedMaxPairwiseInt16x8) {
  // SMAXP: signed maximum of adjacent lane pairs across both sources.
  __uint128_t lhs = MakeUInt128(0x6998469884770232ULL, 0x3823840055655517ULL);
  __uint128_t rhs = MakeUInt128(0x3272867600724817ULL, 0x2987637569816335ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smaxp %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x3823556569980232ULL, 0x6375698132724817ULL));
}
5409
TEST(Arm64InsnTest, SignedMinPairwiseInt16x8) {
  // SMINP: signed minimum of adjacent lane pairs across both sources.
  __uint128_t lhs = MakeUInt128(0x8865701568501691ULL, 0x8647488541679154ULL);
  __uint128_t rhs = MakeUInt128(0x1821553559732353ULL, 0x0686043010675760ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sminp %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x8647915488651691ULL, 0x0430106718212353ULL));
}
5416
TEST(Arm64InsnTest, UnsignedMaxInt16x8) {
  // UMAX: per-lane unsigned maximum.
  __uint128_t lhs = MakeUInt128(0x7639975974619383ULL, 0x5845749159880976ULL);
  __uint128_t rhs = MakeUInt128(0x5928493695941434ULL, 0x0814685298150539ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umax %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x7639975995949383ULL, 0x5845749198150976ULL));
}
5423
TEST(Arm64InsnTest, UnsignedMinInt16x8) {
  // UMIN: per-lane unsigned minimum.
  __uint128_t lhs = MakeUInt128(0x2888773717663748ULL, 0x6027660634960353ULL);
  __uint128_t rhs = MakeUInt128(0x6983349515101986ULL, 0x4269887847171939ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umin %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x2888349515101986ULL, 0x4269660634960353ULL));
}
5430
TEST(Arm64InsnTest, UnsignedMaxPairwiseInt16x8) {
  // UMAXP: unsigned maximum of adjacent lane pairs across both sources.
  __uint128_t lhs = MakeUInt128(0x1318583584066747ULL, 0x2370297149785084ULL);
  __uint128_t rhs = MakeUInt128(0x4570249413983163ULL, 0x4332378975955680ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umaxp %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x2971508458358406ULL, 0x4332759545703163ULL));
}
5437
TEST(Arm64InsnTest, UnsignedMinPairwiseInt16x8) {
  // UMINP: unsigned minimum of adjacent lane pairs across both sources.
  __uint128_t lhs = MakeUInt128(0x9538121791319145ULL, 0x1350099384631177ULL);
  __uint128_t rhs = MakeUInt128(0x7769055481028850ULL, 0x2080858008781157ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uminp %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x0993117712179131ULL, 0x2080087805548102ULL));
}
5444
TEST(Arm64InsnTest, SignedHalvingAddInt16x8) {
  // SHADD: per-lane signed sum, halved with truncation toward negative infinity.
  __uint128_t lhs = MakeUInt128(0x1021944719713869ULL, 0x2560841624511239ULL);
  __uint128_t rhs = MakeUInt128(0x8062011318454124ULL, 0x4782050110798760ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shadd %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0xc841caad18db3cc6ULL, 0x3671c48b1a65ccccULL));
}
5451
TEST(Arm64InsnTest, SignedHalvingSubInt16x8) {
  // SHSUB: per-lane signed difference, halved.
  __uint128_t lhs = MakeUInt128(0x9041210873032402ULL, 0x0106853419472304ULL);
  __uint128_t rhs = MakeUInt128(0x7666672174986986ULL, 0x8547076781205124ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shsub %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x8ceddcf3ff35dd3eULL, 0x3ddfbee64c13e8f0ULL));
}
5458
TEST(Arm64InsnTest, SignedRoundingHalvingAddInt16x8) {
  // SRHADD: per-lane signed sum, halved with rounding (adds one before shift).
  __uint128_t lhs = MakeUInt128(0x5871487839890810ULL, 0x7429530941060596ULL);
  __uint128_t rhs = MakeUInt128(0x9443158477539700ULL, 0x9439883949144323ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srhadd %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0xf65a2efe586ecf88ULL, 0x0431eda1450d245dULL));
}
5465
TEST(Arm64InsnTest, SignedAbsoluteDifferenceInt16x8) {
  // SABD: per-lane |lhs - rhs| with signed interpretation.
  __uint128_t lhs = MakeUInt128(0x1349607501116498ULL, 0x3278563531614516ULL);
  __uint128_t rhs = MakeUInt128(0x8457695687109002ULL, 0x9997698412632665ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabd %0.8h, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x8ef208e17a01d496ULL, 0x98e1134f1efe1eb1ULL));
}
5472
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt16x8) {
  // SABDL widens |lhs - rhs| of the lower halfword lanes to 32 bits.
  __uint128_t lhs = MakeUInt128(0x7419850973346267ULL, 0x9332107268687076ULL);
  __uint128_t rhs = MakeUInt128(0x8062639919361965ULL, 0x0440995421676278ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl %0.4s, %1.4h, %2.4h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x000059fe00004902ULL, 0x0000f3b70000de90ULL));
}
5479
// Verifies SABDL2: same as SABDL but consuming the upper four 16-bit lanes of
// each source vector; checked against a precomputed golden value.
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongUpperInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4980559610330799ULL, 0x4145347784574699ULL);
  __uint128_t arg2 = MakeUInt128(0x9921285999993996ULL, 0x1228161521931488ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00009d3c00003211ULL, 0x00002f1d00001e62ULL));
}
5486
// Verifies SABA (signed absolute difference and accumulate) on eight 16-bit
// lanes; arg3 is the initial accumulator value fed in via the "0" constraint.
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt16x8) {
  // The lowest element tests the overflow.
  __uint128_t arg1 = MakeUInt128(0x8967'0031'9258'7fffULL, 0x9410'5105'3358'4384ULL);
  __uint128_t arg2 = MakeUInt128(0x6560'2339'1796'8000ULL, 0x6784'4763'7084'7497ULL);
  __uint128_t arg3 = MakeUInt128(0x8333'6555'7900'5555ULL, 0x1914'7319'8862'7135ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.8h, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5f2c'885d'fe3e'5554ULL, 0xec88'7cbb'c58e'a248ULL));
}
5495
// Verifies SABA on four 32-bit lanes; arg3 is the initial accumulator value.
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt32x4) {
  // The lowest element tests the overflow.
  __uint128_t arg1 = MakeUInt128(0x8967'0031'7fff'ffffULL, 0x9410'5105'3358'4384ULL);
  __uint128_t arg2 = MakeUInt128(0x6560'2339'8000'0000ULL, 0x6784'4763'7084'7497ULL);
  __uint128_t arg3 = MakeUInt128(0x8333'6555'aaaa'5555ULL, 0x1914'7319'8862'7135ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.4s, %1.4s, %2.4s")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5f2c'885d'aaaa'5554ULL, 0xec88'6977'c58e'a248ULL));
}
5504
// Verifies SABAL (signed absolute difference, accumulate long): lower four
// 16-bit lanes widened and added into the 32-bit accumulator arg3.
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongInt16x4) {
  __uint128_t arg1 = MakeUInt128(0x078464167452167ULL, 0x719048310967671ULL);
  __uint128_t arg2 = MakeUInt128(0x344349481926268ULL, 0x110739948250607ULL);
  __uint128_t arg3 = MakeUInt128(0x949507350316901ULL, 0x731852119552635ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.4s, %1.4h, %2.4h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x094a36265031aa02ULL, 0x073187ed195537e2ULL));
}
5512
// Exercises SABAL on two 32-bit lanes widened to 64-bit, with the boundary
// pair INT32_MAX / INT32_MIN and a zero accumulator, so the result equals the
// plain absolute difference (0xffffffff).
// NOTE(review): despite the "Long" (not "AccumulateLong") test name, this uses
// the accumulating SABAL instruction; arg3 is the (zero) accumulator.
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt32x2) {
  __uint128_t arg1 = MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL);
  __uint128_t arg3 = MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.2d, %1.2s, %2.2s")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
}
5520
// Verifies SABAL2: like SABAL but consuming the upper four 16-bit lanes of the
// sources; arg3 is the initial accumulator value.
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x690943470482932ULL, 0x414041114654092ULL);
  __uint128_t arg2 = MakeUInt128(0x988344435159133ULL, 0x010773944111840ULL);
  __uint128_t arg3 = MakeUInt128(0x410768498106634ULL, 0x241048239358274ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal2 %0.4s, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x0410a63098108e86ULL, 0x024108863935f59cULL));
}
5529
// Verifies UHADD (unsigned halving add) on eight 16-bit lanes against a
// precomputed golden result.
TEST(Arm64InsnTest, UnsignedHalvingAddInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4775379853799732ULL, 0x2344561227858432ULL);
  __uint128_t arg2 = MakeUInt128(0x9684664751333657ULL, 0x3692387201464723ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhadd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x6efc4eef525666c4ULL, 0x2ceb4742146565aaULL));
}
5536
// Verifies UHSUB (unsigned halving subtract) on eight 16-bit lanes against a
// precomputed golden result.
TEST(Arm64InsnTest, UnsignedHalvingSubInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9926884349592876ULL, 0x1240075587569464ULL);
  __uint128_t arg2 = MakeUInt128(0x1370562514001179ULL, 0x7133166207153715ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhsub %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x42db190f1aac0b7eULL, 0xd086f87940202ea7ULL));
}
5543
// Verifies URHADD (unsigned rounding halving add) on eight 16-bit lanes
// against a precomputed golden result.
TEST(Arm64InsnTest, UnsignedRoundingHalvingAddInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x5066533985738887ULL, 0x8661476294434140ULL);
  __uint128_t arg2 = MakeUInt128(0x1049888993160051ULL, 0x2076781035886116ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urhadd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x30586de18c45446cULL, 0x536c5fb964e6512bULL));
}
5550
// Verifies UABD (unsigned absolute difference) on eight 16-bit lanes against a
// precomputed golden result.
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x8574664607722834ULL, 0x1540311441529418ULL);
  __uint128_t arg2 = MakeUInt128(0x8047825438761770ULL, 0x7904300015669867ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x052d1c0e310410c4ULL, 0x63c401142bec044fULL));
}
5557
// Verifies UABDL (unsigned absolute difference, long): lower four 16-bit lanes
// widened to 32-bit results; checked against a precomputed golden value.
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x1614585505839727ULL, 0x4209809097817293ULL);
  __uint128_t arg2 = MakeUInt128(0x2393010676638682ULL, 0x4040111304024700ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x000070e0000010a5ULL, 0x00000d7f0000574fULL));
}
5564
// Verifies UABDL2: same as UABDL but consuming the upper four 16-bit lanes of
// each source vector; checked against a precomputed golden value.
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongUpperInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x0347999588867695ULL, 0x0161249722820403ULL);
  __uint128_t arg2 = MakeUInt128(0x0399546327883069ULL, 0x5976249361510102ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00003ecf00000301ULL, 0x0000581500000004ULL));
}
5571
// Verifies UABA (unsigned absolute difference and accumulate) on eight 16-bit
// lanes; arg3 is the initial accumulator value fed in via the "0" constraint.
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x0857466460772283ULL, 0x4154031144152941ULL);
  __uint128_t arg2 = MakeUInt128(0x8804782543876177ULL, 0x0790430001566986ULL);
  __uint128_t arg3 = MakeUInt128(0x7767957609099669ULL, 0x3607559496515273ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uaba %0.8h, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xf714c73725f9d55dULL, 0x6fcb9583d91092b8ULL));
}
5579
// Verifies UABAL (unsigned absolute difference, accumulate long): lower four
// 16-bit lanes widened and added into the 32-bit accumulator arg3.
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongInt16x4) {
  __uint128_t arg1 = MakeUInt128(0x8343417044157348ULL, 0x2481833301640566ULL);
  __uint128_t arg2 = MakeUInt128(0x9596688667695634ULL, 0x9141632842641497ULL);
  __uint128_t arg3 = MakeUInt128(0x4533349999480002ULL, 0x6699875888159350ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal %0.4s, %1.4h, %2.4h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x453357ed99481d16ULL, 0x669999ab8815ba66ULL));
}
5587
// Verifies UABAL2: like UABAL but consuming the upper four 16-bit lanes of the
// sources; arg3 is the initial accumulator value.
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x998685541703188ULL, 0x778867592902607ULL);
  __uint128_t arg2 = MakeUInt128(0x043212666179192ULL, 0x352093822787888ULL);
  __uint128_t arg3 = MakeUInt128(0x988633599116081ULL, 0x235355570464634ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal2 %0.4s, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x0988d34d9911b302ULL, 0x0235397b7046c371ULL));
}
5596
// Verifies SADDLP (signed add long pairwise): adjacent signed byte pairs summed
// into eight 16-bit lanes; checked against a precomputed golden value.
TEST(Arm64InsnTest, SignedAddLongPairwiseInt8x16) {
  __uint128_t arg = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.8h, %1.16b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x00c50051ffbb0099ULL, 0x0078ffd2006a000bULL));
}
5602
// Verifies SADALP (signed add and accumulate long pairwise) from 16 bytes into
// eight 16-bit lanes; arg2 is the initial accumulator value.
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt8x16) {
  __uint128_t arg1 = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t arg2 = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.8h, %1.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x71c183272fe809c2ULL, 0x510924703608905fULL));
}
5609
// Verifies SADALP from eight 16-bit lanes into four 32-bit lanes, reusing the
// same inputs as the 8h/16b variant above for easy cross-checking.
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t arg2 = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.4s, %1.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x72180054304fb4afULL, 0x51090e88357296efULL));
}
5616
// Verifies UADDLP (unsigned add long pairwise): adjacent unsigned byte pairs
// summed into eight 16-bit lanes; checked against a precomputed golden value.
TEST(Arm64InsnTest, UnsignedAddLongPairwiseInt8x16) {
  __uint128_t arg = MakeUInt128(0x1483287348089574ULL, 0x7777527834422109ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlp %0.8h, %1.16b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0097009b00500109ULL, 0x00ee00ca0076002aULL));
}
5622
// Verifies UADALP (unsigned add and accumulate long pairwise) from 16 bytes
// into eight 16-bit lanes; arg2 is the initial accumulator value.
TEST(Arm64InsnTest, UnsignedAddAccumulateLongPairwiseInt8x16) {
  __uint128_t arg1 = MakeUInt128(0x9348154691631162ULL, 0x4928873574718824ULL);
  __uint128_t arg2 = MakeUInt128(0x5207665738825139ULL, 0x6391635767231510ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("uadalp %0.8h, %1.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x52e266b2397651acULL, 0x64026413680815bcULL));
}
5629
// Verifies SADDL (signed add long): lower four 16-bit lanes of each source are
// sign-extended and added into four 32-bit lanes.
TEST(Arm64InsnTest, SignedAddLong) {
  __uint128_t arg1 = MakeUInt128(0x3478074585067606ULL, 0x3048229409653041ULL);
  __uint128_t arg2 = MakeUInt128(0x1183066710818930ULL, 0x3110887172816751ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff9587ffffff36ULL, 0x000045fb00000dacULL));
}
5636
// Verifies SADDL2: like SADDL but consuming the upper four 16-bit lanes.
TEST(Arm64InsnTest, SignedAddLongUpper) {
  __uint128_t arg1 = MakeUInt128(0x3160683158679946ULL, 0x0165205774052942ULL);
  __uint128_t arg2 = MakeUInt128(0x3053601780313357ULL, 0x2632670547903384ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000bb9500005cc6ULL, 0x000027970000875cULL));
}
5643
// Verifies SSUBL (signed subtract long): lower four 16-bit lanes sign-extended
// and subtracted into four 32-bit lanes.
TEST(Arm64InsnTest, SignedSubLong) {
  __uint128_t arg1 = MakeUInt128(0x8566746260879482ULL, 0x0186474876727272ULL);
  __uint128_t arg2 = MakeUInt128(0x2206267646533809ULL, 0x9801966883680994ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00001a34ffff5c79ULL, 0xffff636000004decULL));
}
5650
// Verifies SSUBL2: like SSUBL but consuming the upper four 16-bit lanes.
TEST(Arm64InsnTest, SignedSubLongUpper) {
  __uint128_t arg1 = MakeUInt128(0x3011331753305329ULL, 0x8020166888174813ULL);
  __uint128_t arg2 = MakeUInt128(0x4298868158557781ULL, 0x0343231753064784ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff35110000008fULL, 0xffff7cddfffff351ULL));
}
5657
// Verifies UADDL (unsigned add long): lower four 16-bit lanes zero-extended
// and added into four 32-bit lanes.
TEST(Arm64InsnTest, UnsignedAddLong) {
  __uint128_t arg1 = MakeUInt128(0x3126059505777727ULL, 0x5424712416483128ULL);
  __uint128_t arg2 = MakeUInt128(0x3298207236175057ULL, 0x4673870128209575ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00003b8e0000c77eULL, 0x000063be00002607ULL));
}
5664
// Verifies UADDL2: like UADDL but consuming the upper four 16-bit lanes.
TEST(Arm64InsnTest, UnsignedAddLongUpper) {
  __uint128_t arg1 = MakeUInt128(0x3384698499778726ULL, 0x7065551918544686ULL);
  __uint128_t arg2 = MakeUInt128(0x9846947849573462ULL, 0x2606294219624557ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x000031b600008bddULL, 0x0000966b00007e5bULL));
}
5671
// Verifies USUBL (unsigned subtract long): lower four 16-bit lanes
// zero-extended and subtracted into four 32-bit lanes.
TEST(Arm64InsnTest, UnsignedSubLong) {
  __uint128_t arg1 = MakeUInt128(0x4378111988556318ULL, 0x7777925372011667ULL);
  __uint128_t arg2 = MakeUInt128(0x1853954183598443ULL, 0x8305203762819440ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x000004fcffffded5ULL, 0x00002b25ffff7bd8ULL));
}
5678
// Verifies USUBL2: like USUBL but consuming the upper four 16-bit lanes.
TEST(Arm64InsnTest, UnsignedSubLongUpper) {
  __uint128_t arg1 = MakeUInt128(0x5228717440266638ULL, 0x9148817173086436ULL);
  __uint128_t arg2 = MakeUInt128(0x1113890694202790ULL, 0x8814311944879941ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00002e81ffffcaf5ULL, 0x0000093400005058ULL));
}
5685
// Verifies SADDW (signed add wide): 32-bit lanes of arg1 plus sign-extended
// lower 16-bit lanes of arg2.
TEST(Arm64InsnTest, SignedAddWide) {
  __uint128_t arg1 = MakeUInt128(0x7844598183134112ULL, 0x9001999205981352ULL);
  __uint128_t arg2 = MakeUInt128(0x2051173365856407ULL, 0x8264849427644113ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7844bf068313a519ULL, 0x9001b9e305982a85ULL));
}
5692
// Verifies SADDW2: like SADDW but using the upper 16-bit lanes of arg2.
TEST(Arm64InsnTest, SignedAddWideUpper) {
  __uint128_t arg1 = MakeUInt128(0x3407092233436577ULL, 0x9160128093179401ULL);
  __uint128_t arg2 = MakeUInt128(0x7185985999338492ULL, 0x3549564005709955ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x34070e923342feccULL, 0x916047c99317ea41ULL));
}
5699
// Verifies SSUBW (signed subtract wide): 32-bit lanes of arg1 minus
// sign-extended lower 16-bit lanes of arg2.
TEST(Arm64InsnTest, SignedSubWide) {
  __uint128_t arg1 = MakeUInt128(0x2302847007312065ULL, 0x8032626417116165ULL);
  __uint128_t arg2 = MakeUInt128(0x9576132723515666ULL, 0x6253667271899853ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2302611f0730c9ffULL, 0x8032ccee17114e3eULL));
}
5706
// Verifies SSUBW2: like SSUBW but using the upper 16-bit lanes of arg2.
TEST(Arm64InsnTest, SignedSubWideUpper) {
  __uint128_t arg1 = MakeUInt128(0x4510824783572905ULL, 0x6919885554678860ULL);
  __uint128_t arg2 = MakeUInt128(0x7946280537122704ULL, 0x2466543192145281ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x4510f0338356d684ULL, 0x691963ef5467342fULL));
}
5713
// Verifies UADDW (unsigned add wide): 32-bit lanes of arg1 plus zero-extended
// lower 16-bit lanes of arg2.
TEST(Arm64InsnTest, UnsignedAddWide) {
  __uint128_t arg1 = MakeUInt128(0x5870785951298344ULL, 0x1729535195378855ULL);
  __uint128_t arg2 = MakeUInt128(0x3457374260859029ULL, 0x0817651557803905ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x5870d8de512a136dULL, 0x172987a89537bf97ULL));
}
5720
// Verifies UADDW2: like UADDW but using the upper 16-bit lanes of arg2.
TEST(Arm64InsnTest, UnsignedAddWideUpper) {
  __uint128_t arg1 = MakeUInt128(0x7516493270950493ULL, 0x4639382432227188ULL);
  __uint128_t arg2 = MakeUInt128(0x5159740547021482ULL, 0x8971117779237612ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7516c25570957aa5ULL, 0x4639c195322282ffULL));
}
5727
// Verifies USUBW (unsigned subtract wide): 32-bit lanes of arg1 minus
// zero-extended lower 16-bit lanes of arg2.
TEST(Arm64InsnTest, UnsignedSubWide) {
  __uint128_t arg1 = MakeUInt128(0x0625247972199786ULL, 0x6854279897799233ULL);
  __uint128_t arg2 = MakeUInt128(0x9579057581890622ULL, 0x5254735822052364ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0624a2f072199164ULL, 0x6853921f97798cbeULL));
}
5734
// Verifies USUBW2: like USUBW but using the upper 16-bit lanes of arg2.
TEST(Arm64InsnTest, UnsignedSubWideUpper) {
  __uint128_t arg1 = MakeUInt128(0x8242392192695062ULL, 0x0831838145469839ULL);
  __uint128_t arg2 = MakeUInt128(0x2366461363989101ULL, 0x2102177095976704ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8241a38a9268e95eULL, 0x0831627f454680c9ULL));
}
5741
// Verifies SMULL (signed multiply long): lower eight signed bytes multiplied
// pairwise into eight 16-bit products.
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x9191791552241718ULL, 0x9585361680594741ULL);
  __uint128_t arg2 = MakeUInt128(0x2341933984202187ULL, 0x4564925644346239ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.8h, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xd848048002f7f4a8ULL, 0xf0d3e3d1cc7b04adULL));
}
5748
// Verifies SMULL2: like SMULL but consuming the upper eight bytes of each
// source vector.
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9314052976347574ULL, 0x8119356709110137ULL);
  __uint128_t arg2 = MakeUInt128(0x7517210080315590ULL, 0x2485309066920376ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.8h, %1.16b, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0396f8b20003195aULL, 0xee24f3fd09f0d2f0ULL));
}
5755
// Verifies UMULL (unsigned multiply long): lower eight unsigned bytes
// multiplied pairwise into eight 16-bit products.
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x9149055628425039ULL, 0x1275771028402799ULL);
  __uint128_t arg2 = MakeUInt128(0x8066365825488926ULL, 0x4880254566101729ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.8h, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x05c812902ad00876ULL, 0x48801d16010e1d90ULL));
}
5762
// Verifies UMULL2: like UMULL but consuming the upper eight bytes of each
// source vector.
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9709683408005355ULL, 0x9849175417381883ULL);
  __uint128_t arg2 = MakeUInt128(0x9994469748676265ULL, 0x5165827658483588ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.8h, %1.16b, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x07e80fc004f84598ULL, 0x30181ccd0bae26b8ULL));
}
5769
// Verifies the by-element form of SMULL: lower four 16-bit lanes of arg1 each
// multiplied by lane 2 of arg2, widened into 32-bit products.
// NOTE(review): the test name says "Int8x8" but the arrangement is .4h
// halfword lanes — name appears stale from copy/paste.
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x9293459588970695ULL, 0x3653494060340216ULL);
  __uint128_t arg2 = MakeUInt128(0x6544375589004563ULL, 0x2882250545255640ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.4s, %1.4h, %2.h[2]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xe630cb23016c3279ULL, 0xe8593fcf0f0a1d79ULL));
}
5776
// Verifies the by-element form of SMULL2: upper four 16-bit lanes of arg1 each
// multiplied by lane 2 of arg2, widened into 32-bit products.
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElemUpper) {
  __uint128_t arg1 = MakeUInt128(0x9279068212073883ULL, 0x7781423356282360ULL);
  __uint128_t arg2 = MakeUInt128(0x8963208068222468ULL, 0x0122482611771858ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.4s, %1.8h, %2.h[2]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0af01400047db000ULL, 0x0f2be08008677980ULL));
}
5783
// Verifies the by-element form of UMULL: lower four 16-bit lanes of arg1 each
// multiplied by lane 2 of arg2, widened into 32-bit products.
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x9086996033027634ULL, 0x7870810817545011ULL);
  __uint128_t arg2 = MakeUInt128(0x9307141223390866ULL, 0x3938339529425786ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[2]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x03ffbe2409445fa8ULL, 0x0b54a16c0c0648c0ULL));
}
5790
// Same as the previous UMULL by-element test but selecting lane 4, i.e. an
// index in the upper half of arg2 (exercises the high index-encoding bits).
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem2) {
  __uint128_t arg1 = MakeUInt128(0x9132710495478599ULL, 0x1801969678353214ULL);
  __uint128_t arg2 = MakeUInt128(0x6444118926063152ULL, 0x6618167443193550ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[4]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1f1659301bd26cd0ULL, 0x1e3cb9a017892540ULL));
}
5797
// Verifies the by-element form of UMULL2: upper four 16-bit lanes of arg1 each
// multiplied by lane 2 of arg2, widened into 32-bit products.
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElemUpper) {
  __uint128_t arg1 = MakeUInt128(0x9815793678976697ULL, 0x4220575059683440ULL);
  __uint128_t arg2 = MakeUInt128(0x8697350201410206ULL, 0x7235850200724522ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.4s, %1.8h, %2.h[2]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x12833ad00ad1a880ULL, 0x0db1244012143ea0ULL));
}
5804
// Verifies SMLAL (signed multiply-add long) on byte lanes; arg3 is the initial
// accumulator value fed in via the "0" constraint.
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t arg2 = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t arg3 = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.8h, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x3b5b1ca28ec69893ULL, 0x8b7836c02ef25620ULL));
}
5812
// Verifies SMLAL2: like SMLAL but consuming the upper eight bytes of each
// source; arg3 is the initial accumulator value.
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x5514435021828702ULL, 0x6685610665003531ULL);
  __uint128_t arg2 = MakeUInt128(0x0502163182060176ULL, 0x0921798468493686ULL);
  __uint128_t arg3 = MakeUInt128(0x3161293727951873ULL, 0x0789726373537171ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.8h, %1.16b, %2.16b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5a69293732c30119ULL, 0x0b1f6288a12c6e89ULL));
}
5821
// Verifies SMLSL (signed multiply-subtract long) on byte lanes; arg3 is the
// initial accumulator value.
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x9662539339538092ULL, 0x2195591918188552ULL);
  __uint128_t arg2 = MakeUInt128(0x6780621499231727ULL, 0x6316321833989693ULL);
  __uint128_t arg3 = MakeUInt128(0x8075616855911752ULL, 0x9984501320671293ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.8h, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x9764560f61112814ULL, 0xc42a811300a11b17ULL));
}
5829
// Verifies SMLSL2: like SMLSL but consuming the upper eight bytes of each
// source; arg3 is the initial accumulator value.
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9826903089111856ULL, 0x8798692947051352ULL);
  __uint128_t arg2 = MakeUInt128(0x4816091743243015ULL, 0x3836847072928989ULL);
  __uint128_t arg3 = MakeUInt128(0x8284602223730145ULL, 0x2655679898627767ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.8h, %1.16b, %2.16b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x62e662482c482763ULL, 0x40cd7d88cb3e6577ULL));
}
5838
// Verifies SMLAL on 16-bit lanes (4h -> 4s), reusing the same inputs as the
// byte-lane SMLAL test above; arg3 is the initial accumulator value.
TEST(Arm64InsnTest, SignedMultiplyAddLongInt16x4) {
  __uint128_t arg1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t arg2 = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t arg3 = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.4h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x3b6bd2a28eac7893ULL, 0x8b4c38c02edab620ULL));
}
5846
// Verifies UMLAL (unsigned multiply-add long) on byte lanes; arg3 is the
// initial accumulator value.
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x9696920253886503ULL, 0x4577183176686885ULL);
  __uint128_t arg2 = MakeUInt128(0x9236814884752764ULL, 0x9846882194973972ULL);
  __uint128_t arg3 = MakeUInt128(0x9707737187188400ULL, 0x4143231276365048ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.8h, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xc1d3b199967b852cULL, 0x96cf42b6bfc850d8ULL));
}
5854
// Verifies UMLAL2: like UMLAL but consuming the upper eight bytes of each
// source; arg3 is the initial accumulator value.
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9055637695252326ULL, 0x5361442478023082ULL);
  __uint128_t arg2 = MakeUInt128(0x6811831037735887ULL, 0x0892406130313364ULL);
  __uint128_t arg3 = MakeUInt128(0x7737101162821461ULL, 0x4661679404090518ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.8h, %1.16b, %2.16b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x8db710736c124729ULL, 0x48f99ee6150912bcULL));
}
5863
// Verifies UMLSL (unsigned multiply-subtract long) on byte lanes; arg3 is the
// initial accumulator value.
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x4577772457520386ULL, 0x5437542828256714ULL);
  __uint128_t arg2 = MakeUInt128(0x1288583454443513ULL, 0x2562054464241011ULL);
  __uint128_t arg3 = MakeUInt128(0x0379554641905811ULL, 0x6862305964476958ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.8h, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xe6ed3f7e40f14e1fULL, 0x6388f1213b5f6208ULL));
}
5871
// Verifies UMLSL2: like UMLSL but consuming the upper eight bytes of each
// source; arg3 is the initial accumulator value.
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x4739376564336319ULL, 0x7978680367187307ULL);
  __uint128_t arg2 = MakeUInt128(0x9693924236321448ULL, 0x4503547763156702ULL);
  __uint128_t arg3 = MakeUInt128(0x5539006542311792ULL, 0x0153464977929066ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.8h, %1.16b, %2.16b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x2d64fe6d13ec1784ULL, 0xe0b644e155728f01ULL));
}
5880
// Verifies scalar SSHL (signed shift left by a register value) across the
// shift-amount boundaries -65..65. Negative amounts shift right (the -1 case
// shows sign extension of the 0x90... input); the golden values pin the
// behavior at and beyond the 64-bit width.
TEST(Arm64InsnTest, SignedShiftLeftInt64x1) {
  constexpr auto AsmSshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %d0, %d1, %d2");
  __uint128_t arg = MakeUInt128(0x9007497297363549ULL, 0x6453328886984406ULL);
  ASSERT_EQ(AsmSshl(arg, -65), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, -64), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, -63), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, -1), MakeUInt128(0xc803a4b94b9b1aa4ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 0), MakeUInt128(0x9007497297363549ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 1), MakeUInt128(0x200e92e52e6c6a92ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 63), MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
5894
// Verifies scalar SRSHL (signed rounding shift left by a register value)
// across the shift-amount boundaries -65..65. Compared with SSHL, the rounding
// increment is visible in the negative-shift golden values (e.g. -64 rounds to
// zero here rather than to -1).
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt64x1) {
  constexpr auto AsmSrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %d0, %d1, %d2");
  __uint128_t arg = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  ASSERT_EQ(AsmSrshl(arg, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, -64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, -63), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, -1), MakeUInt128(0xc93b22bc98832bc9ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 0), MakeUInt128(0x9276457931065792ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 1), MakeUInt128(0x24ec8af2620caf24ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 63), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
5908
// Verifies scalar USHL (unsigned shift left by a register value) across the
// shift-amount boundaries -65..65. Negative amounts shift right with zero fill
// (contrast the -1 case with the sign-extending SSHL test above).
TEST(Arm64InsnTest, UnsignedShiftLeftInt64x1) {
  constexpr auto AsmUshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %d0, %d1, %d2");
  __uint128_t arg = MakeUInt128(0x9138296682468185ULL, 0x7103188790652870ULL);
  ASSERT_EQ(AsmUshl(arg, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, -64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, -63), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, -1), MakeUInt128(0x489c14b3412340c2ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 0), MakeUInt128(0x9138296682468185ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 1), MakeUInt128(0x227052cd048d030aULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 63), MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
5922
// Verifies scalar URSHL (unsigned rounding shift left by a register value)
// across the shift-amount boundaries -65..65. The rounding increment is
// visible at -64, where the top bit of the input rounds the result up to 1.
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt64x1) {
  constexpr auto AsmUrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %d0, %d1, %d2");
  __uint128_t arg = MakeUInt128(0x9023452924407736ULL, 0x5949563051007421ULL);
  ASSERT_EQ(AsmUrshl(arg, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -64), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -63), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -1), MakeUInt128(0x4811a29492203b9bULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 0), MakeUInt128(0x9023452924407736ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 1), MakeUInt128(0x20468a524880ee6cULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 63), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
5936
// Verifies vector SSHL on eight 16-bit lanes. arg2 packs a different per-lane
// shift amount (positive and negative, including out-of-range magnitudes) so a
// single golden value covers many shift cases at once.
TEST(Arm64InsnTest, SignedShiftLeftInt16x8) {
  constexpr auto AsmSshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmSshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0xccccffffffffffffULL));
  ASSERT_EQ(AsmSshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
5944
// Verifies vector SRSHL on eight 16-bit lanes with per-lane shift amounts;
// same inputs as the SSHL test above so the rounding difference is isolated in
// the golden values.
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt16x8) {
  constexpr auto AsmSrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmSrshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0xcccdffff00000000ULL));
  ASSERT_EQ(AsmSrshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
5952
// USHL .8h: per-lane unsigned shift; negative lane counts in arg2 shift right
// logically (zero fill), unlike the arithmetic shifts of SSHL.
TEST(Arm64InsnTest, UnsignedShiftLeftInt16x8) {
  constexpr auto AsmUshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmUshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0x4ccc000100000000ULL));
  ASSERT_EQ(AsmUshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
5960
// URSHL .8h: unsigned per-lane shift with rounding on right shifts — compare
// the 0x4ccd/0x0001 lanes here with USHL's truncated 0x4ccc/0x0000 above.
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt16x8) {
  constexpr auto AsmUrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmUrshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0x4ccd000100010000ULL));
  ASSERT_EQ(AsmUrshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
5968
// URSQRTE .4s: unsigned reciprocal square-root estimate, one estimate per
// 32-bit lane; the expected vector is the fixed golden result for this input.
TEST(Arm64InsnTest, UnsignedReciprocalSquareRootEstimateInt32x4) {
  constexpr auto AsmUrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ursqrte %0.4s, %1.4s");
  __uint128_t input = MakeUInt128(0x9641122821407533ULL, 0x0265510042410489ULL);
  ASSERT_EQ(AsmUrsqrte(input), MakeUInt128(0xa7000000ffffffffULL, 0xfffffffffb800000ULL));
}
5974
// URECPE .4s: unsigned reciprocal estimate, one estimate per 32-bit lane; the
// expected vector is the fixed golden result for this input.
TEST(Arm64InsnTest, UnsignedReciprocalEstimateInt32x4) {
  constexpr auto AsmUrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urecpe %0.4s, %1.4s");
  __uint128_t input = MakeUInt128(0x9714864899468611ULL, 0x2476054286734367ULL);
  ASSERT_EQ(AsmUrecpe(input), MakeUInt128(0xd8800000d6000000ULL, 0xfffffffff4000000ULL));
}
5980
IsQcBitSet(uint32_t fpsr)5981 bool IsQcBitSet(uint32_t fpsr) {
5982 return (fpsr & kFpsrQcBit) != 0;
5983 }
5984
// SQADD %d: signed saturating add on one 64-bit lane.  The first pair adds
// without overflow (QC clear); the second overflows and clamps to INT64_MAX
// with QC set.
TEST(Arm64InsnTest, SignedSaturatingAddInt64x1) {
  constexpr auto AsmSqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %d0, %d2, %d3");

  __uint128_t arg1 = MakeUInt128(0x4342527753119724ULL, 0x7430873043619511ULL);
  __uint128_t arg2 = MakeUInt128(0x3961190800302558ULL, 0x7838764420608504ULL);
  auto [res1, fpsr1] = AsmSqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x7ca36b7f5341bc7cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x2557185308919284ULL, 0x4038050710300647ULL);
  __uint128_t arg4 = MakeUInt128(0x7684786324319100ULL, 0x0223929785255372ULL);
  auto [res2, fpsr2] = AsmSqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6000
// SQADD .4s: per-lane signed saturating add.  In the second case some lanes
// clamp to INT32_MIN/INT32_MAX and QC is set.
TEST(Arm64InsnTest, SignedSaturatingAddInt32x4) {
  constexpr auto AsmSqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %0.4s, %2.4s, %3.4s");

  __uint128_t arg1 = MakeUInt128(0x9883554445602495ULL, 0x5666843660292219ULL);
  __uint128_t arg2 = MakeUInt128(0x5124830910605377ULL, 0x2019802183101032ULL);
  auto [res1, fpsr1] = AsmSqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0xe9a7d84d55c0780cULL, 0x76800457e339324bULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x9713308844617410ULL, 0x7959162511714864ULL);
  __uint128_t arg4 = MakeUInt128(0x8744686112476054ULL, 0x2867343670904667ULL);
  auto [res2, fpsr2] = AsmSqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x8000000056a8d464ULL, 0x7fffffff7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6016
// UQADD %b: unsigned saturating add on one 8-bit lane (0x73 + 0x31 = 0xa4);
// the second pair overflows the byte and clamps to 0xff with QC set.
TEST(Arm64InsnTest, UnsignedSaturatingAddInt8x1) {
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %b0, %b2, %b3");

  __uint128_t arg1 = MakeUInt128(0x6017174229960273ULL, 0x5310276871944944ULL);
  __uint128_t arg2 = MakeUInt128(0x4917939785144631ULL, 0x5973144353518504ULL);
  auto [res1, fpsr1] = AsmUqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x00000000000000a4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x3306263695626490ULL, 0x9108276271159038ULL);
  __uint128_t arg4 = MakeUInt128(0x5699505124652999ULL, 0x6062855443838330ULL);
  auto [res2, fpsr2] = AsmUqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6032
// UQADD %d: unsigned saturating add on one 64-bit lane; the second pair
// overflows and clamps to UINT64_MAX with QC set.
TEST(Arm64InsnTest, UnsignedSaturatingAddInt64x1) {
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %d0, %d2, %d3");

  __uint128_t arg1 = MakeUInt128(0x0606885137234627ULL, 0x0799732723313469ULL);
  __uint128_t arg2 = MakeUInt128(0x3971456285542615ULL, 0x4676506324656766ULL);
  auto [res1, fpsr1] = AsmUqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x3f77cdb3bc776c3cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x9534957018600154ULL, 0x1262396228641389ULL);
  __uint128_t arg4 = MakeUInt128(0x7796733329070567ULL, 0x3769621564981845ULL);
  auto [res2, fpsr2] = AsmUqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6048
// UQADD .4s: per-lane unsigned saturating add; in the second case one lane
// overflows and clamps to 0xffffffff with QC set.
TEST(Arm64InsnTest, UnsignedSaturatingAddInt32x4) {
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %0.4s, %2.4s, %3.4s");

  __uint128_t arg1 = MakeUInt128(0x9737425700735921ULL, 0x0031541508936793ULL);
  __uint128_t arg2 = MakeUInt128(0x0081699805365202ULL, 0x7600727749674584ULL);
  auto [res1, fpsr1] = AsmUqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x97b8abef05a9ab23ULL, 0x7631c68c51faad17ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x9727856471983963ULL, 0x0878154322116691ULL);
  __uint128_t arg4 = MakeUInt128(0x8654522268126887ULL, 0x2684459684424161ULL);
  auto [res2, fpsr2] = AsmUqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffd9aaa1eaULL, 0x2efc5ad9a653a7f2ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6064
// SQSUB %s: signed saturating subtract on one 32-bit lane.  Three cases:
// in-range result (QC clear), negative overflow clamped to INT32_MIN, and
// positive overflow clamped to INT32_MAX (both with QC set).
TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x1) {
  constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %s0, %s2, %s3");

  __uint128_t arg1 = MakeUInt128(0x3178534870760322ULL, 0x1982970579751191ULL);
  __uint128_t arg2 = MakeUInt128(0x4405109942358830ULL, 0x3454635349234982ULL);
  auto [res1, fpsr1] = AsmSqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x2e407af2ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x1423696483086410ULL, 0x2592887457999322ULL);
  __uint128_t arg4 = MakeUInt128(0x3749551912219519ULL, 0x0342445230753513ULL);
  auto [res2, fpsr2] = AsmSqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80000000ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  __uint128_t arg5 = MakeUInt128(0x3083508879584152ULL, 0x1489912761065137ULL);
  __uint128_t arg6 = MakeUInt128(0x4153943580721139ULL, 0x0328574918769094ULL);
  auto [res3, fpsr3] = AsmSqsub(arg5, arg6);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
6086
// SQSUB %d: signed saturating subtract on one 64-bit lane; the second pair
// overflows positively and clamps to INT64_MAX with QC set.
TEST(Arm64InsnTest, SignedSaturatingSubtractInt64x1) {
  constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %d0, %d2, %d3");

  __uint128_t arg1 = MakeUInt128(0x4416125223196943ULL, 0x4712064173754912ULL);
  __uint128_t arg2 = MakeUInt128(0x1635700857369439ULL, 0x7305979709719726ULL);
  auto [res1, fpsr1] = AsmSqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x2de0a249cbe2d50aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x7862766490242516ULL, 0x1990277471090335ULL);
  __uint128_t arg4 = MakeUInt128(0x9333093049483805ULL, 0x9785662884478744ULL);
  auto [res2, fpsr2] = AsmSqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6102
// SQSUB .4s: per-lane signed saturating subtract; in the second case lanes
// clamp to both INT32_MIN and INT32_MAX, setting QC.
TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x4) {
  constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %0.4s, %2.4s, %3.4s");

  __uint128_t arg1 = MakeUInt128(0x4485680977569630ULL, 0x3129588719161129ULL);
  __uint128_t arg2 = MakeUInt128(0x2946818849363386ULL, 0x4739274760122696ULL);
  auto [res1, fpsr1] = AsmSqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x1b3ee6812e2062aaULL, 0xe9f03140b903ea93ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x9304127100727784ULL, 0x9301555038895360ULL);
  __uint128_t arg4 = MakeUInt128(0x3382619293437970ULL, 0x8187432094991415ULL);
  auto [res2, fpsr2] = AsmSqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x800000006d2efe14ULL, 0x117a12307fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6118
// UQSUB %s: unsigned saturating subtract on one 32-bit lane; the second pair
// underflows and clamps to 0 with QC set.
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x1) {
  constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %s0, %s2, %s3");

  __uint128_t arg1 = MakeUInt128(0x2548156091372812ULL, 0x8406333039373562ULL);
  __uint128_t arg2 = MakeUInt128(0x4200160456645574ULL, 0x1458816605216660ULL);
  auto [res1, fpsr1] = AsmUqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x3ad2d29eULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x1259960281839309ULL, 0x5487090590738613ULL);
  __uint128_t arg4 = MakeUInt128(0x5191459181951029ULL, 0x7327875571049729ULL);
  auto [res2, fpsr2] = AsmUqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0U, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6134
// UQSUB %d: unsigned saturating subtract on one 64-bit lane; the second pair
// underflows and clamps to 0 with QC set.
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt64x1) {
  constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %d0, %d2, %d3");

  __uint128_t arg1 = MakeUInt128(0x9691077542576474ULL, 0x8832534141213280ULL);
  __uint128_t arg2 = MakeUInt128(0x0626717094009098ULL, 0x2235296579579978ULL);
  auto [res1, fpsr1] = AsmUqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x906a9604ae56d3dcULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x7752929106925043ULL, 0x2614469501098610ULL);
  __uint128_t arg4 = MakeUInt128(0x8889991465855188ULL, 0x1873582528164302ULL);
  auto [res2, fpsr2] = AsmUqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6150
// UQSUB .4s: per-lane unsigned saturating subtract; in the second case one
// lane underflows and clamps to 0 with QC set.
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x4) {
  constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %0.4s, %2.4s, %3.4s");

  __uint128_t arg1 = MakeUInt128(0x6884962578665885ULL, 0x9991798675205545ULL);
  __uint128_t arg2 = MakeUInt128(0x5809900455646117ULL, 0x8755249370124553ULL);
  auto [res1, fpsr1] = AsmUqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x107b06212301f76eULL, 0x123c54f3050e0ff2ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x5032678340586301ULL, 0x9301932429963972ULL);
  __uint128_t arg4 = MakeUInt128(0x0444517928812285ULL, 0x4478211953530898ULL);
  auto [res2, fpsr2] = AsmUqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x4bee160a17d7407cULL, 0x4e89720b00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6166
// SQABS %b: signed saturating absolute value of one 8-bit lane.  abs(0x81)
// is representable (0x7f, QC clear); abs(INT8_MIN = 0x80) saturates to 0x7f
// with QC set.
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt8x1) {
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %b0, %b2");

  __uint128_t arg1 = MakeUInt128(0x8918016855727981ULL, 0x5642185819119749ULL);
  auto [res1, fpsr1] = AsmSqabs(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x0000000000000080ULL, 0x6464607287574305ULL);
  auto [res2, fpsr2] = AsmSqabs(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6180
// SQABS %d: signed saturating absolute value of one 64-bit lane;
// abs(INT64_MIN) saturates to INT64_MAX with QC set.
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt64x1) {
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %d0, %d2");

  __uint128_t arg1 = MakeUInt128(0x9717317281315179ULL, 0x3290443112181587ULL);
  auto [res1, fpsr1] = AsmSqabs(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x68e8ce8d7eceae87ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x8000000000000000ULL, 0x1001237687219447ULL);
  auto [res2, fpsr2] = AsmSqabs(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6194
// SQABS .4s: per-lane signed saturating absolute value; the 0x80000000
// (INT32_MIN) lane in the second input saturates to INT32_MAX with QC set.
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt32x4) {
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %0.4s, %2.4s");

  __uint128_t arg1 = MakeUInt128(0x9133820578492800ULL, 0x6982551957402018ULL);
  auto [res1, fpsr1] = AsmSqabs(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x6ecc7dfb78492800ULL, 0x6982551957402018ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x1810564129725083ULL, 0x6070356880000000ULL);
  auto [res2, fpsr2] = AsmSqabs(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x1810564129725083ULL, 0x607035687fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6208
// SQNEG %s: signed saturating negate of one 32-bit lane; negating
// 0x80000000 (INT32_MIN) saturates to INT32_MAX with QC set.
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x1) {
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %s0, %s2");

  __uint128_t arg1 = MakeUInt128(0x6461582694563802ULL, 0x3950283712168644ULL);
  auto [res1, fpsr1] = AsmSqneg(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x000000006ba9c7feULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x6561785280000000ULL, 0x1277128269186886ULL);
  auto [res2, fpsr2] = AsmSqneg(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6222
// SQNEG %d: signed saturating negate of one 64-bit lane; negating INT64_MIN
// saturates to INT64_MAX with QC set.
TEST(Arm64InsnTest, SignedSaturatingNegateInt64x1) {
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %d0, %d2");

  __uint128_t arg1 = MakeUInt128(0x9703600795698276ULL, 0x2639234410714658ULL);
  auto [res1, fpsr1] = AsmSqneg(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x68fc9ff86a967d8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x8000000000000000ULL, 0x4052295369374997ULL);
  auto [res2, fpsr2] = AsmSqneg(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6236
// SQNEG .4s: per-lane signed saturating negate; the 0x80000000 lane in the
// second input saturates to INT32_MAX with QC set.
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x4) {
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %0.4s, %2.4s");

  __uint128_t arg1 = MakeUInt128(0x9172320202822291ULL, 0x4886959399729974ULL);
  auto [res1, fpsr1] = AsmSqneg(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x6e8dcdfefd7ddd6fULL, 0xb7796a6d668d668cULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x2974711553718589ULL, 0x2423849380000000ULL);
  auto [res2, fpsr2] = AsmSqneg(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xd68b8eebac8e7a77ULL, 0xdbdc7b6d7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6250
// SQSHL (immediate) %s, #20: signed saturating left shift of one 32-bit
// lane; the second input overflows the lane and clamps to INT32_MAX with
// QC set.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x1) {
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %s0, %s2, #20");

  __uint128_t arg1 = MakeUInt128(0x9724611600000181ULL, 0x0003509892864120ULL);
  auto [res1, fpsr1] = AsmSqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0000000018100000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x4195163551108763ULL, 0x2042676129798265ULL);
  auto [res2, fpsr2] = AsmSqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6264
// SQSHL (immediate) %d, #28: signed saturating left shift of one 64-bit
// lane; the second (negative) input overflows and clamps to INT64_MIN with
// QC set.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt64x1) {
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %d0, %d2, #28");

  __uint128_t arg1 = MakeUInt128(0x0000000774000539ULL, 0x2622760323659751ULL);
  auto [res1, fpsr1] = AsmSqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x7740005390000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x9938714995449137ULL, 0x3020518436690767ULL);
  auto [res2, fpsr2] = AsmSqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6278
// SQSHL (immediate) .4s, #12: per-lane signed saturating left shift; in the
// second case lanes clamp to INT32_MAX/INT32_MIN with QC set.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x4) {
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %0.4s, %2.4s, #12");

  __uint128_t arg1 = MakeUInt128(0x0007256800042011ULL, 0x0000313500033555ULL);
  auto [res1, fpsr1] = AsmSqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x7256800042011000ULL, 0x0313500033555000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x0944031900072034ULL, 0x8651010561049872ULL);
  auto [res2, fpsr2] = AsmSqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff72034000ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6292
// SQSHL (register) %s: the second operand supplies a signed shift count
// (negative => shift right).  Sweeps counts around the 32-bit element width:
// right shifts of magnitude >= 31 flush the positive input to 0, while left
// shifts of 31 and beyond saturate it to INT32_MAX with QC set.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt32x1) {
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %s0, %s2, %s3");

  __uint128_t res;
  uint32_t fpsr;
  __uint128_t arg1 = MakeUInt128(0x7480771811555330ULL, 0x9098870255052076ULL);

  std::tie(res, fpsr) = AsmSqshl(arg1, -33);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, -32);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, -31);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, -1);
  ASSERT_EQ(res, MakeUInt128(0x08aaa998ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, 0);
  ASSERT_EQ(res, MakeUInt128(0x11555330ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, 1);
  ASSERT_EQ(res, MakeUInt128(0x22aaa660ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, 31);
  ASSERT_EQ(res, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, 32);
  ASSERT_EQ(res, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, 33);
  ASSERT_EQ(res, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));
}
6336
// UQSHL (immediate) %d, #28: unsigned saturating left shift of one 64-bit
// lane; the second input overflows and clamps to UINT64_MAX with QC set.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt64x1) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %d0, %d2, #28");

  __uint128_t arg1 = MakeUInt128(0x0000000961573564ULL, 0x8883443185280853ULL);
  auto [res1, fpsr1] = AsmUqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x9615735640000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x9759277344336553ULL, 0x8418834030351782ULL);
  auto [res2, fpsr2] = AsmUqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6350
// UQSHL (immediate) .4s, #12: per-lane unsigned saturating left shift; in
// the second case overflowing lanes clamp to 0xffffffff with QC set.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt32x4) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %0.4s, %2.4s, #12");

  __uint128_t arg1 = MakeUInt128(0x0000326300096218ULL, 0x0004565900066853ULL);
  auto [res1, fpsr1] = AsmUqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0326300096218000ULL, 0x4565900066853000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x0009911314010804ULL, 0x0009732335449090ULL);
  auto [res2, fpsr2] = AsmUqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x99113000ffffffffULL, 0x97323000ffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6364
// UQSHL (register) %s: the second operand supplies a signed shift count
// (negative => shift right).  Sweeps counts around the 32-bit element width:
// right shifts of magnitude >= 31 flush the input to 0, while left shifts of
// 31 and beyond saturate it to UINT32_MAX with QC set.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt32x1) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %s0, %s2, %s3");

  __uint128_t res;
  uint32_t fpsr;
  __uint128_t arg1 = MakeUInt128(0x9714978507414585ULL, 0x3085781339156270ULL);

  std::tie(res, fpsr) = AsmUqshl(arg1, -33);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, -32);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, -31);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, -1);
  ASSERT_EQ(res, MakeUInt128(0x03a0a2c2ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, 0);
  ASSERT_EQ(res, MakeUInt128(0x07414585ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, 1);
  ASSERT_EQ(res, MakeUInt128(0x0e828b0aULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, 31);
  ASSERT_EQ(res, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, 32);
  ASSERT_EQ(res, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, 33);
  ASSERT_EQ(res, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));
}
6408
// SQSHL (register) .8h: each 16-bit lane of arg2 supplies a signed shift
// count.  Shifting zero never saturates; shifting 0x3333 left far enough
// clamps those lanes to INT16_MAX (0x7fff) with QC set.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt16x8) {
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %0.8h, %2.8h, %3.8h");

  __uint128_t arg1 = 0U;
  __uint128_t arg2 = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);
  auto [res1, fpsr1] = AsmSqshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x3333333333333333ULL, 0x3333333333333333ULL);
  auto [res2, fpsr2] = AsmSqshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000001999ULL, 0x66667fff7fff7fffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6423
// UQSHL (register) .8h: unsigned counterpart of the test above; overflowing
// lanes of 0x7777 clamp to UINT16_MAX (0xffff) with QC set.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt16x8) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %0.8h, %2.8h, %3.8h");

  __uint128_t arg1 = 0U;
  __uint128_t arg2 = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);
  auto [res1, fpsr1] = AsmUqshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x7777777777777777ULL, 0x7777777777777777ULL);
  auto [res2, fpsr2] = AsmUqshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000003bbbULL, 0xeeeeffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6438
// SQXTN .2s <- .2d: signed saturating narrow of each 64-bit lane to 32 bits.
// Out-of-range lanes clamp to INT32_MAX/INT32_MIN and set QC; in-range lanes
// pass through unchanged.
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %0.2s, %2.2d");

  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x800000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Use the ULL suffix consistently with the rest of the file (was "LL").
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  auto [res2, fpsr2] = AsmSqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7ecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6452
// SQXTN %s <- %d: scalar signed saturating narrow; a value above INT32_MAX
// clamps to 0x7fffffff with QC set, an in-range value passes through.
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %s0, %d2");

  __uint128_t arg1 = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [res1, fpsr1] = AsmSqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x0000000012345678ULL, 0x0ULL);
  auto [res2, fpsr2] = AsmSqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000012345678ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6466
// UQXTN .2s <- .2d: unsigned saturating narrow of each 64-bit lane to
// 32 bits; out-of-range lanes clamp to UINT32_MAX and set QC.
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  constexpr auto AsmUqstn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %0.2s, %2.2d");

  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmUqstn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Use the ULL suffix consistently with the rest of the file (was "LL").
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [res2, fpsr2] = AsmUqstn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6480
// UQXTN %s <- %d: scalar unsigned saturating narrow; a value above
// UINT32_MAX clamps to 0xffffffff with QC set.
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  constexpr auto AsmUqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %s0, %d2");

  __uint128_t arg1 = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [res1, fpsr1] = AsmUqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x0000000087654321ULL, 0x0ULL);
  auto [res2, fpsr2] = AsmUqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000087654321ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6494
// SQXTN2 .4s <- .2d: saturating narrow into the UPPER half of the
// destination, leaving the lower half (seeded from the second argument)
// untouched.
TEST(Arm64InsnTest, SignedSaturatingExtractNarrow2Int64x2ToInt32x2) {
  constexpr auto AsmSqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtn2 %0.4s, %2.2d");

  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res1, fpsr1] = AsmSqxtn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x6121865619673378ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Use the ULL suffix consistently with the rest of the file (was "LL").
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res2, fpsr2] = AsmSqxtn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x6121865619673378ULL, 0x7ecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6510
// UQXTN2 .4s <- .2d: unsigned saturating narrow into the UPPER half of the
// destination, leaving the lower half (seeded from the second argument)
// untouched.
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrow2Int64x2ToInt32x4) {
  constexpr auto AsmUqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqxtn2 %0.4s, %2.2d");

  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res1, fpsr1] = AsmUqxtn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x6121865619673378ULL, 0xffffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Use the ULL suffix consistently with the rest of the file (was "LL").
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res2, fpsr2] = AsmUqxtn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x6121865619673378ULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6526
// SQXTUN .2s <- .2d: signed-to-UNSIGNED saturating narrow; a signed 64-bit
// lane above UINT32_MAX clamps to 0xffffffff with QC set.
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x2ToInt32x2) {
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %0.2s, %2.2d");

  __uint128_t arg1 = MakeUInt128(0x0000000044332211ULL, 0x00000001aabbccddULL);
  auto [res1, fpsr1] = AsmSqxtun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0xffffffff44332211ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Use the ULL suffix consistently with the rest of the file (was "LL").
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [res2, fpsr2] = AsmSqxtun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6540
// SQXTUN %s <- %d: scalar signed-to-unsigned saturating narrow; a value
// above UINT32_MAX clamps to 0xffffffff with QC set.
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x1ToInt32x1) {
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %s0, %d2");

  __uint128_t arg1 = MakeUInt128(0x00000001ff332211ULL, 0x0ULL);
  auto [res1, fpsr1] = AsmSqxtun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  __uint128_t arg2 = MakeUInt128(0x00000000ff332211ULL, 0x0ULL);
  auto [res2, fpsr2] = AsmSqxtun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00000000ff332211ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6554
// SQXTUN2 .4s <- .2d: signed-to-unsigned saturating narrow into the UPPER
// half of the destination; the lower half (seeded from the second argument)
// is left untouched.
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrow2Int64x2ToInt32x4) {
  constexpr auto AsmSqxtun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtun2 %0.4s, %2.2d");

  __uint128_t arg1 = MakeUInt128(0x0000000089abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqxtun2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0123456789abcdefULL, 0x0000000089abcdefULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Use the ULL suffix consistently with the rest of the file (was "LL").
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res2, fpsr2] = AsmSqxtun2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0123456789abcdefULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6570
// SUQADD %s: signed saturating accumulate of an unsigned value; the
// destination register is both read and written (W0 wrapper).  The second
// pair overflows the signed range and clamps to INT32_MAX with QC set.
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x1) {
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %s0, %s2");

  __uint128_t arg1 = MakeUInt128(0x9392023115638719ULL, 0x5080502467972579ULL);
  __uint128_t arg2 = MakeUInt128(0x2497605762625913ULL, 0x3285597263712112ULL);
  auto [res1, fpsr1] = AsmSuqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000077c5e02cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x9099791776687477ULL, 0x4481882870632315ULL);
  __uint128_t arg4 = MakeUInt128(0x5158650328981642ULL, 0x2828823274686610ULL);
  auto [res2, fpsr2] = AsmSuqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6586
// SUQADD (vector, 4x32): per-lane signed saturating accumulate of unsigned
// values; second argument is the tied destination/accumulator.
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x4) {
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %0.4s, %2.4s");

  // All four lane sums stay within signed 32-bit range: QC clear.
  __uint128_t arg1 = MakeUInt128(0x2590181000350989ULL, 0x2864120419516355ULL);
  __uint128_t arg2 = MakeUInt128(0x1108763204267612ULL, 0x9798265294258829ULL);
  auto [res1, fpsr1] = AsmSuqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x36988e42045b7f9bULL, 0xbffc3856ad76eb7eULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Two lanes overflow and saturate to 0x7fffffff; QC is set.
  __uint128_t arg3 = MakeUInt128(0x9082888934938376ULL, 0x4393992569006040ULL);
  __uint128_t arg4 = MakeUInt128(0x6731142209331219ULL, 0x5936202982972351ULL);
  auto [res2, fpsr2] = AsmSuqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff3dc6958fULL, 0x7fffffffeb978391ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6602
// USQADD (scalar): unsigned saturating accumulate of a signed value.  The
// second argument is the tied destination/accumulator; the first argument is
// the signed addend.
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x1) {
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %s0, %s2");

  // Sum stays within unsigned 32-bit range: QC clear.
  __uint128_t arg1 = MakeUInt128(0x9052523242348615ULL, 0x3152097693846104ULL);
  __uint128_t arg2 = MakeUInt128(0x2582849714963475ULL, 0x3418375620030149ULL);
  auto [res1, fpsr1] = AsmUsqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000056caba8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative addend drives the sum below zero: saturates to 0, QC set.
  __uint128_t arg3 = MakeUInt128(0x9887125387801719ULL, 0x6071816407812484ULL);
  __uint128_t arg4 = MakeUInt128(0x7847257912407824ULL, 0x5443616823452395ULL);
  auto [res2, fpsr2] = AsmUsqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Sum overflows UINT32_MAX: saturates to 0xffffffff, QC set.
  __uint128_t arg5 = MakeUInt128(0x9708583970761645ULL, 0x8229630324424328ULL);
  __uint128_t arg6 = MakeUInt128(0x2377374595170285ULL, 0x6069806788952176ULL);
  auto [res3, fpsr3] = AsmUsqadd(arg5, arg6);
  ASSERT_EQ(res3, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
6624
// USQADD (vector, 4x32): per-lane unsigned saturating accumulate of signed
// values; second argument is the tied destination/accumulator.
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x4) {
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %0.4s, %2.4s");

  // All four lane sums stay within unsigned 32-bit range: QC clear.
  __uint128_t arg1 = MakeUInt128(0x4129137074982305ULL, 0x7592909166293919ULL);
  __uint128_t arg2 = MakeUInt128(0x5014721157586067ULL, 0x2700925477180257ULL);
  auto [res1, fpsr1] = AsmUsqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x913d8581cbf0836cULL, 0x9c9322e5dd413b70ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Some lanes saturate high (0xffffffff) and low (0x00000000); QC is set.
  __uint128_t arg3 = MakeUInt128(0x7816422828823274ULL, 0x6866106592732197ULL);
  __uint128_t arg4 = MakeUInt128(0x9071623846421534ULL, 0x8985247621678905ULL);
  auto [res2, fpsr2] = AsmUsqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0xffffffff6ec447a8ULL, 0xf1eb34db00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6640
// SQRSHL (scalar): signed saturating rounding shift left.  The shift amount
// comes from the second operand; a negative amount is a rounding right shift.
// Sweeps shift amounts around the element-width boundaries (+-31, +-32, +-33).
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt32x1) {
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %s0, %s2, %s3");

  __uint128_t res;
  uint32_t fpsr;

  // Right shifts at or beyond element width flush the value to zero.
  __uint128_t arg = MakeUInt128(0x9736705435580445ULL, 0x8657202276378404ULL);
  std::tie(res, fpsr) = AsmSqrshl(arg, -33);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, -32);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, -31);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Small right shift rounds: 0x35580445 >> 1 with rounding = 0x1aac0223.
  std::tie(res, fpsr) = AsmSqrshl(arg, -1);
  ASSERT_EQ(res, MakeUInt128(0x000000001aac0223ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Zero shift passes the low 32-bit element through unchanged.
  std::tie(res, fpsr) = AsmSqrshl(arg, 0);
  ASSERT_EQ(res, MakeUInt128(0x0000000035580445ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, 1);
  ASSERT_EQ(res, MakeUInt128(0x000000006ab0088aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Large left shifts overflow: result saturates to INT32_MAX and QC is set.
  std::tie(res, fpsr) = AsmSqrshl(arg, 31);
  ASSERT_EQ(res, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, 32);
  ASSERT_EQ(res, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, 33);
  ASSERT_EQ(res, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));
}
6684
// SQRSHL (vector, 8x16): per-lane signed saturating rounding shift left, with
// per-lane shift amounts (positive = left, negative = rounding right).
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt16x8) {
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %0.8h, %2.8h, %3.8h");

  // No lane saturates: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);
  auto [res1, fpsr1] = AsmSqrshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000132ULL, 0xcccd000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Large left shifts overflow some lanes to INT16_MAX (0x7fff); QC set.
  __uint128_t arg3 = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  auto [res2, fpsr2] = AsmSqrshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fff7fff7fff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6699
// UQRSHL (scalar): unsigned saturating rounding shift left.  Negative shift
// amounts are rounding right shifts.  Sweeps amounts around +-31/32/33.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt32x1) {
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %s0, %s2, %s3");

  __uint128_t res;
  uint32_t fpsr;

  // Right shifts beyond element width flush to zero.
  __uint128_t arg = MakeUInt128(0x9984124848262367ULL, 0x3771467226061633ULL);
  std::tie(res, fpsr) = AsmUqrshl(arg, -33);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqrshl(arg, -32);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Shift by -31 rounds up to 1 (top bit of the element is set).
  std::tie(res, fpsr) = AsmUqrshl(arg, -31);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqrshl(arg, -1);
  ASSERT_EQ(res, MakeUInt128(0x00000000241311b4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Zero shift passes the low 32-bit element through unchanged.
  std::tie(res, fpsr) = AsmUqrshl(arg, 0);
  ASSERT_EQ(res, MakeUInt128(0x0000000048262367ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqrshl(arg, 1);
  ASSERT_EQ(res, MakeUInt128(0x00000000904c46ceULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Large left shifts overflow: result saturates to UINT32_MAX, QC set.
  std::tie(res, fpsr) = AsmUqrshl(arg, 31);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqrshl(arg, 32);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqrshl(arg, 33);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));
}
6743
// UQRSHL (vector, 8x16): per-lane unsigned saturating rounding shift left
// with per-lane shift amounts.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt16x8) {
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %0.8h, %2.8h, %3.8h");

  // No lane saturates: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);
  auto [res1, fpsr1] = AsmUqrshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000132ULL, 0x4ccd000000010000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Large left shifts overflow some lanes to UINT16_MAX (0xffff); QC set.
  __uint128_t arg3 = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  auto [res2, fpsr2] = AsmUqrshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffffff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6758
// SQSHRN (scalar): signed saturating shift right narrow, 16-bit -> 8-bit with
// an immediate shift of 4.
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x1) {
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %b0, %h2, #4");

  // 0xf943 >> 4 = 0xff94, which fits in a signed byte (-108): QC clear.
  __uint128_t arg1 = MakeUInt128(0x888786614762f943ULL, 0x4140104988899316ULL);
  auto [res1, fpsr1] = AsmSqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x94U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x3588 >> 4 = 0x358 exceeds INT8_MAX: saturates to 0x7f, QC set.
  __uint128_t arg2 = MakeUInt128(0x0051207678103588ULL, 0x6116602029611936ULL);
  auto [res2, fpsr2] = AsmSqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fU, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6772
// SQSHRN (vector): per-lane signed saturating shift right narrow, 8x16-bit
// lanes down to 8x8-bit lanes in the low half of the result.
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8) {
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %0.8b, %2.8h, #4");

  // Every shifted lane fits in a signed byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0625051604340253ULL, 0x0299028602670568ULL);
  auto [res1, fpsr1] = AsmSqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x2928265662514325ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes saturate to 0x7f/0x80 (INT8_MAX/INT8_MIN); QC set.
  __uint128_t arg2 = MakeUInt128(0x2405806005642114ULL, 0x9386436864224724ULL);
  auto [res2, fpsr2] = AsmSqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x807f7f7f7f80567fULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6786
// SQSHRN2 (vector, upper half): narrows into the upper 8 bytes of the result
// while the lower half is kept from the tied destination (second argument).
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmSqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrn2 %0.16b, %2.8h, #4");

  // All shifted lanes fit in a signed byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0367034704100536ULL, 0x0175064803000078ULL);
  __uint128_t arg2 = MakeUInt128(0x3494819262681110ULL, 0x7399482506073949ULL);
  auto [res1, fpsr1] = AsmSqshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x3494819262681110ULL, 0x1764300736344153ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes saturate to INT8_MAX (0x7f); QC set.
  __uint128_t arg3 = MakeUInt128(0x4641074501673719ULL, 0x0483109676711344ULL);
  auto [res2, fpsr2] = AsmSqshrn2(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x3494819262681110ULL, 0x487f7f7f7f74167fULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6801
// UQSHRN (scalar): unsigned saturating shift right narrow, 16-bit -> 8-bit
// with an immediate shift of 4.
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x1) {
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %b0, %h2, #4");

  // 0x0360 >> 4 = 0x36 fits in a byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x6797172898220360ULL, 0x7028806908776866ULL);
  auto [res1, fpsr1] = AsmUqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x36U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8405 >> 4 = 0x840 exceeds UINT8_MAX: saturates to 0xff, QC set.
  __uint128_t arg2 = MakeUInt128(0x0593252746378405ULL, 0x3976918480820410ULL);
  auto [res2, fpsr2] = AsmUqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6815
// UQSHRN (vector): per-lane unsigned saturating shift right narrow, 8x16-bit
// lanes down to 8x8-bit lanes in the low half of the result.
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x8) {
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %0.8b, %2.8h, #4");

  // Every shifted lane fits in a byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0867067907600099ULL, 0x0693007509490515ULL);
  auto [res1, fpsr1] = AsmUqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x6907945186677609ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes saturate to UINT8_MAX (0xff); QC set.
  __uint128_t arg2 = MakeUInt128(0x2736049811890413ULL, 0x0433116627747123ULL);
  auto [res2, fpsr2] = AsmUqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x43ffffffff49ff41ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6829
// UQSHRN2 (vector, upper half): narrows into the upper 8 bytes of the result
// while the lower half is kept from the tied destination (second argument).
// Note: test name fixed from the original misspelling "Unigned...".
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmUqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqshrn2 %0.16b, %2.8h, #4");

  // All shifted lanes fit in a byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0441018407410768ULL, 0x0981066307240048ULL);
  __uint128_t arg2 = MakeUInt128(0x2393582740194493ULL, 0x5665161088463125ULL);
  auto [res1, fpsr1] = AsmUqshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x2393582740194493ULL, 0x9866720444187476ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes saturate to UINT8_MAX (0xff); QC set.
  __uint128_t arg3 = MakeUInt128(0x0785297709734684ULL, 0x3030614624180358ULL);
  auto [res2, fpsr2] = AsmUqshrn2(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x2393582740194493ULL, 0xffffff3578ff97ffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6844
// SQRSHRN (scalar): signed saturating rounding shift right narrow,
// 16-bit -> 8-bit with an immediate shift of 4 (rounds toward nearest).
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x1) {
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %b0, %h2, #4");

  // 0x0534 >> 4 with rounding = 0x53: fits, QC clear.
  __uint128_t arg1 = MakeUInt128(0x9610330799410534ULL, 0x7784574699992128ULL);
  auto [res1, fpsr1] = AsmSqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000053ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Positive overflow saturates to INT8_MAX (0x7f); QC set.
  __uint128_t arg2 = MakeUInt128(0x5999993996122816ULL, 0x1521931488876938ULL);
  auto [res2, fpsr2] = AsmSqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Negative overflow saturates to INT8_MIN (0x80); QC set.
  __uint128_t arg3 = MakeUInt128(0x8022281083009986ULL, 0x0165494165426169ULL);
  auto [res3, fpsr3] = AsmSqrshrn(arg3);
  ASSERT_EQ(res3, MakeUInt128(0x0000000000000080ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
6863
// SQRSHRN (vector): per-lane signed saturating rounding shift right narrow,
// 8x16-bit lanes down to 8x8-bit lanes in the low half of the result.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8) {
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %0.8b, %2.8h, #4");

  // Every rounded lane fits in a signed byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0666070401700260ULL, 0x0520059204930759ULL);
  auto [res1, fpsr1] = AsmSqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x5259497666701726ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes saturate to 0x7f/0x80 (INT8_MAX/INT8_MIN); QC set.
  __uint128_t arg2 = MakeUInt128(0x4143408146852981ULL, 0x5053947178900451ULL);
  auto [res2, fpsr2] = AsmSqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7f807f457f7f7f7fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6877
// SQRSHRN2 (vector, upper half): narrows into the upper 8 bytes of the result
// while the lower half is kept from the tied destination (second argument).
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmSqrshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrn2 %0.16b, %2.8h, #4");

  // All rounded lanes fit in a signed byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0784017103960497ULL, 0x0707072501740336ULL);
  __uint128_t arg2 = MakeUInt128(0x5662725928440620ULL, 0x4302141137199227ULL);
  auto [res1, fpsr1] = AsmSqrshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x5662725928440620ULL, 0x7072173378173949ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes saturate to 0x7f/0x80; QC set.
  __uint128_t arg3 = MakeUInt128(0x2066886512756882ULL, 0x6614973078865701ULL);
  __uint128_t arg4 = MakeUInt128(0x5685016918647488ULL, 0x5416791545965072ULL);
  auto [res2, fpsr2] = AsmSqrshrn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x5685016918647488ULL, 0x7f807f7f7f807f7fULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6893
// UQRSHRN (scalar): unsigned saturating rounding shift right narrow,
// 16-bit -> 8-bit with an immediate shift of 4.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x1) {
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %b0, %h2, #4");

  // 0x0920 >> 4 with rounding = 0x92: fits, QC clear.
  __uint128_t arg1 = MakeUInt128(0x9614236585950920ULL, 0x9083073323356034ULL);
  auto [res1, fpsr1] = AsmUqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000092ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflow saturates to UINT8_MAX (0xff); QC set.
  __uint128_t arg2 = MakeUInt128(0x8465318730299026ULL, 0x6596450137183754ULL);
  auto [res2, fpsr2] = AsmUqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6907
// UQRSHRN (vector): per-lane unsigned saturating rounding shift right narrow,
// 8x16-bit lanes down to 8x8-bit lanes in the low half of the result.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8) {
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %0.8b, %2.8h, #4");

  // Every rounded lane fits in a byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0301067603860240ULL, 0x0011030402470073ULL);
  auto [res1, fpsr1] = AsmUqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0130240730673824ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes saturate to UINT8_MAX (0xff); QC set.
  __uint128_t arg2 = MakeUInt128(0x5085082872462713ULL, 0x4946368501815469ULL);
  auto [res2, fpsr2] = AsmUqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffff18ffff83ffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6921
// UQRSHRN2 (vector, upper half): narrows into the upper 8 bytes of the result
// while the lower half is kept from the tied destination (second argument).
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  // Renamed from AsmUqrshrn for consistency with the other "*2" wrappers
  // (AsmSqrshrn2, AsmUqshrn2, ...) since this wraps the uqrshrn2 instruction.
  constexpr auto AsmUqrshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqrshrn2 %0.16b, %2.8h, #4");

  // All rounded lanes fit in a byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0388099005730661ULL, 0x0237022304780112ULL);
  __uint128_t arg2 = MakeUInt128(0x0392269110277722ULL, 0x6102544149221576ULL);
  auto [res1, fpsr1] = AsmUqrshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0392269110277722ULL, 0x2322481139995766ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes saturate to UINT8_MAX (0xff); QC set.
  __uint128_t arg3 = MakeUInt128(0x9254069617600504ULL, 0x7974928060721268ULL);
  __uint128_t arg4 = MakeUInt128(0x8414695726397884ULL, 0x2560084531214065ULL);
  auto [res2, fpsr2] = AsmUqrshrn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x8414695726397884ULL, 0xffffffffff69ff50ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6937
// SQSHRUN (scalar): signed saturating shift right *unsigned* narrow — a
// signed 16-bit source narrowed to an unsigned 8-bit result, shift #4.
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x1) {
  constexpr auto AsmSqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %b0, %h2, #4");

  // 0x0063 >> 4 = 0x06: fits in unsigned byte, QC clear.
  __uint128_t arg1 = MakeUInt128(0x9143611439920063ULL, 0x8005083214098760ULL);
  auto [res1, fpsr1] = AsmSqshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x06U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative source saturates to 0; QC set.
  __uint128_t arg2 = MakeUInt128(0x3815174571259975ULL, 0x4953580239983146ULL);
  auto [res2, fpsr2] = AsmSqshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00U, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Positive overflow saturates to UINT8_MAX (0xff); QC set.
  __uint128_t arg3 = MakeUInt128(0x4599309324851025ULL, 0x1682944672606661ULL);
  auto [res3, fpsr3] = AsmSqshrun(arg3);
  ASSERT_EQ(res3, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
6956
// SQSHRUN (vector): per-lane signed-to-unsigned saturating shift right
// narrow, 8x16-bit lanes down to 8x8-bit lanes in the low half.
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8) {
  constexpr auto AsmSqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %0.8b, %2.8h, #4");

  // Every shifted lane fits in an unsigned byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0911066408340874ULL, 0x0800074107250670ULL);
  auto [res1, fpsr1] = AsmSqshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x8074726791668387ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes saturate both high (0xff) and low (0x00, negative sources); QC set.
  __uint128_t arg2 = MakeUInt128(0x4792258319129415ULL, 0x7390809143831384ULL);
  auto [res2, fpsr2] = AsmSqshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xff00ffffffffff00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6970
// SQSHRUN2 (vector, upper half): narrows into the upper 8 bytes of the result
// while the lower half is kept from the tied destination (second argument).
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8Upper) {
  constexpr auto AsmSqshrun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrun2 %0.16b, %2.8h, #4");

  // All shifted lanes fit in an unsigned byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0625082101740415ULL, 0x0233074903960353ULL);
  __uint128_t arg2 = MakeUInt128(0x0136178653673760ULL, 0x6421667781377399ULL);
  auto [res1, fpsr1] = AsmSqshrun2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0136178653673760ULL, 0x2374393562821741ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes saturate both high (0xff) and low (0x00); QC set.
  __uint128_t arg3 = MakeUInt128(0x4295810545651083ULL, 0x1046297282937584ULL);
  __uint128_t arg4 = MakeUInt128(0x1611625325625165ULL, 0x7249807849209989ULL);
  auto [res2, fpsr2] = AsmSqshrun2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x1611625325625165ULL, 0xffff00ffff00ffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6986
// SQRSHRUN (scalar): signed saturating *rounding* shift right unsigned
// narrow, 16-bit signed source to 8-bit unsigned result, shift #4.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x1) {
  constexpr auto AsmSqrshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %b0, %h2, #4");

  // 0x0886 >> 4 with rounding = 0x88: fits, QC clear.
  __uint128_t arg1 = MakeUInt128(0x5760186946490886ULL, 0x8154528562134698ULL);
  auto [res1, fpsr1] = AsmSqrshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x88ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative source saturates to 0; QC set.
  __uint128_t arg2 = MakeUInt128(0x8355444560249556ULL, 0x6684366029221951ULL);
  auto [res2, fpsr2] = AsmSqrshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Positive overflow saturates to UINT8_MAX (0xff); QC set.
  __uint128_t arg3 = MakeUInt128(0x2483091060537720ULL, 0x1980218310103270ULL);
  auto [res3, fpsr3] = AsmSqrshrun(arg3);
  ASSERT_EQ(res3, MakeUInt128(0xffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7005
// SQRSHRUN (vector): per-lane signed-to-unsigned saturating rounding shift
// right narrow, 8x16-bit lanes down to 8x8-bit lanes in the low half.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8) {
  constexpr auto AsmSqrshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %0.8b, %2.8h, #4");

  // Every rounded lane fits in an unsigned byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0150069001490702ULL, 0x0673033808340550ULL);
  auto [res1, fpsr1] = AsmSqrshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x6734835515691570ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes saturate both high (0xff) and low (0x00); QC set.
  __uint128_t arg2 = MakeUInt128(0x8363660178487710ULL, 0x6080980426924713ULL);
  auto [res2, fpsr2] = AsmSqrshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xff00ffff00ffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7019
// SQRSHRUN2 (vector, upper half): narrows into the upper 8 bytes of the
// result; lower half is kept from the tied destination (second argument).
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8Upper) {
  constexpr auto AsmSqrshrun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrun2 %0.16b, %2.8h, #4");

  // All rounded lanes fit in an unsigned byte: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0733049502080757ULL, 0x0651018705990498ULL);
  __uint128_t arg2 = MakeUInt128(0x5693795623875551ULL, 0x6175754380917805ULL);
  auto [res1, fpsr1] = AsmSqrshrun2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x5693795623875551ULL, 0x65185a4a73492175ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes saturate both high (0xff) and low (0x00); QC set.
  __uint128_t arg3 = MakeUInt128(0x1444671298615527ULL, 0x5982014514102756ULL);
  __uint128_t arg4 = MakeUInt128(0x0068929750246304ULL, 0x0173514891945763ULL);
  auto [res2, fpsr2] = AsmSqrshrun2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0068929750246304ULL, 0xff14ffffffff00ffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7035
// SQSHLU (scalar): signed saturating shift left, unsigned result, immediate
// shift #4 on a 32-bit element.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x1) {
  constexpr auto AsmSqshlu = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %s0, %s2, #4");

  // 0x01862556 << 4 = 0x18625560: fits in unsigned 32-bit, QC clear.
  __uint128_t arg1 = MakeUInt128(0x9704033001862556ULL, 0x1473321177711744ULL);
  auto [res1, fpsr1] = AsmSqshlu(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x18625560ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative source saturates to 0; QC set.
  __uint128_t arg2 = MakeUInt128(0x3095760196946490ULL, 0x8868154528562134ULL);
  auto [res2, fpsr2] = AsmSqshlu(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00000000ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Positive overflow saturates to UINT32_MAX; QC set.
  __uint128_t arg3 = MakeUInt128(0x1335028160884035ULL, 0x1781452541964320ULL);
  auto [res3, fpsr3] = AsmSqshlu(arg3);
  ASSERT_EQ(res3, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7054
// SQSHLU (vector, 4x32): per-lane signed saturating shift left with unsigned
// result, immediate shift #4.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x4) {
  constexpr auto AsmSqshlu = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %0.4s, %2.4s, #4");

  // All lanes shift without saturating: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0865174507877133ULL, 0x0813875205980941ULL);
  auto [res1, fpsr1] = AsmSqshlu(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x8651745078771330ULL, 0x8138752059809410ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes saturate high (0xffffffff) and low (0, negative source); QC set.
  __uint128_t arg2 = MakeUInt128(0x2174227300352296ULL, 0x0080891797050682ULL);
  auto [res2, fpsr2] = AsmSqshlu(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffffffff03522960ULL, 0x0808917000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7068
// SQDMULL (vector, lower half): signed saturating doubling multiply long,
// 2x32-bit lanes producing 2x64-bit results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x2) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.2s");

  // Only the low 64 bits of each source are consumed by the .2s operands.
  // NOTE(review): arg2's high half literal has 15 hex digits
  // (0x0feed00040000002) — possibly a dropped digit, but it is unused here.
  __uint128_t arg1 = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000010ULL, 0x000000000000000cULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled overflows: saturates to INT64_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000002ULL, 0xfeed00040000002ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000010ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7084
// SQDMULL (vector, lower half): signed saturating doubling multiply long,
// 4x16-bit lanes producing 4x32-bit results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong16x4) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.4h");

  // No lane saturates: QC clear.  Only the low 64 bits of each source are
  // consumed by the .4h operands.
  __uint128_t arg1 = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000f00000000010ULL, 0x000000400000000cULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN doubled overflows: saturates to INT32_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0x8000000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000f00000000010ULL, 0x7fffffff0000000cULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7100
// SQDMULL2 (vector, upper half): operates on the upper 2x32-bit lanes of the
// sources, producing 2x64-bit results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x2) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.4s");

  // No saturation: QC clear.
  // NOTE(review): arg2's high half literal has 15 hex digits
  // (0x0feed00040000002) — possibly a dropped digit; the expected values
  // below were computed with the literal as written, so do not "fix" it
  // without recomputing them.
  __uint128_t arg1 = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000800000040ULL, 0xffddc4ed7f98e000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled overflows: saturates to INT64_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000040ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7116
// SQDMULL2 (vector, upper half): operates on the upper 4x16-bit lanes of the
// sources, producing 4x32-bit results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper16x4) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.8h");

  // No lane saturates: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x00000226ff6ae4b6ULL, 0x00b4e592fffd8eceULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN doubled overflows: saturates to INT32_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000040ULL, 0x7fffffff00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7132
// SQDMULL (by element): multiplies each 32-bit lane of the first source by
// lane [1] of the second source, producing 2x64-bit results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x2IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.s[1]");

  // Multiplier lane s[1] = 2; no saturation, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011LL);
  __uint128_t arg2 = MakeUInt128(0x0000000200000000ULL, 0x000000000000000ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000000004488cd10ULL, 0x0000000000880088ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled overflows: saturates to INT64_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x0022002280000000ULL, 0x1122334400110011LL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000000ULL, 0x000000000000000ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0xffddffde00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7148
// SQDMULL (by element): multiplies each 16-bit lane of the first source by
// lane [4] of the second source, producing 4x32-bit results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x4IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.h[4]");

  // Multiplier lane h[4] = 2; no saturation, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011LL);
  __uint128_t arg2 = MakeUInt128(0x000f000f000f000fULL, 0x000f000f000f0002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000044880000cd10ULL, 0x0000008800000088ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN doubled overflows: saturates to INT32_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x0022002280000000ULL, 0x1122334400118000ULL);
  __uint128_t arg4 = MakeUInt128(0x1111111122222222ULL, 0x1122334411228000ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff00000000ULL, 0xffde0000ffde0000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7164
// SQDMULL2 (by element, upper half): multiplies the upper 2x32-bit lanes of
// the first source by lane [3] of the second source.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper64x2IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.s[3]");

  // Multiplier lane s[3] = 2; no saturation, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t arg2 = MakeUInt128(0xffffffffffffffffULL, 0x00000002ffffffffULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000440044ULL, 0x000000004488cd10ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled overflows: saturates to INT64_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x80000000ffffffffULL, 0x1122334480000000ULL);
  __uint128_t arg4 = MakeUInt128(0x1122334411223344ULL, 0x80000000ffffffffULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0xeeddccbc00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7180
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x4IndexedElem) {
  // SQDMULL2 (by element): widens the UPPER four 16-bit lanes of %2 to 32 bits
  // as saturate(2 * lane * %3.h[7]); QC is set when a lane saturates.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.h[7]");

  // No saturation: element h[7] is 0x0002, all products fit in 32 bits.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t arg2 = MakeUInt128(0xffffffffffffffffULL, 0x0002ffffffffffffULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000004400000044ULL, 0x000044880000cd10ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 2 * 0x8000 * 0x8000 overflows int32: the lane saturates to 0x7fffffff.
  __uint128_t arg3 = MakeUInt128(0x80000000ffffffffULL, 0x112233448000ffffULL);
  __uint128_t arg4 = MakeUInt128(0x1122334411223344ULL, 0x8000ffffffffffffULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff00010000ULL, 0xeede0000ccbc0000ULL));
  // This assertion was missing: every sibling saturating case verifies that
  // saturation raises the QC bit, and res2 contains a saturated lane.
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7195
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1) {
  // Scalar SQDMULL: Dd = saturate(2 * Sn * Sm), widening 32x32 -> 64 bits;
  // QC is set when the doubled product saturates.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %s3");

  // In-range product: no saturation, QC stays clear.
  __uint128_t lhs = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t rhs = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [value, status] = AsmSqdmull(lhs, rhs);
  ASSERT_EQ(value, MakeUInt128(0x0222244440000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // 2 * INT32_MIN * INT32_MIN overflows int64: result saturates to
  // INT64_MAX and QC is set.
  __uint128_t sat_lhs = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t sat_rhs = MakeUInt128(0xff11ff1180000000ULL, 0xffffffff11223344ULL);
  auto [sat_value, sat_status] = AsmSqdmull(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_value, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
7210
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1) {
  // Scalar SQDMULL: Sd = saturate(2 * Hn * Hm), widening 16x16 -> 32 bits;
  // QC is set when the doubled product saturates.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %h3");

  // In-range product: no saturation, QC stays clear.
  __uint128_t lhs = MakeUInt128(0x1111111811112222ULL, 0xf000000700080006ULL);
  __uint128_t rhs = MakeUInt128(0x0000000510004444ULL, 0xf000000300080002ULL);
  auto [value, status] = AsmSqdmull(lhs, rhs);
  ASSERT_EQ(value, MakeUInt128(0x0000000012343210ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // 2 * INT16_MIN * INT16_MIN overflows int32: result saturates to
  // INT32_MAX and QC is set.
  __uint128_t sat_lhs = MakeUInt128(0xaabbccdd00008000ULL, 0x1122334400110011ULL);
  __uint128_t sat_rhs = MakeUInt128(0xff11ff1100008000ULL, 0xffffffff11223344ULL);
  auto [sat_value, sat_status] = AsmSqdmull(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_value, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
7225
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1IndexedElem) {
  // Scalar SQDMULL (by element): Sd = saturate(2 * Hn * Vm.h[7]);
  // QC is set when the doubled product saturates.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %3.h[7]");

  // In-range product: no saturation, QC stays clear.
  __uint128_t lhs = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t rhs = MakeUInt128(0x0000000510000000ULL, 0x1111000300000002ULL);
  auto [value, status] = AsmSqdmull(lhs, rhs);
  ASSERT_EQ(value, MakeUInt128(0x00000000048d0c84ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Hn and element h[7] are both 0x8000: the doubled product overflows
  // int32, so the result saturates to INT32_MAX and QC is set.
  __uint128_t sat_lhs = MakeUInt128(0xaabbccddaabb8000ULL, 0x1122334400110011ULL);
  __uint128_t sat_rhs = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000aabb11223344ULL);
  auto [sat_value, sat_status] = AsmSqdmull(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_value, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
7240
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1IndexedElem) {
  // Scalar SQDMULL (by element): Dd = saturate(2 * Sn * Vm.s[3]);
  // QC is set when the doubled product saturates.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %3.s[3]");

  // In-range product: element s[3] is 0x00000003, so no saturation.
  __uint128_t lhs = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t rhs = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [value, status] = AsmSqdmull(lhs, rhs);
  ASSERT_EQ(value, MakeUInt128(0x000000006666ccccULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Sn and element s[3] are both 0x80000000: the doubled product overflows
  // int64, so the result saturates to INT64_MAX and QC is set.
  __uint128_t sat_lhs = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t sat_rhs = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000000011223344ULL);
  auto [sat_value, sat_status] = AsmSqdmull(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_value, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
7255
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x2) {
  // SQDMLAL (vector): per 32-bit lane, acc += saturate(2 * a * b), widening
  // to 64 bits; both the doubling multiply and the accumulate saturate, and
  // either sets the QC bit.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.2s");

  // No saturation: products and sums fit, QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: 2 * 0x80000000 * 0x80000000 overflows
  // int64 before the add.
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000080000000910ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: accumulator starts at INT64_MAX.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7283
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong16x4) {
  // SQDMLAL (vector): per 16-bit lane, acc += saturate(2 * a * b), widening
  // to 32 bits; saturation in either step sets the QC bit.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.4h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010001011100ULL, 0x03f0040004024600ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: 2 * 0x8000 * 0x8000 overflows int32.
  __uint128_t arg4 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0369cba90369cba9ULL, 0x7fffffff0369cba9ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: one accumulator lane starts at INT32_MAX.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffff12345678ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffff12356678ULL, 0x00000a0000013800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7311
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x2) {
  // SQDMLAL2: like SQDMLAL but reads the UPPER two 32-bit lanes of each
  // source; acc += saturate(2 * a * b) into 64-bit lanes, QC on saturation.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.4s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: upper lanes hold 0x80000000.
  __uint128_t arg4 = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  __uint128_t arg5 = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x00024a0066000d00ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: accumulator lane starts at INT64_MAX.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x7fffffffffffffffULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x13419a0a7d513f58ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7339
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper16x4) {
  // SQDMLAL2: like SQDMLAL but reads the UPPER four 16-bit lanes of each
  // source; acc += saturate(2 * a * b) into 32-bit lanes, QC on saturation.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.8h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x020d03f81c24e9e0ULL, 0x050d06f81f24ece0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: an upper lane holds 0x8000 in both
  // sources, so 2 * 0x8000 * 0x8000 overflows int32.
  __uint128_t arg4 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x03b9fa8703b9fa87ULL, 0x7fffffff03b9fa87ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: one accumulator lane starts at INT32_MAX.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x7fffffff0000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x134159702d593f58ULL, 0x7fffffff1b2598e0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7367
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1) {
  // Scalar SQDMLAL: Dd += saturate(2 * Sn * Sm); saturation in either the
  // multiply or the accumulate sets the QC bit.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %s3");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  // Hex digits normalized to lowercase for consistency with the rest of the
  // file (was 0x...000000FFULL; identical value).
  __uint128_t arg3 = MakeUInt128(0x12345678000000ffULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x167ce349000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: 2 * 0x80000000 * 0x80000000 overflows
  // int64.
  __uint128_t arg4 = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: accumulator starts at INT64_MAX.
  __uint128_t arg7 = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7395
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1) {
  // Scalar SQDMLAL: Sd += saturate(2 * Hn * Hm); saturation in either the
  // multiply or the accumulate sets the QC bit.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %h3");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0000000001011100ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: 2 * 0x8000 * 0x8000 overflows int32.
  __uint128_t arg4 = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: accumulator starts at INT32_MAX.
  __uint128_t arg7 = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd7fffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7423
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x2IndexedElem) {
  // SQDMLAL (by element): per 32-bit lane, acc += saturate(2 * a * %3.s[1]),
  // widening to 64 bits; QC is set if either step saturates.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.s[1]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: lane 1 and element s[1] are both
  // 0x80000000.
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x000007fc00000900ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: accumulator lane starts at INT64_MAX.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7451
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x4IndexedElem) {
  // SQDMLAL (by element): per 16-bit lane, acc += saturate(2 * a * %3.h[7]),
  // widening to 32 bits; QC is set if either step saturates.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x012eb10b89bbca1fULL, 0xfedf0524765b0d28ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: a lane of 0x8000 times element
  // h[7] = 0x8000, doubled, overflows int32.
  __uint128_t arg4 = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xbbbc4567777f4567ULL, 0x7fffffff00004567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: one accumulator lane starts at INT32_MAX.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffff004d4bffULL, 0x0026b00000275600ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7479
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper64x2IndexedElem) {
  // SQDMLAL2 (by element): reads the UPPER two 32-bit lanes of %2;
  // acc += saturate(2 * a * %3.s[3]) into 64-bit lanes, QC on saturation.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: an upper lane of 0x80000000 times
  // element s[3] = 0x80000000, doubled, overflows int64.
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
  __uint128_t arg6 = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0xf1e0cfbf04040404ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: accumulator lane starts at INT64_MAX.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x010d4d926b1d98e0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7507
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x4IndexedElem) {
  // SQDMLAL2 (by element): reads the UPPER four 16-bit lanes of %2;
  // acc += saturate(2 * a * %3.h[7]) into 32-bit lanes, QC on saturation.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0230485f8a1d9e4fULL, 0xffe9bd9076c60270ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: an upper lane of 0x8000 times element
  // h[7] = 0x8000, doubled, overflows int32.
  __uint128_t arg4 = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x023645677fffffffULL, 0x0236456702364567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: one accumulator lane starts at INT32_MAX.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffff0071d05fULL, 0x010d0cf800728060ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7535
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1IndexedElem) {
  // Scalar SQDMLAL (by element): Dd += saturate(2 * Sn * Vm.s[3]);
  // QC is set if either the multiply or the accumulate saturates.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x012eb3d4d07fc65fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: Sn = 0x80000000 and element
  // s[3] = 0x80000000; the doubled product overflows int64.
  __uint128_t arg4 = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: accumulator starts at INT64_MAX.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7563
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1IndexedElem) {
  // Scalar SQDMLAL (by element): Sd += saturate(2 * Hn * Vm.h[7]);
  // QC is set if either the multiply or the accumulate saturates.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0000000089bbca1fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: Hn = 0x8000 and element h[7] = 0x8000;
  // the doubled product overflows int32.
  __uint128_t arg4 = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition: accumulator starts at INT32_MAX.
  __uint128_t arg7 = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd7fffffffULL, 0x0011223344556677ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7591
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x2) {
  // SQDMLSL (vector): per 32-bit lane, acc -= saturate(2 * a * b), widening
  // to 64 bits; both the doubling multiply and the subtraction saturate, and
  // either sets the QC bit.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.2s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0000000080000001ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0000100000000001ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x00001003fffffff9ULL, 0x0400040004000400ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: 2 * 0x80000000 * 0x80000000 overflows
  // int64 before the subtract.
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000000000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x00000000000008f0ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction: accumulator lane starts at INT64_MIN.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7619
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong16x4) {
  // SQDMLSL (vector): per 16-bit lane, acc -= saturate(2 * a * b), widening
  // to 32 bits; saturation in either step sets the QC bit.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.4h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010000fef100ULL, 0x0410040003fdc200ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: 2 * 0x8000 * 0x8000 overflows int32.
  __uint128_t arg4 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfedcbf25fedcbf25ULL, 0x81234568fedcbf25ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction: one accumulator lane starts at INT32_MIN.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000012345678ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000012334678ULL, 0x00000a0000002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7647
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x2) {
  // SQDMLSL2: like SQDMLSL but reads the UPPER two 32-bit lanes of each
  // source; acc -= saturate(2 * a * b) into 64-bit lanes, QC on saturation.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.4s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: upper lanes hold 0x80000000.
  __uint128_t arg4 = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  __uint128_t arg5 = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfffdc5ff9a000500ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction: accumulator lane starts at INT64_MIN.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x8000000000000000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x112712e5a7176d98ULL, 0x8000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7675
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper16x4) {
  // SQDMLSL2: like SQDMLSL but reads the UPPER four 16-bit lanes of each
  // source; acc -= saturate(2 * a * b) into 32-bit lanes, QC on saturation.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.8h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2fe08e5db1820ULL, 0x02f30108e8db1b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: an upper lane holds 0x8000 in both
  // sources, so 2 * 0x8000 * 0x8000 overflows int32.
  __uint128_t arg4 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfe8c9047fe8c9047ULL, 0x81234568fe8c9047ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction: one accumulator lane starts at INT32_MIN.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x800000000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x11275380f70f6d98ULL, 0x80000000e4dbc720ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7703
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1) {
  // Scalar SQDMLSL: Dd -= saturate(2 * Sn * Sm); saturation in either the
  // multiply or the subtraction sets the QC bit.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %s3");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  // Hex digits normalized to lowercase for consistency with the rest of the
  // file (was 0x...000000FFULL; identical value).
  __uint128_t arg3 = MakeUInt128(0x12345678000000ffULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0debc9a7000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication: 2 * 0x80000000 * 0x80000000 overflows
  // int64 before the subtract.
  __uint128_t arg4 = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x9122334411111112ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction: accumulator starts at INT64_MIN.
  __uint128_t arg7 = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7731
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1) {
  // Scalar SQDMLSL: acc.s - saturating(2 * lhs.h * rhs.h), saturated on write-back.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %h3");

  {
    // In-range operands; QC stays clear.
    __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
    __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x0000000000fef100ULL, 0x0000000000000000ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // 0x8000 * 0x8000 doubled overflows the multiply; QC set.
    __uint128_t lhs = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
    __uint128_t rhs = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
    __uint128_t acc = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x0000000091111112ULL, 0x0000000000000000ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }

  {
    // Subtracting from INT32_MIN overflows; result saturates and QC set.
    __uint128_t lhs = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
    __uint128_t acc = MakeUInt128(0xaabbccdd80000000ULL, 0x00000a000000b000ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
7759
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x2IndexedElem) {
  // Vector SQDMLSL by element: each 32-bit lane of %2 multiplied by %3.s[1].
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.s[1]");

  {
    // In-range lanes; QC stays clear.
    __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
    __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x010000fef0fef100ULL, 0x040003ff7bff7c00ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN doubled overflows the multiply; QC set.
    __uint128_t lhs = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
    __uint128_t rhs = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
    __uint128_t acc = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x0000080400000900ULL, 0x80000a000000b001ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }

  {
    // Subtracting from INT64_MIN overflows; lane saturates and QC set.
    __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
    __uint128_t acc = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
7787
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x4IndexedElem) {
  // Vector SQDMLSL by element: each 16-bit lane of %2 multiplied by %3.h[7].
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.h[7]");

  {
    // In-range lanes; QC stays clear.
    __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
    __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x0117d9c3899bd1bfULL, 0xfeda700c764d56f8ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN * INT16_MIN doubled overflows the multiply; QC set.
    __uint128_t lhs = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
    __uint128_t rhs = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
    __uint128_t acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x468a45678ac74567ULL, 0x8123456802464567ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }

  {
    // Subtracting from INT32_MIN overflows; lane saturates and QC set.
    __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
    __uint128_t acc = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x80000000ffb2b400ULL, 0xffd96400ffda0a00ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
7815
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper64x2IndexedElem) {
  // SQDMLSL2 by element: upper 32-bit lanes of %2 multiplied by %3.s[3].
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.s[3]");

  {
    // In-range lanes; QC stays clear.
    __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
    __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN doubled overflows the multiply; QC set.
    __uint128_t lhs = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
    __uint128_t rhs = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
    __uint128_t acc = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x8101010102020203ULL, 0x1425364704040404ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }

  {
    // Subtracting from INT64_MIN overflows; lane saturates and QC set.
    __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
    __uint128_t acc = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x8000000000000000ULL, 0xfef2c66d94e3c720ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
7843
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x4IndexedElem) {
  // SQDMLSL2 by element: upper 16-bit lanes of %2 multiplied by %3.h[7].
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.h[7]");

  {
    // In-range lanes; QC stays clear.
    __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
    __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x0016426f8939fd8fULL, 0xfdcfb7a075e261b0ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN * INT16_MIN doubled overflows the multiply; QC set.
    __uint128_t lhs = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
    __uint128_t rhs = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
    __uint128_t acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x0010456781234568ULL, 0x0010456700104567ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }

  {
    // Subtracting from INT32_MIN overflows; lane saturates and QC set.
    __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
    __uint128_t acc = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x80000000ff8e2fa0ULL, 0xfef30708ff8edfa0ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
7871
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1IndexedElem) {
  // Scalar SQDMLSL by element: lhs.s multiplied by %3.s[3].
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %3.s[3]");

  {
    // In-range operands; QC stays clear.
    __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
    __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x0117d6fa42d7d57fULL, 0x0ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN doubled overflows the multiply; QC set.
    __uint128_t lhs = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
    __uint128_t rhs = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
    __uint128_t acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x8123456701234568ULL, 0x0ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }

  {
    // Subtracting from INT64_MIN overflows; result saturates and QC set.
    __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
    __uint128_t acc = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x8000000000000000ULL, 0x0ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
7899
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1IndexedElem) {
  // Scalar SQDMLSL by element: lhs.h multiplied by %3.h[7].
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %3.h[7]");

  {
    // In-range operands; QC stays clear.
    __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
    __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x00000000899bd1bfULL, 0x0ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN * INT16_MIN doubled overflows the multiply; QC set.
    __uint128_t lhs = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
    __uint128_t rhs = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
    __uint128_t acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x0000000081234568ULL, 0x0ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }

  {
    // Subtracting from INT32_MIN overflows; result saturates and QC set.
    __uint128_t lhs = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
    __uint128_t rhs = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
    __uint128_t acc = MakeUInt128(0xaabbccdd80000000ULL, 0x0011223344556677ULL);
    auto [result, status] = AsmSqdmlsl(lhs, rhs, acc);
    ASSERT_EQ(result, MakeUInt128(0x0000000080000000ULL, 0x0ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
7927
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4) {
  // SQRDMULH: rounded high half of the doubled product, per 32-bit lane.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.4s");

  {
    // No lane saturates; QC stays clear.
    __uint128_t lhs = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
    __uint128_t rhs = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed9UL, 0x0UL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN saturates to INT32_MAX; QC set.
    __uint128_t lhs = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
    __uint128_t rhs = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed2UL, 0x0UL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
7943
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2) {
  // 64-bit form of SQRDMULH; upper half of the result register is zeroed.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.2s");

  {
    // No lane saturates; unused lanes hold junk that must be ignored.
    __uint128_t lhs = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
    __uint128_t rhs = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN saturates to INT32_MAX; QC set.
    __uint128_t lhs = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
    __uint128_t rhs = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
7959
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8) {
  // SQRDMULH: rounded high half of the doubled product, per 16-bit lane.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.8h");

  {
    // No lane saturates; QC stays clear.
    __uint128_t lhs = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
    __uint128_t rhs = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x0002000040000000ULL, 0x7ddc000000000000ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN * INT16_MIN saturates to INT16_MAX; QC set.
    __uint128_t lhs = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
    __uint128_t rhs = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x7fff000100020000ULL, 0x0002000000000000ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
7975
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4) {
  // 64-bit form of SQRDMULH on 16-bit lanes; upper half is zeroed.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.4h");

  {
    // No lane saturates; junk in the unused upper half must be ignored.
    __uint128_t lhs = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
    __uint128_t rhs = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x0003000040000000ULL, 0x0000000000000000ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN * INT16_MIN saturates to INT16_MAX; QC set.
    __uint128_t lhs = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
    __uint128_t rhs = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x7fff000100020000ULL, 0x0000000000000000ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
7991
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4IndexedElem) {
  // SQRDMULH by element: every lane of %2 multiplied by %3.s[0].
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.s[0]");

  {
    // No lane saturates. Lane 2 is 8 only because of rounding (7 without it).
    __uint128_t lhs = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
    __uint128_t rhs = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x2UL, 0x0UL, 0x8UL, 0x0UL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN saturates to INT32_MAX; QC set.
    __uint128_t lhs = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
    __uint128_t rhs = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8008
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2IndexedElem) {
  // 64-bit SQRDMULH by element: lanes of %2 multiplied by %3.s[0].
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.s[0]");

  {
    // No lane saturates; unused lanes hold junk that must be ignored.
    __uint128_t lhs = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
    __uint128_t rhs = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN saturates to INT32_MAX; QC set.
    __uint128_t lhs = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
    __uint128_t rhs = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8024
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8IndexedElem) {
  // SQRDMULH by element: every 16-bit lane of %2 multiplied by %3.h[7].
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.h[7]");

  {
    // No lane saturates; QC stays clear.
    __uint128_t lhs = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
    __uint128_t rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x0008fff800040000ULL, 0x0000000800020004ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN element saturates the INT16_MIN lane; QC set.
    __uint128_t lhs = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
    __uint128_t rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8040
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4IndexedElem) {
  // 64-bit SQRDMULH by element on 16-bit lanes; upper half is zeroed.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.h[7]");

  {
    // No lane saturates; junk in unused lanes must be ignored.
    __uint128_t lhs = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
    __uint128_t rhs = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x0004fffc00030000ULL, 0x0000000000000000ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN element saturates the INT16_MIN lane; QC set.
    __uint128_t lhs = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
    __uint128_t rhs = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8056
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1) {
  // Scalar SQRDMULH on a single 32-bit element.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %s3");

  {
    // No saturation. Without rounding the result would be 2 instead of 3.
    __uint128_t lhs = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    __uint128_t rhs = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN saturates to INT32_MAX; QC set.
    __uint128_t lhs = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    __uint128_t rhs = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8073
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1) {
  // Scalar SQRDMULH on a single 16-bit element.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %h3");

  {
    // No saturation; QC stays clear.
    __uint128_t lhs = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
    __uint128_t rhs = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN * INT16_MIN saturates to INT16_MAX; QC set.
    __uint128_t lhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
    __uint128_t rhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8089
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1IndexedElem) {
  // Scalar SQRDMULH by element: lhs.s multiplied by %3.s[2].
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %3.s[2]");

  {
    // No saturation. Without rounding the result would be 2 instead of 3.
    __uint128_t lhs = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    __uint128_t rhs = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN saturates to INT32_MAX; QC set.
    __uint128_t lhs = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    __uint128_t rhs = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8106
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1IndexedElem) {
  // Scalar SQRDMULH by element: lhs.h multiplied by %3.h[7].
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %3.h[7]");

  {
    // No saturation. Without rounding the result would be 2 instead of 3.
    __uint128_t lhs = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
    __uint128_t rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN * INT16_MIN saturates to INT16_MAX; QC set.
    __uint128_t lhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
    __uint128_t rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
    auto [result, status] = AsmSqrdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8123
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4) {
  // SQDMULH: truncated (no rounding) high half of the doubled product, per 32-bit lane.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.4s");

  {
    // No lane saturates; QC stays clear.
    __uint128_t lhs = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
    __uint128_t rhs = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed8UL, 0x0UL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN saturates to INT32_MAX; QC set.
    __uint128_t lhs = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
    __uint128_t rhs = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed1UL, 0x0UL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8139
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2) {
  // 64-bit form of SQDMULH; upper half of the result register is zeroed.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.2s");

  {
    // No lane saturates; unused lanes hold junk that must be ignored.
    __uint128_t lhs = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
    __uint128_t rhs = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN saturates to INT32_MAX; QC set.
    __uint128_t lhs = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
    __uint128_t rhs = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8155
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8) {
  // SQDMULH: truncated high half of the doubled product, per 16-bit lane.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.8h");

  {
    // No lane saturates; QC stays clear.
    __uint128_t lhs = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
    __uint128_t rhs = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x000200003fff0000ULL, 0x7ddc000000000000ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN * INT16_MIN saturates to INT16_MAX; QC set.
    __uint128_t lhs = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
    __uint128_t rhs = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x7fff000000020000ULL, 0x0002000000000000ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8171
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4) {
  // 64-bit form of SQDMULH on 16-bit lanes; upper half is zeroed.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.4h");

  {
    // No lane saturates; junk in the unused upper half must be ignored.
    __uint128_t lhs = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
    __uint128_t rhs = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x000200003fff0000ULL, 0x0000000000000000ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN * INT16_MIN saturates to INT16_MAX; QC set.
    __uint128_t lhs = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
    __uint128_t rhs = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x7fff000000020000ULL, 0x0000000000000000ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8187
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4IndexedElem) {
  // SQDMULH by element: every lane of %2 multiplied by %3.s[0], no rounding.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.s[0]");

  {
    // No lane saturates; lane 2 is 7 (vs. 8 for the rounding SQRDMULH form).
    __uint128_t lhs = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
    __uint128_t rhs = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x2UL, 0x0UL, 0x7UL, 0x0UL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN saturates to INT32_MAX; QC set.
    __uint128_t lhs = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
    __uint128_t rhs = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8203
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2IndexedElem) {
  // 64-bit SQDMULH by element: lanes of %2 multiplied by %3.s[0].
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.s[0]");

  {
    // No lane saturates; unused lanes hold junk that must be ignored.
    __uint128_t lhs = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
    __uint128_t rhs = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN saturates to INT32_MAX; QC set.
    __uint128_t lhs = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
    __uint128_t rhs = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8219
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8IndexedElem) {
  // SQDMULH by element: every 16-bit lane of %2 multiplied by %3.h[7].
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.h[7]");

  {
    // No lane saturates; QC stays clear.
    __uint128_t lhs = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
    __uint128_t rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x0007fff800040000ULL, 0xffff000700020004ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN element saturates the INT16_MIN lane; QC set.
    __uint128_t lhs = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
    __uint128_t rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8235
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4IndexedElem) {
  // 64-bit SQDMULH by element on 16-bit lanes; upper half is zeroed.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.h[7]");

  {
    // No lane saturates; junk in unused lanes must be ignored.
    __uint128_t lhs = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
    __uint128_t rhs = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x0003fffc00020000ULL, 0x0000000000000000ULL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT16_MIN element saturates the INT16_MIN lane; QC set.
    __uint128_t lhs = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
    __uint128_t rhs = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8251
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1) {
  // Scalar SQDMULH on a single 32-bit element (truncating, unlike SQRDMULH).
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %s3");

  {
    // No saturation; QC stays clear.
    __uint128_t lhs = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    __uint128_t rhs = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_FALSE(IsQcBitSet(status));
  }

  {
    // INT32_MIN * INT32_MIN saturates to INT32_MAX; QC set.
    __uint128_t lhs = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    __uint128_t rhs = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
    auto [result, status] = AsmSqdmulh(lhs, rhs);
    ASSERT_EQ(result, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
    ASSERT_TRUE(IsQcBitSet(status));
  }
}
8267
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1) {
  // SQDMULH (scalar, 16-bit): operates on element 0 only; the 0xfeed filler
  // in the remaining lanes must be ignored and the result zero-extended.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %h3");

  // Non-saturating product.
  __uint128_t lhs = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
  auto [prod, flags] = AsmSqdmulh(lhs, rhs);
  ASSERT_EQ(prod, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN * INT16_MIN doubled saturates to INT16_MAX and sets QC.
  __uint128_t sat_lhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t sat_rhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  auto [sat_prod, sat_flags] = AsmSqdmulh(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(sat_flags));
}
8283
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1IndexedElem) {
  // SQDMULH (scalar, by element): element 0 of the first operand times
  // element 2 of the second; other lanes are filler and must be ignored.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %3.s[2]");

  // Non-saturating product with the multiplier taken from s[2].
  __uint128_t lhs = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t rhs = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
  auto [prod, flags] = AsmSqdmulh(lhs, rhs);
  ASSERT_EQ(prod, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT32_MIN * INT32_MIN doubled saturates to INT32_MAX and sets QC.
  __uint128_t sat_lhs = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t sat_rhs = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
  auto [sat_prod, sat_flags] = AsmSqdmulh(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(sat_flags));
}
8299
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1IndexedElem) {
  // SQDMULH (scalar, by element): element 0 of the first operand times
  // element 7 of the second; 0xfeed filler lanes must be ignored.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %3.h[7]");

  // Non-saturating product with the multiplier taken from h[7].
  __uint128_t lhs = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
  auto [prod, flags] = AsmSqdmulh(lhs, rhs);
  ASSERT_EQ(prod, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN * INT16_MIN doubled saturates to INT16_MAX and sets QC.
  __uint128_t sat_lhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t sat_rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [sat_prod, sat_flags] = AsmSqdmulh(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(sat_flags));
}
8315
// Parameterized fixture: each parameter is one FPCR bit (or rounding-mode
// field encoding) that the implementation is expected to support.
class FpcrBitSupport : public testing::TestWithParam<uint64_t> {};
8317
TEST_P(FpcrBitSupport, SupportsBit) {
  // Write the parameter into FPCR and immediately read FPCR back: a supported
  // bit must survive the round trip unmodified.
  uint64_t readback;
  asm("msr fpcr, %x1\n\t"
      "mrs %x0, fpcr"
      : "=r"(readback)
      : "r"(static_cast<uint64_t>(GetParam())));
  ASSERT_EQ(readback, GetParam()) << "Should be able to set then get FPCR bit: " << GetParam();
}
8326
8327 // Note: The exception enablement flags (such as IOE) are not checked, because when tested on actual
8328 // ARM64 device we find that the tests fail either because they cannot be written or are RAZ (read
8329 // as zero).
// Covers each rounding-mode encoding, the FZ and DN control bits, and the
// all-clear value.
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpcrBitSupport,
                         testing::Values(kFpcrRModeTieEven,
                                         kFpcrRModeZero,
                                         kFpcrRModeNegInf,
                                         kFpcrRModePosInf,
                                         kFpcrFzBit,
                                         kFpcrDnBit,
                                         0));
8339
// Parameterized fixture: each parameter is one FPSR status bit that the
// implementation is expected to support.
class FpsrBitSupport : public testing::TestWithParam<uint64_t> {};
8341
TEST_P(FpsrBitSupport, SupportsBit) {
  // Write the parameter into FPSR and immediately read FPSR back: a supported
  // bit must survive the round trip unmodified.
  uint64_t readback;
  asm("msr fpsr, %1\n\t"
      "mrs %0, fpsr"
      : "=r"(readback)
      : "r"(static_cast<uint64_t>(GetParam())));
  // Report the offending bit on failure, matching the FPCR variant above.
  ASSERT_EQ(readback, GetParam()) << "Should be able to set then get FPSR bit: " << GetParam();
}
8350
// Covers each cumulative exception flag plus the saturation (QC) bit.
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpsrBitSupport,
                         testing::Values(kFpsrIocBit,
                                         kFpsrDzcBit,
                                         kFpsrOfcBit,
                                         kFpsrUfcBit,
                                         kFpsrIxcBit,
                                         kFpsrIdcBit,
                                         kFpsrQcBit));
8360
TEST(Arm64InsnTest, UnsignedDivide64) {
  // UDIV with 64-bit operands. AArch64 division never traps: the quotient is
  // computed in hardware, and a zero divisor yields zero.
  auto Divide = [](uint64_t dividend, uint64_t divisor) {
    uint64_t quotient;
    asm("udiv %0, %1, %2" : "=r"(quotient) : "r"(dividend), "r"(divisor));
    return quotient;
  };
  ASSERT_EQ(Divide(0x8'0000'0000ULL, 2ULL), 0x4'0000'0000ULL) << "Division is 64-bit.";
  ASSERT_EQ(Divide(123ULL, 0ULL), 0ULL) << "Div by 0 results in 0.";
}
8370
TEST(Arm64InsnTest, AesEncode) {
  // Single AESE round: the destination register doubles as the state input
  // (tied operand "0"), with the round key supplied separately.
  __uint128_t state = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t round_key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  __uint128_t encoded;
  asm("aese %0.16b, %2.16b" : "=w"(encoded) : "0"(state), "w"(round_key));
  ASSERT_EQ(encoded, MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL));
}
8378
TEST(Arm64InsnTest, AesMixColumns) {
  // AESMC applied to a fixed state; expected value is a golden constant.
  __uint128_t state = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t mixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesmc %0.16b, %1.16b")(state);
  ASSERT_EQ(mixed, MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL));
}
8384
TEST(Arm64InsnTest, AesDecode) {
  // AESD should invert AesEncode: start from AesEncode's output, pre-XOR the
  // key into the state, decode, then XOR the key out again to recover the
  // original AesEncode input.
  __uint128_t round_key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  __uint128_t state = MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL);
  state ^= round_key;
  __uint128_t decoded;
  asm("aesd %0.16b, %2.16b" : "=w"(decoded) : "0"(state), "w"(round_key));
  ASSERT_EQ(decoded ^ round_key, MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL));
}
8394
TEST(Arm64InsnTest, AesInverseMixColumns) {
  // AESIMC should undo AesMixColumns: feed it that test's expected output and
  // expect that test's input back.
  __uint128_t mixed = MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL);
  __uint128_t unmixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesimc %0.16b, %1.16b")(mixed);
  ASSERT_EQ(unmixed, MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL));
}
8400
8401 } // namespace
8402