1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "gtest/gtest.h"
18 
19 #include <cstdint>
20 #include <initializer_list>
21 #include <limits>
22 
23 #include "utility.h"
24 
25 namespace {
26 
TEST(Arm64InsnTest, UnsignedBitfieldMoveNoShift) {
  // UBFM with immr=0, imms=63 selects the whole 64-bit source, so the value
  // is copied through unchanged.
  uint64_t input = 0x3952247371907021ULL;
  uint64_t output;

  asm("ubfm %0, %1, #0, #63" : "=r"(output) : "r"(input));

  ASSERT_EQ(output, 0x3952247371907021ULL);
}
35 
TEST(Arm64InsnTest, BitfieldLeftInsertion) {
  // BFM with immr=40, imms=15 (imms < immr) inserts the low 16 bits of the
  // source into bits [39:24] of the destination, leaving other bits intact.
  uint64_t src = 0x389522868478abcdULL;
  uint64_t dst = 0x1101044682325271ULL;

  asm("bfm %0, %1, #40, #15" : "=r"(dst) : "r"(src), "0"(dst));

  ASSERT_EQ(dst, 0x110104abcd325271ULL);
}
44 
TEST(Arm64InsnTest, BitfieldRightInsertion) {
  // BFM with immr=4, imms=39 (imms >= immr) copies source bits [39:4] into
  // destination bits [35:0], preserving the destination's upper bits.
  uint64_t src = 0x3276561809377344ULL;
  uint64_t dst = 0x1668039626579787ULL;

  asm("bfm %0, %1, #4, #39" : "=r"(dst) : "r"(src), "0"(dst));

  ASSERT_EQ(dst, 0x1668039180937734ULL);
}
53 
TEST(Arm64InsnTest, MoveImmToFp32) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // For example, we decompose imm8 = 0b01001111 into:
  //
  // - sign = 0 (positive)
  // - upper exponent = 1
  // - lower exponent = 00
  // - mantissa = 1111
  //
  // This immediate corresponds to 32-bit floating point value:
  //
  // 0 011111 00 1111 0000000000000000000
  // | |      |  |    |
  // | |      |  |    +- 19 zeros
  // | |      |  +------ mantissa
  // | |      +--------- lower exponent
  // | +---------------- upper exponent (custom extended to 6 bits)
  // +------------------ sign
  //
  // Thus we have:
  //
  //   1.11110000... * 2^(124-127) = 0.2421875
  //
  // where 1.11110000... is in binary.
  //
  // See VFPExpandImm in the ARM Architecture Manual for details.
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x40000000U, 0U));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x40780000U, 0U));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x41800000U, 0U));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x41f80000U, 0U));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3e000000U, 0U));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3e780000U, 0U));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3f800000U, 0U));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3ff80000U, 0U));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc0000000U, 0U));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc0780000U, 0U));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc1800000U, 0U));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc1f80000U, 0U));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbe000000U, 0U));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbe780000U, 0U));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbf800000U, 0U));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbff80000U, 0U));
}
160 
TEST(Arm64InsnTest, MoveImmToFp64) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // For example, we decompose imm8 = 0b01001111 into:
  //
  // - sign = 0 (positive)
  // - upper exponent = 1
  // - lower exponent = 00
  // - mantissa = 1111
  //
  // This immediate corresponds to 64-bit floating point value:
  //
  // 0 011111111 00 1111 000000000000000000000000000000000000000000000000
  // | |         |  |    |
  // | |         |  |    +- 48 zeros
  // | |         |  +------ mantissa
  // | |         +--------- lower exponent
  // | +------------------- upper exponent (custom extended to 9 bits)
  // +--------------------- sign
  //
  // Thus we have:
  //
  //   1.11110000... * 2^(1020-1023) = 0.2421875
  //
  // where 1.11110000... is in binary.
  //
  // See VFPExpandImm in the ARM Architecture Manual for details.
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000000000000ULL, 0U));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x400f000000000000ULL, 0U));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4030000000000000ULL, 0U));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x403f000000000000ULL, 0U));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3fc0000000000000ULL, 0U));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3fcf000000000000ULL, 0U));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3ff0000000000000ULL, 0U));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3fff000000000000ULL, 0U));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc000000000000000ULL, 0U));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc00f000000000000ULL, 0U));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc030000000000000ULL, 0U));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc03f000000000000ULL, 0U));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbfc0000000000000ULL, 0U));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbfcf000000000000ULL, 0U));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbff0000000000000ULL, 0U));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbfff000000000000ULL, 0U));
}
267 
TEST(Arm64InsnTest, MoveImmToF32x4) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000040000000ULL, 0x4000000040000000ULL));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x4078000040780000ULL, 0x4078000040780000ULL));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4180000041800000ULL, 0x4180000041800000ULL));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x41f8000041f80000ULL, 0x41f8000041f80000ULL));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3e0000003e000000ULL, 0x3e0000003e000000ULL));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3e7800003e780000ULL, 0x3e7800003e780000ULL));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3f8000003f800000ULL, 0x3f8000003f800000ULL));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3ff800003ff80000ULL, 0x3ff800003ff80000ULL));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc0000000c0000000ULL, 0xc0000000c0000000ULL));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc0780000c0780000ULL, 0xc0780000c0780000ULL));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc1800000c1800000ULL, 0xc1800000c1800000ULL));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc1f80000c1f80000ULL, 0xc1f80000c1f80000ULL));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbe000000be000000ULL, 0xbe000000be000000ULL));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbe780000be780000ULL, 0xbe780000be780000ULL));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbf800000bf800000ULL, 0xbf800000bf800000ULL));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbff80000bff80000ULL, 0xbff80000bff80000ULL));
}
349 
TEST(Arm64InsnTest, MoveImmToF64x2) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000000000000ULL, 0x4000000000000000ULL));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x400f000000000000ULL, 0x400f000000000000ULL));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4030000000000000ULL, 0x4030000000000000ULL));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x403f000000000000ULL, 0x403f000000000000ULL));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3fc0000000000000ULL, 0x3fc0000000000000ULL));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3fcf000000000000ULL, 0x3fcf000000000000ULL));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3ff0000000000000ULL, 0x3ff0000000000000ULL));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3fff000000000000ULL, 0x3fff000000000000ULL));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc000000000000000ULL, 0xc000000000000000ULL));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc00f000000000000ULL, 0xc00f000000000000ULL));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc030000000000000ULL, 0xc030000000000000ULL));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc03f000000000000ULL, 0xc03f000000000000ULL));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbfc0000000000000ULL, 0xbfc0000000000000ULL));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbfcf000000000000ULL, 0xbfcf000000000000ULL));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbff0000000000000ULL, 0xbff0000000000000ULL));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbfff000000000000ULL, 0xbfff000000000000ULL));
}
431 
TEST(Arm64InsnTest, MoveFpRegToReg) {
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);
  uint64_t gpr = 0xffffeeeeddddccccULL;

  // fmov from the upper 64-bit lane.
  asm("fmov %0, %1.d[1]" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x3333cccc4444ddddULL);

  // fmov from the lower 64-bit lane.
  asm("fmov %0, %d1" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x1111aaaa2222bbbbULL);

  // fmov from the lowest 32-bit lane; the upper half of the result is zero.
  asm("fmov %w0, %s1" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x2222bbbbULL);
}
448 
TEST(Arm64InsnTest, MoveRegToFpReg) {
  uint64_t gpr = 0xffffeeeeddddccccULL;
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // fmov into the upper 64-bit lane; the lower lane is preserved.
  asm("fmov %0.d[1], %1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaa2222bbbbULL, 0xffffeeeeddddccccULL));

  // fmov into the lower 64-bit lane; the upper lane is cleared.
  asm("fmov %d0, %1" : "=w"(vec) : "r"(gpr));
  ASSERT_EQ(vec, MakeUInt128(0xffffeeeeddddccccULL, 0x0));

  // fmov into the lowest 32-bit lane; all other bits are cleared.
  asm("fmov %s0, %w1" : "=w"(vec) : "r"(gpr));
  ASSERT_EQ(vec, MakeUInt128(0xddddccccULL, 0x0));
}
465 
TEST(Arm64InsnTest, MoveFpRegToFpReg) {
  __uint128_t result;

  // Scalar double move copies the low 64 bits and clears the rest.
  __uint128_t double_src =
      MakeUInt128(0x402e9eb851eb851fULL, 0xdeadbeefaabbccddULL);  // 15.31 in double
  asm("fmov %d0, %d1" : "=w"(result) : "w"(double_src));
  ASSERT_EQ(result, MakeUInt128(0x402e9eb851eb851fULL, 0ULL));

  // Scalar single move copies the low 32 bits and clears the rest.
  __uint128_t single_src =
      MakeUInt128(0xaabbccdd40e51eb8ULL, 0x0011223344556677ULL);  // 7.16 in float
  asm("fmov %s0, %s1" : "=w"(result) : "w"(single_src));
  ASSERT_EQ(result, MakeUInt128(0x40e51eb8ULL, 0ULL));
}
479 
TEST(Arm64InsnTest, InsertRegPartIntoSimd128) {
  uint64_t gpr = 0xffffeeeeddddccccULL;
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // Insert the low byte of the general register into byte lane 3.
  asm("mov %0.b[3], %w1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaacc22bbbbULL, 0x3333cccc4444ddddULL));

  // Insert the whole general register into double-word lane 1.
  asm("mov %0.d[1], %1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaacc22bbbbULL, 0xffffeeeeddddccccULL));
}
492 
TEST(Arm64InsnTest, DuplicateRegIntoSimd128) {
  // DUP broadcasts the low byte of the general register into all 16 lanes.
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("dup %0.16b, %w1")(0xabU);
  ASSERT_EQ(result, MakeUInt128(0xababababababababULL, 0xababababababababULL));
}
497 
TEST(Arm64InsnTest, MoveSimd128ElemToRegSigned) {
  uint64_t result = 0;
  __uint128_t vec = MakeUInt128(0x9796959493929190ULL, 0x9f9e9d9c9b9a99ULL);

  // 32-bit lanes, sign-extended to 64 bits.
  asm("smov %0, %1.s[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xffffffff93929190ULL);

  asm("smov %0, %1.s[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xffffffff9c9b9a99ULL);

  // 16-bit lanes, sign-extended to 32 bits (upper half of Xd is zeroed).
  asm("smov %w0, %1.h[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x00000000ffff9190ULL);

  asm("smov %w0, %1.h[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x00000000ffff9594ULL);

  // 8-bit lanes, sign-extended to 32 bits (upper half of Xd is zeroed).
  asm("smov %w0, %1.b[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x00000000ffffff90ULL);

  asm("smov %w0, %1.b[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x00000000ffffff92ULL);
}
523 
TEST(Arm64InsnTest, MoveSimd128ElemToRegUnsigned) {
  uint64_t result = 0;
  __uint128_t vec = MakeUInt128(0xaaaabbbbcccceeeeULL, 0xffff000011112222ULL);

  // 64-bit lanes.
  asm("umov %0, %1.d[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xaaaabbbbcccceeeeULL);

  asm("umov %0, %1.d[1]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xffff000011112222ULL);

  // 32-bit lanes, zero-extended.
  asm("umov %w0, %1.s[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xcccceeeeULL);

  asm("umov %w0, %1.s[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0x11112222ULL);

  // 16-bit lanes, zero-extended.
  asm("umov %w0, %1.h[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xeeeeULL);

  asm("umov %w0, %1.h[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xbbbbULL);

  // 8-bit lanes, zero-extended.
  asm("umov %w0, %1.b[0]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xeeULL);

  asm("umov %w0, %1.b[2]" : "=r"(result) : "w"(vec));
  ASSERT_EQ(result, 0xccULL);
}
556 
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4) {
  // smlal (by element): widening signed multiply-accumulate, lower halves.
  __uint128_t multiplicand = MakeUInt128(0x9463229563989898ULL, 0x9358211674562701ULL);
  __uint128_t multiplier = MakeUInt128(0x0218356462201349ULL, 0x6715188190973038ULL);
  __uint128_t accumulator = MakeUInt128(0x1198004973407239ULL, 0x6103685406643193ULL);
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.h[1]")(
      multiplicand, multiplier, accumulator);
  ASSERT_EQ(result, MakeUInt128(0x37c4a3494b9db539ULL, 0x37c3dab413a58e33ULL));
}
565 
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper) {
  // smlal2 (by element): widening signed multiply-accumulate, upper halves.
  __uint128_t multiplicand = MakeUInt128(0x9478221818528624ULL, 0x0851400666044332ULL);
  __uint128_t multiplier = MakeUInt128(0x5888569867054315ULL, 0x4706965747458550ULL);
  __uint128_t accumulator = MakeUInt128(0x3323233421073015ULL, 0x4594051655379068ULL);
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[1]")(
      multiplicand, multiplier, accumulator);
  ASSERT_EQ(result, MakeUInt128(0x5c30bd483c119e0fULL, 0x48ecc5ab6efb3a86ULL));
}
574 
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper2) {
  // smlal2 with the highest element index (h[7]) to exercise index decoding.
  __uint128_t multiplicand = MakeUInt128(0x9968262824727064ULL, 0x1336222178923903ULL);
  __uint128_t multiplier = MakeUInt128(0x1760854289437339ULL, 0x3561889165125042ULL);
  __uint128_t accumulator = MakeUInt128(0x4404008952719837ULL, 0x8738648058472689ULL);
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[7]")(
      multiplicand, multiplier, accumulator);
  ASSERT_EQ(result, MakeUInt128(0x5d27e9db5e54d15aULL, 0x8b39d9f65f64ea0aULL));
}
583 
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4) {
  // smlsl (by element): widening signed multiply-subtract, lower halves.
  __uint128_t multiplicand = MakeUInt128(0x9143447886360410ULL, 0x3182350736502778ULL);
  __uint128_t multiplier = MakeUInt128(0x5908975782727313ULL, 0x0504889398900992ULL);
  __uint128_t accumulator = MakeUInt128(0x3913503373250855ULL, 0x9826558670892426ULL);
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.4s, %1.4h, %2.h[1]")(
      multiplicand, multiplier, accumulator);
  ASSERT_EQ(result, MakeUInt128(0xfd58202775231935ULL, 0x61d69fb0921db6b6ULL));
}
592 
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4Upper) {
  // smlsl2 (by element): widening signed multiply-subtract, upper halves.
  __uint128_t multiplicand = MakeUInt128(0x9320199199688285ULL, 0x1718395366913452ULL);
  __uint128_t multiplier = MakeUInt128(0x2244470804592396ULL, 0x6028171565515656ULL);
  __uint128_t accumulator = MakeUInt128(0x6611135982311225ULL, 0x0628905854914509ULL);
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.4s, %1.8h, %2.h[1]")(
      multiplicand, multiplier, accumulator);
  ASSERT_EQ(result, MakeUInt128(0x645326f0814d99a3ULL, 0x05c4290053980b2eULL));
}
601 
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4) {
  // umlal (by element): widening unsigned multiply-accumulate, lower halves.
  __uint128_t multiplicand = MakeUInt128(0x9027601834840306ULL, 0x8113818551059797ULL);
  __uint128_t multiplier = MakeUInt128(0x0566400750942608ULL, 0x7885735796037324ULL);
  __uint128_t accumulator = MakeUInt128(0x5141467867036880ULL, 0x9880609716425849ULL);
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.4s, %1.4h, %2.h[1]")(
      multiplicand, multiplier, accumulator);
  ASSERT_EQ(result, MakeUInt128(0x61c8e2c867f707f8ULL, 0xc5dfe72334816629ULL));
}
610 
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4Upper) {
  // umlal2 (by element): widening unsigned multiply-accumulate, upper halves.
  __uint128_t multiplicand = MakeUInt128(0x9454236828860613ULL, 0x4084148637767009ULL);
  __uint128_t multiplier = MakeUInt128(0x6120715124914043ULL, 0x0272538607648236ULL);
  __uint128_t accumulator = MakeUInt128(0x3414334623518975ULL, 0x7664521641376796ULL);
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.4s, %1.8h, %2.h[1]")(
      multiplicand, multiplier, accumulator);
  ASSERT_EQ(result, MakeUInt128(0x3c00351c3352428eULL, 0x7f9b6cda4425df7cULL));
}
619 
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4) {
  // umlsl (by element): widening unsigned multiply-subtract, lower halves.
  __uint128_t multiplicand = MakeUInt128(0x9128009282525619ULL, 0x0205263016391147ULL);
  __uint128_t multiplier = MakeUInt128(0x7247331485739107ULL, 0x7758744253876117ULL);
  __uint128_t accumulator = MakeUInt128(0x4657867116941477ULL, 0x6421441111263583ULL);
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.4s, %1.4h, %2.h[1]")(
      multiplicand, multiplier, accumulator);
  ASSERT_EQ(result, MakeUInt128(0x0268619be9b26a3cULL, 0x1876471910da19edULL));
}
628 
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4Upper) {
  // umlsl2 (by element): widening unsigned multiply-subtract, upper halves.
  __uint128_t multiplicand = MakeUInt128(0x9420757136275167ULL, 0x4573189189456283ULL);
  __uint128_t multiplier = MakeUInt128(0x5257044133543758ULL, 0x5753426986994725ULL);
  __uint128_t accumulator = MakeUInt128(0x4703165661399199ULL, 0x9682628247270641ULL);
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.4s, %1.8h, %2.h[1]")(
      multiplicand, multiplier, accumulator);
  ASSERT_EQ(result, MakeUInt128(0x2b7d4cb24d79259dULL, 0x8895afc6423a13adULL));
}
637 
TEST(Arm64InsnTest, AsmConvertI32F32) {
  // scvtf: signed 32-bit integer to single precision.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1");
  ASSERT_EQ(Scvtf(21), MakeUInt128(0x41a80000U, 0U));  // 21.0f
}
642 
TEST(Arm64InsnTest, AsmConvertU32F32) {
  // ucvtf: unsigned 32-bit integer to single precision.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1");

  ASSERT_EQ(Ucvtf(29), MakeUInt128(0x41e80000U, 0U));  // 29.0f

  // Verify that the topmost bit isn't treated as the sign.
  ASSERT_EQ(Ucvtf(1U << 31), MakeUInt128(0x4f000000U, 0U));  // 2^31 as float
}
651 
TEST(Arm64InsnTest, AsmConvertU32F32FromSimdReg) {
  // ucvtf (scalar, SIMD source): unsigned 32-bit int to single precision.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1");

  ASSERT_EQ(Ucvtf(28), MakeUInt128(0x41e00000U, 0U));  // 28.0f

  // Verify that the topmost bit isn't treated as the sign.
  ASSERT_EQ(Ucvtf(1U << 31), MakeUInt128(0x4f000000U, 0U));  // 2^31 as float
}
660 
TEST(Arm64InsnTest, AsmConvertI32F64) {
  // scvtf: signed 32-bit integer to double precision.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1");
  ASSERT_EQ(Scvtf(21), MakeUInt128(0x4035000000000000ULL, 0U));  // 21.0
}
665 
TEST(Arm64InsnTest, AsmConvertU32F64) {
  // ucvtf: unsigned 32-bit integer to double precision.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1");

  ASSERT_EQ(Ucvtf(18), MakeUInt128(0x4032000000000000ULL, 0U));  // 18.0

  // Verify that the topmost bit isn't treated as the sign.
  ASSERT_EQ(Ucvtf(1U << 31), MakeUInt128(0x41e0000000000000ULL, 0U));  // 2^31
}
674 
TEST(Arm64InsnTest, AsmConvertI64F32) {
  // scvtf: signed 64-bit integer to single precision.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1");
  ASSERT_EQ(Scvtf(11), MakeUInt128(0x41300000U, 0U));  // 11.0f
}
679 
TEST(Arm64InsnTest, AsmConvertU64F32) {
  // ucvtf: unsigned 64-bit integer to single precision.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1");

  ASSERT_EQ(Ucvtf(3), MakeUInt128(0x40400000U, 0U));  // 3.0f

  // Verify that the topmost bit isn't treated as the sign.
  ASSERT_EQ(Ucvtf(1ULL << 63), MakeUInt128(0x5f000000U, 0U));  // 2^63 as float
}
688 
TEST(Arm64InsnTest, AsmConvertI64F64) {
  // scvtf: signed 64-bit integer to double precision.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1");
  ASSERT_EQ(Scvtf(137), MakeUInt128(0x4061200000000000ULL, 0U));  // 137.0
}
693 
TEST(Arm64InsnTest, AsmConvertI32F32FromSimdReg) {
  // scvtf (scalar, SIMD source): signed 32-bit int to single precision.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1");
  ASSERT_EQ(Scvtf(1109), MakeUInt128(0x448aa000ULL, 0U));  // 1109.0f
}
698 
TEST(Arm64InsnTest, AsmConvertI64F64FromSimdReg) {
  // scvtf (scalar, SIMD source): signed 64-bit int to double precision.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1");
  ASSERT_EQ(Scvtf(123), MakeUInt128(0x405ec00000000000ULL, 0U));  // 123.0
}
703 
TEST(Arm64InsnTest, AsmConvertI32x4F32x4) {
  // Vector scvtf: four signed 32-bit lanes to four single-precision lanes.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s");
  __uint128_t input = MakeUInt128(0x0000003500000014ULL, 0x0000005400000009ULL);
  ASSERT_EQ(Scvtf(input), MakeUInt128(0x4254000041a00000ULL, 0x42a8000041100000ULL));
}
709 
TEST(Arm64InsnTest, AsmConvertI64x2F64x2) {
  // Vector scvtf: two signed 64-bit lanes to two double-precision lanes.
  constexpr auto Scvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d");
  __uint128_t input = MakeUInt128(static_cast<int64_t>(-9), 17U);
  ASSERT_EQ(Scvtf(input), MakeUInt128(0xc022000000000000ULL, 0x4031000000000000ULL));
}
715 
TEST(Arm64InsnTest, AsmConvertU32x4F32x4) {
  // Vector ucvtf: four unsigned 32-bit lanes to four single-precision lanes.
  // Lane 1 has the top bit set to check it isn't treated as a sign bit.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s");
  __uint128_t input = MakeUInt128(0x8000000000000019ULL, 0x0000005800000010ULL);
  ASSERT_EQ(Ucvtf(input), MakeUInt128(0x4f00000041c80000ULL, 0x42b0000041800000ULL));
}
721 
TEST(Arm64InsnTest, AsmConvertU64x2F64x2) {
  // Vector ucvtf: two unsigned 64-bit lanes to two double-precision lanes.
  // Lane 0 has the top bit set to check it isn't treated as a sign bit.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d");
  __uint128_t input = MakeUInt128(1ULL << 63, 29U);
  ASSERT_EQ(Ucvtf(input), MakeUInt128(0x43e0000000000000ULL, 0x403d000000000000ULL));
}
727 
// ucvtf Dd, Xn: unsigned 64-bit GPR to double.
TEST(Arm64InsnTest, AsmConvertU64F64) {
  constexpr auto AsmConvertU64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1");

  ASSERT_EQ(AsmConvertU64F64(49), MakeUInt128(0x4048800000000000ULL, 0U));

  // Test that the topmost bit isn't treated as the sign.
  ASSERT_EQ(AsmConvertU64F64(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));
}
736 
// ucvtf Dd, Dn: SIMD-register source form of unsigned int64 -> double.
TEST(Arm64InsnTest, AsmConvertU64F64FromSimdReg) {
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1");

  ASSERT_EQ(AsmUcvtf(47), MakeUInt128(0x4047800000000000ULL, 0U));

  // Test that the topmost bit isn't treated as the sign.
  ASSERT_EQ(AsmUcvtf(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));
}
745 
// Sanity check: pins the exact IEEE-754 bit patterns of every float/double
// literal used by the FP->int rounding tests below, so a mismatch there can
// be attributed to the instruction rather than to literal encoding.
TEST(Arm64InsnTest, AsmConvertLiterals) {
  // Verify that the compiler encodes the floating-point literals used in the
  // conversion tests below exactly as expected.
  ASSERT_EQ(bit_cast<uint32_t>(-7.50f), 0xc0f00000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.75f), 0xc0d80000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.50f), 0xc0d00000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.25f), 0xc0c80000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.25f), 0x40c80000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.50f), 0x40d00000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.75f), 0x40d80000U);
  ASSERT_EQ(bit_cast<uint32_t>(7.50f), 0x40f00000U);

  ASSERT_EQ(bit_cast<uint64_t>(-7.50), 0xc01e000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.75), 0xc01b000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.50), 0xc01a000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.25), 0xc019000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.25), 0x4019000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.50), 0x401a000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.75), 0x401b000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(7.50), 0x401e000000000000ULL);
}
767 
768 template <typename IntType, typename FuncType>
TestConvertF32ToInt(FuncType AsmFunc,std::initializer_list<int> expected)769 void TestConvertF32ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
770   // Note that bit_cast isn't a constexpr.
771   static const uint32_t kConvertF32ToIntInputs[] = {
772       bit_cast<uint32_t>(-7.50f),
773       bit_cast<uint32_t>(-6.75f),
774       bit_cast<uint32_t>(-6.50f),
775       bit_cast<uint32_t>(-6.25f),
776       bit_cast<uint32_t>(6.25f),
777       bit_cast<uint32_t>(6.50f),
778       bit_cast<uint32_t>(6.75f),
779       bit_cast<uint32_t>(7.50f),
780   };
781 
782   const size_t kConvertF32ToIntInputsSize = sizeof(kConvertF32ToIntInputs) / sizeof(uint32_t);
783   ASSERT_EQ(kConvertF32ToIntInputsSize, expected.size());
784 
785   auto expected_it = expected.begin();
786   for (size_t input_it = 0; input_it < kConvertF32ToIntInputsSize; input_it++) {
787     ASSERT_EQ(AsmFunc(kConvertF32ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
788   }
789 }
790 
// fcvtas Wd, Sn: round to nearest, ties away from zero (-6.5 -> -7, 6.5 -> 7).
TEST(Arm64InsnTest, AsmConvertF32I32TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
795 
// fcvtau Wd, Sn: unsigned tie-away conversion; negative inputs saturate to 0.
TEST(Arm64InsnTest, AsmConvertF32U32TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
800 
// fcvtms Wd, Sn: round toward -infinity (floor); 7.5 -> 7, -6.25 -> -7.
TEST(Arm64InsnTest, AsmConvertF32I32NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
805 
// fcvtmu Wd, Sn: unsigned floor conversion; negative inputs saturate to 0.
TEST(Arm64InsnTest, AsmConvertF32U32NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
810 
// fcvtns Wd, Sn: round to nearest, ties to even (6.5 -> 6, 7.5 -> 8).
TEST(Arm64InsnTest, AsmConvertF32I32TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
815 
// fcvtnu Wd, Sn: unsigned ties-to-even conversion; negatives saturate to 0.
TEST(Arm64InsnTest, AsmConvertF32U32TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
820 
// fcvtps Wd, Sn: round toward +infinity (ceil); -7.5 -> -7, 6.25 -> 7.
TEST(Arm64InsnTest, AsmConvertF32I32PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
825 
// fcvtpu Wd, Sn: unsigned ceil conversion; negative inputs saturate to 0.
TEST(Arm64InsnTest, AsmConvertF32U32PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
830 
// fcvtzs Wd, Sn: round toward zero (truncate); -7.5 -> -7, 7.5 -> 7.
TEST(Arm64InsnTest, AsmConvertF32I32Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
835 
// fcvtzu Wd, Sn: unsigned truncating conversion; negatives saturate to 0.
TEST(Arm64InsnTest, AsmConvertF32U32Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
840 
// fcvtas Xd, Sn: 64-bit destination, ties away from zero.
TEST(Arm64InsnTest, AsmConvertF32I64TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
845 
// fcvtau Xd, Sn: 64-bit unsigned destination, ties away; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF32U64TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
850 
// fcvtms Xd, Sn: 64-bit destination, round toward -infinity (floor).
TEST(Arm64InsnTest, AsmConvertF32I64NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
855 
// fcvtmu Xd, Sn: 64-bit unsigned destination, floor; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF32U64NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
860 
// fcvtns Xd, Sn: 64-bit destination, round to nearest, ties to even.
TEST(Arm64InsnTest, AsmConvertF32I64TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
865 
// fcvtnu Xd, Sn: 64-bit unsigned destination, ties to even; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF32U64TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
870 
// fcvtps Xd, Sn: 64-bit destination, round toward +infinity (ceil).
TEST(Arm64InsnTest, AsmConvertF32I64PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
875 
// fcvtpu Xd, Sn: 64-bit unsigned destination, ceil; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF32U64PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
880 
// fcvtzs Xd, Sn: 64-bit destination, round toward zero (truncate).
TEST(Arm64InsnTest, AsmConvertF32I64Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
885 
// fcvtzu Xd, Sn: 64-bit unsigned destination, truncate; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF32U64Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
890 
891 template <typename IntType, typename FuncType>
TestConvertF64ToInt(FuncType AsmFunc,std::initializer_list<int> expected)892 void TestConvertF64ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
893   // Note that bit_cast isn't a constexpr.
894   static const uint64_t kConvertF64ToIntInputs[] = {
895       bit_cast<uint64_t>(-7.50),
896       bit_cast<uint64_t>(-6.75),
897       bit_cast<uint64_t>(-6.50),
898       bit_cast<uint64_t>(-6.25),
899       bit_cast<uint64_t>(6.25),
900       bit_cast<uint64_t>(6.50),
901       bit_cast<uint64_t>(6.75),
902       bit_cast<uint64_t>(7.50),
903   };
904 
905   const size_t kConvertF64ToIntInputsSize = sizeof(kConvertF64ToIntInputs) / sizeof(uint64_t);
906   ASSERT_EQ(kConvertF64ToIntInputsSize, expected.size());
907 
908   auto expected_it = expected.begin();
909   for (size_t input_it = 0; input_it < kConvertF64ToIntInputsSize; input_it++) {
910     ASSERT_EQ(AsmFunc(kConvertF64ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
911   }
912 }
913 
// fcvtas Wd, Dn: double source, ties away from zero (-6.5 -> -7, 6.5 -> 7).
TEST(Arm64InsnTest, AsmConvertF64I32TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
918 
// fcvtau Wd, Dn: unsigned tie-away conversion; negatives saturate to 0.
TEST(Arm64InsnTest, AsmConvertF64U32TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
923 
// fcvtms Wd, Dn: round toward -infinity (floor).
TEST(Arm64InsnTest, AsmConvertF64I32NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
928 
// fcvtmu Wd, Dn: unsigned floor conversion; negatives saturate to 0.
TEST(Arm64InsnTest, AsmConvertF64U32NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
933 
// fcvtns Wd, Dn: round to nearest, ties to even (6.5 -> 6, 7.5 -> 8).
TEST(Arm64InsnTest, AsmConvertF64I32TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
938 
// fcvtnu Wd, Dn: unsigned ties-to-even conversion; negatives saturate to 0.
TEST(Arm64InsnTest, AsmConvertF64U32TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
943 
// fcvtps Wd, Dn: round toward +infinity (ceil).
TEST(Arm64InsnTest, AsmConvertF64I32PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
948 
// fcvtpu Wd, Dn: unsigned ceil conversion; negatives saturate to 0.
TEST(Arm64InsnTest, AsmConvertF64U32PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
953 
// fcvtzs Wd, Dn: round toward zero (truncate).
TEST(Arm64InsnTest, AsmConvertF64I32Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
958 
// fcvtzu Wd, Dn: unsigned truncating conversion; negatives saturate to 0.
TEST(Arm64InsnTest, AsmConvertF64U32Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
963 
// fcvtas Xd, Dn: 64-bit destination, ties away from zero.
TEST(Arm64InsnTest, AsmConvertF64I64TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
968 
// fcvtau Xd, Dn: 64-bit unsigned destination, ties away; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF64U64TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
973 
// fcvtms Xd, Dn: 64-bit destination, round toward -infinity (floor).
TEST(Arm64InsnTest, AsmConvertF64I64NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
978 
// fcvtmu Xd, Dn: 64-bit unsigned destination, floor; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF64U64NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
983 
// fcvtns Xd, Dn: 64-bit destination, round to nearest, ties to even.
TEST(Arm64InsnTest, AsmConvertF64I64TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
988 
// fcvtnu Xd, Dn: 64-bit unsigned destination, ties to even; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF64U64TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
993 
// fcvtps Xd, Dn: 64-bit destination, round toward +infinity (ceil).
TEST(Arm64InsnTest, AsmConvertF64I64PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
998 
// fcvtpu Xd, Dn: 64-bit unsigned destination, ceil; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF64U64PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
1003 
// fcvtzs Xd, Dn: 64-bit destination, round toward zero (truncate).
TEST(Arm64InsnTest, AsmConvertF64I64Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
1008 
// fcvtzu Xd, Dn: 64-bit unsigned destination, truncate; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF64U64Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1013 
// fcvtas Sd, Sn: scalar SIMD-register destination form, ties away from zero.
TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
1018 
// fcvtau Sd, Sn: scalar SIMD form, unsigned ties away; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
1023 
// fcvtms Sd, Sn: scalar SIMD form, round toward -infinity (floor).
TEST(Arm64InsnTest, AsmConvertF32I32ScalarNegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
1028 
// fcvtmu Sd, Sn: scalar SIMD form, unsigned floor; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF32U32ScalarNegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1033 
// fcvtns Sd, Sn: scalar SIMD form, round to nearest, ties to even.
TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
1038 
// fcvtnu Sd, Sn: scalar SIMD form, unsigned ties to even; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
1043 
// fcvtps Sd, Sn: scalar SIMD form, round toward +infinity (ceil).
TEST(Arm64InsnTest, AsmConvertF32I32ScalarPosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
1048 
// fcvtpu Sd, Sn: scalar SIMD form, unsigned ceil; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF32U32ScalarPosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
1053 
// fcvtzs Sd, Sn: scalar SIMD form, round toward zero (truncate).
TEST(Arm64InsnTest, AsmConvertF32I32ScalarTruncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
1058 
// fcvtzu Sd, Sn: scalar SIMD form, unsigned truncate; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF32U32ScalarTruncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1063 
// fcvtas Dd, Dn: scalar SIMD-register destination form, ties away from zero.
TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
1068 
// fcvtau Dd, Dn: scalar SIMD form, unsigned ties away; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
1073 
// fcvtms Dd, Dn: scalar SIMD form, round toward -infinity (floor).
TEST(Arm64InsnTest, AsmConvertF64I64ScalarNegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
1078 
// fcvtmu Dd, Dn: scalar SIMD form, unsigned floor; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF64U64ScalarNegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1083 
// fcvtns Dd, Dn: scalar SIMD form, round to nearest, ties to even.
TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
1088 
// fcvtnu Dd, Dn: scalar SIMD form, unsigned ties to even; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
1093 
// fcvtps Dd, Dn: scalar SIMD form, round toward +infinity (ceil).
TEST(Arm64InsnTest, AsmConvertF64I64ScalarPosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
1098 
// fcvtpu Dd, Dn: scalar SIMD form, unsigned ceil; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF64U64ScalarPosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
1103 
// fcvtzs Dd, Dn: scalar SIMD form, round toward zero (truncate).
TEST(Arm64InsnTest, AsmConvertF64I64ScalarTruncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
1108 
// fcvtzu Dd, Dn: scalar SIMD form, unsigned truncate; negatives -> 0.
TEST(Arm64InsnTest, AsmConvertF64U64ScalarTruncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1113 
// Vector fcvtas .4s: per-lane round to nearest, ties away from zero
// (-6.5 -> -7 = 0xfffffff9, 6.5 -> 7).
TEST(Arm64InsnTest, AsmConvertF32I32x4TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtas(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffff9ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtas(arg2), MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}
1121 
// Vector fcvtau .4s: per-lane unsigned ties-away; negative lanes -> 0.
TEST(Arm64InsnTest, AsmConvertF32U32x4TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtau(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtau(arg2), MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}
1129 
// Vector fcvtms .4s: per-lane round toward -infinity (floor).
TEST(Arm64InsnTest, AsmConvertF32I32x4NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtms(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffff9fffffff9ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtms(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1137 
// Vector fcvtmu .4s: per-lane unsigned floor; negative lanes -> 0.
TEST(Arm64InsnTest, AsmConvertF32U32x4NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtmu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtmu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1145 
// Vector fcvtns .4s: per-lane round to nearest, ties to even (6.5 -> 6).
TEST(Arm64InsnTest, AsmConvertF32I32x4TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtns(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtns(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}
1153 
// Vector fcvtnu .4s: per-lane unsigned ties-to-even; negative lanes -> 0.
TEST(Arm64InsnTest, AsmConvertF32U32x4TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtnu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtnu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}
1161 
// Vector fcvtps .4s: per-lane round toward +infinity (ceil).
TEST(Arm64InsnTest, AsmConvertF32I32x4PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtps(arg1), MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtps(arg2), MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}
1169 
// Vector fcvtpu .4s: per-lane unsigned ceil; negative lanes -> 0.
TEST(Arm64InsnTest, AsmConvertF32U32x4PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtpu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtpu(arg2), MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}
1177 
// Vector fcvtzs .4s: per-lane round toward zero (truncate).
TEST(Arm64InsnTest, AsmConvertF32I32x4Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1185 
// Vector fcvtzu .4s: per-lane unsigned truncate; negative lanes -> 0.
TEST(Arm64InsnTest, AsmConvertF32U32x4Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1193 
// Vector fcvtas .2d: per-lane round to nearest, ties away from zero.
// NOTE(review): the test name says "x4" but the asm operates on 2 double
// lanes (%0.2d); "x2" would match the instruction — consider renaming.
TEST(Arm64InsnTest, AsmConvertF64I64x4TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtas(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtas(arg2), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtas(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtas(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1205 
// Vector fcvtau .2d: per-lane unsigned ties-away; negative lanes -> 0.
// NOTE(review): name says "x4" but the asm operates on 2 lanes (%0.2d).
TEST(Arm64InsnTest, AsmConvertF64U64x4TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtau(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtau(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtau(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtau(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1217 
// Vector fcvtms .2d: per-lane round toward -infinity (floor).
// NOTE(review): name says "x4" but the asm operates on 2 lanes (%0.2d).
TEST(Arm64InsnTest, AsmConvertF64I64x4NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtms(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtms(arg2), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtms(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtms(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1229 
// Vector fcvtmu .2d: per-lane unsigned floor; negative lanes -> 0.
// NOTE(review): name says "x4" but the asm operates on 2 lanes (%0.2d).
TEST(Arm64InsnTest, AsmConvertF64U64x4NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtmu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtmu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtmu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtmu(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1241 
// Vector fcvtns .2d: per-lane round to nearest, ties to even (6.5 -> 6).
// NOTE(review): name says "x4" but the asm operates on 2 lanes (%0.2d).
TEST(Arm64InsnTest, AsmConvertF64I64x4TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtns(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtns(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtns(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtns(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1253 
// Vector fcvtnu .2d: per-lane unsigned ties-to-even; negative lanes -> 0.
// NOTE(review): name says "x4" but the asm operates on 2 lanes (%0.2d).
TEST(Arm64InsnTest, AsmConvertF64U64x4TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtnu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtnu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtnu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtnu(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1265 
// Vector fcvtps .2d: per-lane round toward +infinity (ceil).
// NOTE(review): name says "x4" but the asm operates on 2 lanes (%0.2d).
TEST(Arm64InsnTest, AsmConvertF64I64x4PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtps(arg1), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtps(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtps(arg3), MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtps(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1277 
// Vector fcvtpu .2d: per-lane unsigned ceil; negative lanes -> 0.
// NOTE(review): name says "x4" but the asm operates on 2 lanes (%0.2d).
TEST(Arm64InsnTest, AsmConvertF64U64x4PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtpu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtpu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtpu(arg3), MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtpu(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1289 
// FCVTZS (vector, 2xF64 -> 2xI64): convert to signed int, rounding toward
// zero (truncation).
TEST(Arm64InsnTest, AsmConvertF64I64x4Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  // trunc(-7.50) = -7 (0x...f9), trunc(-6.75) = -6 (0x...fa).
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtzs(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtzs(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1301 
// FCVTZU (vector, 2xF64 -> 2xU64): convert to unsigned int, rounding toward
// zero. Negative inputs saturate to 0.
TEST(Arm64InsnTest, AsmConvertF64U64x4Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtzu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtzu(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1313 
// SCVTF (fixed-point, W reg -> S reg): signed 32-bit fixed-point to float.
// The immediate (#7) is the number of fractional bits, i.e. result = arg / 2^7.
TEST(Arm64InsnTest, AsmConvertX32F32Scalar) {
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1, #7");

  // 0x610 / 128 = 12.125f (0x41420000).
  ASSERT_EQ(AsmConvertX32F32(0x610), MakeUInt128(0x41420000ULL, 0U));

  // 1U << 31 is INT32_MIN when read as signed: -2^31 / 2^7 = -2^24 (0xcb800000).
  ASSERT_EQ(AsmConvertX32F32(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}
1321 
// SCVTF (fixed-point, W reg -> D reg): signed 32-bit fixed-point to double
// with 8 fractional bits, i.e. result = arg / 256.
TEST(Arm64InsnTest, AsmConvertX32F64Scalar) {
  constexpr auto AsmConvertX32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1, #8");

  // 0x487 / 256 = 4.52734375 (0x40121c0000000000).
  ASSERT_EQ(AsmConvertX32F64(0x487), MakeUInt128(0x40121c0000000000ULL, 0U));

  // INT32_MIN / 256 = -2^23 (0xc160000000000000).
  ASSERT_EQ(AsmConvertX32F64(1 << 31), MakeUInt128(0xc160000000000000ULL, 0U));
}
1329 
// SCVTF (fixed-point, scalar SIMD form, S reg -> S reg): signed 32-bit
// fixed-point to float with 7 fractional bits.
TEST(Arm64InsnTest, AsmConvertX32F32) {
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1, #7");

  // 0x123 / 128 = 2.2734375f (0x40118000).
  ASSERT_EQ(AsmConvertX32F32(0x123), MakeUInt128(0x40118000ULL, 0U));

  // INT32_MIN / 2^7 = -2^24 (0xcb800000).
  ASSERT_EQ(AsmConvertX32F32(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}
1337 
// SCVTF (fixed-point, vector 4xI32 -> 4xF32): each lane is a signed
// fixed-point value with 11 fractional bits.
TEST(Arm64InsnTest, AsmConvertX32x4F32x4) {
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s, #11");
  __uint128_t arg = MakeUInt128(0x80000000ffff9852ULL, 0x0000110200001254ULL);
  ASSERT_EQ(AsmConvertX32F32(arg), MakeUInt128(0xc9800000c14f5c00ULL, 0x400810004012a000ULL));
}
1343 
// UCVTF (fixed-point, W reg -> S reg): unsigned 32-bit fixed-point to float
// with 7 fractional bits; also checks the default rounding of inexact results.
TEST(Arm64InsnTest, AsmConvertUX32F32Scalar) {
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1, #7");

  // 0x857 / 128 = 16.6796875f (0x41857000).
  ASSERT_EQ(AsmConvertUX32F32(0x857), MakeUInt128(0x41857000ULL, 0U));

  // Unlike scvtf, 1U << 31 stays positive: 2^31 / 2^7 = 2^24 (0x4b800000).
  ASSERT_EQ(AsmConvertUX32F32(1U << 31), MakeUInt128(0x4b800000ULL, 0U));

  // Test the default rounding behavior (FPRounding_TIEEVEN).
  ASSERT_EQ(AsmConvertUX32F32(0x80000080), MakeUInt128(0x4b800000ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x800000c0), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x80000140), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x80000180), MakeUInt128(0x4b800002ULL, 0U));
}
1357 
// UCVTF (fixed-point, W reg -> D reg): unsigned 32-bit fixed-point to double
// with 8 fractional bits.
TEST(Arm64InsnTest, AsmConvertUX32F64Scalar) {
  constexpr auto AsmConvertUX32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1, #8");

  // 0x361 / 256 = 3.37890625 (0x400b080000000000).
  ASSERT_EQ(AsmConvertUX32F64(0x361), MakeUInt128(0x400b080000000000ULL, 0U));

  // 2^31 / 2^8 = 2^23 (0x4160000000000000).
  ASSERT_EQ(AsmConvertUX32F64(1U << 31), MakeUInt128(0x4160000000000000ULL, 0U));
}
1365 
// UCVTF (fixed-point, scalar SIMD form, S reg -> S reg): unsigned 32-bit
// fixed-point to float with 7 fractional bits.
TEST(Arm64InsnTest, AsmConvertUX32F32) {
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1, #7");

  // 0x456 / 128 = 8.671875f (0x410ac000).
  ASSERT_EQ(AsmConvertUX32F32(0x456), MakeUInt128(0x410ac000ULL, 0U));

  // 2^31 / 2^7 = 2^24 (0x4b800000).
  ASSERT_EQ(AsmConvertUX32F32(1U << 31), MakeUInt128(0x4b800000ULL, 0U));
}
1373 
// UCVTF (fixed-point, vector 4xU32 -> 4xF32): each lane is an unsigned
// fixed-point value with 11 fractional bits.
TEST(Arm64InsnTest, AsmConvertUX32x4F32x4) {
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s, #11");
  __uint128_t arg = MakeUInt128(0x8000000000008023ULL, 0x0000201800001956ULL);
  ASSERT_EQ(AsmConvertUX32F32(arg), MakeUInt128(0x4980000041802300ULL, 0x40806000404ab000ULL));
}
1379 
// SCVTF (fixed-point, X reg -> S reg): signed 64-bit fixed-point to float
// with 10 fractional bits.
TEST(Arm64InsnTest, AsmConvertX64F32Scalar) {
  constexpr auto AsmConvertX64F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1, #10");

  // 0x2234 / 1024 = 8.55078125f (0x4108d000).
  ASSERT_EQ(AsmConvertX64F32(0x2234), MakeUInt128(0x4108d000ULL, 0U));
}
1385 
// SCVTF (fixed-point, X reg -> D reg): signed 64-bit fixed-point to double
// with 10 fractional bits.
TEST(Arm64InsnTest, AsmConvertX64F64Scalar) {
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1, #10");

  // 0x1324 / 1024 = 4.78515625 (0x4013240000000000).
  ASSERT_EQ(AsmConvertX64F64(0x1324), MakeUInt128(0x4013240000000000ULL, 0U));
}
1391 
// UCVTF (fixed-point, X reg -> S reg): unsigned 64-bit fixed-point to float
// with 10 fractional bits.
TEST(Arm64InsnTest, AsmConvertUX64F32Scalar) {
  constexpr auto AsmConvertUX64F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1, #10");

  // 0x5763 / 1024 = 21.8466796875f (0x41aec600).
  ASSERT_EQ(AsmConvertUX64F32(0x5763), MakeUInt128(0x41aec600ULL, 0U));
}
1397 
// UCVTF (fixed-point, X reg -> D reg): unsigned 64-bit fixed-point to double
// with 10 fractional bits.
TEST(Arm64InsnTest, AsmConvertUX64F64Scalar) {
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1, #10");

  // 0x2217 / 1024 = 8.5224609375 (0x40210b8000000000).
  ASSERT_EQ(AsmConvertUX64F64(0x2217), MakeUInt128(0x40210b8000000000ULL, 0U));
}
1403 
// SCVTF (fixed-point, scalar SIMD form, D reg -> D reg): signed 64-bit
// fixed-point to double with 12 fractional bits.
TEST(Arm64InsnTest, AsmConvertX64F64) {
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1, #12");

  // 0x723 / 4096 = 0.446044921875 (0x3fdc8c0000000000).
  ASSERT_EQ(AsmConvertX64F64(0x723), MakeUInt128(0x3fdc8c0000000000ULL, 0U));

  // 1ULL << 63 is INT64_MIN when read as signed: -2^63 / 2^12 = -2^51.
  ASSERT_EQ(AsmConvertX64F64(1ULL << 63), MakeUInt128(0xc320000000000000ULL, 0U));
}
1411 
// UCVTF (fixed-point, scalar SIMD form, D reg -> D reg): unsigned 64-bit
// fixed-point to double with 12 fractional bits.
TEST(Arm64InsnTest, AsmConvertUX64F64) {
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #12");

  // 0x416 / 4096 = 0.255859375 (0x3fd0580000000000).
  ASSERT_EQ(AsmConvertUX64F64(0x416), MakeUInt128(0x3fd0580000000000ULL, 0U));

  // Unlike scvtf, 1ULL << 63 stays positive: 2^63 / 2^12 = 2^51.
  ASSERT_EQ(AsmConvertUX64F64(1ULL << 63), MakeUInt128(0x4320000000000000ULL, 0U));
}
1419 
// UCVTF with the maximum fractional-bit count (#64): the entire 64-bit input
// is fraction, so 2^63 converts to 2^63 / 2^64 = 0.5.
TEST(Arm64InsnTest, AsmConvertUX64F64With64BitFraction) {
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #64");

  ASSERT_EQ(AsmConvertUX64F64(1ULL << 63), MakeUInt128(0x3fe0'0000'0000'0000ULL, 0U));
}
1425 
// SCVTF (fixed-point, vector 2xI64 -> 2xF64) with 12 fractional bits.
// Lane 0 is INT64_MIN (-2^51 after scaling); lane 1 is 0x8086 / 4096.
TEST(Arm64InsnTest, AsmConvertX64x2F64x2) {
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d, #12");
  __uint128_t arg = MakeUInt128(1ULL << 63, 0x8086U);
  ASSERT_EQ(AsmConvertX64F64(arg), MakeUInt128(0xc320000000000000ULL, 0x402010c000000000ULL));
}
1431 
// UCVTF (fixed-point, vector 2xU64 -> 2xF64) with 12 fractional bits.
// Lane 0 is 2^63 (stays positive, +2^51 after scaling); lane 1 is 0x6809 / 4096.
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2) {
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #12");
  __uint128_t arg = MakeUInt128(1ULL << 63, 0x6809U);
  ASSERT_EQ(AsmConvertUX64F64(arg), MakeUInt128(0x4320000000000000ULL, 0x401a024000000000ULL));
}
1437 
// UCVTF (fixed-point, vector) with the maximum fractional-bit count (#64):
// each 64-bit lane is interpreted as a pure fraction in [0, 1).
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2With64BitFraction) {
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #64");
  __uint128_t arg = MakeUInt128(0x7874'211c'b7aa'f597ULL, 0x2c0f'5504'd25e'f673ULL);
  ASSERT_EQ(AsmConvertUX64F64(arg),
            MakeUInt128(0x3fde'1d08'472d'eabdULL, 0x3fc6'07aa'8269'2f7bULL));
}
1444 
// FCVTZS (fixed-point, S reg -> W reg): float to signed 32-bit fixed-point
// with 16 fractional bits (result = trunc(arg * 2^16)). NaN converts to 0.
TEST(Arm64InsnTest, AsmConvertF32X32Scalar) {
  constexpr auto AsmConvertF32X32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #16");
  uint32_t arg1 = 0x4091eb85U;  // 4.56 in float
  ASSERT_EQ(AsmConvertF32X32(arg1), MakeUInt128(0x00048f5cU, 0U));

  uint32_t arg2 = 0xc0d80000U;  // -6.75 in float
  // -6.75 * 2^16 = -442368 = 0xfff94000 (two's complement).
  ASSERT_EQ(AsmConvertF32X32(arg2), MakeUInt128(0xfff94000U, 0U));

  ASSERT_EQ(AsmConvertF32X32(kDefaultNaN32), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}
1455 
// NOTE(review): despite the "UX32" (unsigned) name, this uses fcvtzs (signed),
// the same instruction as AsmConvertF32X32Scalar; the expected values below
// match the signed conversion (note the negative result for arg2). Presumably
// fcvtzu was intended — confirm and update instruction + expectations together.
TEST(Arm64InsnTest, AsmConvertF32UX32Scalar) {
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #16");
  uint32_t arg1 = 0x41223d71U;  // 10.14 in float
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x000a23d7U, 0U));

  uint32_t arg2 = 0xc1540000U;  // -13.25 in float
  ASSERT_EQ(AsmConvertF32UX32(arg2), MakeUInt128(0xfff2c000U, 0U));

  ASSERT_EQ(AsmConvertF32UX32(kDefaultNaN32), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}
1466 
// FCVTZS with 31 fractional bits: 0.25f * 2^31 = 0x20000000.
// NOTE(review): the "UX32" name suggests fcvtzu, but the instruction is
// fcvtzs; for this non-negative input both would agree — confirm intent.
TEST(Arm64InsnTest, AsmConvertF32UX32With31FractionalBits) {
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #31");
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x20000000U, 0U));
}
1472 
// FCVTZS (fixed-point, D reg -> W reg): double to signed 32-bit fixed-point
// with 16 fractional bits.
TEST(Arm64InsnTest, AsmConvertF64X32Scalar) {
  constexpr auto AsmConvertF64X32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1, #16");
  uint64_t arg1 = 0x401e8f5c28f5c28fULL;  // 7.46 in double
  ASSERT_EQ(AsmConvertF64X32(arg1), MakeUInt128(0x0007a3d7U, 0U));

  uint64_t arg2 = 0xc040200000000000ULL;  // -32.44 in double
  ASSERT_EQ(AsmConvertF64X32(arg2), MakeUInt128(0xffdfc000U, 0U));
}
1481 
// FCVTZS (fixed-point, S reg -> X reg): float to signed 64-bit fixed-point
// with 16 fractional bits (result = trunc(arg * 2^16)).
TEST(Arm64InsnTest, AsmConvertF32X64Scalar) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1, #16");
  uint64_t arg1 = bit_cast<uint32_t>(7.50f);
  // 7.50 * 2^16 = 0x78000.
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint32_t>(-6.50f);
  // -6.50 * 2^16 = -0x68000, sign-extended to 64 bits.
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}
1490 
// FCVTZS with 63 fractional bits: 0.25f * 2^63 = 0x2000000000000000.
// NOTE(review): the "UX64" name suggests fcvtzu, but the instruction is
// fcvtzs; for this non-negative input both would agree — confirm intent.
TEST(Arm64InsnTest, AsmConvertF32UX64With63FractionalBits) {
  constexpr auto AsmConvertF32UX64 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1, #63");
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX64(arg1), MakeUInt128(0x20000000'00000000ULL, 0U));
}
1496 
// FCVTZS (fixed-point, D reg -> X reg): double to signed 64-bit fixed-point
// with 16 fractional bits.
TEST(Arm64InsnTest, AsmConvertF64X64Scalar) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1, #16");
  uint64_t arg1 = bit_cast<uint64_t>(7.50);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint64_t>(-6.50);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}
1505 
// FCVTZS (fixed-point, vector 4xF32 -> 4xI32) with 2 fractional bits:
// each lane is trunc(value * 4); e.g. -5.5 * 4 = -22 = 0xffffffea.
TEST(Arm64InsnTest, AsmConvertF32X32x4) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s, #2");
  __uint128_t res = AsmFcvtzs(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f));
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffeaULL, 0x0000001a00000000ULL));
}
1511 
// FCVTZU (fixed-point, D reg -> W reg): double to unsigned 32-bit fixed-point
// with 16 fractional bits. Negative inputs saturate to 0.
TEST(Arm64InsnTest, AsmConvertF64UX32Scalar) {
  constexpr auto AsmConvertF64UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1, #16");
  uint64_t arg1 = 0x4020947ae147ae14ULL;  // 8.29 in double
  ASSERT_EQ(AsmConvertF64UX32(arg1), MakeUInt128(0x00084a3dU, 0U));

  uint64_t arg2 = 0xc023666666666666ULL;  // -9.70 in double
  ASSERT_EQ(AsmConvertF64UX32(arg2), MakeUInt128(0U, 0U));
}
1520 
// FCVTZU (fixed-point, S reg -> X reg): float to unsigned 64-bit fixed-point
// with 16 fractional bits. Negative inputs saturate to 0.
TEST(Arm64InsnTest, AsmConvertF32UX64Scalar) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1, #16");
  uint64_t arg1 = bit_cast<uint32_t>(7.50f);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));
  uint64_t arg2 = bit_cast<uint32_t>(-6.50f);
  ASSERT_EQ(AsmFcvtzu(arg2), 0ULL);
}
1528 
// FCVTZU (fixed-point, D reg -> X reg): double to unsigned 64-bit fixed-point
// with 16 fractional bits. Negative inputs saturate to 0.
TEST(Arm64InsnTest, AsmConvertF64UX64Scalar) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #16");
  uint64_t arg1 = bit_cast<uint64_t>(7.50);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint64_t>(-6.50);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0ULL, 0ULL));
}
1537 
// FCVTZU with the maximum fractional-bit count (#64):
// 0.625 * 2^64 = 0xa000000000000000.
TEST(Arm64InsnTest, AsmConvertF64UX64ScalarWith64BitFraction) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #64");
  uint64_t arg = bit_cast<uint64_t>(0.625);
  ASSERT_EQ(AsmFcvtzu(arg), MakeUInt128(0xa000'0000'0000'0000ULL, 0ULL));
}
1543 
// FCVTZU (fixed-point, vector 4xF32 -> 4xU32) with 2 fractional bits:
// negative lanes saturate to 0; 6.5 * 4 = 26 = 0x1a.
TEST(Arm64InsnTest, AsmConvertF32UX32x4) {
  // Local renamed from AsmFcvtzs: the instruction under test is fcvtzu.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s, #2");
  __uint128_t res = AsmFcvtzu(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f));
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000001a00000000ULL));
}
1549 
// FCSEL (single-precision): selects the first FP operand if the condition
// holds, else the second. cmp 3, 7 clears Z, so "eq" is false and "ne" is
// true; only the low 32 bits of each source register participate.
TEST(Arm64InsnTest, Fp32ConditionalSelect) {
  uint64_t int_arg1 = 3;
  uint64_t int_arg2 = 7;
  uint64_t fp_arg1 = 0xfedcba9876543210ULL;
  uint64_t fp_arg2 = 0x0123456789abcdefULL;
  __uint128_t res;

  // eq is false (3 != 7) -> selects fp_arg2's low word.
  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, eq"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x89abcdefULL, 0U));

  // ne is true -> selects fp_arg1's low word.
  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, ne"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x76543210ULL, 0U));
}
1569 
// FCSEL (double-precision): same as the Fp32 variant but with full 64-bit
// register sources. cmp 8, 3 clears Z, so "eq" is false and "ne" is true.
TEST(Arm64InsnTest, Fp64ConditionalSelect) {
  uint64_t int_arg1 = 8;
  uint64_t int_arg2 = 3;
  uint64_t fp_arg1 = 0xfedcba9876543210ULL;
  uint64_t fp_arg2 = 0x0123456789abcdefULL;
  __uint128_t res;

  // eq is false (8 != 3) -> selects fp_arg2.
  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, eq"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x0123456789abcdefULL, 0U));

  // ne is true -> selects fp_arg1.
  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, ne"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0xfedcba9876543210ULL, 0U));
}
1589 
// FRINTP (single-precision): round to integral, toward positive infinity.
// The high half of the input register is junk and must be ignored.
TEST(Arm64InsnTest, RoundUpFp32) {
  // The lower 32-bit represents 2.7182817 in float.
  uint64_t fp_arg = 0xdeadbeef402df854ULL;
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %s0, %s1")(fp_arg);
  ASSERT_EQ(res, MakeUInt128(0x40400000ULL, 0U));  // 3.0 in float
}
1596 
// FRINTP (double-precision): round to integral, toward positive infinity.
TEST(Arm64InsnTest, RoundUpFp64) {
  // 2.7182817 in double.
  uint64_t fp_arg = 0x4005BF0A8B145769ULL;
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %d0, %d1")(fp_arg);
  ASSERT_EQ(res, MakeUInt128(0x4008000000000000ULL, 0U));  // 3.0 in double
}
1603 
// FRINTA (double-precision): round to integral, ties away from zero.
// The result stays in floating-point format.
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp64) {
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %d0, %d1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01E000000000000ULL), MakeUInt128(0xc020000000000000ULL, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc01B000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01A000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc019000000000000ULL), MakeUInt128(0xc018000000000000ULL, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x4019000000000000ULL), MakeUInt128(0x4018000000000000ULL, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401A000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x401B000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401E000000000000ULL), MakeUInt128(0x4020000000000000ULL, 0U));

  // -0.49999999999999994 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xBFDFFFFFFFFFFFFF), MakeUInt128(0x8000000000000000U, 0U));

  // A number too large to have fractional precision, should not change upon rounding with tie-away
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(1.0e100)), MakeUInt128(bit_cast<uint64_t>(1.0e100), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-1.0e100)), MakeUInt128(bit_cast<uint64_t>(-1.0e100), 0U));
}
1638 
// FRINTA (single-precision): round to integral, ties away from zero.
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp32) {
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %s0, %s1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0f00000U), MakeUInt128(0xc1000000U, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc0d80000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0d00000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc0c80000U), MakeUInt128(0xc0c00000U, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x40c80000U), MakeUInt128(0x40c00000U, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40d00000U), MakeUInt128(0x40e00000U, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x40d80000U), MakeUInt128(0x40e00000U, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40f00000U), MakeUInt128(0x41000000U, 0U));

  // -0.49999997019767761 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xbeffffff), MakeUInt128(0x80000000U, 0U));
}
1669 
// FRINTM (double-precision): round to integral, toward negative infinity
// (floor). Signed zero is preserved.
TEST(Arm64InsnTest, RoundToIntDownwardFp64) {
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %d0, %d1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintm(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintm(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));

  // -7.10 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc01c666666666666ULL), MakeUInt128(0xc020000000000000, 0U));

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc01f99999999999aULL), MakeUInt128(0xc020000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintm(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0
  ASSERT_EQ(AsmFrintm(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1691 
// FRINTM (single-precision): round to integral, toward negative infinity
// (floor). Signed zero is preserved.
TEST(Arm64InsnTest, RoundToIntDownwardFp32) {
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %s0, %s1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintm(0x40f66666), 0x40e00000);

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintm(0x40e33333), 0x40e00000);

  // -7.10 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc0e33333), 0xc1000000);

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc0fccccd), 0xc1000000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintm(0x00000000), 0x00000000);

  // -0 -> -0
  ASSERT_EQ(AsmFrintm(0x80000000), 0x80000000);
}
1713 
// FRINTN (double-precision): round to integral, to nearest with ties to even.
// Signed zero is preserved.
TEST(Arm64InsnTest, RoundToIntNearestFp64) {
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %d0, %d1");

  // 7.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x401e000000000000ULL), MakeUInt128(0x4020000000000000, 0U));

  // 8.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x4021000000000000), MakeUInt128(0x4020000000000000, 0U));

  // 7.10 -> 7.00
  ASSERT_EQ(AsmFrintn(0x401c666666666666), MakeUInt128(0x401c000000000000, 0U));

  // 7.90 -> 8.00
  ASSERT_EQ(AsmFrintn(0x401f99999999999a), MakeUInt128(0x4020000000000000, 0U));

  // -7.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc01e000000000000), MakeUInt128(0xc020000000000000, 0U));

  // -8.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc021000000000000), MakeUInt128(0xc020000000000000, 0U));

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintn(0xc01c666666666666), MakeUInt128(0xc01c000000000000, 0U));

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintn(0xc01f99999999999a), MakeUInt128(0xc020000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintn(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0
  ASSERT_EQ(AsmFrintn(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1747 
// FRINTN (single-precision): round to integral, to nearest with ties to even.
// Signed zero is preserved.
TEST(Arm64InsnTest, RoundToIntToNearestFp32) {
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %s0, %s1");

  // 7.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x40f00000), 0x41000000);

  // 8.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x41080000), 0x41000000);

  // 7.10 -> 7.00
  ASSERT_EQ(AsmFrintn(0x40e33333), 0x40e00000);

  // 7.90 -> 8.00
  ASSERT_EQ(AsmFrintn(0x40fccccd), 0x41000000);

  // -7.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc0f00000), 0xc1000000);

  // -8.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc1080000), 0xc1000000);

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintn(0xc0e33333), 0xc0e00000);

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintn(0xc0fccccd), 0xc1000000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintn(0x00000000), 0x00000000);

  // -0 -> -0
  ASSERT_EQ(AsmFrintn(0x80000000), 0x80000000);
}
1781 
// FRINTZ (double-precision): round to integral, toward zero (truncation).
// Signed zero is preserved.
TEST(Arm64InsnTest, RoundToIntTowardZeroFp64) {
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %d0, %d1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintz(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintz(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc01c666666666666ULL), MakeUInt128(0xc01c000000000000, 0U));

  // -7.90 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc01f99999999999aULL), MakeUInt128(0xc01c000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintz(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0
  ASSERT_EQ(AsmFrintz(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1803 
// FRINTZ (single-precision): round to integral, toward zero (truncation).
// Signed zero is preserved.
TEST(Arm64InsnTest, RoundToIntTowardZeroFp32) {
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %s0, %s1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintz(0x40f66666), 0x40e00000);

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintz(0x40e33333), 0x40e00000);

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc0e33333), 0xc0e00000);

  // -7.90 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc0fccccd), 0xc0e00000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintz(0x00000000), 0x00000000);

  // -0 -> -0
  ASSERT_EQ(AsmFrintz(0x80000000), 0x80000000);
}
1825 
// FRINTA (vector, 4xF32): round to integral (FP result), ties away from zero.
TEST(Arm64InsnTest, AsmConvertF32x4TieAway) {
  // Local renamed from AsmFcvta: the instruction under test is frinta (round
  // to integral), not an fcvt conversion; matches the scalar frinta tests.
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrinta(arg1), MakeF32x4(-8.00f, -7.00f, -7.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinta(arg2), MakeF32x4(6.00f, 7.00f, 7.00f, 8.00f));
}
1833 
// FRINTM (vector, 4xF32): round to integral (FP result), toward negative
// infinity (floor).
TEST(Arm64InsnTest, AsmConvertF32x4NegInf) {
  // Local renamed from AsmFcvtm: the instruction under test is frintm.
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintm(arg1), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintm(arg2), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
1841 
// FRINTN (vector, 4xF32): round to integral (FP result), to nearest with
// ties to even.
TEST(Arm64InsnTest, AsmConvertF32x4TieEven) {
  // Local renamed from AsmFcvtn: the instruction under test is frintn.
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintn(arg1), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintn(arg2), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
}
1849 
// FRINTP (vector, 4xF32): round to integral (FP result), toward positive
// infinity (ceiling).
TEST(Arm64InsnTest, AsmConvertF32x4PosInf) {
  // Local renamed from AsmFcvtp: the instruction under test is frintp.
  constexpr auto AsmFrintp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintp(arg1), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintp(arg2), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
}
1857 
// FRINTZ (vector, 4xF32): round to integral (FP result), toward zero.
TEST(Arm64InsnTest, AsmConvertF32x4Truncate) {
  // Local renamed from AsmFcvtz: the instruction under test is frintz.
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintz(arg1), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintz(arg2), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
1865 
// FRINTA (vector, 2xF64): round to integral (FP result), ties away from zero.
TEST(Arm64InsnTest, AsmConvertF64x4TieAway) {
  // Local renamed from AsmFcvta: the instruction under test is frinta.
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrinta(arg1), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrinta(arg2), MakeF64x2(-7.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrinta(arg3), MakeF64x2(6.00, 7.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrinta(arg4), MakeF64x2(7.00, 8.00));
}
1877 
// FRINTM (vector, 2xF64): round to integral (FP result), toward negative
// infinity (floor).
TEST(Arm64InsnTest, AsmConvertF64x4NegInf) {
  // Local renamed from AsmFcvtm: the instruction under test is frintm.
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintm(arg1), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintm(arg2), MakeF64x2(-7.00, -7.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintm(arg3), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintm(arg4), MakeF64x2(6.00, 7.00));
}
1889 
// FRINTN (vector, 2xF64): round to integral (FP result), to nearest with
// ties to even.
TEST(Arm64InsnTest, AsmConvertF64x4TieEven) {
  // Local renamed from AsmFcvtn: the instruction under test is frintn.
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintn(arg1), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintn(arg2), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintn(arg3), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintn(arg4), MakeF64x2(7.00, 8.00));
}
1901 
// FRINTP (vector, 2xF64): round to integral (FP result), toward positive
// infinity (ceiling).
TEST(Arm64InsnTest, AsmConvertF64x4PosInf) {
  // Local renamed from AsmFcvtp: the instruction under test is frintp.
  constexpr auto AsmFrintp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintp(arg1), MakeF64x2(-7.00, -6.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintp(arg2), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintp(arg3), MakeF64x2(7.00, 7.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintp(arg4), MakeF64x2(7.00, 8.00));
}
1913 
// FRINTZ (vector, 2xF64): round to integral (FP result), toward zero.
TEST(Arm64InsnTest, AsmConvertF64x4Truncate) {
  // Local renamed from AsmFcvtz: the instruction under test is frintz.
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintz(arg1), MakeF64x2(-7.00, -6.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintz(arg2), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintz(arg3), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintz(arg4), MakeF64x2(6.00, 7.00));
}
1925 
// Verifies FRINTI (scalar, single-precision): round to integral using the
// rounding mode currently selected in FPCR.  The WC-arg wrapper presumably
// installs the given kFpcrRMode* constant into FPCR before executing the
// instruction -- see utility.h (TODO confirm).
TEST(Arm64InsnTest, AsmRoundCurrentModeF32) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %s0, %s1");
  // Round to nearest, ties to even: +/-6.50 go to the even neighbor 6.00.
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(8.00f));
  // Round toward minus infinity (floor).
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(7.00f));
  // Round toward plus infinity (ceiling).
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(8.00f));
  // Round toward zero (truncate).
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeZero), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeZero), bit_cast<uint32_t>(7.00f));
}
1961 
// Verifies FRINTI (scalar, double-precision): round to integral using the
// rounding mode currently selected in FPCR.  Same data set as the F32 variant
// above; the WC-arg wrapper presumably installs the kFpcrRMode* constant into
// FPCR before the instruction runs (TODO confirm against utility.h).
TEST(Arm64InsnTest, AsmRoundCurrentModeF64) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %d0, %d1");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(8.00));
  // Round toward minus infinity (floor).
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(7.00));
  // Round toward plus infinity (ceiling).
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModePosInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModePosInf), bit_cast<uint64_t>(8.00));
  // Round toward zero (truncate).
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeZero), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeZero), bit_cast<uint64_t>(7.00));
}
1997 
// FRINTI on a .4s vector: rounds every single-precision lane to an integral
// value using the rounding mode currently configured in FPCR (the wrapper
// presumably loads the given kFpcrRMode* constant into FPCR first).
TEST(Arm64InsnTest, AsmRoundCurrentModeF32x4) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.4s, %1.4s");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrinti(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f), kFpcrRModeTieEven),
            MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f), kFpcrRModeTieEven),
            MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  // Round toward minus infinity.
  ASSERT_EQ(AsmFrinti(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f), kFpcrRModeNegInf),
            MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  ASSERT_EQ(AsmFrinti(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f), kFpcrRModeNegInf),
            MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  // Round toward plus infinity.
  ASSERT_EQ(AsmFrinti(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f), kFpcrRModePosInf),
            MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f), kFpcrRModePosInf),
            MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  // Round toward zero.
  ASSERT_EQ(AsmFrinti(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f), kFpcrRModeZero),
            MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f), kFpcrRModeZero),
            MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2017 
// FRINTI on a .2d vector: rounds both double-precision lanes to integral
// values using the rounding mode currently configured in FPCR.
TEST(Arm64InsnTest, AsmRoundCurrentModeF64x2) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.2d, %1.2d");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrinti(MakeF64x2(-7.50, -6.75), kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(-6.50, -6.25), kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(6.25, 6.50), kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(6.75, 7.50), kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  // Round toward minus infinity.
  ASSERT_EQ(AsmFrinti(MakeF64x2(-7.50, -6.75), kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(-6.50, -6.25), kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(6.25, 6.50), kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(6.75, 7.50), kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  // Round toward plus infinity.
  ASSERT_EQ(AsmFrinti(MakeF64x2(-7.50, -6.75), kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(-6.50, -6.25), kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(6.25, 6.50), kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(6.75, 7.50), kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  // Round toward zero.
  ASSERT_EQ(AsmFrinti(MakeF64x2(-7.50, -6.75), kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(-6.50, -6.25), kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(6.25, 6.50), kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(MakeF64x2(6.75, 7.50), kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2053 
// Verifies FRINTX (scalar, single-precision): round to integral using the
// current FPCR rounding mode, like FRINTI.  Per the mnemonic, FRINTX is the
// "exact" variant which additionally signals the Inexact exception when the
// result differs from the input; these tests check only the numeric result.
TEST(Arm64InsnTest, AsmRoundExactF32) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %s0, %s1");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(8.00f));
  // Round toward minus infinity (floor).
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(7.00f));
  // Round toward plus infinity (ceiling).
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(8.00f));
  // Round toward zero (truncate).
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeZero), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeZero), bit_cast<uint32_t>(7.00f));
}
2089 
// Verifies FRINTX (scalar, double-precision): round to integral using the
// current FPCR rounding mode.  Same data set as the F32 variant above; only
// the numeric result is checked, not the Inexact exception FRINTX may raise.
TEST(Arm64InsnTest, AsmRoundExactF64) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %d0, %d1");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(8.00));
  // Round toward minus infinity (floor).
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(7.00));
  // Round toward plus infinity (ceiling).
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModePosInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModePosInf), bit_cast<uint64_t>(8.00));
  // Round toward zero (truncate).
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeZero), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeZero), bit_cast<uint64_t>(7.00));
}
2125 
// FRINTX on a .4s vector: rounds every single-precision lane to an integral
// value using the current FPCR rounding mode; only numeric results checked.
TEST(Arm64InsnTest, AsmRoundExactF32x4) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.4s, %1.4s");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrintx(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f), kFpcrRModeTieEven),
            MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f), kFpcrRModeTieEven),
            MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  // Round toward minus infinity.
  ASSERT_EQ(AsmFrintx(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f), kFpcrRModeNegInf),
            MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  ASSERT_EQ(AsmFrintx(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f), kFpcrRModeNegInf),
            MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  // Round toward plus infinity.
  ASSERT_EQ(AsmFrintx(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f), kFpcrRModePosInf),
            MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f), kFpcrRModePosInf),
            MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  // Round toward zero.
  ASSERT_EQ(AsmFrintx(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f), kFpcrRModeZero),
            MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f), kFpcrRModeZero),
            MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2145 
// FRINTX on a .2d vector: rounds both double-precision lanes to integral
// values using the current FPCR rounding mode; only numeric results checked.
TEST(Arm64InsnTest, AsmRoundExactF64x2) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.2d, %1.2d");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrintx(MakeF64x2(-7.50, -6.75), kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(-6.50, -6.25), kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(6.25, 6.50), kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(6.75, 7.50), kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  // Round toward minus infinity.
  ASSERT_EQ(AsmFrintx(MakeF64x2(-7.50, -6.75), kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(-6.50, -6.25), kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(6.25, 6.50), kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(6.75, 7.50), kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  // Round toward plus infinity.
  ASSERT_EQ(AsmFrintx(MakeF64x2(-7.50, -6.75), kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(-6.50, -6.25), kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(6.25, 6.50), kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(6.75, 7.50), kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  // Round toward zero.
  ASSERT_EQ(AsmFrintx(MakeF64x2(-7.50, -6.75), kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(-6.50, -6.25), kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(6.25, 6.50), kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(MakeF64x2(6.75, 7.50), kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2181 
// Executes FCMP on two single-precision values (passed as raw bit patterns in
// the low 32 bits of each argument) and returns the resulting PSTATE flags,
// read back with MRS.  NZCV occupies bits [31:28] of the returned value.
uint64_t Fp32Compare(uint64_t arg1, uint64_t arg2) {
  uint64_t res;
  asm("fcmp %s1, %s2\n\t"
      "mrs %x0, nzcv"
      : "=r"(res)
      : "w"(arg1), "w"(arg2));
  return res;
}
2190 
// Executes FCMP on two double-precision values (passed as raw bit patterns)
// and returns the resulting PSTATE flags, read back with MRS.  NZCV occupies
// bits [31:28] of the returned value.
uint64_t Fp64Compare(uint64_t arg1, uint64_t arg2) {
  uint64_t res;
  asm("fcmp %d1, %d2\n\t"
      "mrs %x0, nzcv"
      : "=r"(res)
      : "w"(arg1), "w"(arg2));
  return res;
}
2199 
// Builds a PSTATE/NZCV register image from a 4-bit flag pattern (N, Z, C, V
// from most- to least-significant bit); the flags live in bits [31:28].
constexpr uint64_t MakeNZCV(uint64_t nzcv) {
  constexpr unsigned kNzcvShift = 28;  // Bit position of the V flag in PSTATE.
  return nzcv << kNzcvShift;
}
2203 
// FCMP (single-precision) sets NZCV according to IEEE comparison semantics:
// unordered -> 0b0011, equal -> 0b0110, less -> 0b1000, greater -> 0b0010.
// Operands are encoded as raw IEEE-754 binary32 bit patterns.
TEST(Arm64InsnTest, Fp32Compare) {
  // NaN and 1.83
  ASSERT_EQ(Fp32Compare(0x7fc00000ULL, 0x3fea3d71ULL), MakeNZCV(0b0011));

  // 6.31 == 6.31
  ASSERT_EQ(Fp32Compare(0x40c9eb85ULL, 0x40c9eb85ULL), MakeNZCV(0b0110));

  // 1.23 < 2.34
  ASSERT_EQ(Fp32Compare(0x3f9d70a4ULL, 0x4015c28fULL), MakeNZCV(0b1000));

  // 5.25 > 2.94
  ASSERT_EQ(Fp32Compare(0x40a80000ULL, 0x403c28f6ULL), MakeNZCV(0b0010));
}
2217 
// FCMP with the #0.0 immediate form (single-precision): compares the operand
// against zero and returns NZCV via MRS.  Flag patterns match Fp32Compare.
TEST(Arm64InsnTest, Fp32CompareZero) {
  constexpr auto Fp32CompareZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %s1, #0.0\n\t"
      "mrs %x0, nzcv");

  // NaN and 0.00 (0x7fa00000 is a signaling-NaN bit pattern).
  ASSERT_EQ(Fp32CompareZero(0x7fa00000ULL), MakeNZCV(0b0011));

  // 0.00 == 0.00
  ASSERT_EQ(Fp32CompareZero(0x00000000ULL), MakeNZCV(0b0110));

  // -2.67 < 0.00
  ASSERT_EQ(Fp32CompareZero(0xc02ae148ULL), MakeNZCV(0b1000));

  // 1.56 > 0.00
  ASSERT_EQ(Fp32CompareZero(0x3fc7ae14ULL), MakeNZCV(0b0010));
}
2235 
// FCMP (double-precision): same flag semantics as the F32 test above, with
// operands encoded as raw IEEE-754 binary64 bit patterns.
TEST(Arm64InsnTest, Fp64Compare) {
  // NaN and 1.19
  ASSERT_EQ(Fp64Compare(0x7ff8000000000000ULL, 0x3ff30a3d70a3d70aULL), MakeNZCV(0b0011));

  // 8.42 == 8.42
  ASSERT_EQ(Fp64Compare(0x4020d70a3d70a3d7ULL, 0x4020d70a3d70a3d7ULL), MakeNZCV(0b0110));

  // 0.50 < 1.00
  ASSERT_EQ(Fp64Compare(0x3fe0000000000000ULL, 0x3ff0000000000000ULL), MakeNZCV(0b1000));

  // 7.38 > 1.54
  ASSERT_EQ(Fp64Compare(0x401d851eb851eb85ULL, 0x3ff8a3d70a3d70a4ULL), MakeNZCV(0b0010));
}
2249 
// FCMP with the #0.0 immediate form (double-precision): compares the operand
// against zero and returns NZCV via MRS.  Flag patterns match Fp64Compare.
TEST(Arm64InsnTest, Fp64CompareZero) {
  constexpr auto Fp64CompareZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %d1, #0.0\n\t"
      "mrs %x0, nzcv");

  // NaN and 0.00 (0x7ff4... is a signaling-NaN bit pattern).
  ASSERT_EQ(Fp64CompareZero(0x7ff4000000000000ULL), MakeNZCV(0b0011));

  // 0.00 == 0.00
  ASSERT_EQ(Fp64CompareZero(0x0000000000000000ULL), MakeNZCV(0b0110));

  // -7.23 < 0.00
  ASSERT_EQ(Fp64CompareZero(0xc01ceb851eb851ecULL), MakeNZCV(0b1000));

  // 5.39 > 0.00
  ASSERT_EQ(Fp64CompareZero(0x40158f5c28f5c28fULL), MakeNZCV(0b0010));
}
2267 
// Seeds PSTATE.NZCV with |nzcv|, then runs FCCMP with condition EQ: when the
// seeded Z flag is set the two floats are compared with FCMP semantics;
// otherwise NZCV is loaded from the immediate #15 (0b1111).  Returns the
// final flags read back via MRS.  The "0" constraint ties the input nzcv to
// the same register as the output.
uint64_t Fp32CompareIfEqualOrSetAllFlags(float arg1, float arg2, uint64_t nzcv) {
  asm("msr nzcv, %x0\n\t"
      "fccmp %s2, %s3, #15, eq\n\t"
      "mrs %x0, nzcv\n\t"
      : "=r"(nzcv)
      : "0"(nzcv), "w"(arg1), "w"(arg2));
  return nzcv;
}
2276 
// FCCMP (single-precision) with condition EQ: when the incoming Z flag is set
// the comparison runs (flag patterns as in Fp32Compare); otherwise the
// instruction's alternative immediate NZCV (#15 = 0b1111) is installed.
TEST(Arm64InsnTest, Fp32ConditionalCompare) {
  // Comparison is performed.
  constexpr uint64_t kEqual = MakeNZCV(0b0100);
  constexpr float kNan = std::numeric_limits<float>::quiet_NaN();
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kEqual), MakeNZCV(0b0110));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kEqual), MakeNZCV(0b1000));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kEqual), MakeNZCV(0b0010));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kNan, 1.0f, kEqual), MakeNZCV(0b0011));
  // Comparison is not performed; alt-nzcv is returned.
  constexpr uint64_t kNotEqual = MakeNZCV(0b0000);
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kNan, 1.0f, kNotEqual), MakeNZCV(0b1111));
}
2292 
// Double-precision counterpart of Fp32CompareIfEqualOrSetAllFlags: seeds
// PSTATE.NZCV, runs FCCMP (condition EQ, alt-NZCV #15), returns final flags.
uint64_t Fp64CompareIfEqualOrSetAllFlags(double arg1, double arg2, uint64_t nzcv) {
  asm("msr nzcv, %x0\n\t"
      "fccmp %d2, %d3, #15, eq\n\t"
      "mrs %x0, nzcv\n\t"
      : "=r"(nzcv)
      : "0"(nzcv), "w"(arg1), "w"(arg2));
  return nzcv;
}
2301 
// FCCMP (double-precision) with condition EQ: when the incoming Z flag is set
// the comparison runs (flag patterns as in Fp64Compare); otherwise the
// instruction's alternative immediate NZCV (#15 = 0b1111) is installed.
TEST(Arm64InsnTest, Fp64ConditionalCompare) {
  // Comparison is performed.
  constexpr uint64_t kEqual = MakeNZCV(0b0100);
  constexpr double kNan = std::numeric_limits<double>::quiet_NaN();
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kEqual), MakeNZCV(0b0110));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kEqual), MakeNZCV(0b1000));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kEqual), MakeNZCV(0b0010));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kEqual), MakeNZCV(0b0011));
  // Comparison is not performed; alt-nzcv is returned.
  constexpr uint64_t kNotEqual = MakeNZCV(0b0000);
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  // Was `1.0f` (single-precision literal) while every sibling uses `1.0`;
  // the implicit float->double promotion made it equivalent, but the literal
  // is now consistent with the rest of this double-precision test.
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kNotEqual), MakeNZCV(0b1111));
}
2317 
// FCVT single -> double: 6.42f widens to the (inexact) double nearest the
// original float value; the result's exact bit pattern is asserted.
TEST(Arm64InsnTest, ConvertFp32ToFp64) {
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %s1");
  uint64_t input = 0x40cd70a4ULL;  // 6.42 in float
  ASSERT_EQ(AsmFcvt(input), MakeUInt128(0x4019ae1480000000ULL, 0U));
}
2323 
// FCVT double -> single: 6.51 narrows to the nearest float; the result's
// exact bit pattern is asserted.
TEST(Arm64InsnTest, ConvertFp64ToFp32) {
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %d1");
  uint64_t input = 0x401a0a3d70a3d70aULL;  // 6.51 in double
  ASSERT_EQ(AsmFcvt(input), MakeUInt128(0x40d051ecULL, 0U));
}
2329 
// FCVT single -> half: each input is exactly representable in FP16, so the
// expected half-precision bit patterns are exact.
TEST(Arm64InsnTest, ConvertFp32ToFp16) {
  constexpr auto NarrowToHalf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %s1");
  EXPECT_EQ(NarrowToHalf(bit_cast<uint32_t>(2.5f)), MakeUInt128(0x4100U, 0U));
  EXPECT_EQ(NarrowToHalf(bit_cast<uint32_t>(4.5f)), MakeUInt128(0x4480U, 0U));
  EXPECT_EQ(NarrowToHalf(bit_cast<uint32_t>(8.5f)), MakeUInt128(0x4840U, 0U));
  EXPECT_EQ(NarrowToHalf(bit_cast<uint32_t>(16.5f)), MakeUInt128(0x4c20U, 0U));
}
2337 
// FCVT half -> single: 0x4100 is 2.5 in FP16 and widens exactly to 2.5f.
TEST(Arm64InsnTest, ConvertFp16ToFp32) {
  constexpr auto WidenFromHalf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %h1");
  ASSERT_EQ(WidenFromHalf(uint64_t{0x4100U}), bit_cast<uint32_t>(2.5f));
}
2343 
// FCVT double -> half: 2.5 is exactly representable in FP16 (0x4100).
TEST(Arm64InsnTest, ConvertFp64ToFp16) {
  constexpr auto NarrowToHalf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %d1");
  ASSERT_EQ(NarrowToHalf(bit_cast<uint64_t>(2.5)), MakeUInt128(0x4100U, 0U));
}
2349 
// FCVT half -> double: 0x4100 is 2.5 in FP16 and widens exactly to 2.5.
TEST(Arm64InsnTest, ConvertFp16ToFp64) {
  constexpr auto WidenFromHalf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %h1");
  ASSERT_EQ(WidenFromHalf(uint64_t{0x4100U}), bit_cast<uint64_t>(2.5));
}
2355 
// FCVTN: narrows two F64 lanes into the lower two F32 lanes; the upper half
// of the destination is zeroed.
TEST(Arm64InsnTest, ConvertToNarrowF64F32x2) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.2s, %1.2d");
  ASSERT_EQ(AsmFcvtn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
  // Values too large for float, as well as infinities, narrow to +inf.
  ASSERT_EQ(AsmFcvtn(MakeF64x2(std::numeric_limits<double>::max(),
                               std::numeric_limits<double>::infinity())),
            MakeF32x4(std::numeric_limits<float>::infinity(),
                      std::numeric_limits<float>::infinity(),
                      0.0f,
                      0.0f));
  // Symmetrically for the negative side: -inf.
  ASSERT_EQ(AsmFcvtn(MakeF64x2(std::numeric_limits<double>::lowest(),
                               -std::numeric_limits<double>::infinity())),
            MakeF32x4(-std::numeric_limits<float>::infinity(),
                      -std::numeric_limits<float>::infinity(),
                      0.0f,
                      0.0f));
}
2375 
// FCVTN2: narrows two F64 lanes into the UPPER two F32 lanes while the lower
// half of the destination (4.0f, 5.0f) is preserved.
TEST(Arm64InsnTest, ConvertToNarrowF64F32x2Upper) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.4s, %1.2d");
  __uint128_t narrow_src = MakeF64x2(2.0, 3.0);
  __uint128_t dest_init = MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f);
  ASSERT_EQ(AsmFcvtn(narrow_src, dest_init), MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
}
2382 
// FCVTXN (scalar): narrows F64 -> F32 with round-to-odd.  Unlike FCVTN, a
// finite overflow saturates to the largest finite float instead of producing
// infinity, while true infinities remain infinite.
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32) {
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %s0, %d1");
  constexpr double kInf64 = std::numeric_limits<double>::infinity();
  constexpr float kInf32 = std::numeric_limits<float>::infinity();
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(2.0)), bit_cast<uint32_t>(2.0f));
  // Finite overflow saturates in both directions.
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(std::numeric_limits<double>::max())),
            bit_cast<uint32_t>(std::numeric_limits<float>::max()));
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(std::numeric_limits<double>::lowest())),
            bit_cast<uint32_t>(std::numeric_limits<float>::lowest()));
  // Infinities pass through unchanged.
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(kInf64)), bit_cast<uint32_t>(kInf32));
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(-kInf64)), bit_cast<uint32_t>(-kInf32));
}
2398 
// FCVTXN (vector): narrows two F64 lanes into the lower two F32 lanes using
// round-to-odd; the upper half of the destination is zeroed.
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2) {
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %0.2s, %1.2d");
  ASSERT_EQ(AsmFcvtxn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
}
2404 
// FCVTXN2: narrows two F64 lanes into the UPPER two F32 lanes with
// round-to-odd, preserving the lower half of the destination (4.0f, 5.0f).
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2Upper) {
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtxn2 %0.4s, %1.2d");
  __uint128_t narrow_src = MakeF64x2(2.0, 3.0);
  __uint128_t dest_init = MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f);
  ASSERT_EQ(AsmFcvtxn(narrow_src, dest_init), MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
}
2411 
// FCVTL: widens the LOWER two F32 lanes to two F64 lanes; the upper two
// input lanes (4.0f, 5.0f) are ignored.
TEST(Arm64InsnTest, ConvertToWiderF32F64x2Lower) {
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.2d, %1.2s");
  ASSERT_EQ(AsmFcvtl(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(2.0, 3.0));
}
2417 
// FCVTL2: widens the UPPER two F32 lanes to two F64 lanes; the lower two
// input lanes (2.0f, 3.0f) are ignored.
TEST(Arm64InsnTest, ConvertToWiderF32F64x2Upper) {
  constexpr auto AsmFcvtl2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.2d, %1.4s");
  ASSERT_EQ(AsmFcvtl2(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(4.0, 5.0));
}
2423 
// FCVTL .4h -> .4s: widens four half-precision lanes from the LOWER half of
// the source (0x4100=2.5, 0x4480=4.5, 0x4840=8.5, 0x4c20=16.5) to floats.
TEST(Arm64InsnTest, ConvertToWiderF16F32x4Lower) {
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.4s, %1.4h");
  __uint128_t half_lanes = MakeUInt128(0x4c20'4840'4480'4100ULL, 0);  // low half
  ASSERT_EQ(AsmFcvtl(half_lanes), MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}
2430 
// FCVTL2 .8h -> .4s: widens the four half-precision lanes from the UPPER
// half of the source register to floats.
TEST(Arm64InsnTest, ConvertToWiderF16F32x4Upper) {
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.4s, %1.8h");
  __uint128_t half_lanes = MakeUInt128(0, 0x4c20'4840'4480'4100ULL);  // high half
  ASSERT_EQ(AsmFcvtl(half_lanes), MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}
2437 
// FCVTN (single to half): narrows 4xF32 into 4xF16 in the lower half of the
// destination; the upper 64 bits are zeroed.
TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Lower) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.4h, %1.4s");
  __uint128_t arg = MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f);
  // 4xF16 in the lower half.
  ASSERT_EQ(AsmFcvtn(arg), MakeUInt128(0x4c20'4840'4480'4100ULL, 0));
}
2444 
// FCVTN2 (single to half): narrows into the upper half of the destination;
// the lower 64 bits of the accumulating operand (arg2) must be preserved.
TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Upper) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.8h, %1.4s");
  __uint128_t arg1 = MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f);
  __uint128_t arg2 = MakeF32x4(3.0f, 5.0f, 7.0f, 11.0f);
  // 4xF16 in the upper half, lower half preserved.
  ASSERT_EQ(AsmFcvtn(arg1, arg2), MakeUInt128(uint64_t(arg2), 0x4c20'4840'4480'4100ULL));
}
2452 
// FABS (scalar, single precision): clears the sign bit.
TEST(Arm64InsnTest, AbsF32) {
  uint32_t arg = 0xc1273333U;  // -10.45 in float
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %s0, %s1")(arg);
  ASSERT_EQ(res, MakeUInt128(0x41273333ULL, 0U));  // 10.45 in float
}
2458 
// FABS (scalar, double precision): clears the sign bit.
TEST(Arm64InsnTest, AbsF64) {
  uint64_t arg = 0xc03de8f5c28f5c29ULL;  // -29.91 in double
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %d0, %d1")(arg);
  ASSERT_EQ(res, MakeUInt128(0x403de8f5c28f5c29ULL, 0U));  // 29.91 in double
}
2464 
// FABS (vector, 4xF32): per-lane absolute value; -0.0f maps to +0.0f.
TEST(Arm64InsnTest, AbsF32x4) {
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.4s, %1.4s");
  __uint128_t arg = MakeF32x4(-0.0f, 0.0f, 3.0f, -7.0f);
  ASSERT_EQ(AsmFabs(arg), MakeF32x4(0.0f, 0.0f, 3.0f, 7.0f));
}
2470 
// FABS (vector, 2xF64): per-lane absolute value; -0.0 maps to +0.0.
TEST(Arm64InsnTest, AbsF64x2) {
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.2d, %1.2d");
  __uint128_t arg = MakeF64x2(-0.0, 3.0);
  ASSERT_EQ(AsmFabs(arg), MakeF64x2(0.0, 3.0));
}
2476 
// FABD (scalar, single precision): absolute difference |arg1 - arg2|.
// The expected bit pattern is the exact rounded result of 16.19f - 10.06f.
TEST(Arm64InsnTest, AbdF32) {
  uint32_t arg1 = 0x4181851fU;  // 16.19 in float
  uint32_t arg2 = 0x41211eb8U;  // 10.06 in float
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %s0, %s1, %s2")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x40c3d70cULL, 0U));  // 6.12 in float
}
2483 
// FABD (scalar, double precision): absolute difference |arg1 - arg2|.
TEST(Arm64InsnTest, AbdF64) {
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %d0, %d1, %d2");
  uint64_t arg1 = 0x403828f5c28f5c29U;  // 24.16 in double
  uint64_t arg2 = 0x4027d70a3d70a3d7U;  // 11.92 in double
  __uint128_t res = AsmFabd(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x40287ae147ae147bULL, 0U));  // 12.24 in double
}
2491 
// FABD (vector, 4xF32): per-lane |arg1 - arg2|, including mixed-sign lanes.
TEST(Arm64InsnTest, AbdF32x4) {
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(1.0f, 5.0f, -3.0f, -2.0f);
  __uint128_t arg2 = MakeF32x4(-1.0f, 2.0f, -5.0f, 3.0f);
  __uint128_t res = AsmFabd(arg1, arg2);
  ASSERT_EQ(res, MakeF32x4(2.0f, 3.0f, 2.0f, 5.0f));
}
2499 
// FABD (vector, 2xF64): per-lane |arg1 - arg2|.
TEST(Arm64InsnTest, AbdF64x2) {
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(5.0, -2.0);
  __uint128_t arg2 = MakeF64x2(4.0, 3.0);
  __uint128_t res = AsmFabd(arg1, arg2);
  ASSERT_EQ(res, MakeF64x2(1.0, 5.0));
}
2507 
// FNEG (scalar, single precision): flips the sign bit.
TEST(Arm64InsnTest, NegF32) {
  uint32_t arg = 0x40eeb852U;  // 7.46 in float
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %s0, %s1")(arg);
  ASSERT_EQ(res, MakeUInt128(0xc0eeb852ULL, 0U));  // -7.46 in float
}
2513 
// FNEG (scalar, double precision): flips the sign bit.
TEST(Arm64InsnTest, NegF64) {
  uint64_t arg = 0x4054b28f5c28f5c3ULL;  // 82.79 in double
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %d0, %d1")(arg);
  ASSERT_EQ(res, MakeUInt128(0xc054b28f5c28f5c3ULL, 0U));  // -82.79 in double
}
2519 
// FNEG (vector, 4xF32): per-lane sign flip, including the signed-zero lanes.
TEST(Arm64InsnTest, NegF32x4) {
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.4s, %1.4s");
  __uint128_t arg = MakeF32x4(-0.0f, 0.0f, 1.0f, -3.0f);
  ASSERT_EQ(AsmFneg(arg), MakeF32x4(0.0f, -0.0f, -1.0f, 3.0f));
}
2525 
// FNEG (vector, 2xF64): per-lane sign flip; +0.0 becomes -0.0.
TEST(Arm64InsnTest, NegF64x2) {
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.2d, %1.2d");
  __uint128_t arg = MakeF64x2(0.0, 3.0);
  ASSERT_EQ(AsmFneg(arg), MakeF64x2(-0.0, -3.0));
}
2531 
// FSQRT (scalar, single precision): expected value is the correctly rounded
// square root's exact bit pattern.
TEST(Arm64InsnTest, SqrtF32) {
  uint32_t arg = 0x41f3cac1U;  // 30.474 in float
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %s0, %s1")(arg);
  ASSERT_EQ(res, MakeUInt128(0x40b0a683ULL, 0U));  // 5.5203261 in float
}
2537 
// FSQRT (scalar, double precision): expected value is the correctly rounded
// square root's exact bit pattern.
TEST(Arm64InsnTest, SqrtF64) {
  uint64_t arg = 0x403d466666666666ULL;  // 29.275 in double
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %d0, %d1")(arg);
  ASSERT_EQ(res, MakeUInt128(0x4015a47e3392efb8ULL, 0U));  // 5.41... in double
}
2543 
// FSQRT (vector, 4xF32): uses perfect squares so every lane is exact.
TEST(Arm64InsnTest, SqrtF32x4) {
  constexpr auto AsmSqrt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %0.4s, %1.4s");
  __uint128_t arg = MakeF32x4(0.0f, 1.0f, 4.0f, 9.0f);
  ASSERT_EQ(AsmSqrt(arg), MakeF32x4(0.0f, 1.0f, 2.0f, 3.0f));
}
2549 
// FRECPE (scalar): reciprocal *estimate*, accurate to the architected 8-bit
// mantissa table — hence expected values like 3.9921875 rather than 4.0.
TEST(Arm64InsnTest, RecipEstimateF32) {
  constexpr auto AsmFrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %s0, %s1");
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(0.25f)), bit_cast<uint32_t>(3.9921875f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(0.50f)), bit_cast<uint32_t>(1.99609375f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(2.00f)), bit_cast<uint32_t>(0.4990234375f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(4.00f)), bit_cast<uint32_t>(0.24951171875f));
}
2557 
// FRECPE (vector, 4xF32): same table-driven estimates as the scalar test.
TEST(Arm64InsnTest, RecipEstimateF32x4) {
  constexpr auto AsmFrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %0.4s, %1.4s");
  __uint128_t res = AsmFrecpe(MakeF32x4(0.25f, 0.50f, 2.00f, 4.00f));
  ASSERT_EQ(res, MakeF32x4(3.9921875f, 1.99609375f, 0.4990234375f, 0.24951171875f));
}
2563 
// FRECPS (scalar): Newton-Raphson reciprocal step, computes 2.0 - a*b.
// All chosen operands make the result exactly representable.
TEST(Arm64InsnTest, RecipStepF32) {
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %s0, %s1, %s2");
  __uint128_t res1 = AsmFrecps(bit_cast<uint32_t>(1.50f), bit_cast<uint32_t>(0.50f));
  ASSERT_EQ(res1, bit_cast<uint32_t>(1.25f));
  __uint128_t res2 = AsmFrecps(bit_cast<uint32_t>(2.00f), bit_cast<uint32_t>(0.50f));
  ASSERT_EQ(res2, bit_cast<uint32_t>(1.00f));
  __uint128_t res3 = AsmFrecps(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.25f));
  ASSERT_EQ(res3, bit_cast<uint32_t>(1.25f));
  __uint128_t res4 = AsmFrecps(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.50f));
  ASSERT_EQ(res4, bit_cast<uint32_t>(0.50f));
}
2575 
// FRECPS (scalar, double): same 2.0 - a*b step as the F32 variant above.
TEST(Arm64InsnTest, RecipStepF64) {
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %d0, %d1, %d2");
  __uint128_t res1 = AsmFrecps(bit_cast<uint64_t>(1.50), bit_cast<uint64_t>(0.50));
  ASSERT_EQ(res1, bit_cast<uint64_t>(1.25));
  __uint128_t res2 = AsmFrecps(bit_cast<uint64_t>(2.00), bit_cast<uint64_t>(0.50));
  ASSERT_EQ(res2, bit_cast<uint64_t>(1.00));
  __uint128_t res3 = AsmFrecps(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.25));
  ASSERT_EQ(res3, bit_cast<uint64_t>(1.25));
  __uint128_t res4 = AsmFrecps(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.50));
  ASSERT_EQ(res4, bit_cast<uint64_t>(0.50));
}
2587 
// FRECPS (vector, 4xF32): per-lane 2.0 - a*b, same operand pairs as the
// scalar test packed into one vector.
TEST(Arm64InsnTest, RecipStepF32x4) {
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f);
  __uint128_t arg2 = MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f);
  __uint128_t res = AsmFrecps(arg1, arg2);
  ASSERT_EQ(res, MakeF32x4(1.25f, 1.00f, 1.25f, 0.50f));
}
2595 
// FRECPS (vector, 2xF64): per-lane 2.0 - a*b over two vector pairs.
TEST(Arm64InsnTest, RecipStepF64x2) {
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(1.50, 2.00);
  __uint128_t arg2 = MakeF64x2(0.50, 0.50);
  ASSERT_EQ(AsmFrecps(arg1, arg2), MakeF64x2(1.25, 1.00));
  __uint128_t arg3 = MakeF64x2(3.00, 3.00);
  __uint128_t arg4 = MakeF64x2(0.25, 0.50);
  ASSERT_EQ(AsmFrecps(arg3, arg4), MakeF64x2(1.25, 0.50));
}
2605 
// FRSQRTE (scalar): reciprocal square-root *estimate* from the architected
// lookup table — expected values are the table outputs, not exact 1/sqrt(x).
TEST(Arm64InsnTest, RecipSqrtEstimateF32) {
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %s0, %s1");
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(2.0f)), bit_cast<uint32_t>(0.705078125f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(3.0f)), bit_cast<uint32_t>(0.576171875f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(4.0f)), bit_cast<uint32_t>(0.4990234375f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(5.0f)), bit_cast<uint32_t>(0.4462890625f));
}
2613 
// FRSQRTE (vector, 4xF32): same table-driven estimates, one per lane.
TEST(Arm64InsnTest, RecipSqrtEstimateF32x4) {
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.4s, %1.4s");
  __uint128_t arg = MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f);
  __uint128_t res = AsmFrsqrte(arg);
  ASSERT_EQ(res, MakeF32x4(0.705078125f, 0.576171875f, 0.4990234375f, 0.4462890625f));
}
2620 
// FRSQRTE (scalar, double): the estimate table yields the same mantissa
// precision as the F32 variant, hence identical decimal expected values.
TEST(Arm64InsnTest, RecipSqrtEstimateF64) {
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %d0, %d1");
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(2.0)), bit_cast<uint64_t>(0.705078125));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(3.0)), bit_cast<uint64_t>(0.576171875));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(4.0)), bit_cast<uint64_t>(0.4990234375));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(5.0)), bit_cast<uint64_t>(0.4462890625));
}
2628 
// FRSQRTE (vector, 2xF64): table-driven estimates per lane.
TEST(Arm64InsnTest, RecipSqrtEstimateF64x2) {
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.2d, %1.2d");
  __uint128_t arg = MakeF64x2(2.0, 3.0);
  __uint128_t res = AsmFrsqrte(arg);
  ASSERT_EQ(res, MakeUInt128(bit_cast<uint64_t>(0.705078125), bit_cast<uint64_t>(0.576171875)));
}
2635 
// FRSQRTS (scalar): reciprocal-sqrt Newton step, computes (3.0 - a*b) / 2.0.
// Operands are chosen so every result is exactly representable.
TEST(Arm64InsnTest, RecipSqrtStepF32) {
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %s0, %s1, %s2");
  __uint128_t res1 = AsmFrsqrts(bit_cast<uint32_t>(1.50f), bit_cast<uint32_t>(0.50f));
  ASSERT_EQ(res1, bit_cast<uint32_t>(1.125f));
  __uint128_t res2 = AsmFrsqrts(bit_cast<uint32_t>(2.00f), bit_cast<uint32_t>(0.50f));
  ASSERT_EQ(res2, bit_cast<uint32_t>(1.000f));
  __uint128_t res3 = AsmFrsqrts(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.25f));
  ASSERT_EQ(res3, bit_cast<uint32_t>(1.125f));
  __uint128_t res4 = AsmFrsqrts(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.50f));
  ASSERT_EQ(res4, bit_cast<uint32_t>(0.750f));
}
2647 
// FRSQRTS (scalar, double): same (3.0 - a*b) / 2.0 step as the F32 variant.
TEST(Arm64InsnTest, RecipSqrtStepF64) {
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %d0, %d1, %d2");
  __uint128_t res1 = AsmFrsqrts(bit_cast<uint64_t>(1.50), bit_cast<uint64_t>(0.50));
  ASSERT_EQ(res1, bit_cast<uint64_t>(1.125));
  __uint128_t res2 = AsmFrsqrts(bit_cast<uint64_t>(2.00), bit_cast<uint64_t>(0.50));
  ASSERT_EQ(res2, bit_cast<uint64_t>(1.000));
  __uint128_t res3 = AsmFrsqrts(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.25));
  ASSERT_EQ(res3, bit_cast<uint64_t>(1.125));
  __uint128_t res4 = AsmFrsqrts(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.50));
  ASSERT_EQ(res4, bit_cast<uint64_t>(0.750));
}
2659 
// FRSQRTS (vector, 4xF32): per-lane (3.0 - a*b) / 2.0, same operand pairs as
// the scalar test packed into one vector.
TEST(Arm64InsnTest, RecipSqrtStepF32x4) {
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f);
  __uint128_t arg2 = MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f);
  __uint128_t res = AsmFrsqrts(arg1, arg2);
  ASSERT_EQ(res, MakeF32x4(1.125f, 1.000f, 1.125f, 0.750f));
}
2667 
// FRSQRTS (vector, 2xF64): per-lane (3.0 - a*b) / 2.0 over two vector pairs.
TEST(Arm64InsnTest, RecipSqrtStepF64x2) {
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(1.50, 2.00);
  __uint128_t arg2 = MakeF64x2(0.50, 0.50);
  ASSERT_EQ(AsmFrsqrts(arg1, arg2), MakeF64x2(1.125, 1.000));
  __uint128_t arg3 = MakeF64x2(3.00, 3.00);
  __uint128_t arg4 = MakeF64x2(0.25, 0.50);
  ASSERT_EQ(AsmFrsqrts(arg3, arg4), MakeF64x2(1.125, 0.750));
}
2677 
// FADD (scalar, single precision). The operands are held in uint64_t but only
// the low 32 bits reach the %s register.
TEST(Arm64InsnTest, AddFp32) {
  uint64_t fp_arg1 = 0x40d5c28fULL;  // 6.68 in float
  uint64_t fp_arg2 = 0x409f5c29ULL;  // 4.98 in float
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %s0, %s1, %s2")(fp_arg1, fp_arg2);
  ASSERT_EQ(rd, MakeUInt128(0x413a8f5cULL, 0U));  // 11.66 in float
}
2684 
// FADD (scalar, double precision).
TEST(Arm64InsnTest, AddFp64) {
  uint64_t fp_arg1 = 0x402099999999999aULL;  // 8.30 in double
  uint64_t fp_arg2 = 0x4010ae147ae147aeULL;  // 4.17 in double
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %d0, %d1, %d2")(fp_arg1, fp_arg2);
  ASSERT_EQ(rd, MakeUInt128(0x4028f0a3d70a3d71ULL, 0U));  // 12.47 in double
}
2691 
// FADD (vector, 4xF32): per-lane addition; note -0.0f + 5.0f == 5.0f.
TEST(Arm64InsnTest, AddF32x4) {
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFadd(arg1, arg2), MakeF32x4(3.0f, 3.0f, -1.0f, 5.0f));
}
2698 
// FADD (vector, 2xF64): per-lane addition.
TEST(Arm64InsnTest, AddF64x2) {
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(3.0, 5.0);
  __uint128_t arg2 = MakeF64x2(-4.0, 2.0);
  ASSERT_EQ(AsmFadd(arg1, arg2), MakeF64x2(-1.0, 7.0));
}
2705 
// FADDP (scalar form): adds the pair of F32 lanes 0 and 1 of the source
// (1.0 + 2.0); lanes 2/3 of the input vector are outside the .2s view.
TEST(Arm64InsnTest, AddPairwiseF32x2) {
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("faddp %s0, %1.2s");
  __uint128_t arg1 = MakeF32x4(1.0f, 2.0f, 4.0f, 8.0f);
  ASSERT_EQ(AsmFaddp(arg1), bit_cast<uint32_t>(3.0f));
}
2711 
// FADDP (vector): pairwise sums of arg1 fill the low lanes, pairwise sums of
// arg2 fill the high lanes: {-3+2, 7+-0, 6+1, -8+5}.
TEST(Arm64InsnTest, AddPairwiseF32x4) {
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("faddp %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFaddp(arg1, arg2), MakeF32x4(-1.0f, 7.0f, 7.0f, -3.0f));
}
2718 
// FSUB (scalar, single precision).
TEST(Arm64InsnTest, SubFp32) {
  uint64_t fp_arg1 = 0x411f5c29ULL;  // 9.96 in float
  uint64_t fp_arg2 = 0x404851ecULL;  // 3.13 in float
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %s0, %s1, %s2")(fp_arg1, fp_arg2);
  ASSERT_EQ(rd, MakeUInt128(0x40da8f5cULL, 0U));  // 6.83 in float
}
2725 
// FSUB (scalar, double precision).
TEST(Arm64InsnTest, SubFp64) {
  uint64_t fp_arg1 = 0x401ee147ae147ae1ULL;  // 7.72 in double
  uint64_t fp_arg2 = 0x4015666666666666ULL;  // 5.35 in double
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %d0, %d1, %d2")(fp_arg1, fp_arg2);
  ASSERT_EQ(rd, MakeUInt128(0x4002f5c28f5c28f6ULL, 0U));  // 2.37 in double
}
2732 
// FSUB (vector, 4xF32): per-lane arg1 - arg2.
TEST(Arm64InsnTest, SubF32x4) {
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFsub(arg1, arg2), MakeF32x4(-9.0f, 1.0f, 15.0f, -5.0f));
}
2739 
// FSUB (vector, 2xF64): per-lane arg1 - arg2.
TEST(Arm64InsnTest, SubF64x2) {
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(3.0, 5.0);
  __uint128_t arg2 = MakeF64x2(-4.0, 2.0);
  ASSERT_EQ(AsmFsub(arg1, arg2), MakeF64x2(7.0, 3.0));
}
2746 
// FMAX (scalar, single precision): larger operand wins; unlike FMAXNM, any
// NaN input propagates (as the default NaN here).
TEST(Arm64InsnTest, MaxFp32) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);

  ASSERT_EQ(AsmFmax(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_three, 0, 0, 0));
  ASSERT_EQ(AsmFmax(kDefaultNaN32, fp_arg_three), kDefaultNaN32);
  ASSERT_EQ(AsmFmax(fp_arg_three, kDefaultNaN32), kDefaultNaN32);
}
2756 
// FMAX (scalar, double precision): NaN in either operand propagates.
TEST(Arm64InsnTest, MaxFp64) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %d0, %d1, %d2");
  uint64_t fp_arg_two = bit_cast<uint64_t>(2.0);
  uint64_t fp_arg_three = bit_cast<uint64_t>(3.0);

  ASSERT_EQ(AsmFmax(fp_arg_two, fp_arg_three), MakeUInt128(fp_arg_three, 0U));
  ASSERT_EQ(AsmFmax(kDefaultNaN64, fp_arg_three), kDefaultNaN64);
  ASSERT_EQ(AsmFmax(fp_arg_three, kDefaultNaN64), kDefaultNaN64);
}
2766 
// FMAX (vector, 4xF32): per-lane max; +0.0 > -0.0, and a NaN in either lane
// propagates into that lane of the result.
TEST(Arm64InsnTest, MaxF32x4) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmax(arg1, arg2), MakeF32x4(0.0f, 2.0f, 3.0f, -3.0f));

  __uint128_t arg3 = MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, -4.0f);
  __uint128_t arg4 = MakeF32x4(0.0f, 1.0f, -3.0f, bit_cast<float>(kDefaultNaN32));
  ASSERT_EQ(AsmFmax(arg3, arg4),
            MakeF32x4(0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, bit_cast<float>(kDefaultNaN32)));
}
2778 
// FMAX (vector, 2xF64): per-lane max with signed-zero ordering and NaN
// propagation per lane.
TEST(Arm64InsnTest, MaxF64x2) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-0.0, 3.0);
  __uint128_t arg2 = MakeF64x2(0.0, -3.0);
  ASSERT_EQ(AsmFmax(arg1, arg2), MakeF64x2(0.0, 3.0));

  __uint128_t arg3 = MakeF64x2(bit_cast<double>(kDefaultNaN64), 3.0);
  __uint128_t arg4 = MakeF64x2(1.0, bit_cast<double>(kDefaultNaN64));
  ASSERT_EQ(AsmFmax(arg3, arg4),
            MakeF64x2(bit_cast<double>(kDefaultNaN64), bit_cast<double>(kDefaultNaN64)));
}
2790 
// FMAXNM (scalar, single precision): returns the larger operand, and a quiet
// NaN operand is treated as "missing" so the numeric operand wins.
TEST(Arm64InsnTest, MaxNumberFp32) {
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);
  // Was `bit_cast<uint64_t>(-2.0)`: a double bit pattern (0xC000000000000000)
  // whose low 32 bits are 0x00000000, i.e. +0.0f — so the negative-operand
  // assertions below were silently testing +0.0f. Use the 32-bit pattern of
  // -2.0f, matching MinNumberFp32.
  uint32_t fp_arg_minus_two = bit_cast<uint32_t>(-2.0f);

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_three, 0, 0, 0));

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, kQuietNaN32), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(fp_arg_minus_two, kQuietNaN32), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32, fp_arg_two), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32, fp_arg_minus_two), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
}
2804 
// FMAXNM (scalar, double precision): a quiet NaN operand is treated as
// "missing", so the numeric operand is returned from either position.
TEST(Arm64InsnTest, MaxNumberFp64) {
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %d0, %d1, %d2");
  uint64_t fp_arg_two = bit_cast<uint64_t>(2.0);
  uint64_t fp_arg_three = bit_cast<uint64_t>(3.0);
  uint64_t fp_arg_minus_two = bit_cast<uint64_t>(-2.0);

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, fp_arg_three), MakeUInt128(fp_arg_three, 0U));

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, kQuietNaN64), MakeUInt128(fp_arg_two, 0U));
  ASSERT_EQ(AsmFmaxnm(fp_arg_minus_two, kQuietNaN64), MakeUInt128(fp_arg_minus_two, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64, fp_arg_two), MakeUInt128(fp_arg_two, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64, fp_arg_minus_two), MakeUInt128(fp_arg_minus_two, 0));
}
2818 
// FMINNM (scalar, single precision): a quiet NaN operand is treated as
// "missing", so the numeric operand is returned from either position.
TEST(Arm64InsnTest, MinNumberFp32) {
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);
  uint32_t fp_arg_minus_two = bit_cast<uint32_t>(-2.0f);

  ASSERT_EQ(AsmFminnm(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_two, 0, 0, 0));

  ASSERT_EQ(AsmFminnm(fp_arg_two, kQuietNaN32), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(fp_arg_minus_two, kQuietNaN32), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32, fp_arg_two), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32, fp_arg_minus_two), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
}
2832 
// FMINNM (scalar, double precision): a quiet NaN operand is treated as
// "missing", so the numeric operand is returned from either position.
TEST(Arm64InsnTest, MinNumberFp64) {
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %d0, %d1, %d2");
  uint64_t fp_arg_two = bit_cast<uint64_t>(2.0);
  uint64_t fp_arg_three = bit_cast<uint64_t>(3.0);
  uint64_t fp_arg_minus_two = bit_cast<uint64_t>(-2.0);

  ASSERT_EQ(AsmFminnm(fp_arg_two, fp_arg_three), MakeUInt128(fp_arg_two, 0U));

  ASSERT_EQ(AsmFminnm(fp_arg_two, kQuietNaN64), MakeUInt128(fp_arg_two, 0U));
  ASSERT_EQ(AsmFminnm(fp_arg_minus_two, kQuietNaN64), MakeUInt128(fp_arg_minus_two, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64, fp_arg_two), MakeUInt128(fp_arg_two, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64, fp_arg_minus_two), MakeUInt128(fp_arg_minus_two, 0));
}
2846 
// FMAXNM (vector, 4xF32): per-lane max where quiet NaN lanes are treated as
// "missing"; also covers signaling NaN lanes, which are NOT treated as
// missing and propagate as NaN.
TEST(Arm64InsnTest, MaxNumberF32x4) {
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-1.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(2.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmaxnm(arg1, arg2), MakeF32x4(2.0f, 2.0f, 3.0f, -3.0f));

  __uint128_t arg3 =
      MakeU32x4(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f), kQuietNaN32, kQuietNaN32);
  __uint128_t arg4 =
      MakeU32x4(kQuietNaN32, kQuietNaN32, bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFmaxnm(arg3, arg4), MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));

  // arg5/arg6 were previously constructed but never checked (dead code).
  // Per FPMaxNum a signaling NaN operand is not "missing": with default-NaN
  // behavior (as the other NaN tests in this file assume) those lanes yield
  // the default NaN, while the quiet-NaN lanes still yield the number.
  // NOTE(review): expected values derived from the ARM spec — confirm on
  // hardware before relying on them.
  __uint128_t arg5 = MakeU32x4(
      bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f), kSignalingNaN32_1, kQuietNaN32);
  __uint128_t arg6 = MakeU32x4(
      kSignalingNaN32_1, kQuietNaN32, bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFmaxnm(arg5, arg6),
            MakeU32x4(kDefaultNaN32,
                      bit_cast<uint32_t>(-1.0f),
                      kDefaultNaN32,
                      bit_cast<uint32_t>(-1.0f)));
}
2864 
// FMAXNM (vector, 2xF64): quiet NaN lanes are treated as "missing", so the
// numeric operand wins in each lane.
TEST(Arm64InsnTest, MaxNumberF64x2) {
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-1.0, -4.0);
  __uint128_t arg2 = MakeF64x2(2.0, -3.0);
  ASSERT_EQ(AsmFmaxnm(arg1, arg2), MakeF64x2(2.0, -3.0));

  __uint128_t arg3 = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64);
  __uint128_t arg4 = MakeUInt128(kQuietNaN64, bit_cast<uint64_t>(-1.0));
  ASSERT_EQ(AsmFmaxnm(arg3, arg4), MakeF64x2(1.0, -1.0));
}
2875 
// FMINNM (vector, 4xF32): per-lane min with -0.0 < +0.0; quiet NaN lanes are
// treated as "missing" so the numeric operand wins.
TEST(Arm64InsnTest, MinNumberF32x4) {
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(-0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFminnm(arg1, arg2), MakeF32x4(-0.0f, 1.0f, -3.0f, -4.0f));

  __uint128_t arg3 =
      MakeU32x4(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f), kQuietNaN32, kQuietNaN32);
  __uint128_t arg4 =
      MakeU32x4(kQuietNaN32, kQuietNaN32, bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f));
  __uint128_t res = AsmFminnm(arg3, arg4);
  ASSERT_EQ(res, MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));
}
2889 
// FMINNM (vector, 2xF64): -0.0 < +0.0; quiet NaN lanes are "missing".
TEST(Arm64InsnTest, MinNumberF64x2) {
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(0.0, 3.0);
  __uint128_t arg2 = MakeF64x2(-0.0, -3.0);
  ASSERT_EQ(AsmFminnm(arg1, arg2), MakeF64x2(-0.0, -3.0));

  __uint128_t arg3 = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64);
  __uint128_t arg4 = MakeUInt128(kQuietNaN64, bit_cast<uint64_t>(-1.0));
  __uint128_t res = AsmFminnm(arg3, arg4);
  ASSERT_EQ(res, MakeF64x2(1.0, -1.0));
}
2901 
// FMIN (scalar, single precision): smaller operand wins; unlike FMINNM, any
// NaN input propagates (as the default NaN here).
TEST(Arm64InsnTest, MinFp32) {
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);

  ASSERT_EQ(AsmFmin(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmin(kDefaultNaN32, fp_arg_three), kDefaultNaN32);
  ASSERT_EQ(AsmFmin(fp_arg_three, kDefaultNaN32), kDefaultNaN32);
}
2911 
// FMIN (scalar, double precision): NaN in either operand propagates.
TEST(Arm64InsnTest, MinFp64) {
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %d0, %d1, %d2");
  uint64_t fp_arg_two = bit_cast<uint64_t>(2.0);
  uint64_t fp_arg_three = bit_cast<uint64_t>(3.0);

  ASSERT_EQ(AsmFmin(fp_arg_two, fp_arg_three), MakeUInt128(fp_arg_two, 0U));
  ASSERT_EQ(AsmFmin(kDefaultNaN64, fp_arg_three), kDefaultNaN64);
  ASSERT_EQ(AsmFmin(fp_arg_three, kDefaultNaN64), kDefaultNaN64);
}
2921 
// FMIN (vector, 4xF32): per-lane min; -0.0 < +0.0, and a NaN in either lane
// propagates into that lane of the result.
TEST(Arm64InsnTest, MinF32x4) {
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(-0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmin(arg1, arg2), MakeF32x4(-0.0f, 1.0f, -3.0f, -4.0f));

  __uint128_t arg3 = MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, -4.0f);
  __uint128_t arg4 = MakeF32x4(0.0f, 1.0f, -3.0f, bit_cast<float>(kDefaultNaN32));
  ASSERT_EQ(
      AsmFmin(arg3, arg4),
      MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), -3.0f, bit_cast<float>(kDefaultNaN32)));
}
2934 
// FMIN (vector, 2xF64): per-lane min with signed-zero ordering and NaN
// propagation per lane.
TEST(Arm64InsnTest, MinF64x2) {
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(0.0, 3.0);
  __uint128_t arg2 = MakeF64x2(-0.0, -3.0);
  ASSERT_EQ(AsmFmin(arg1, arg2), MakeF64x2(-0.0, -3.0));

  __uint128_t arg3 = MakeF64x2(bit_cast<double>(kDefaultNaN64), 3.0);
  __uint128_t arg4 = MakeF64x2(1.0, bit_cast<double>(kDefaultNaN64));
  ASSERT_EQ(AsmFmin(arg3, arg4),
            MakeF64x2(bit_cast<double>(kDefaultNaN64), bit_cast<double>(kDefaultNaN64)));
}
2946 
// FMAXP (scalar form): max of lanes 0 and 1 of the source; a NaN in the pair
// propagates.
TEST(Arm64InsnTest, MaxPairwiseF32Scalar) {
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxp %s0, %1.2s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxp(arg1), bit_cast<uint32_t>(2.0f));

  __uint128_t arg2 = MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxp(arg2), kDefaultNaN32);
}
2955 
// FMAXP (vector): pairwise max — arg1's pairs fill the low lanes, arg2's
// pairs fill the high lanes; a NaN in a pair propagates into that result lane.
TEST(Arm64InsnTest, MaxPairwiseF32x4) {
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxp %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxp(arg1, arg2), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));

  __uint128_t arg3 =
      MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, bit_cast<float>(kDefaultNaN32));
  __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxp(arg3, arg4),
            MakeF32x4(bit_cast<float>(kDefaultNaN32), bit_cast<float>(kDefaultNaN32), 6.0f, 5.0f));
}
2968 
// FMINP (scalar form): min of lanes 0 and 1 of the source; a NaN in the pair
// propagates.
TEST(Arm64InsnTest, MinPairwiseF32Scalar) {
  constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminp %s0, %1.2s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminp(arg1), bit_cast<uint32_t>(-3.0f));

  __uint128_t arg2 = MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminp(arg2), kDefaultNaN32);
}
2977 
// FMINP (vector): pairwise min — arg1's pairs fill the low lanes, arg2's
// pairs fill the high lanes; a NaN in a pair propagates into that result lane.
TEST(Arm64InsnTest, MinPairwiseF32x4) {
  constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminp %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminp(arg1, arg2), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));

  __uint128_t arg3 =
      MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, bit_cast<float>(kDefaultNaN32));
  __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminp(arg3, arg4),
            MakeF32x4(bit_cast<float>(kDefaultNaN32), bit_cast<float>(kDefaultNaN32), 1.0f, -8.0f));
}
2990 
// FMAXNMP (scalar form): pairwise max of lanes 0 and 1 where a quiet NaN is
// treated as "missing" — the numeric lane wins.
TEST(Arm64InsnTest, MaxPairwiseNumberF32Scalar) {
  constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmp %s0, %1.2s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxnmp(arg1), bit_cast<uint32_t>(2.0f));

  __uint128_t arg2 = MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxnmp(arg2), bit_cast<uint32_t>(2.0f));
}
2999 
// FMAXNMP (vector): pairwise max with quiet NaN lanes treated as "missing".
TEST(Arm64InsnTest, MaxPairwiseNumberF32x4) {
  constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnmp %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxnmp(arg1, arg2), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));

  __uint128_t arg3 =
      MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32));
  __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxnmp(arg3, arg4), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));
}
3011 
// FMINNMP (scalar form): pairwise min of lanes 0 and 1 where a quiet NaN is
// treated as "missing" — the numeric lane wins.
TEST(Arm64InsnTest, MinPairwiseNumberF32Scalar) {
  constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmp %s0, %1.2s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminnmp(arg1), bit_cast<uint32_t>(-3.0f));

  __uint128_t arg2 = MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminnmp(arg2), bit_cast<uint32_t>(2.0f));
}
3020 
// FMINNMP (vector): pairwise min with quiet NaN lanes treated as "missing"
// (first result lane is 2.0f because the NaN partner is ignored).
TEST(Arm64InsnTest, MinPairwiseNumberF32x4) {
  constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnmp %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminnmp(arg1, arg2), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));

  __uint128_t arg3 =
      MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32));
  __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminnmp(arg3, arg4), MakeF32x4(2.0f, 7.0f, 1.0f, -8.0f));
}
3032 
TEST(Arm64InsnTest, MaxAcrossF32x4) {
  // FMAXV reduces all four lanes to one maximum; unlike the "number" variant
  // it propagates a NaN operand into the result.
  constexpr auto AsmFmaxv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxv %s0, %1.4s");

  __uint128_t plain = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxv(plain), bit_cast<uint32_t>(3.0f));

  __uint128_t with_nan = MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32), -4.0f);
  ASSERT_EQ(AsmFmaxv(with_nan), kDefaultNaN32);
}
3041 
TEST(Arm64InsnTest, MinAcrossF32x4) {
  // FMINV reduces all four lanes to one minimum; a NaN lane propagates.
  constexpr auto AsmFminv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminv %s0, %1.4s");

  __uint128_t plain = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFminv(plain), bit_cast<uint32_t>(-4.0f));

  __uint128_t with_nan = MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32), -4.0f);
  ASSERT_EQ(AsmFminv(with_nan), kDefaultNaN32);
}
3050 
TEST(Arm64InsnTest, MaxNumberAcrossF32x4) {
  // FMAXNMV reduces all four lanes with maxNum semantics: quiet NaN lanes are
  // skipped rather than propagated.
  constexpr auto AsmFmaxnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmv %s0, %1.4s");

  __uint128_t plain = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxnmv(plain), bit_cast<uint32_t>(3.0f));

  __uint128_t with_nan = MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32), 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxnmv(with_nan), bit_cast<uint32_t>(3.0f));
}
3059 
TEST(Arm64InsnTest, MinNumberAcrossF32x4) {
  // FMINNMV reduces all four lanes with minNum semantics: quiet NaN lanes are
  // skipped rather than propagated.
  constexpr auto AsmFminnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmv %s0, %1.4s");

  __uint128_t plain = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFminnmv(plain), bit_cast<uint32_t>(-4.0f));

  __uint128_t with_nan = MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32), 3.0f, -4.0f);
  ASSERT_EQ(AsmFminnmv(with_nan), bit_cast<uint32_t>(-4.0f));
}
3068 
TEST(Arm64InsnTest, MulFp32) {
  // Scalar single-precision FMUL on raw bit patterns.
  uint64_t lhs = 0x40a1999aULL;  // 5.05f
  uint64_t rhs = 0x40dae148ULL;  // 6.84f
  __uint128_t product = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %s2")(lhs, rhs);
  ASSERT_EQ(product, MakeUInt128(0x420a2b03ULL, 0U));  // 34.5420f
}
3075 
TEST(Arm64InsnTest, MulFp64) {
  // Scalar double-precision FMUL on raw bit patterns.
  uint64_t lhs = 0x40226b851eb851ecULL;  // 9.21
  uint64_t rhs = 0x4020c7ae147ae148ULL;  // 8.39
  __uint128_t product = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %d2")(lhs, rhs);
  ASSERT_EQ(product, MakeUInt128(0x40535166cf41f214ULL, 0U));  // 77.2719
}
3082 
TEST(Arm64InsnTest, MulF32x4) {
  // Lane-wise FMUL on four packed floats.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.0f, -2.0f, 3.0f, -4.0f);
  __uint128_t rhs = MakeF32x4(-3.0f, -1.0f, 4.0f, 1.0f);
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeF32x4(-3.0f, 2.0f, 12.0f, -4.0f));
}
3089 
TEST(Arm64InsnTest, MulF64x2) {
  // Lane-wise FMUL on two packed doubles.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(-4.0, 2.0);
  __uint128_t rhs = MakeF64x2(2.0, 3.0);
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeF64x2(-8.0, 6.0));
}
3096 
TEST(Arm64InsnTest, MulF32x4ByScalar) {
  // Every lane of the first vector is multiplied by lane 3 (9.0f) of the second.
  __uint128_t vec = MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f);
  __uint128_t scale = MakeF32x4(6.0f, 7.0f, 8.0f, 9.0f);
  __uint128_t product = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.s[3]")(vec, scale);
  ASSERT_EQ(product, MakeF32x4(18.0f, 27.0f, 36.0f, 45.0f));
}
3103 
TEST(Arm64InsnTest, MulF64x2ByScalar) {
  // Both lanes of the first vector are multiplied by lane 1 (4.0) of the second.
  __uint128_t vec = MakeF64x2(2.0, 3.0);
  __uint128_t scale = MakeF64x2(5.0, 4.0);
  __uint128_t product = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.d[1]")(vec, scale);
  ASSERT_EQ(product, MakeF64x2(8.0, 12.0));
}
3110 
TEST(Arm64InsnTest, MulF32IndexedElem) {
  // Scalar-by-element FMUL: lane 0 of the first source times lane 2 of the second.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %2.s[2]");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  // 2 * 17
  ASSERT_EQ(AsmFmul(lhs, rhs), bit_cast<uint32_t>(34.0f));
}
3117 
TEST(Arm64InsnTest, MulF64IndexedElem) {
  // Scalar-by-element FMUL: lane 0 of the first source times lane 1 of the second.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %2.d[1]");
  __uint128_t lhs = MakeF64x2(2.0, 3.0);
  __uint128_t rhs = MakeF64x2(5.0, 4.0);
  // 2 * 4
  ASSERT_EQ(AsmFmul(lhs, rhs), bit_cast<uint64_t>(8.0));
}
3124 
TEST(Arm64InsnTest, MulExtendedF32) {
  // Scalar FMULX on finite operands behaves like a plain multiply.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %s2");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  // 2 * 11
  ASSERT_EQ(AsmFmulx(lhs, rhs), bit_cast<uint32_t>(22.0f));
}
3131 
TEST(Arm64InsnTest, MulExtendedF32x4) {
  // Vector FMULX on finite operands: lane-wise products.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(lhs, rhs), MakeF32x4(22.0f, 39.0f, 85.0f, 133.0f));
}
3138 
TEST(Arm64InsnTest, MulExtendedF32IndexedElem) {
  // Scalar-by-element FMULX: lane 0 of the first source times lane 2 of the second.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %2.s[2]");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  // 2 * 17
  ASSERT_EQ(AsmFmulx(lhs, rhs), bit_cast<uint32_t>(34.0f));
}
3145 
TEST(Arm64InsnTest, MulExtendedF64IndexedElem) {
  // Scalar-by-element FMULX: lane 0 of the first source times lane 1 of the second.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %d0, %d1, %2.d[1]");
  __uint128_t lhs = MakeF64x2(2.0, 3.0);
  __uint128_t rhs = MakeF64x2(5.0, 4.0);
  // 2 * 4
  ASSERT_EQ(AsmFmulx(lhs, rhs), bit_cast<uint64_t>(8.0));
}
3152 
TEST(Arm64InsnTest, MulExtendedF32x4IndexedElem) {
  // Vector-by-element FMULX: every lane of the first source times lane 2 (17.0f).
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.s[2]");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(lhs, rhs), MakeF32x4(34.0f, 51.0f, 85.0f, 119.0f));
}
3159 
TEST(Arm64InsnTest, MulNegFp32) {
  // FNMUL negates the product: -(2 * 3).
  uint64_t lhs = bit_cast<uint32_t>(2.0f);
  uint64_t rhs = bit_cast<uint32_t>(3.0f);
  __uint128_t neg_product = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %s0, %s1, %s2")(lhs, rhs);
  ASSERT_EQ(neg_product, MakeUInt128(bit_cast<uint32_t>(-6.0f), 0U));
}
3166 
TEST(Arm64InsnTest, MulNegFp64) {
  // FNMUL negates the product: -(2 * 3).
  uint64_t lhs = bit_cast<uint64_t>(2.0);
  uint64_t rhs = bit_cast<uint64_t>(3.0);
  __uint128_t neg_product = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %d0, %d1, %d2")(lhs, rhs);
  ASSERT_EQ(neg_product, MakeUInt128(bit_cast<uint64_t>(-6.0), 0U));
}
3173 
TEST(Arm64InsnTest, DivFp32) {
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %s0, %s1, %s2");

  uint32_t dividend = 0x40c23d71U;  // 6.07f
  uint32_t divisor = 0x401a3d71U;   // 2.41f
  ASSERT_EQ(AsmFdiv(dividend, divisor), MakeUInt128(0x402131edULL, 0U));  // 2.5186722f

  // FDIV must be able to produce a denormal result under the default FPCR,
  // i.e. with the FZ (flush-to-zero) bit off.
  uint32_t tiny = 0xa876eff9U;  // unbiased exponent -47
  uint32_t huge = 0xe7d86b60U;  // unbiased exponent 80
  ASSERT_EQ(AsmFdiv(tiny, huge), MakeUInt128(0x0049065cULL, 0U));  // denormal
}
3187 
TEST(Arm64InsnTest, DivFp64) {
  // Scalar double-precision FDIV on raw bit patterns.
  uint64_t dividend = 0x401e5c28f5c28f5cULL;  // 7.59
  uint64_t divisor = 0x3ff28f5c28f5c28fULL;   // 1.16
  __uint128_t quotient = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %d0, %d1, %d2")(dividend, divisor);
  ASSERT_EQ(quotient, MakeUInt128(0x401a2c234f72c235ULL, 0U));  // 6.5431034482758620995923593
}
3194 
TEST(Arm64InsnTest, DivFp32_FlagsWhenDivByZero) {
  // Verifies that a scalar float divide-by-zero raises exactly the DZC
  // (divide-by-zero cumulative) flag in FPSR and does not also raise IOC.
  uint64_t fpsr;
  // volatile keeps the compiler from constant-folding the division away.
  volatile float dividend = 123.0f;
  volatile float divisor = 0.0f;
  float res;
  // Clear FPSR first so only flags raised by this FDIV are observed.
  asm volatile(
      "msr fpsr, xzr\n\t"
      "fdiv %s1, %s2, %s3\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr), "=w"(res)
      : "w"(dividend), "w"(divisor));
  ASSERT_TRUE((fpsr & kFpsrDzcBit) == (kFpsrDzcBit));

  // Previous bug caused IOC to be set upon scalar div by zero.
  ASSERT_TRUE((fpsr & kFpsrIocBit) == 0);
}
3211 
TEST(Arm64InsnTest, DivFp64_FlagsWhenDivByZero) {
  // Verifies that a scalar double divide-by-zero raises exactly the DZC
  // (divide-by-zero cumulative) flag in FPSR and does not also raise IOC.
  uint64_t fpsr;
  double res;
  // Clear FPSR first so only flags raised by this FDIV are observed.
  asm volatile(
      "msr fpsr, xzr\n\t"
      "fdiv %d1, %d2, %d3\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr), "=w"(res)
      : "w"(123.0), "w"(0.0));
  ASSERT_TRUE((fpsr & kFpsrDzcBit) == (kFpsrDzcBit));

  // Previous bug caused IOC to be set upon scalar div by zero.
  ASSERT_TRUE((fpsr & kFpsrIocBit) == 0);
}
3226 
TEST(Arm64InsnTest, DivFp32x4) {
  // Lane-wise FDIV on four packed floats.
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.4s, %1.4s, %2.4s");

  // 16.39, 80.286, 41.16, 98.01
  __uint128_t arg1 = MakeUInt128(0x41831eb842a0926fULL, 0x4224a3d742c4051fULL);
  // 13.3, 45.45, 7.89, -2.63
  __uint128_t arg2 = MakeUInt128(0x4154cccd4235cccdULL, 0x40fc7ae1c02851ecULL);
  __uint128_t res1 = AsmFdiv(arg1, arg2);
  // 1.2323308, 1.7664686, 5.21673, -37.26616
  ASSERT_EQ(res1, MakeUInt128(0x3f9dbd043fe21ba5ULL, 0x40a6ef74c215108cULL));

  // Verify that fdiv produces a denormal result under the default FPCR.
  // (FZ off: the last lane's quotient is below the smallest normal float.)
  __uint128_t arg3 = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.eddff2p-47f);
  __uint128_t arg4 = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.b0d6c0p80f);
  __uint128_t res2 = AsmFdiv(arg3, arg4);
  __uint128_t expected2 = MakeF32x4(1.0f, 1.0f, 1.0f, 0x0.920cb8p-126f);
  ASSERT_EQ(res2, expected2);
}
3245 
TEST(Arm64InsnTest, DivFp64x2) {
  // Lane-wise FDIV on two packed doubles.
  // 6.23, 65.02
  __uint128_t dividend = MakeUInt128(0x4018EB851EB851ECULL, 0x40504147AE147AE1ULL);
  // -7.54, 11.92
  __uint128_t divisor = MakeUInt128(0xC01E28F5C28F5C29ULL, 0x4027D70A3D70A3D7ULL);
  __uint128_t quotient =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.2d, %1.2d, %2.2d")(dividend, divisor);
  // -0.82625994695, 5.45469798658
  ASSERT_EQ(quotient, MakeUInt128(0xbfea70b8b3449564ULL, 0x4015d19c59579fc9ULL));
}
3255 
TEST(Arm64InsnTest, MulAddFp32) {
  // FMADD computes arg3 + arg1 * arg2 as a fused operation (single rounding).
  constexpr auto AsmFmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %s0, %s1, %s2, %s3");

  // 5 + 2 * 3
  __uint128_t res1 =
      AsmFmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f));
  ASSERT_EQ(res1, MakeF32x4(11.0f, 0, 0, 0));

  // -5 + 2.5 * 2 cancels exactly to +0.0.
  __uint128_t res2 =
      AsmFmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f));
  ASSERT_EQ(res2, MakeF32x4(0, 0, 0, 0));

  // These tests verify that fmadd does not lose precision while doing the mult + add.
  __uint128_t res3 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                              bit_cast<uint32_t>(0x1.000002p0f),
                              bit_cast<uint32_t>(-0x1.p23f));
  ASSERT_EQ(res3, MakeF32x4(0x1.fffffcp-2f, 0, 0, 0));

  __uint128_t res4 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                              bit_cast<uint32_t>(0x1.000002p0f),
                              bit_cast<uint32_t>(-0x1.fffffep22f));
  ASSERT_EQ(res4, MakeF32x4(0x1.fffffep-1f, 0, 0, 0));

  __uint128_t res5 = AsmFmadd(bit_cast<uint32_t>(0x1.p23f),
                              bit_cast<uint32_t>(0x1.fffffep-1f),
                              bit_cast<uint32_t>(-0x1.000002p23f));
  ASSERT_EQ(res5, MakeF32x4(-0x1.80p0f, 0, 0, 0));
}
3283 
TEST(Arm64InsnTest, MulAddFp64) {
  // FMADD computes arg3 + arg1 * arg2; FNMADD negates that same result.
  uint64_t mul1 = 0x40323d70a3d70a3dULL;    // 18.24
  uint64_t mul2 = 0x40504147ae147ae1ULL;    // 65.02
  uint64_t addend = 0x4027d70a3d70a3d7ULL;  // 11.92

  __uint128_t fused_add =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3")(mul1, mul2, addend);
  ASSERT_EQ(fused_add, MakeUInt128(0x4092b78a0902de00ULL, 0U));  // 1197.8848

  __uint128_t neg_fused_add =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3")(mul1, mul2, addend);
  ASSERT_EQ(neg_fused_add, MakeUInt128(0xc092b78a0902de00ULL, 0U));  // -1197.8848
}
3294 
TEST(Arm64InsnTest, MulAddFp64Precision) {
  // A fused multiply-add keeps the intermediate product exact, so only one
  // rounding happens at the end.
  uint64_t mul1 = bit_cast<uint64_t>(0x1.0p1023);
  uint64_t mul2 = bit_cast<uint64_t>(0x1.0p-1);
  uint64_t addend = bit_cast<uint64_t>(0x1.fffffffffffffp1022);
  __uint128_t fused =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3")(mul1, mul2, addend);
  ASSERT_EQ(fused, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3302 
TEST(Arm64InsnTest, NegMulAddFp32) {
  // FNMADD computes -(arg3 + arg1 * arg2) as a fused operation.
  constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %s0, %s1, %s2, %s3");

  // -(5 + 2 * 3)
  __uint128_t res1 =
      AsmFnmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f));
  ASSERT_EQ(res1, MakeF32x4(-11.0f, 0, 0, 0));

  // No -0 (proper negation)
  __uint128_t res2 =
      AsmFnmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f));
  ASSERT_EQ(res2, MakeF32x4(0.0f, 0, 0, 0));

  // These tests verify that fmadd does not lose precision while doing the mult + add.
  // NOTE(review): the comment above says "fmadd" but this test exercises fnmadd;
  // the expected values are the negations of the MulAddFp32 precision cases.
  __uint128_t res3 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                               bit_cast<uint32_t>(0x1.000002p0f),
                               bit_cast<uint32_t>(-0x1.p23f));
  ASSERT_EQ(res3, MakeF32x4(-0x1.fffffcp-2f, 0, 0, 0));

  __uint128_t res4 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                               bit_cast<uint32_t>(0x1.000002p0f),
                               bit_cast<uint32_t>(-0x1.fffffep22f));
  ASSERT_EQ(res4, MakeF32x4(-0x1.fffffep-1f, 0, 0, 0));

  __uint128_t res5 = AsmFnmadd(bit_cast<uint32_t>(0x1.p23f),
                               bit_cast<uint32_t>(0x1.fffffep-1f),
                               bit_cast<uint32_t>(-0x1.000002p23f));
  ASSERT_EQ(res5, MakeF32x4(0x1.80p0f, 0, 0, 0));
}
3331 
TEST(Arm64InsnTest, NegMulAddFp64) {
  // FNMADD computes -(arg3 + arg1 * arg2) as a fused operation.
  constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3");

  // -(5 + 2 * 3)
  __uint128_t basic =
      AsmFnmadd(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(basic, MakeF64x2(-11.0, 0));

  // -(-5 + 2.5 * 2) cancels to +0.0, not -0.0.
  __uint128_t exact_cancel =
      AsmFnmadd(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(-5.0));
  ASSERT_EQ(exact_cancel, MakeF64x2(0.0, 0));
}
3344 
TEST(Arm64InsnTest, NegMulSubFp64) {
  // FNMSUB computes arg1 * arg2 - arg3 (the negation of FMSUB's result).
  constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");

  // -2 * 3 - 5
  __uint128_t res1 =
      AsmFnmsub(bit_cast<uint64_t>(-2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(res1, MakeF64x2(-11.0, 0));

  uint64_t arg1 = 0x40357ae147ae147bULL;  // 21.48
  uint64_t arg2 = 0x404ce3d70a3d70a4ULL;  // 57.78 (suffix case normalized to ULL)
  uint64_t arg3 = 0x405e29999999999aULL;  // 120.65
  __uint128_t res2 = AsmFnmsub(arg1, arg2, arg3);
  ASSERT_EQ(res2, MakeUInt128(0x409181db8bac710dULL, 0U));  // 1120.4644

  // 2.5 * 2 - 5 cancels exactly; assert no -0 in this case.
  __uint128_t res3 =
      AsmFnmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(res3, MakeF64x2(0.0, 0));
}
3363 
TEST(Arm64InsnTest, NegMulSubFp64Precision) {
  // FNMSUB is fused, so the huge product is held exactly before the subtract
  // and only one rounding occurs.
  constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");

  __uint128_t fused = AsmFnmsub(bit_cast<uint64_t>(0x1.0p1023),
                                bit_cast<uint64_t>(0x1.0p-1),
                                bit_cast<uint64_t>(-0x1.fffffffffffffp1022));
  ASSERT_EQ(fused, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3372 
TEST(Arm64InsnTest, MulAddF32x4) {
  // FMLA accumulates lane-wise: dst += arg1 * arg2.
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.4s");
  __uint128_t mul1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t mul2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmla(mul1, mul2, acc), MakeF32x4(5.0f, 5.0f, 9.0f, 14.0f));
}
3380 
TEST(Arm64InsnTest, MulAddF32IndexedElem) {
  // Scalar-by-element FMLA: acc += arg1[0] * arg2[2].
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %s0, %s1, %2.s[2]");
  __uint128_t mul1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t mul2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  // 2 + (1 * 2)
  ASSERT_EQ(AsmFmla(mul1, mul2, acc), bit_cast<uint32_t>(4.0f));
}
3389 
TEST(Arm64InsnTest, MulAddF64IndexedElem) {
  // Scalar-by-element FMLA: acc += arg1[0] * arg2[1].
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %d0, %d1, %2.d[1]");
  __uint128_t mul1 = MakeF64x2(2.0, 3.0);
  __uint128_t mul2 = MakeF64x2(4.0, 5.0);
  __uint128_t acc = MakeF64x2(6.0, 7.0);
  // 6 + (2 * 5)
  ASSERT_EQ(AsmFmla(mul1, mul2, acc), bit_cast<uint64_t>(16.0));
}
3398 
TEST(Arm64InsnTest, MulAddF32x4IndexedElem) {
  // Vector-by-element FMLA: every accumulator lane gains arg1[i] * arg2[2].
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.s[2]");
  __uint128_t mul1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t mul2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmla(mul1, mul2, acc), MakeF32x4(4.0f, 7.0f, 9.0f, 8.0f));
}
3406 
TEST(Arm64InsnTest, MulSubFp32) {
  // FMSUB computes arg3 - arg1 * arg2; FNMSUB computes arg1 * arg2 - arg3.
  uint32_t mul1 = bit_cast<uint32_t>(2.0f);
  uint32_t mul2 = bit_cast<uint32_t>(5.0f);
  uint32_t acc = bit_cast<uint32_t>(3.0f);

  // 3 - 2 * 5
  __uint128_t fused_sub =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %s0, %s1, %s2, %s3")(mul1, mul2, acc);
  ASSERT_EQ(fused_sub, MakeUInt128(bit_cast<uint32_t>(-7.0f), 0U));

  // 2 * 5 - 3
  __uint128_t neg_fused_sub =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %s0, %s1, %s2, %s3")(mul1, mul2, acc);
  ASSERT_EQ(neg_fused_sub, MakeUInt128(bit_cast<uint32_t>(7.0f), 0U));
}
3417 
TEST(Arm64InsnTest, MulSubFp64) {
  // FMSUB computes arg3 - arg1 * arg2 with a single rounding.
  constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");

  uint64_t arg1 = 0x40357ae147ae147bULL;  // 21.48
  uint64_t arg2 = 0x404ce3d70a3d70a4ULL;  // 57.78 (suffix case normalized to ULL)
  uint64_t arg3 = 0x405e29999999999aULL;  // 120.65
  __uint128_t res1 = AsmFmsub(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xc09181db8bac710dULL, 0U));  // -1120.4644

  // Basic case: -5 - 2 * 3.
  __uint128_t res2 =
      AsmFmsub(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(-5.0));
  ASSERT_EQ(res2, MakeF64x2(-11.0, 0));

  // 5 - 2.5 * 2 cancels exactly; no -0 in this case (proper negation order).
  __uint128_t res3 =
      AsmFmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(res3, MakeF64x2(0.0, 0));
}
3437 
TEST(Arm64InsnTest, MulSubFp64Precision) {
  // FMSUB is fused: the intermediate product is exact and only the final
  // subtraction rounds.
  constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");
  __uint128_t fused = AsmFmsub(bit_cast<uint64_t>(-0x1.0p1023),
                               bit_cast<uint64_t>(0x1.0p-1),
                               bit_cast<uint64_t>(0x1.fffffffffffffp1022));
  ASSERT_EQ(fused, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3445 
TEST(Arm64InsnTest, MulSubF32x4) {
  // FMLS subtracts lane-wise: dst -= arg1 * arg2.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.4s");
  __uint128_t mul1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t mul2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmls(mul1, mul2, acc), MakeF32x4(-1.0f, 1.0f, -7.0f, -10.0f));
}
3453 
TEST(Arm64InsnTest, MulSubF32IndexedElem) {
  // Scalar-by-element FMLS: acc -= arg1[0] * arg2[2].
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %s0, %s1, %2.s[2]");
  __uint128_t mul1 = MakeF32x4(2.0f, 1.0f, 4.0f, 3.0f);
  __uint128_t mul2 = MakeF32x4(4.0f, 3.0f, 2.0f, 1.0f);
  __uint128_t acc = MakeF32x4(8.0f, 3.0f, 1.0f, 2.0f);
  // 8 - (2 * 2)
  ASSERT_EQ(AsmFmls(mul1, mul2, acc), bit_cast<uint32_t>(4.0f));
}
3462 
TEST(Arm64InsnTest, MulSubF64IndexedElem) {
  // Scalar-by-element FMLS: acc -= arg1[0] * arg2[1].
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %d0, %d1, %2.d[1]");
  __uint128_t arg1 = MakeF64x2(2.0, 5.0);
  __uint128_t arg2 = MakeF64x2(4.0, 1.0);
  // Use a double literal (7.0, not 7.0f) for the F64x2 constructor, matching
  // every other double-vector test in this file.
  __uint128_t arg3 = MakeF64x2(6.0, 7.0);
  // 6 - (2 * 1)
  ASSERT_EQ(AsmFmls(arg1, arg2, arg3), bit_cast<uint64_t>(4.0));
}
3471 
TEST(Arm64InsnTest, MulSubF32x4IndexedElem) {
  // Vector-by-element FMLS: every accumulator lane loses arg1[i] * arg2[2].
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.s[2]");
  __uint128_t mul1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t mul2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmls(mul1, mul2, acc), MakeF32x4(0.0f, -1.0f, -7.0f, -4.0f));
}
3479 
TEST(Arm64InsnTest, CompareEqualF32) {
  // Scalar FCMEQ yields an all-ones mask on equality, all-zeros otherwise;
  // a NaN operand never compares equal.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %s0, %s1, %s2");
  uint32_t val_two = bit_cast<uint32_t>(2.0f);
  uint32_t val_six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmeq(val_two, val_six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmeq(val_two, val_two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmeq(kDefaultNaN32, val_two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmeq(val_two, kDefaultNaN32), 0x00000000ULL);
}
3489 
TEST(Arm64InsnTest, CompareEqualF32x4) {
  // Vector FCMEQ: per-lane all-ones/all-zeros equality masks.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  // Only lane 1 (2.0f == 2.0f) matches.
  ASSERT_EQ(AsmFcmeq(lhs, rhs), MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
}
3497 
TEST(Arm64InsnTest, CompareGreaterEqualF32) {
  // Scalar FCMGE mask; any comparison involving NaN is false.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %s0, %s1, %s2");
  uint32_t val_two = bit_cast<uint32_t>(2.0f);
  uint32_t val_six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmge(val_two, val_six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(val_two, val_two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(val_six, val_two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(kDefaultNaN32, val_two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(val_two, kDefaultNaN32), 0x00000000ULL);
}
3508 
TEST(Arm64InsnTest, CompareGreaterEqualF32x4) {
  // Vector FCMGE: per-lane (lhs >= rhs) masks.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  // Lanes 1 (2 >= 2) and 2 (7 >= -8) pass.
  ASSERT_EQ(AsmFcmge(lhs, rhs), MakeUInt128(0xffffffff00000000ULL, 0x00000000ffffffffULL));
}
3516 
TEST(Arm64InsnTest, CompareGreaterF32) {
  // Scalar FCMGT mask; any comparison involving NaN is false.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %s0, %s1, %s2");
  uint32_t val_two = bit_cast<uint32_t>(2.0f);
  uint32_t val_six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmgt(val_two, val_six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(val_two, val_two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(val_six, val_two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmgt(kDefaultNaN32, val_two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(val_two, kDefaultNaN32), 0x00000000ULL);
}
3527 
TEST(Arm64InsnTest, CompareGreaterF32x4) {
  // Vector FCMGT: per-lane (lhs > rhs) masks.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  // Only lane 2 (7 > -8) passes; the equal lane does not.
  ASSERT_EQ(AsmFcmgt(lhs, rhs), MakeUInt128(0x0000000000000000ULL, 0x00000000ffffffffULL));
}
3535 
TEST(Arm64InsnTest, CompareEqualZeroF32) {
  // Scalar FCMEQ-with-zero: mask is all-ones only for 0.0.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %s0, %s1, #0");
  ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(4.0f)), 0x00000000ULL);
}
3541 
TEST(Arm64InsnTest, CompareEqualZeroF32x4) {
  // Vector FCMEQ-with-zero: only the 0.0f lane produces an all-ones mask.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %0.4s, %1.4s, #0");
  __uint128_t input = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  ASSERT_EQ(AsmFcmeq(input), MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
}
3548 
TEST(Arm64InsnTest, CompareGreaterThanZeroF32) {
  // Scalar FCMGT-with-zero: strictly positive values only.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %s0, %s1, #0");
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(0.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
}
3555 
TEST(Arm64InsnTest, CompareGreaterThanZeroF32x4) {
  // Vector FCMGT-with-zero: only the strictly positive lanes (7.0f, 1.0f) pass.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %0.4s, %1.4s, #0");
  __uint128_t input = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  ASSERT_EQ(AsmFcmgt(input), MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
}
3562 
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32) {
  // Scalar FCMGE-with-zero: non-negative values pass, including 0.0.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %s0, %s1, #0");
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
}
3569 
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32x4) {
  // Vector FCMGE-with-zero: every lane except -3.0f passes.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %0.4s, %1.4s, #0");
  __uint128_t input = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  ASSERT_EQ(AsmFcmge(input), MakeUInt128(0xffffffff00000000ULL, 0xffffffffffffffffULL));
}
3576 
TEST(Arm64InsnTest, CompareLessThanZeroF32) {
  // Scalar FCMLT-with-zero: strictly negative values only.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %s0, %s1, #0");
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(0.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(1.0f)), 0x00000000ULL);
}
3583 
TEST(Arm64InsnTest, CompareLessThanZeroF32x4) {
  // Vector FCMLT-with-zero: only the -3.0f lane passes.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %0.4s, %1.4s, #0");
  __uint128_t input = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  ASSERT_EQ(AsmFcmlt(input), MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
}
3590 
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32) {
  // Scalar FCMLE-with-zero: non-positive values pass, including 0.0.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %s0, %s1, #0");
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(1.0f)), 0x00000000ULL);
}
3597 
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32x4) {
  // Vector FCMLE-with-zero: the -3.0f and 0.0f lanes pass.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %0.4s, %1.4s, #0");
  __uint128_t input = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  ASSERT_EQ(AsmFcmle(input), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3604 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32) {
  // Scalar FACGT compares magnitudes: |lhs| > |rhs|.
  constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %s0, %s1, %s2");
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);
}
3611 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanOrEqualF32) {
  // Scalar FACGE compares magnitudes: |lhs| >= |rhs|.
  constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %s0, %s1, %s2");
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);
}
3618 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32x4) {
  // Vector FACGT: per-lane |lhs| > |rhs| masks.
  constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f);
  __uint128_t rhs = MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f);
  // Lanes 0 (3 > 1) and 3 (4 > 2) pass; the equal-magnitude lane does not.
  ASSERT_EQ(AsmFacgt(lhs, rhs), MakeUInt128(0x00000000ffffffffULL, 0xffffffff00000000ULL));
}
3625 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanEqualF32x4) {
  // Vector FACGE: per-lane |lhs| >= |rhs| masks.
  constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f);
  __uint128_t rhs = MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f);
  // The equal-magnitude lane (|1| >= |-1|) now passes too; |3| >= |-7| does not.
  ASSERT_EQ(AsmFacge(lhs, rhs), MakeUInt128(0xffffffffffffffffULL, 0xffffffff00000000ULL));
}
3632 
TEST(Arm64InsnTest, CompareEqualF64) {
  // FCMEQ (scalar, double): equality mask; a NaN operand always compares unequal.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %d0, %d1, %d2");
  const uint64_t kTwo = bit_cast<uint64_t>(2.0);
  const uint64_t kSix = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmeq(kTwo, kSix), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(kTwo, kTwo), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmeq(kDefaultNaN64, kTwo), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(kTwo, kDefaultNaN64), 0x0000000000000000ULL);
}
3642 
TEST(Arm64InsnTest, CompareEqualF64x2) {
  // FCMEQ (vector): each 64-bit lane becomes all ones on equality, zero otherwise.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFcmeq(MakeF64x2(-3.0, 2.0), MakeF64x2(6.0, 2.0)),
            MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  // No lane matches here, including -0.0 vs 5.0.
  ASSERT_EQ(AsmFcmeq(MakeF64x2(7.0, -0.0), MakeF64x2(-8.0, 5.0)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
3654 
TEST(Arm64InsnTest, CompareGreaterEqualF64) {
  // FCMGE (scalar, double): signed FP >= mask; NaN operands compare false.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %d0, %d1, %d2");
  const uint64_t kTwo = bit_cast<uint64_t>(2.0);
  const uint64_t kSix = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmge(kTwo, kSix), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(kTwo, kTwo), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmge(kSix, kTwo), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmge(kDefaultNaN64, kTwo), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(kTwo, kDefaultNaN64), 0x0000000000000000ULL);
}
3665 
TEST(Arm64InsnTest, CompareGreaterEqualF64x2) {
  // FCMGE (vector): per-lane FP >= produces an all-ones 64-bit mask.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFcmge(MakeF64x2(-3.0, 2.0), MakeF64x2(6.0, 2.0)),
            MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  ASSERT_EQ(AsmFcmge(MakeF64x2(7.0, -0.0), MakeF64x2(-8.0, 5.0)),
            MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3677 
TEST(Arm64InsnTest, CompareGreaterF64) {
  // FCMGT (scalar, double): strict FP > mask; NaN operands compare false.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %d0, %d1, %d2");
  const uint64_t kTwo = bit_cast<uint64_t>(2.0);
  const uint64_t kSix = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmgt(kTwo, kSix), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(kTwo, kTwo), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(kSix, kTwo), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmgt(kDefaultNaN64, kTwo), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(kTwo, kDefaultNaN64), 0x0000000000000000ULL);
}
3688 
TEST(Arm64InsnTest, CompareGreaterF64x2) {
  // FCMGT (vector): per-lane strict FP > produces an all-ones 64-bit mask.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFcmgt(MakeF64x2(-3.0, 2.0), MakeF64x2(6.0, 2.0)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcmgt(MakeF64x2(7.0, -0.0), MakeF64x2(-8.0, 5.0)),
            MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3700 
TEST(Arm64InsnTest, AndInt8x16) {
  // AND over the full 128-bit register (16 byte lanes).
  constexpr auto AsmAnd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.16b, %1.16b, %2.16b");
  __uint128_t lhs = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  __uint128_t rhs = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  ASSERT_EQ(AsmAnd(lhs, rhs), MakeUInt128(0x0480842700030170ULL, 0x2000004100001060ULL));
}
3707 
TEST(Arm64InsnTest, AndInt8x8) {
  // AND on the 64-bit form: the upper half of the result must be zeroed.
  constexpr auto AsmAnd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  __uint128_t rhs = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  ASSERT_EQ(AsmAnd(lhs, rhs), MakeUInt128(0x0480842700030170ULL, 0));
}
3714 
TEST(Arm64InsnTest, OrInt8x16) {
  // ORR over the full 128-bit register.
  constexpr auto AsmOrr = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.16b, %1.16b, %2.16b");
  __uint128_t lhs = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  __uint128_t rhs = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  ASSERT_EQ(AsmOrr(lhs, rhs), MakeUInt128(0x44ffbb55ffbb7744ULL, 0x136a56c8136a56c8ULL));
}
3721 
TEST(Arm64InsnTest, OrInt8x8) {
  // ORR on the 64-bit form: the upper half of the result must be zeroed.
  constexpr auto AsmOrr = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  __uint128_t rhs = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  ASSERT_EQ(AsmOrr(lhs, rhs), MakeUInt128(0x44ffbb55ffbb7744ULL, 0));
}
3728 
TEST(Arm64InsnTest, XorInt8x16) {
  // EOR over the full 128-bit register.
  constexpr auto AsmEor = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.16b, %1.16b, %2.16b");
  __uint128_t lhs = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  __uint128_t rhs = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  ASSERT_EQ(AsmEor(lhs, rhs), MakeUInt128(0x9269ff671cfe833bULL, 0xc6bd2031ce224768ULL));
}
3735 
TEST(Arm64InsnTest, XorInt8x8) {
  // EOR on the 64-bit form: the upper half of the result must be zeroed.
  constexpr auto AsmEor = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  __uint128_t rhs = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  ASSERT_EQ(AsmEor(lhs, rhs), MakeUInt128(0x9269ff671cfe833bULL, 0));
}
3742 
TEST(Arm64InsnTest, AndNotInt8x16) {
  // BIC (register): lhs AND NOT rhs over the full 128-bit register.
  constexpr auto AsmBic = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.16b, %1.16b, %2.16b");
  __uint128_t lhs = MakeUInt128(0x0313783875288658ULL, 0x7533208381420617ULL);
  __uint128_t rhs = MakeUInt128(0x2327917860857843ULL, 0x8382796797668145ULL);
  ASSERT_EQ(AsmBic(lhs, rhs), MakeUInt128(0x0010680015288618ULL, 0x7431008000000612ULL));
}
3749 
TEST(Arm64InsnTest, AndNotInt8x8) {
  // BIC (register) on the 64-bit form: the upper half must be zeroed.
  constexpr auto AsmBic = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x4861045432664821ULL, 0x2590360011330530ULL);
  __uint128_t rhs = MakeUInt128(0x5420199561121290ULL, 0x8572424541506959ULL);
  ASSERT_EQ(AsmBic(lhs, rhs), MakeUInt128(0x0841044012644821ULL, 0x0000000000000000ULL));
}
3756 
TEST(Arm64InsnTest, AndNotInt16x4Imm) {
  // BIC (immediate): clears bits 0-1 in each of the four 16-bit lanes; the
  // 64-bit form zeroes the upper half of the register.
  __uint128_t vec = MakeUInt128(0x9690314950191085ULL, 0x7598442391986291ULL);
  asm("bic %0.4h, #0x3" : "=w"(vec) : "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x9690314850181084ULL, 0x0000000000000000ULL));
}
3764 
TEST(Arm64InsnTest, AndNotInt16x4ImmShiftedBy8) {
  // BIC (immediate, LSL #8): clears the 0xa800 bits in each 16-bit lane.
  __uint128_t vec = MakeUInt128(0x8354056704038674ULL, 0x3513622224771589ULL);
  asm("bic %0.4h, #0xa8, lsl #8" : "=w"(vec) : "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x0354056704030674ULL, 0x0000000000000000ULL));
}
3772 
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy8) {
  // BIC (immediate, LSL #8): clears the 0x0000d300 bits in each 32-bit lane.
  __uint128_t vec = MakeUInt128(0x1842631298608099ULL, 0x8886874132604721ULL);
  asm("bic %0.2s, #0xd3, lsl #8" : "=w"(vec) : "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1842201298600099ULL, 0x0000000000000000ULL));
}
3780 
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy16) {
  // BIC (immediate, LSL #16): clears the 0x00220000 bits in each 32-bit lane.
  __uint128_t vec = MakeUInt128(0x2947867242292465ULL, 0x4366800980676928ULL);
  asm("bic %0.2s, #0x22, lsl #16" : "=w"(vec) : "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x2945867242092465ULL, 0x0000000000000000ULL));
}
3788 
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy24) {
  // BIC (immediate, LSL #24): clears the 0x83000000 bits in each 32-bit lane.
  __uint128_t vec = MakeUInt128(0x0706977942236250ULL, 0x8221688957383798ULL);
  asm("bic %0.2s, #0x83, lsl #24" : "=w"(vec) : "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x0406977940236250ULL, 0x0000000000000000ULL));
}
3796 
TEST(Arm64InsnTest, OrInt16x4Imm) {
  // ORR (immediate): sets bits 0 and 2 in each of the four 16-bit lanes.
  __uint128_t vec = MakeUInt128(0x0841284886269456ULL, 0x0424196528502221ULL);
  asm("orr %0.4h, #0x5" : "=w"(vec) : "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x0845284d86279457ULL, 0x0000000000000000ULL));
}
3804 
TEST(Arm64InsnTest, OrNotInt8x16) {
  // ORN: lhs OR NOT rhs over the full 128-bit register.
  constexpr auto AsmOrn = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.16b, %1.16b, %2.16b");
  __uint128_t lhs = MakeUInt128(0x5428584447952658ULL, 0x6782105114135473ULL);
  __uint128_t rhs = MakeUInt128(0x3558764024749647ULL, 0x3263914199272604ULL);
  ASSERT_EQ(AsmOrn(lhs, rhs), MakeUInt128(0xdeafd9ffdf9f6ff8ULL, 0xef9e7eff76dbddfbULL));
}
3811 
TEST(Arm64InsnTest, OrNotInt8x8) {
  // ORN on the 64-bit form: the upper half of the result must be zeroed.
  constexpr auto AsmOrn = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x3279178608578438ULL, 0x3827967976681454ULL);
  __uint128_t rhs = MakeUInt128(0x6838689427741559ULL, 0x9185592524595395ULL);
  ASSERT_EQ(AsmOrn(lhs, rhs), MakeUInt128(0xb7ff97efd8dfeebeULL, 0x0000000000000000ULL));
}
3818 
TEST(Arm64InsnTest, BitwiseSelectInt8x8) {
  // BSL: the initial destination acts as the selector — one-bits take the
  // corresponding bit from the first source, zero-bits from the second.
  constexpr auto AsmBsl = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bsl %0.8b, %1.8b, %2.8b");
  __uint128_t if_ones = MakeUInt128(0x2000568127145263ULL, 0x5608277857713427ULL);
  __uint128_t if_zeros = MakeUInt128(0x0792279689258923ULL, 0x5420199561121290ULL);
  __uint128_t selector = MakeUInt128(0x8372978049951059ULL, 0x7317328160963185ULL);
  ASSERT_EQ(AsmBsl(if_ones, if_zeros, selector),
            MakeUInt128(0x0480369681349963ULL, 0x0000000000000000ULL));
}
3826 
TEST(Arm64InsnTest, BitwiseInsertIfTrueInt8x8) {
  // BIT: bits of the first source are inserted into the accumulator wherever
  // the second source (the mask) has ones.
  constexpr auto AsmBit = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bit %0.8b, %1.8b, %2.8b");
  __uint128_t src = MakeUInt128(0x3678925903600113ULL, 0x3053054882046652ULL);
  __uint128_t mask = MakeUInt128(0x9326117931051185ULL, 0x4807446237996274ULL);
  __uint128_t accum = MakeUInt128(0x6430860213949463ULL, 0x9522473719070217ULL);
  ASSERT_EQ(AsmBit(src, mask, accum), MakeUInt128(0x7630965b03908563ULL, 0x0000000000000000ULL));
}
3834 
TEST(Arm64InsnTest, BitwiseInsertIfFalseInt8x8) {
  // BIF: bits of the first source are inserted into the accumulator wherever
  // the second source (the mask) has zeroes.
  constexpr auto AsmBif = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bif %0.8b, %1.8b, %2.8b");
  __uint128_t src = MakeUInt128(0x7067982148086513ULL, 0x2823066470938446ULL);
  __uint128_t mask = MakeUInt128(0x5964462294895493ULL, 0x0381964428810975ULL);
  __uint128_t accum = MakeUInt128(0x0348610454326648ULL, 0x2133936072602491ULL);
  ASSERT_EQ(AsmBif(src, mask, accum), MakeUInt128(0x2143d8015c006500ULL, 0x0000000000000000ULL));
}
3842 
TEST(Arm64InsnTest, ArithmeticShiftRightInt64x1) {
  // SSHR (scalar): arithmetic shift right replicates the sign bit.
  constexpr auto AsmSshr = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #39");
  ASSERT_EQ(AsmSshr(MakeUInt128(0x9486015046652681ULL, 0x4398770516153170ULL)),
            MakeUInt128(0xffffffffff290c02ULL, 0x0000000000000000ULL));
}
3848 
TEST(Arm64InsnTest, ArithmeticShiftRightBy64Int64x1) {
  // SSHR by the full element width: a negative input collapses to all ones.
  constexpr auto AsmSshr = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #64");
  ASSERT_EQ(AsmSshr(MakeUInt128(0x9176042601763387ULL, 0x0454990176143641ULL)),
            MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3854 
TEST(Arm64InsnTest, ArithmeticShiftRightInt64x2) {
  // SSHR (vector): per-lane arithmetic shift right of both 64-bit lanes.
  constexpr auto AsmSshr = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %0.2d, %1.2d, #35");
  ASSERT_EQ(AsmSshr(MakeUInt128(0x7501116498327856ULL, 0x3531614516845769ULL)),
            MakeUInt128(0x000000000ea0222cULL, 0x0000000006a62c28ULL));
}
3860 
TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt64x1) {
  // SSRA: arithmetic shift right, then add to the destination register.
  constexpr auto AsmSsra = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #40");
  __uint128_t shifted = MakeUInt128(0x9667179643468760ULL, 0x0770479995378833ULL);
  __uint128_t accum = MakeUInt128(0x2557176908196030ULL, 0x9201824018842705ULL);
  ASSERT_EQ(AsmSsra(shifted, accum), MakeUInt128(0x2557176907afc747ULL, 0x0000000000000000ULL));
}
3867 
TEST(Arm64InsnTest, ArithmeticShiftRightBy64AccumulateInt64x1) {
  // SSRA with a 64-bit shift: a negative input adds -1 to the accumulator.
  constexpr auto AsmSsra = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #64");
  __uint128_t shifted = MakeUInt128(0x9223343657791601ULL, 0x2809317940171859ULL);
  __uint128_t accum = MakeUInt128(0x3498025249906698ULL, 0x4233017350358044ULL);
  ASSERT_EQ(AsmSsra(shifted, accum), MakeUInt128(0x3498025249906697ULL, 0x0000000000000000ULL));
}
3874 
TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt16x8) {
  // SSRA (vector): per-lane signed shift right by 12, accumulated per lane.
  constexpr auto AsmSsra = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %0.8h, %1.8h, #12");
  __uint128_t shifted = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  __uint128_t accum = MakeUInt128(0x0101655256375678ULL, 0x5667227966198857ULL);
  ASSERT_EQ(AsmSsra(shifted, accum), MakeUInt128(0x00fa6556563a567dULL, 0x5669227b6611885cULL));
}
3881 
TEST(Arm64InsnTest, ArithmeticRoundingShiftRightAccumulateInt16x8) {
  // SRSRA: signed shift right by 12 with round-to-nearest, then accumulate.
  constexpr auto AsmSrsra = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %0.8h, %1.8h, #12");
  __uint128_t shifted = MakeUInt128(0x9894671543578468ULL, 0x7886144458123145ULL);
  __uint128_t accum = MakeUInt128(0x1412147805734551ULL, 0x0500801908699603ULL);
  ASSERT_EQ(AsmSrsra(shifted, accum), MakeUInt128(0x140c147e05774549ULL, 0x0508801a086f9606ULL));
}
3888 
TEST(Arm64InsnTest, LogicalShiftRightInt64x1) {
  // USHR (scalar): logical shift right fills with zeroes.
  constexpr auto AsmUshr = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #33");
  ASSERT_EQ(AsmUshr(MakeUInt128(0x9859771921805158ULL, 0x5321473926532515ULL)),
            MakeUInt128(0x000000004c2cbb8cULL, 0x0000000000000000ULL));
}
3894 
TEST(Arm64InsnTest, LogicalShiftRightBy64Int64x1) {
  // USHR by the full element width always produces zero.
  constexpr auto AsmUshr = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #64");
  ASSERT_EQ(AsmUshr(MakeUInt128(0x9474696134360928ULL, 0x6148494178501718ULL)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
3900 
TEST(Arm64InsnTest, LogicalShiftRightInt64x2) {
  // USHR (vector): per-lane logical shift right of both 64-bit lanes.
  constexpr auto AsmUshr = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %0.2d, %1.2d, #33");
  ASSERT_EQ(AsmUshr(MakeUInt128(0x3962657978771855ULL, 0x6084552965412665ULL)),
            MakeUInt128(0x000000001cb132bcULL, 0x0000000030422a94ULL));
}
3906 
TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt64x1) {
  // USRA: logical shift right, then add to the destination register.
  constexpr auto AsmUsra = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #40");
  __uint128_t shifted = MakeUInt128(0x9004112453790153ULL, 0x3296615697052237ULL);
  __uint128_t accum = MakeUInt128(0x0499939532215362ULL, 0x2748476603613677ULL);
  ASSERT_EQ(AsmUsra(shifted, accum), MakeUInt128(0x0499939532b15773ULL, 0x0000000000000000ULL));
}
3913 
TEST(Arm64InsnTest, LogicalShiftRightBy64AccumulateInt64x1) {
  // USRA with a 64-bit shift adds zero, leaving the accumulator untouched.
  constexpr auto AsmUsra = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #64");
  __uint128_t shifted = MakeUInt128(0x9886592578662856ULL, 0x1249665523533829ULL);
  __uint128_t accum = MakeUInt128(0x3559152534784459ULL, 0x8183134112900199ULL);
  ASSERT_EQ(AsmUsra(shifted, accum), MakeUInt128(0x3559152534784459ULL, 0x0000000000000000ULL));
}
3920 
TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt16x8) {
  // USRA (vector): per-lane unsigned shift right by 12, accumulated per lane.
  constexpr auto AsmUsra = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %0.8h, %1.8h, #12");
  __uint128_t shifted = MakeUInt128(0x9984345225161050ULL, 0x7027056235266012ULL);
  __uint128_t accum = MakeUInt128(0x4628654036036745ULL, 0x3286510570658748ULL);
  ASSERT_EQ(AsmUsra(shifted, accum), MakeUInt128(0x4631654336056746ULL, 0x328d51057068874eULL));
}
3927 
TEST(Arm64InsnTest, LogicalRoundingShiftRightAccumulateInt16x8) {
  // URSRA: unsigned rounding shift right by 12 on each 16-bit lane, then
  // accumulate into the destination.  The original body ran the *signed*
  // "srsra" (already covered by ArithmeticRoundingShiftRightAccumulateInt16x8
  // above), leaving URSRA untested.  Only the top lane of the low half differs
  // between the two: 0x9843 treated as unsigned rounds to (0x9843 + 0x800) >>
  // 12 = 0x0a, so 0x6286 + 0x0a = 0x6290 (signed would give 0x6286 - 6 =
  // 0x6280).
  __uint128_t arg1 = MakeUInt128(0x9843452251610507ULL, 0x0270562352660127ULL);
  __uint128_t arg2 = MakeUInt128(0x6286540360367453ULL, 0x2865105706587488ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ursra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x62905407603b7453ULL, 0x2865105c065d7488ULL));
}
3934 
TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x1) {
  // SRSHR (scalar): signed shift right with round-to-nearest (adds 2^39 first).
  constexpr auto AsmSrshr = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %d0, %d1, #40");
  ASSERT_EQ(AsmSrshr(MakeUInt128(0x9323685785585581ULL, 0x9555604215625088ULL)),
            MakeUInt128(0xffffffffff932368ULL, 0x0000000000000000ULL));
}
3940 
TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x2) {
  // SRSHR (vector): per-lane signed rounding shift right of both 64-bit lanes.
  constexpr auto AsmSrshr = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %0.2d, %1.2d, #36");
  ASSERT_EQ(AsmSrshr(MakeUInt128(0x8714878398908107ULL, 0x4295309410605969ULL)),
            MakeUInt128(0xfffffffff8714878ULL, 0x0000000004295309ULL));
}
3946 
TEST(Arm64InsnTest, SignedRoundingShiftRightAccumulateInt64x1) {
  // SRSRA (scalar): signed rounding shift right, then add to the destination.
  constexpr auto AsmSrsra = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %d0, %d1, #33");
  __uint128_t shifted = MakeUInt128(0x9946016520577405ULL, 0x2942305360178031ULL);
  __uint128_t accum = MakeUInt128(0x3960188013782542ULL, 0x1927094767337191ULL);
  ASSERT_EQ(AsmSrsra(shifted, accum), MakeUInt128(0x3960187fe01b25f5ULL, 0x0000000000000000ULL));
}
3953 
TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x1) {
  // URSHR (scalar): unsigned shift right with round-to-nearest.
  constexpr auto AsmUrshr = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %d0, %d1, #33");
  ASSERT_EQ(AsmUrshr(MakeUInt128(0x9713552208445285ULL, 0x2640081252027665ULL)),
            MakeUInt128(0x000000004b89aa91ULL, 0x0000000000000000ULL));
}
3959 
TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x2) {
  // URSHR (vector): per-lane unsigned rounding shift right.
  constexpr auto AsmUrshr = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %0.2d, %1.2d, #34");
  ASSERT_EQ(AsmUrshr(MakeUInt128(0x6653398573888786ULL, 0x6147629443414010ULL)),
            MakeUInt128(0x000000001994ce61ULL, 0x000000001851d8a5ULL));
}
3965 
TEST(Arm64InsnTest, UnsignedRoundingShiftRightAccumulateInt64x1) {
  // URSRA (scalar): unsigned rounding shift right, then add to the destination.
  constexpr auto AsmUrsra = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ursra %d0, %d1, #33");
  __uint128_t shifted = MakeUInt128(0x9616143204006381ULL, 0x3224658411111577ULL);
  __uint128_t accum = MakeUInt128(0x7184728147519983ULL, 0x5050478129771859ULL);
  ASSERT_EQ(AsmUrsra(shifted, accum), MakeUInt128(0x71847281925ca39cULL, 0x0000000000000000ULL));
}
3972 
TEST(Arm64InsnTest, ShiftLeftInt64x1) {
  // SHL (scalar): left shift; bits shifted past bit 63 are discarded.
  constexpr auto AsmShl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %d0, %d1, #35");
  ASSERT_EQ(AsmShl(MakeUInt128(0x3903594664691623ULL, 0x5396809201394578ULL)),
            MakeUInt128(0x2348b11800000000ULL, 0x0000000000000000ULL));
}
3978 
TEST(Arm64InsnTest, ShiftLeftInt64x2) {
  // SHL (vector): per-lane left shift of both 64-bit lanes.
  constexpr auto AsmShl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.2d, %1.2d, #37");
  ASSERT_EQ(AsmShl(MakeUInt128(0x0750111649832785ULL, 0x6353161451684576ULL)),
            MakeUInt128(0x3064f0a000000000ULL, 0x2d08aec000000000ULL));
}
3984 
TEST(Arm64InsnTest, ShiftLeftInt8x8) {
  // SHL on byte lanes: each byte shifts independently; upper half is zeroed.
  constexpr auto AsmShl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.8b, %1.8b, #6");
  ASSERT_EQ(AsmShl(MakeUInt128(0x0402956047346131ULL, 0x1382638788975517ULL)),
            MakeUInt128(0x00804000c0004040ULL, 0x0000000000000000ULL));
}
3990 
TEST(Arm64InsnTest, ShiftRightInsertInt64x1) {
  // SRI: shifts the source right and inserts it, keeping the destination's
  // top (shift-amount) bits intact.
  constexpr auto AsmSri = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %d0, %d1, #20");
  __uint128_t src = MakeUInt128(0x9112232618794059ULL, 0x9415540632701319ULL);
  __uint128_t dst = MakeUInt128(0x1537675115830432ULL, 0x0849872092028092ULL);
  ASSERT_EQ(AsmSri(src, dst), MakeUInt128(0x1537691122326187ULL, 0x0000000000000000ULL));
}
3997 
TEST(Arm64InsnTest, ShiftRightInsertInt64x2) {
  // SRI (vector): shift-right-insert applied to each 64-bit lane.
  constexpr auto AsmSri = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %0.2d, %1.2d, #21");
  __uint128_t src = MakeUInt128(0x7332335603484653ULL, 0x1873029302665964ULL);
  __uint128_t dst = MakeUInt128(0x5013718375428897ULL, 0x5579714499246540ULL);
  ASSERT_EQ(AsmSri(src, dst), MakeUInt128(0x50137399919ab01aULL, 0x557970c398149813ULL));
}
4004 
TEST(Arm64InsnTest, ShiftLeftInsertInt64x1) {
  // SLI: shifts the source left and inserts it, keeping the destination's
  // low (shift-amount) bits intact.
  constexpr auto AsmSli = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %d0, %d1, #23");
  __uint128_t src = MakeUInt128(0x3763526969344354ULL, 0x4004730671988689ULL);
  __uint128_t dst = MakeUInt128(0x6369498567302175ULL, 0x2313252926537589ULL);
  ASSERT_EQ(AsmSli(src, dst), MakeUInt128(0x34b49a21aa302175ULL, 0x0000000000000000ULL));
}
4011 
TEST(Arm64InsnTest, ShiftLeftInsertInt64x2) {
  // SLI (vector): shift-left-insert applied to each 64-bit lane.
  constexpr auto AsmSli = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %0.2d, %1.2d, #21");
  __uint128_t src = MakeUInt128(0x3270206902872323ULL, 0x3005386216347988ULL);
  __uint128_t dst = MakeUInt128(0x5094695472004795ULL, 0x2311201504329322ULL);
  ASSERT_EQ(AsmSli(src, dst), MakeUInt128(0x0d2050e464604795ULL, 0x0c42c68f31129322ULL));
}
4018 
TEST(Arm64InsnTest, ShiftLeftLongInt8x8) {
  // SHLL: widens the low eight bytes to 16 bits each, shifting left by 8.
  constexpr auto AsmShll = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll %0.8h, %1.8b, #8");
  ASSERT_EQ(AsmShll(MakeUInt128(0x2650697620201995ULL, 0x5484126500053944ULL)),
            MakeUInt128(0x2000200019009500ULL, 0x2600500069007600ULL));
}
4024 
TEST(Arm64InsnTest, ShiftLeftLongInt8x8Upper) {
  // SHLL2: same as SHLL but consumes the upper eight bytes of the source.
  constexpr auto AsmShll2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll2 %0.8h, %1.16b, #8");
  ASSERT_EQ(AsmShll2(MakeUInt128(0x9050429225978771ULL, 0x0667873840000616ULL)),
            MakeUInt128(0x4000000006001600ULL, 0x0600670087003800ULL));
}
4030 
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2) {
  // SSHLL: sign-extends the low two 32-bit lanes to 64 bits, then shifts left.
  constexpr auto AsmSshll = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll %0.2d, %1.2s, #9");
  ASSERT_EQ(AsmSshll(MakeUInt128(0x9075407923424023ULL, 0x0092590070173196ULL)),
            MakeUInt128(0x0000004684804600ULL, 0xffffff20ea80f200ULL));
}
4036 
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2Upper) {
  // SSHLL2: same as SSHLL but consumes the upper two 32-bit lanes.
  constexpr auto AsmSshll2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll2 %0.2d, %1.4s, #9");
  ASSERT_EQ(AsmSshll2(MakeUInt128(0x9382432227188515ULL, 0x9740547021482897ULL)),
            MakeUInt128(0x0000004290512e00ULL, 0xffffff2e80a8e000ULL));
}
4042 
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2By0) {
  // SXTL is the alias of SSHLL with a zero shift: pure sign extension.
  constexpr auto AsmSxtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sxtl %0.2d, %1.2s");
  ASSERT_EQ(AsmSxtl(MakeUInt128(0x9008777697763127ULL, 0x9572267265556259ULL)),
            MakeUInt128(0xffffffff97763127ULL, 0xffffffff90087776ULL));
}
4049 
TEST(Arm64InsnTest, ShiftLeftLongInt32x2) {
  // USHLL: zero-extends the low two 32-bit lanes to 64 bits, then shifts left.
  constexpr auto AsmUshll = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll %0.2d, %1.2s, #9");
  ASSERT_EQ(AsmUshll(MakeUInt128(0x9094334676851422ULL, 0x1447737939375170ULL)),
            MakeUInt128(0x000000ed0a284400ULL, 0x0000012128668c00ULL));
}
4055 
TEST(Arm64InsnTest, ShiftLeftLongInt32x2Upper) {
  // USHLL2: same as USHLL but consumes the upper two 32-bit lanes.
  constexpr auto AsmUshll2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll2 %0.2d, %1.4s, #17");
  ASSERT_EQ(AsmUshll2(MakeUInt128(0x7096834080053559ULL, 0x8491754173818839ULL)),
            MakeUInt128(0x0000e70310720000ULL, 0x00010922ea820000ULL));
}
4061 
TEST(Arm64InsnTest, ShiftLeftLongInt32x2By0) {
  // UXTL is the alias of USHLL with a zero shift: pure zero extension.
  constexpr auto AsmUxtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uxtl %0.2d, %1.2s");
  ASSERT_EQ(AsmUxtl(MakeUInt128(0x9945681506526530ULL, 0x5371829412703369ULL)),
            MakeUInt128(0x0000000006526530ULL, 0x0000000099456815ULL));
}
4068 
TEST(Arm64InsnTest, ShiftRightNarrowI16x8) {
  // SHRN: shifts each 16-bit lane right, then narrows to bytes (truncating).
  constexpr auto AsmShrn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shrn %0.8b, %1.8h, #2");
  ASSERT_EQ(AsmShrn(MakeUInt128(0x9378541786109696ULL, 0x9202538865034577ULL)),
            MakeUInt128(0x80e2405dde0584a5ULL, 0x0000000000000000ULL));
}
4074 
TEST(Arm64InsnTest, ShiftRightNarrowI16x8Upper) {
  // SHRN2: narrows into the upper half while preserving the lower half.
  constexpr auto AsmShrn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("shrn2 %0.16b, %1.8h, #2");
  __uint128_t src = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t dst = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  ASSERT_EQ(AsmShrn2(src, dst), MakeUInt128(0x3879158299848645ULL, 0xd8988dc1de009890ULL));
}
4081 
TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8) {
  // RSHRN: like SHRN but rounds to nearest before narrowing.
  constexpr auto AsmRshrn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rshrn %0.8b, %1.8h, #2");
  ASSERT_EQ(AsmRshrn(MakeUInt128(0x9303774688099929ULL, 0x6877582441047878ULL)),
            MakeUInt128(0x1e09411ec1d2024aULL, 0x0000000000000000ULL));
}
4087 
TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8Upper) {
  // RSHRN2: rounding narrow into the upper half, lower half preserved.
  constexpr auto AsmRshrn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("rshrn2 %0.16b, %1.8h, #2");
  __uint128_t src = MakeUInt128(0x9314507607167064ULL, 0x3556827437743965ULL);
  __uint128_t dst = MakeUInt128(0x2103098604092717ULL, 0x0909512808630902ULL);
  ASSERT_EQ(AsmRshrn2(src, dst), MakeUInt128(0x2103098604092717ULL, 0x569ddd59c51ec619ULL));
}
4094 
TEST(Arm64InsnTest, AddInt64x1) {
  // ADD (scalar D-register): only the low 64 bits participate; the garbage in
  // the upper halves must be ignored and the result's upper half zeroed.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %d0, %d1, %d2");
  __uint128_t lhs = MakeUInt128(0x0080000000000003ULL, 0xdeadbeef01234567ULL);
  __uint128_t rhs = MakeUInt128(0x0080000000000005ULL, 0x0123deadbeef4567ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0100000000000008ULL, 0x0ULL));
}
4101 
TEST(Arm64InsnTest, AddInt32x4) {
  // ADD over four 32-bit lanes.  Every lane has its sign bit set so that a
  // carry leaking into the next lane would be visible in the result.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0000000700000003ULL, 0x0000000f0000000bULL));
}
4110 
TEST(Arm64InsnTest, AddInt32x2) {
  // ADD on the 64-bit form (two 32-bit lanes): the upper half must be zeroed.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2s, %1.2s, %2.2s");
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0000000700000003ULL, 0));
}
4117 
TEST(Arm64InsnTest, AddInt64x2) {
  // ADD over two 64-bit lanes; carries inside a lane stay within the lane.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0000000800000003ULL, 0x000000100000000bULL));
}
4124 
TEST(Arm64InsnTest, SubInt64x1) {
  // SUB (scalar D-register): 2 - 3 wraps to all ones; upper halves ignored.
  constexpr auto AsmSub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %d0, %d1, %d2");
  __uint128_t lhs = MakeUInt128(0x0000000000000002ULL, 0x0011223344556677ULL);
  __uint128_t rhs = MakeUInt128(0x0000000000000003ULL, 0x0123456789abcdefULL);
  ASSERT_EQ(AsmSub(lhs, rhs), MakeUInt128(0xffffffffffffffffULL, 0x0ULL));
}
4131 
TEST(Arm64InsnTest, SubInt64x2) {
  // SUB over two 64-bit lanes; borrows stay within each lane.
  constexpr auto AsmSub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeUInt128(0x6873115956286388ULL, 0x2353787593751957ULL);
  __uint128_t rhs = MakeUInt128(0x7818577805321712ULL, 0x2680661300192787ULL);
  ASSERT_EQ(AsmSub(lhs, rhs), MakeUInt128(0xf05ab9e150f64c76ULL, 0xfcd31262935bf1d0ULL));
}
4139 
TEST(Arm64InsnTest, SubInt16x4) {
  // SUB over four 16-bit lanes (64-bit form).
  constexpr auto AsmSub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.4h, %1.4h, %2.4h");
  __uint128_t lhs = MakeUInt128(0x8888777766665555ULL, 0);
  __uint128_t rhs = MakeUInt128(0x1111222233334444ULL, 0);
  ASSERT_EQ(AsmSub(lhs, rhs), MakeUInt128(0x7777555533331111ULL, 0));
}
4146 
TEST(Arm64InsnTest, MultiplyI8x8) {
  // MUL over eight byte lanes; products are truncated to 8 bits per lane.
  constexpr auto AsmMul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x5261365549781893ULL, 0x1297848216829989ULL);
  __uint128_t rhs = MakeUInt128(0x4542858444795265ULL, 0x8678210511413547ULL);
  ASSERT_EQ(AsmMul(lhs, rhs), MakeUInt128(0x1a020ed464b8b0ffULL, 0x0000000000000000ULL));
}
4153 
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8) {
  // MLA: per-lane byte multiply, then add into the destination register.
  constexpr auto AsmMla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x5848406353422072ULL, 0x2258284886481584ULL);
  __uint128_t rhs = MakeUInt128(0x7823986456596116ULL, 0x3548862305774564ULL);
  __uint128_t accum = MakeUInt128(0x8797108931456691ULL, 0x3686722874894056ULL);
  ASSERT_EQ(AsmMla(lhs, rhs, accum), MakeUInt128(0xc76f10351337865dULL, 0x0000000000000000ULL));
}
4161 
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElem) {
  // MLA (by element): every lane is multiplied by lane 0 of the second source.
  constexpr auto AsmMla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.4h, %1.4h, %2.h[0]");
  __uint128_t lhs = MakeUInt128(0x4143334547762416ULL, 0x8625189835694855ULL);
  __uint128_t rhs = MakeUInt128(0x5346462080466842ULL, 0x5906949129331367ULL);
  __uint128_t accum = MakeUInt128(0x0355876402474964ULL, 0x7326391419927260ULL);
  ASSERT_EQ(AsmMla(lhs, rhs, accum), MakeUInt128(0x0e9bc72e5eb38710ULL, 0x0000000000000000ULL));
}
4169 
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElemPosition2) {
  // MLA (by element): multiplies each 32-bit lane of arg1 by lane 2 of arg2
  // (an element index above the low half, exercising the index encoding) and
  // accumulates into arg3.
  __uint128_t arg1 = MakeUInt128(0x1431429809190659ULL, 0x2509372216964615ULL);
  __uint128_t arg2 = MakeUInt128(0x2686838689427741ULL, 0x5599185592524595ULL);
  __uint128_t arg3 = MakeUInt128(0x6099124608051243ULL, 0x8843904512441365ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.2s, %1.2s, %2.s[2]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x6ce7ccbedccdc110ULL, 0x0000000000000000ULL));
}
4177 
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8IndexedElem) {
  // MLS (by element): res = arg3 - arg1 * arg2.h[1] per 16-bit lane.
  // NOTE(review): name says I8x8 but the arrangement is 4h — confirm intent.
  __uint128_t arg1 = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t arg2 = MakeUInt128(0x6549911988183479ULL, 0x7753566369807426ULL);
  __uint128_t arg3 = MakeUInt128(0x4524919217321721ULL, 0x4772350141441973ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.4h, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xcefce99ad58a9ad9ULL, 0x0000000000000000ULL));
}
4185 
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8) {
  // MLS (vector, 8x8-bit): res = arg3 - arg1 * arg2 per byte lane; arg3 is
  // the tied accumulator operand.
  __uint128_t arg1 = MakeUInt128(0x0635342207222582ULL, 0x8488648158456028ULL);
  __uint128_t arg2 = MakeUInt128(0x9864565961163548ULL, 0x8623057745649803ULL);
  __uint128_t arg3 = MakeUInt128(0x1089314566913686ULL, 0x7228748940560101ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.8b, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x80d5b973bfa58df6ULL, 0x0000000000000000ULL));
}
4193 
TEST(Arm64InsnTest, MultiplyI32x4IndexedElem) {
  // MUL (by element, full-width 4s): every 32-bit lane of arg1 is multiplied
  // by lane 1 of arg2; all 128 bits of the destination are produced.
  __uint128_t arg1 = MakeUInt128(0x143334547762416ULL, 0x8625189835694855ULL);
  __uint128_t arg2 = MakeUInt128(0x627232791786085ULL, 0x7843838279679766ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.4s, %1.4s, %2.s[1]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xcec23e830d48815aULL, 0xd12b87288ae0a3f3ULL));
}
4200 
TEST(Arm64InsnTest, PolynomialMultiplyU8x8) {
  // PMUL (vector, 8x8-bit): carry-less (GF(2)) lanewise multiply keeping the
  // low byte of each polynomial product.
  __uint128_t arg1 = MakeUInt128(0x1862056476931257ULL, 0x0586356620185581ULL);
  __uint128_t arg2 = MakeUInt128(0x1668039626579787ULL, 0x7185560845529654ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmul %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xd0d00f18f4095e25ULL, 0x0000000000000000ULL));
}
4207 
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8) {
  // PMULL: carry-less multiply of the low eight byte lanes, widening each
  // product to 16 bits (8b x 8b -> 8h).
  __uint128_t arg1 = MakeUInt128(0x1327656180937734ULL, 0x4403070746921120ULL);
  __uint128_t arg2 = MakeUInt128(0x9838952286847831ULL, 0x2355265821314495ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.8h, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x43004bcc17e805f4ULL, 0x082807a835210ce2ULL));
}
4214 
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8Upper) {
  // PMULL2: same as PMULL but reads the upper eight byte lanes of each
  // source (16b arrangement, "2" = second-half variant).
  __uint128_t arg1 = MakeUInt128(0x4439658253375438ULL, 0x8569094113031509ULL);
  __uint128_t arg2 = MakeUInt128(0x1865619673378623ULL, 0x6256125216320862ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.8h, %1.16b, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x015a005600a80372ULL, 0x30ea1da6008214d2ULL));
}
4221 
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2) {
  // PMULL (1d -> 1q): 64x64-bit carry-less multiply of the low doublewords,
  // producing a full 128-bit polynomial product (crypto extension form).
  __uint128_t arg1 = MakeUInt128(0x1000100010001000ULL, 0xffffeeeeffffeeeeULL);
  __uint128_t arg2 = MakeUInt128(0x10001ULL, 0xffffeeeeffffeeeeULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.1q, %1.1d, %2.1d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1000ULL, 0x1000ULL));
}
4228 
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2Upper) {
  // PMULL2 (2d -> 1q): same product as the test above but sourced from the
  // upper doublewords (the operands are the previous test's halves swapped).
  __uint128_t arg1 = MakeUInt128(0xffffeeeeffffeeeeULL, 0x1000100010001000ULL);
  __uint128_t arg2 = MakeUInt128(0xffffeeeeffffeeeeULL, 0x10001ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.1q, %1.2d, %2.2d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1000ULL, 0x1000ULL));
}
4235 
TEST(Arm64InsnTest, PairwiseAddInt8x16) {
  // ADDP (vector, 16x8-bit): adds adjacent byte pairs; op1's sums fill the
  // low half of the result, op2's sums the high half (0x00+0x11=0x11, ...).
  __uint128_t op1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t op2 = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xeda96521dd995511ULL, 0x1d1915110d090501ULL));
}
4242 
TEST(Arm64InsnTest, PairwiseAddInt8x8) {
  // ADDP (vector, 8x8-bit): like the 16b test above but only the low 64 bits
  // of each source participate; the upper half of the result is zeroed.
  __uint128_t op1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t op2 = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0d090501dd995511ULL, 0));
}
4249 
TEST(Arm64InsnTest, PairwiseAddInt64x2) {
  // ADDP (vector, 2x64-bit): result lanes are op1's pair sum (1+2=3) and
  // op2's pair sum (3+4=7).
  __uint128_t op1 = MakeUInt128(1ULL, 2ULL);
  __uint128_t op2 = MakeUInt128(3ULL, 4ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.2d, %1.2d, %2.2d")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(3ULL, 7ULL));
}
4256 
TEST(Arm64InsnTest, CompareEqualInt8x16) {
  // CMEQ (vector, 16x8-bit): each result byte is 0xff where the operand
  // bytes are equal, 0x00 otherwise.
  __uint128_t op1 = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t op2 = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xff0000ffff00ff00ULL, 0x0000ff00ff0000ffULL));
}
4263 
TEST(Arm64InsnTest, CompareEqualInt8x8) {
  // CMEQ (vector, 8x8-bit): same operands as the 16b test above; the 8b form
  // compares only the low half and zeroes the upper 64 bits.
  __uint128_t op1 = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t op2 = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xff0000ffff00ff00ULL, 0));
}
4270 
TEST(Arm64InsnTest, CompareEqualInt16x4) {
  // CMEQ (vector, 4x16-bit): lanes 0 (0x1111) and 2 (0x3333) match, lanes 1
  // and 3 differ.
  __uint128_t op1 = MakeUInt128(0x4444333322221111ULL, 0);
  __uint128_t op2 = MakeUInt128(0x8888333300001111ULL, 0);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.4h, %1.4h, %2.4h")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0000ffff0000ffffULL, 0));
}
4277 
TEST(Arm64InsnTest, CompareEqualInt64x1) {
  // CMEQ (scalar D form): compares only the low 64 bits; arg3 shares arg1's
  // low doubleword but not its high one, proving the upper half is ignored.
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t arg2 = MakeUInt128(0x0665499119881834ULL, 0x7977535663698074ULL);
  __uint128_t arg3 = MakeUInt128(0x8297455570674983ULL, 0x1452491921732172ULL);
  ASSERT_EQ(AsmCmeq(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmeq(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4286 
TEST(Arm64InsnTest, CompareEqualZeroInt64x1) {
  // CMEQ #0 (scalar D form): only a zero low doubleword produces all-ones;
  // arg2's nonzero high half does not affect the result.
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x1746089232839170ULL);
  ASSERT_EQ(AsmCmeq(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmeq(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4294 
TEST(Arm64InsnTest, CompareEqualZeroInt8x16) {
  // CMEQ #0 (vector, 16x8-bit): each result byte is 0xff iff the source byte
  // is zero.
  __uint128_t op = MakeUInt128(0x0000555500332200ULL, 0x0000000077001100ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffff0000ff0000ffULL, 0xffffffff00ff00ffULL));
}
4300 
TEST(Arm64InsnTest, CompareEqualZeroInt8x8) {
  // CMEQ #0 (vector, 8x8-bit): lower-half form; the high 64 bits of the
  // input are ignored and the result's upper half is zeroed.
  __uint128_t op = MakeUInt128(0x001122330000aaaaULL, 0xdeadbeef0000cafeULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xff000000ffff0000ULL, 0));
}
4306 
TEST(Arm64InsnTest, CompareGreaterInt64x1) {
  // CMGT (scalar D form, signed >): covers less-than, equal-low-doubleword,
  // and greater-than (arg4's low half is negative when viewed as signed).
  constexpr auto AsmCmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1976668559233565ULL, 0x4639138363185745ULL);
  __uint128_t arg2 = MakeUInt128(0x3474940784884423ULL, 0x7721751543342603ULL);
  __uint128_t arg3 = MakeUInt128(0x1976668559233565ULL, 0x8183196376370761ULL);
  __uint128_t arg4 = MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL);
  ASSERT_EQ(AsmCmgt(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg1, arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg1, arg4), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4317 
TEST(Arm64InsnTest, CompareGreaterZeroInt64x1) {
  // CMGT #0 (scalar D form, signed): positive -> all-ones, zero -> zero,
  // negative (arg3 low half has the sign bit set) -> zero.
  constexpr auto AsmCmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x6174599705674507ULL);
  __uint128_t arg3 = MakeUInt128(0x9592057668278967ULL, 0x7644531840404185ULL);
  ASSERT_EQ(AsmCmgt(arg1), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4327 
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x16) {
  // CMGT #0 (vector, 16x8-bit, signed): bytes 0x01..0x7f map to 0xff; zero
  // and 0x80..0xff (negative) map to 0x00.
  __uint128_t op = MakeUInt128(0x807fff00017efe02ULL, 0xff7f80000102fe02ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x00ff0000ffff00ffULL, 0x00ff0000ffff00ffULL));
}
4333 
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x8) {
  // CMGT #0 (vector, 8x8-bit, signed): lower-half form; upper input bytes are
  // ignored and the result's upper half is zeroed.
  __uint128_t op = MakeUInt128(0x00ff7f80017efe00ULL, 0x0000cafedeadbeefULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x0000ff00ffff0000ULL, 0));
}
4339 
TEST(Arm64InsnTest, CompareGreaterThanInt16x8) {
  // CMGT (vector, 8x16-bit, signed >): each result lane is all-ones when the
  // corresponding lane of arg1 is greater than that of arg2, else zero.
  __uint128_t arg1 = MakeUInt128(0x9789389001852956ULL, 0x9196780455448285ULL);
  // Fixed: the low half of arg2 was the only constant in the file missing its
  // ULL suffix; added for type consistency (the value is unchanged).
  __uint128_t arg2 = MakeUInt128(0x7269389081795897ULL, 0x5469399264218285ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffff0000ULL, 0x0000ffff00000000ULL));
}
4346 
TEST(Arm64InsnTest, CompareGreaterThanInt32x4) {
  // CMGT (vector, 4x32-bit, signed): 0x00000000 > 0xffffffff (-1) is true,
  // the reverse is false, yielding the inverted lane pattern below.
  __uint128_t arg1 = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t arg2 = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.4s, %1.4s, %2.4s")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL));
}
4353 
TEST(Arm64InsnTest, CompareLessZeroInt64x1) {
  // CMLT #0 (scalar D form, signed <): only arg3, whose low doubleword has
  // the sign bit set, compares less than zero.
  constexpr auto AsmCmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x4784264567633881ULL, 0x8807565612168960ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x8955999911209916ULL);
  __uint128_t arg3 = MakeUInt128(0x9364610175685060ULL, 0x1671453543158148ULL);
  ASSERT_EQ(AsmCmlt(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmlt(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmlt(arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4363 
TEST(Arm64InsnTest, CompareLessThanZeroInt8x16) {
  // CMLT #0 (vector, 16x8-bit, signed): bytes 0x80..0xff map to 0xff; zero
  // and positive bytes map to 0x00.
  __uint128_t op = MakeUInt128(0xff00017ffe020180ULL, 0x0001027e7ffeff80ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xff000000ff0000ffULL, 0x0000000000ffffffULL));
}
4369 
TEST(Arm64InsnTest, CompareLessThanZeroInt8x8) {
  // CMLT #0 (vector, 8x8-bit, signed): lower-half form; the result's upper
  // 64 bits are zeroed.
  __uint128_t op = MakeUInt128(0x0002017e7fff8000ULL, 0x001100220000ffffULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x0000000000ffff00ULL, 0));
}
4375 
TEST(Arm64InsnTest, CompareGreaterThanEqualInt64x1) {
  // CMGE (scalar D form, signed >=): covers less-than (arg2), equal low
  // doubleword (arg3), and greater-than a negative value (arg4).
  constexpr auto AsmCmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t arg2 = MakeUInt128(0x5890939568814856ULL, 0x0263224393726562ULL);
  __uint128_t arg3 = MakeUInt128(0x1009391369138107ULL, 0x5511995818319637ULL);
  __uint128_t arg4 = MakeUInt128(0x9427141009391369ULL, 0x1381072581378135ULL);
  ASSERT_EQ(AsmCmge(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg1, arg4), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4386 
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt64x1) {
  // CMGE #0 (scalar D form, signed): positive and zero both satisfy >= 0;
  // only arg3's negative low doubleword fails.
  constexpr auto AsmCmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x5562116715468484ULL, 0x7780394475697980ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x3548487562529875ULL);
  __uint128_t arg3 = MakeUInt128(0x9212366168902596ULL, 0x2730430679316531ULL);
  ASSERT_EQ(AsmCmge(arg1), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4396 
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x16) {
  // CMGE #0 (vector, 16x8-bit, signed): bytes 0x00..0x7f map to 0xff;
  // negative bytes (0x80..0xff) map to 0x00.
  __uint128_t op = MakeUInt128(0x00ff01027ffe8002ULL, 0x80fffe7f7e020100ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xff00ffffff0000ffULL, 0x000000ffffffffffULL));
}
4402 
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x8) {
  // CMGE #0 (vector, 8x8-bit, signed): lower-half form; the result's upper
  // 64 bits are zeroed.
  __uint128_t op = MakeUInt128(0x0001027f80feff00ULL, 0x0011223344556677ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffffff000000ffULL, 0));
}
4408 
TEST(Arm64InsnTest, CompareGreaterEqualInt16x8) {
  // CMGE (vector, 8x16-bit, signed >=): includes an equal lane (0x4391) to
  // exercise the "or equal" half of the predicate.
  __uint128_t arg1 = MakeUInt128(0x4391962838870543ULL, 0x6777432242768091ULL);
  __uint128_t arg2 = MakeUInt128(0x4391838548318875ULL, 0x0142432208995068ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff0000ffffULL, 0xffffffffffff0000ULL));
}
4415 
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt64x1) {
  // CMLE #0 (scalar D form, signed <=): zero (arg2) and negative (arg3) low
  // doublewords both produce all-ones; positive (arg1) produces zero.
  constexpr auto AsmCmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x3643296406335728ULL, 0x1070788758164043ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x5865720227637840ULL);
  __uint128_t arg3 = MakeUInt128(0x8694346828590066ULL, 0x6408063140777577ULL);
  ASSERT_EQ(AsmCmle(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmle(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmle(arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4425 
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x16) {
  // CMLE #0 (vector, 16x8-bit, signed): zero and negative bytes map to 0xff;
  // positive bytes map to 0x00.
  __uint128_t op = MakeUInt128(0x80fffe7f7e020100ULL, 0x00ff01027ffe8002ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffff00000000ffULL, 0xffff000000ffff00ULL));
}
4431 
TEST(Arm64InsnTest, CompareHigherInt64x1) {
  // CMHI (scalar D form, unsigned >): arg4's low doubleword has the top bit
  // set, so unlike the signed CMGT tests it compares higher than arg1.
  constexpr auto AsmCmhi = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t arg2 = MakeUInt128(0x0759167297007850ULL, 0x5807171863810549ULL);
  __uint128_t arg3 = MakeUInt128(0x1009391369138107ULL, 0x6026322439372656ULL);
  __uint128_t arg4 = MakeUInt128(0x9087839523245323ULL, 0x7896029841669225ULL);
  ASSERT_EQ(AsmCmhi(arg1, arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhi(arg1, arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhi(arg1, arg4), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4442 
TEST(Arm64InsnTest, CompareHigherInt16x8) {
  // CMHI (vector, 8x16-bit, unsigned >): lanewise unsigned comparison of
  // arg1 against arg2.
  __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t arg2 = MakeUInt128(0x2057166778967764ULL, 0x4531840442045540ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff000000000000ULL, 0x0000ffff00000000ULL));
}
4449 
TEST(Arm64InsnTest, CompareHigherInt32x4) {
  // CMHI (vector, 4x32-bit, unsigned): same operands as the signed CMGT 4s
  // test above, but unsigned 0xffffffff is the largest value, so the lane
  // pattern is inverted relative to CMGT.
  __uint128_t arg1 = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t arg2 = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.4s, %1.4s, %2.4s")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL));
}
4456 
TEST(Arm64InsnTest, CompareHigherSameInt64x1) {
  // CMHS (scalar D form, unsigned >=): arg3 shares arg1's low doubleword to
  // exercise the "or same" half of the predicate.
  constexpr auto AsmCmhs = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x3529566139788848ULL, 0x6050978608595701ULL);
  __uint128_t arg2 = MakeUInt128(0x1769845875810446ULL, 0x6283998806006162ULL);
  __uint128_t arg3 = MakeUInt128(0x3529566139788848ULL, 0x9001852956919678ULL);
  __uint128_t arg4 = MakeUInt128(0x9628388705436777ULL, 0x4322427680913236ULL);
  ASSERT_EQ(AsmCmhs(arg1, arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhs(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhs(arg1, arg4), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4467 
TEST(Arm64InsnTest, CompareHigherSameInt16x8) {
  // CMHS (vector, 8x16-bit, unsigned >=): includes equal lanes (0x3206) to
  // cover the "same" case.
  __uint128_t arg1 = MakeUInt128(0x4599705674507183ULL, 0x3206503455664403ULL);
  __uint128_t arg2 = MakeUInt128(0x4264705633881880ULL, 0x3206612168960504ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0xffff00000000ffffULL));
}
4474 
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x8) {
  // CMLE #0 (vector, 8x8-bit, signed): lower-half form; the result's upper
  // 64 bits are zeroed.
  __uint128_t op = MakeUInt128(0x00fffe807f020100ULL, 0x00aabbccddeeff00ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffffff000000ffULL, 0));
}
4480 
TEST(Arm64InsnTest, TestInt64x1) {
  // CMTST (scalar D form): all-ones iff (arg_a & arg_b) low doubleword is
  // nonzero. arg1/arg2 have disjoint bit patterns (0xaa.. vs 0x55..); arg3
  // overlaps arg1.
  constexpr auto AsmCmtst = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0xaaaaaaaa55555555ULL, 0x7698385483188750ULL);
  __uint128_t arg2 = MakeUInt128(0x55555555aaaaaaaaULL, 0x1429389089950685ULL);
  __uint128_t arg3 = MakeUInt128(0xaa00aa0055005500ULL, 0x4530765116803337ULL);
  ASSERT_EQ(AsmCmtst(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmtst(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4489 
TEST(Arm64InsnTest, TestInt16x8) {
  // CMTST (vector, 8x16-bit): each lane is all-ones iff the AND of the two
  // source lanes is nonzero.
  __uint128_t arg1 = MakeUInt128(0x5999911209916464ULL, 0x6441191856827700ULL);
  __uint128_t arg2 = MakeUInt128(0x6101756850601671ULL, 0x4535431581480105ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff0000ffffULL, 0xffffffff0000ffffULL));
}
4496 
TEST(Arm64InsnTest, ExtractVectorFromPair) {
  // EXT #8 (16b): extracts 16 bytes starting at byte offset 8 of the
  // concatenation op2:op1 — i.e. op1's high half followed by op2's low half.
  __uint128_t op1 = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  __uint128_t op2 = MakeUInt128(0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.16b, %1.16b, %2.16b, #8")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x8899aabbccddeeffULL, 0x0001020304050607ULL));
}
4503 
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidth) {
  // EXT #3 (8b, half-width form): extracts 8 bytes at offset 3 from the
  // concatenation of the low halves of op2:op1; upper result half is zeroed.
  __uint128_t op1 = MakeUInt128(0x8138268683868942ULL, 0x7741559918559252ULL);
  __uint128_t op2 = MakeUInt128(0x3622262609912460ULL, 0x8051243884390451ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #3")(op1, op2);
  ASSERT_EQ(res, MakeUInt128(0x9124608138268683ULL, 0x0000000000000000ULL));
}
4510 
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidthPosition1) {
  // EXT #1 (8b): like the test above, with the minimum nonzero byte offset.
  __uint128_t op1 = MakeUInt128(0x9471329621073404ULL, 0x3751895735961458ULL);
  __uint128_t op2 = MakeUInt128(0x9048010941214722ULL, 0x1317947647772622ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #1")(op1, op2);
  ASSERT_EQ(res, MakeUInt128(0x2294713296210734ULL, 0x0000000000000000ULL));
}
4517 
TEST(Arm64InsnTest, Load1OneI8x8) {
  // LD1 (single register, 8b): loads 8 bytes from memory into the low half
  // of the destination; comparing against the full __uint128_t also checks
  // that the upper half ends up zero.
  static constexpr uint64_t arg = 0x8867915896904956ULL;
  __uint128_t res;
  asm("ld1 {%0.8b}, [%1]" : "=w"(res) : "r"(&arg) : "memory");
  ASSERT_EQ(res, arg);
}
4524 
TEST(Arm64InsnTest, Load1ThreeI8x8) {
  // LD1 (three registers, 8b): v0-v2 are hardcoded because the instruction
  // requires consecutive registers, which GCC constraints cannot express;
  // the mov instructions copy the loaded values out to the output operands.
  static constexpr uint64_t arg[3] = {
      0x3415354584283376ULL, 0x4378111988556318ULL, 0x7777925372011667ULL};
  __uint128_t res[3];
  asm("ld1 {v0.8b-v2.8b}, [%3]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(arg)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
}
4540 
TEST(Arm64InsnTest, Load1FourI8x8) {
  // LD1 (four registers, 8b): same hardcoded-register technique as the
  // three-register test above, extended to v0-v3.
  static constexpr uint64_t arg[4] = {
      0x9523688483099930ULL,
      0x2757419916463841ULL,
      0x4270779887088742ULL,
      0x2927705389122717ULL,
  };
  __uint128_t res[4];
  asm("ld1 {v0.8b-v3.8b}, [%4]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b\n\t"
      "mov %3.16b, v3.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(arg)
      : "v0", "v1", "v2", "v3", "memory");
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
  ASSERT_EQ(res[3], static_cast<__uint128_t>(arg[3]));
}
4562 
TEST(Arm64InsnTest, Store1OneI8x16) {
  // ST1 (single register, 16b): stores all 16 bytes of arg to memory.
  static constexpr __uint128_t arg = MakeUInt128(0x7642291583425006ULL, 0x7361245384916067ULL);
  __uint128_t res;
  asm("st1 {%0.16b}, [%1]" : : "w"(arg), "r"(&res) : "memory");
  ASSERT_EQ(res, arg);
}
4569 
TEST(Arm64InsnTest, Store1ThreeI8x8) {
  // ST1 (three registers, 8b): mirror of Load1ThreeI8x8 — inputs are copied
  // into the hardcoded consecutive registers v0-v2 before the store.
  static constexpr uint64_t arg[3] = {
      0x3086436111389069ULL, 0x4202790881431194ULL, 0x4879941715404210ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st1 {v0.8b-v2.8b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
}
4585 
TEST(Arm64InsnTest, Store1FourI8x8) {
  // ST1 (four registers, 8b): same technique as the three-register store,
  // extended to v0-v3.
  static constexpr uint64_t arg[4] = {
      0x8954750448339314ULL, 0x6896307633966572ULL, 0x2672704339321674ULL, 0x5421824557062524ULL};
  uint64_t res[4];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "mov v3.16b, %3.16b\n\t"
      "st1 {v0.8b-v3.8b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v0", "v1", "v2", "v3", "memory");
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
  ASSERT_EQ(res[3], arg[3]);
}
4603 
TEST(Arm64InsnTest, Load1TwoPostIndex) {
  // LD1 (two registers, post-indexed #32): loads 32 bytes and advances the
  // address register past both vectors; the final address is checked too.
  __uint128_t op0 = MakeUInt128(0x5499119881834797ULL, 0x0507922796892589ULL);
  __uint128_t op1 = MakeUInt128(0x0511854807446237ULL, 0x6691368672287489ULL);
  __uint128_t array[] = {
      op0,
      op1,
  };
  __uint128_t* addr = &array[0];
  __uint128_t res0 = 0;
  __uint128_t res1 = 0;

  // The "memory" below ensures that the array contents are up to date.  Without it, the
  // compiler might decide to initialize the array after the asm statement.
  //
  // We hardcode SIMD registers v0 and v1 below because there is no other way to express
  // consecutive registers, which in turn requires the mov instructions to retrieve the
  // loaded values into res0 and res1.
  asm("ld1 {v0.16b, v1.16b}, [%2], #32\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res0), "=w"(res1), "+r"(addr)
      :
      : "v0", "v1", "memory");

  ASSERT_EQ(res0, op0);
  ASSERT_EQ(res1, op1);
  ASSERT_EQ(addr, &array[2]);
}
4632 
TEST(Arm64InsnTest, Load1OnePostIndexReg) {
  // LD1 (single register, register post-index): the address operand is tied
  // ("1") to &arg on input and holds the advanced address (&arg + 32) after.
  static constexpr __uint128_t arg = MakeUInt128(0x4884761005564018ULL, 0x2423921926950620ULL);
  __uint128_t res_val;
  uint64_t res_addr;
  asm("ld1 {%0.16b}, [%1], %2"
      : "=w"(res_val), "=r"(res_addr)
      : "r"(static_cast<uint64_t>(32U)), "1"(&arg)
      : "memory");
  ASSERT_EQ(res_val, arg);
  ASSERT_EQ(res_addr, reinterpret_cast<uint64_t>(&arg) + 32);
}
4644 
TEST(Arm64InsnTest, LoadSingleInt8) {
  // LD1 (single structure, byte lane 3): loads one byte (0x08, the first
  // byte of mem_src in memory order) into lane 3, leaving all other lanes of
  // the tied ("0") register untouched.
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.b}[3], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x00112233'08'556677ULL, 0x8899aabbccddeeffULL));
}
4653 
TEST(Arm64InsnTest, LoadSingleInt16) {
  // LD1 (single structure, halfword lane 2): loads 0x0708 (the first two
  // bytes of mem_src, little-endian) into lane 2 of the tied register.
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.h}[2], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0000'0708'22223333ULL, 0x4444555566667777ULL));
}
4662 
TEST(Arm64InsnTest, LoadSingleInt32) {
  // LD1 (single structure, word lane 1): loads 0x05060708 into lane 1 of the
  // tied register.
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.s}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0506070811111111ULL, 0x2222222233333333ULL));
}
4671 
TEST(Arm64InsnTest, LoadSingleInt64) {
  // LD1 (single structure, doubleword lane 1): loads the first 8 bytes of
  // mem_src into the upper half of the tied register, preserving the lower.
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.d}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0000000000000000ULL, 0x0102030405060708ULL));
}
4680 
TEST(Arm64InsnTest, StoreSingleInt8) {
  // ST1 (single structure, byte lane 3): stores byte lane 3 of arg (0x05),
  // overwriting only the first byte of mem_dest (separator marks it).
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  asm("st1 {%1.b}[3], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x00112233445566'05ULL, 0x8899aabbccddeeffULL));
}
4687 
TEST(Arm64InsnTest, StoreSingleInt16) {
  // ST1 (single structure, halfword lane 5): stores lane 5 of arg (0x0d0e),
  // overwriting only the first two bytes of mem_dest.
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  asm("st1 {%1.h}[5], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x000011112222'0d0eULL, 0x4444555566667777ULL));
}
4694 
TEST(Arm64InsnTest, StoreSingleInt32) {
  // ST1 (single structure, word lane 2): stores lane 2 of arg (0x0d0e0f10),
  // overwriting only the first four bytes of mem_dest.
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  asm("st1 {%1.s}[2], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  // Fixed: the digit separator highlighting the stored element was misplaced
  // (0x000000000'd0e0f10 split the element at the wrong nibble); the value
  // itself is unchanged.
  ASSERT_EQ(mem_dest, MakeUInt128(0x00000000'0d0e0f10ULL, 0x2222222233333333ULL));
}
4701 
TEST(Arm64InsnTest,StoreSingleInt64)4702 TEST(Arm64InsnTest, StoreSingleInt64) {
4703   static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
4704   __uint128_t mem_dest = MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
4705   asm("st1 {%1.d}[1], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
4706   ASSERT_EQ(mem_dest, MakeUInt128(0x090a0b0c0d0e0f10ULL, 0x1111111111111111ULL));
4707 }
4708 
// LD1 (single structure, post-index immediate): loads byte lane 3 and advances
// the address register by the element size (#1).
TEST(Arm64InsnTest, LoadSinglePostIndexImmInt8) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x5494167594605487ULL, 0x1172359464291058ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x5090995021495879ULL, 0x3112196135908315ULL);
  __uint128_t res;
  uint8_t* addr;
  asm("ld1 {%0.b}[3], [%1], #1" : "=w"(res), "=r"(addr) : "0"(arg1), "1"(&arg2) : "memory");
  ASSERT_EQ(res, MakeUInt128(0x5494167579605487ULL, 0x1172359464291058ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 1);
}

// LD1 (single structure, post-index register): loads 16-bit lane 7 and advances
// the address register by the arbitrary offset held in a general register (17).
TEST(Arm64InsnTest, LoadSinglePostIndexRegInt16) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x0080587824107493ULL, 0x5751488997891173ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x9746129320351081ULL, 0x4327032514090304ULL);
  __uint128_t res;
  uint8_t* addr;
  asm("ld1 {%0.h}[7], [%1], %2"
      : "=w"(res), "=r"(addr)
      : "r"(static_cast<uint64_t>(17U)), "0"(arg1), "1"(&arg2)
      : "memory");
  ASSERT_EQ(res, MakeUInt128(0x0080587824107493ULL, 0x1081488997891173ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 17);
}
4731 
// STR (post-index) with a SIMD data register: the full 128-bit value is stored
// and the base register is incremented by #8 afterwards.
TEST(Arm64InsnTest, StoreSimdPostIndex) {
  __uint128_t old_val = MakeUInt128(0x4939965143142980ULL, 0x9190659250937221ULL);
  __uint128_t new_val = MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL);
  __uint128_t* addr = &old_val;

  // Verify that the interpreter accepts "str q0, [x0], #8" where the register numbers are
  // the same, when the data register is one of the SIMD registers.
  asm("mov x0, %0\n\t"
      "mov v0.2D, %1.2D\n\t"
      "str q0, [x0], #8\n\t"
      "mov %0, x0"
      : "+r"(addr)
      : "w"(new_val)
      : "v0", "x0", "memory");

  ASSERT_EQ(old_val, MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL));
  ASSERT_EQ(reinterpret_cast<uintptr_t>(addr), reinterpret_cast<uintptr_t>(&old_val) + 8);
}

// STR of XZR with writeback: pushes a zero below the stack pointer and reads
// it back.  NOTE(review): "[sp, #-16]!" is pre-index writeback despite the
// test name saying "PostIndex".
TEST(Arm64InsnTest, StoreZeroPostIndex1) {
  uint64_t res;
  asm("str xzr, [sp, #-16]!\n\t"
      "ldr %0, [sp, #0]\n\t"
      "add sp, sp, #16"
      : "=r"(res));
  ASSERT_EQ(res, 0);
}

// STP of two SIMD registers with writeback: both 128-bit values round-trip
// through the stack.  NOTE(review): also pre-index writeback, as above.
TEST(Arm64InsnTest, StoreZeroPostIndex2) {
  __uint128_t arg1 = MakeUInt128(0x9415573293820485ULL, 0x4212350817391254ULL);
  __uint128_t arg2 = MakeUInt128(0x9749819308714396ULL, 0x6151329420459193ULL);
  __uint128_t res1;
  __uint128_t res2;
  asm("mov v30.16b, %2.16b\n\t"
      "mov v31.16b, %3.16b\n\t"
      "stp q30, q31, [sp, #-32]!\n\t"
      "ldr %q0, [sp, #0]\n\t"
      "ldr %q1, [sp, #16]\n\t"
      "add sp, sp, #32"
      : "=w"(res1), "=w"(res2)
      : "w"(arg1), "w"(arg2)
      : "v30", "v31");

  ASSERT_EQ(res1, arg1);
  ASSERT_EQ(res2, arg2);
}
4778 
// LD2 (multiple structures): de-interleaves 16 bytes into two 8-byte vectors
// (even-indexed bytes into v0, odd-indexed into v1).
TEST(Arm64InsnTest, Load2MultipleInt8x8) {
  static constexpr uint8_t mem[] = {0x02,
                                    0x16,
                                    0x91,
                                    0x83,
                                    0x37,
                                    0x23,
                                    0x68,
                                    0x03,
                                    0x99,
                                    0x02,
                                    0x79,
                                    0x31,
                                    0x60,
                                    0x64,
                                    0x20,
                                    0x43};
  __uint128_t res[2];
  asm("ld2 {v0.8b, v1.8b}, [%2]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2060799968379102ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x4364310203238316ULL, 0U));
}

// LD3 (multiple structures): de-interleaves 24 bytes three ways into v7-v9.
TEST(Arm64InsnTest, Load3MultipleInt8x8) {
  static constexpr uint8_t mem[] = {0x32, 0x87, 0x67, 0x03, 0x80, 0x92, 0x52, 0x16,
                                    0x79, 0x07, 0x57, 0x12, 0x04, 0x06, 0x12, 0x37,
                                    0x59, 0x63, 0x27, 0x68, 0x56, 0x74, 0x84, 0x50};
  __uint128_t res[3];
  asm("ld3 {v7.8b-v9.8b}, [%3]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v7", "v8", "v9", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7427370407520332ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x8468590657168087ULL, 0U));
  ASSERT_EQ(res[2], MakeUInt128(0x5056631212799267ULL, 0U));
}

// LD4 (multiple structures) with a register list that wraps around v31 to v1:
// de-interleaves 32 halfwords four ways into v30, v31, v0, v1.
TEST(Arm64InsnTest, Load4MultipleInt16x8) {
  static constexpr uint16_t mem[] = {
      0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883, 0x2752, 0x2499, 0x6059, 0x8697, 0x4759,
      0x8823, 0x2991, 0x6263, 0x5459, 0x7332, 0x4445, 0x1637, 0x5533, 0x4377, 0x4929, 0x2899,
      0x0581, 0x1757, 0x9881, 0x5078, 0x1468, 0x5262, 0x1332, 0x5247, 0x3837, 0x6511};
  __uint128_t res[4];
  asm("ld4 {v30.8h-v1.8h}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2991605932252069ULL, 0x1332988149294445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6263869738836535ULL, 0x5247507828991637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x5459475927523863ULL, 0x3837146805815533ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332882324999644ULL, 0x6511526217574377ULL));
}
4843 
// LD1R: loads one byte and replicates it across all eight lanes.
TEST(Arm64InsnTest, Load1ReplicateInt8x8) {
  static constexpr uint8_t mem = 0x81U;
  __uint128_t res;
  asm("ld1r {%0.8b}, [%1]" : "=w"(res) : "r"(&mem) : "memory");
  ASSERT_EQ(res, MakeUInt128(0x8181818181818181ULL, 0U));
}

// LD2R: loads two halfwords and replicates each across its own register.
TEST(Arm64InsnTest, Load2ReplicateInt16x8) {
  static constexpr uint16_t mem[] = {0x7904, 0x8715};
  __uint128_t res[2];
  asm("ld2r {v6.8h, v7.8h}, [%2]\n\t"
      "mov %0.16b, v6.16b\n\t"
      "mov %1.16b, v7.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v6", "v7", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7904790479047904ULL, 0x7904790479047904ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8715871587158715ULL, 0x8715871587158715ULL));
}

// LD3R with a register list wrapping v31 to v0: three words, each replicated
// across its own register.
TEST(Arm64InsnTest, Load3ReplicateInt32x4) {
  static constexpr uint32_t mem[] = {0x78713710U, 0x60510637U, 0x95558588U};
  __uint128_t res[3];
  asm("ld3r {v30.4s-v0.4s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7871371078713710ULL, 0x7871371078713710ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6051063760510637ULL, 0x6051063760510637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x9555858895558588ULL, 0x9555858895558588ULL));
}

// LD4R with a register list wrapping v31 to v0: four doublewords, each
// replicated into both lanes of its own register.
TEST(Arm64InsnTest, Load4ReplicateInt64x2) {
  static constexpr uint64_t mem[] = {
      0x8150781468526213ULL, 0x3252473837651192ULL, 0x9901561091897779ULL, 0x2200870579339646ULL};
  __uint128_t res[4];
  asm("ld4r {v29.2d-v0.2d}, [%4]\n\t"
      "mov %0.16b, v29.16b\n\t"
      "mov %1.16b, v30.16b\n\t"
      "mov %2.16b, v31.16b\n\t"
      "mov %3.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v29", "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(mem[0], mem[0]));
  ASSERT_EQ(res[1], MakeUInt128(mem[1], mem[1]));
  ASSERT_EQ(res[2], MakeUInt128(mem[2], mem[2]));
  ASSERT_EQ(res[3], MakeUInt128(mem[3], mem[3]));
}

// LDNP (non-temporal pair) into two D registers.
// NOTE(review): "Temporarl" in the test name is a typo for "Temporal"; kept
// as-is since renaming would change the externally visible test identifier.
TEST(Arm64InsnTest, LoadPairNonTemporarlInt64) {
  static constexpr uint64_t mem[] = {0x3843601737474215ULL, 0x2476085152099016ULL};
  __uint128_t res[2];
  asm("ldnp %d0, %d1, [%2]" : "=w"(res[0]), "=w"(res[1]) : "r"(mem) : "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x3843601737474215ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x2476085152099016ULL, 0U));
}
4904 
// MOVI: immediate replicated into each 32-bit lane of the low 64 bits.
TEST(Arm64InsnTest, MoviVector2S) {
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0xe4")();
  ASSERT_EQ(rd, MakeUInt128(0x000000e4000000e4ULL, 0x0000000000000000ULL));
}

// MOVI (64-bit lanes): immediate replicated into both doubleword lanes.
TEST(Arm64InsnTest, MoviVector2D) {
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2d, #0xff")();
  ASSERT_EQ(rd, MakeUInt128(0x00000000000000ffULL, 0x00000000000000ffULL));
}

// MOVI (byte lanes): immediate replicated into each of the eight low bytes.
TEST(Arm64InsnTest, MoviVector8B) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("movi %0.8b, #0xda")();
  ASSERT_EQ(res, MakeUInt128(0xdadadadadadadadaULL, 0x0000000000000000ULL));
}

// MOVI with LSL #8: the immediate lands in the high byte of each halfword.
TEST(Arm64InsnTest, MoviVector4HShiftBy8) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("movi %0.4h, #0xd1, lsl #8")();
  ASSERT_EQ(res, MakeUInt128(0xd100d100d100d100ULL, 0x0000000000000000ULL));
}

// MOVI with MSL #16 ("shift ones"): the shifted-in low bits are ones.
TEST(Arm64InsnTest, MoviVector2SShiftBy16) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0x37, msl #16")();
  ASSERT_EQ(res, MakeUInt128(0x0037ffff0037ffffULL, 0x0000000000000000ULL));
}

// MVNI: the bitwise inverse of the immediate replicated into each halfword.
TEST(Arm64InsnTest, MvniVector4H) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.4h, #0xbc")();
  ASSERT_EQ(res, MakeUInt128(0xff43ff43ff43ff43ULL, 0x0000000000000000ULL));
}

// MVNI with LSL #8: inverse of (0x24 << 8) in each word lane.
TEST(Arm64InsnTest, MvniVector2SShiftBy8) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x24, lsl #8")();
  ASSERT_EQ(res, MakeUInt128(0xffffdbffffffdbffULL, 0x0000000000000000ULL));
}

// MVNI with MSL #16: inverse of the ones-shifted immediate in each word lane.
TEST(Arm64InsnTest, MvniVector2SShiftBy16) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x25, msl #16")();
  ASSERT_EQ(res, MakeUInt128(0xffda0000ffda0000ULL, 0x0000000000000000ULL));
}
4944 
// LDR (SIMD, register offset): loads the 128-bit value at base + offset.
TEST(Arm64InsnTest, LoadSimdRegPlusReg) {
  __uint128_t array[] = {
      MakeUInt128(0x6517980694113528ULL, 0x0131470130478164ULL),
      MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL),
  };
  uint64_t offset = 16;
  __uint128_t rd;

  asm("ldr %q0, [%1, %2]" : "=w"(rd) : "r"(array), "r"(offset) : "memory");

  ASSERT_EQ(rd, MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL));
}

// XTN: narrows each 16-bit lane to its low byte, result in the low 64 bits.
TEST(Arm64InsnTest, ExtractNarrowI16x8ToI8x8) {
  __uint128_t arg = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.8b, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x113355772367abefULL, 0x0ULL));
}

// XTN: narrows each 32-bit lane to its low halfword.
TEST(Arm64InsnTest, ExtractNarrowI32x4ToI16x4) {
  __uint128_t arg = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.4h, %1.4s")(arg);
  ASSERT_EQ(res, MakeUInt128(0x223366774567cdefULL, 0x0ULL));
}

// XTN: narrows each 64-bit lane to its low word.
TEST(Arm64InsnTest, ExtractNarrowI64x2ToI32x2) {
  __uint128_t arg = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.2s, %1.2d")(arg);
  ASSERT_EQ(res, MakeUInt128(0x4455667789abcdefULL, 0x0ULL));
}

// XTN2: narrows into the upper half of the destination, preserving its lower
// half (arg2 supplies the pre-existing destination contents).
TEST(Arm64InsnTest, ExtractNarrow2Int16x8ToInt8x16) {
  __uint128_t arg1 = MakeUInt128(0x1844396582533754ULL, 0x3885690941130315ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("xtn2 %0.16b, %1.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x6121865619673378ULL, 0x8509131544655354ULL));
}

// LDR (literal, SIMD).
TEST(Arm64InsnTest, LoadLiteralSimd) {
  // We call an external assembly function to perform LDR literal because we
  // need to place the literal in .rodata.  The literal placed in .text would
  // trigger a segfault.
  ASSERT_EQ(get_fp64_literal(), 0x0123456789abcdefULL);
}
4989 
// ABS (scalar, 64-bit): absolute value of the low doubleword; upper bits of
// the source are ignored and the result's upper half is zeroed.
TEST(Arm64InsnTest, AbsInt64x1) {
  __uint128_t arg = MakeUInt128(0xfffffffffffffffdULL, 0xdeadbeef01234567ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %d0, %d1")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
}

// ABS (vector, byte lanes): per-lane absolute value; note 0x80 stays 0x80
// (INT8_MIN negation wraps).
TEST(Arm64InsnTest, AbsInt8x8) {
  __uint128_t arg = MakeUInt128(0x0001027e7f8081ffULL, 0x0123456789abcdefULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0001027e7f807f01ULL, 0x0ULL));
}

// Verifies that v31, the last SIMD register, can be written and read back.
TEST(Arm64InsnTest, UseV31) {
  __uint128_t res;

  asm("movi v31.2d, #0xffffffffffffffff\n\t"
      "mov %0.16b, v31.16b"
      : "=w"(res)
      :
      : "v31");

  ASSERT_EQ(res, MakeUInt128(~0ULL, ~0ULL));
}
5013 
// ADDHN: adds 16-bit lanes pairwise and keeps the high byte of each sum.
TEST(Arm64InsnTest, AddHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x2296617119637792ULL, 0x1337575114959501ULL);
  __uint128_t arg2 = MakeUInt128(0x0941214722131794ULL, 0x7647772622414254ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x89ce36d72b823b8fULL, 0x0ULL));
}

// ADDHN2: like ADDHN but writes the upper half of the destination, preserving
// the lower half supplied in arg3.
TEST(Arm64InsnTest, AddHighNarrowUpperInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x6561809377344403ULL, 0x0707469211201913ULL);
  __uint128_t arg2 = MakeUInt128(0x6095752706957220ULL, 0x9175671167229109ULL);
  __uint128_t arg3 = MakeUInt128(0x5797877185560845ULL, 0x5296541266540853ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("addhn2 %0.16b, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5797877185560845ULL, 0x98ad78aac5f57db6ULL));
}

// SUBHN: subtracts 16-bit lanes and keeps the high byte of each difference.
TEST(Arm64InsnTest, SubHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4978189312978482ULL, 0x1682998948722658ULL);
  __uint128_t arg2 = MakeUInt128(0x1210835791513698ULL, 0x8209144421006751ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("subhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x948527bf3795814dULL, 0x0ULL));
}

// SUBHN2: upper-half variant of SUBHN; lower half of the destination is kept.
TEST(Arm64InsnTest, SubHighNarrowUpperInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x5324944166803962ULL, 0x6579787718556084ULL);
  __uint128_t arg2 = MakeUInt128(0x1066587969981635ULL, 0x7473638405257145ULL);
  __uint128_t arg3 = MakeUInt128(0x3142980919065925ULL, 0x0937221696461515ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("subhn2 %0.16b, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x3142980919065925ULL, 0xf11413ef423bfc23ULL));
}

// RADDHN: rounding variant of ADDHN.
TEST(Arm64InsnTest, RoundingAddHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x8039626579787718ULL, 0x5560845529654126ULL);
  __uint128_t arg2 = MakeUInt128(0x3440171274947042ULL, 0x0562230538994561ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("raddhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x5ba76287b479eee7ULL, 0x0000000000000000ULL));
}

// RSUBHN: rounding variant of SUBHN.
TEST(Arm64InsnTest, RoundingSubHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x3063432858785698ULL, 0x3052358089330657ULL);
  __uint128_t arg2 = MakeUInt128(0x0216471550979259ULL, 0x2309907965473761ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("rsubhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0da524cf2efc08c4ULL, 0x0000000000000000ULL));
}
5059 
// ADDP (scalar): adds the two 64-bit lanes of the source into one doubleword.
// NOTE(review): the test name says Int8x2 but the instruction operates on
// ".2d" (two 64-bit lanes); kept as-is since renaming changes the test ID.
TEST(Arm64InsnTest, ScalarPairwiseAddInt8x2) {
  __uint128_t arg = MakeUInt128(0x6257591633303910ULL, 0x7225383742182140ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addp %d0, %1.2d")(arg);
  ASSERT_EQ(res, MakeUInt128(0xd47c914d75485a50ULL, 0x0000000000000000ULL));
}

// ADDV: sums all byte lanes into a single byte result (modulo 256).
TEST(Arm64InsnTest, AddAcrossInt8x8) {
  __uint128_t arg = MakeUInt128(0x0681216028764962ULL, 0x8674460477464915ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addv %b0, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x51ULL, 0x0ULL));
}

// SADDLV: signed widening sum of all halfword lanes into a 32-bit result.
TEST(Arm64InsnTest, SignedAddLongAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x9699557377273756ULL, 0x6761552711392258ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlv %s0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000018aa2ULL, 0x0000000000000000ULL));
}

// UADDLV: unsigned widening sum of all halfword lanes into a 32-bit result.
TEST(Arm64InsnTest, UnsignedAddLongAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x7986396522961312ULL, 0x8017826797172898ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlv %s0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000000002aac0ULL, 0x0000000000000000ULL));
}

// SMAXV: signed maximum across all halfword lanes.
TEST(Arm64InsnTest, SignedMaximumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x8482065967379473ULL, 0x1680864156456505ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("smaxv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000006737ULL, 0x0000000000000000ULL));
}

// SMINV: signed minimum across all halfword lanes (0x9699 is negative).
TEST(Arm64InsnTest, SignedMinimumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x6772530431825197ULL, 0x5791679296996504ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sminv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000009699ULL, 0x0000000000000000ULL));
}

// UMAXV: unsigned maximum across all halfword lanes.
TEST(Arm64InsnTest, UnsignedMaximumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x6500378070466126ULL, 0x4706021457505793ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("umaxv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000007046ULL, 0x0000000000000000ULL));
}

// UMINV: unsigned minimum across all halfword lanes.
TEST(Arm64InsnTest, UnsignedMinimumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x5223572397395128ULL, 0x8181640597859142ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uminv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000005128ULL, 0x0000000000000000ULL));
}
5107 
// CLZ (vector): per-byte count of leading zero bits.
TEST(Arm64InsnTest, CountLeadingZerosI8x8) {
  __uint128_t arg = MakeUInt128(0x1452635608277857ULL, 0x7134275778960917ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("clz %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0301010104020101ULL, 0x0000000000000000ULL));
}

// CLS (vector): per-byte count of leading sign bits (excluding the sign bit).
TEST(Arm64InsnTest, CountLeadingSignBitsI8x8) {
  __uint128_t arg = MakeUInt128(0x8925892354201995ULL, 0x6112129021960864ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cls %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0001000100010200ULL, 0x0000000000000000ULL));
}

// CNT: per-byte population count.
TEST(Arm64InsnTest, Cnt) {
  __uint128_t arg = MakeUInt128(0x9835484875625298ULL, 0x7524238730775595ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cnt %0.16b, %1.16b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0304020205030303ULL, 0x0502030402060404ULL));
}

// MOV (scalar from element): copies byte lane 5 into a scalar, zeroing the rest.
TEST(Arm64InsnTest, SimdScalarMove) {
  __uint128_t arg = MakeUInt128(0x1433345477624168ULL, 0x6251898356948556ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("mov %b0, %1.b[5]")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000034ULL, 0x0000000000000000ULL));
}
5131 
// DUP (element): replicates byte lane 5 across all eight destination bytes.
TEST(Arm64InsnTest, SimdVectorElemDuplicate) {
  __uint128_t arg = MakeUInt128(0x3021647155097925ULL, 0x9230990796547376ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.8b, %1.b[5]")(arg);
  ASSERT_EQ(res, MakeUInt128(0x6464646464646464ULL, 0x0000000000000000ULL));
}

// DUP (element): replicates halfword lane 7 (taken from the upper 64 bits).
TEST(Arm64InsnTest, SimdVectorElemDuplicateInt16AtIndex7) {
  __uint128_t arg = MakeUInt128(0x2582262052248940ULL, 0x7726719478268482ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.4h, %1.h[7]")(arg);
  ASSERT_EQ(res, MakeUInt128(0x7726772677267726ULL, 0x0000000000000000ULL));
}

// MOV (element to element, i.e. INS): copies word lane 1 of the source into
// word lane 2 of the destination; all other destination lanes are preserved.
TEST(Arm64InsnTest, SimdVectorElemInsert) {
  __uint128_t arg1 = MakeUInt128(0x7120844335732654ULL, 0x8938239119325974ULL);
  __uint128_t arg2 = MakeUInt128(0x7656180937734440ULL, 0x3070746921120191ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("mov %0.s[2], %1.s[1]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7656180937734440ULL, 0x3070746971208443ULL));
}

// NEG (scalar, 64-bit): two's-complement negation; INT64_MIN negates to itself.
TEST(Arm64InsnTest, NegateInt64x1) {
  constexpr auto AsmNeg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %d0, %d1");
  __uint128_t arg1 = MakeUInt128(0x8389522868478312ULL, 0x3552658213144957ULL);
  ASSERT_EQ(AsmNeg(arg1), MakeUInt128(0x7c76add797b87ceeULL, 0x0000000000000000ULL));

  __uint128_t arg2 = MakeUInt128(1ULL << 63, 0U);
  ASSERT_EQ(AsmNeg(arg2), MakeUInt128(1ULL << 63, 0U));
}

// NEG (vector): per-halfword two's-complement negation.
TEST(Arm64InsnTest, NegateInt16x8) {
  __uint128_t arg = MakeUInt128(0x4411010446823252ULL, 0x7162010526522721ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %0.8h, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0xbbeffefcb97ecdaeULL, 0x8e9efefbd9aed8dfULL));
}
5165 
// NOT (vector): bitwise inversion of the low 64 bits, upper half zeroed.
TEST(Arm64InsnTest, NotI8x8) {
  __uint128_t arg = MakeUInt128(0x6205647693125705ULL, 0x8635662018558100ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("not %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x9dfa9b896ceda8faULL, 0x0000000000000000ULL));
}

// RBIT (vector): reverses the bit order within each byte.
TEST(Arm64InsnTest, RbitInt8x8) {
  __uint128_t arg = MakeUInt128(0x4713296210734043ULL, 0x7518957359614589ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rbit %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0xe2c8944608ce02c2ULL, 0x0000000000000000ULL));
}

// REV16: reverses the byte order within each 16-bit element.
TEST(Arm64InsnTest, Rev16Int8x16) {
  __uint128_t arg = MakeUInt128(0x9904801094121472ULL, 0x2131794764777262ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev16 %0.16b, %1.16b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0499108012947214ULL, 0x3121477977646272ULL));
}

// REV32: reverses the halfword order within each 32-bit element.
TEST(Arm64InsnTest, Rev32Int16x8) {
  __uint128_t arg = MakeUInt128(0x8662237172159160ULL, 0x7716692547487389ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev32 %0.8h, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x2371866291607215ULL, 0x6925771673894748ULL));
}

// REV64: reverses the word order within each 64-bit element.
TEST(Arm64InsnTest, Rev64Int32x4) {
  __uint128_t arg = MakeUInt128(0x5306736096571209ULL, 0x1807638327166416ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev64 %0.4s, %1.4s")(arg);
  ASSERT_EQ(res, MakeUInt128(0x9657120953067360ULL, 0x2716641618076383ULL));
}
5195 
// TBL (one-register table, 8-byte result): each index byte in arg2 selects a
// byte from the 16-byte table in arg1.
TEST(Arm64InsnTest, TblInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x0104011509120605ULL, 0x0315080907091312ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.8b, {%1.16b}, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1144110099006655ULL, 0x0000000000000000ULL));
}

// TBL (one-register table, 16-byte result).
TEST(Arm64InsnTest, TblInt8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x0905060808010408ULL, 0x0506000206030202ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.16b, {%1.16b}, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x9955668888114488ULL, 0x5566002266332222ULL));
}
5209 
// TBL with a two-register table that wraps around v31 to v0; out-of-range
// indices produce zero.
TEST(Arm64InsnTest, Tbl2Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  __uint128_t res;

  // Hardcode v31 and v0 so that the TBL instruction gets consecutive registers.
  asm("mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "tbl %0.16b, {v31.16b, v0.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3)
      : "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x22005500002299ffULL, 0x8777003398000033ULL));
}
5226 
// TBL with a three-register table that wraps around v31 to v0; out-of-range
// indices produce zero.
TEST(Arm64InsnTest, Tbl3Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, and v0 so that the TBL instruction gets consecutive registers.
  // The clobber list must name the registers the asm actually writes (v30, v31,
  // v0); the previous list ("v0", "v1", "v2") left v30/v31 as undeclared
  // clobbers, so the compiler was free to keep live values in them.
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "tbl %0.16b, {v30.16b-v0.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4)
      : "v30", "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x778760000090ff00ULL, 0x0060980000103244ULL));
}
5245 
// TBL with a four-register table that wraps around v31 to v1; out-of-range
// indices produce zero.
TEST(Arm64InsnTest, Tbl4Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, v0, and v1 so that the TBL instruction gets consecutive registers.
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "mov v1.16b, %4.16b\n\t"
      "tbl %0.16b, {v30.16b-v1.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5)
      : "v30", "v31", "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x778760009f90ff5fULL, 0x5f60980000103244ULL));
}
5266 
// TBX (one-register table): like TBL, but out-of-range indices leave the
// corresponding destination byte (from arg3) unchanged.
TEST(Arm64InsnTest, TbxInt8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x0915061808010408ULL, 0x0516000206031202ULL);
  __uint128_t arg3 = MakeUInt128(0x6668559233565463ULL, 0x9138363185745698ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("tbx %0.16b, {%1.16b}, %2.16b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x9968669288114488ULL, 0x5538002266335622ULL));
}

// TBX with a two-register table; out-of-range indices preserve the initial
// destination bytes in res.
TEST(Arm64InsnTest, Tbx2Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  __uint128_t res = MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL);

  // Hardcode v0 and v1 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "0"(res)
      : "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x22945584002299ffULL, 0x8777153398000333ULL));
}
5292 
// TBX with a three-register table; out-of-range indices preserve the initial
// destination bytes in res.
TEST(Arm64InsnTest, Tbx3Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res = MakeUInt128(0x0136776310849135ULL, 0x1615642269847507ULL);

  // Hardcode v0, v1, and v2 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b, v2.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "0"(res)
      : "v0", "v1", "v2");

  ASSERT_EQ(res, MakeUInt128(0x778760631090ff35ULL, 0x1660980069103244ULL));
}
5311 
// TBX with a four-register table; out-of-range indices preserve the initial
// destination bytes in res.
TEST(Arm64InsnTest, Tbx4Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res = MakeUInt128(0x5818319637637076ULL, 0x1799191920357958ULL);

  // Hardcode v0, v1, v2, and v3 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "mov v3.16b, %4.16b\n\t"
      "tbx %0.16b, {v0.16b-v3.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5), "0"(res)
      : "v0", "v1", "v2", "v3");

  ASSERT_EQ(res, MakeUInt128(0x778760969f90ff5fULL, 0x5f60980020103244ULL));
}
5332 
TEST(Arm64InsnTest, Trn1Int8x8) {
  // TRN1 on 8-bit lanes: interleave the even-numbered bytes of both sources.
  __uint128_t lhs = MakeUInt128(0x2075916729700785ULL, 0x0580717186381054ULL);
  __uint128_t rhs = MakeUInt128(0x2786099055690013ULL, 0x4137182368370991ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn1 %0.8b, %1.8b, %2.8b")(lhs, rhs),
            MakeUInt128(0x8675906769701385ULL, 0x0000000000000000ULL));
}
5339 
TEST(Arm64InsnTest, Trn2Int16x8) {
  // TRN2 on 16-bit lanes: interleave the odd-numbered halfwords of both sources.
  __uint128_t lhs = MakeUInt128(0x6685592335654639ULL, 0x1383631857456981ULL);
  __uint128_t rhs = MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn2 %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x7494668588443565ULL, 0x2175138334265745ULL));
}
5346 
TEST(Arm64InsnTest, Uzp1Int8x8) {
  // UZP1 on 8-bit lanes: concatenate the even-numbered bytes of both sources.
  __uint128_t lhs = MakeUInt128(0x4954893139394489ULL, 0x9216125525597701ULL);
  __uint128_t rhs = MakeUInt128(0x2783467926101995ULL, 0x5852247172201777ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp1 %0.8b, %1.8b, %2.8b")(lhs, rhs),
            MakeUInt128(0x8379109554313989ULL, 0x0000000000000000ULL));
}
5353 
TEST(Arm64InsnTest, Uzp2Int16x8) {
  // UZP2 on 16-bit lanes: concatenate the odd-numbered halfwords of both sources.
  __uint128_t lhs = MakeUInt128(0x6745642390585850ULL, 0x2167190313952629ULL);
  __uint128_t rhs = MakeUInt128(0x3620129476918749ULL, 0x7519101147231528ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp2 %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x2167139567459058ULL, 0x7519472336207691ULL));
}
5360 
TEST(Arm64InsnTest, Zip2Int64x2) {
  // The test is named Zip2 but previously executed UZP2.  For the .2d
  // arrangement both instructions pick element 1 of each source, so the
  // expected value is unchanged; use ZIP2 so the test exercises the
  // instruction its name claims.
  __uint128_t arg1 = MakeUInt128(0x1494271410093913ULL, 0x6913810725813781ULL);
  __uint128_t arg2 = MakeUInt128(0x3578940055995001ULL, 0x8354251184172136ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip2 %0.2d, %1.2d, %2.2d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x6913810725813781ULL, 0x8354251184172136ULL));
}
5367 
TEST(Arm64InsnTest, Zip1Int8x8) {
  // ZIP1 on 8-bit lanes: interleave the low halves of both sources.
  __uint128_t lhs = MakeUInt128(0x7499235630254947ULL, 0x8024901141952123ULL);
  __uint128_t rhs = MakeUInt128(0x3331239480494707ULL, 0x9119153267343028ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.8b, %1.8b, %2.8b")(lhs, rhs),
            MakeUInt128(0x8030492547490747ULL, 0x0000000000000000ULL));
}
5374 
TEST(Arm64InsnTest, Zip1Int64x2) {
  // ZIP1 on 64-bit lanes: result is {lhs[0], rhs[0]}.
  __uint128_t lhs = MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL);
  __uint128_t rhs = MakeUInt128(0x0551199581831963ULL, 0x7637076179919192ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.2d, %1.2d, %2.2d")(lhs, rhs),
            MakeUInt128(0x9243530136776310ULL, 0x0551199581831963ULL));
}
5381 
TEST(Arm64InsnTest, Zip2Int16x8) {
  // ZIP2 on 16-bit lanes: interleave the high halves of both sources.
  __uint128_t lhs = MakeUInt128(0x5831832713142517ULL, 0x0296923488962766ULL);
  __uint128_t rhs = MakeUInt128(0x2934595889706953ULL, 0x6534940603402166ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip2 %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x0340889621662766ULL, 0x6534029694069234ULL));
}
5388 
TEST(Arm64InsnTest, SignedMaxInt16x8) {
  // SMAX: lane-wise signed maximum of two 8x16-bit vectors.
  __uint128_t lhs = MakeUInt128(0x9901573466102371ULL, 0x2235478911292547ULL);
  __uint128_t rhs = MakeUInt128(0x4922157650450812ULL, 0x0677173571202718ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smax %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x4922573466102371ULL, 0x2235478971202718ULL));
}
5395 
TEST(Arm64InsnTest, SignedMinInt16x8) {
  // SMIN: lane-wise signed minimum of two 8x16-bit vectors.
  __uint128_t lhs = MakeUInt128(0x7820385653909910ULL, 0x4775941413215432ULL);
  __uint128_t rhs = MakeUInt128(0x0084531214065935ULL, 0x8090412711359200ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smin %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x0084385614069910ULL, 0x8090941411359200ULL));
}
5402 
TEST(Arm64InsnTest, SignedMaxPairwiseInt16x8) {
  // SMAXP: signed maximum of adjacent halfword pairs across both sources.
  __uint128_t lhs = MakeUInt128(0x6998469884770232ULL, 0x3823840055655517ULL);
  __uint128_t rhs = MakeUInt128(0x3272867600724817ULL, 0x2987637569816335ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smaxp %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x3823556569980232ULL, 0x6375698132724817ULL));
}
5409 
TEST(Arm64InsnTest, SignedMinPairwiseInt16x8) {
  // SMINP: signed minimum of adjacent halfword pairs across both sources.
  __uint128_t lhs = MakeUInt128(0x8865701568501691ULL, 0x8647488541679154ULL);
  __uint128_t rhs = MakeUInt128(0x1821553559732353ULL, 0x0686043010675760ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sminp %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x8647915488651691ULL, 0x0430106718212353ULL));
}
5416 
TEST(Arm64InsnTest, UnsignedMaxInt16x8) {
  // UMAX: lane-wise unsigned maximum of two 8x16-bit vectors.
  __uint128_t lhs = MakeUInt128(0x7639975974619383ULL, 0x5845749159880976ULL);
  __uint128_t rhs = MakeUInt128(0x5928493695941434ULL, 0x0814685298150539ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umax %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x7639975995949383ULL, 0x5845749198150976ULL));
}
5423 
TEST(Arm64InsnTest, UnsignedMinInt16x8) {
  // UMIN: lane-wise unsigned minimum of two 8x16-bit vectors.
  __uint128_t lhs = MakeUInt128(0x2888773717663748ULL, 0x6027660634960353ULL);
  __uint128_t rhs = MakeUInt128(0x6983349515101986ULL, 0x4269887847171939ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umin %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x2888349515101986ULL, 0x4269660634960353ULL));
}
5430 
TEST(Arm64InsnTest, UnsignedMaxPairwiseInt16x8) {
  // UMAXP: unsigned maximum of adjacent halfword pairs across both sources.
  __uint128_t lhs = MakeUInt128(0x1318583584066747ULL, 0x2370297149785084ULL);
  __uint128_t rhs = MakeUInt128(0x4570249413983163ULL, 0x4332378975955680ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umaxp %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x2971508458358406ULL, 0x4332759545703163ULL));
}
5437 
TEST(Arm64InsnTest, UnsignedMinPairwiseInt16x8) {
  // UMINP: unsigned minimum of adjacent halfword pairs across both sources.
  __uint128_t lhs = MakeUInt128(0x9538121791319145ULL, 0x1350099384631177ULL);
  __uint128_t rhs = MakeUInt128(0x7769055481028850ULL, 0x2080858008781157ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uminp %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x0993117712179131ULL, 0x2080087805548102ULL));
}
5444 
TEST(Arm64InsnTest, SignedHalvingAddInt16x8) {
  // SHADD: lane-wise signed add, result halved (no overflow possible).
  __uint128_t lhs = MakeUInt128(0x1021944719713869ULL, 0x2560841624511239ULL);
  __uint128_t rhs = MakeUInt128(0x8062011318454124ULL, 0x4782050110798760ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shadd %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0xc841caad18db3cc6ULL, 0x3671c48b1a65ccccULL));
}
5451 
TEST(Arm64InsnTest, SignedHalvingSubInt16x8) {
  // SHSUB: lane-wise signed subtract, result halved.
  __uint128_t lhs = MakeUInt128(0x9041210873032402ULL, 0x0106853419472304ULL);
  __uint128_t rhs = MakeUInt128(0x7666672174986986ULL, 0x8547076781205124ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shsub %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x8ceddcf3ff35dd3eULL, 0x3ddfbee64c13e8f0ULL));
}
5458 
TEST(Arm64InsnTest, SignedRoundingHalvingAddInt16x8) {
  // SRHADD: lane-wise signed add, halved with rounding.
  __uint128_t lhs = MakeUInt128(0x5871487839890810ULL, 0x7429530941060596ULL);
  __uint128_t rhs = MakeUInt128(0x9443158477539700ULL, 0x9439883949144323ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srhadd %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0xf65a2efe586ecf88ULL, 0x0431eda1450d245dULL));
}
5465 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceInt16x8) {
  // SABD: lane-wise signed absolute difference.
  __uint128_t lhs = MakeUInt128(0x1349607501116498ULL, 0x3278563531614516ULL);
  __uint128_t rhs = MakeUInt128(0x8457695687109002ULL, 0x9997698412632665ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabd %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x8ef208e17a01d496ULL, 0x98e1134f1efe1eb1ULL));
}
5472 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt16x8) {
  // SABDL: signed absolute difference of the low 4 halfwords, widened to words.
  __uint128_t lhs = MakeUInt128(0x7419850973346267ULL, 0x9332107268687076ULL);
  __uint128_t rhs = MakeUInt128(0x8062639919361965ULL, 0x0440995421676278ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl %0.4s, %1.4h, %2.4h")(lhs, rhs),
            MakeUInt128(0x000059fe00004902ULL, 0x0000f3b70000de90ULL));
}
5479 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongUpperInt16x8) {
  // SABDL2: same as SABDL, but consumes the upper 4 halfwords of each source.
  __uint128_t lhs = MakeUInt128(0x4980559610330799ULL, 0x4145347784574699ULL);
  __uint128_t rhs = MakeUInt128(0x9921285999993996ULL, 0x1228161521931488ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl2 %0.4s, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x00009d3c00003211ULL, 0x00002f1d00001e62ULL));
}
5486 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt16x8) {
  // SABA: signed absolute difference added into the destination.
  // The lowest element tests the overflow.
  __uint128_t src1 = MakeUInt128(0x8967'0031'9258'7fffULL, 0x9410'5105'3358'4384ULL);
  __uint128_t src2 = MakeUInt128(0x6560'2339'1796'8000ULL, 0x6784'4763'7084'7497ULL);
  __uint128_t acc = MakeUInt128(0x8333'6555'7900'5555ULL, 0x1914'7319'8862'7135ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.8h, %1.8h, %2.8h")(src1, src2, acc),
            MakeUInt128(0x5f2c'885d'fe3e'5554ULL, 0xec88'7cbb'c58e'a248ULL));
}
5495 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt32x4) {
  // SABA on 32-bit lanes; signed absolute difference added into the destination.
  // The lowest element tests the overflow.
  __uint128_t src1 = MakeUInt128(0x8967'0031'7fff'ffffULL, 0x9410'5105'3358'4384ULL);
  __uint128_t src2 = MakeUInt128(0x6560'2339'8000'0000ULL, 0x6784'4763'7084'7497ULL);
  __uint128_t acc = MakeUInt128(0x8333'6555'aaaa'5555ULL, 0x1914'7319'8862'7135ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.4s, %1.4s, %2.4s")(src1, src2, acc),
            MakeUInt128(0x5f2c'885d'aaaa'5554ULL, 0xec88'6977'c58e'a248ULL));
}
5504 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongInt16x4) {
  // SABAL: widening signed absolute difference accumulated into the destination.
  __uint128_t src1 = MakeUInt128(0x078464167452167ULL, 0x719048310967671ULL);
  __uint128_t src2 = MakeUInt128(0x344349481926268ULL, 0x110739948250607ULL);
  __uint128_t acc = MakeUInt128(0x949507350316901ULL, 0x731852119552635ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.4s, %1.4h, %2.4h")(src1, src2, acc),
            MakeUInt128(0x094a36265031aa02ULL, 0x073187ed195537e2ULL));
}
5512 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt32x2) {
  // SABAL on 32-bit lanes (note: accumulating variant, despite the test name);
  // the zero accumulator makes |0x7fffffff - (signed)0x80000000| visible directly.
  __uint128_t src1 = MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL);
  __uint128_t src2 = MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL);
  __uint128_t acc = MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.2d, %1.2s, %2.2s")(src1, src2, acc),
            MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
}
5520 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
  // SABAL2: widening accumulate of absolute differences of the upper halfwords.
  __uint128_t src1 = MakeUInt128(0x690943470482932ULL, 0x414041114654092ULL);
  __uint128_t src2 = MakeUInt128(0x988344435159133ULL, 0x010773944111840ULL);
  __uint128_t acc = MakeUInt128(0x410768498106634ULL, 0x241048239358274ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal2 %0.4s, %1.8h, %2.8h")(src1, src2, acc),
            MakeUInt128(0x0410a63098108e86ULL, 0x024108863935f59cULL));
}
5529 
TEST(Arm64InsnTest, UnsignedHalvingAddInt16x8) {
  // UHADD: lane-wise unsigned add, result halved.
  __uint128_t lhs = MakeUInt128(0x4775379853799732ULL, 0x2344561227858432ULL);
  __uint128_t rhs = MakeUInt128(0x9684664751333657ULL, 0x3692387201464723ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhadd %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x6efc4eef525666c4ULL, 0x2ceb4742146565aaULL));
}
5536 
TEST(Arm64InsnTest, UnsignedHalvingSubInt16x8) {
  // UHSUB: lane-wise unsigned subtract, result halved.
  __uint128_t lhs = MakeUInt128(0x9926884349592876ULL, 0x1240075587569464ULL);
  __uint128_t rhs = MakeUInt128(0x1370562514001179ULL, 0x7133166207153715ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhsub %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x42db190f1aac0b7eULL, 0xd086f87940202ea7ULL));
}
5543 
TEST(Arm64InsnTest, UnsignedRoundingHalvingAddInt16x8) {
  // URHADD: lane-wise unsigned add, halved with rounding.
  __uint128_t lhs = MakeUInt128(0x5066533985738887ULL, 0x8661476294434140ULL);
  __uint128_t rhs = MakeUInt128(0x1049888993160051ULL, 0x2076781035886116ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urhadd %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x30586de18c45446cULL, 0x536c5fb964e6512bULL));
}
5550 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceInt16x8) {
  // UABD: lane-wise unsigned absolute difference.
  __uint128_t lhs = MakeUInt128(0x8574664607722834ULL, 0x1540311441529418ULL);
  __uint128_t rhs = MakeUInt128(0x8047825438761770ULL, 0x7904300015669867ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabd %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x052d1c0e310410c4ULL, 0x63c401142bec044fULL));
}
5557 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongInt16x8) {
  // UABDL: unsigned absolute difference of the low halfwords, widened to words.
  __uint128_t lhs = MakeUInt128(0x1614585505839727ULL, 0x4209809097817293ULL);
  __uint128_t rhs = MakeUInt128(0x2393010676638682ULL, 0x4040111304024700ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl %0.4s, %1.4h, %2.4h")(lhs, rhs),
            MakeUInt128(0x000070e0000010a5ULL, 0x00000d7f0000574fULL));
}
5564 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongUpperInt16x8) {
  // UABDL2: same as UABDL, but consumes the upper halfwords of each source.
  __uint128_t lhs = MakeUInt128(0x0347999588867695ULL, 0x0161249722820403ULL);
  __uint128_t rhs = MakeUInt128(0x0399546327883069ULL, 0x5976249361510102ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl2 %0.4s, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x00003ecf00000301ULL, 0x0000581500000004ULL));
}
5571 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateInt16x8) {
  // UABA: unsigned absolute difference added into the destination.
  __uint128_t src1 = MakeUInt128(0x0857466460772283ULL, 0x4154031144152941ULL);
  __uint128_t src2 = MakeUInt128(0x8804782543876177ULL, 0x0790430001566986ULL);
  __uint128_t acc = MakeUInt128(0x7767957609099669ULL, 0x3607559496515273ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uaba %0.8h, %1.8h, %2.8h")(src1, src2, acc),
            MakeUInt128(0xf714c73725f9d55dULL, 0x6fcb9583d91092b8ULL));
}
5579 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongInt16x4) {
  // UABAL: widening unsigned absolute difference accumulated into the destination.
  __uint128_t src1 = MakeUInt128(0x8343417044157348ULL, 0x2481833301640566ULL);
  __uint128_t src2 = MakeUInt128(0x9596688667695634ULL, 0x9141632842641497ULL);
  __uint128_t acc = MakeUInt128(0x4533349999480002ULL, 0x6699875888159350ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal %0.4s, %1.4h, %2.4h")(src1, src2, acc),
            MakeUInt128(0x453357ed99481d16ULL, 0x669999ab8815ba66ULL));
}
5587 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
  // UABAL2: widening accumulate of absolute differences of the upper halfwords.
  __uint128_t src1 = MakeUInt128(0x998685541703188ULL, 0x778867592902607ULL);
  __uint128_t src2 = MakeUInt128(0x043212666179192ULL, 0x352093822787888ULL);
  __uint128_t acc = MakeUInt128(0x988633599116081ULL, 0x235355570464634ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal2 %0.4s, %1.8h, %2.8h")(src1, src2, acc),
            MakeUInt128(0x0988d34d9911b302ULL, 0x0235397b7046c371ULL));
}
5596 
TEST(Arm64InsnTest, SignedAddLongPairwiseInt8x16) {
  // SADDLP: signed add of adjacent byte pairs, widened to halfwords.
  __uint128_t src = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.8h, %1.16b")(src),
            MakeUInt128(0x00c50051ffbb0099ULL, 0x0078ffd2006a000bULL));
}
5602 
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt8x16) {
  // SADALP: widened pairwise signed sums of bytes added into the destination.
  __uint128_t src = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t acc = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.8h, %1.16b")(src, acc),
            MakeUInt128(0x71c183272fe809c2ULL, 0x510924703608905fULL));
}
5609 
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt16x8) {
  // SADALP: widened pairwise signed sums of halfwords added into the destination.
  __uint128_t src = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t acc = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.4s, %1.8h")(src, acc),
            MakeUInt128(0x72180054304fb4afULL, 0x51090e88357296efULL));
}
5616 
TEST(Arm64InsnTest, UnsignedAddLongPairwiseInt8x16) {
  // UADDLP: unsigned add of adjacent byte pairs, widened to halfwords.
  __uint128_t src = MakeUInt128(0x1483287348089574ULL, 0x7777527834422109ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlp %0.8h, %1.16b")(src),
            MakeUInt128(0x0097009b00500109ULL, 0x00ee00ca0076002aULL));
}
5622 
TEST(Arm64InsnTest, UnsignedAddAccumulateLongPairwiseInt8x16) {
  // UADALP: widened pairwise unsigned sums of bytes added into the destination.
  __uint128_t src = MakeUInt128(0x9348154691631162ULL, 0x4928873574718824ULL);
  __uint128_t acc = MakeUInt128(0x5207665738825139ULL, 0x6391635767231510ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("uadalp %0.8h, %1.16b")(src, acc),
            MakeUInt128(0x52e266b2397651acULL, 0x64026413680815bcULL));
}
5629 
TEST(Arm64InsnTest, SignedAddLong) {
  // SADDL: signed add of the low halfwords, widened to words.
  __uint128_t lhs = MakeUInt128(0x3478074585067606ULL, 0x3048229409653041ULL);
  __uint128_t rhs = MakeUInt128(0x1183066710818930ULL, 0x3110887172816751ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl %0.4s, %1.4h, %2.4h")(lhs, rhs),
            MakeUInt128(0xffff9587ffffff36ULL, 0x000045fb00000dacULL));
}
5636 
TEST(Arm64InsnTest, SignedAddLongUpper) {
  // SADDL2: signed add of the upper halfwords, widened to words.
  __uint128_t lhs = MakeUInt128(0x3160683158679946ULL, 0x0165205774052942ULL);
  __uint128_t rhs = MakeUInt128(0x3053601780313357ULL, 0x2632670547903384ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl2 %0.4s, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x0000bb9500005cc6ULL, 0x000027970000875cULL));
}
5643 
TEST(Arm64InsnTest, SignedSubLong) {
  // SSUBL: signed subtract of the low halfwords, widened to words.
  __uint128_t lhs = MakeUInt128(0x8566746260879482ULL, 0x0186474876727272ULL);
  __uint128_t rhs = MakeUInt128(0x2206267646533809ULL, 0x9801966883680994ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl %0.4s, %1.4h, %2.4h")(lhs, rhs),
            MakeUInt128(0x00001a34ffff5c79ULL, 0xffff636000004decULL));
}
5650 
TEST(Arm64InsnTest, SignedSubLongUpper) {
  // SSUBL2: signed subtract of the upper halfwords, widened to words.
  __uint128_t lhs = MakeUInt128(0x3011331753305329ULL, 0x8020166888174813ULL);
  __uint128_t rhs = MakeUInt128(0x4298868158557781ULL, 0x0343231753064784ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl2 %0.4s, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0xffff35110000008fULL, 0xffff7cddfffff351ULL));
}
5657 
TEST(Arm64InsnTest, UnsignedAddLong) {
  // UADDL: unsigned add of the low halfwords, widened to words.
  __uint128_t lhs = MakeUInt128(0x3126059505777727ULL, 0x5424712416483128ULL);
  __uint128_t rhs = MakeUInt128(0x3298207236175057ULL, 0x4673870128209575ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl %0.4s, %1.4h, %2.4h")(lhs, rhs),
            MakeUInt128(0x00003b8e0000c77eULL, 0x000063be00002607ULL));
}
5664 
TEST(Arm64InsnTest, UnsignedAddLongUpper) {
  // UADDL2: unsigned add of the upper halfwords, widened to words.
  __uint128_t lhs = MakeUInt128(0x3384698499778726ULL, 0x7065551918544686ULL);
  __uint128_t rhs = MakeUInt128(0x9846947849573462ULL, 0x2606294219624557ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl2 %0.4s, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x000031b600008bddULL, 0x0000966b00007e5bULL));
}
5671 
TEST(Arm64InsnTest, UnsignedSubLong) {
  // USUBL: unsigned subtract of the low halfwords, widened to words.
  __uint128_t lhs = MakeUInt128(0x4378111988556318ULL, 0x7777925372011667ULL);
  __uint128_t rhs = MakeUInt128(0x1853954183598443ULL, 0x8305203762819440ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl %0.4s, %1.4h, %2.4h")(lhs, rhs),
            MakeUInt128(0x000004fcffffded5ULL, 0x00002b25ffff7bd8ULL));
}
5678 
TEST(Arm64InsnTest, UnsignedSubLongUpper) {
  // USUBL2: unsigned subtract of the upper halfwords, widened to words.
  __uint128_t lhs = MakeUInt128(0x5228717440266638ULL, 0x9148817173086436ULL);
  __uint128_t rhs = MakeUInt128(0x1113890694202790ULL, 0x8814311944879941ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl2 %0.4s, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x00002e81ffffcaf5ULL, 0x0000093400005058ULL));
}
5685 
TEST(Arm64InsnTest, SignedAddWide) {
  // SADDW: word vector plus the sign-extended low halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x7844598183134112ULL, 0x9001999205981352ULL);
  __uint128_t narrow = MakeUInt128(0x2051173365856407ULL, 0x8264849427644113ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x7844bf068313a519ULL, 0x9001b9e305982a85ULL));
}
5692 
TEST(Arm64InsnTest, SignedAddWideUpper) {
  // SADDW2: word vector plus the sign-extended upper halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x3407092233436577ULL, 0x9160128093179401ULL);
  __uint128_t narrow = MakeUInt128(0x7185985999338492ULL, 0x3549564005709955ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x34070e923342feccULL, 0x916047c99317ea41ULL));
}
5699 
TEST(Arm64InsnTest, SignedSubWide) {
  // SSUBW: word vector minus the sign-extended low halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x2302847007312065ULL, 0x8032626417116165ULL);
  __uint128_t narrow = MakeUInt128(0x9576132723515666ULL, 0x6253667271899853ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x2302611f0730c9ffULL, 0x8032ccee17114e3eULL));
}
5706 
TEST(Arm64InsnTest, SignedSubWideUpper) {
  // SSUBW2: word vector minus the sign-extended upper halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x4510824783572905ULL, 0x6919885554678860ULL);
  __uint128_t narrow = MakeUInt128(0x7946280537122704ULL, 0x2466543192145281ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x4510f0338356d684ULL, 0x691963ef5467342fULL));
}
5713 
TEST(Arm64InsnTest, UnsignedAddWide) {
  // UADDW: word vector plus the zero-extended low halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x5870785951298344ULL, 0x1729535195378855ULL);
  __uint128_t narrow = MakeUInt128(0x3457374260859029ULL, 0x0817651557803905ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x5870d8de512a136dULL, 0x172987a89537bf97ULL));
}
5720 
TEST(Arm64InsnTest, UnsignedAddWideUpper) {
  // UADDW2: word vector plus the zero-extended upper halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x7516493270950493ULL, 0x4639382432227188ULL);
  __uint128_t narrow = MakeUInt128(0x5159740547021482ULL, 0x8971117779237612ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x7516c25570957aa5ULL, 0x4639c195322282ffULL));
}
5727 
TEST(Arm64InsnTest, UnsignedSubWide) {
  // USUBW: word vector minus the zero-extended low halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x0625247972199786ULL, 0x6854279897799233ULL);
  __uint128_t narrow = MakeUInt128(0x9579057581890622ULL, 0x5254735822052364ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x0624a2f072199164ULL, 0x6853921f97798cbeULL));
}
5734 
TEST(Arm64InsnTest, UnsignedSubWideUpper) {
  // USUBW2: word vector minus the zero-extended upper halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x8242392192695062ULL, 0x0831838145469839ULL);
  __uint128_t narrow = MakeUInt128(0x2366461363989101ULL, 0x2102177095976704ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x8241a38a9268e95eULL, 0x0831627f454680c9ULL));
}
5741 
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8) {
  // SMULL: signed multiply of the low bytes, widened to halfwords.
  __uint128_t lhs = MakeUInt128(0x9191791552241718ULL, 0x9585361680594741ULL);
  __uint128_t rhs = MakeUInt128(0x2341933984202187ULL, 0x4564925644346239ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.8h, %1.8b, %2.8b")(lhs, rhs),
            MakeUInt128(0xd848048002f7f4a8ULL, 0xf0d3e3d1cc7b04adULL));
}
5748 
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8Upper) {
  // SMULL2: signed multiply of the upper bytes, widened to halfwords.
  __uint128_t lhs = MakeUInt128(0x9314052976347574ULL, 0x8119356709110137ULL);
  __uint128_t rhs = MakeUInt128(0x7517210080315590ULL, 0x2485309066920376ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.8h, %1.16b, %2.16b")(lhs, rhs),
            MakeUInt128(0x0396f8b20003195aULL, 0xee24f3fd09f0d2f0ULL));
}
5755 
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8) {
  // UMULL: unsigned multiply of the low bytes, widened to halfwords.
  __uint128_t lhs = MakeUInt128(0x9149055628425039ULL, 0x1275771028402799ULL);
  __uint128_t rhs = MakeUInt128(0x8066365825488926ULL, 0x4880254566101729ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.8h, %1.8b, %2.8b")(lhs, rhs),
            MakeUInt128(0x05c812902ad00876ULL, 0x48801d16010e1d90ULL));
}
5762 
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8Upper) {
  // UMULL2: unsigned multiply of the upper bytes, widened to halfwords.
  __uint128_t lhs = MakeUInt128(0x9709683408005355ULL, 0x9849175417381883ULL);
  __uint128_t rhs = MakeUInt128(0x9994469748676265ULL, 0x5165827658483588ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.8h, %1.16b, %2.16b")(lhs, rhs),
            MakeUInt128(0x07e80fc004f84598ULL, 0x30181ccd0bae26b8ULL));
}
5769 
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElem) {
  // SMULL (by element): each low halfword of the first source times lane 2
  // of the second, widened to words.
  __uint128_t lhs = MakeUInt128(0x9293459588970695ULL, 0x3653494060340216ULL);
  __uint128_t rhs = MakeUInt128(0x6544375589004563ULL, 0x2882250545255640ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.4s, %1.4h, %2.h[2]")(lhs, rhs),
            MakeUInt128(0xe630cb23016c3279ULL, 0xe8593fcf0f0a1d79ULL));
}
5776 
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElemUpper) {
  // SMULL2 (by element): each upper halfword of the first source times lane 2
  // of the second, widened to words.
  __uint128_t lhs = MakeUInt128(0x9279068212073883ULL, 0x7781423356282360ULL);
  __uint128_t rhs = MakeUInt128(0x8963208068222468ULL, 0x0122482611771858ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.4s, %1.8h, %2.h[2]")(lhs, rhs),
            MakeUInt128(0x0af01400047db000ULL, 0x0f2be08008677980ULL));
}
5783 
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem) {
  // UMULL (by element): each low halfword of the first source times lane 2
  // of the second, widened to words.
  __uint128_t lhs = MakeUInt128(0x9086996033027634ULL, 0x7870810817545011ULL);
  __uint128_t rhs = MakeUInt128(0x9307141223390866ULL, 0x3938339529425786ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[2]")(lhs, rhs),
            MakeUInt128(0x03ffbe2409445fa8ULL, 0x0b54a16c0c0648c0ULL));
}
5790 
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem2) {
  // UMULL (by element): as above, but selecting lane 4 of the second source.
  __uint128_t lhs = MakeUInt128(0x9132710495478599ULL, 0x1801969678353214ULL);
  __uint128_t rhs = MakeUInt128(0x6444118926063152ULL, 0x6618167443193550ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[4]")(lhs, rhs),
            MakeUInt128(0x1f1659301bd26cd0ULL, 0x1e3cb9a017892540ULL));
}
5797 
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElemUpper) {
  // UMULL2 (by element): each upper halfword of the first source times lane 2
  // of the second, widened to words.
  __uint128_t lhs = MakeUInt128(0x9815793678976697ULL, 0x4220575059683440ULL);
  __uint128_t rhs = MakeUInt128(0x8697350201410206ULL, 0x7235850200724522ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.4s, %1.8h, %2.h[2]")(lhs, rhs),
            MakeUInt128(0x12833ad00ad1a880ULL, 0x0db1244012143ea0ULL));
}
5804 
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8) {
  // SMLAL: widening signed multiply of the low byte lanes, accumulated into
  // the 16-bit lanes of the destination (third argument is the accumulator).
  __uint128_t mul1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t mul2 = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t acc = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.8h, %1.8b, %2.8b")(mul1, mul2, acc),
            MakeUInt128(0x3b5b1ca28ec69893ULL, 0x8b7836c02ef25620ULL));
}
5812 
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8Upper) {
  // SMLAL2: widening signed multiply-accumulate over the upper byte lanes.
  __uint128_t mul1 = MakeUInt128(0x5514435021828702ULL, 0x6685610665003531ULL);
  __uint128_t mul2 = MakeUInt128(0x0502163182060176ULL, 0x0921798468493686ULL);
  __uint128_t acc = MakeUInt128(0x3161293727951873ULL, 0x0789726373537171ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.8h, %1.16b, %2.16b")(mul1, mul2, acc),
            MakeUInt128(0x5a69293732c30119ULL, 0x0b1f6288a12c6e89ULL));
}
5821 
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8) {
  // SMLSL: widening signed multiply of the low byte lanes, subtracted from
  // the 16-bit lanes of the accumulator.
  __uint128_t mul1 = MakeUInt128(0x9662539339538092ULL, 0x2195591918188552ULL);
  __uint128_t mul2 = MakeUInt128(0x6780621499231727ULL, 0x6316321833989693ULL);
  __uint128_t acc = MakeUInt128(0x8075616855911752ULL, 0x9984501320671293ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.8h, %1.8b, %2.8b")(mul1, mul2, acc),
            MakeUInt128(0x9764560f61112814ULL, 0xc42a811300a11b17ULL));
}
5829 
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8Upper) {
  // SMLSL2: widening signed multiply-subtract over the upper byte lanes.
  __uint128_t mul1 = MakeUInt128(0x9826903089111856ULL, 0x8798692947051352ULL);
  __uint128_t mul2 = MakeUInt128(0x4816091743243015ULL, 0x3836847072928989ULL);
  __uint128_t acc = MakeUInt128(0x8284602223730145ULL, 0x2655679898627767ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.8h, %1.16b, %2.16b")(mul1, mul2, acc),
            MakeUInt128(0x62e662482c482763ULL, 0x40cd7d88cb3e6577ULL));
}
5838 
TEST(Arm64InsnTest, SignedMultiplyAddLongInt16x4) {
  // SMLAL: widening signed multiply of the low 16-bit lanes, accumulated
  // into the 32-bit lanes of the destination.
  __uint128_t mul1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t mul2 = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t acc = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.4h")(mul1, mul2, acc),
            MakeUInt128(0x3b6bd2a28eac7893ULL, 0x8b4c38c02edab620ULL));
}
5846 
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8) {
  // UMLAL: widening unsigned multiply of the low byte lanes, accumulated
  // into the 16-bit lanes of the destination.
  __uint128_t mul1 = MakeUInt128(0x9696920253886503ULL, 0x4577183176686885ULL);
  __uint128_t mul2 = MakeUInt128(0x9236814884752764ULL, 0x9846882194973972ULL);
  __uint128_t acc = MakeUInt128(0x9707737187188400ULL, 0x4143231276365048ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.8h, %1.8b, %2.8b")(mul1, mul2, acc),
            MakeUInt128(0xc1d3b199967b852cULL, 0x96cf42b6bfc850d8ULL));
}
5854 
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8Upper) {
  // UMLAL2: widening unsigned multiply-accumulate over the upper byte lanes.
  __uint128_t mul1 = MakeUInt128(0x9055637695252326ULL, 0x5361442478023082ULL);
  __uint128_t mul2 = MakeUInt128(0x6811831037735887ULL, 0x0892406130313364ULL);
  __uint128_t acc = MakeUInt128(0x7737101162821461ULL, 0x4661679404090518ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.8h, %1.16b, %2.16b")(mul1, mul2, acc),
            MakeUInt128(0x8db710736c124729ULL, 0x48f99ee6150912bcULL));
}
5863 
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8) {
  // UMLSL: widening unsigned multiply of the low byte lanes, subtracted from
  // the 16-bit lanes of the accumulator.
  __uint128_t mul1 = MakeUInt128(0x4577772457520386ULL, 0x5437542828256714ULL);
  __uint128_t mul2 = MakeUInt128(0x1288583454443513ULL, 0x2562054464241011ULL);
  __uint128_t acc = MakeUInt128(0x0379554641905811ULL, 0x6862305964476958ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.8h, %1.8b, %2.8b")(mul1, mul2, acc),
            MakeUInt128(0xe6ed3f7e40f14e1fULL, 0x6388f1213b5f6208ULL));
}
5871 
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8Upper) {
  // UMLSL2: widening unsigned multiply-subtract over the upper byte lanes.
  __uint128_t mul1 = MakeUInt128(0x4739376564336319ULL, 0x7978680367187307ULL);
  __uint128_t mul2 = MakeUInt128(0x9693924236321448ULL, 0x4503547763156702ULL);
  __uint128_t acc = MakeUInt128(0x5539006542311792ULL, 0x0153464977929066ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.8h, %1.16b, %2.16b")(mul1, mul2, acc),
            MakeUInt128(0x2d64fe6d13ec1784ULL, 0xe0b644e155728f01ULL));
}
5880 
TEST(Arm64InsnTest, SignedShiftLeftInt64x1) {
  // SSHL (scalar, 64-bit): the shift count comes from a register; the
  // expectations below show that negative counts shift right with sign fill
  // and that magnitudes of 64 and above saturate to all-zeros/all-ones.
  constexpr auto Sshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %d0, %d1, %d2");
  __uint128_t input = MakeUInt128(0x9007497297363549ULL, 0x6453328886984406ULL);
  struct {
    int shift;
    uint64_t expected;
  } kCases[] = {
      {-65, 0xffffffffffffffffULL}, {-64, 0xffffffffffffffffULL},
      {-63, 0xffffffffffffffffULL}, {-1, 0xc803a4b94b9b1aa4ULL},
      {0, 0x9007497297363549ULL},   {1, 0x200e92e52e6c6a92ULL},
      {63, 0x8000000000000000ULL},  {64, 0x0000000000000000ULL},
      {65, 0x0000000000000000ULL},
  };
  for (const auto& test_case : kCases) {
    ASSERT_EQ(Sshl(input, test_case.shift),
              MakeUInt128(test_case.expected, 0x0000000000000000ULL));
  }
}
5894 
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt64x1) {
  // SRSHL (scalar, 64-bit): like SSHL but right shifts round toward the
  // result (note -64 yields 0 here rather than the -1 SSHL produces).
  constexpr auto Srshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %d0, %d1, %d2");
  __uint128_t input = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  struct {
    int shift;
    uint64_t expected;
  } kCases[] = {
      {-65, 0x0000000000000000ULL}, {-64, 0x0000000000000000ULL},
      {-63, 0xffffffffffffffffULL}, {-1, 0xc93b22bc98832bc9ULL},
      {0, 0x9276457931065792ULL},   {1, 0x24ec8af2620caf24ULL},
      {63, 0x0000000000000000ULL},  {64, 0x0000000000000000ULL},
      {65, 0x0000000000000000ULL},
  };
  for (const auto& test_case : kCases) {
    ASSERT_EQ(Srshl(input, test_case.shift),
              MakeUInt128(test_case.expected, 0x0000000000000000ULL));
  }
}
5908 
TEST(Arm64InsnTest, UnsignedShiftLeftInt64x1) {
  // USHL (scalar, 64-bit): register-specified shift; negative counts shift
  // right with zero fill, and magnitudes of 64 and above produce zero.
  constexpr auto Ushl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %d0, %d1, %d2");
  __uint128_t input = MakeUInt128(0x9138296682468185ULL, 0x7103188790652870ULL);
  struct {
    int shift;
    uint64_t expected;
  } kCases[] = {
      {-65, 0x0000000000000000ULL}, {-64, 0x0000000000000000ULL},
      {-63, 0x0000000000000001ULL}, {-1, 0x489c14b3412340c2ULL},
      {0, 0x9138296682468185ULL},   {1, 0x227052cd048d030aULL},
      {63, 0x8000000000000000ULL},  {64, 0x0000000000000000ULL},
      {65, 0x0000000000000000ULL},
  };
  for (const auto& test_case : kCases) {
    ASSERT_EQ(Ushl(input, test_case.shift),
              MakeUInt128(test_case.expected, 0x0000000000000000ULL));
  }
}
5922 
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt64x1) {
  // URSHL (scalar, 64-bit): like USHL but right shifts round (note the
  // -64 case rounds up to 1 where USHL yields 0).
  constexpr auto Urshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %d0, %d1, %d2");
  __uint128_t input = MakeUInt128(0x9023452924407736ULL, 0x5949563051007421ULL);
  struct {
    int shift;
    uint64_t expected;
  } kCases[] = {
      {-65, 0x0000000000000000ULL}, {-64, 0x0000000000000001ULL},
      {-63, 0x0000000000000001ULL}, {-1, 0x4811a29492203b9bULL},
      {0, 0x9023452924407736ULL},   {1, 0x20468a524880ee6cULL},
      {63, 0x0000000000000000ULL},  {64, 0x0000000000000000ULL},
      {65, 0x0000000000000000ULL},
  };
  for (const auto& test_case : kCases) {
    ASSERT_EQ(Urshl(input, test_case.shift),
              MakeUInt128(test_case.expected, 0x0000000000000000ULL));
  }
}
5936 
TEST(Arm64InsnTest, SignedShiftLeftInt16x8) {
  // SSHL (vector, 16-bit lanes): each lane of the first operand is shifted
  // by the signed count held in the matching lane of the second operand.
  constexpr auto Sshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %0.8h, %1.8h, %2.8h");
  __uint128_t values = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(Sshl(values, counts), MakeUInt128(0x0000800066643332ULL, 0xccccffffffffffffULL));
  // A zero shift count in every lane leaves the input unchanged.
  ASSERT_EQ(Sshl(values, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
5944 
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt16x8) {
  // SRSHL (vector, 16-bit lanes): per-lane signed shift with rounding on
  // right shifts.
  constexpr auto Srshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %0.8h, %1.8h, %2.8h");
  __uint128_t values = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(Srshl(values, counts), MakeUInt128(0x0000800066643332ULL, 0xcccdffff00000000ULL));
  // A zero shift count in every lane leaves the input unchanged.
  ASSERT_EQ(Srshl(values, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
5952 
TEST(Arm64InsnTest, UnsignedShiftLeftInt16x8) {
  // USHL (vector, 16-bit lanes): per-lane shift; negative counts shift right
  // with zero fill.
  constexpr auto Ushl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %0.8h, %1.8h, %2.8h");
  __uint128_t values = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(Ushl(values, counts), MakeUInt128(0x0000800066643332ULL, 0x4ccc000100000000ULL));
  // A zero shift count in every lane leaves the input unchanged.
  ASSERT_EQ(Ushl(values, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
5960 
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt16x8) {
  // URSHL (vector, 16-bit lanes): per-lane unsigned shift with rounding on
  // right shifts.
  constexpr auto Urshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %0.8h, %1.8h, %2.8h");
  __uint128_t values = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(Urshl(values, counts), MakeUInt128(0x0000800066643332ULL, 0x4ccd000100010000ULL));
  // A zero shift count in every lane leaves the input unchanged.
  ASSERT_EQ(Urshl(values, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
5968 
TEST(Arm64InsnTest, UnsignedReciprocalSquareRootEstimateInt32x4) {
  // URSQRTE: per-lane unsigned reciprocal square root estimate.
  __uint128_t input = MakeUInt128(0x9641122821407533ULL, 0x0265510042410489ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ursqrte %0.4s, %1.4s")(input),
            MakeUInt128(0xa7000000ffffffffULL, 0xfffffffffb800000ULL));
}
5974 
TEST(Arm64InsnTest, UnsignedReciprocalEstimateInt32x4) {
  // URECPE: per-lane unsigned reciprocal estimate.
  __uint128_t input = MakeUInt128(0x9714864899468611ULL, 0x2476054286734367ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urecpe %0.4s, %1.4s")(input),
            MakeUInt128(0xd8800000d6000000ULL, 0xfffffffff4000000ULL));
}
5980 
IsQcBitSet(uint32_t fpsr)5981 bool IsQcBitSet(uint32_t fpsr) {
5982   return (fpsr & kFpsrQcBit) != 0;
5983 }
5984 
TEST(Arm64InsnTest, SignedSaturatingAddInt64x1) {
  // SQADD (scalar, 64-bit): signed saturating add; saturation sets FPSR.QC.
  constexpr auto Sqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %d0, %d2, %d3");

  // No overflow: plain signed sum, QC stays clear.
  auto [sum, status] = Sqadd(MakeUInt128(0x4342527753119724ULL, 0x7430873043619511ULL),
                             MakeUInt128(0x3961190800302558ULL, 0x7838764420608504ULL));
  ASSERT_EQ(sum, MakeUInt128(0x7ca36b7f5341bc7cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Positive overflow saturates to INT64_MAX and sets QC.
  auto [sat_sum, sat_status] = Sqadd(MakeUInt128(0x2557185308919284ULL, 0x4038050710300647ULL),
                                     MakeUInt128(0x7684786324319100ULL, 0x0223929785255372ULL));
  ASSERT_EQ(sat_sum, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6000 
TEST(Arm64InsnTest, SignedSaturatingAddInt32x4) {
  // SQADD (vector, 32-bit lanes): per-lane signed saturating add; QC is set
  // when any lane saturates.
  constexpr auto Sqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %0.4s, %2.4s, %3.4s");

  // No lane overflows: QC stays clear.
  auto [sum, status] = Sqadd(MakeUInt128(0x9883554445602495ULL, 0x5666843660292219ULL),
                             MakeUInt128(0x5124830910605377ULL, 0x2019802183101032ULL));
  ASSERT_EQ(sum, MakeUInt128(0xe9a7d84d55c0780cULL, 0x76800457e339324bULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Some lanes saturate (to INT32_MIN / INT32_MAX) and QC is set.
  auto [sat_sum, sat_status] = Sqadd(MakeUInt128(0x9713308844617410ULL, 0x7959162511714864ULL),
                                     MakeUInt128(0x8744686112476054ULL, 0x2867343670904667ULL));
  ASSERT_EQ(sat_sum, MakeUInt128(0x8000000056a8d464ULL, 0x7fffffff7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6016 
TEST(Arm64InsnTest, UnsignedSaturatingAddInt8x1) {
  // UQADD (scalar, 8-bit): unsigned saturating add of the low bytes.
  constexpr auto Uqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %b0, %b2, %b3");

  // 0x73 + 0x31 fits in a byte: QC stays clear.
  auto [sum, status] = Uqadd(MakeUInt128(0x6017174229960273ULL, 0x5310276871944944ULL),
                             MakeUInt128(0x4917939785144631ULL, 0x5973144353518504ULL));
  ASSERT_EQ(sum, MakeUInt128(0x00000000000000a4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // 0x90 + 0x99 overflows a byte: result saturates to 0xff and QC is set.
  auto [sat_sum, sat_status] = Uqadd(MakeUInt128(0x3306263695626490ULL, 0x9108276271159038ULL),
                                     MakeUInt128(0x5699505124652999ULL, 0x6062855443838330ULL));
  ASSERT_EQ(sat_sum, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6032 
TEST(Arm64InsnTest, UnsignedSaturatingAddInt64x1) {
  // UQADD (scalar, 64-bit): unsigned saturating add.
  constexpr auto Uqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %d0, %d2, %d3");

  // No overflow: QC stays clear.
  auto [sum, status] = Uqadd(MakeUInt128(0x0606885137234627ULL, 0x0799732723313469ULL),
                             MakeUInt128(0x3971456285542615ULL, 0x4676506324656766ULL));
  ASSERT_EQ(sum, MakeUInt128(0x3f77cdb3bc776c3cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Overflow saturates to UINT64_MAX and sets QC.
  auto [sat_sum, sat_status] = Uqadd(MakeUInt128(0x9534957018600154ULL, 0x1262396228641389ULL),
                                     MakeUInt128(0x7796733329070567ULL, 0x3769621564981845ULL));
  ASSERT_EQ(sat_sum, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6048 
TEST(Arm64InsnTest, UnsignedSaturatingAddInt32x4) {
  // UQADD (vector, 32-bit lanes): per-lane unsigned saturating add.
  constexpr auto Uqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %0.4s, %2.4s, %3.4s");

  // No lane overflows: QC stays clear.
  auto [sum, status] = Uqadd(MakeUInt128(0x9737425700735921ULL, 0x0031541508936793ULL),
                             MakeUInt128(0x0081699805365202ULL, 0x7600727749674584ULL));
  ASSERT_EQ(sum, MakeUInt128(0x97b8abef05a9ab23ULL, 0x7631c68c51faad17ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // One lane saturates to UINT32_MAX and QC is set.
  auto [sat_sum, sat_status] = Uqadd(MakeUInt128(0x9727856471983963ULL, 0x0878154322116691ULL),
                                     MakeUInt128(0x8654522268126887ULL, 0x2684459684424161ULL));
  ASSERT_EQ(sat_sum, MakeUInt128(0xffffffffd9aaa1eaULL, 0x2efc5ad9a653a7f2ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6064 
TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x1) {
  // SQSUB (scalar, 32-bit): signed saturating subtract; both negative and
  // positive overflow are exercised.
  constexpr auto Sqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %s0, %s2, %s3");

  // No overflow: QC stays clear.
  auto [diff, status] = Sqsub(MakeUInt128(0x3178534870760322ULL, 0x1982970579751191ULL),
                              MakeUInt128(0x4405109942358830ULL, 0x3454635349234982ULL));
  ASSERT_EQ(diff, MakeUInt128(0x2e407af2ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(status));

  // Negative overflow saturates to INT32_MIN and sets QC.
  auto [neg_diff, neg_status] = Sqsub(MakeUInt128(0x1423696483086410ULL, 0x2592887457999322ULL),
                                      MakeUInt128(0x3749551912219519ULL, 0x0342445230753513ULL));
  ASSERT_EQ(neg_diff, MakeUInt128(0x80000000ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(neg_status));

  // Positive overflow saturates to INT32_MAX and sets QC.
  auto [pos_diff, pos_status] = Sqsub(MakeUInt128(0x3083508879584152ULL, 0x1489912761065137ULL),
                                      MakeUInt128(0x4153943580721139ULL, 0x0328574918769094ULL));
  ASSERT_EQ(pos_diff, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(pos_status));
}
6086 
TEST(Arm64InsnTest, SignedSaturatingSubtractInt64x1) {
  // SQSUB (scalar, 64-bit): signed saturating subtract.
  constexpr auto Sqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %d0, %d2, %d3");

  // No overflow: QC stays clear.
  auto [diff, status] = Sqsub(MakeUInt128(0x4416125223196943ULL, 0x4712064173754912ULL),
                              MakeUInt128(0x1635700857369439ULL, 0x7305979709719726ULL));
  ASSERT_EQ(diff, MakeUInt128(0x2de0a249cbe2d50aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Positive overflow saturates to INT64_MAX and sets QC.
  auto [sat_diff, sat_status] = Sqsub(MakeUInt128(0x7862766490242516ULL, 0x1990277471090335ULL),
                                      MakeUInt128(0x9333093049483805ULL, 0x9785662884478744ULL));
  ASSERT_EQ(sat_diff, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6102 
TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x4) {
  // SQSUB (vector, 32-bit lanes): per-lane signed saturating subtract.
  constexpr auto Sqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %0.4s, %2.4s, %3.4s");

  // No lane overflows: QC stays clear.
  auto [diff, status] = Sqsub(MakeUInt128(0x4485680977569630ULL, 0x3129588719161129ULL),
                              MakeUInt128(0x2946818849363386ULL, 0x4739274760122696ULL));
  ASSERT_EQ(diff, MakeUInt128(0x1b3ee6812e2062aaULL, 0xe9f03140b903ea93ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Some lanes saturate (to INT32_MIN / INT32_MAX) and QC is set.
  auto [sat_diff, sat_status] = Sqsub(MakeUInt128(0x9304127100727784ULL, 0x9301555038895360ULL),
                                      MakeUInt128(0x3382619293437970ULL, 0x8187432094991415ULL));
  ASSERT_EQ(sat_diff, MakeUInt128(0x800000006d2efe14ULL, 0x117a12307fffffffULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6118 
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x1) {
  // UQSUB (scalar, 32-bit): unsigned saturating subtract; underflow clamps
  // the result to zero.
  constexpr auto Uqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %s0, %s2, %s3");

  // No underflow: QC stays clear.
  auto [diff, status] = Uqsub(MakeUInt128(0x2548156091372812ULL, 0x8406333039373562ULL),
                              MakeUInt128(0x4200160456645574ULL, 0x1458816605216660ULL));
  ASSERT_EQ(diff, MakeUInt128(0x3ad2d29eULL, 0U));
  ASSERT_FALSE(IsQcBitSet(status));

  // Underflow saturates to zero and sets QC.
  auto [sat_diff, sat_status] = Uqsub(MakeUInt128(0x1259960281839309ULL, 0x5487090590738613ULL),
                                      MakeUInt128(0x5191459181951029ULL, 0x7327875571049729ULL));
  ASSERT_EQ(sat_diff, MakeUInt128(0U, 0U));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6134 
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt64x1) {
  // UQSUB (scalar, 64-bit): unsigned saturating subtract.
  constexpr auto Uqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %d0, %d2, %d3");

  // No underflow: QC stays clear.
  auto [diff, status] = Uqsub(MakeUInt128(0x9691077542576474ULL, 0x8832534141213280ULL),
                              MakeUInt128(0x0626717094009098ULL, 0x2235296579579978ULL));
  ASSERT_EQ(diff, MakeUInt128(0x906a9604ae56d3dcULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Underflow saturates to zero and sets QC.
  auto [sat_diff, sat_status] = Uqsub(MakeUInt128(0x7752929106925043ULL, 0x2614469501098610ULL),
                                      MakeUInt128(0x8889991465855188ULL, 0x1873582528164302ULL));
  ASSERT_EQ(sat_diff, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6150 
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x4) {
  // UQSUB (vector, 32-bit lanes): per-lane unsigned saturating subtract.
  constexpr auto Uqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %0.4s, %2.4s, %3.4s");

  // No lane underflows: QC stays clear.
  auto [diff, status] = Uqsub(MakeUInt128(0x6884962578665885ULL, 0x9991798675205545ULL),
                              MakeUInt128(0x5809900455646117ULL, 0x8755249370124553ULL));
  ASSERT_EQ(diff, MakeUInt128(0x107b06212301f76eULL, 0x123c54f3050e0ff2ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // One lane underflows to zero and QC is set.
  auto [sat_diff, sat_status] = Uqsub(MakeUInt128(0x5032678340586301ULL, 0x9301932429963972ULL),
                                      MakeUInt128(0x0444517928812285ULL, 0x4478211953530898ULL));
  ASSERT_EQ(sat_diff, MakeUInt128(0x4bee160a17d7407cULL, 0x4e89720b00000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6166 
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt8x1) {
  // SQABS (scalar, 8-bit): signed saturating absolute value; abs(INT8_MIN)
  // saturates to INT8_MAX with QC set.
  constexpr auto Sqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %b0, %b2");

  // abs(0x81) = 0x7f without saturation.
  auto [abs_val, status] = Sqabs(MakeUInt128(0x8918016855727981ULL, 0x5642185819119749ULL));
  ASSERT_EQ(abs_val, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // abs(0x80) saturates to 0x7f and sets QC.
  auto [sat_val, sat_status] = Sqabs(MakeUInt128(0x0000000000000080ULL, 0x6464607287574305ULL));
  ASSERT_EQ(sat_val, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6180 
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt64x1) {
  // SQABS (scalar, 64-bit): abs(INT64_MIN) saturates to INT64_MAX.
  constexpr auto Sqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %d0, %d2");

  // Ordinary negative input: exact absolute value, QC clear.
  auto [abs_val, status] = Sqabs(MakeUInt128(0x9717317281315179ULL, 0x3290443112181587ULL));
  ASSERT_EQ(abs_val, MakeUInt128(0x68e8ce8d7eceae87ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // INT64_MIN saturates to INT64_MAX and sets QC.
  auto [sat_val, sat_status] = Sqabs(MakeUInt128(0x8000000000000000ULL, 0x1001237687219447ULL));
  ASSERT_EQ(sat_val, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6194 
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt32x4) {
  // SQABS (vector, 32-bit lanes): per-lane saturating absolute value.
  constexpr auto Sqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %0.4s, %2.4s");

  // No lane holds INT32_MIN: QC stays clear.
  auto [abs_val, status] = Sqabs(MakeUInt128(0x9133820578492800ULL, 0x6982551957402018ULL));
  ASSERT_EQ(abs_val, MakeUInt128(0x6ecc7dfb78492800ULL, 0x6982551957402018ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // An INT32_MIN lane saturates to INT32_MAX and QC is set.
  auto [sat_val, sat_status] = Sqabs(MakeUInt128(0x1810564129725083ULL, 0x6070356880000000ULL));
  ASSERT_EQ(sat_val, MakeUInt128(0x1810564129725083ULL, 0x607035687fffffffULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6208 
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x1) {
  // SQNEG (scalar, 32-bit): signed saturating negate; -INT32_MIN saturates
  // to INT32_MAX.
  constexpr auto Sqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %s0, %s2");

  // Ordinary input negates exactly: QC stays clear.
  auto [neg_val, status] = Sqneg(MakeUInt128(0x6461582694563802ULL, 0x3950283712168644ULL));
  ASSERT_EQ(neg_val, MakeUInt128(0x000000006ba9c7feULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // INT32_MIN saturates to INT32_MAX and sets QC.
  auto [sat_val, sat_status] = Sqneg(MakeUInt128(0x6561785280000000ULL, 0x1277128269186886ULL));
  ASSERT_EQ(sat_val, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6222 
TEST(Arm64InsnTest, SignedSaturatingNegateInt64x1) {
  // SQNEG (scalar, 64-bit): -INT64_MIN saturates to INT64_MAX.
  constexpr auto Sqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %d0, %d2");

  // Ordinary input negates exactly: QC stays clear.
  auto [neg_val, status] = Sqneg(MakeUInt128(0x9703600795698276ULL, 0x2639234410714658ULL));
  ASSERT_EQ(neg_val, MakeUInt128(0x68fc9ff86a967d8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // INT64_MIN saturates to INT64_MAX and sets QC.
  auto [sat_val, sat_status] = Sqneg(MakeUInt128(0x8000000000000000ULL, 0x4052295369374997ULL));
  ASSERT_EQ(sat_val, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6236 
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x4) {
  // SQNEG (vector, 32-bit lanes): per-lane saturating negate.
  constexpr auto Sqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %0.4s, %2.4s");

  // No lane holds INT32_MIN: QC stays clear.
  auto [neg_val, status] = Sqneg(MakeUInt128(0x9172320202822291ULL, 0x4886959399729974ULL));
  ASSERT_EQ(neg_val, MakeUInt128(0x6e8dcdfefd7ddd6fULL, 0xb7796a6d668d668cULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // An INT32_MIN lane saturates to INT32_MAX and QC is set.
  auto [sat_val, sat_status] = Sqneg(MakeUInt128(0x2974711553718589ULL, 0x2423849380000000ULL));
  ASSERT_EQ(sat_val, MakeUInt128(0xd68b8eebac8e7a77ULL, 0xdbdc7b6d7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6250 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x1) {
  // SQSHL (immediate, scalar 32-bit): signed saturating left shift by 20.
  constexpr auto Sqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %s0, %s2, #20");

  // Shifted value still fits in 32 bits: QC stays clear.
  auto [shifted, status] = Sqshl(MakeUInt128(0x9724611600000181ULL, 0x0003509892864120ULL));
  ASSERT_EQ(shifted, MakeUInt128(0x0000000018100000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Shift overflows: result saturates to INT32_MAX and QC is set.
  auto [sat_val, sat_status] = Sqshl(MakeUInt128(0x4195163551108763ULL, 0x2042676129798265ULL));
  ASSERT_EQ(sat_val, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6264 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt64x1) {
  // SQSHL (immediate, scalar 64-bit): signed saturating left shift by 28.
  constexpr auto Sqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %d0, %d2, #28");

  // Shifted value still fits in 64 bits: QC stays clear.
  auto [shifted, status] = Sqshl(MakeUInt128(0x0000000774000539ULL, 0x2622760323659751ULL));
  ASSERT_EQ(shifted, MakeUInt128(0x7740005390000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Negative input overflows: result saturates to INT64_MIN and QC is set.
  auto [sat_val, sat_status] = Sqshl(MakeUInt128(0x9938714995449137ULL, 0x3020518436690767ULL));
  ASSERT_EQ(sat_val, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6278 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x4) {
  // SQSHL (immediate, vector 32-bit lanes): per-lane signed saturating left
  // shift by 12.
  constexpr auto Sqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %0.4s, %2.4s, #12");

  // Every lane fits after the shift: QC stays clear.
  auto [shifted, status] = Sqshl(MakeUInt128(0x0007256800042011ULL, 0x0000313500033555ULL));
  ASSERT_EQ(shifted, MakeUInt128(0x7256800042011000ULL, 0x0313500033555000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Some lanes overflow (to INT32_MAX / INT32_MIN) and QC is set.
  auto [sat_val, sat_status] = Sqshl(MakeUInt128(0x0944031900072034ULL, 0x8651010561049872ULL));
  ASSERT_EQ(sat_val, MakeUInt128(0x7fffffff72034000ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6292 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt32x1) {
  // SQSHL (register, scalar 32-bit): the shift count comes from a register;
  // negative counts shift right, large positive counts saturate and set QC.
  constexpr auto Sqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %s0, %s2, %s3");

  __uint128_t input = MakeUInt128(0x7480771811555330ULL, 0x9098870255052076ULL);
  struct {
    int shift;
    uint64_t expected;
    bool qc;
  } kCases[] = {
      {-33, 0U, false},
      {-32, 0U, false},
      {-31, 0U, false},
      {-1, 0x08aaa998ULL, false},
      {0, 0x11555330ULL, false},
      {1, 0x22aaa660ULL, false},
      {31, 0x7fffffffULL, true},
      {32, 0x7fffffffULL, true},
      {33, 0x7fffffffULL, true},
  };
  for (const auto& test_case : kCases) {
    auto [res, fpsr] = Sqshl(input, test_case.shift);
    ASSERT_EQ(res, MakeUInt128(test_case.expected, 0U));
    ASSERT_EQ(IsQcBitSet(fpsr), test_case.qc);
  }
}
6336 
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt64x1) {
  // UQSHL (immediate, scalar 64-bit): unsigned saturating left shift by 28.
  constexpr auto Uqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %d0, %d2, #28");

  // Shifted value still fits in 64 bits: QC stays clear.
  auto [shifted, status] = Uqshl(MakeUInt128(0x0000000961573564ULL, 0x8883443185280853ULL));
  ASSERT_EQ(shifted, MakeUInt128(0x9615735640000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(status));

  // Shift overflows: result saturates to UINT64_MAX and QC is set.
  auto [sat_val, sat_status] = Uqshl(MakeUInt128(0x9759277344336553ULL, 0x8418834030351782ULL));
  ASSERT_EQ(sat_val, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_status));
}
6350 
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt32x4) {
  // UQSHL (immediate, 4 x 32-bit lanes): each lane shifts left by 12 with
  // unsigned saturation.
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %0.4s, %2.4s, #12");

  // All four lanes stay in range; QC remains clear.
  __uint128_t input1 = MakeUInt128(0x0000326300096218ULL, 0x0004565900066853ULL);
  auto [out1, flags1] = AsmUqshl(input1);
  ASSERT_EQ(out1, MakeUInt128(0x0326300096218000ULL, 0x4565900066853000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Some lanes overflow: those lanes saturate to 0xffffffff and QC is set.
  __uint128_t input2 = MakeUInt128(0x0009911314010804ULL, 0x0009732335449090ULL);
  auto [out2, flags2] = AsmUqshl(input2);
  ASSERT_EQ(out2, MakeUInt128(0x99113000ffffffffULL, 0x97323000ffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6364 
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt32x1) {
  // UQSHL (register form, 32-bit scalar): the shift count is signed; negative
  // counts shift right, positive counts shift left with unsigned saturation.
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %s0, %s2, %s3");

  __uint128_t input = MakeUInt128(0x9714978507414585ULL, 0x3085781339156270ULL);

  // Large right shifts flush the element to zero; QC stays clear.
  auto [out_a, flags_a] = AsmUqshl(input, -33);
  ASSERT_EQ(out_a, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_a));

  auto [out_b, flags_b] = AsmUqshl(input, -32);
  ASSERT_EQ(out_b, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_b));

  auto [out_c, flags_c] = AsmUqshl(input, -31);
  ASSERT_EQ(out_c, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_c));

  // Small shifts in either direction keep the value in range.
  auto [out_d, flags_d] = AsmUqshl(input, -1);
  ASSERT_EQ(out_d, MakeUInt128(0x03a0a2c2ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_d));

  auto [out_e, flags_e] = AsmUqshl(input, 0);
  ASSERT_EQ(out_e, MakeUInt128(0x07414585ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_e));

  auto [out_f, flags_f] = AsmUqshl(input, 1);
  ASSERT_EQ(out_f, MakeUInt128(0x0e828b0aULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_f));

  // Left shifts of 31 or more overflow: result saturates to 0xffffffff and
  // the QC (cumulative saturation) bit is set.
  auto [out_g, flags_g] = AsmUqshl(input, 31);
  ASSERT_EQ(out_g, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags_g));

  auto [out_h, flags_h] = AsmUqshl(input, 32);
  ASSERT_EQ(out_h, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags_h));

  auto [out_i, flags_i] = AsmUqshl(input, 33);
  ASSERT_EQ(out_i, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags_i));
}
6408 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt16x8) {
  // SQSHL (register form, 8 x 16-bit lanes): per-lane signed shift counts from
  // the second vector; signed saturation on overflow.
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %0.8h, %2.8h, %3.8h");

  __uint128_t shifts = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);

  // Zero input stays zero for any shift count; QC stays clear.
  __uint128_t zeros = 0U;
  auto [out1, flags1] = AsmSqshl(zeros, shifts);
  ASSERT_EQ(out1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Non-zero lanes: some saturate to INT16_MAX (0x7fff) and set QC.
  __uint128_t value = MakeUInt128(0x3333333333333333ULL, 0x3333333333333333ULL);
  auto [out2, flags2] = AsmSqshl(value, shifts);
  ASSERT_EQ(out2, MakeUInt128(0x0000000000001999ULL, 0x66667fff7fff7fffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6423 
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt16x8) {
  // UQSHL (register form, 8 x 16-bit lanes): per-lane signed shift counts;
  // unsigned saturation on overflow.
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %0.8h, %2.8h, %3.8h");

  __uint128_t shifts = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);

  // Zero input stays zero for any shift count; QC stays clear.
  __uint128_t zeros = 0U;
  auto [out1, flags1] = AsmUqshl(zeros, shifts);
  ASSERT_EQ(out1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Non-zero lanes: the overflowing lanes saturate to 0xffff and set QC.
  __uint128_t value = MakeUInt128(0x7777777777777777ULL, 0x7777777777777777ULL);
  auto [out2, flags2] = AsmUqshl(value, shifts);
  ASSERT_EQ(out2, MakeUInt128(0x0000000000003bbbULL, 0xeeeeffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6438 
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  // SQXTN: narrow two signed 64-bit lanes to two 32-bit lanes with signed
  // saturation; the QC bit records whether any lane saturated.
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %0.2s, %2.2d");

  // Both lanes are out of int32 range: results clamp to INT32_MAX/INT32_MIN.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x800000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in int32: values pass through unchanged and QC stays clear.
  // (Literal suffix fixed to ULL for consistency with the rest of the file.)
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  auto [res2, fpsr2] = AsmSqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7ecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6452 
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  // SQXTN (scalar): narrow one signed 64-bit value to 32 bits with signed
  // saturation.
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %s0, %d2");

  // Out of int32 range: clamps to INT32_MAX and sets QC.
  __uint128_t wide = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [clamped, flags1] = AsmSqxtn(wide);
  ASSERT_EQ(clamped, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Fits in int32: passes through unchanged, QC clear.
  __uint128_t narrow = MakeUInt128(0x0000000012345678ULL, 0x0ULL);
  auto [passed, flags2] = AsmSqxtn(narrow);
  ASSERT_EQ(passed, MakeUInt128(0x0000000012345678ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
6466 
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  // UQXTN: narrow two unsigned 64-bit lanes to 32 bits with unsigned
  // saturation.  (Local renamed from the misspelled "AsmUqstn"; literal
  // suffix fixed to ULL for consistency.)
  constexpr auto AsmUqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %0.2s, %2.2d");

  // Both lanes exceed uint32 range: results clamp to 0xffffffff, QC set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmUqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in uint32: values pass through unchanged, QC clear.
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [res2, fpsr2] = AsmUqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6480 
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  // UQXTN (scalar): narrow one unsigned 64-bit value to 32 bits with unsigned
  // saturation.
  constexpr auto AsmUqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %s0, %d2");

  // Exceeds uint32 range: clamps to 0xffffffff and sets QC.
  __uint128_t wide = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [clamped, flags1] = AsmUqxtn(wide);
  ASSERT_EQ(clamped, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Fits in uint32: passes through unchanged, QC clear.
  __uint128_t narrow = MakeUInt128(0x0000000087654321ULL, 0x0ULL);
  auto [passed, flags2] = AsmUqxtn(narrow);
  ASSERT_EQ(passed, MakeUInt128(0x0000000087654321ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
6494 
TEST(Arm64InsnTest, SignedSaturatingExtractNarrow2Int64x2ToInt32x2) {
  // SQXTN2: narrow two signed 64-bit lanes into the UPPER half of the
  // destination with signed saturation; the lower half is preserved.
  constexpr auto AsmSqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtn2 %0.4s, %2.2d");

  // Both source lanes out of int32 range: upper half clamps to
  // INT32_MIN/INT32_MAX, lower half keeps the accumulator, QC set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res1, fpsr1] = AsmSqxtn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x6121865619673378ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both source lanes fit in int32: values pass through, QC clear.
  // (Literal suffix fixed to ULL for consistency with the rest of the file.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res2, fpsr2] = AsmSqxtn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x6121865619673378ULL, 0x7ecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6510 
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrow2Int64x2ToInt32x4) {
  // UQXTN2: narrow two unsigned 64-bit lanes into the UPPER half of the
  // destination with unsigned saturation; the lower half is preserved.
  constexpr auto AsmUqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqxtn2 %0.4s, %2.2d");

  // Both source lanes exceed uint32 range: upper half clamps to all ones.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res1, fpsr1] = AsmUqxtn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x6121865619673378ULL, 0xffffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both source lanes fit in uint32: values pass through, QC clear.
  // (Literal suffix fixed to ULL for consistency with the rest of the file.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res2, fpsr2] = AsmUqxtn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x6121865619673378ULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6526 
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x2ToInt32x2) {
  // SQXTUN: narrow two signed 64-bit lanes to unsigned 32-bit lanes with
  // saturation (negative inputs clamp to 0, large inputs to 0xffffffff).
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %0.2s, %2.2d");

  // Second lane exceeds uint32 range: it clamps to 0xffffffff and QC is set.
  __uint128_t arg1 = MakeUInt128(0x0000000044332211ULL, 0x00000001aabbccddULL);
  auto [res1, fpsr1] = AsmSqxtun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0xffffffff44332211ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in uint32: values pass through unchanged, QC clear.
  // (Literal suffix fixed to ULL for consistency with the rest of the file.)
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [res2, fpsr2] = AsmSqxtun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6540 
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x1ToInt32x1) {
  // SQXTUN (scalar): narrow one signed 64-bit value to an unsigned 32-bit
  // result with saturation.
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %s0, %d2");

  // Exceeds uint32 range: clamps to 0xffffffff and sets QC.
  __uint128_t wide = MakeUInt128(0x00000001ff332211ULL, 0x0ULL);
  auto [clamped, flags1] = AsmSqxtun(wide);
  ASSERT_EQ(clamped, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Fits in uint32: passes through unchanged, QC clear.
  __uint128_t narrow = MakeUInt128(0x00000000ff332211ULL, 0x0ULL);
  auto [passed, flags2] = AsmSqxtun(narrow);
  ASSERT_EQ(passed, MakeUInt128(0x00000000ff332211ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
6554 
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrow2Int64x2ToInt32x4) {
  // SQXTUN2: narrow two signed 64-bit lanes to unsigned 32-bit lanes in the
  // UPPER half of the destination; the lower half is preserved.
  constexpr auto AsmSqxtun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtun2 %0.4s, %2.2d");

  // Second source lane is negative: it clamps to 0 and QC is set.
  __uint128_t arg1 = MakeUInt128(0x0000000089abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqxtun2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0123456789abcdefULL, 0x0000000089abcdefULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both source lanes fit in uint32: values pass through, QC clear.
  // (Literal suffix fixed to ULL for consistency with the rest of the file.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res2, fpsr2] = AsmSqxtun2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0123456789abcdefULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
6570 
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x1) {
  // SUQADD (32-bit scalar): accumulate an unsigned value into a signed
  // accumulator with signed saturation.
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %s0, %s2");

  // Sum stays within int32 range: QC stays clear.
  __uint128_t acc1 = MakeUInt128(0x9392023115638719ULL, 0x5080502467972579ULL);
  __uint128_t add1 = MakeUInt128(0x2497605762625913ULL, 0x3285597263712112ULL);
  auto [sum1, flags1] = AsmSuqadd(acc1, add1);
  ASSERT_EQ(sum1, MakeUInt128(0x0000000077c5e02cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Sum overflows int32: clamps to INT32_MAX and sets QC.
  __uint128_t acc2 = MakeUInt128(0x9099791776687477ULL, 0x4481882870632315ULL);
  __uint128_t add2 = MakeUInt128(0x5158650328981642ULL, 0x2828823274686610ULL);
  auto [sum2, flags2] = AsmSuqadd(acc2, add2);
  ASSERT_EQ(sum2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6586 
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x4) {
  // SUQADD (4 x 32-bit lanes): per-lane signed-saturating accumulate of an
  // unsigned value.
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %0.4s, %2.4s");

  // No lane overflows: QC stays clear.
  __uint128_t acc1 = MakeUInt128(0x2590181000350989ULL, 0x2864120419516355ULL);
  __uint128_t add1 = MakeUInt128(0x1108763204267612ULL, 0x9798265294258829ULL);
  auto [sum1, flags1] = AsmSuqadd(acc1, add1);
  ASSERT_EQ(sum1, MakeUInt128(0x36988e42045b7f9bULL, 0xbffc3856ad76eb7eULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Overflowing lanes clamp to INT32_MAX (0x7fffffff) and set QC.
  __uint128_t acc2 = MakeUInt128(0x9082888934938376ULL, 0x4393992569006040ULL);
  __uint128_t add2 = MakeUInt128(0x6731142209331219ULL, 0x5936202982972351ULL);
  auto [sum2, flags2] = AsmSuqadd(acc2, add2);
  ASSERT_EQ(sum2, MakeUInt128(0x7fffffff3dc6958fULL, 0x7fffffffeb978391ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6602 
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x1) {
  // USQADD (32-bit scalar): accumulate a signed value into an unsigned
  // accumulator with unsigned saturation (clamps at 0 and at 0xffffffff).
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %s0, %s2");

  // Sum stays within uint32 range: QC stays clear.
  __uint128_t acc1 = MakeUInt128(0x9052523242348615ULL, 0x3152097693846104ULL);
  __uint128_t add1 = MakeUInt128(0x2582849714963475ULL, 0x3418375620030149ULL);
  auto [sum1, flags1] = AsmUsqadd(acc1, add1);
  ASSERT_EQ(sum1, MakeUInt128(0x0000000056caba8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Underflow: result clamps to zero and sets QC.
  __uint128_t acc2 = MakeUInt128(0x9887125387801719ULL, 0x6071816407812484ULL);
  __uint128_t add2 = MakeUInt128(0x7847257912407824ULL, 0x5443616823452395ULL);
  auto [sum2, flags2] = AsmUsqadd(acc2, add2);
  ASSERT_EQ(sum2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Overflow: result clamps to 0xffffffff and sets QC.
  __uint128_t acc3 = MakeUInt128(0x9708583970761645ULL, 0x8229630324424328ULL);
  __uint128_t add3 = MakeUInt128(0x2377374595170285ULL, 0x6069806788952176ULL);
  auto [sum3, flags3] = AsmUsqadd(acc3, add3);
  ASSERT_EQ(sum3, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
6624 
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x4) {
  // USQADD (4 x 32-bit lanes): per-lane unsigned-saturating accumulate of a
  // signed value.
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %0.4s, %2.4s");

  // No lane saturates: QC stays clear.
  __uint128_t acc1 = MakeUInt128(0x4129137074982305ULL, 0x7592909166293919ULL);
  __uint128_t add1 = MakeUInt128(0x5014721157586067ULL, 0x2700925477180257ULL);
  auto [sum1, flags1] = AsmUsqadd(acc1, add1);
  ASSERT_EQ(sum1, MakeUInt128(0x913d8581cbf0836cULL, 0x9c9322e5dd413b70ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Saturating lanes clamp high (0xffffffff) or low (0x00000000); QC set.
  __uint128_t acc2 = MakeUInt128(0x7816422828823274ULL, 0x6866106592732197ULL);
  __uint128_t add2 = MakeUInt128(0x9071623846421534ULL, 0x8985247621678905ULL);
  auto [sum2, flags2] = AsmUsqadd(acc2, add2);
  ASSERT_EQ(sum2, MakeUInt128(0xffffffff6ec447a8ULL, 0xf1eb34db00000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6640 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt32x1) {
  // SQRSHL (32-bit scalar): signed shift count; negative counts are rounding
  // right shifts, positive counts shift left with signed saturation.
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %s0, %s2, %s3");

  __uint128_t input = MakeUInt128(0x9736705435580445ULL, 0x8657202276378404ULL);

  // Large right shifts round away the entire element; QC stays clear.
  auto [out_a, flags_a] = AsmSqrshl(input, -33);
  ASSERT_EQ(out_a, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_a));

  auto [out_b, flags_b] = AsmSqrshl(input, -32);
  ASSERT_EQ(out_b, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_b));

  auto [out_c, flags_c] = AsmSqrshl(input, -31);
  ASSERT_EQ(out_c, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_c));

  // Small shifts stay in range.
  auto [out_d, flags_d] = AsmSqrshl(input, -1);
  ASSERT_EQ(out_d, MakeUInt128(0x000000001aac0223ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_d));

  auto [out_e, flags_e] = AsmSqrshl(input, 0);
  ASSERT_EQ(out_e, MakeUInt128(0x0000000035580445ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_e));

  auto [out_f, flags_f] = AsmSqrshl(input, 1);
  ASSERT_EQ(out_f, MakeUInt128(0x000000006ab0088aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_f));

  // Left shifts of 31 or more overflow: clamp to INT32_MAX and set QC.
  auto [out_g, flags_g] = AsmSqrshl(input, 31);
  ASSERT_EQ(out_g, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_g));

  auto [out_h, flags_h] = AsmSqrshl(input, 32);
  ASSERT_EQ(out_h, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_h));

  auto [out_i, flags_i] = AsmSqrshl(input, 33);
  ASSERT_EQ(out_i, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_i));
}
6684 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt16x8) {
  // SQRSHL (8 x 16-bit lanes): per-lane rounding shift with signed saturation.
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %0.8h, %2.8h, %3.8h");

  __uint128_t shifts = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);

  // No lane saturates for this input; QC stays clear.
  __uint128_t value1 = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  auto [out1, flags1] = AsmSqrshl(value1, shifts);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000132ULL, 0xcccd000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Some lanes clamp to INT16_MAX (0x7fff) and set QC.
  __uint128_t value2 = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  auto [out2, flags2] = AsmSqrshl(value2, shifts);
  ASSERT_EQ(out2, MakeUInt128(0x7fff7fff7fff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6699 
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt32x1) {
  // UQRSHL (32-bit scalar): signed shift count; negative counts are rounding
  // right shifts, positive counts shift left with unsigned saturation.
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %s0, %s2, %s3");

  __uint128_t input = MakeUInt128(0x9984124848262367ULL, 0x3771467226061633ULL);

  // Shifts right by 32 or more round away everything.
  auto [out_a, flags_a] = AsmUqrshl(input, -33);
  ASSERT_EQ(out_a, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_a));

  auto [out_b, flags_b] = AsmUqrshl(input, -32);
  ASSERT_EQ(out_b, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_b));

  // A -31 shift still leaves a rounded-up 1 for this input.
  auto [out_c, flags_c] = AsmUqrshl(input, -31);
  ASSERT_EQ(out_c, MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_c));

  auto [out_d, flags_d] = AsmUqrshl(input, -1);
  ASSERT_EQ(out_d, MakeUInt128(0x00000000241311b4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_d));

  auto [out_e, flags_e] = AsmUqrshl(input, 0);
  ASSERT_EQ(out_e, MakeUInt128(0x0000000048262367ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_e));

  auto [out_f, flags_f] = AsmUqrshl(input, 1);
  ASSERT_EQ(out_f, MakeUInt128(0x00000000904c46ceULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_f));

  // Left shifts of 31 or more overflow: clamp to 0xffffffff and set QC.
  auto [out_g, flags_g] = AsmUqrshl(input, 31);
  ASSERT_EQ(out_g, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_g));

  auto [out_h, flags_h] = AsmUqrshl(input, 32);
  ASSERT_EQ(out_h, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_h));

  auto [out_i, flags_i] = AsmUqrshl(input, 33);
  ASSERT_EQ(out_i, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_i));
}
6743 
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt16x8) {
  // UQRSHL (8 x 16-bit lanes): per-lane rounding shift with unsigned
  // saturation.
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %0.8h, %2.8h, %3.8h");

  __uint128_t shifts = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);

  // No lane saturates for this input; QC stays clear.
  __uint128_t value1 = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  auto [out1, flags1] = AsmUqrshl(value1, shifts);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000132ULL, 0x4ccd000000010000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Overflowing lanes clamp to 0xffff and set QC.
  __uint128_t value2 = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  auto [out2, flags2] = AsmUqrshl(value2, shifts);
  ASSERT_EQ(out2, MakeUInt128(0xffffffffffff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6758 
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x1) {
  // SQSHRN (scalar): shift a signed 16-bit value right by 4, then narrow to
  // 8 bits with signed saturation.
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %b0, %h2, #4");

  // Shifted value fits in int8: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x888786614762f943ULL, 0x4140104988899316ULL);
  auto [out1, flags1] = AsmSqshrn(input1);
  ASSERT_EQ(out1, MakeUInt128(0x94U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Shifted value exceeds int8 range: clamps to INT8_MAX (0x7f), QC set.
  __uint128_t input2 = MakeUInt128(0x0051207678103588ULL, 0x6116602029611936ULL);
  auto [out2, flags2] = AsmSqshrn(input2);
  ASSERT_EQ(out2, MakeUInt128(0x7fU, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6772 
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8) {
  // SQSHRN (vector): shift eight signed 16-bit lanes right by 4 and narrow
  // each to 8 bits with signed saturation.
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %0.8b, %2.8h, #4");

  // All lanes fit in int8 after the shift: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x0625051604340253ULL, 0x0299028602670568ULL);
  auto [out1, flags1] = AsmSqshrn(input1);
  ASSERT_EQ(out1, MakeUInt128(0x2928265662514325ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Out-of-range lanes clamp to 0x7f or 0x80 and set QC.
  __uint128_t input2 = MakeUInt128(0x2405806005642114ULL, 0x9386436864224724ULL);
  auto [out2, flags2] = AsmSqshrn(input2);
  ASSERT_EQ(out2, MakeUInt128(0x807f7f7f7f80567fULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6786 
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8Upper) {
  // SQSHRN2: shift-right-narrow into the UPPER half of the destination; the
  // lower half of the accumulator is preserved.
  constexpr auto AsmSqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrn2 %0.16b, %2.8h, #4");

  __uint128_t accum = MakeUInt128(0x3494819262681110ULL, 0x7399482506073949ULL);

  // All lanes fit in int8 after the shift: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x0367034704100536ULL, 0x0175064803000078ULL);
  auto [out1, flags1] = AsmSqshrn2(input1, accum);
  ASSERT_EQ(out1, MakeUInt128(0x3494819262681110ULL, 0x1764300736344153ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Out-of-range lanes clamp to 0x7f and set QC.
  __uint128_t input2 = MakeUInt128(0x4641074501673719ULL, 0x0483109676711344ULL);
  auto [out2, flags2] = AsmSqshrn2(input2, accum);
  ASSERT_EQ(out2, MakeUInt128(0x3494819262681110ULL, 0x487f7f7f7f74167fULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6801 
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x1) {
  // UQSHRN (scalar): shift an unsigned 16-bit value right by 4, then narrow
  // to 8 bits with unsigned saturation.
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %b0, %h2, #4");

  // Shifted value fits in uint8: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x6797172898220360ULL, 0x7028806908776866ULL);
  auto [out1, flags1] = AsmUqshrn(input1);
  ASSERT_EQ(out1, MakeUInt128(0x36U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Shifted value exceeds uint8 range: clamps to 0xff and sets QC.
  __uint128_t input2 = MakeUInt128(0x0593252746378405ULL, 0x3976918480820410ULL);
  auto [out2, flags2] = AsmUqshrn(input2);
  ASSERT_EQ(out2, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6815 
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x8) {
  // UQSHRN (vector): shift eight unsigned 16-bit lanes right by 4 and narrow
  // each to 8 bits with unsigned saturation.
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %0.8b, %2.8h, #4");

  // All lanes fit in uint8 after the shift: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x0867067907600099ULL, 0x0693007509490515ULL);
  auto [out1, flags1] = AsmUqshrn(input1);
  ASSERT_EQ(out1, MakeUInt128(0x6907945186677609ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Out-of-range lanes clamp to 0xff and set QC.
  __uint128_t input2 = MakeUInt128(0x2736049811890413ULL, 0x0433116627747123ULL);
  auto [out2, flags2] = AsmUqshrn(input2);
  ASSERT_EQ(out2, MakeUInt128(0x43ffffffff49ff41ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6829 
// Test name typo fixed: "Unigned" -> "Unsigned".
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x8Upper) {
  // UQSHRN2: shift-right-narrow into the UPPER half of the destination; the
  // lower half of the accumulator is preserved.
  constexpr auto AsmUqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqshrn2 %0.16b, %2.8h, #4");

  __uint128_t accum = MakeUInt128(0x2393582740194493ULL, 0x5665161088463125ULL);

  // All lanes fit in uint8 after the shift: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0441018407410768ULL, 0x0981066307240048ULL);
  auto [res1, fpsr1] = AsmUqshrn2(arg1, accum);
  ASSERT_EQ(res1, MakeUInt128(0x2393582740194493ULL, 0x9866720444187476ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Out-of-range lanes clamp to 0xff and set QC.
  __uint128_t arg3 = MakeUInt128(0x0785297709734684ULL, 0x3030614624180358ULL);
  auto [res2, fpsr2] = AsmUqshrn2(arg3, accum);
  ASSERT_EQ(res2, MakeUInt128(0x2393582740194493ULL, 0xffffff3578ff97ffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6844 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x1) {
  // SQRSHRN (scalar): rounding shift right by 4, then narrow to 8 bits with
  // signed saturation.
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %b0, %h2, #4");

  // Rounded result fits in int8: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x9610330799410534ULL, 0x7784574699992128ULL);
  auto [out1, flags1] = AsmSqrshrn(input1);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000053ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Positive overflow clamps to INT8_MAX (0x7f) and sets QC.
  __uint128_t input2 = MakeUInt128(0x5999993996122816ULL, 0x1521931488876938ULL);
  auto [out2, flags2] = AsmSqrshrn(input2);
  ASSERT_EQ(out2, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Negative overflow clamps to INT8_MIN (0x80) and sets QC.
  __uint128_t input3 = MakeUInt128(0x8022281083009986ULL, 0x0165494165426169ULL);
  auto [out3, flags3] = AsmSqrshrn(input3);
  ASSERT_EQ(out3, MakeUInt128(0x0000000000000080ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
6863 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8) {
  // SQRSHRN (vector): rounding shift right by 4 on eight signed 16-bit lanes,
  // narrowing each to 8 bits with signed saturation.
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %0.8b, %2.8h, #4");

  // All lanes fit in int8 after rounding: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x0666070401700260ULL, 0x0520059204930759ULL);
  auto [out1, flags1] = AsmSqrshrn(input1);
  ASSERT_EQ(out1, MakeUInt128(0x5259497666701726ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Out-of-range lanes clamp to 0x7f / 0x80 and set QC.
  __uint128_t input2 = MakeUInt128(0x4143408146852981ULL, 0x5053947178900451ULL);
  auto [out2, flags2] = AsmSqrshrn(input2);
  ASSERT_EQ(out2, MakeUInt128(0x7f807f457f7f7f7fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6877 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  // SQRSHRN2: rounding shift-right-narrow into the UPPER half of the
  // destination; the lower half of the accumulator is preserved.
  constexpr auto AsmSqrshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrn2 %0.16b, %2.8h, #4");

  // All lanes fit in int8 after rounding: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x0784017103960497ULL, 0x0707072501740336ULL);
  __uint128_t accum1 = MakeUInt128(0x5662725928440620ULL, 0x4302141137199227ULL);
  auto [out1, flags1] = AsmSqrshrn2(input1, accum1);
  ASSERT_EQ(out1, MakeUInt128(0x5662725928440620ULL, 0x7072173378173949ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Out-of-range lanes clamp to 0x7f / 0x80 and set QC.
  __uint128_t input2 = MakeUInt128(0x2066886512756882ULL, 0x6614973078865701ULL);
  __uint128_t accum2 = MakeUInt128(0x5685016918647488ULL, 0x5416791545965072ULL);
  auto [out2, flags2] = AsmSqrshrn2(input2, accum2);
  ASSERT_EQ(out2, MakeUInt128(0x5685016918647488ULL, 0x7f807f7f7f807f7fULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6893 
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x1) {
  // UQRSHRN (scalar): rounding shift right by 4, then narrow to 8 bits with
  // unsigned saturation.
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %b0, %h2, #4");

  // Rounded result fits in uint8: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x9614236585950920ULL, 0x9083073323356034ULL);
  auto [out1, flags1] = AsmUqrshrn(input1);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000092ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Overflow clamps to 0xff and sets QC.
  __uint128_t input2 = MakeUInt128(0x8465318730299026ULL, 0x6596450137183754ULL);
  auto [out2, flags2] = AsmUqrshrn(input2);
  ASSERT_EQ(out2, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6907 
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8) {
  // UQRSHRN (vector): rounding shift right by 4 on eight unsigned 16-bit
  // lanes, narrowing each to 8 bits with unsigned saturation.
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %0.8b, %2.8h, #4");

  // All lanes fit in uint8 after rounding: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x0301067603860240ULL, 0x0011030402470073ULL);
  auto [out1, flags1] = AsmUqrshrn(input1);
  ASSERT_EQ(out1, MakeUInt128(0x0130240730673824ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Overflowing lanes clamp to 0xff and set QC.
  __uint128_t input2 = MakeUInt128(0x5085082872462713ULL, 0x4946368501815469ULL);
  auto [out2, flags2] = AsmUqrshrn(input2);
  ASSERT_EQ(out2, MakeUInt128(0xffff18ffff83ffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6921 
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  // UQRSHRN2: rounding shift-right-narrow into the UPPER half of the
  // destination; the lower half of the accumulator is preserved.
  // (Local renamed AsmUqrshrn -> AsmUqrshrn2 to match the "2" naming used by
  // the sibling AsmSqrshrn2/AsmSqshrun2 tests for the *2 instruction forms.)
  constexpr auto AsmUqrshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqrshrn2 %0.16b, %2.8h, #4");

  // All lanes fit in uint8 after rounding: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0388099005730661ULL, 0x0237022304780112ULL);
  __uint128_t arg2 = MakeUInt128(0x0392269110277722ULL, 0x6102544149221576ULL);
  auto [res1, fpsr1] = AsmUqrshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0392269110277722ULL, 0x2322481139995766ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing lanes clamp to 0xff and set QC.
  __uint128_t arg3 = MakeUInt128(0x9254069617600504ULL, 0x7974928060721268ULL);
  __uint128_t arg4 = MakeUInt128(0x8414695726397884ULL, 0x2560084531214065ULL);
  auto [res2, fpsr2] = AsmUqrshrn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x8414695726397884ULL, 0xffffffffff69ff50ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6937 
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x1) {
  // SQSHRUN (scalar): shift a signed 16-bit value right by 4 and narrow to an
  // unsigned 8-bit result (negative values clamp to 0, large values to 0xff).
  constexpr auto AsmSqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %b0, %h2, #4");

  // Shifted value fits in uint8: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x9143611439920063ULL, 0x8005083214098760ULL);
  auto [out1, flags1] = AsmSqshrun(input1);
  ASSERT_EQ(out1, MakeUInt128(0x06U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Negative input clamps to zero and sets QC.
  __uint128_t input2 = MakeUInt128(0x3815174571259975ULL, 0x4953580239983146ULL);
  auto [out2, flags2] = AsmSqshrun(input2);
  ASSERT_EQ(out2, MakeUInt128(0x00U, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Too-large input clamps to 0xff and sets QC.
  __uint128_t input3 = MakeUInt128(0x4599309324851025ULL, 0x1682944672606661ULL);
  auto [out3, flags3] = AsmSqshrun(input3);
  ASSERT_EQ(out3, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
6956 
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8) {
  // SQSHRUN (vector): shift eight signed 16-bit lanes right by 4 and narrow
  // each to an unsigned 8-bit lane with saturation.
  constexpr auto AsmSqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %0.8b, %2.8h, #4");

  // All lanes fit in uint8 after the shift: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x0911066408340874ULL, 0x0800074107250670ULL);
  auto [out1, flags1] = AsmSqshrun(input1);
  ASSERT_EQ(out1, MakeUInt128(0x8074726791668387ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Saturating lanes clamp to 0xff (too large) or 0x00 (negative); QC set.
  __uint128_t input2 = MakeUInt128(0x4792258319129415ULL, 0x7390809143831384ULL);
  auto [out2, flags2] = AsmSqshrun(input2);
  ASSERT_EQ(out2, MakeUInt128(0xff00ffffffffff00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6970 
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8Upper) {
  // SQSHRUN2 writes the narrowed lanes into the upper half of the
  // destination, preserving the destination's lower half.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrun2 %0.16b, %2.8h, #4");

  // No saturation: lower half of destination carried through unchanged.
  __uint128_t src1 = MakeUInt128(0x0625082101740415ULL, 0x0233074903960353ULL);
  __uint128_t dst1 = MakeUInt128(0x0136178653673760ULL, 0x6421667781377399ULL);
  auto [out1, flags1] = Insn(src1, dst1);
  ASSERT_EQ(out1, MakeUInt128(0x0136178653673760ULL, 0x2374393562821741ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Saturating lanes clamp and set QC.
  __uint128_t src2 = MakeUInt128(0x4295810545651083ULL, 0x1046297282937584ULL);
  __uint128_t dst2 = MakeUInt128(0x1611625325625165ULL, 0x7249807849209989ULL);
  auto [out2, flags2] = Insn(src2, dst2);
  ASSERT_EQ(out2, MakeUInt128(0x1611625325625165ULL, 0xffff00ffff00ffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6986 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x1) {
  // SQRSHRUN (scalar): like SQSHRUN but with rounding before the narrow.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %b0, %h2, #4");

  // In-range value: no saturation.
  __uint128_t in1 = MakeUInt128(0x5760186946490886ULL, 0x8154528562134698ULL);
  auto [out1, flags1] = Insn(in1);
  ASSERT_EQ(out1, MakeUInt128(0x88ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Negative input clamps to 0 and sets QC.
  __uint128_t in2 = MakeUInt128(0x8355444560249556ULL, 0x6684366029221951ULL);
  auto [out2, flags2] = Insn(in2);
  ASSERT_EQ(out2, MakeUInt128(0x00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Too-large input clamps to 0xff and sets QC.
  __uint128_t in3 = MakeUInt128(0x2483091060537720ULL, 0x1980218310103270ULL);
  auto [out3, flags3] = Insn(in3);
  ASSERT_EQ(out3, MakeUInt128(0xffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7005 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8) {
  // SQRSHRUN (vector, lower half): rounding shift right, narrow with
  // unsigned saturation.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %0.8b, %2.8h, #4");

  // No lane saturates.
  __uint128_t in1 = MakeUInt128(0x0150069001490702ULL, 0x0673033808340550ULL);
  auto [out1, flags1] = Insn(in1);
  ASSERT_EQ(out1, MakeUInt128(0x6734835515691570ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Out-of-range lanes clamp and set QC.
  __uint128_t in2 = MakeUInt128(0x8363660178487710ULL, 0x6080980426924713ULL);
  auto [out2, flags2] = Insn(in2);
  ASSERT_EQ(out2, MakeUInt128(0xff00ffff00ffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7019 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8Upper) {
  // SQRSHRUN2 narrows into the upper half of the destination and keeps its
  // lower half intact.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrun2 %0.16b, %2.8h, #4");

  // No saturation.
  __uint128_t src1 = MakeUInt128(0x0733049502080757ULL, 0x0651018705990498ULL);
  __uint128_t dst1 = MakeUInt128(0x5693795623875551ULL, 0x6175754380917805ULL);
  auto [out1, flags1] = Insn(src1, dst1);
  ASSERT_EQ(out1, MakeUInt128(0x5693795623875551ULL, 0x65185a4a73492175ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Saturating lanes clamp and set QC.
  __uint128_t src2 = MakeUInt128(0x1444671298615527ULL, 0x5982014514102756ULL);
  __uint128_t dst2 = MakeUInt128(0x0068929750246304ULL, 0x0173514891945763ULL);
  auto [out2, flags2] = Insn(src2, dst2);
  ASSERT_EQ(out2, MakeUInt128(0x0068929750246304ULL, 0xff14ffffffff00ffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7035 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x1) {
  // SQSHLU (scalar): signed input shifted left, saturated to the unsigned
  // 32-bit range.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %s0, %s2, #4");

  // In-range value: no saturation.
  __uint128_t in1 = MakeUInt128(0x9704033001862556ULL, 0x1473321177711744ULL);
  auto [out1, flags1] = Insn(in1);
  ASSERT_EQ(out1, MakeUInt128(0x18625560ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Negative input clamps to 0 and sets QC.
  __uint128_t in2 = MakeUInt128(0x3095760196946490ULL, 0x8868154528562134ULL);
  auto [out2, flags2] = Insn(in2);
  ASSERT_EQ(out2, MakeUInt128(0x00000000ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Overflowing input clamps to all-ones and sets QC.
  __uint128_t in3 = MakeUInt128(0x1335028160884035ULL, 0x1781452541964320ULL);
  auto [out3, flags3] = Insn(in3);
  ASSERT_EQ(out3, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7054 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x4) {
  // SQSHLU (vector): four 32-bit lanes, each shifted left with unsigned
  // saturation.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %0.4s, %2.4s, #4");

  // No lane saturates.
  __uint128_t in1 = MakeUInt128(0x0865174507877133ULL, 0x0813875205980941ULL);
  auto [out1, flags1] = Insn(in1);
  ASSERT_EQ(out1, MakeUInt128(0x8651745078771330ULL, 0x8138752059809410ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Out-of-range lanes clamp (negative -> 0, overflow -> all-ones) and set QC.
  __uint128_t in2 = MakeUInt128(0x2174227300352296ULL, 0x0080891797050682ULL);
  auto [out2, flags2] = Insn(in2);
  ASSERT_EQ(out2, MakeUInt128(0xffffffff03522960ULL, 0x0808917000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7068 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x2) {
  // SQDMULL (vector): 32-bit lanes multiplied, doubled, widened to 64-bit
  // with saturation.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.2s");

  // Small operands: no saturation.
  __uint128_t lhs1 = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  __uint128_t rhs1 = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000010ULL, 0x000000000000000cULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT32_MIN * INT32_MIN doubled saturates to INT64_MAX and sets QC.
  __uint128_t lhs2 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t rhs2 = MakeUInt128(0x8000000000000002ULL, 0xfeed00040000002ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x0000000000000010ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7084 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong16x4) {
  // SQDMULL (vector): 16-bit lanes multiplied, doubled, widened to 32-bit
  // with saturation.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.4h");

  // No saturation.
  __uint128_t lhs1 = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs1 = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0000f00000000010ULL, 0x000000400000000cULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT16_MIN * INT16_MIN doubled saturates to INT32_MAX and sets QC.
  __uint128_t lhs2 = MakeUInt128(0x8000000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs2 = MakeUInt128(0x8000000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x0000f00000000010ULL, 0x7fffffff0000000cULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7100 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x2) {
  // SQDMULL2: same as SQDMULL but operating on the upper halves of the
  // source vectors.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.4s");

  // No saturation.
  __uint128_t lhs1 = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  __uint128_t rhs1 = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0000000800000040ULL, 0xffddc4ed7f98e000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT32_MIN * INT32_MIN in the upper lanes saturates and sets QC.
  __uint128_t lhs2 = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  __uint128_t rhs2 = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x0000000000000040ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7116 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper16x4) {
  // SQDMULL2 on 16-bit lanes from the upper halves of the sources.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.8h");

  // No saturation.
  __uint128_t lhs1 = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs1 = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x00000226ff6ae4b6ULL, 0x00b4e592fffd8eceULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT16_MIN * INT16_MIN in the upper lanes saturates and sets QC.
  __uint128_t lhs2 = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  __uint128_t rhs2 = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x0000000000000040ULL, 0x7fffffff00000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7132 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x2IndexedElem) {
  // SQDMULL (by element): every lane of the first source is multiplied by
  // the single selected lane of the second source.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.s[1]");

  // No saturation.
  __uint128_t vec1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011LL);
  __uint128_t elem1 = MakeUInt128(0x0000000200000000ULL, 0x000000000000000ULL);
  auto [out1, flags1] = Insn(vec1, elem1);
  ASSERT_EQ(out1, MakeUInt128(0x000000004488cd10ULL, 0x0000000000880088ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT32_MIN * INT32_MIN saturates to INT64_MAX and sets QC.
  __uint128_t vec2 = MakeUInt128(0x0022002280000000ULL, 0x1122334400110011LL);
  __uint128_t elem2 = MakeUInt128(0x8000000000000000ULL, 0x000000000000000ULL);
  auto [out2, flags2] = Insn(vec2, elem2);
  ASSERT_EQ(out2, MakeUInt128(0x7fffffffffffffffULL, 0xffddffde00000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7148 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x4IndexedElem) {
  // SQDMULL (by element) on 16-bit lanes, multiplying by lane 4 of the
  // second source.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.h[4]");

  // No saturation.
  __uint128_t vec1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011LL);
  __uint128_t elem1 = MakeUInt128(0x000f000f000f000fULL, 0x000f000f000f0002ULL);
  auto [out1, flags1] = Insn(vec1, elem1);
  ASSERT_EQ(out1, MakeUInt128(0x000044880000cd10ULL, 0x0000008800000088ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT16_MIN * INT16_MIN saturates to INT32_MAX and sets QC.
  __uint128_t vec2 = MakeUInt128(0x0022002280000000ULL, 0x1122334400118000ULL);
  __uint128_t elem2 = MakeUInt128(0x1111111122222222ULL, 0x1122334411228000ULL);
  auto [out2, flags2] = Insn(vec2, elem2);
  ASSERT_EQ(out2, MakeUInt128(0x7fffffff00000000ULL, 0xffde0000ffde0000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7164 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper64x2IndexedElem) {
  // SQDMULL2 (by element): upper 32-bit lanes of the first source times
  // lane 3 of the second.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.s[3]");

  // No saturation.
  __uint128_t vec1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t elem1 = MakeUInt128(0xffffffffffffffffULL, 0x00000002ffffffffULL);
  auto [out1, flags1] = Insn(vec1, elem1);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000440044ULL, 0x000000004488cd10ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT32_MIN * INT32_MIN saturates to INT64_MAX and sets QC.
  __uint128_t vec2 = MakeUInt128(0x80000000ffffffffULL, 0x1122334480000000ULL);
  __uint128_t elem2 = MakeUInt128(0x1122334411223344ULL, 0x80000000ffffffffULL);
  auto [out2, flags2] = Insn(vec2, elem2);
  ASSERT_EQ(out2, MakeUInt128(0x7fffffffffffffffULL, 0xeeddccbc00000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7180 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x4IndexedElem) {
  // SQDMULL2 (by element): upper 16-bit lanes of the first source times
  // lane 7 of the second, doubled and widened to 32-bit with saturation.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t arg2 = MakeUInt128(0xffffffffffffffffULL, 0x0002ffffffffffffULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000004400000044ULL, 0x000044880000cd10ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled overflows INT32_MAX; the saturated value
  // 0x7fffffff is visible in the expected result.
  __uint128_t arg3 = MakeUInt128(0x80000000ffffffffULL, 0x112233448000ffffULL);
  __uint128_t arg4 = MakeUInt128(0x1122334411223344ULL, 0x8000ffffffffffffULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff00010000ULL, 0xeede0000ccbc0000ULL));
  // Fix: the saturating case must also verify the cumulative saturation
  // (QC) bit, as every sibling sqdmull test does; this assertion was missing.
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7195 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1) {
  // SQDMULL (scalar): 32-bit operands, 64-bit doubled product with
  // saturation.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %s3");

  // No saturation.
  __uint128_t lhs1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t rhs1 = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0222244440000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT32_MIN * INT32_MIN saturates to INT64_MAX and sets QC.
  __uint128_t lhs2 = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t rhs2 = MakeUInt128(0xff11ff1180000000ULL, 0xffffffff11223344ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7210 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1) {
  // SQDMULL (scalar): 16-bit operands, 32-bit doubled product with
  // saturation.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %h3");

  // No saturation.
  __uint128_t lhs1 = MakeUInt128(0x1111111811112222ULL, 0xf000000700080006ULL);
  __uint128_t rhs1 = MakeUInt128(0x0000000510004444ULL, 0xf000000300080002ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0000000012343210ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT16_MIN * INT16_MIN saturates to INT32_MAX and sets QC.
  __uint128_t lhs2 = MakeUInt128(0xaabbccdd00008000ULL, 0x1122334400110011ULL);
  __uint128_t rhs2 = MakeUInt128(0xff11ff1100008000ULL, 0xffffffff11223344ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7225 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1IndexedElem) {
  // SQDMULL (scalar, by element): 16-bit scalar multiplied by lane 7 of the
  // second source.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %3.h[7]");

  // No saturation.
  __uint128_t lhs1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t rhs1 = MakeUInt128(0x0000000510000000ULL, 0x1111000300000002ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x00000000048d0c84ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT16_MIN * INT16_MIN saturates to INT32_MAX and sets QC.
  __uint128_t lhs2 = MakeUInt128(0xaabbccddaabb8000ULL, 0x1122334400110011ULL);
  __uint128_t rhs2 = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000aabb11223344ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7240 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1IndexedElem) {
  // SQDMULL (scalar, by element): 32-bit scalar multiplied by lane 3 of the
  // second source.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %3.s[3]");

  // No saturation.
  __uint128_t lhs1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t rhs1 = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x000000006666ccccULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT32_MIN * INT32_MIN saturates to INT64_MAX and sets QC.
  __uint128_t lhs2 = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t rhs2 = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000000011223344ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7255 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x2) {
  // SQDMLAL: doubled widening multiply, then saturating accumulate into the
  // destination. Saturation can occur in either step.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.2s");

  // Case 1: neither the multiply nor the accumulate saturates.
  __uint128_t lhs1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs1 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1, acc1);
  ASSERT_EQ(out1, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Case 2: the doubled multiply itself saturates.
  __uint128_t lhs2 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t rhs2 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t acc2 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2, acc2);
  ASSERT_EQ(out2, MakeUInt128(0x0000080000000910ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Case 3: the accumulate step saturates.
  __uint128_t lhs3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs3 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [out3, flags3] = Insn(lhs3, rhs3, acc3);
  ASSERT_EQ(out3, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7283 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong16x4) {
  // SQDMLAL on 16-bit lanes accumulating into 32-bit lanes.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.4h");

  // Case 1: no saturation.
  __uint128_t lhs1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs1 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1, acc1);
  ASSERT_EQ(out1, MakeUInt128(0x0100010001011100ULL, 0x03f0040004024600ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Case 2: the doubled multiply saturates.
  __uint128_t lhs2 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t rhs2 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t acc2 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2, acc2);
  ASSERT_EQ(out2, MakeUInt128(0x0369cba90369cba9ULL, 0x7fffffff0369cba9ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Case 3: the accumulate step saturates.
  __uint128_t lhs3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs3 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x7fffffff12345678ULL, 0x00000a000000b000ULL);
  auto [out3, flags3] = Insn(lhs3, rhs3, acc3);
  ASSERT_EQ(out3, MakeUInt128(0x7fffffff12356678ULL, 0x00000a0000013800ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7311 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x2) {
  // SQDMLAL2: same accumulate as SQDMLAL, sourcing the upper 32-bit lanes.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.4s");

  // Case 1: no saturation.
  __uint128_t lhs1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs1 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1, acc1);
  ASSERT_EQ(out1, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Case 2: the doubled multiply saturates.
  __uint128_t lhs2 = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  __uint128_t rhs2 = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  __uint128_t acc2 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2, acc2);
  ASSERT_EQ(out2, MakeUInt128(0x00024a0066000d00ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Case 3: the accumulate step saturates.
  __uint128_t lhs3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs3 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x1234567812345678ULL, 0x7fffffffffffffffULL);
  auto [out3, flags3] = Insn(lhs3, rhs3, acc3);
  ASSERT_EQ(out3, MakeUInt128(0x13419a0a7d513f58ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7339 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper16x4) {
  // SQDMLAL2 on 16-bit lanes from the upper halves of the sources.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.8h");

  // Case 1: no saturation.
  __uint128_t lhs1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs1 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1, acc1);
  ASSERT_EQ(out1, MakeUInt128(0x020d03f81c24e9e0ULL, 0x050d06f81f24ece0ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Case 2: the doubled multiply saturates.
  __uint128_t lhs2 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t rhs2 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t acc2 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2, acc2);
  ASSERT_EQ(out2, MakeUInt128(0x03b9fa8703b9fa87ULL, 0x7fffffff03b9fa87ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Case 3: the accumulate step saturates.
  __uint128_t lhs3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs3 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x1234567812345678ULL, 0x7fffffff0000b000ULL);
  auto [out3, flags3] = Insn(lhs3, rhs3, acc3);
  ASSERT_EQ(out3, MakeUInt128(0x134159702d593f58ULL, 0x7fffffff1b2598e0ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7367 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1) {
  // SQDMLAL (scalar): 32-bit operands, 64-bit saturating accumulate.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %s3");

  // Case 1: no saturation.
  __uint128_t lhs1 = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t rhs1 = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x12345678000000FFULL, 0x0400040004000400ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1, acc1);
  ASSERT_EQ(out1, MakeUInt128(0x167ce349000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Case 2: the doubled multiply saturates.
  __uint128_t lhs2 = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  __uint128_t rhs2 = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  __uint128_t acc2 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2, acc2);
  ASSERT_EQ(out2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Case 3: the accumulate step saturates.
  __uint128_t lhs3 = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  __uint128_t rhs3 = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [out3, flags3] = Insn(lhs3, rhs3, acc3);
  ASSERT_EQ(out3, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7395 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1) {
  // SQDMLAL (scalar): 16-bit operands, 32-bit saturating accumulate.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %h3");

  // Case 1: no saturation.
  __uint128_t lhs1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs1 = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [out1, flags1] = Insn(lhs1, rhs1, acc1);
  ASSERT_EQ(out1, MakeUInt128(0x0000000001011100ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Case 2: the doubled multiply saturates.
  __uint128_t lhs2 = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
  __uint128_t rhs2 = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
  __uint128_t acc2 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [out2, flags2] = Insn(lhs2, rhs2, acc2);
  ASSERT_EQ(out2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Case 3: the accumulate step saturates.
  __uint128_t lhs3 = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
  __uint128_t rhs3 = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0xaabbccdd7fffffffULL, 0x00000a000000b000ULL);
  auto [out3, flags3] = Insn(lhs3, rhs3, acc3);
  ASSERT_EQ(out3, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7423 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x2IndexedElem) {
  // SQDMLAL (by element): each 32-bit lane times lane 1 of the second
  // source, saturating accumulate into 64-bit lanes.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.s[1]");

  // Case 1: no saturation.
  __uint128_t vec1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t elem1 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [out1, flags1] = Insn(vec1, elem1, acc1);
  ASSERT_EQ(out1, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Case 2: the doubled multiply saturates.
  __uint128_t vec2 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t elem2 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t acc2 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [out2, flags2] = Insn(vec2, elem2, acc2);
  ASSERT_EQ(out2, MakeUInt128(0x000007fc00000900ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Case 3: the accumulate step saturates.
  __uint128_t vec3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t elem3 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [out3, flags3] = Insn(vec3, elem3, acc3);
  ASSERT_EQ(out3, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7451 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x4IndexedElem) {
  // SQDMLAL (by element): each 16-bit lane times lane 7 of the second
  // source, saturating accumulate into 32-bit lanes.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.h[7]");

  // Case 1: no saturation.
  __uint128_t vec1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t elem1 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [out1, flags1] = Insn(vec1, elem1, acc1);
  ASSERT_EQ(out1, MakeUInt128(0x012eb10b89bbca1fULL, 0xfedf0524765b0d28ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Case 2: the doubled multiply saturates.
  __uint128_t vec2 = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
  __uint128_t elem2 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t acc2 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [out2, flags2] = Insn(vec2, elem2, acc2);
  ASSERT_EQ(out2, MakeUInt128(0xbbbc4567777f4567ULL, 0x7fffffff00004567ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Case 3: the accumulate step saturates.
  __uint128_t vec3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t elem3 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t acc3 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [out3, flags3] = Insn(vec3, elem3, acc3);
  ASSERT_EQ(out3, MakeUInt128(0x7fffffff004d4bffULL, 0x0026b00000275600ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7479 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper64x2IndexedElem) {
  // SQDMLAL2 (by element): upper 32-bit lanes times lane 3 of the second
  // source, saturating accumulate into 64-bit lanes.
  constexpr auto Insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.s[3]");

  // Case 1: no saturation.
  __uint128_t vec1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t elem1 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [out1, flags1] = Insn(vec1, elem1, acc1);
  ASSERT_EQ(out1, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Case 2: the doubled multiply saturates.
  __uint128_t vec2 = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
  __uint128_t elem2 = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
  __uint128_t acc2 = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
  auto [out2, flags2] = Insn(vec2, elem2, acc2);
  ASSERT_EQ(out2, MakeUInt128(0x7fffffffffffffffULL, 0xf1e0cfbf04040404ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Case 3: the accumulate step saturates.
  __uint128_t vec3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t elem3 = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [out3, flags3] = Insn(vec3, elem3, acc3);
  ASSERT_EQ(out3, MakeUInt128(0x7fffffffffffffffULL, 0x010d4d926b1d98e0ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7507 
// SQDMLAL2 (by element): upper-half variant over 16-bit lanes.  Multiplies the
// upper eight 16-bit lanes of the first source by lane 7 of the second source,
// doubles to 32 bits, and accumulates into four 32-bit lanes.  Covers the
// no-saturation path plus saturation in the multiply and in the accumulate.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x4IndexedElem) {
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0230485f8a1d9e4fULL, 0xffe9bd9076c60270ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x023645677fffffffULL, 0x0236456702364567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffff0071d05fULL, 0x010d0cf800728060ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7535 
// SQDMLAL (scalar, by element): multiplies the 32-bit scalar source by lane 3
// of the vector source, doubles to 64 bits, and accumulates into the 64-bit
// scalar destination.  Scalar form: the upper 64 bits of the result register
// are expected to be zero.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1IndexedElem) {
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x012eb3d4d07fc65fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7563 
// SQDMLAL (scalar, by element), 16-bit source form: multiplies the 16-bit
// scalar by lane 7 of the vector source, doubles to 32 bits, and accumulates
// into the 32-bit scalar destination.  All bits above the 32-bit result are
// expected to be zero.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1IndexedElem) {
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0000000089bbca1fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition.
  __uint128_t arg7 = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd7fffffffULL, 0x0011223344556677ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7591 
// SQDMLSL (vector): signed saturating doubling multiply-subtract long.
// Multiplies the lower two 32-bit lanes of the two sources element-wise,
// doubles each product to 64 bits, and subtracts from the two 64-bit
// accumulator lanes.  Saturation may occur in the doubling multiply or in
// the subtraction; both set FPSR.QC.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x2) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.2s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0000000080000001ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0000100000000001ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x00001003fffffff9ULL, 0x0400040004000400ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000000000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x00000000000008f0ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7619 
// SQDMLSL (vector), 16-bit lane form: multiplies the lower four 16-bit lanes
// of the two sources element-wise, doubles to 32 bits, and subtracts from the
// four 32-bit accumulator lanes.  Exercises no-saturation, multiply-saturation
// and subtract-saturation paths.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong16x4) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.4h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010000fef100ULL, 0x0410040003fdc200ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfedcbf25fedcbf25ULL, 0x81234568fedcbf25ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000012345678ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000012334678ULL, 0x00000a0000002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7647 
// SQDMLSL2 (vector): upper-half variant.  Multiplies the upper two 32-bit
// lanes of each source, doubles to 64 bits, and subtracts from the two 64-bit
// accumulator lanes.  Covers no-saturation, multiply-saturation and
// subtract-saturation cases.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x2) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.4s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  __uint128_t arg5 = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfffdc5ff9a000500ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x8000000000000000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x112712e5a7176d98ULL, 0x8000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7675 
// SQDMLSL2 (vector), 16-bit lane form: multiplies the upper four 16-bit lanes
// of each source, doubles to 32 bits, and subtracts from the four 32-bit
// accumulator lanes.  Exercises all three saturation scenarios.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper16x4) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.8h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2fe08e5db1820ULL, 0x02f30108e8db1b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfe8c9047fe8c9047ULL, 0x81234568fe8c9047ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x800000000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x11275380f70f6d98ULL, 0x80000000e4dbc720ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7703 
// SQDMLSL (scalar): multiplies the two 32-bit scalar sources, doubles to
// 64 bits, and subtracts from the 64-bit scalar accumulator.  The upper 64
// bits of the result register are expected to be zero.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %s3");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x12345678000000FFULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0debc9a7000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x9122334411111112ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7731 
// SQDMLSL (scalar), 16-bit source form: multiplies the two 16-bit scalar
// sources, doubles to 32 bits, and subtracts from the 32-bit scalar
// accumulator.  All bits above the 32-bit result are expected to be zero.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %h3");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000fef100ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000000091111112ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd80000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7759 
// SQDMLSL (vector, by element): multiplies the lower two 32-bit lanes of the
// first source by lane 1 of the second source, doubles to 64 bits, and
// subtracts from the two 64-bit accumulator lanes.  Covers all three
// saturation scenarios.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x2IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.s[1]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x010000fef0fef100ULL, 0x040003ff7bff7c00ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000080400000900ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7787 
// SQDMLSL (vector, by element), 16-bit lane form: multiplies the lower four
// 16-bit lanes of the first source by lane 7 of the second source, doubles to
// 32 bits, and subtracts from the four 32-bit accumulator lanes.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x4IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0117d9c3899bd1bfULL, 0xfeda700c764d56f8ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x468a45678ac74567ULL, 0x8123456802464567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x80000000ffb2b400ULL, 0xffd96400ffda0a00ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7815 
// SQDMLSL2 (by element): upper-half variant.  Multiplies the upper four
// 32-bit lanes of the first source by lane 3 of the second source, doubles to
// 64 bits, and subtracts from the two 64-bit accumulator lanes.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper64x2IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
  __uint128_t arg6 = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x8101010102020203ULL, 0x1425364704040404ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0xfef2c66d94e3c720ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7843 
// SQDMLSL2 (by element), 16-bit lane form: multiplies the upper eight 16-bit
// lanes of the first source by lane 7 of the second source, doubles to
// 32 bits, and subtracts from the four 32-bit accumulator lanes.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x4IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0016426f8939fd8fULL, 0xfdcfb7a075e261b0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0010456781234568ULL, 0x0010456700104567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x80000000ff8e2fa0ULL, 0xfef30708ff8edfa0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7871 
// SQDMLSL (scalar, by element): multiplies the 32-bit scalar source by lane 3
// of the vector source, doubles to 64 bits, and subtracts from the 64-bit
// scalar accumulator.  Upper 64 bits of the result are expected to be zero.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0117d6fa42d7d57fULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x8123456701234568ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7899 
// SQDMLSL (scalar, by element), 16-bit source form: multiplies the 16-bit
// scalar by lane 7 of the vector source, doubles to 32 bits, and subtracts
// from the 32-bit scalar accumulator.  Bits above the 32-bit result are
// expected to be zero.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x00000000899bd1bfULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000000081234568ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd80000000ULL, 0x0011223344556677ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x0000000080000000ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7927 
// SQRDMULH (vector): signed saturating rounding doubling multiply returning
// the high half, over four 32-bit lanes.  First case takes no saturation;
// second multiplies INT32_MIN by itself, which saturates to INT32_MAX and
// sets FPSR.QC.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.4s");

  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed9UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed2UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7943 
// SQRDMULH (vector), 64-bit form over two 32-bit lanes.  The upper two lanes
// are filled with 0xdeadc0de sentinel data and must come back as zero
// (narrow-form writes clear the upper half of the destination register).
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.2s");

  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x3, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7959 
// SQRDMULH (vector) over eight 16-bit lanes.  Second case multiplies
// INT16_MIN by itself in lane 7 (0x8000 * 0x8000), which saturates to
// INT16_MAX (0x7fff) and sets FPSR.QC.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.8h");

  __uint128_t arg1 = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0002000040000000ULL, 0x7ddc000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000100020000ULL, 0x0002000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7975 
// SQRDMULH (vector), 64-bit form over four 16-bit lanes.  Upper halves of the
// inputs carry 0xdeadc0de sentinels; the result's upper 64 bits must be zero.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.4h");

  __uint128_t arg1 = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0003000040000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000100020000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7991 
// SQRDMULH (by element): multiplies every 32-bit lane of the first source by
// lane 0 of the second source.  The first case also demonstrates the rounding
// step distinguishing SQRDMULH from SQDMULH.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.s[0]");

  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeed, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 7 instead of 8.
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x8UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8008 
// SQRDMULH (by element), 64-bit form: multiplies the lower two 32-bit lanes
// of the first source by lane 0 of the second.  Sentinel 0xdeadc0de lanes
// must come back as zero in the upper half of the result.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.s[0]");

  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8024 
// SQRDMULH (by element) over eight 16-bit lanes, multiplying every lane of
// the first source by lane 7 of the second.  The second case uses 0x8000
// (INT16_MIN) as the indexed element so the 0x8000 lane of the first source
// saturates to 0x7fff and sets FPSR.QC.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.h[7]");

  __uint128_t arg1 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0008fff800040000ULL, 0x0000000800020004ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8040 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4IndexedElem) {
  // SQRDMULH (by element, 4H): only the four low halfword lanes participate;
  // the upper 64 bits of the result must come back zero.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.h[7]");

  __uint128_t lhs = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t rhs = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeUInt128(0x0004fffc00030000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Selected element INT16_MIN (0x8000) forces the 0x8000 lane to saturate; QC set.
  __uint128_t sat_lhs = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t sat_rhs = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8056 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1) {
  // SQRDMULH (scalar, 32-bit): saturating rounding doubling multiply,
  // returning the high half; only the low 32 bits of each operand are used.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %s3");

  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 2 instead of 3.
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled overflows: result saturates to INT32_MAX and QC is set.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8073 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1) {
  // SQRDMULH (scalar, 16-bit): only the lowest halfword of each operand is used.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %h3");

  __uint128_t lhs = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // INT16_MIN * INT16_MIN doubled overflows: saturates to INT16_MAX and sets QC.
  __uint128_t sat_lhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t sat_rhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8089 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1IndexedElem) {
  // SQRDMULH (scalar, by element): multiplies %s2 by element 2 of the second vector.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %3.s[2]");

  __uint128_t lhs = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t rhs = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  // The rounding step bumps the result from 2 up to 3.
  ASSERT_EQ(prod, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // INT32_MIN * INT32_MIN doubled overflows: saturates to INT32_MAX and sets QC.
  __uint128_t sat_lhs = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t sat_rhs = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8106 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1IndexedElem) {
  // SQRDMULH (scalar, by element): multiplies %h2 by element 7 of the second vector.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %3.h[7]");

  __uint128_t lhs = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  // The rounding step bumps the result from 2 up to 3.
  ASSERT_EQ(prod, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // INT16_MIN * INT16_MIN doubled overflows: saturates to INT16_MAX and sets QC.
  __uint128_t sat_lhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t sat_rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8123 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4) {
  // SQDMULH (vector, 4S): like SQRDMULH but without the rounding step.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.4s");

  __uint128_t lhs = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t rhs = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed8UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Lane 0 is INT32_MIN * INT32_MIN doubled: saturates to INT32_MAX and sets QC.
  __uint128_t sat_lhs = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t sat_rhs = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed1UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8139 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2) {
  // SQDMULH (vector, 2S): signed saturating doubling multiply returning the
  // high half of each lane product; the upper 64 bits of the result are cleared.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.2s");

  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is INT32_MIN * INT32_MIN doubled: saturates to INT32_MAX and sets QC.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  // UL suffix added to 0x7fffffff for consistency with every other lane literal.
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8155 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8) {
  // SQDMULH (vector, 8H): all eight halfword lanes, no rounding step.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.8h");

  __uint128_t lhs = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
  __uint128_t rhs = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeUInt128(0x000200003fff0000ULL, 0x7ddc000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // One lane is INT16_MIN * INT16_MIN doubled: saturates to INT16_MAX, setting QC.
  __uint128_t sat_lhs = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
  __uint128_t sat_rhs = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeUInt128(0x7fff000000020000ULL, 0x0002000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8171 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4) {
  // SQDMULH (vector, 4H): lower four halfword lanes only; upper 64 bits cleared.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.4h");

  __uint128_t lhs = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t rhs = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeUInt128(0x000200003fff0000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // One lane is INT16_MIN * INT16_MIN doubled: saturates to INT16_MAX, setting QC.
  __uint128_t sat_lhs = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t sat_rhs = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeUInt128(0x7fff000000020000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8187 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4IndexedElem) {
  // SQDMULH (by element, 4S): every lane is multiplied by element 0 of the
  // second operand; no rounding step.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.s[0]");

  __uint128_t lhs = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t rhs = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeU32x4(0x2UL, 0x0UL, 0x7UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Element 0 is INT32_MIN: lane 0 (also INT32_MIN) saturates to INT32_MAX, setting QC.
  __uint128_t sat_lhs = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t sat_rhs = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8203 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2IndexedElem) {
  // SQDMULH (by element, 2S): two low lanes multiplied by element 0; upper
  // 64 bits of the result cleared; no rounding step.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.s[0]");

  __uint128_t lhs = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t rhs = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Element 0 is INT32_MIN: lane 0 (also INT32_MIN) saturates to INT32_MAX, setting QC.
  __uint128_t sat_lhs = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t sat_rhs = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8219 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8IndexedElem) {
  // SQDMULH (by element, 8H): every halfword lane multiplied by lane 7 of the
  // second operand; no rounding step.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.h[7]");

  __uint128_t lhs = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeUInt128(0x0007fff800040000ULL, 0xffff000700020004ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Multiplier INT16_MIN (0x8000): the 0x8000 lane saturates to INT16_MAX, setting QC.
  __uint128_t sat_lhs = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t sat_rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8235 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4IndexedElem) {
  // SQDMULH (by element, 4H): four low halfword lanes multiplied by lane 7;
  // upper 64 bits of the result cleared.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.h[7]");

  __uint128_t lhs = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t rhs = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeUInt128(0x0003fffc00020000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Multiplier INT16_MIN (0x8000): the 0x8000 lane saturates to INT16_MAX, setting QC.
  __uint128_t sat_lhs = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t sat_rhs = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8251 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1) {
  // SQDMULH (scalar, 32-bit): saturating doubling multiply returning the high
  // half; only the low 32 bits of each operand are used.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %s3");

  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  // Suffix normalized to UL: MakeU32x4 takes 32-bit lane values, so the ULL
  // suffix on the last argument was inconsistent with every sibling call.
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled overflows: saturates to INT32_MAX and sets QC.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8267 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1) {
  // SQDMULH (scalar, 16-bit): only the lowest halfword of each operand is used.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %h3");

  __uint128_t lhs = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // INT16_MIN * INT16_MIN doubled overflows: saturates to INT16_MAX and sets QC.
  __uint128_t sat_lhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t sat_rhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8283 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1IndexedElem) {
  // SQDMULH (scalar, by element): multiplies %s2 by element 2 of the second
  // vector; no rounding step.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %3.s[2]");

  __uint128_t lhs = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t rhs = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // INT32_MIN * INT32_MIN doubled overflows: saturates to INT32_MAX and sets QC.
  __uint128_t sat_lhs = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t sat_rhs = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8299 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1IndexedElem) {
  // SQDMULH (scalar, by element): multiplies %h2 by element 7 of the second
  // vector; no rounding step.
  constexpr auto asm_insn = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %3.h[7]");

  __uint128_t lhs = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
  auto [prod, fpsr] = asm_insn(lhs, rhs);
  ASSERT_EQ(prod, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // INT16_MIN * INT16_MIN doubled overflows: saturates to INT16_MAX and sets QC.
  __uint128_t sat_lhs = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t sat_rhs = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [sat_prod, sat_fpsr] = asm_insn(sat_lhs, sat_rhs);
  ASSERT_EQ(sat_prod, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(sat_fpsr));
}
8315 
8316 class FpcrBitSupport : public testing::TestWithParam<uint64_t> {};
8317 
// Writes the parameterized bit pattern to FPCR and reads it back; a supported
// bit must survive the round-trip unchanged.
TEST_P(FpcrBitSupport, SupportsBit) {
  uint64_t fpcr1;
  asm("msr fpcr, %x1\n\t"
      "mrs %x0, fpcr"
      : "=r"(fpcr1)
      : "r"(static_cast<uint64_t>(GetParam())));
  ASSERT_EQ(fpcr1, GetParam()) << "Should be able to set then get FPCR bit: " << GetParam();
}  // Stray ';' after the body removed: TEST_P already ends with a complete definition.
8326 
8327 // Note: The exception enablement flags (such as IOE) are not checked, because when tested on actual
8328 // ARM64 device we find that the tests fail either because they cannot be written or are RAZ (read
8329 // as zero).
// Round-trips each supported FPCR bit pattern (the four rounding-mode
// encodings plus the FZ and DN control bits), and all-zeros as a baseline.
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpcrBitSupport,
                         testing::Values(kFpcrRModeTieEven,
                                         kFpcrRModeZero,
                                         kFpcrRModeNegInf,
                                         kFpcrRModePosInf,
                                         kFpcrFzBit,
                                         kFpcrDnBit,
                                         0));
8339 
8340 class FpsrBitSupport : public testing::TestWithParam<uint64_t> {};
8341 
// Writes the parameterized bit pattern to FPSR and reads it back; a supported
// bit must survive the round-trip unchanged.
TEST_P(FpsrBitSupport, SupportsBit) {
  uint64_t fpsr1;
  asm("msr fpsr, %1\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr1)
      : "r"(static_cast<uint64_t>(GetParam())));
  // Print the offending bit on failure, matching the FpcrBitSupport twin.
  ASSERT_EQ(fpsr1, GetParam()) << "Should be able to set then get FPSR bit: " << GetParam();
}  // Stray ';' after the body removed: TEST_P already ends with a complete definition.
8350 
// Round-trips each FPSR cumulative status bit, including the saturation (QC) bit.
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpsrBitSupport,
                         testing::Values(kFpsrIocBit,
                                         kFpsrDzcBit,
                                         kFpsrOfcBit,
                                         kFpsrUfcBit,
                                         kFpsrIxcBit,
                                         kFpsrIdcBit,
                                         kFpsrQcBit));
8360 
TEST(Arm64InsnTest, UnsignedDivide64) {
  // UDIV with 64-bit operands; ARM64 defines division by zero to produce zero
  // rather than trapping.
  auto unsigned_div = [](uint64_t dividend, uint64_t divisor) {
    uint64_t quotient;
    asm("udiv %0, %1, %2" : "=r"(quotient) : "r"(dividend), "r"(divisor));
    return quotient;
  };
  ASSERT_EQ(unsigned_div(0x8'0000'0000ULL, 2ULL), 0x4'0000'0000ULL) << "Division is 64-bit.";
  ASSERT_EQ(unsigned_div(123ULL, 0ULL), 0ULL) << "Div by 0 results in 0.";
}
8370 
TEST(Arm64InsnTest, AesEncode) {
  // AESE: single AES encryption round applied to the accumulated state with
  // the given round key.
  __uint128_t state = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t round_key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  __uint128_t encoded;
  asm("aese %0.16b, %2.16b" : "=w"(encoded) : "0"(state), "w"(round_key));
  ASSERT_EQ(encoded, MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL));
}
8378 
TEST(Arm64InsnTest, AesMixColumns) {
  // AESMC: AES MixColumns transformation of a 128-bit state.
  __uint128_t state = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t mixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesmc %0.16b, %1.16b")(state);
  ASSERT_EQ(mixed, MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL));
}
8384 
TEST(Arm64InsnTest, AesDecode) {
  // AESD should undo AesEncode, modulo the extra key XORs applied on the way
  // in and out (same values as the AesEncode test above).
  __uint128_t encoded = MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL);
  __uint128_t round_key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  __uint128_t input = encoded ^ round_key;
  __uint128_t decoded;
  asm("aesd %0.16b, %2.16b" : "=w"(decoded) : "0"(input), "w"(round_key));
  ASSERT_EQ(decoded ^ round_key, MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL));
}
8394 
TEST(Arm64InsnTest, AesInverseMixColumns) {
  // AESIMC must invert the AESMC result from the AesMixColumns test.
  __uint128_t mixed = MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL);
  __uint128_t unmixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesimc %0.16b, %1.16b")(mixed);
  ASSERT_EQ(unmixed, MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL));
}
8400 
8401 }  // namespace
8402