1 // Copyright 2019, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include <sys/mman.h>
28 
29 #include <cfloat>
30 #include <cmath>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34 
35 #include "test-runner.h"
36 #include "test-utils.h"
37 #include "aarch64/test-utils-aarch64.h"
38 
39 #include "aarch64/cpu-aarch64.h"
40 #include "aarch64/disasm-aarch64.h"
41 #include "aarch64/macro-assembler-aarch64.h"
42 #include "aarch64/simulator-aarch64.h"
43 #include "test-assembler-aarch64.h"
44 
45 namespace vixl {
46 namespace aarch64 {
47 
// Checks B-register Ldr/Str with an immediate offset in three forms: a plain
// offset, post-index and pre-index. Both the transferred bytes and the final
// values of the base registers are verified.
TEST(load_store_b) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t input[3] = {0x12, 0x23, 0x34};
  uint8_t output[3] = {};
  const size_t kStride = sizeof(input[0]);
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);
  const uintptr_t dst_addr = reinterpret_cast<uintptr_t>(output);

  START();
  // Every access uses its own base register so that each writeback (or lack
  // of it) can be checked separately below.
  __ Mov(x17, src_addr);
  __ Mov(x18, dst_addr);
  __ Mov(x19, src_addr);
  __ Mov(x20, dst_addr);
  __ Mov(x21, src_addr);
  __ Mov(x22, dst_addr);
  // input[1] -> output[0].
  __ Ldr(b0, MemOperand(x17, kStride));
  __ Str(b0, MemOperand(x18, kStride, PostIndex));
  // input[0] -> output[2].
  __ Ldr(b1, MemOperand(x19, kStride, PostIndex));
  __ Str(b1, MemOperand(x20, 2 * kStride, PreIndex));
  // input[2] -> output[1].
  __ Ldr(b2, MemOperand(x21, 2 * kStride, PreIndex));
  __ Str(b2, MemOperand(x22, kStride));
  END();

  if (CAN_RUN()) {
    RUN();

    // Loaded values and where they ended up in memory.
    ASSERT_EQUAL_128(0, 0x23, q0);
    ASSERT_EQUAL_64(0x23, output[0]);
    ASSERT_EQUAL_128(0, 0x12, q1);
    ASSERT_EQUAL_64(0x12, output[2]);
    ASSERT_EQUAL_128(0, 0x34, q2);
    ASSERT_EQUAL_64(0x34, output[1]);
    // Base registers: only the post-/pre-indexed accesses moved them.
    ASSERT_EQUAL_64(src_addr, x17);
    ASSERT_EQUAL_64(dst_addr + kStride, x18);
    ASSERT_EQUAL_64(src_addr + kStride, x19);
    ASSERT_EQUAL_64(dst_addr + 2 * kStride, x20);
    ASSERT_EQUAL_64(src_addr + 2 * kStride, x21);
    ASSERT_EQUAL_64(dst_addr, x22);
  }
}
88 
89 
// Checks H-register Ldr/Str with an immediate offset in three forms: a plain
// offset, post-index and pre-index. Both the transferred halfwords and the
// final values of the base registers are verified.
TEST(load_store_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint16_t input[3] = {0x1234, 0x2345, 0x3456};
  uint16_t output[3] = {};
  const size_t kStride = sizeof(input[0]);
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);
  const uintptr_t dst_addr = reinterpret_cast<uintptr_t>(output);

  START();
  // Every access uses its own base register so that each writeback (or lack
  // of it) can be checked separately below.
  __ Mov(x17, src_addr);
  __ Mov(x18, dst_addr);
  __ Mov(x19, src_addr);
  __ Mov(x20, dst_addr);
  __ Mov(x21, src_addr);
  __ Mov(x22, dst_addr);
  // input[1] -> output[0].
  __ Ldr(h0, MemOperand(x17, kStride));
  __ Str(h0, MemOperand(x18, kStride, PostIndex));
  // input[0] -> output[2].
  __ Ldr(h1, MemOperand(x19, kStride, PostIndex));
  __ Str(h1, MemOperand(x20, 2 * kStride, PreIndex));
  // input[2] -> output[1].
  __ Ldr(h2, MemOperand(x21, 2 * kStride, PreIndex));
  __ Str(h2, MemOperand(x22, kStride));
  END();

  if (CAN_RUN()) {
    RUN();

    // Loaded values and where they ended up in memory.
    ASSERT_EQUAL_128(0, 0x2345, q0);
    ASSERT_EQUAL_64(0x2345, output[0]);
    ASSERT_EQUAL_128(0, 0x1234, q1);
    ASSERT_EQUAL_64(0x1234, output[2]);
    ASSERT_EQUAL_128(0, 0x3456, q2);
    ASSERT_EQUAL_64(0x3456, output[1]);
    // Base registers: only the post-/pre-indexed accesses moved them.
    ASSERT_EQUAL_64(src_addr, x17);
    ASSERT_EQUAL_64(dst_addr + kStride, x18);
    ASSERT_EQUAL_64(src_addr + kStride, x19);
    ASSERT_EQUAL_64(dst_addr + 2 * kStride, x20);
    ASSERT_EQUAL_64(src_addr + 2 * kStride, x21);
    ASSERT_EQUAL_64(dst_addr, x22);
  }
}
130 
131 
// Checks Q-register Ldr/Str with an immediate offset in three forms: a plain
// offset, post-index and pre-index. Both the transferred 128-bit values and
// the final values of the base registers are verified.
TEST(load_store_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Laid out eight bytes per row, i.e. one 64-bit word of the expected
  // results per row.
  uint8_t input[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe,
                       0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
                       0x21, 0x43, 0x65, 0x87, 0xa9, 0xcb, 0xed, 0x0f,
                       0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0,
                       0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02,
                       0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20};

  uint64_t output[6] = {};
  const int kQBytes = 16;
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);
  const uintptr_t dst_addr = reinterpret_cast<uintptr_t>(output);

  START();
  // Every access uses its own base register so that each writeback (or lack
  // of it) can be checked separately below.
  __ Mov(x17, src_addr);
  __ Mov(x18, dst_addr);
  __ Mov(x19, src_addr);
  __ Mov(x20, dst_addr);
  __ Mov(x21, src_addr);
  __ Mov(x22, dst_addr);
  // Second 16-byte chunk -> output[0..1].
  __ Ldr(q0, MemOperand(x17, kQBytes));
  __ Str(q0, MemOperand(x18, kQBytes, PostIndex));
  // First 16-byte chunk -> output[4..5].
  __ Ldr(q1, MemOperand(x19, kQBytes, PostIndex));
  __ Str(q1, MemOperand(x20, 2 * kQBytes, PreIndex));
  // Third 16-byte chunk -> output[2..3].
  __ Ldr(q2, MemOperand(x21, 2 * kQBytes, PreIndex));
  __ Str(q2, MemOperand(x22, kQBytes));
  END();

  if (CAN_RUN()) {
    RUN();

    // Loaded values and where they ended up in memory.
    ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0);
    ASSERT_EQUAL_64(0x0fedcba987654321, output[0]);
    ASSERT_EQUAL_64(0xf0debc9a78563412, output[1]);
    ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1);
    ASSERT_EQUAL_64(0xfedcba9876543210, output[4]);
    ASSERT_EQUAL_64(0xefcdab8967452301, output[5]);
    ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2);
    ASSERT_EQUAL_64(0x02e0ceac8a684624, output[2]);
    ASSERT_EQUAL_64(0x200eeccaa8866442, output[3]);
    // Base registers: only the post-/pre-indexed accesses moved them.
    ASSERT_EQUAL_64(src_addr, x17);
    ASSERT_EQUAL_64(dst_addr + kQBytes, x18);
    ASSERT_EQUAL_64(src_addr + kQBytes, x19);
    ASSERT_EQUAL_64(dst_addr + 2 * kQBytes, x20);
    ASSERT_EQUAL_64(src_addr + 2 * kQBytes, x21);
    ASSERT_EQUAL_64(dst_addr, x22);
  }
}
180 
181 
TEST(load_store_v_regoffset)182 TEST(load_store_v_regoffset) {
183   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
184 
185   uint8_t src[64];
186   for (unsigned i = 0; i < sizeof(src); i++) {
187     src[i] = i;
188   }
189   uint8_t dst[64];
190   memset(dst, 0, sizeof(dst));
191 
192   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
193   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
194 
195   START();
196   __ Mov(x17, src_base + 16);
197   __ Mov(x18, 1);
198   __ Mov(w19, -1);
199   __ Mov(x20, dst_base - 1);
200 
201   __ Ldr(b0, MemOperand(x17, x18));
202   __ Ldr(b1, MemOperand(x17, x19, SXTW));
203 
204   __ Ldr(h2, MemOperand(x17, x18));
205   __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
206   __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
207   __ Ldr(h5, MemOperand(x17, x18, LSL, 1));
208 
209   __ Ldr(s16, MemOperand(x17, x18));
210   __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
211   __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
212   __ Ldr(s19, MemOperand(x17, x18, LSL, 2));
213 
214   __ Ldr(d20, MemOperand(x17, x18));
215   __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
216   __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
217   __ Ldr(d23, MemOperand(x17, x18, LSL, 3));
218 
219   __ Ldr(q24, MemOperand(x17, x18));
220   __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
221   __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
222   __ Ldr(q27, MemOperand(x17, x18, LSL, 4));
223 
224   // Store [bhsdq]27 to adjacent memory locations, then load again to check.
225   __ Str(b27, MemOperand(x20, x18));
226   __ Str(h27, MemOperand(x20, x18, UXTW, 1));
227   __ Add(x20, x20, 8);
228   __ Str(s27, MemOperand(x20, x19, SXTW, 2));
229   __ Sub(x20, x20, 8);
230   __ Str(d27, MemOperand(x20, x18, LSL, 3));
231   __ Add(x20, x20, 32);
232   __ Str(q27, MemOperand(x20, x19, SXTW, 4));
233 
234   __ Sub(x20, x20, 32);
235   __ Ldr(q6, MemOperand(x20, x18));
236   __ Ldr(q7, MemOperand(x20, x18, LSL, 4));
237 
238   END();
239 
240   if (CAN_RUN()) {
241     RUN();
242 
243     ASSERT_EQUAL_128(0, 0x11, q0);
244     ASSERT_EQUAL_128(0, 0x0f, q1);
245     ASSERT_EQUAL_128(0, 0x1211, q2);
246     ASSERT_EQUAL_128(0, 0x1312, q3);
247     ASSERT_EQUAL_128(0, 0x0f0e, q4);
248     ASSERT_EQUAL_128(0, 0x1312, q5);
249     ASSERT_EQUAL_128(0, 0x14131211, q16);
250     ASSERT_EQUAL_128(0, 0x17161514, q17);
251     ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18);
252     ASSERT_EQUAL_128(0, 0x17161514, q19);
253     ASSERT_EQUAL_128(0, 0x1817161514131211, q20);
254     ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21);
255     ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22);
256     ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23);
257     ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24);
258     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25);
259     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26);
260     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27);
261     ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6);
262     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7);
263   }
264 }
265 
// Checks Ldp/Stp on a pair of Q registers: a post-indexed pair load followed
// by a pre-indexed pair store with the two registers swapped.
TEST(ldp_stp_quad) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint64_t input[4] = {0x0123456789abcdef,
                       0xaaaaaaaa55555555,
                       0xfedcba9876543210,
                       0x55555555aaaaaaaa};
  uint64_t output[6] = {};
  const size_t kLoadStep = 4 * sizeof(input[0]);
  const size_t kStoreStep = 2 * sizeof(output[1]);
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);
  const uintptr_t dst_addr = reinterpret_cast<uintptr_t>(output);

  START();
  __ Mov(x16, src_addr);
  __ Mov(x17, dst_addr);
  __ Ldp(q31, q0, MemOperand(x16, kLoadStep, PostIndex));
  __ Stp(q0, q31, MemOperand(x17, kStoreStep, PreIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31);
    ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0);
    // The pre-indexed store skips the first two words of the output.
    ASSERT_EQUAL_64(0, output[0]);
    ASSERT_EQUAL_64(0, output[1]);
    ASSERT_EQUAL_64(0xfedcba9876543210, output[2]);
    ASSERT_EQUAL_64(0x55555555aaaaaaaa, output[3]);
    ASSERT_EQUAL_64(0x0123456789abcdef, output[4]);
    ASSERT_EQUAL_64(0xaaaaaaaa55555555, output[5]);
    // Both base registers were written back.
    ASSERT_EQUAL_64(src_addr + kLoadStep, x16);
    ASSERT_EQUAL_64(dst_addr + kStoreStep, x17);
  }
}
299 
// Checks Ld1 (multiple structures) into one to four D-sized registers for the
// 8B, 4H, 2S and 1D arrangements, without writeback.
TEST(neon_ld1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[n] == n, so every expected value spells out the addresses read.
  uint8_t input[32 + 5];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, src_addr);
  // Fill all of q2 first; the check below expects its top 64 bits to be zero
  // after the 8B load.
  __ Ldr(q2, MemOperand(x17));
  __ Ld1(v2.V8B(), MemOperand(x17));
  // Each following load starts one byte further into the buffer.
  __ Add(x17, x17, 1);
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps from v31 round to v0.
  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
    ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
    ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
    ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
    ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
    ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
    ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
    ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
    ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
    ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
  }
}
348 
349 
TEST(neon_ld1_d_postindex)350 TEST(neon_ld1_d_postindex) {
351   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
352 
353   uint8_t src[32 + 5];
354   for (unsigned i = 0; i < sizeof(src); i++) {
355     src[i] = i;
356   }
357   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
358 
359   START();
360   __ Mov(x17, src_base);
361   __ Mov(x18, src_base + 1);
362   __ Mov(x19, src_base + 2);
363   __ Mov(x20, src_base + 3);
364   __ Mov(x21, src_base + 4);
365   __ Mov(x22, src_base + 5);
366   __ Mov(x23, 1);
367   __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
368   __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
369   __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
370   __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
371   __ Ld1(v16.V2S(),
372          v17.V2S(),
373          v18.V2S(),
374          v19.V2S(),
375          MemOperand(x20, 32, PostIndex));
376   __ Ld1(v30.V2S(),
377          v31.V2S(),
378          v0.V2S(),
379          v1.V2S(),
380          MemOperand(x21, 32, PostIndex));
381   __ Ld1(v20.V1D(),
382          v21.V1D(),
383          v22.V1D(),
384          v23.V1D(),
385          MemOperand(x22, 32, PostIndex));
386   END();
387 
388   if (CAN_RUN()) {
389     RUN();
390 
391     ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
392     ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
393     ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
394     ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
395     ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
396     ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
397     ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
398     ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
399     ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
400     ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
401     ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
402     ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
403     ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
404     ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
405     ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
406     ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
407     ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
408     ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
409     ASSERT_EQUAL_64(src_base + 1, x17);
410     ASSERT_EQUAL_64(src_base + 1 + 16, x18);
411     ASSERT_EQUAL_64(src_base + 2 + 24, x19);
412     ASSERT_EQUAL_64(src_base + 3 + 32, x20);
413     ASSERT_EQUAL_64(src_base + 4 + 32, x21);
414     ASSERT_EQUAL_64(src_base + 5 + 32, x22);
415   }
416 }
417 
418 
// Checks Ld1 (multiple structures) into one to four Q-sized registers for the
// 16B, 8H, 4S and 2D arrangements, without writeback.
TEST(neon_ld1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[n] == n, so every expected value spells out the addresses read.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, src_addr);
  __ Ld1(v2.V16B(), MemOperand(x17));
  // Each following load starts one byte further into the buffer.
  __ Add(x17, x17, 1);
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps from v31 round to v0.
  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
  }
}
460 
461 
// Checks post-indexed Ld1 (multiple structures) into one to four Q-sized
// registers, using both a register and immediate post-index amounts, and
// verifies the written-back base registers.
TEST(neon_ld1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[n] == n, so every expected value spells out the addresses read.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);

  START();
  // One base register per load, each starting one byte after the previous.
  __ Mov(x17, src_addr);
  __ Mov(x18, src_addr + 1);
  __ Mov(x19, src_addr + 2);
  __ Mov(x20, src_addr + 3);
  __ Mov(x21, src_addr + 4);
  // Register post-index amount used by the first load.
  __ Mov(x22, 1);
  __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld1(v16.V4S(),
         v17.V4S(),
         v18.V4S(),
         v19.V4S(),
         MemOperand(x20, 64, PostIndex));
  // Register list that wraps from v31 round to v0.
  __ Ld1(v30.V2D(),
         v31.V2D(),
         v0.V2D(),
         v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
    // Written-back bases: starting address plus the post-index amount.
    ASSERT_EQUAL_64(src_addr + 1, x17);
    ASSERT_EQUAL_64(src_addr + 1 + 32, x18);
    ASSERT_EQUAL_64(src_addr + 2 + 48, x19);
    ASSERT_EQUAL_64(src_addr + 3 + 64, x20);
    ASSERT_EQUAL_64(src_addr + 4 + 64, x21);
  }
}
517 
518 
// Checks Ld1 (single structure) into an individual B, H, S or D lane: first
// filling a whole register lane by lane, then replacing a single lane of an
// already-initialised register.
TEST(neon_ld1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[n] == n, so every expected value spells out the addresses read.
  uint8_t input[64];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);

  START();

  // Fill each register from the highest lane down to lane 0. The address
  // advances by one byte per lane, whatever the lane size.
  __ Mov(x17, src_addr);
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld1(v0.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_addr);
  for (int lane = 7; lane >= 0; --lane) {
    __ Ld1(v1.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_addr);
  for (int lane = 3; lane >= 0; --lane) {
    __ Ld1(v2.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_addr);
  for (int lane = 1; lane >= 0; --lane) {
    __ Ld1(v3.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Overwrite one lane of a register that already holds the first 16 bytes
  // of the input.
  __ Mov(x17, src_addr);
  __ Ldr(q4, MemOperand(x17));
  __ Ld1(v4.B(), 4, MemOperand(x17));
  __ Ldr(q5, MemOperand(x17));
  __ Ld1(v5.H(), 3, MemOperand(x17));
  __ Ldr(q6, MemOperand(x17));
  __ Ld1(v6.S(), 2, MemOperand(x17));
  __ Ldr(q7, MemOperand(x17));
  __ Ld1(v7.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    // Registers filled lane by lane.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3);
    // Registers with a single lane replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
  }
}
581 
// Checks Ld2 (multiple structures) into pairs of D-sized registers for the
// 8B, 4H and 2S arrangements, without writeback.
TEST(neon_ld2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[n] == n, so every expected value spells out the addresses read.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, src_addr);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
  // Each following load starts one byte further into the buffer.
  __ Add(x17, x17, 1);
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps from v31 round to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0);
  }
}
615 
// Checks post-indexed Ld2 (multiple structures) into pairs of D-sized
// registers, using both a register and immediate post-index amounts, and
// verifies the written-back base registers.
TEST(neon_ld2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[n] == n, so every expected value spells out the addresses read.
  uint8_t input[32 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);

  START();
  // One base register per load, each starting one byte after the previous.
  __ Mov(x17, src_addr);
  __ Mov(x18, src_addr + 1);
  __ Mov(x19, src_addr + 2);
  __ Mov(x20, src_addr + 3);
  __ Mov(x21, src_addr + 4);
  // Register post-index amount used by the first load.
  __ Mov(x22, 1);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
  // v5 is written again by this load, so of the previous pair only q4 is
  // checked below.
  __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex));
  __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
  // Register list that wraps from v31 round to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q5);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q6);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31);
    ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0);

    // Written-back bases: starting address plus the post-index amount.
    ASSERT_EQUAL_64(src_addr + 1, x17);
    ASSERT_EQUAL_64(src_addr + 1 + 16, x18);
    ASSERT_EQUAL_64(src_addr + 2 + 16, x19);
    ASSERT_EQUAL_64(src_addr + 3 + 16, x20);
    ASSERT_EQUAL_64(src_addr + 4 + 16, x21);
  }
}
659 
660 
// Checks Ld2 (multiple structures) into pairs of Q-sized registers for the
// 16B, 8H, 4S and 2D arrangements, without writeback.
TEST(neon_ld2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[n] == n, so every expected value spells out the addresses read.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);

  START();
  __ Mov(x17, src_addr);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
  // Each following load starts one byte further into the buffer.
  __ Add(x17, x17, 1);
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps from v31 round to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);
  }
}
698 
699 
// Checks post-indexed Ld2 (multiple structures) into pairs of Q-sized
// registers, using both a register and immediate post-index amounts, and
// verifies the written-back base registers.
TEST(neon_ld2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // input[n] == n, so every expected value spells out the addresses read.
  uint8_t input[64 + 4];
  for (size_t idx = 0; idx < sizeof(input); ++idx) {
    input[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t src_addr = reinterpret_cast<uintptr_t>(input);

  START();
  // One base register per load, each starting one byte after the previous.
  __ Mov(x17, src_addr);
  __ Mov(x18, src_addr + 1);
  __ Mov(x19, src_addr + 2);
  __ Mov(x20, src_addr + 3);
  __ Mov(x21, src_addr + 4);
  // Register post-index amount used by the first load.
  __ Mov(x22, 1);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
  // Register list that wraps from v31 round to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);

    // Written-back bases: starting address plus the post-index amount.
    ASSERT_EQUAL_64(src_addr + 1, x17);
    ASSERT_EQUAL_64(src_addr + 1 + 32, x18);
    ASSERT_EQUAL_64(src_addr + 2 + 32, x19);
    ASSERT_EQUAL_64(src_addr + 3 + 32, x20);
    ASSERT_EQUAL_64(src_addr + 4 + 32, x21);
  }
}
745 
746 
TEST(neon_ld2_lane)747 TEST(neon_ld2_lane) {
748   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
749 
750   uint8_t src[64];
751   for (unsigned i = 0; i < sizeof(src); i++) {
752     src[i] = i;
753   }
754   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
755 
756   START();
757 
758   // Test loading whole register by element.
759   __ Mov(x17, src_base);
760   for (int i = 15; i >= 0; i--) {
761     __ Ld2(v0.B(), v1.B(), i, MemOperand(x17));
762     __ Add(x17, x17, 1);
763   }
764 
765   __ Mov(x17, src_base);
766   for (int i = 7; i >= 0; i--) {
767     __ Ld2(v2.H(), v3.H(), i, MemOperand(x17));
768     __ Add(x17, x17, 1);
769   }
770 
771   __ Mov(x17, src_base);
772   for (int i = 3; i >= 0; i--) {
773     __ Ld2(v4.S(), v5.S(), i, MemOperand(x17));
774     __ Add(x17, x17, 1);
775   }
776 
777   __ Mov(x17, src_base);
778   for (int i = 1; i >= 0; i--) {
779     __ Ld2(v6.D(), v7.D(), i, MemOperand(x17));
780     __ Add(x17, x17, 1);
781   }
782 
783   // Test loading a single element into an initialised register.
784   __ Mov(x17, src_base);
785   __ Mov(x4, x17);
786   __ Ldr(q8, MemOperand(x4, 16, PostIndex));
787   __ Ldr(q9, MemOperand(x4));
788   __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
789   __ Mov(x5, x17);
790   __ Ldr(q10, MemOperand(x5, 16, PostIndex));
791   __ Ldr(q11, MemOperand(x5));
792   __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
793   __ Mov(x6, x17);
794   __ Ldr(q12, MemOperand(x6, 16, PostIndex));
795   __ Ldr(q13, MemOperand(x6));
796   __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
797   __ Mov(x7, x17);
798   __ Ldr(q14, MemOperand(x7, 16, PostIndex));
799   __ Ldr(q15, MemOperand(x7));
800   __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));
801 
802   END();
803 
804   if (CAN_RUN()) {
805     RUN();
806 
807     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
808     ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
809     ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2);
810     ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3);
811     ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4);
812     ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5);
813     ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6);
814     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7);
815     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
816     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
817     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
818     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
819     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
820     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
821     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
822     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
823   }
824 }
825 
826 
TEST(neon_ld2_lane_postindex)827 TEST(neon_ld2_lane_postindex) {
828   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
829 
830   uint8_t src[64];
831   for (unsigned i = 0; i < sizeof(src); i++) {
832     src[i] = i;
833   }
834   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
835 
836   START();
837   __ Mov(x17, src_base);
838   __ Mov(x18, src_base);
839   __ Mov(x19, src_base);
840   __ Mov(x20, src_base);
841   __ Mov(x21, src_base);
842   __ Mov(x22, src_base);
843   __ Mov(x23, src_base);
844   __ Mov(x24, src_base);
845 
846   // Test loading whole register by element.
847   for (int i = 15; i >= 0; i--) {
848     __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex));
849   }
850 
851   for (int i = 7; i >= 0; i--) {
852     __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex));
853   }
854 
855   for (int i = 3; i >= 0; i--) {
856     __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex));
857   }
858 
859   for (int i = 1; i >= 0; i--) {
860     __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex));
861   }
862 
863   // Test loading a single element into an initialised register.
864   __ Mov(x25, 1);
865   __ Mov(x4, x21);
866   __ Ldr(q8, MemOperand(x4, 16, PostIndex));
867   __ Ldr(q9, MemOperand(x4));
868   __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
869   __ Add(x25, x25, 1);
870 
871   __ Mov(x5, x22);
872   __ Ldr(q10, MemOperand(x5, 16, PostIndex));
873   __ Ldr(q11, MemOperand(x5));
874   __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
875   __ Add(x25, x25, 1);
876 
877   __ Mov(x6, x23);
878   __ Ldr(q12, MemOperand(x6, 16, PostIndex));
879   __ Ldr(q13, MemOperand(x6));
880   __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
881   __ Add(x25, x25, 1);
882 
883   __ Mov(x7, x24);
884   __ Ldr(q14, MemOperand(x7, 16, PostIndex));
885   __ Ldr(q15, MemOperand(x7));
886   __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));
887 
888   END();
889 
890   if (CAN_RUN()) {
891     RUN();
892 
893     ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0);
894     ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1);
895     ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2);
896     ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3);
897     ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4);
898     ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5);
899     ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6);
900     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7);
901     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
902     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
903     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
904     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
905     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
906     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
907     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
908     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
909 
910 
911     ASSERT_EQUAL_64(src_base + 32, x17);
912     ASSERT_EQUAL_64(src_base + 32, x18);
913     ASSERT_EQUAL_64(src_base + 32, x19);
914     ASSERT_EQUAL_64(src_base + 32, x20);
915     ASSERT_EQUAL_64(src_base + 1, x21);
916     ASSERT_EQUAL_64(src_base + 2, x22);
917     ASSERT_EQUAL_64(src_base + 3, x23);
918     ASSERT_EQUAL_64(src_base + 4, x24);
919   }
920 }
921 
922 
// Checks Ld2r with a plain base-register address for each vector arrangement.
// The expected values show each loaded element replicated across every lane
// of its destination, with the upper half zero for 64-bit arrangements.
TEST(neon_ld2_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[n] == n, so expected values can be derived from the load address.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 is the only address register this test needs; it is advanced
  // explicitly with Add between loads since no write-back form is used.
  __ Mov(x17, src_base + 1);
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Add(x17, x17, 2);
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
  }
}
969 
970 
TEST(neon_ld2_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes count upwards from zero: pattern[n] == n.
  uint8_t pattern[64];
  for (unsigned n = 0; n != sizeof(pattern); ++n) {
    pattern[n] = n;
  }
  uintptr_t pattern_base = reinterpret_cast<uintptr_t>(pattern);

  START();
  __ Mov(x17, pattern_base + 1);
  // x18 supplies the offset for the register post-index forms.
  __ Mov(x18, 1);
  // A single base register is threaded through every load, mixing immediate
  // and register post-index forms.
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Each loaded element is expected in every lane of its destination; the
    // upper half is expected to be zero for the 64-bit arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);

    // Accumulated write-back: 1 + (2 + 1 + 1 + 4 + 1 + 8 + 16) = 34.
    ASSERT_EQUAL_64(pattern_base + 34, x17);
  }
}
1012 
1013 
TEST(neon_ld3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes count upwards from zero: pattern[n] == n.
  uint8_t pattern[64 + 4];
  for (unsigned n = 0; n != sizeof(pattern); ++n) {
    pattern[n] = n;
  }
  uintptr_t pattern_base = reinterpret_cast<uintptr_t>(pattern);

  START();
  // Three-register loads into 64-bit arrangements. The base moves on by one
  // byte between loads, so each load starts at a different offset.
  __ Mov(x17, pattern_base);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps around from v31 to v0.
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q31);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q0);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1);
  }
}
1051 
1052 
TEST(neon_ld3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes count upwards from zero: pattern[n] == n.
  uint8_t pattern[32 + 4];
  for (unsigned n = 0; n != sizeof(pattern); ++n) {
    pattern[n] = n;
  }
  uintptr_t pattern_base = reinterpret_cast<uintptr_t>(pattern);

  START();
  // One base register per load, each starting one byte further into the
  // buffer than the previous one.
  __ Mov(x17, pattern_base);
  __ Mov(x18, pattern_base + 1);
  __ Mov(x19, pattern_base + 2);
  __ Mov(x20, pattern_base + 3);
  __ Mov(x21, pattern_base + 4);
  // x22 supplies the offset for the register post-index form.
  __ Mov(x22, 1);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
  // The remaining loads use an immediate post-index of 24 bytes.
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex));
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex));
  // This register list wraps around from v31 to v0.
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13);
    ASSERT_EQUAL_128(0, 0x1312111007060504, q31);
    ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0);
    ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1);

    // Base registers after write-back.
    ASSERT_EQUAL_64(pattern_base + 1, x17);
    ASSERT_EQUAL_64(pattern_base + 1 + 24, x18);
    ASSERT_EQUAL_64(pattern_base + 2 + 24, x19);
    ASSERT_EQUAL_64(pattern_base + 3 + 24, x20);
    ASSERT_EQUAL_64(pattern_base + 4 + 24, x21);
  }
}
1102 
1103 
TEST(neon_ld3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes count upwards from zero: pattern[n] == n.
  uint8_t pattern[64 + 4];
  for (unsigned n = 0; n != sizeof(pattern); ++n) {
    pattern[n] = n;
  }
  uintptr_t pattern_base = reinterpret_cast<uintptr_t>(pattern);

  START();
  // Three-register loads into 128-bit arrangements. The base moves on by one
  // byte between loads, so each load starts at a different offset.
  __ Mov(x17, pattern_base);
  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps around from v31 to v0.
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 16B arrangement, base offsets 0 and 1.
    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    // 8H arrangement, base offset 2.
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    // 4S arrangement, base offset 3.
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    // 2D arrangement, base offset 4.
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);
  }
}
1146 
1147 
TEST(neon_ld3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes count upwards from zero: pattern[n] == n.
  uint8_t pattern[64 + 4];
  for (unsigned n = 0; n != sizeof(pattern); ++n) {
    pattern[n] = n;
  }
  uintptr_t pattern_base = reinterpret_cast<uintptr_t>(pattern);

  START();
  // One base register per load, each starting one byte further into the
  // buffer than the previous one.
  __ Mov(x17, pattern_base);
  __ Mov(x18, pattern_base + 1);
  __ Mov(x19, pattern_base + 2);
  __ Mov(x20, pattern_base + 3);
  __ Mov(x21, pattern_base + 4);
  // x22 supplies the offset for the register post-index form.
  __ Mov(x22, 1);

  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
  // The remaining loads use an immediate post-index of 48 bytes.
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex));
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex));
  // This register list wraps around from v31 to v0.
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // 16B arrangement, base offsets 0 and 1.
    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    // 8H arrangement, base offset 2.
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    // 4S arrangement, base offset 3.
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    // 2D arrangement, base offset 4.
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

    // Base registers after write-back.
    ASSERT_EQUAL_64(pattern_base + 1, x17);
    ASSERT_EQUAL_64(pattern_base + 1 + 48, x18);
    ASSERT_EQUAL_64(pattern_base + 2 + 48, x19);
    ASSERT_EQUAL_64(pattern_base + 3 + 48, x20);
    ASSERT_EQUAL_64(pattern_base + 4 + 48, x21);
  }
}
1198 
1199 
TEST(neon_ld3_lane)1200 TEST(neon_ld3_lane) {
1201   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1202 
1203   uint8_t src[64];
1204   for (unsigned i = 0; i < sizeof(src); i++) {
1205     src[i] = i;
1206   }
1207   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1208 
1209   START();
1210 
1211   // Test loading whole register by element.
1212   __ Mov(x17, src_base);
1213   for (int i = 15; i >= 0; i--) {
1214     __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
1215     __ Add(x17, x17, 1);
1216   }
1217 
1218   __ Mov(x17, src_base);
1219   for (int i = 7; i >= 0; i--) {
1220     __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
1221     __ Add(x17, x17, 1);
1222   }
1223 
1224   __ Mov(x17, src_base);
1225   for (int i = 3; i >= 0; i--) {
1226     __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
1227     __ Add(x17, x17, 1);
1228   }
1229 
1230   __ Mov(x17, src_base);
1231   for (int i = 1; i >= 0; i--) {
1232     __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
1233     __ Add(x17, x17, 1);
1234   }
1235 
1236   // Test loading a single element into an initialised register.
1237   __ Mov(x17, src_base);
1238   __ Mov(x4, x17);
1239   __ Ldr(q12, MemOperand(x4, 16, PostIndex));
1240   __ Ldr(q13, MemOperand(x4, 16, PostIndex));
1241   __ Ldr(q14, MemOperand(x4));
1242   __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
1243   __ Mov(x5, x17);
1244   __ Ldr(q15, MemOperand(x5, 16, PostIndex));
1245   __ Ldr(q16, MemOperand(x5, 16, PostIndex));
1246   __ Ldr(q17, MemOperand(x5));
1247   __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
1248   __ Mov(x6, x17);
1249   __ Ldr(q18, MemOperand(x6, 16, PostIndex));
1250   __ Ldr(q19, MemOperand(x6, 16, PostIndex));
1251   __ Ldr(q20, MemOperand(x6));
1252   __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
1253   __ Mov(x7, x17);
1254   __ Ldr(q21, MemOperand(x7, 16, PostIndex));
1255   __ Ldr(q22, MemOperand(x7, 16, PostIndex));
1256   __ Ldr(q23, MemOperand(x7));
1257   __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));
1258 
1259   END();
1260 
1261   if (CAN_RUN()) {
1262     RUN();
1263 
1264     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
1265     ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
1266     ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
1267     ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3);
1268     ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4);
1269     ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5);
1270     ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6);
1271     ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7);
1272     ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8);
1273     ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9);
1274     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10);
1275     ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11);
1276     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
1277     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
1278     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
1279     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
1280     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
1281     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
1282   }
1283 }
1284 
1285 
TEST(neon_ld3_lane_postindex)1286 TEST(neon_ld3_lane_postindex) {
1287   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1288 
1289   uint8_t src[64];
1290   for (unsigned i = 0; i < sizeof(src); i++) {
1291     src[i] = i;
1292   }
1293   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1294 
1295   START();
1296 
1297   // Test loading whole register by element.
1298   __ Mov(x17, src_base);
1299   __ Mov(x18, src_base);
1300   __ Mov(x19, src_base);
1301   __ Mov(x20, src_base);
1302   __ Mov(x21, src_base);
1303   __ Mov(x22, src_base);
1304   __ Mov(x23, src_base);
1305   __ Mov(x24, src_base);
1306   for (int i = 15; i >= 0; i--) {
1307     __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17, 3, PostIndex));
1308   }
1309 
1310   for (int i = 7; i >= 0; i--) {
1311     __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x18, 6, PostIndex));
1312   }
1313 
1314   for (int i = 3; i >= 0; i--) {
1315     __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x19, 12, PostIndex));
1316   }
1317 
1318   for (int i = 1; i >= 0; i--) {
1319     __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x20, 24, PostIndex));
1320   }
1321 
1322 
1323   // Test loading a single element into an initialised register.
1324   __ Mov(x25, 1);
1325   __ Mov(x4, x21);
1326   __ Ldr(q12, MemOperand(x4, 16, PostIndex));
1327   __ Ldr(q13, MemOperand(x4, 16, PostIndex));
1328   __ Ldr(q14, MemOperand(x4));
1329   __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
1330   __ Add(x25, x25, 1);
1331 
1332   __ Mov(x5, x22);
1333   __ Ldr(q15, MemOperand(x5, 16, PostIndex));
1334   __ Ldr(q16, MemOperand(x5, 16, PostIndex));
1335   __ Ldr(q17, MemOperand(x5));
1336   __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
1337   __ Add(x25, x25, 1);
1338 
1339   __ Mov(x6, x23);
1340   __ Ldr(q18, MemOperand(x6, 16, PostIndex));
1341   __ Ldr(q19, MemOperand(x6, 16, PostIndex));
1342   __ Ldr(q20, MemOperand(x6));
1343   __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
1344   __ Add(x25, x25, 1);
1345 
1346   __ Mov(x7, x24);
1347   __ Ldr(q21, MemOperand(x7, 16, PostIndex));
1348   __ Ldr(q22, MemOperand(x7, 16, PostIndex));
1349   __ Ldr(q23, MemOperand(x7));
1350   __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));
1351 
1352   END();
1353 
1354   if (CAN_RUN()) {
1355     RUN();
1356 
1357     ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0);
1358     ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1);
1359     ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2);
1360     ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3);
1361     ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4);
1362     ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5);
1363     ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6);
1364     ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7);
1365     ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8);
1366     ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9);
1367     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10);
1368     ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11);
1369     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
1370     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
1371     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
1372     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
1373     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
1374     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
1375     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
1376     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
1377     ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
1378     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
1379     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
1380     ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);
1381 
1382     ASSERT_EQUAL_64(src_base + 48, x17);
1383     ASSERT_EQUAL_64(src_base + 48, x18);
1384     ASSERT_EQUAL_64(src_base + 48, x19);
1385     ASSERT_EQUAL_64(src_base + 48, x20);
1386     ASSERT_EQUAL_64(src_base + 1, x21);
1387     ASSERT_EQUAL_64(src_base + 2, x22);
1388     ASSERT_EQUAL_64(src_base + 3, x23);
1389     ASSERT_EQUAL_64(src_base + 4, x24);
1390   }
1391 }
1392 
1393 
// Checks Ld3r with a plain base-register address for each vector arrangement.
// The expected values show each loaded element replicated across every lane
// of its destination, with the upper half zero for 64-bit arrangements.
TEST(neon_ld3_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[n] == n, so expected values can be derived from the load address.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 is the only address register this test needs; it is advanced
  // explicitly with Add between loads since no write-back form is used.
  __ Mov(x17, src_base + 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 3);
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
  __ Add(x17, x17, 6);
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 12);
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);
  }
}
1447 
1448 
// Checks Ld3r with post-index addressing for each vector arrangement, using
// both immediate and register offsets. A single base register (x17) is
// threaded through all the loads and its final value is verified.
TEST(neon_ld3_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[n] == n, so expected values can be derived from the load address.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  // All code generation happens between START() and END().
  START();
  __ Mov(x17, src_base + 1);
  // x18 supplies the offset for the register post-index forms.
  __ Mov(x18, 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

    // Accumulated write-back: 1 + (3 + 1 + 1 + 6 + 1 + 12 + 24) = 49.
    ASSERT_EQUAL_64(src_base + 49, x17);
  }
}
1498 
1499 
TEST(neon_ld4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes count upwards from zero: pattern[n] == n.
  uint8_t pattern[64 + 4];
  for (unsigned n = 0; n != sizeof(pattern); ++n) {
    pattern[n] = n;
  }
  uintptr_t pattern_base = reinterpret_cast<uintptr_t>(pattern);

  START();
  // Four-register loads into 64-bit arrangements. The base moves on by one
  // byte between loads, so each load starts at a different offset.
  __ Mov(x17, pattern_base);
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps around from v31 to v0.
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
    ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
    ASSERT_EQUAL_128(0, 0x1615141306050403, q30);
    ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31);
    ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0);
    ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1);
  }
}
1541 
1542 
TEST(neon_ld4_d_postindex)1543 TEST(neon_ld4_d_postindex) {
1544   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1545 
1546   uint8_t src[32 + 4];
1547   for (unsigned i = 0; i < sizeof(src); i++) {
1548     src[i] = i;
1549   }
1550   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1551 
1552   START();
1553   __ Mov(x17, src_base);
1554   __ Mov(x18, src_base + 1);
1555   __ Mov(x19, src_base + 2);
1556   __ Mov(x20, src_base + 3);
1557   __ Mov(x21, src_base + 4);
1558   __ Mov(x22, 1);
1559   __ Ld4(v2.V8B(),
1560          v3.V8B(),
1561          v4.V8B(),
1562          v5.V8B(),
1563          MemOperand(x17, x22, PostIndex));
1564   __ Ld4(v6.V8B(),
1565          v7.V8B(),
1566          v8.V8B(),
1567          v9.V8B(),
1568          MemOperand(x18, 32, PostIndex));
1569   __ Ld4(v10.V4H(),
1570          v11.V4H(),
1571          v12.V4H(),
1572          v13.V4H(),
1573          MemOperand(x19, 32, PostIndex));
1574   __ Ld4(v14.V2S(),
1575          v15.V2S(),
1576          v16.V2S(),
1577          v17.V2S(),
1578          MemOperand(x20, 32, PostIndex));
1579   __ Ld4(v30.V2S(),
1580          v31.V2S(),
1581          v0.V2S(),
1582          v1.V2S(),
1583          MemOperand(x21, 32, PostIndex));
1584   END();
1585 
1586   if (CAN_RUN()) {
1587     RUN();
1588 
1589     ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
1590     ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
1591     ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
1592     ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
1593     ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
1594     ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
1595     ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
1596     ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
1597     ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
1598     ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
1599     ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
1600     ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
1601     ASSERT_EQUAL_128(0, 0x1615141306050403, q14);
1602     ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15);
1603     ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16);
1604     ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17);
1605     ASSERT_EQUAL_128(0, 0x1716151407060504, q30);
1606     ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31);
1607     ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0);
1608     ASSERT_EQUAL_128(0, 0x2322212013121110, q1);
1609 
1610 
1611     ASSERT_EQUAL_64(src_base + 1, x17);
1612     ASSERT_EQUAL_64(src_base + 1 + 32, x18);
1613     ASSERT_EQUAL_64(src_base + 2 + 32, x19);
1614     ASSERT_EQUAL_64(src_base + 3 + 32, x20);
1615     ASSERT_EQUAL_64(src_base + 4 + 32, x21);
1616   }
1617 }
1618 
1619 
TEST(neon_ld4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes count upwards from zero: pattern[n] == n.
  uint8_t pattern[64 + 4];
  for (unsigned n = 0; n != sizeof(pattern); ++n) {
    pattern[n] = n;
  }
  uintptr_t pattern_base = reinterpret_cast<uintptr_t>(pattern);

  START();
  // Four-register loads into 128-bit arrangements. The base moves on by one
  // byte between loads, so each load starts at a different offset.
  __ Mov(x17, pattern_base);
  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 16B arrangement, base offsets 0 and 1.
    ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
    // 8H arrangement, base offset 2.
    ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
    // 4S arrangement, base offset 3.
    ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
    // 2D arrangement, base offset 4.
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19);
    ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21);
  }
}
1667 
1668 
TEST(neon_ld4_q_postindex)1669 TEST(neon_ld4_q_postindex) {
1670   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1671 
1672   uint8_t src[64 + 4];
1673   for (unsigned i = 0; i < sizeof(src); i++) {
1674     src[i] = i;
1675   }
1676   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1677 
1678   START();
1679   __ Mov(x17, src_base);
1680   __ Mov(x18, src_base + 1);
1681   __ Mov(x19, src_base + 2);
1682   __ Mov(x20, src_base + 3);
1683   __ Mov(x21, src_base + 4);
1684   __ Mov(x22, 1);
1685 
1686   __ Ld4(v2.V16B(),
1687          v3.V16B(),
1688          v4.V16B(),
1689          v5.V16B(),
1690          MemOperand(x17, x22, PostIndex));
1691   __ Ld4(v6.V16B(),
1692          v7.V16B(),
1693          v8.V16B(),
1694          v9.V16B(),
1695          MemOperand(x18, 64, PostIndex));
1696   __ Ld4(v10.V8H(),
1697          v11.V8H(),
1698          v12.V8H(),
1699          v13.V8H(),
1700          MemOperand(x19, 64, PostIndex));
1701   __ Ld4(v14.V4S(),
1702          v15.V4S(),
1703          v16.V4S(),
1704          v17.V4S(),
1705          MemOperand(x20, 64, PostIndex));
1706   __ Ld4(v30.V2D(),
1707          v31.V2D(),
1708          v0.V2D(),
1709          v1.V2D(),
1710          MemOperand(x21, 64, PostIndex));
1711   END();
1712 
1713   if (CAN_RUN()) {
1714     RUN();
1715 
1716     ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
1717     ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
1718     ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
1719     ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
1720     ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
1721     ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
1722     ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
1723     ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
1724     ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
1725     ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
1726     ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
1727     ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
1728     ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
1729     ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
1730     ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
1731     ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
1732     ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30);
1733     ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31);
1734     ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0);
1735     ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1);
1736 
1737 
1738     ASSERT_EQUAL_64(src_base + 1, x17);
1739     ASSERT_EQUAL_64(src_base + 1 + 64, x18);
1740     ASSERT_EQUAL_64(src_base + 2 + 64, x19);
1741     ASSERT_EQUAL_64(src_base + 3 + 64, x20);
1742     ASSERT_EQUAL_64(src_base + 4 + 64, x21);
1743   }
1744 }
1745 
1746 
TEST(neon_ld4_lane)1747 TEST(neon_ld4_lane) {
1748   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1749 
1750   uint8_t src[64];
1751   for (unsigned i = 0; i < sizeof(src); i++) {
1752     src[i] = i;
1753   }
1754   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1755 
1756   START();
1757 
1758   // Test loading whole register by element.
1759   __ Mov(x17, src_base);
1760   for (int i = 15; i >= 0; i--) {
1761     __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17));
1762     __ Add(x17, x17, 1);
1763   }
1764 
1765   __ Mov(x17, src_base);
1766   for (int i = 7; i >= 0; i--) {
1767     __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x17));
1768     __ Add(x17, x17, 1);
1769   }
1770 
1771   __ Mov(x17, src_base);
1772   for (int i = 3; i >= 0; i--) {
1773     __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x17));
1774     __ Add(x17, x17, 1);
1775   }
1776 
1777   __ Mov(x17, src_base);
1778   for (int i = 1; i >= 0; i--) {
1779     __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x17));
1780     __ Add(x17, x17, 1);
1781   }
1782 
1783   // Test loading a single element into an initialised register.
1784   __ Mov(x17, src_base);
1785   __ Mov(x4, x17);
1786   __ Ldr(q16, MemOperand(x4, 16, PostIndex));
1787   __ Ldr(q17, MemOperand(x4, 16, PostIndex));
1788   __ Ldr(q18, MemOperand(x4, 16, PostIndex));
1789   __ Ldr(q19, MemOperand(x4));
1790   __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));
1791 
1792   __ Mov(x5, x17);
1793   __ Ldr(q20, MemOperand(x5, 16, PostIndex));
1794   __ Ldr(q21, MemOperand(x5, 16, PostIndex));
1795   __ Ldr(q22, MemOperand(x5, 16, PostIndex));
1796   __ Ldr(q23, MemOperand(x5));
1797   __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));
1798 
1799   __ Mov(x6, x17);
1800   __ Ldr(q24, MemOperand(x6, 16, PostIndex));
1801   __ Ldr(q25, MemOperand(x6, 16, PostIndex));
1802   __ Ldr(q26, MemOperand(x6, 16, PostIndex));
1803   __ Ldr(q27, MemOperand(x6));
1804   __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));
1805 
1806   __ Mov(x7, x17);
1807   __ Ldr(q28, MemOperand(x7, 16, PostIndex));
1808   __ Ldr(q29, MemOperand(x7, 16, PostIndex));
1809   __ Ldr(q30, MemOperand(x7, 16, PostIndex));
1810   __ Ldr(q31, MemOperand(x7));
1811   __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));
1812 
1813   END();
1814 
1815   if (CAN_RUN()) {
1816     RUN();
1817 
1818     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
1819     ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
1820     ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
1821     ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3);
1822     ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4);
1823     ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5);
1824     ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6);
1825     ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7);
1826     ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8);
1827     ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9);
1828     ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10);
1829     ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11);
1830     ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12);
1831     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13);
1832     ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14);
1833     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15);
1834     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
1835     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
1836     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
1837     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
1838     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
1839     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
1840     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
1841     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
1842     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
1843     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
1844     ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
1845     ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
1846     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
1847     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
1848     ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
1849     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
1850   }
1851 }
1852 
1853 
TEST(neon_ld4_lane_postindex)1854 TEST(neon_ld4_lane_postindex) {
1855   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1856 
1857   uint8_t src[64];
1858   for (unsigned i = 0; i < sizeof(src); i++) {
1859     src[i] = i;
1860   }
1861   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1862 
1863   START();
1864 
1865   // Test loading whole register by element.
1866   __ Mov(x17, src_base);
1867   for (int i = 15; i >= 0; i--) {
1868     __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17, 4, PostIndex));
1869   }
1870 
1871   __ Mov(x18, src_base);
1872   for (int i = 7; i >= 0; i--) {
1873     __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x18, 8, PostIndex));
1874   }
1875 
1876   __ Mov(x19, src_base);
1877   for (int i = 3; i >= 0; i--) {
1878     __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x19, 16, PostIndex));
1879   }
1880 
1881   __ Mov(x20, src_base);
1882   for (int i = 1; i >= 0; i--) {
1883     __ Ld4(v12.D(),
1884            v13.D(),
1885            v14.D(),
1886            v15.D(),
1887            i,
1888            MemOperand(x20, 32, PostIndex));
1889   }
1890 
1891   // Test loading a single element into an initialised register.
1892   __ Mov(x25, 1);
1893   __ Mov(x21, src_base);
1894   __ Mov(x22, src_base);
1895   __ Mov(x23, src_base);
1896   __ Mov(x24, src_base);
1897 
1898   __ Mov(x4, x21);
1899   __ Ldr(q16, MemOperand(x4, 16, PostIndex));
1900   __ Ldr(q17, MemOperand(x4, 16, PostIndex));
1901   __ Ldr(q18, MemOperand(x4, 16, PostIndex));
1902   __ Ldr(q19, MemOperand(x4));
1903   __ Ld4(v16.B(),
1904          v17.B(),
1905          v18.B(),
1906          v19.B(),
1907          4,
1908          MemOperand(x21, x25, PostIndex));
1909   __ Add(x25, x25, 1);
1910 
1911   __ Mov(x5, x22);
1912   __ Ldr(q20, MemOperand(x5, 16, PostIndex));
1913   __ Ldr(q21, MemOperand(x5, 16, PostIndex));
1914   __ Ldr(q22, MemOperand(x5, 16, PostIndex));
1915   __ Ldr(q23, MemOperand(x5));
1916   __ Ld4(v20.H(),
1917          v21.H(),
1918          v22.H(),
1919          v23.H(),
1920          3,
1921          MemOperand(x22, x25, PostIndex));
1922   __ Add(x25, x25, 1);
1923 
1924   __ Mov(x6, x23);
1925   __ Ldr(q24, MemOperand(x6, 16, PostIndex));
1926   __ Ldr(q25, MemOperand(x6, 16, PostIndex));
1927   __ Ldr(q26, MemOperand(x6, 16, PostIndex));
1928   __ Ldr(q27, MemOperand(x6));
1929   __ Ld4(v24.S(),
1930          v25.S(),
1931          v26.S(),
1932          v27.S(),
1933          2,
1934          MemOperand(x23, x25, PostIndex));
1935   __ Add(x25, x25, 1);
1936 
1937   __ Mov(x7, x24);
1938   __ Ldr(q28, MemOperand(x7, 16, PostIndex));
1939   __ Ldr(q29, MemOperand(x7, 16, PostIndex));
1940   __ Ldr(q30, MemOperand(x7, 16, PostIndex));
1941   __ Ldr(q31, MemOperand(x7));
1942   __ Ld4(v28.D(),
1943          v29.D(),
1944          v30.D(),
1945          v31.D(),
1946          1,
1947          MemOperand(x24, x25, PostIndex));
1948 
1949   END();
1950 
1951   if (CAN_RUN()) {
1952     RUN();
1953 
1954     ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0);
1955     ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1);
1956     ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2);
1957     ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3);
1958     ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4);
1959     ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5);
1960     ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6);
1961     ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7);
1962     ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8);
1963     ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9);
1964     ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10);
1965     ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11);
1966     ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12);
1967     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13);
1968     ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14);
1969     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15);
1970     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
1971     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
1972     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
1973     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
1974     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
1975     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
1976     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
1977     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
1978     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
1979     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
1980     ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
1981     ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
1982     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
1983     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
1984     ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
1985     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
1986 
1987     ASSERT_EQUAL_64(src_base + 64, x17);
1988     ASSERT_EQUAL_64(src_base + 64, x18);
1989     ASSERT_EQUAL_64(src_base + 64, x19);
1990     ASSERT_EQUAL_64(src_base + 64, x20);
1991     ASSERT_EQUAL_64(src_base + 1, x21);
1992     ASSERT_EQUAL_64(src_base + 2, x22);
1993     ASSERT_EQUAL_64(src_base + 3, x23);
1994     ASSERT_EQUAL_64(src_base + 4, x24);
1995   }
1996 }
1997 
1998 
TEST(neon_ld4_alllanes)1999 TEST(neon_ld4_alllanes) {
2000   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2001 
2002   uint8_t src[64];
2003   for (unsigned i = 0; i < sizeof(src); i++) {
2004     src[i] = i;
2005   }
2006   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2007 
2008   START();
2009   __ Mov(x17, src_base + 1);
2010   __ Mov(x18, 1);
2011   __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
2012   __ Add(x17, x17, 4);
2013   __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
2014   __ Add(x17, x17, 1);
2015   __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
2016   __ Add(x17, x17, 1);
2017   __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
2018   __ Add(x17, x17, 8);
2019   __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
2020   __ Add(x17, x17, 1);
2021   __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
2022   __ Add(x17, x17, 16);
2023   __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));
2024 
2025 
2026   END();
2027 
2028   if (CAN_RUN()) {
2029     RUN();
2030 
2031     ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
2032     ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
2033     ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
2034     ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
2035     ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
2036     ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
2037     ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
2038     ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
2039     ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
2040     ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
2041     ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
2042     ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
2043     ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
2044     ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
2045     ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
2046     ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
2047     ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
2048     ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
2049     ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
2050     ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
2051     ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
2052     ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
2053     ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
2054     ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
2055     ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
2056     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
2057     ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
2058     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
2059   }
2060 }
2061 
2062 
TEST(neon_ld4_alllanes_postindex)2063 TEST(neon_ld4_alllanes_postindex) {
2064   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2065 
2066   uint8_t src[64];
2067   for (unsigned i = 0; i < sizeof(src); i++) {
2068     src[i] = i;
2069   }
2070   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2071   __ Mov(x17, src_base + 1);
2072   __ Mov(x18, 1);
2073 
2074   START();
2075   __ Mov(x17, src_base + 1);
2076   __ Mov(x18, 1);
2077   __ Ld4r(v0.V8B(),
2078           v1.V8B(),
2079           v2.V8B(),
2080           v3.V8B(),
2081           MemOperand(x17, 4, PostIndex));
2082   __ Ld4r(v4.V16B(),
2083           v5.V16B(),
2084           v6.V16B(),
2085           v7.V16B(),
2086           MemOperand(x17, x18, PostIndex));
2087   __ Ld4r(v8.V4H(),
2088           v9.V4H(),
2089           v10.V4H(),
2090           v11.V4H(),
2091           MemOperand(x17, x18, PostIndex));
2092   __ Ld4r(v12.V8H(),
2093           v13.V8H(),
2094           v14.V8H(),
2095           v15.V8H(),
2096           MemOperand(x17, 8, PostIndex));
2097   __ Ld4r(v16.V2S(),
2098           v17.V2S(),
2099           v18.V2S(),
2100           v19.V2S(),
2101           MemOperand(x17, x18, PostIndex));
2102   __ Ld4r(v20.V4S(),
2103           v21.V4S(),
2104           v22.V4S(),
2105           v23.V4S(),
2106           MemOperand(x17, 16, PostIndex));
2107   __ Ld4r(v24.V2D(),
2108           v25.V2D(),
2109           v26.V2D(),
2110           v27.V2D(),
2111           MemOperand(x17, 32, PostIndex));
2112   END();
2113 
2114   if (CAN_RUN()) {
2115     RUN();
2116 
2117     ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
2118     ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
2119     ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
2120     ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
2121     ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
2122     ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
2123     ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
2124     ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
2125     ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
2126     ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
2127     ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
2128     ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
2129     ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
2130     ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
2131     ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
2132     ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
2133     ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
2134     ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
2135     ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
2136     ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
2137     ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
2138     ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
2139     ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
2140     ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
2141     ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
2142     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
2143     ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
2144     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
2145     ASSERT_EQUAL_64(src_base + 64, x17);
2146   }
2147 }
2148 
2149 
// Exercises the single-lane form of St1 for B, H, S and D elements using plain
// base-register addressing.
TEST(neon_st1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The buffer provides the initial value of q0 (its first 16 bytes) and is
  // then used as the store destination. Every byte starts as its own index.
  uint8_t scratch[64];
  for (size_t offset = 0; offset < sizeof(scratch); ++offset) {
    scratch[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t scratch_base = reinterpret_cast<uintptr_t>(scratch);

  START();
  __ Mov(x17, scratch_base);
  // Register offset used to read back the 16 bytes just written.
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));

  // For each element size, store the lanes of v0 from highest to lowest at
  // increasing addresses, then reload the resulting 16 bytes.
  for (int lane = 15; lane >= 0; --lane) {
    __ St1(v0.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int lane = 7; lane >= 0; --lane) {
    __ St1(v0.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 2);
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int lane = 3; lane >= 0; --lane) {
    __ St1(v0.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 4);
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int lane = 1; lane >= 0; --lane) {
    __ St1(v0.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 8);
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // Each result is q0 with its elements (of the stored size) in reverse
    // order.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2199 
2200 
TEST(neon_st2_lane)2201 TEST(neon_st2_lane) {
2202   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2203 
2204   // Struct size * addressing modes * element sizes * vector size.
2205   uint8_t dst[2 * 2 * 4 * 16];
2206   memset(dst, 0, sizeof(dst));
2207   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2208 
2209   START();
2210   __ Mov(x17, dst_base);
2211   __ Mov(x18, dst_base);
2212   __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2213   __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2214 
2215   // Test B stores with and without post index.
2216   for (int i = 15; i >= 0; i--) {
2217     __ St2(v0.B(), v1.B(), i, MemOperand(x18));
2218     __ Add(x18, x18, 2);
2219   }
2220   for (int i = 15; i >= 0; i--) {
2221     __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
2222   }
2223   __ Ldr(q2, MemOperand(x17, 0 * 16));
2224   __ Ldr(q3, MemOperand(x17, 1 * 16));
2225   __ Ldr(q4, MemOperand(x17, 2 * 16));
2226   __ Ldr(q5, MemOperand(x17, 3 * 16));
2227 
2228   // Test H stores with and without post index.
2229   __ Mov(x0, 4);
2230   for (int i = 7; i >= 0; i--) {
2231     __ St2(v0.H(), v1.H(), i, MemOperand(x18));
2232     __ Add(x18, x18, 4);
2233   }
2234   for (int i = 7; i >= 0; i--) {
2235     __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
2236   }
2237   __ Ldr(q6, MemOperand(x17, 4 * 16));
2238   __ Ldr(q7, MemOperand(x17, 5 * 16));
2239   __ Ldr(q16, MemOperand(x17, 6 * 16));
2240   __ Ldr(q17, MemOperand(x17, 7 * 16));
2241 
2242   // Test S stores with and without post index.
2243   for (int i = 3; i >= 0; i--) {
2244     __ St2(v0.S(), v1.S(), i, MemOperand(x18));
2245     __ Add(x18, x18, 8);
2246   }
2247   for (int i = 3; i >= 0; i--) {
2248     __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
2249   }
2250   __ Ldr(q18, MemOperand(x17, 8 * 16));
2251   __ Ldr(q19, MemOperand(x17, 9 * 16));
2252   __ Ldr(q20, MemOperand(x17, 10 * 16));
2253   __ Ldr(q21, MemOperand(x17, 11 * 16));
2254 
2255   // Test D stores with and without post index.
2256   __ Mov(x0, 16);
2257   __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
2258   __ Add(x18, x18, 16);
2259   __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
2260   __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
2261   __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
2262   __ Ldr(q22, MemOperand(x17, 12 * 16));
2263   __ Ldr(q23, MemOperand(x17, 13 * 16));
2264   __ Ldr(q24, MemOperand(x17, 14 * 16));
2265   __ Ldr(q25, MemOperand(x17, 15 * 16));
2266   END();
2267 
2268   if (CAN_RUN()) {
2269     RUN();
2270 
2271     ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
2272     ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
2273     ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
2274     ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);
2275 
2276     ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
2277     ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
2278     ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
2279     ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);
2280 
2281     ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
2282     ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
2283     ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
2284     ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);
2285 
2286     ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
2287     ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
2288     ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
2289     ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
2290   }
2291 }
2292 
2293 
TEST(neon_st3_lane)2294 TEST(neon_st3_lane) {
2295   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2296 
2297   // Struct size * addressing modes * element sizes * vector size.
2298   uint8_t dst[3 * 2 * 4 * 16];
2299   memset(dst, 0, sizeof(dst));
2300   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2301 
2302   START();
2303   __ Mov(x17, dst_base);
2304   __ Mov(x18, dst_base);
2305   __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2306   __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2307   __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2308 
2309   // Test B stores with and without post index.
2310   for (int i = 15; i >= 0; i--) {
2311     __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
2312     __ Add(x18, x18, 3);
2313   }
2314   for (int i = 15; i >= 0; i--) {
2315     __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
2316   }
2317   __ Ldr(q3, MemOperand(x17, 0 * 16));
2318   __ Ldr(q4, MemOperand(x17, 1 * 16));
2319   __ Ldr(q5, MemOperand(x17, 2 * 16));
2320   __ Ldr(q6, MemOperand(x17, 3 * 16));
2321   __ Ldr(q7, MemOperand(x17, 4 * 16));
2322   __ Ldr(q16, MemOperand(x17, 5 * 16));
2323 
2324   // Test H stores with and without post index.
2325   __ Mov(x0, 6);
2326   for (int i = 7; i >= 0; i--) {
2327     __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
2328     __ Add(x18, x18, 6);
2329   }
2330   for (int i = 7; i >= 0; i--) {
2331     __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
2332   }
2333   __ Ldr(q17, MemOperand(x17, 6 * 16));
2334   __ Ldr(q18, MemOperand(x17, 7 * 16));
2335   __ Ldr(q19, MemOperand(x17, 8 * 16));
2336   __ Ldr(q20, MemOperand(x17, 9 * 16));
2337   __ Ldr(q21, MemOperand(x17, 10 * 16));
2338   __ Ldr(q22, MemOperand(x17, 11 * 16));
2339 
2340   // Test S stores with and without post index.
2341   for (int i = 3; i >= 0; i--) {
2342     __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
2343     __ Add(x18, x18, 12);
2344   }
2345   for (int i = 3; i >= 0; i--) {
2346     __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
2347   }
2348   __ Ldr(q23, MemOperand(x17, 12 * 16));
2349   __ Ldr(q24, MemOperand(x17, 13 * 16));
2350   __ Ldr(q25, MemOperand(x17, 14 * 16));
2351   __ Ldr(q26, MemOperand(x17, 15 * 16));
2352   __ Ldr(q27, MemOperand(x17, 16 * 16));
2353   __ Ldr(q28, MemOperand(x17, 17 * 16));
2354 
2355   // Test D stores with and without post index.
2356   __ Mov(x0, 24);
2357   __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
2358   __ Add(x18, x18, 24);
2359   __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
2360   __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
2361   __ Ldr(q29, MemOperand(x17, 18 * 16));
2362   __ Ldr(q30, MemOperand(x17, 19 * 16));
2363   __ Ldr(q31, MemOperand(x17, 20 * 16));
2364   END();
2365 
2366   if (CAN_RUN()) {
2367     RUN();
2368 
2369     ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3);
2370     ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4);
2371     ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5);
2372     ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6);
2373     ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7);
2374     ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16);
2375 
2376     ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17);
2377     ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18);
2378     ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19);
2379     ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20);
2380     ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21);
2381     ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22);
2382 
2383     ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23);
2384     ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24);
2385     ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25);
2386     ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26);
2387     ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27);
2388     ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28);
2389   }
2390 }
2391 
2392 
TEST(neon_st4_lane)2393 TEST(neon_st4_lane) {
2394   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2395 
2396   // Struct size * element sizes * vector size.
2397   uint8_t dst[4 * 4 * 16];
2398   memset(dst, 0, sizeof(dst));
2399   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2400 
2401   START();
2402   __ Mov(x17, dst_base);
2403   __ Mov(x18, dst_base);
2404   __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2405   __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2406   __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2407   __ Movi(v3.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2408 
2409   // Test B stores without post index.
2410   for (int i = 15; i >= 0; i--) {
2411     __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18));
2412     __ Add(x18, x18, 4);
2413   }
2414   __ Ldr(q4, MemOperand(x17, 0 * 16));
2415   __ Ldr(q5, MemOperand(x17, 1 * 16));
2416   __ Ldr(q6, MemOperand(x17, 2 * 16));
2417   __ Ldr(q7, MemOperand(x17, 3 * 16));
2418 
2419   // Test H stores with post index.
2420   __ Mov(x0, 8);
2421   for (int i = 7; i >= 0; i--) {
2422     __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex));
2423   }
2424   __ Ldr(q16, MemOperand(x17, 4 * 16));
2425   __ Ldr(q17, MemOperand(x17, 5 * 16));
2426   __ Ldr(q18, MemOperand(x17, 6 * 16));
2427   __ Ldr(q19, MemOperand(x17, 7 * 16));
2428 
2429   // Test S stores without post index.
2430   for (int i = 3; i >= 0; i--) {
2431     __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18));
2432     __ Add(x18, x18, 16);
2433   }
2434   __ Ldr(q20, MemOperand(x17, 8 * 16));
2435   __ Ldr(q21, MemOperand(x17, 9 * 16));
2436   __ Ldr(q22, MemOperand(x17, 10 * 16));
2437   __ Ldr(q23, MemOperand(x17, 11 * 16));
2438 
2439   // Test D stores with post index.
2440   __ Mov(x0, 32);
2441   __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
2442   __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));
2443 
2444   __ Ldr(q24, MemOperand(x17, 12 * 16));
2445   __ Ldr(q25, MemOperand(x17, 13 * 16));
2446   __ Ldr(q26, MemOperand(x17, 14 * 16));
2447   __ Ldr(q27, MemOperand(x17, 15 * 16));
2448   END();
2449 
2450   if (CAN_RUN()) {
2451     RUN();
2452 
2453     ASSERT_EQUAL_128(0x2323130322221202, 0x2121110120201000, q4);
2454     ASSERT_EQUAL_128(0x2727170726261606, 0x2525150524241404, q5);
2455     ASSERT_EQUAL_128(0x2b2b1b0b2a2a1a0a, 0x2929190928281808, q6);
2456     ASSERT_EQUAL_128(0x2f2f1f0f2e2e1e0e, 0x2d2d1d0d2c2c1c0c, q7);
2457 
2458     ASSERT_EQUAL_128(0x2223222312130203, 0x2021202110110001, q16);
2459     ASSERT_EQUAL_128(0x2627262716170607, 0x2425242514150405, q17);
2460     ASSERT_EQUAL_128(0x2a2b2a2b1a1b0a0b, 0x2829282918190809, q18);
2461     ASSERT_EQUAL_128(0x2e2f2e2f1e1f0e0f, 0x2c2d2c2d1c1d0c0d, q19);
2462 
2463     ASSERT_EQUAL_128(0x2021222320212223, 0x1011121300010203, q20);
2464     ASSERT_EQUAL_128(0x2425262724252627, 0x1415161704050607, q21);
2465     ASSERT_EQUAL_128(0x28292a2b28292a2b, 0x18191a1b08090a0b, q22);
2466     ASSERT_EQUAL_128(0x2c2d2e2f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23);
2467 
2468     ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24);
2469     ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x28292a2b2c2d2e2f, q25);
2470     ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26);
2471     ASSERT_EQUAL_128(0x2021222324252627, 0x2021222324252627, q27);
2472   }
2473 }
2474 
2475 
TEST(neon_ld1_lane_postindex)2476 TEST(neon_ld1_lane_postindex) {
2477   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2478 
2479   uint8_t src[64];
2480   for (unsigned i = 0; i < sizeof(src); i++) {
2481     src[i] = i;
2482   }
2483   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2484 
2485   START();
2486   __ Mov(x17, src_base);
2487   __ Mov(x18, src_base);
2488   __ Mov(x19, src_base);
2489   __ Mov(x20, src_base);
2490   __ Mov(x21, src_base);
2491   __ Mov(x22, src_base);
2492   __ Mov(x23, src_base);
2493   __ Mov(x24, src_base);
2494 
2495   // Test loading whole register by element.
2496   for (int i = 15; i >= 0; i--) {
2497     __ Ld1(v0.B(), i, MemOperand(x17, 1, PostIndex));
2498   }
2499 
2500   for (int i = 7; i >= 0; i--) {
2501     __ Ld1(v1.H(), i, MemOperand(x18, 2, PostIndex));
2502   }
2503 
2504   for (int i = 3; i >= 0; i--) {
2505     __ Ld1(v2.S(), i, MemOperand(x19, 4, PostIndex));
2506   }
2507 
2508   for (int i = 1; i >= 0; i--) {
2509     __ Ld1(v3.D(), i, MemOperand(x20, 8, PostIndex));
2510   }
2511 
2512   // Test loading a single element into an initialised register.
2513   __ Mov(x25, 1);
2514   __ Ldr(q4, MemOperand(x21));
2515   __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
2516   __ Add(x25, x25, 1);
2517 
2518   __ Ldr(q5, MemOperand(x22));
2519   __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
2520   __ Add(x25, x25, 1);
2521 
2522   __ Ldr(q6, MemOperand(x23));
2523   __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
2524   __ Add(x25, x25, 1);
2525 
2526   __ Ldr(q7, MemOperand(x24));
2527   __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));
2528 
2529   END();
2530 
2531   if (CAN_RUN()) {
2532     RUN();
2533 
2534     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
2535     ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1);
2536     ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2);
2537     ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3);
2538     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
2539     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
2540     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
2541     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
2542     ASSERT_EQUAL_64(src_base + 16, x17);
2543     ASSERT_EQUAL_64(src_base + 16, x18);
2544     ASSERT_EQUAL_64(src_base + 16, x19);
2545     ASSERT_EQUAL_64(src_base + 16, x20);
2546     ASSERT_EQUAL_64(src_base + 1, x21);
2547     ASSERT_EQUAL_64(src_base + 2, x22);
2548     ASSERT_EQUAL_64(src_base + 3, x23);
2549     ASSERT_EQUAL_64(src_base + 4, x24);
2550   }
2551 }
2552 
2553 
// ST1 (single structure) with immediate post-index addressing: write a whole
// register to memory one lane at a time, for each lane size.
TEST(neon_st1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // pattern[k] == k; the first 16 bytes are the data that gets stored, and
  // the whole buffer is overwritten in 16-byte steps by the stores below.
  uint8_t pattern[64];
  for (unsigned offset = 0; offset < sizeof(pattern); ++offset) {
    pattern[offset] = offset;
  }
  const uintptr_t pattern_addr = reinterpret_cast<uintptr_t>(pattern);

  START();
  __ Mov(x17, pattern_addr);
  // x18 == -16 lets each read-back load fetch the 16 bytes just written
  // behind the post-incremented pointer.
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));

  // Byte lanes, highest lane first.
  for (int lane = 16; lane-- > 0;) {
    __ St1(v0.B(), lane, MemOperand(x17, 1, PostIndex));
  }
  __ Ldr(q1, MemOperand(x17, x18));

  // Halfword lanes.
  for (int lane = 8; lane-- > 0;) {
    __ St1(v0.H(), lane, MemOperand(x17, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, x18));

  // Word lanes.
  for (int lane = 4; lane-- > 0;) {
    __ St1(v0.S(), lane, MemOperand(x17, 4, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, x18));

  // Doubleword lanes.
  for (int lane = 2; lane-- > 0;) {
    __ St1(v0.D(), lane, MemOperand(x17, 8, PostIndex));
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // Each read-back holds the elements of q0 in reversed element order.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2599 
2600 
// LD1R (load one element and replicate it into every lane) with plain
// base-register addressing, for each 64-bit and 128-bit arrangement.
TEST(neon_ld1_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i, so each loaded element identifies the offset it came from.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // The pointer starts at offset 1 and is advanced by a single byte between
  // loads, so the eight loads read from offsets 1 to 8 in turn regardless of
  // the element size being loaded.
  __ Mov(x17, src_base + 1);
  __ Ld1r(v0.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v1.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v2.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v3.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v4.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v5.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v6.V1D(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v7.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit arrangements (8B, 4H, 2S, 1D) are expected to leave the upper
    // 64 bits of the register zero; the 128-bit ones replicate into both
    // halves.
    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0807060508070605, q4);
    ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7);
  }
}
2642 
2643 
// LD1R (load one element and replicate) with post-index addressing, mixing
// immediate post-index amounts with a register post-index amount.
TEST(neon_ld1_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i, so each loaded element identifies the offset it came from.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  // x18 == 1 is the register post-index amount; it advances the pointer by a
  // single byte even for the halfword and word loads that use it.
  __ Mov(x18, 1);
  // Load offsets, in order: 1, 2, 3, 4, 6, 7, 11. The pointer ends at 19.
  __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));
  __ Ld1r(v1.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v2.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));
  __ Ld1r(v4.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));
  __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit arrangements are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0908070609080706, q4);
    ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6);
    // Sum of all post-index amounts (1+1+1+2+1+4+8) on top of the initial +1.
    ASSERT_EQUAL_64(src_base + 19, x17);
  }
}
2678 
2679 
// ST1 (multiple structures) of one to four 64-bit (D-sized) registers using
// plain base-register addressing.
TEST(neon_st1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The stores below write 1 + 2 + 3 + 4 + 4 = 14 D-sized chunks in total.
  // src[i] == i.
  uint8_t src[14 * kDRegSizeInBytes];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Capture the first 64 bytes of the pattern in q0-q3, then rewind x17 so
  // that the stores overwrite the buffer from its start.
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, src_base);

  // Each store is followed by post-indexed loads that read back exactly the
  // bytes written and move x17 on to the next unwritten location.
  __ St1(v0.V8B(), MemOperand(x17));
  __ Ldr(d16, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17));
  __ Ldr(d18, MemOperand(x17, 8, PostIndex));
  __ Ldr(d19, MemOperand(x17, 8, PostIndex));
  __ Ldr(d20, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The source registers still hold the original pattern.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3);
    // Read-backs: only the low 64 bits of each source register were stored,
    // one register after another.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
  }
}
2735 
2736 
// ST1 (multiple structures) of one to four 64-bit (D-sized) registers using
// immediate post-index addressing.
TEST(neon_st1_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i. The stores below start at offset 0 and cover
  // 8 + 16 + 24 + 32 + 32 = 112 bytes; the remainder of the buffer is not
  // written by this test.
  uint8_t src[64 + 14 * kDRegSizeInBytes];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  // Negative register offsets used to read back the data that lies behind
  // the post-incremented pointer.
  __ Mov(x18, -8);
  __ Mov(x19, -16);
  __ Mov(x20, -24);
  __ Mov(x21, -32);
  // Capture the first 64 bytes of the pattern in q0-q3, then rewind x17.
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, src_base);

  // In each case the post-index immediate equals the number of bytes stored.
  __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
  __ Ldr(d16, MemOperand(x17, x18));

  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));

  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
  __ Ldr(d18, MemOperand(x17, x20));
  __ Ldr(d19, MemOperand(x17, x19));
  __ Ldr(d20, MemOperand(x17, x18));

  __ St1(v0.V2S(),
         v1.V2S(),
         v2.V2S(),
         v3.V2S(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q21, MemOperand(x17, x21));
  __ Ldr(q22, MemOperand(x17, x19));

  __ St1(v0.V1D(),
         v1.V1D(),
         v2.V1D(),
         v3.V1D(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q23, MemOperand(x17, x21));
  __ Ldr(q24, MemOperand(x17, x19));
  END();

  if (CAN_RUN()) {
    RUN();

    // Only the low 64 bits of each source register were stored, one register
    // after another.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
  }
}
2800 
2801 
// ST1 (multiple structures) of one to four 128-bit (Q-sized) registers using
// plain base-register addressing.
TEST(neon_st1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first 64 bytes are the source data; the stores are placed after them
  // and cover 16 + 32 + 48 + 64 = 160 bytes. src[i] == i (as uint8_t).
  uint8_t src[64 + 160];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // After these four loads x17 points at offset 64, where the stores begin.
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // Each store is followed by post-indexed loads that read back the bytes
  // written and move x17 on to the next store location.
  __ St1(v0.V16B(), MemOperand(x17));
  __ Ldr(q16, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));
  __ Ldr(q18, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17));
  __ Ldr(q19, MemOperand(x17, 16, PostIndex));
  __ Ldr(q20, MemOperand(x17, 16, PostIndex));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17, 16, PostIndex));
  __ Ldr(q25, MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Each read-back register matches the corresponding source register.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2852 
2853 
// ST1 (multiple structures) of one to four 128-bit (Q-sized) registers using
// immediate post-index addressing.
TEST(neon_st1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first 64 bytes are the source data; the stores are placed after them
  // and cover 16 + 32 + 48 + 64 = 160 bytes. src[i] == i (as uint8_t).
  uint8_t src[64 + 160];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  // Negative register offsets used to read back the data that lies behind
  // the post-incremented pointer.
  __ Mov(x18, -16);
  __ Mov(x19, -32);
  __ Mov(x20, -48);
  __ Mov(x21, -64);
  // After these four loads x17 points at offset 64, where the stores begin.
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // In each case the post-index immediate equals the number of bytes stored.
  __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q16, MemOperand(x17, x18));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));
  __ Ldr(q18, MemOperand(x17, x18));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
  __ Ldr(q19, MemOperand(x17, x20));
  __ Ldr(q20, MemOperand(x17, x19));
  __ Ldr(q21, MemOperand(x17, x18));

  __ St1(v0.V2D(),
         v1.V2D(),
         v2.V2D(),
         v3.V2D(),
         MemOperand(x17, 64, PostIndex));
  __ Ldr(q22, MemOperand(x17, x21));
  __ Ldr(q23, MemOperand(x17, x20));
  __ Ldr(q24, MemOperand(x17, x19));
  __ Ldr(q25, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // Each read-back register matches the corresponding source register.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2913 
2914 
// ST2 (multiple structures) of two 64-bit (D-sized) registers using plain
// base-register addressing, for the 8B, 4H and 2S arrangements.
TEST(neon_st2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i. The buffer is both the source of the register data and the
  // destination of the stores.
  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  // Three 16-byte interleaved stores at offsets 0, 22 and 33. The last two
  // overlap, so the later store wins for the bytes they share.
  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18));
  __ Add(x18, x18, 11);
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));

  // Reload the whole buffer (overwriting q0 and q1) to inspect the result.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes not covered by any store (for example offsets 49-63, seen in q3)
    // still hold the initial pattern.
    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1);
    ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3);
  }
}
2953 
2954 
// ST2 (multiple structures) of two 64-bit (D-sized) registers using register
// and immediate post-index addressing.
TEST(neon_st2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i. The buffer is both the source of the register data and the
  // destination of the stores.
  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Register post-index amount; smaller than the 16 bytes each store writes,
  // so the store that follows overlaps the previous one.
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  // Store offsets, in order: 0, 5 and 21.
  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));


  // Reload the start of the buffer (overwriting q0 and q1) to inspect the
  // result.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes from offset 37 onwards (seen in q2) still hold the initial
    // pattern.
    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2);
  }
}
2991 
2992 
// ST2 (multiple structures) of two 128-bit (Q-sized) registers using plain
// base-register addressing, for the 16B, 8H, 4S and 2D arrangements.
TEST(neon_st2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i. The buffer is both the source of the register data and the
  // destination of the stores.
  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  // Four 32-byte interleaved stores at offsets 0, 8, 30 and 32. They overlap,
  // so later stores win for the bytes they share with earlier ones.
  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18));
  __ Add(x18, x18, 8);
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  // Reload the first 64 bytes (overwriting q0 and q1) to inspect the result.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // q2 and q3 cover offsets 32-63, which hold the final (2D) store.
    ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3);
  }
}
3033 
3034 
// ST2 (multiple structures) of two 128-bit (Q-sized) registers using register
// and immediate post-index addressing.
TEST(neon_st2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i. The buffer is both the source of the register data and the
  // destination of the stores.
  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Register post-index amount; smaller than the 32 bytes each store writes,
  // so the store that follows overlaps the previous one.
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  // Store offsets, in order: 0, 5, 37 and 42.
  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  // Reload the whole buffer (overwriting q0 and q1) to inspect the result.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes from offset 74 onwards (seen in q4) still hold the initial
    // pattern.
    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2);
    ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4);
  }
}
3075 
3076 
// ST3 (multiple structures) of three 64-bit (D-sized) registers using plain
// base-register addressing, for the 8B, 4H and 2S arrangements.
TEST(neon_st3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i. The buffer is both the source of the register data and the
  // destination of the stores.
  uint8_t src[3 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  // Three 24-byte interleaved stores at offsets 0, 3 and 5. They overlap, so
  // later stores win for the bytes they share with earlier ones.
  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18));
  __ Add(x18, x18, 3);
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));


  // Reload the first 32 bytes (overwriting q0 and q1) to inspect the result.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Offsets 29-31 (top three bytes of q1) still hold the initial pattern.
    ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0);
    ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1);
  }
}
3113 
3114 
// ST3 (multiple structures) of three 64-bit (D-sized) registers using register
// and immediate post-index addressing.
TEST(neon_st3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i. The buffer is both the source of the register data and the
  // destination of the stores.
  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Register post-index amount; smaller than the 24 bytes each store writes,
  // so the store that follows overlaps the previous one.
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  // Store offsets, in order: 0, 5 and 29.
  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));


  // Reload the whole buffer (overwriting q0-q2) to inspect the result.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes from offset 53 onwards (seen in q3) still hold the initial
    // pattern.
    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3);
  }
}
3154 
3155 
// ST3 (multiple structures) of three 128-bit (Q-sized) registers using plain
// base-register addressing, for the 16B, 8H, 4S and 2D arrangements.
TEST(neon_st3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i. The buffer is both the source of the register data and the
  // destination of the stores.
  uint8_t src[6 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  // Four 48-byte interleaved stores at offsets 0, 5, 17 and 39. They overlap,
  // so later stores win for the bytes they share with earlier ones.
  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  // Reload the whole buffer (overwriting q0-q2) to inspect the result.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes from offset 87 onwards (seen in q5) still hold the initial
    // pattern.
    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
    ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
    ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
    ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);
  }
}
3201 
3202 
// ST3 (multiple structures) of three 128-bit (Q-sized) registers using
// register and immediate post-index addressing.
TEST(neon_st3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i. The buffer is both the source of the register data and the
  // destination of the stores.
  uint8_t src[7 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Register post-index amount; smaller than the 48 bytes each store writes,
  // so the store that follows overlaps the previous one.
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  // Store offsets, in order: 0, 5, 53 and 58.
  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  // Reload the whole buffer (overwriting q0-q2) to inspect the result.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes from offset 106 onwards (seen in q6) still hold the initial
    // pattern.
    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
    ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
    ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
    ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);
  }
}
3248 
3249 
// ST4 (multiple structures) of four 64-bit (D-sized) registers using plain
// base-register addressing, for the 8B, 4H and 2S arrangements.
TEST(neon_st4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i. The buffer is both the source of the register data and the
  // destination of the stores.
  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // Three 32-byte interleaved stores at offsets 0, 12 and 27. They overlap,
  // so later stores win for the bytes they share with earlier ones.
  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18));
  __ Add(x18, x18, 15);
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));


  // Reload the whole buffer (overwriting q0-q3) to inspect the result.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes from offset 59 onwards (seen in q3) still hold the initial
    // pattern.
    ASSERT_EQUAL_128(0x1110010032221202, 0X3121110130201000, q0);
    ASSERT_EQUAL_128(0x1003020100322322, 0X1312030231302120, q1);
    ASSERT_EQUAL_128(0x1407060504333231, 0X3023222120131211, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b373635, 0x3427262524171615, q3);
  }
}
3291 
3292 
// ST4 (multiple structures) of four 64-bit (D-sized) registers using register
// and immediate post-index addressing.
TEST(neon_st4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i. The buffer is both the source of the register data and the
  // destination of the stores.
  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Register post-index amount; smaller than the 32 bytes each store writes,
  // so the store that follows overlaps the previous one.
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // Store offsets, in order: 0, 5 and 37.
  __ St4(v0.V8B(),
         v1.V8B(),
         v2.V8B(),
         v3.V8B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V4H(),
         v1.V4H(),
         v2.V4H(),
         v3.V4H(),
         MemOperand(x18, 32, PostIndex));
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));


  // Reload the whole buffer (overwriting q0-q3) to inspect the result.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes from offset 69 onwards (seen in q4) still hold the initial
    // pattern.
    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
    ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2);
    ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4);
  }
}
3343 
3344 
// Checks the 128-bit (Q register) arrangements of ST4 with a plain base
// register address.
//
// The buffer bytes initially hold their own offsets. Four interleaving stores
// (16B, 8H, 4S and 2D) are written at byte offsets 0, 5, 17 and 39, each
// partially overwriting the previous one. The buffer is then reloaded and
// compared.
TEST(neon_st4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // src[i] == i, so every byte of the expected values identifies its origin.
  uint8_t src[7 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 walks the buffer for the initial loads; x18 is the store address.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // Stores at src_base + 0, + 5, + 17 (5 + 12) and + 39 (17 + 22).
  __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
  // x18 is not read again in this test after this final adjustment.
  __ Add(x18, x18, 10);

  // Read the whole buffer back so that it can be checked from registers.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1);
    ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2);
    ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3);
    ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4);
    ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5);
    // Bytes 0x67-0x6f lie beyond the last store and keep their initial values.
    ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6);
  }
}
3394 
3395 
TEST(neon_st4_q_postindex)3396 TEST(neon_st4_q_postindex) {
3397   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3398 
3399   uint8_t src[9 * 16];
3400   for (unsigned i = 0; i < sizeof(src); i++) {
3401     src[i] = i;
3402   }
3403   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3404 
3405   START();
3406   __ Mov(x22, 5);
3407   __ Mov(x17, src_base);
3408   __ Mov(x18, src_base);
3409   __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3410   __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3411   __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3412   __ Ldr(q3, MemOperand(x17, 16, PostIndex));
3413 
3414   __ St4(v0.V16B(),
3415          v1.V16B(),
3416          v2.V16B(),
3417          v3.V16B(),
3418          MemOperand(x18, x22, PostIndex));
3419   __ St4(v0.V8H(),
3420          v1.V8H(),
3421          v2.V8H(),
3422          v3.V8H(),
3423          MemOperand(x18, 64, PostIndex));
3424   __ St4(v0.V4S(),
3425          v1.V4S(),
3426          v2.V4S(),
3427          v3.V4S(),
3428          MemOperand(x18, x22, PostIndex));
3429   __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
3430 
3431   __ Mov(x19, src_base);
3432   __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3433   __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3434   __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3435   __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3436   __ Ldr(q4, MemOperand(x19, 16, PostIndex));
3437   __ Ldr(q5, MemOperand(x19, 16, PostIndex));
3438   __ Ldr(q6, MemOperand(x19, 16, PostIndex));
3439   __ Ldr(q7, MemOperand(x19, 16, PostIndex));
3440   __ Ldr(q8, MemOperand(x19, 16, PostIndex));
3441 
3442   END();
3443 
3444   if (CAN_RUN()) {
3445     RUN();
3446 
3447     ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
3448     ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
3449     ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2);
3450     ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3);
3451     ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4);
3452     ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5);
3453     ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6);
3454     ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7);
3455     ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8);
3456   }
3457 }
3458 
3459 
TEST(neon_destructive_minmaxp)3460 TEST(neon_destructive_minmaxp) {
3461   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3462 
3463   START();
3464   __ Movi(v0.V2D(), 0, 0x2222222233333333);
3465   __ Movi(v1.V2D(), 0, 0x0000000011111111);
3466 
3467   __ Sminp(v16.V2S(), v0.V2S(), v1.V2S());
3468   __ Mov(v17, v0);
3469   __ Sminp(v17.V2S(), v17.V2S(), v1.V2S());
3470   __ Mov(v18, v1);
3471   __ Sminp(v18.V2S(), v0.V2S(), v18.V2S());
3472   __ Mov(v19, v0);
3473   __ Sminp(v19.V2S(), v19.V2S(), v19.V2S());
3474 
3475   __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
3476   __ Mov(v21, v0);
3477   __ Smaxp(v21.V2S(), v21.V2S(), v1.V2S());
3478   __ Mov(v22, v1);
3479   __ Smaxp(v22.V2S(), v0.V2S(), v22.V2S());
3480   __ Mov(v23, v0);
3481   __ Smaxp(v23.V2S(), v23.V2S(), v23.V2S());
3482 
3483   __ Uminp(v24.V2S(), v0.V2S(), v1.V2S());
3484   __ Mov(v25, v0);
3485   __ Uminp(v25.V2S(), v25.V2S(), v1.V2S());
3486   __ Mov(v26, v1);
3487   __ Uminp(v26.V2S(), v0.V2S(), v26.V2S());
3488   __ Mov(v27, v0);
3489   __ Uminp(v27.V2S(), v27.V2S(), v27.V2S());
3490 
3491   __ Umaxp(v28.V2S(), v0.V2S(), v1.V2S());
3492   __ Mov(v29, v0);
3493   __ Umaxp(v29.V2S(), v29.V2S(), v1.V2S());
3494   __ Mov(v30, v1);
3495   __ Umaxp(v30.V2S(), v0.V2S(), v30.V2S());
3496   __ Mov(v31, v0);
3497   __ Umaxp(v31.V2S(), v31.V2S(), v31.V2S());
3498   END();
3499 
3500   if (CAN_RUN()) {
3501     RUN();
3502 
3503     ASSERT_EQUAL_128(0, 0x0000000022222222, q16);
3504     ASSERT_EQUAL_128(0, 0x0000000022222222, q17);
3505     ASSERT_EQUAL_128(0, 0x0000000022222222, q18);
3506     ASSERT_EQUAL_128(0, 0x2222222222222222, q19);
3507 
3508     ASSERT_EQUAL_128(0, 0x1111111133333333, q20);
3509     ASSERT_EQUAL_128(0, 0x1111111133333333, q21);
3510     ASSERT_EQUAL_128(0, 0x1111111133333333, q22);
3511     ASSERT_EQUAL_128(0, 0x3333333333333333, q23);
3512 
3513     ASSERT_EQUAL_128(0, 0x0000000022222222, q24);
3514     ASSERT_EQUAL_128(0, 0x0000000022222222, q25);
3515     ASSERT_EQUAL_128(0, 0x0000000022222222, q26);
3516     ASSERT_EQUAL_128(0, 0x2222222222222222, q27);
3517 
3518     ASSERT_EQUAL_128(0, 0x1111111133333333, q28);
3519     ASSERT_EQUAL_128(0, 0x1111111133333333, q29);
3520     ASSERT_EQUAL_128(0, 0x1111111133333333, q30);
3521     ASSERT_EQUAL_128(0, 0x3333333333333333, q31);
3522   }
3523 }
3524 
3525 
// Checks the Tbl macro (one- and four-register tables) when the destination
// register aliases the index register, a table register, or both.
//
// v0 holds the indices; v1-v4 hold table bytes 0xa0-0xdf so that each result
// byte identifies the table entry it came from. Index bytes that are out of
// range for the table produce zero in the expected results.
TEST(neon_destructive_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  // Single-register table.
  // v16: no aliasing; the initial 0x55 bytes must be completely overwritten.
  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v16.V16B(), v1.V16B(), v0.V16B());
  // v17: destination == index.
  __ Mov(v17, v0);
  __ Tbl(v17.V16B(), v1.V16B(), v17.V16B());
  // v18: destination == table.
  __ Mov(v18, v1);
  __ Tbl(v18.V16B(), v18.V16B(), v0.V16B());
  // v19: destination == table == index (v0 is looked up in itself).
  __ Mov(v19, v0);
  __ Tbl(v19.V16B(), v19.V16B(), v19.V16B());

  // Four-register table.
  // v20: no aliasing.
  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  // v21: destination == index.
  __ Mov(v21, v0);
  __ Tbl(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  // v22: destination == first table register (table copied to v22-v25).
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbl(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  // v26: destination == first table register == index. Here the table is
  // { v0, v1, v2, v3 }, so the expected result differs from the cases above.
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbl(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q16);
    ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q17);
    ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q18);
    ASSERT_EQUAL_128(0x0f00000000000000, 0x0000000000424100, q19);

    ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
    ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
    ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q22);
    ASSERT_EQUAL_128(0x0f000000c4c5c6b7, 0xb8b9aaabac424100, q26);
  }
}
3580 
3581 
// Checks the Tbx macro (one- and four-register tables) when the destination
// register aliases the index register, a table register, or both.
//
// The inputs are the same as for the Tbl variant of this test. The expected
// results differ in that destination bytes selected by an out-of-range index
// keep their previous value instead of being zeroed, so each aliasing pattern
// has its own expected value.
TEST(neon_destructive_tbx) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  // Single-register table.
  // v16: no aliasing; unselected bytes keep the initial 0x55 pattern.
  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v16.V16B(), v1.V16B(), v0.V16B());
  // v17: destination == index; unselected bytes keep the index values.
  __ Mov(v17, v0);
  __ Tbx(v17.V16B(), v1.V16B(), v17.V16B());
  // v18: destination == table; unselected bytes keep the table values.
  __ Mov(v18, v1);
  __ Tbx(v18.V16B(), v18.V16B(), v0.V16B());
  // v19: destination == table == index (v0 is looked up in itself).
  __ Mov(v19, v0);
  __ Tbx(v19.V16B(), v19.V16B(), v19.V16B());

  // Four-register table.
  // v20: no aliasing.
  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  // v21: destination == index.
  __ Mov(v21, v0);
  __ Tbx(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  // v22: destination == first table register (table copied to v22-v25).
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbx(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  // v26: destination == first table register == index. Here the table is
  // { v0, v1, v2, v3 }.
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbx(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xa055555555555555, 0x5555555555adaeaf, q16);
    ASSERT_EQUAL_128(0xa041424334353627, 0x28291a1b1cadaeaf, q17);
    ASSERT_EQUAL_128(0xa0aeadacabaaa9a8, 0xa7a6a5a4a3adaeaf, q18);
    ASSERT_EQUAL_128(0x0f41424334353627, 0x28291a1b1c424100, q19);

    ASSERT_EQUAL_128(0xa0555555d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
    ASSERT_EQUAL_128(0xa0414243d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
    ASSERT_EQUAL_128(0xa0aeadacd4d5d6c7, 0xc8c9babbbcadaeaf, q22);
    ASSERT_EQUAL_128(0x0f414243c4c5c6b7, 0xb8b9aaabac424100, q26);
  }
}
3636 
3637 
// Checks Fcvtl and Fcvtl2 when the destination register is also the source.
//
// Each conversion is performed twice: once into a separate destination (the
// reference result) and once in place. Both must produce the same value.
TEST(neon_destructive_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single to double precision. The lanes of v0 hold, from the most
  // significant lane down, the FP32 encodings of 2.0, 1.0, -1.0 and -2.0.
  __ Movi(v0.V2D(), 0x400000003f800000, 0xbf800000c0000000);
  __ Fcvtl(v16.V2D(), v0.V2S());
  __ Fcvtl2(v17.V2D(), v0.V4S());
  __ Mov(v18, v0);
  __ Mov(v19, v0);
  __ Fcvtl(v18.V2D(), v18.V2S());
  __ Fcvtl2(v19.V2D(), v19.V4S());

  // Half to single precision.
  __ Movi(v1.V2D(), 0x40003c003c004000, 0xc000bc00bc00c000);
  __ Fcvtl(v20.V4S(), v1.V4H());
  __ Fcvtl2(v21.V4S(), v1.V8H());
  __ Mov(v22, v1);
  __ Mov(v23, v1);
  __ Fcvtl(v22.V4S(), v22.V4H());
  __ Fcvtl2(v23.V4S(), v23.V8H());

  END();

  if (CAN_RUN()) {
    RUN();

    // q16/q17 are the reference results; q18/q19 are the in-place results.
    ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q16);
    ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q17);
    ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q18);
    ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q19);

    // q20/q21 are the reference results; q22/q23 are the in-place results.
    ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q20);
    ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q21);
    ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q22);
    ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q23);
  }
}
3674 
// Checks half-precision vector Fadd (4H and 8H arrangements), covering an
// exact sum, a sum that needs rounding, an infinite operand, and signalling
// and quiet NaN operands.
TEST(fadd_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V8H(), kFP16PositiveInfinity);
  // v5 is initialised but not read by any instruction below.
  __ Fmov(v5.V8H(), kFP16NegativeInfinity);
  // 0x7c2f is a signalling NaN and 0xfe0f a quiet NaN (FP16 encodings).
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c2f));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe0f));

  __ Fadd(v8.V4H(), v1.V4H(), v0.V4H());
  __ Fadd(v9.V8H(), v3.V8H(), v2.V8H());
  __ Fadd(v10.V4H(), v4.V4H(), v3.V4H());

  __ Fadd(v11.V4H(), v6.V4H(), v1.V4H());
  __ Fadd(v12.V4H(), v7.V4H(), v7.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // 1024.0 + 24.0. The 4H results leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x6418641864186418, q8);
    // 2053.5 is unrepresentable in FP16.
    ASSERT_EQUAL_128(0x6803680368036803, 0x6803680368036803, q9);

    // +infinity + 2048.0 is +infinity (0x7c00).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q10);

    // Note: we test NaNs here as vectors aren't covered by process_nans_half
    // and we don't have traces for half-precision enabled hardware.
    // Quiet NaN from Signalling.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e2f7e2f7e2f7e2f, q11);
    // Quiet NaN.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfe0ffe0ffe0ffe0f, q12);
  }
}
3716 
// Checks half-precision vector Fsub (4H and 8H arrangements), covering exact
// differences, a difference that needs rounding, an infinite operand, and
// signalling and quiet NaN operands.
TEST(fsub_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  // v5 is initialised but not read by any instruction below.
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);
  // 0x7c22 is a signalling NaN and 0xfe02 a quiet NaN (FP16 encodings).
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c22));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe02));

  // This overwrites v0 with 1024.0 - 24.0; the v10 computation below reads
  // that result rather than the original 24.0.
  __ Fsub(v0.V4H(), v1.V4H(), v0.V4H());
  __ Fsub(v8.V8H(), v3.V8H(), v2.V8H());
  __ Fsub(v9.V4H(), v4.V4H(), v3.V4H());
  __ Fsub(v10.V4H(), v0.V4H(), v1.V4H());

  __ Fsub(v11.V4H(), v6.V4H(), v2.V4H());
  __ Fsub(v12.V4H(), v7.V4H(), v7.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 1024.0 - 24.0. The 4H results leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x63d063d063d063d0, q0);
    // 2042.5 is unrepresentable in FP16:
    ASSERT_EQUAL_128(0x67fa67fa67fa67fa, 0x67fa67fa67fa67fa, q8);

    // +infinity - 2048.0 is +infinity (0x7c00).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q9);
    // (1024.0 - 24.0) - 1024.0 is -24.0 (0xce00).
    ASSERT_EQUAL_128(0x0000000000000000, 0xce00ce00ce00ce00, q10);

    // Note: we test NaNs here as vectors aren't covered by process_nans_half
    // and we don't have traces for half-precision enabled hardware.
    // Quiet NaN from Signalling.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e227e227e227e22, q11);
    // Quiet NaN.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfe02fe02fe02fe02, q12);
  }
}
3759 
// Checks half-precision vector Fmul (4H and 8H arrangements) with finite and
// infinite operands, including the same product with the operands swapped.
TEST(fmul_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  __ Fmul(v6.V4H(), v1.V4H(), v0.V4H());
  __ Fmul(v7.V8H(), v3.V8H(), v2.V8H());
  __ Fmul(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fmul(v9.V4H(), v0.V4H(), v1.V4H());
  __ Fmul(v10.V4H(), v5.V4H(), v0.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // -2.0 * 24.0 = -48.0. The 4H results leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q6);
    // 0.5 * 5.5 = 2.75.
    ASSERT_EQUAL_128(0x4180418041804180, 0x4180418041804180, q7);
    // +infinity * 0.5 = +infinity.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
    // 24.0 * -2.0: same as q6 with the operands swapped.
    ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q9);
    // -infinity * 24.0 = -infinity.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
  }
}
3790 
// Checks half-precision vector Fdiv (4H and 8H arrangements) with exact
// quotients, a quotient that needs rounding, and infinite dividends.
TEST(fdiv_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  __ Fdiv(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fdiv(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fdiv(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fdiv(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fdiv(v10.V4H(), v5.V4H(), v0.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 24.0 / -2.0 = -12.0. The 4H results leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0xca00ca00ca00ca00, q6);
    // 5.5 / 0.5 = 11.0.
    ASSERT_EQUAL_128(0x4980498049804980, 0x4980498049804980, q7);
    // +infinity / 0.5 = +infinity.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
    // -0.083333... is unrepresentable in FP16:
    ASSERT_EQUAL_128(0x0000000000000000, 0xad55ad55ad55ad55, q9);
    // -infinity / 24.0 = -infinity.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
  }
}
3822 
// Checks Fcvtl and Fcvtl2 for both widening conversions: half to single
// precision (4H/8H -> 4S) and single to double precision (2S/4S -> 2D).
//
// The inputs include infinities, NaNs, signed zeroes and the smallest
// non-zero encodings as well as ordinary values.
TEST(neon_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v0 and v1 hold FP16 lanes; v2, v3 and v4 hold FP32 lanes.
  __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
  __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
  __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  // Fcvtl reads the lower half of the source; Fcvtl2 reads the upper half.
  __ Fcvtl(v16.V4S(), v0.V4H());
  __ Fcvtl2(v17.V4S(), v0.V8H());
  __ Fcvtl(v18.V4S(), v1.V4H());
  __ Fcvtl2(v19.V4S(), v1.V8H());

  __ Fcvtl(v20.V2D(), v2.V2S());
  __ Fcvtl2(v21.V2D(), v2.V4S());
  __ Fcvtl(v22.V2D(), v3.V2S());
  __ Fcvtl2(v23.V2D(), v3.V4S());
  __ Fcvtl(v24.V2D(), v4.V2S());
  __ Fcvtl2(v25.V2D(), v4.V4S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Half to single precision.
    ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
    ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
    ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);
    // Single to double precision.
    ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
    ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
    ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
    ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
    ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
  }
}
3861 
3862 
// Checks Fcvtn and Fcvtn2 for both narrowing conversions: single to half
// precision (4S -> 4H/8H) and double to single precision (2D -> 2S/4S).
//
// Where a register is written by Fcvtn and then Fcvtn2, the first fills the
// lower half of the destination and the second fills the upper half.
TEST(neon_fcvtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v0, v1 and v2 hold FP32 lanes; v3 to v8 hold FP64 lanes.
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);

  __ Fcvtn(v16.V4H(), v0.V4S());
  __ Fcvtn2(v16.V8H(), v1.V4S());
  __ Fcvtn(v17.V4H(), v2.V4S());
  __ Fcvtn(v18.V2S(), v3.V2D());
  __ Fcvtn2(v18.V4S(), v4.V2D());
  __ Fcvtn(v19.V2S(), v5.V2D());
  __ Fcvtn2(v19.V4S(), v6.V2D());
  __ Fcvtn(v20.V2S(), v7.V2D());
  __ Fcvtn2(v20.V4S(), v8.V2D());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
    // v17 is only written by Fcvtn, so just its lower 64 bits are checked.
    ASSERT_EQUAL_64(0x7e7ffe7f00008000, d17);
    ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
    ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
  }
}
3898 
3899 
// Checks Fcvtxn and Fcvtxn2 (double to single precision narrowing) in their
// vector forms, plus the scalar form of Fcvtxn.
//
// Each destination register is written by Fcvtxn and then Fcvtxn2: the first
// fills the lower half and the second fills the upper half.
TEST(neon_fcvtxn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Every source register is read as two FP64 lanes below, including v0 to v2
  // whose bit patterns are also used as FP32 inputs in other tests.
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
  __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
  __ Fcvtxn(v16.V2S(), v0.V2D());
  __ Fcvtxn2(v16.V4S(), v1.V2D());
  __ Fcvtxn(v17.V2S(), v2.V2D());
  __ Fcvtxn2(v17.V4S(), v3.V2D());
  __ Fcvtxn(v18.V2S(), v4.V2D());
  __ Fcvtxn2(v18.V4S(), v5.V2D());
  __ Fcvtxn(v19.V2S(), v6.V2D());
  __ Fcvtxn2(v19.V4S(), v7.V2D());
  __ Fcvtxn(v20.V2S(), v8.V2D());
  __ Fcvtxn2(v20.V4S(), v9.V2D());
  // Scalar form, converting the lowest FP64 lane of v0.
  __ Fcvtxn(s21, d0);
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
    ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
    ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
    ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);
    // The scalar result matches the lowest lane of q16.
    ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
  }
}
3937 
// Checks the vector form of Addp on byte lanes (16B).
//
// In the expected value, the lower 64 bits hold the sums of adjacent byte
// pairs of v0 and the upper 64 bits hold the sums of adjacent byte pairs of
// v1. Several of the sums wrap around modulo 256.
TEST(neon_3same_addp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Addp(v16.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16);
  }
}
3954 
// Checks the three-register forms of Sqdmulh and Sqrdmulh, in vector (4H, 4S)
// and scalar (H, S) variants.
//
// The inputs include the most negative value multiplied by itself, which
// saturates, and small products for which the rounding variant gives a
// different result from the truncating one.
TEST(neon_3same_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the halfword operands; v2 and v3 are the word operands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqdmulh(h18, h0, h1);
  __ Sqdmulh(s19, s2, s3);

  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmulh(h22, h0, h1);
  __ Sqrdmulh(s23, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    // Sqdmulh.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0x7fffffff, q19);
    // Sqrdmulh.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  }
}
3989 
// Checks the by-element forms of Sqdmulh and Sqrdmulh, in vector (4H, 4S) and
// scalar (H, S) variants.
//
// The vector variants multiply every lane by element 1 of the second
// operand; the scalar variants multiply by element 0.
TEST(neon_byelement_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the halfword operands; v2 and v3 are the word operands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqdmulh(h18, h0, v1.H(), 0);
  __ Sqdmulh(s19, s2, v3.S(), 0);

  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmulh(h22, h0, v1.H(), 0);
  __ Sqrdmulh(s23, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Sqdmulh.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16);
    ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0x7fffffff, q19);
    // Sqrdmulh.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20);
    ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  }
}
4024 
// Checks the three-register forms of Sqrdmlah, in vector (4H, 4S) and scalar
// (H, S) variants.
//
// v16-v19 are preloaded because the instruction accumulates into its
// destination.
TEST(neon_3same_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0 and v1 are the halfword operands; v2 and v3 are the word operands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values.
  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlah(h18, h0, h1);
  __ Sqrdmlah(s19, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    // The 4H and scalar forms leave the destination bits above the result
    // zero, even though the accumulators were preloaded with non-zero values
    // there.
    ASSERT_EQUAL_128(0, 0x0000040104010000, q16);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000217fffffff, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0, q19);
  }
}
4055 
// Checks the by-element forms of Sqrdmlah, in vector (4H, 4S) and scalar
// (H, S) variants.
//
// The vector variants multiply every lane by element 1 of the second
// operand; the scalar variants multiply by element 0. v16-v19 are preloaded
// because the instruction accumulates into its destination.
TEST(neon_byelement_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0 and v1 are the halfword operands; v2 and v3 are the word operands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values.
  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlah(h18, h0, v1.H(), 0);
  __ Sqrdmlah(s19, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x0000040104018000, q16);
    ASSERT_EQUAL_128(0x00000001fffffff0, 0x0000002100107ff0, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0, q19);
  }
}
4086 
// Checks the three-register forms of Sqrdmlsh, in vector (4H, 4S) and scalar
// (H, S) variants.
//
// v16-v19 are preloaded because the instruction accumulates into its
// destination. The halfword inputs in v0 and v1 differ in their lowest lane
// from those used by the by-element variant of this test.
TEST(neon_3same_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0 and v1 are the halfword operands; v2 and v3 are the word operands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004000500);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000100080);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values.
  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlsh(h18, h0, h1);
  __ Sqrdmlsh(s19, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x40003fff40003ffb, q16);
    ASSERT_EQUAL_128(0x40003fffc0004000, 0x40004000c0004000, q17);
    ASSERT_EQUAL_128(0, 0x3ffb, q18);
    ASSERT_EQUAL_128(0, 0xc0004000, q19);
  }
}
4117 
// Checks the by-element forms of Sqrdmlsh, in vector (4H, 4S) and scalar
// (H, S) variants.
//
// The vector variants multiply every lane by element 1 of the second
// operand; the scalar variants multiply by element 0. v16-v19 are preloaded
// because the instruction accumulates into its destination.
TEST(neon_byelement_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0 and v1 are the halfword operands; v2 and v3 are the word operands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values.
  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlsh(h18, h0, v1.H(), 0);
  __ Sqrdmlsh(s19, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x4000400040004010, q16);
    ASSERT_EQUAL_128(0x4000400040004010, 0x4000400040004010, q17);
    ASSERT_EQUAL_128(0, 0xc000, q18);
    ASSERT_EQUAL_128(0, 0xc0004000, q19);
  }
}
4148 
// Checks the three-register forms of Sdot and Udot with 4S (from 16B
// sources) and 2S (from 8B sources) destinations.
//
// The same inputs are used for the signed and unsigned instructions; v1
// holds bytes with the top bit set, so the two produce different results.
// v16-v19 are preloaded because the instructions accumulate into their
// destination.
TEST(neon_3same_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  // Initial accumulator values.
  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  __ Sdot(v16.V4S(), v0.V16B(), v1.V16B());
  __ Sdot(v17.V2S(), v1.V8B(), v2.V8B());

  __ Udot(v18.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v19.V2S(), v1.V8B(), v2.V8B());

  END();

  if (CAN_RUN()) {
    RUN();
    // Sdot. The 2S results leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x000037d8000045f8, 0x000037d8000045f8, q16);
    ASSERT_EQUAL_128(0, 0x0000515e00004000, q17);
    // Udot.
    ASSERT_EQUAL_128(0x000119d8000127f8, 0x000119d8000127f8, q18);
    ASSERT_EQUAL_128(0, 0x0000c35e00004000, q19);
  }
}
4179 
// Checks the by-element forms of Sdot and Udot: the last operand is an S4B
// view of a register with element index 1. Uses the same input and
// accumulator constants as the three-register test, with the NEON and
// DotProduct CPU features.
TEST(neon_byelement_sdot_udot)4180 TEST(neon_byelement_sdot_udot) {
4181   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);
4182 
4183   START();
4184 
       // Byte-vector source operands.
4185   __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
4186   __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
4187   __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);
4188 
       // Every destination starts with 0x00004000 in each word.
4189   __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
4190   __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
4191   __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
4192   __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);
4193 
4194   __ Sdot(v16.V4S(), v0.V16B(), v1.S4B(), 1);
4195   __ Sdot(v17.V2S(), v1.V8B(), v2.S4B(), 1);
4196 
       // Udot is given the same operands as the Sdot pair above.
4197   __ Udot(v18.V4S(), v0.V16B(), v1.S4B(), 1);
4198   __ Udot(v19.V2S(), v1.V8B(), v2.S4B(), 1);
4199 
4200   END();
4201 
       // The register checks only happen when CAN_RUN() reports true.
4202   if (CAN_RUN()) {
4203     RUN();
4204     ASSERT_EQUAL_128(0x000037d8000037d8, 0x000037d8000037d8, q16);
4205     ASSERT_EQUAL_128(0, 0x0000515e0000587e, q17);
4206     ASSERT_EQUAL_128(0x000119d8000119d8, 0x000119d8000119d8, q18);
4207     ASSERT_EQUAL_128(0, 0x0000c35e0000ca7e, q19);
4208   }
4209 }
4210 
4211 
// Checks Saddlp for every arrangement pair used here (16B->8H, 8B->4H,
// 8H->4S, 4H->2S, 4S->2D, 2S->1D), always reading the same v0 input.
// Only the NEON CPU feature is requested.
TEST(neon_2regmisc_saddlp)4212 TEST(neon_2regmisc_saddlp) {
4213   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4214 
4215   START();
4216 
4217   __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
4218 
       // Byte sources.
4219   __ Saddlp(v16.V8H(), v0.V16B());
4220   __ Saddlp(v17.V4H(), v0.V8B());
4221 
       // Halfword sources.
4222   __ Saddlp(v18.V4S(), v0.V8H());
4223   __ Saddlp(v19.V2S(), v0.V4H());
4224 
       // Word sources.
4225   __ Saddlp(v20.V2D(), v0.V4S());
4226   __ Saddlp(v21.V1D(), v0.V2S());
4227 
4228   END();
4229 
       // For each pair, the narrower form's expectation repeats the second
       // argument of the wider form and uses zero for the first.
4230   if (CAN_RUN()) {
4231     RUN();
4232     ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16);
4233     ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17);
4234     ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18);
4235     ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19);
4236     ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20);
4237     ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21);
4238   }
4239 }
4240 
// Checks Uaddlp for every arrangement pair used here (16B->8H, 8B->4H,
// 8H->4S, 4H->2S, 4S->2D, 2S->1D), always reading the same v0 input.
// Only the NEON CPU feature is requested.
TEST(neon_2regmisc_uaddlp)4241 TEST(neon_2regmisc_uaddlp) {
4242   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4243 
4244   START();
4245 
4246   __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
4247 
       // Byte sources.
4248   __ Uaddlp(v16.V8H(), v0.V16B());
4249   __ Uaddlp(v17.V4H(), v0.V8B());
4250 
       // Halfword sources.
4251   __ Uaddlp(v18.V4S(), v0.V8H());
4252   __ Uaddlp(v19.V2S(), v0.V4H());
4253 
       // Word sources.
4254   __ Uaddlp(v20.V2D(), v0.V4S());
4255   __ Uaddlp(v21.V1D(), v0.V2S());
4256 
4257   END();
4258 
       // For each pair, the narrower form's expectation repeats the second
       // argument of the wider form and uses zero for the first.
4259   if (CAN_RUN()) {
4260     RUN();
4261     ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16);
4262     ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17);
4263     ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18);
4264     ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19);
4265     ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20);
4266     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21);
4267   }
4268 }
4269 
// Checks Sadalp for the arrangement pairs 16B->8H, 8B->4H, 8H->4S, 4H->2S,
// 4S->2D and 2S->1D. Each destination is first loaded with a Mov copy of one
// of the constants v1-v4. Only the NEON CPU feature is requested.
TEST(neon_2regmisc_sadalp)4270 TEST(neon_2regmisc_sadalp) {
4271   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4272 
4273   START();
4274 
       // v0-v2 serve as sources; v1-v4 also provide the initial destinations.
4275   __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
4276   __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
4277   __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
4278   __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
4279   __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
4280 
       // Byte sources (v0), destinations seeded from v1.
4281   __ Mov(v16.V16B(), v1.V16B());
4282   __ Mov(v17.V16B(), v1.V16B());
4283   __ Sadalp(v16.V8H(), v0.V16B());
4284   __ Sadalp(v17.V4H(), v0.V8B());
4285 
       // Halfword sources (v1), destinations seeded from v2.
4286   __ Mov(v18.V16B(), v2.V16B());
4287   __ Mov(v19.V16B(), v2.V16B());
4288   __ Sadalp(v18.V4S(), v1.V8H());
4289   __ Sadalp(v19.V2S(), v1.V4H());
4290 
       // Word sources (v2), destinations seeded from v3 and v4 respectively.
4291   __ Mov(v20.V16B(), v3.V16B());
4292   __ Mov(v21.V16B(), v4.V16B());
4293   __ Sadalp(v20.V2D(), v2.V4S());
4294   __ Sadalp(v21.V1D(), v2.V2S());
4295 
4296   END();
4297 
       // The register checks only happen when CAN_RUN() reports true.
4298   if (CAN_RUN()) {
4299     RUN();
4300     ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16);
4301     ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17);
4302     ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18);
4303     ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19);
4304     ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20);
4305     ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
4306   }
4307 }
4308 
// Checks Uadalp for the arrangement pairs 16B->8H, 8B->4H, 8H->4S, 4H->2S,
// 4S->2D and 2S->1D. Each destination is first loaded with a Mov copy of one
// of the constants v1-v4. Only the NEON CPU feature is requested.
TEST(neon_2regmisc_uadalp)4309 TEST(neon_2regmisc_uadalp) {
4310   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4311 
4312   START();
4313 
       // v0-v2 serve as sources; v1-v4 also provide the initial destinations.
4314   __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
4315   __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
4316   __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
4317   __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
4318   __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
4319 
       // Byte sources (v0), destinations seeded from v1.
4320   __ Mov(v16.V16B(), v1.V16B());
4321   __ Mov(v17.V16B(), v1.V16B());
4322   __ Uadalp(v16.V8H(), v0.V16B());
4323   __ Uadalp(v17.V4H(), v0.V8B());
4324 
       // Halfword sources (v1), destinations seeded from v2.
4325   __ Mov(v18.V16B(), v2.V16B());
4326   __ Mov(v19.V16B(), v2.V16B());
4327   __ Uadalp(v18.V4S(), v1.V8H());
4328   __ Uadalp(v19.V2S(), v1.V4H());
4329 
       // Word sources (v2), destinations seeded from v3 and v4 respectively.
4330   __ Mov(v20.V16B(), v3.V16B());
4331   __ Mov(v21.V16B(), v4.V16B());
4332   __ Uadalp(v20.V2D(), v2.V4S());
4333   __ Uadalp(v21.V1D(), v2.V2S());
4334 
4335   END();
4336 
       // The register checks only happen when CAN_RUN() reports true.
4337   if (CAN_RUN()) {
4338     RUN();
4339     ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16);
4340     ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17);
4341     ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18);
4342     ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19);
4343     ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20);
4344     ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
4345   }
4346 }
4347 
// Checks the three-register 16B forms of Mla, Mls and Mul on one pair of
// inputs (v0, v1). Only the NEON CPU feature is requested.
TEST(neon_3same_mul)4348 TEST(neon_3same_mul) {
4349   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4350 
4351   START();
4352 
4353   __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4354   __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
       // v16 and v17 (the Mla and Mls destinations) start with the same pattern.
4355   __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4356   __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4357 
4358   __ Mla(v16.V16B(), v0.V16B(), v1.V16B());
4359   __ Mls(v17.V16B(), v0.V16B(), v1.V16B());
       // v18 is not initialised by this test before Mul writes it.
4360   __ Mul(v18.V16B(), v0.V16B(), v1.V16B());
4361 
4362   END();
4363 
       // The register checks only happen when CAN_RUN() reports true.
4364   if (CAN_RUN()) {
4365     RUN();
4366     ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16);
4367     ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17);
4368     ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18);
4369   }
4370 }
4371 
4372 
// Checks the three-register 16B forms of Saba, Uaba, Sabd and Uabd on one pair
// of inputs (v0, v1). Only the NEON CPU feature is requested.
TEST(neon_3same_absdiff)4373 TEST(neon_3same_absdiff) {
4374   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4375 
4376   START();
4377 
4378   __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4379   __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
       // v16 and v17 (the Saba and Uaba destinations) start with the same pattern.
4380   __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4381   __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4382 
4383   __ Saba(v16.V16B(), v0.V16B(), v1.V16B());
4384   __ Uaba(v17.V16B(), v0.V16B(), v1.V16B());
       // v18 and v19 are not initialised by this test before being written.
4385   __ Sabd(v18.V16B(), v0.V16B(), v1.V16B());
4386   __ Uabd(v19.V16B(), v0.V16B(), v1.V16B());
4387 
4388   END();
4389 
       // The register checks only happen when CAN_RUN() reports true.
4390   if (CAN_RUN()) {
4391     RUN();
4392     ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16);
4393     ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17);
4394     ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18);
4395     ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19);
4396   }
4397 }
4398 
4399 
// Checks the by-element forms of Mul, Mla and Mls for the 4H, 8H, 2S and 4S
// arrangements, using the lowest and highest element index of v1 for each
// element size (H: 0 and 7, S: 0 and 3). Only the NEON CPU feature is
// requested.
TEST(neon_byelement_mul)4400 TEST(neon_byelement_mul) {
4401   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4402 
4403   START();
4404 
4405   __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4406   __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);
4407 
4408 
4409   __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0);
4410   __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7);
4411   __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0);
4412   __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3);
4413 
       // Mla destinations are preloaded with small per-lane constants.
4414   __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004);
4415   __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004);
4416   __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0);
4417   __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7);
4418 
4419   __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004);
4420   __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004);
4421   __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0);
4422   __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3);
4423 
       // Mls destinations v24-v27 are preloaded with the values this test
       // expects in q16-q19 from the matching Mul, so Mls is expected to
       // leave them all zero.
4424   __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456);
4425   __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4426   __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0);
4427   __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7);
4428 
4429   __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456);
4430   __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4);
4431   __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0);
4432   __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3);
4433 
4434   END();
4435 
4436   if (CAN_RUN()) {
4437     RUN();
         // Mul results.
4438     ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16);
4439     ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
4440     ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18);
4441     ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19);
4442 
         // Mla results.
4443     ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20);
4444     ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21);
4445     ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22);
4446     ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23);
4447 
         // Mls results.
4448     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
4449     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
4450     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
4451     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
4452   }
4453 }
4454 
4455 
// Checks the by-element forms of Smull/Umull, Smlal/Umlal and Smlsl/Umlsl,
// plus their "2" variants, producing 4S results from halfword sources. The
// plain variants read v0.V4H() with element 7 of v1; the "2" variants read
// v0.V8H() with element 0. Only the NEON CPU feature is requested.
TEST(neon_byelement_mull)4456 TEST(neon_byelement_mull) {
4457   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4458 
4459   START();
4460 
4461   __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
4462   __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);
4463 
4464 
4465   __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7);
4466   __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0);
4467   __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7);
4468   __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0);
4469 
       // All four *mlal destinations start with the same small constants.
4470   __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
4471   __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
4472   __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
4473   __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001);
4474 
4475   __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7);
4476   __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
4477   __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7);
4478   __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0);
4479 
       // The *mlsl destinations v24-v27 are preloaded with the values this
       // test expects in q16-q19 from the matching *mull, so they are
       // expected to end up all zero.
4480   __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa);
4481   __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01);
4482   __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa);
4483   __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01);
4484 
4485   __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
4486   __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
4487   __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7);
4488   __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0);
4489 
4490   END();
4491 
4492   if (CAN_RUN()) {
4493     RUN();
4494 
         // *mull results.
4495     ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16);
4496     ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17);
4497     ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18);
4498     ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19);
4499 
         // *mlal results.
4500     ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20);
4501     ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21);
4502     ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22);
4503     ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23);
4504 
         // *mlsl results.
4505     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
4506     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
4507     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
4508     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
4509   }
4510 }
4511 
4512 
// Checks the by-element forms of Sqdmull, Sqdmlal and Sqdmlsl: a 4S <- 4H
// vector form, the "2" variant reading v0.V8H(), and a scalar s <- h form.
// Only the NEON CPU feature is requested.
TEST(neon_byelement_sqdmull)4513 TEST(neon_byelement_sqdmull) {
4514   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4515 
4516   START();
4517 
4518   __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
4519   __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);
4520 
4521   __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7);
4522   __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0);
4523   __ Sqdmull(s18, h0, v1.H(), 7);
4524 
       // All Sqdmlal destinations start with the same small constants.
4525   __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
4526   __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
4527   __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
4528 
4529   __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7);
4530   __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
4531   __ Sqdmlal(s22, h0, v1.H(), 7);
4532 
       // The Sqdmlsl destinations are preloaded with the values this test
       // expects in q16-q18 from the matching Sqdmull, so they are expected
       // to end up all zero.
4533   __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54);
4534   __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02);
4535   __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54);
4536 
4537   __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
4538   __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
4539   __ Sqdmlsl(s26, h0, v1.H(), 7);
4540 
4541   END();
4542 
4543   if (CAN_RUN()) {
4544     RUN();
4545 
         // Sqdmull results.
4546     ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16);
4547     ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17);
4548     ASSERT_EQUAL_128(0, 0x0000ab54, q18);
4549 
         // Sqdmlal results.
4550     ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20);
4551     ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21);
4552     ASSERT_EQUAL_128(0, 0x0000ab55, q22);
4553 
         // Sqdmlsl results.
4554     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
4555     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
4556     ASSERT_EQUAL_128(0, 0x00000000, q26);
4557   }
4558 }
4559 
4560 
// Checks Sabal/Uabal (8H destination, 8B sources) and Sabal2/Uabal2 (8H
// destination, 16B sources) on one pair of inputs (v0, v1). Only the NEON CPU
// feature is requested.
TEST(neon_3diff_absdiff)4561 TEST(neon_3diff_absdiff) {
4562   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4563 
4564   START();
4565 
4566   __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
4567   __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
       // All four destinations start with the same pattern.
4568   __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4569   __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4570   __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4571   __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4572 
4573   __ Sabal(v16.V8H(), v0.V8B(), v1.V8B());
4574   __ Uabal(v17.V8H(), v0.V8B(), v1.V8B());
4575   __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B());
4576   __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B());
4577 
4578   END();
4579 
       // The register checks only happen when CAN_RUN() reports true.
4580   if (CAN_RUN()) {
4581     RUN();
4582     ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16);
4583     ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17);
4584     ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18);
4585     ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19);
4586   }
4587 }
4588 
4589 
// Checks the three-register forms of Sqdmull/Sqdmull2 (4S <- 4H/8H and
// 2D <- 2S/4S) and the scalar forms (s <- h, d <- s). The inputs consist only
// of 0x7fff/0x8000 halfwords and 0x7fffffff/0x80000000 words, i.e. the largest
// and smallest signed values for each lane size. Only the NEON CPU feature is
// requested.
TEST(neon_3diff_sqdmull)4590 TEST(neon_3diff_sqdmull) {
4591   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4592 
4593   START();
4594 
       // v0/v1 feed the halfword forms, v2/v3 the word forms.
4595   __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
4596   __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
4597   __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
4598   __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);
4599 
4600   __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H());
4601   __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H());
4602   __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S());
4603   __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S());
4604   __ Sqdmull(s20, h0, h1);
4605   __ Sqdmull(d21, s2, s3);
4606 
4607   END();
4608 
       // The register checks only happen when CAN_RUN() reports true.
4609   if (CAN_RUN()) {
4610     RUN();
4611     ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16);
4612     ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17);
4613     ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18);
4614     ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19);
4615     ASSERT_EQUAL_128(0, 0x7fffffff, q20);
4616     ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
4617   }
4618 }
4619 
4620 
// Checks the three-register forms of Sqdmlal/Sqdmlal2 (4S <- 4H/8H and
// 2D <- 2S/4S) and the scalar forms (s <- h, d <- s). The source operands
// consist only of the largest and smallest signed values for each lane size,
// and every destination is preloaded. Only the NEON CPU feature is requested.
TEST(neon_3diff_sqdmlal)4621 TEST(neon_3diff_sqdmlal) {
4622   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4623 
4624   START();
4625 
       // v0/v1 feed the halfword forms, v2/v3 the word forms.
4626   __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
4627   __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
4628   __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
4629   __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);
4630 
       // Initial destination contents.
4631   __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001);
4632   __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff);
4633   __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001);
4634   __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff);
4635   __ Movi(v20.V2D(), 0, 0x00000001);
4636   __ Movi(v21.V2D(), 0, 0x00000001);
4637 
4638   __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H());
4639   __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H());
4640   __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S());
4641   __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S());
4642   __ Sqdmlal(s20, h0, h1);
4643   __ Sqdmlal(d21, s2, s3);
4644 
4645   END();
4646 
       // The register checks only happen when CAN_RUN() reports true.
4647   if (CAN_RUN()) {
4648     RUN();
4649     ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16);
4650     ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17);
4651     ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18);
4652     ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19);
4653     ASSERT_EQUAL_128(0, 0x7fffffff, q20);
4654     ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
4655   }
4656 }
4657 
4658 
// Checks the three-register forms of Sqdmlsl/Sqdmlsl2 (4S <- 4H/8H and
// 2D <- 2S/4S) and the scalar forms (s <- h, d <- s). The source operands
// consist only of the largest and smallest signed values for each lane size,
// and every destination is preloaded. Only the NEON CPU feature is requested.
TEST(neon_3diff_sqdmlsl)4659 TEST(neon_3diff_sqdmlsl) {
4660   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4661 
4662   START();
4663 
       // v0/v1 feed the halfword forms, v2/v3 the word forms.
4664   __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
4665   __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
4666   __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
4667   __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);
4668 
       // Initial destination contents.
4669   __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001);
4670   __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001);
4671   __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001);
4672   __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff);
4673   __ Movi(v20.V2D(), 0, 0x00000001);
4674   __ Movi(v21.V2D(), 0, 0x00000001);
4675 
4676   __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H());
4677   __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H());
4678   __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S());
4679   __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S());
4680   __ Sqdmlsl(s20, h0, h1);
4681   __ Sqdmlsl(d21, s2, s3);
4682 
4683   END();
4684 
       // The register checks only happen when CAN_RUN() reports true.
4685   if (CAN_RUN()) {
4686     RUN();
4687     ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16);
4688     ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17);
4689     ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18);
4690     ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19);
4691     ASSERT_EQUAL_128(0, 0x80000002, q20);
4692     ASSERT_EQUAL_128(0, 0x8000000000000002, q21);
4693   }
4694 }
4695 
4696 
// Checks Smlal/Umlal (8H destination, 8B sources) and Smlal2/Umlal2 (8H
// destination, 16B sources) on one pair of inputs (v0, v1). Only the NEON CPU
// feature is requested.
TEST(neon_3diff_mla)4697 TEST(neon_3diff_mla) {
4698   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4699 
4700   START();
4701 
4702   __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
4703   __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
       // All four destinations start with the same pattern.
4704   __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4705   __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4706   __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4707   __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4708 
4709   __ Smlal(v16.V8H(), v0.V8B(), v1.V8B());
4710   __ Umlal(v17.V8H(), v0.V8B(), v1.V8B());
4711   __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B());
4712   __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B());
4713 
4714   END();
4715 
       // The register checks only happen when CAN_RUN() reports true.
4716   if (CAN_RUN()) {
4717     RUN();
4718     ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16);
4719     ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17);
4720     ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18);
4721     ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19);
4722   }
4723 }
4724 
4725 
// Checks Smlsl/Umlsl (8H destination, 8B sources) and Smlsl2/Umlsl2 (8H
// destination, 16B sources) on one pair of inputs (v0, v1). Only the NEON CPU
// feature is requested.
TEST(neon_3diff_mls)4726 TEST(neon_3diff_mls) {
4727   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4728 
4729   START();
4730 
4731   __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
4732   __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
       // All four destinations start with the same pattern.
4733   __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4734   __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4735   __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4736   __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
4737 
4738   __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B());
4739   __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B());
4740   __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B());
4741   __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B());
4742 
4743   END();
4744 
       // The register checks only happen when CAN_RUN() reports true.
4745   if (CAN_RUN()) {
4746     RUN();
4747     ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16);
4748     ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17);
4749     ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18);
4750     ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19);
4751   }
4752 }
4753 
4754 
// Checks the 16B vector forms of Cmeq, Cmge, Cmgt, Cmhi and Cmhs. Each
// instruction is emitted twice: once comparing v0 with itself and once
// comparing v0 with v1. Only the NEON CPU feature is requested.
TEST(neon_3same_compare)4755 TEST(neon_3same_compare) {
4756   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4757 
4758   START();
4759 
4760   __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4761   __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
4762 
       // Even-numbered destinations: v0 vs v0. Odd-numbered: v0 vs v1.
4763   __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B());
4764   __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B());
4765   __ Cmge(v18.V16B(), v0.V16B(), v0.V16B());
4766   __ Cmge(v19.V16B(), v0.V16B(), v1.V16B());
4767   __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B());
4768   __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B());
4769   __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B());
4770   __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B());
4771   __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B());
4772   __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B());
4773 
4774   END();
4775 
       // Every expected byte is either 0x00 or 0xff.
4776   if (CAN_RUN()) {
4777     RUN();
4778     ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
4779     ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17);
4780     ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
4781     ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19);
4782     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
4783     ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21);
4784     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
4785     ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23);
4786     ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24);
4787     ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25);
4788   }
4789 }
4790 
4791 
// Checks the scalar D-register forms of Cmeq, Cmge, Cmgt, Cmhi and Cmhs.
// Each is run on (d0, d0) and (d0, d1); Cmeq, Cmge and Cmhs are additionally
// run with the operands swapped (d1, d0). Only the NEON CPU feature is
// requested.
TEST(neon_3same_scalar_compare)4792 TEST(neon_3same_scalar_compare) {
4793   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4794 
4795   START();
4796 
4797   __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
4798   __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
4799 
4800   __ Cmeq(d16, d0, d0);
4801   __ Cmeq(d17, d0, d1);
4802   __ Cmeq(d18, d1, d0);
4803   __ Cmge(d19, d0, d0);
4804   __ Cmge(d20, d0, d1);
4805   __ Cmge(d21, d1, d0);
4806   __ Cmgt(d22, d0, d0);
4807   __ Cmgt(d23, d0, d1);
4808   __ Cmhi(d24, d0, d0);
4809   __ Cmhi(d25, d0, d1);
4810   __ Cmhs(d26, d0, d0);
4811   __ Cmhs(d27, d0, d1);
4812   __ Cmhs(d28, d1, d0);
4813 
4814   END();
4815 
       // Each expectation has zero as its first argument and either all-ones
       // or zero as its second.
4816   if (CAN_RUN()) {
4817     RUN();
4818 
4819     ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16);
4820     ASSERT_EQUAL_128(0, 0x0000000000000000, q17);
4821     ASSERT_EQUAL_128(0, 0x0000000000000000, q18);
4822     ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
4823     ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
4824     ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
4825     ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
4826     ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23);
4827     ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
4828     ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
4829     ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
4830     ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27);
4831     ASSERT_EQUAL_128(0, 0x0000000000000000, q28);
4832   }
4833 }
4834 
// Checks the half-precision vector forms of Fcmeq (8H and 4H). Four input
// vectors (0, NaN, -1.0 and 1.0 in every lane, per the comments on the
// constants) are each compared against the zero vector v0. Built with the
// NEON, FP and NEONHalf CPU features.
TEST(neon_fcmeq_h)4835 TEST(neon_fcmeq_h) {
4836   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
4837                       CPUFeatures::kFP,
4838                       CPUFeatures::kNEONHalf);
4839 
4840   START();
4841 
4842   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
4843   __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
4844   __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
4845   __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.
4846 
       // 8H forms, then the same comparisons as 4H forms.
4847   __ Fcmeq(v4.V8H(), v0.V8H(), v0.V8H());
4848   __ Fcmeq(v5.V8H(), v1.V8H(), v0.V8H());
4849   __ Fcmeq(v6.V8H(), v2.V8H(), v0.V8H());
4850   __ Fcmeq(v7.V8H(), v3.V8H(), v0.V8H());
4851   __ Fcmeq(v8.V4H(), v0.V4H(), v0.V4H());
4852   __ Fcmeq(v9.V4H(), v1.V4H(), v0.V4H());
4853   __ Fcmeq(v10.V4H(), v2.V4H(), v0.V4H());
4854   __ Fcmeq(v11.V4H(), v3.V4H(), v0.V4H());
4855 
4856   END();
4857 
       // Only the 0-versus-0 comparisons are expected to produce set bits.
4858   if (CAN_RUN()) {
4859     RUN();
4860 
4861     ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
4862     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
4863     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
4864     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
4865     ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
4866     ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
4867     ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
4868     ASSERT_EQUAL_128(0, 0x0000000000000000, v11);
4869   }
4870 }
4871 
// Checks the scalar half-precision form of Fcmeq. h0-h3 are loaded with
// Float16(0.0), the raw bit pattern 0xffff, Float16(-1.0) and Float16(1.0),
// and each is compared against h0. Built with the NEON, FP, NEONHalf and
// FPHalf CPU features.
TEST(neon_fcmeq_h_scalar)4872 TEST(neon_fcmeq_h_scalar) {
4873   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
4874                       CPUFeatures::kFP,
4875                       CPUFeatures::kNEONHalf,
4876                       CPUFeatures::kFPHalf);
4877 
4878   START();
4879 
4880   __ Fmov(h0, Float16(0.0));
4881   __ Fmov(h1, RawbitsToFloat16(0xffff));
4882   __ Fmov(h2, Float16(-1.0));
4883   __ Fmov(h3, Float16(1.0));
4884   __ Fcmeq(h4, h0, h0);
4885   __ Fcmeq(h5, h1, h0);
4886   __ Fcmeq(h6, h2, h0);
4887   __ Fcmeq(h7, h3, h0);
4888 
4889   END();
4890 
       // Expected results are written as raw bit patterns: 0xffff for the
       // 0-versus-0 comparison, 0x0000 for the others.
4891   if (CAN_RUN()) {
4892     RUN();
4893 
4894     ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
4895     ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
4896     ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
4897     ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h7);
4898   }
4899 }
4900 
// Checks the half-precision vector forms of Fcmge (8H and 4H). Four input
// vectors (0, NaN, -1.0 and 1.0 in every lane, per the comments on the
// constants) are each compared against the zero vector v0. Built with the
// NEON, FP and NEONHalf CPU features.
TEST(neon_fcmge_h)4901 TEST(neon_fcmge_h) {
4902   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
4903                       CPUFeatures::kFP,
4904                       CPUFeatures::kNEONHalf);
4905 
4906   START();
4907 
4908   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
4909   __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
4910   __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
4911   __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.
4912 
       // 8H forms, then the same comparisons as 4H forms.
4913   __ Fcmge(v4.V8H(), v0.V8H(), v0.V8H());
4914   __ Fcmge(v5.V8H(), v1.V8H(), v0.V8H());
4915   __ Fcmge(v6.V8H(), v2.V8H(), v0.V8H());
4916   __ Fcmge(v7.V8H(), v3.V8H(), v0.V8H());
4917   __ Fcmge(v8.V4H(), v0.V4H(), v0.V4H());
4918   __ Fcmge(v9.V4H(), v1.V4H(), v0.V4H());
4919   __ Fcmge(v10.V4H(), v2.V4H(), v0.V4H());
4920   __ Fcmge(v11.V4H(), v3.V4H(), v0.V4H());
4921 
4922   END();
4923 
       // Set bits are expected for the 0-versus-0 and 1.0-versus-0 comparisons
       // only.
4924   if (CAN_RUN()) {
4925     RUN();
4926 
4927     ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
4928     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
4929     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
4930     ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
4931     ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
4932     ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
4933     ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
4934     ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
4935   }
4936 }
4937 
// Checks the scalar half-precision form of Fcmge. h0-h3 are loaded with
// Float16(0.0), the raw bit pattern 0xffff, Float16(-1.0) and Float16(1.0),
// and each is compared against h0. Built with the NEON, FP, NEONHalf and
// FPHalf CPU features.
TEST(neon_fcmge_h_scalar)4938 TEST(neon_fcmge_h_scalar) {
4939   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
4940                       CPUFeatures::kFP,
4941                       CPUFeatures::kNEONHalf,
4942                       CPUFeatures::kFPHalf);
4943 
4944   START();
4945 
4946   __ Fmov(h0, Float16(0.0));
4947   __ Fmov(h1, RawbitsToFloat16(0xffff));
4948   __ Fmov(h2, Float16(-1.0));
4949   __ Fmov(h3, Float16(1.0));
4950   __ Fcmge(h4, h0, h0);
4951   __ Fcmge(h5, h1, h0);
4952   __ Fcmge(h6, h2, h0);
4953   __ Fcmge(h7, h3, h0);
4954 
4955   END();
4956 
       // Expected results are written as raw bit patterns: 0xffff for the
       // 0-versus-0 and 1.0-versus-0 comparisons, 0x0000 for the others.
4957   if (CAN_RUN()) {
4958     RUN();
4959 
4960     ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
4961     ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
4962     ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
4963     ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
4964   }
4965 }
4966 
// Checks the half-precision vector forms of Fcmgt (8H and 4H). Four input
// vectors (0, NaN, -1.0 and 1.0 in every lane, per the comments on the
// constants) are each compared against the zero vector v0. Built with the
// NEON, FP and NEONHalf CPU features.
TEST(neon_fcmgt_h)4967 TEST(neon_fcmgt_h) {
4968   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
4969                       CPUFeatures::kFP,
4970                       CPUFeatures::kNEONHalf);
4971 
4972   START();
4973 
4974   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
4975   __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
4976   __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
4977   __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.
4978 
       // 8H forms, then the same comparisons as 4H forms.
4979   __ Fcmgt(v4.V8H(), v0.V8H(), v0.V8H());
4980   __ Fcmgt(v5.V8H(), v1.V8H(), v0.V8H());
4981   __ Fcmgt(v6.V8H(), v2.V8H(), v0.V8H());
4982   __ Fcmgt(v7.V8H(), v3.V8H(), v0.V8H());
4983   __ Fcmgt(v8.V4H(), v0.V4H(), v0.V4H());
4984   __ Fcmgt(v9.V4H(), v1.V4H(), v0.V4H());
4985   __ Fcmgt(v10.V4H(), v2.V4H(), v0.V4H());
4986   __ Fcmgt(v11.V4H(), v3.V4H(), v0.V4H());
4987 
4988   END();
4989 
       // Only the 1.0-versus-0 comparisons are expected to produce set bits.
4990   if (CAN_RUN()) {
4991     RUN();
4992 
4993     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
4994     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
4995     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
4996     ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
4997     ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
4998     ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
4999     ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
5000     ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
5001   }
5002 }
5003 
// Scalar half-precision FCMGT, comparing four operands against 0.0.
TEST(neon_fcmgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  // Expected results, expressed as raw 16-bit patterns.
  const Float16 kMatch16 = RawbitsToFloat16(0xffff);
  const Float16 kNoMatch16 = RawbitsToFloat16(0x0000);

  START();

  // Operands: h0 = 0.0, h1 = raw 0xffff, h2 = -1.0, h3 = 1.0.
  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));

  // Each operand is compared with h0.
  __ Fcmgt(h4, h0, h0);
  __ Fcmgt(h5, h1, h0);
  __ Fcmgt(h6, h2, h0);
  __ Fcmgt(h7, h3, h0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Only 1.0 is expected to compare greater than 0.0.
  ASSERT_EQUAL_FP16(kNoMatch16, h4);
  ASSERT_EQUAL_FP16(kNoMatch16, h5);
  ASSERT_EQUAL_FP16(kNoMatch16, h6);
  ASSERT_EQUAL_FP16(kMatch16, h7);
}
5032 
// Vector FACGE on half-precision lanes (8H and 4H), comparing against zero.
TEST(neon_facge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Input patterns: one FP16 value replicated across a 64-bit half.
  const uint64_t kZeroLanes = 0x0000000000000000;      // 0.
  const uint64_t kNaNLanes = 0xffffffffffffffff;       // NaN.
  const uint64_t kMinusOneLanes = 0xbc00bc00bc00bc00;  // -1.0.
  const uint64_t kPlusOneLanes = 0x3c003c003c003c00;   // 1.0.

  // Expected patterns for a 64-bit half of the result.
  const uint64_t kMatch64 = 0xffffffffffffffff;
  const uint64_t kNoMatch64 = 0x0000000000000000;

  START();

  __ Movi(v0.V2D(), kZeroLanes, kZeroLanes);
  __ Movi(v1.V2D(), kNaNLanes, kNaNLanes);
  __ Movi(v2.V2D(), kMinusOneLanes, kMinusOneLanes);
  __ Movi(v3.V2D(), kPlusOneLanes, kPlusOneLanes);

  // Compare each input with the zero vector, first as 8H and then as 4H.
  __ Facge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facge(v7.V8H(), v3.V8H(), v0.V8H());
  __ Facge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Every input except NaN is expected to produce set lanes.
  ASSERT_EQUAL_128(kMatch64, kMatch64, v4);
  ASSERT_EQUAL_128(kNoMatch64, kNoMatch64, v5);
  ASSERT_EQUAL_128(kMatch64, kMatch64, v6);
  ASSERT_EQUAL_128(kMatch64, kMatch64, v7);

  // The 4H results are expected to have a zero upper half.
  ASSERT_EQUAL_128(0, kMatch64, v8);
  ASSERT_EQUAL_128(0, kNoMatch64, v9);
  ASSERT_EQUAL_128(0, kMatch64, v10);
  ASSERT_EQUAL_128(0, kMatch64, v11);
}
5069 
// Scalar half-precision FACGE, comparing four operands against 0.0.
TEST(neon_facge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  // Expected results, expressed as raw 16-bit patterns.
  const Float16 kMatch16 = RawbitsToFloat16(0xffff);
  const Float16 kNoMatch16 = RawbitsToFloat16(0x0000);

  START();

  // Operands: h0 = 0.0, h1 = raw 0xffff, h2 = -1.0, h3 = 1.0.
  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));

  // Each operand is compared with h0.
  __ Facge(h4, h0, h0);
  __ Facge(h5, h1, h0);
  __ Facge(h6, h2, h0);
  __ Facge(h7, h3, h0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Only the 0xffff operand is expected to fail the comparison.
  ASSERT_EQUAL_FP16(kMatch16, h4);
  ASSERT_EQUAL_FP16(kNoMatch16, h5);
  ASSERT_EQUAL_FP16(kMatch16, h6);
  ASSERT_EQUAL_FP16(kMatch16, h7);
}
5098 
// Vector FACGT on half-precision lanes (8H and 4H), comparing against zero.
TEST(neon_facgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Input patterns: one FP16 value replicated across a 64-bit half.
  const uint64_t kZeroLanes = 0x0000000000000000;      // 0.
  const uint64_t kNaNLanes = 0xffffffffffffffff;       // NaN.
  const uint64_t kMinusOneLanes = 0xbc00bc00bc00bc00;  // -1.0.
  const uint64_t kPlusOneLanes = 0x3c003c003c003c00;   // 1.0.

  // Expected patterns for a 64-bit half of the result.
  const uint64_t kMatch64 = 0xffffffffffffffff;
  const uint64_t kNoMatch64 = 0x0000000000000000;

  START();

  __ Movi(v0.V2D(), kZeroLanes, kZeroLanes);
  __ Movi(v1.V2D(), kNaNLanes, kNaNLanes);
  __ Movi(v2.V2D(), kMinusOneLanes, kMinusOneLanes);
  __ Movi(v3.V2D(), kPlusOneLanes, kPlusOneLanes);

  // Compare each input with the zero vector, first as 8H and then as 4H.
  __ Facgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facgt(v7.V8H(), v3.V8H(), v0.V8H());
  __ Facgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Both -1.0 and 1.0 are expected to produce set lanes; 0 and NaN are not.
  ASSERT_EQUAL_128(kNoMatch64, kNoMatch64, v4);
  ASSERT_EQUAL_128(kNoMatch64, kNoMatch64, v5);
  ASSERT_EQUAL_128(kMatch64, kMatch64, v6);
  ASSERT_EQUAL_128(kMatch64, kMatch64, v7);

  // The 4H results are expected to have a zero upper half.
  ASSERT_EQUAL_128(0, kNoMatch64, v8);
  ASSERT_EQUAL_128(0, kNoMatch64, v9);
  ASSERT_EQUAL_128(0, kMatch64, v10);
  ASSERT_EQUAL_128(0, kMatch64, v11);
}
5135 
// Scalar half-precision FACGT, comparing four operands against 0.0.
TEST(neon_facgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  // Expected results, expressed as raw 16-bit patterns.
  const Float16 kMatch16 = RawbitsToFloat16(0xffff);
  const Float16 kNoMatch16 = RawbitsToFloat16(0x0000);

  START();

  // Operands: h0 = 0.0, h1 = raw 0xffff, h2 = -1.0, h3 = 1.0.
  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));

  // Each operand is compared with h0.
  __ Facgt(h4, h0, h0);
  __ Facgt(h5, h1, h0);
  __ Facgt(h6, h2, h0);
  __ Facgt(h7, h3, h0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Both -1.0 and 1.0 are expected to pass; 0.0 and 0xffff are not.
  ASSERT_EQUAL_FP16(kNoMatch16, h4);
  ASSERT_EQUAL_FP16(kNoMatch16, h5);
  ASSERT_EQUAL_FP16(kMatch16, h6);
  ASSERT_EQUAL_FP16(kMatch16, h7);
}
5164 
// FCMEQ against the immediate 0.0, in scalar (S, D) and vector forms.
TEST(neon_2regmisc_fcmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Bit patterns for each 64-bit half of the input registers.
  const uint64_t kZeroBits = 0x0000000000000000;  // Zero.
  const uint64_t kNaNBits = 0xffffffffffffffff;   // Nan.
  const uint64_t kNegBits = 0xbf800000bf800000;   // < 0.
  const uint64_t kPosBits = 0x3f8000003f800000;   // > 0.

  // Expected result patterns.
  const uint64_t kMatch32 = 0xffffffff;
  const uint64_t kMatch64 = 0xffffffffffffffff;
  const uint64_t kNoMatch = 0x0000000000000000;

  START();

  // v3 is initialised but not used by any comparison below.
  __ Movi(v0.V2D(), kZeroBits, kZeroBits);
  __ Movi(v1.V2D(), kNaNBits, kNaNBits);
  __ Movi(v2.V2D(), kNegBits, kNegBits);
  __ Movi(v3.V2D(), kPosBits, kPosBits);

  // Scalar forms.
  __ Fcmeq(s16, s0, 0.0);
  __ Fcmeq(s17, s1, 0.0);
  __ Fcmeq(s18, s2, 0.0);
  __ Fcmeq(d19, d0, 0.0);
  __ Fcmeq(d20, d1, 0.0);
  __ Fcmeq(d21, d2, 0.0);

  // Vector forms.
  __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Only the zero input is expected to compare equal.
  ASSERT_EQUAL_128(0, kMatch32, q16);
  ASSERT_EQUAL_128(0, kNoMatch, q17);
  ASSERT_EQUAL_128(0, kNoMatch, q18);
  ASSERT_EQUAL_128(0, kMatch64, q19);
  ASSERT_EQUAL_128(0, kNoMatch, q20);
  ASSERT_EQUAL_128(0, kNoMatch, q21);
  ASSERT_EQUAL_128(0, kMatch64, q22);
  ASSERT_EQUAL_128(kNoMatch, kNoMatch, q23);
  ASSERT_EQUAL_128(kNoMatch, kNoMatch, q24);
  ASSERT_EQUAL_128(kNoMatch, kNoMatch, q25);
}
5202 
// FCMGE against the immediate 0.0, in scalar (S, D) and vector forms.
TEST(neon_2regmisc_fcmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Bit patterns for each 64-bit half of the input registers.
  const uint64_t kZeroBits = 0x0000000000000000;  // Zero.
  const uint64_t kNaNBits = 0xffffffffffffffff;   // Nan.
  const uint64_t kNegBits = 0xbf800000bf800000;   // < 0.
  const uint64_t kPosBits = 0x3f8000003f800000;   // > 0.

  // Expected result patterns.
  const uint64_t kMatch32 = 0xffffffff;
  const uint64_t kMatch64 = 0xffffffffffffffff;
  const uint64_t kNoMatch = 0x0000000000000000;

  START();

  __ Movi(v0.V2D(), kZeroBits, kZeroBits);
  __ Movi(v1.V2D(), kNaNBits, kNaNBits);
  __ Movi(v2.V2D(), kNegBits, kNegBits);
  __ Movi(v3.V2D(), kPosBits, kPosBits);

  // Scalar forms.
  __ Fcmge(s16, s0, 0.0);
  __ Fcmge(s17, s1, 0.0);
  __ Fcmge(s18, s2, 0.0);
  __ Fcmge(d19, d0, 0.0);
  __ Fcmge(d20, d1, 0.0);
  __ Fcmge(d21, d3, 0.0);

  // Vector forms.
  __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmge(v25.V2D(), v3.V2D(), 0.0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Zero and positive inputs are expected to pass; NaN and negative are not.
  ASSERT_EQUAL_128(0, kMatch32, q16);
  ASSERT_EQUAL_128(0, kNoMatch, q17);
  ASSERT_EQUAL_128(0, kNoMatch, q18);
  ASSERT_EQUAL_128(0, kMatch64, q19);
  ASSERT_EQUAL_128(0, kNoMatch, q20);
  ASSERT_EQUAL_128(0, kMatch64, q21);
  ASSERT_EQUAL_128(0, kMatch64, q22);
  ASSERT_EQUAL_128(kNoMatch, kNoMatch, q23);
  ASSERT_EQUAL_128(kNoMatch, kNoMatch, q24);
  ASSERT_EQUAL_128(kMatch64, kMatch64, q25);
}
5240 
5241 
// FCMGT against the immediate 0.0, in scalar (S, D) and vector forms.
TEST(neon_2regmisc_fcmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Bit patterns for each 64-bit half of the input registers.
  const uint64_t kZeroBits = 0x0000000000000000;  // Zero.
  const uint64_t kNaNBits = 0xffffffffffffffff;   // Nan.
  const uint64_t kNegBits = 0xbf800000bf800000;   // < 0.
  const uint64_t kPosBits = 0x3f8000003f800000;   // > 0.

  // Expected result patterns.
  const uint64_t kMatch64 = 0xffffffffffffffff;
  const uint64_t kNoMatch = 0x0000000000000000;

  START();

  __ Movi(v0.V2D(), kZeroBits, kZeroBits);
  __ Movi(v1.V2D(), kNaNBits, kNaNBits);
  __ Movi(v2.V2D(), kNegBits, kNegBits);
  __ Movi(v3.V2D(), kPosBits, kPosBits);

  // Scalar forms.
  __ Fcmgt(s16, s0, 0.0);
  __ Fcmgt(s17, s1, 0.0);
  __ Fcmgt(s18, s2, 0.0);
  __ Fcmgt(d19, d0, 0.0);
  __ Fcmgt(d20, d1, 0.0);
  __ Fcmgt(d21, d3, 0.0);

  // Vector forms.
  __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Only the positive input is expected to pass.
  ASSERT_EQUAL_128(0, kNoMatch, q16);
  ASSERT_EQUAL_128(0, kNoMatch, q17);
  ASSERT_EQUAL_128(0, kNoMatch, q18);
  ASSERT_EQUAL_128(0, kNoMatch, q19);
  ASSERT_EQUAL_128(0, kNoMatch, q20);
  ASSERT_EQUAL_128(0, kMatch64, q21);
  ASSERT_EQUAL_128(0, kNoMatch, q22);
  ASSERT_EQUAL_128(kNoMatch, kNoMatch, q23);
  ASSERT_EQUAL_128(kNoMatch, kNoMatch, q24);
  ASSERT_EQUAL_128(kMatch64, kMatch64, q25);
}
5279 
// FCMLE against the immediate 0.0, in scalar (S, D) and vector forms.
TEST(neon_2regmisc_fcmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Bit patterns for each 64-bit half of the input registers.
  const uint64_t kZeroBits = 0x0000000000000000;  // Zero.
  const uint64_t kNaNBits = 0xffffffffffffffff;   // Nan.
  const uint64_t kNegBits = 0xbf800000bf800000;   // < 0.
  const uint64_t kPosBits = 0x3f8000003f800000;   // > 0.

  // Expected result patterns.
  const uint64_t kMatch32 = 0xffffffff;
  const uint64_t kMatch64 = 0xffffffffffffffff;
  const uint64_t kNoMatch = 0x0000000000000000;

  START();

  __ Movi(v0.V2D(), kZeroBits, kZeroBits);
  __ Movi(v1.V2D(), kNaNBits, kNaNBits);
  __ Movi(v2.V2D(), kNegBits, kNegBits);
  __ Movi(v3.V2D(), kPosBits, kPosBits);

  // Scalar forms.
  __ Fcmle(s16, s0, 0.0);
  __ Fcmle(s17, s1, 0.0);
  __ Fcmle(s18, s3, 0.0);
  __ Fcmle(d19, d0, 0.0);
  __ Fcmle(d20, d1, 0.0);
  __ Fcmle(d21, d2, 0.0);

  // Vector forms.
  __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmle(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Zero and negative inputs are expected to pass; NaN and positive are not.
  ASSERT_EQUAL_128(0, kMatch32, q16);
  ASSERT_EQUAL_128(0, kNoMatch, q17);
  ASSERT_EQUAL_128(0, kNoMatch, q18);
  ASSERT_EQUAL_128(0, kMatch64, q19);
  ASSERT_EQUAL_128(0, kNoMatch, q20);
  ASSERT_EQUAL_128(0, kMatch64, q21);
  ASSERT_EQUAL_128(0, kMatch64, q22);
  ASSERT_EQUAL_128(kNoMatch, kNoMatch, q23);
  ASSERT_EQUAL_128(kNoMatch, kNoMatch, q24);
  ASSERT_EQUAL_128(kMatch64, kMatch64, q25);
}
5317 
5318 
// FCMLT against the immediate 0.0, in scalar (S, D) and vector forms.
TEST(neon_2regmisc_fcmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Bit patterns for each 64-bit half of the input registers.
  const uint64_t kZeroBits = 0x0000000000000000;  // Zero.
  const uint64_t kNaNBits = 0xffffffffffffffff;   // Nan.
  const uint64_t kNegBits = 0xbf800000bf800000;   // < 0.
  const uint64_t kPosBits = 0x3f8000003f800000;   // > 0.

  // Expected result patterns.
  const uint64_t kMatch64 = 0xffffffffffffffff;
  const uint64_t kNoMatch = 0x0000000000000000;

  START();

  __ Movi(v0.V2D(), kZeroBits, kZeroBits);
  __ Movi(v1.V2D(), kNaNBits, kNaNBits);
  __ Movi(v2.V2D(), kNegBits, kNegBits);
  __ Movi(v3.V2D(), kPosBits, kPosBits);

  // Scalar forms.
  __ Fcmlt(s16, s0, 0.0);
  __ Fcmlt(s17, s1, 0.0);
  __ Fcmlt(s18, s3, 0.0);
  __ Fcmlt(d19, d0, 0.0);
  __ Fcmlt(d20, d1, 0.0);
  __ Fcmlt(d21, d2, 0.0);

  // Vector forms.
  __ Fcmlt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmlt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmlt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmlt(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Only the negative input is expected to pass.
  ASSERT_EQUAL_128(0, kNoMatch, q16);
  ASSERT_EQUAL_128(0, kNoMatch, q17);
  ASSERT_EQUAL_128(0, kNoMatch, q18);
  ASSERT_EQUAL_128(0, kNoMatch, q19);
  ASSERT_EQUAL_128(0, kNoMatch, q20);
  ASSERT_EQUAL_128(0, kMatch64, q21);
  ASSERT_EQUAL_128(0, kNoMatch, q22);
  ASSERT_EQUAL_128(kNoMatch, kNoMatch, q23);
  ASSERT_EQUAL_128(kNoMatch, kNoMatch, q24);
  ASSERT_EQUAL_128(kMatch64, kMatch64, q25);
}
5356 
// Integer CMEQ against the immediate zero, across all arrangements.
TEST(neon_2regmisc_cmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 has an all-zero low half; v1 mixes zero and non-zero bytes.
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte and halfword arrangements read v1.
  __ Cmeq(v16.V8B(), v1.V8B(), 0);
  __ Cmeq(v17.V16B(), v1.V16B(), 0);
  __ Cmeq(v18.V4H(), v1.V4H(), 0);
  __ Cmeq(v19.V8H(), v1.V8H(), 0);

  // Word arrangements read v0.
  __ Cmeq(v20.V2S(), v0.V2S(), 0);
  __ Cmeq(v21.V4S(), v0.V4S(), 0);

  // Doubleword forms: scalar on d0 and d1, then vector on v0.
  __ Cmeq(d22, d0, 0);
  __ Cmeq(d23, d1, 0);
  __ Cmeq(v24.V2D(), v0.V2D(), 0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Lanes holding zero are expected to become all ones; others become zero.
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16);
  ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18);
  ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
}
5390 
5391 
// Integer CMGE against the immediate zero, across all arrangements.
TEST(neon_2regmisc_cmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 has its top bit set in the high half and an all-zero low half.
  __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte and halfword arrangements read v1.
  __ Cmge(v16.V8B(), v1.V8B(), 0);
  __ Cmge(v17.V16B(), v1.V16B(), 0);
  __ Cmge(v18.V4H(), v1.V4H(), 0);
  __ Cmge(v19.V8H(), v1.V8H(), 0);

  // Word arrangements read v0.
  __ Cmge(v20.V2S(), v0.V2S(), 0);
  __ Cmge(v21.V4S(), v0.V4S(), 0);

  // Doubleword forms: scalar on d0 and d1, then vector on v0.
  __ Cmge(d22, d0, 0);
  __ Cmge(d23, d1, 0);
  __ Cmge(v24.V2D(), v0.V2D(), 0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Lanes with the sign bit set are expected to produce zero.
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16);
  ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18);
  ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
}
5425 
5426 
// Integer CMLT against the immediate zero, across all arrangements.
TEST(neon_2regmisc_cmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 has its top bit set in the low half only.
  __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte, halfword and word arrangements all read v1.
  __ Cmlt(v16.V8B(), v1.V8B(), 0);
  __ Cmlt(v17.V16B(), v1.V16B(), 0);
  __ Cmlt(v18.V4H(), v1.V4H(), 0);
  __ Cmlt(v19.V8H(), v1.V8H(), 0);
  __ Cmlt(v20.V2S(), v1.V2S(), 0);
  __ Cmlt(v21.V4S(), v1.V4S(), 0);

  // Doubleword forms: scalar on d0 and d1, then vector on v0.
  __ Cmlt(d22, d0, 0);
  __ Cmlt(d23, d1, 0);
  __ Cmlt(v24.V2D(), v0.V2D(), 0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Only lanes with the sign bit set are expected to become all ones.
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16);
  ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
}
5460 
5461 
// Integer CMLE against the immediate zero, across all arrangements.
TEST(neon_2regmisc_cmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 has an all-zero low half; v1 mixes zero and non-zero bytes.
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte, halfword and word arrangements all read v1.
  __ Cmle(v16.V8B(), v1.V8B(), 0);
  __ Cmle(v17.V16B(), v1.V16B(), 0);
  __ Cmle(v18.V4H(), v1.V4H(), 0);
  __ Cmle(v19.V8H(), v1.V8H(), 0);
  __ Cmle(v20.V2S(), v1.V2S(), 0);
  __ Cmle(v21.V4S(), v1.V4S(), 0);

  // Doubleword forms: scalar on d0 and d1, then vector on v0.
  __ Cmle(d22, d0, 0);
  __ Cmle(d23, d1, 0);
  __ Cmle(v24.V2D(), v0.V2D(), 0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Zero lanes and lanes with the sign bit set are expected to be all ones.
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16);
  ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18);
  ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
}
5495 
5496 
// Integer CMGT against the immediate zero, across all arrangements.
TEST(neon_2regmisc_cmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 has an all-zero low half; v1 mixes zero and non-zero bytes.
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte and halfword arrangements read v1.
  __ Cmgt(v16.V8B(), v1.V8B(), 0);
  __ Cmgt(v17.V16B(), v1.V16B(), 0);
  __ Cmgt(v18.V4H(), v1.V4H(), 0);
  __ Cmgt(v19.V8H(), v1.V8H(), 0);

  // Word arrangements read v0.
  __ Cmgt(v20.V2S(), v0.V2S(), 0);
  __ Cmgt(v21.V4S(), v0.V4S(), 0);

  // Doubleword forms: scalar on d0 and d1, then vector on v0.
  __ Cmgt(d22, d0, 0);
  __ Cmgt(d23, d1, 0);
  __ Cmgt(v24.V2D(), v0.V2D(), 0);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Only non-zero lanes with a clear sign bit are expected to be all ones.
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16);
  ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
  ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q24);
}
5530 
5531 
// NEG (non-saturating negate) across all arrangements and the scalar D form.
TEST(neon_2regmisc_neg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source per lane size (v0: bytes, v1: halfwords, v2: words,
  // v3 and v4: doublewords), including the most negative lane values.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms, narrowest lanes first.
  __ Neg(v16.V8B(), v0.V8B());
  __ Neg(v17.V16B(), v0.V16B());
  __ Neg(v18.V4H(), v1.V4H());
  __ Neg(v19.V8H(), v1.V8H());
  __ Neg(v20.V2S(), v2.V2S());
  __ Neg(v21.V4S(), v2.V4S());

  // Doubleword forms: scalar on d3, then vector on v3 and v4.
  __ Neg(d22, d3);
  __ Neg(v23.V2D(), v3.V2D());
  __ Neg(v24.V2D(), v4.V2D());

  END();

  if (!CAN_RUN()) return;
  RUN();

  // The most negative value of each lane size is expected to stay unchanged.
  ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16);
  ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
  ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
  ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23);
  ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
}
5568 
5569 
// SQNEG (saturating negate) across all arrangements and scalar sizes.
TEST(neon_2regmisc_sqneg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source per lane size (v0: bytes, v1: halfwords, v2: words,
  // v3 and v4: doublewords), including the most negative lane values.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms, narrowest lanes first.
  __ Sqneg(v16.V8B(), v0.V8B());
  __ Sqneg(v17.V16B(), v0.V16B());
  __ Sqneg(v18.V4H(), v1.V4H());
  __ Sqneg(v19.V8H(), v1.V8H());
  __ Sqneg(v20.V2S(), v2.V2S());
  __ Sqneg(v21.V4S(), v2.V4S());
  __ Sqneg(v22.V2D(), v3.V2D());
  __ Sqneg(v23.V2D(), v4.V2D());

  // Scalar forms, one per element size.
  __ Sqneg(b24, b0);
  __ Sqneg(h25, h1);
  __ Sqneg(s26, s2);
  __ Sqneg(d27, d3);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // The most negative value of each lane size is expected to saturate to the
  // most positive value.
  ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16);
  ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
  ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
  ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

  // Scalar results occupy only the lowest element.
  ASSERT_EQUAL_128(0, 0x7f, q24);
  ASSERT_EQUAL_128(0, 0x8001, q25);
  ASSERT_EQUAL_128(0, 0x80000001, q26);
  ASSERT_EQUAL_128(0, 0x8000000000000001, q27);
}
5614 
5615 
// ABS (non-saturating) across all arrangements and the scalar D form.
TEST(neon_2regmisc_abs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source per lane size (v0: bytes, v1: halfwords, v2: words,
  // v3 and v4: doublewords), including the most negative lane values.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms, narrowest lanes first.
  __ Abs(v16.V8B(), v0.V8B());
  __ Abs(v17.V16B(), v0.V16B());
  __ Abs(v18.V4H(), v1.V4H());
  __ Abs(v19.V8H(), v1.V8H());
  __ Abs(v20.V2S(), v2.V2S());
  __ Abs(v21.V4S(), v2.V4S());

  // Doubleword forms: scalar on d3, then vector on v3 and v4.
  __ Abs(d22, d3);
  __ Abs(v23.V2D(), v3.V2D());
  __ Abs(v24.V2D(), v4.V2D());

  END();

  if (!CAN_RUN()) return;
  RUN();

  // The most negative value of each lane size is expected to stay unchanged.
  ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16);
  ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
  ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23);
  ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
}
5652 
5653 
// SQABS (saturating absolute value) across all arrangements and scalar sizes.
TEST(neon_2regmisc_sqabs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source per lane size (v0: bytes, v1: halfwords, v2: words,
  // v3 and v4: doublewords), including the most negative lane values.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms, narrowest lanes first.
  __ Sqabs(v16.V8B(), v0.V8B());
  __ Sqabs(v17.V16B(), v0.V16B());
  __ Sqabs(v18.V4H(), v1.V4H());
  __ Sqabs(v19.V8H(), v1.V8H());
  __ Sqabs(v20.V2S(), v2.V2S());
  __ Sqabs(v21.V4S(), v2.V4S());
  __ Sqabs(v22.V2D(), v3.V2D());
  __ Sqabs(v23.V2D(), v4.V2D());

  // Scalar forms, one per element size.
  __ Sqabs(b24, b0);
  __ Sqabs(h25, h1);
  __ Sqabs(s26, s2);
  __ Sqabs(d27, d3);

  END();

  if (!CAN_RUN()) return;
  RUN();

  // The most negative value of each lane size is expected to saturate to the
  // most positive value.
  ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16);
  ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
  ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

  // Scalar results occupy only the lowest element.
  ASSERT_EQUAL_128(0, 0x7f, q24);
  ASSERT_EQUAL_128(0, 0x7fff, q25);
  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
}
5698 
TEST(neon_2regmisc_suqadd)5699 TEST(neon_2regmisc_suqadd) {
5700   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
5701 
5702   START();
5703 
5704   __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
5705   __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff);
5706 
5707   __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd);
5708   __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001);
5709 
5710   __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe);
5711   __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002);
5712 
5713   __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
5714   __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002);
5715 
5716   __ Mov(v16.V2D(), v0.V2D());
5717   __ Mov(v17.V2D(), v0.V2D());
5718   __ Mov(v18.V2D(), v2.V2D());
5719   __ Mov(v19.V2D(), v2.V2D());
5720   __ Mov(v20.V2D(), v4.V2D());
5721   __ Mov(v21.V2D(), v4.V2D());
5722   __ Mov(v22.V2D(), v6.V2D());
5723 
5724   __ Mov(v23.V2D(), v0.V2D());
5725   __ Mov(v24.V2D(), v2.V2D());
5726   __ Mov(v25.V2D(), v4.V2D());
5727   __ Mov(v26.V2D(), v6.V2D());
5728 
5729   __ Suqadd(v16.V8B(), v1.V8B());
5730   __ Suqadd(v17.V16B(), v1.V16B());
5731   __ Suqadd(v18.V4H(), v3.V4H());
5732   __ Suqadd(v19.V8H(), v3.V8H());
5733   __ Suqadd(v20.V2S(), v5.V2S());
5734   __ Suqadd(v21.V4S(), v5.V4S());
5735   __ Suqadd(v22.V2D(), v7.V2D());
5736 
5737   __ Suqadd(b23, b1);
5738   __ Suqadd(h24, h3);
5739   __ Suqadd(s25, s5);
5740   __ Suqadd(d26, d7);
5741 
5742   END();
5743 
5744   if (CAN_RUN()) {
5745     RUN();
5746     ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16);
5747     ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17);
5748     ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18);
5749     ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19);
5750     ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20);
5751     ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21);
5752     ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22);
5753 
5754     ASSERT_EQUAL_128(0, 0x7f, q23);
5755     ASSERT_EQUAL_128(0, 0x7ffe, q24);
5756     ASSERT_EQUAL_128(0, 0x7fffffff, q25);
5757     ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
5758   }
5759 }
5760 
TEST(neon_2regmisc_usqadd)5761 TEST(neon_2regmisc_usqadd) {
5762   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
5763 
5764   START();
5765 
5766   __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe);
5767   __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002);
5768 
5769   __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd);
5770   __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001);
5771 
5772   __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe);
5773   __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002);
5774 
5775   __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff);
5776   __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000);
5777 
5778   __ Mov(v16.V2D(), v0.V2D());
5779   __ Mov(v17.V2D(), v0.V2D());
5780   __ Mov(v18.V2D(), v2.V2D());
5781   __ Mov(v19.V2D(), v2.V2D());
5782   __ Mov(v20.V2D(), v4.V2D());
5783   __ Mov(v21.V2D(), v4.V2D());
5784   __ Mov(v22.V2D(), v6.V2D());
5785 
5786   __ Mov(v23.V2D(), v0.V2D());
5787   __ Mov(v24.V2D(), v2.V2D());
5788   __ Mov(v25.V2D(), v4.V2D());
5789   __ Mov(v26.V2D(), v6.V2D());
5790 
5791   __ Usqadd(v16.V8B(), v1.V8B());
5792   __ Usqadd(v17.V16B(), v1.V16B());
5793   __ Usqadd(v18.V4H(), v3.V4H());
5794   __ Usqadd(v19.V8H(), v3.V8H());
5795   __ Usqadd(v20.V2S(), v5.V2S());
5796   __ Usqadd(v21.V4S(), v5.V4S());
5797   __ Usqadd(v22.V2D(), v7.V2D());
5798 
5799   __ Usqadd(b23, b1);
5800   __ Usqadd(h24, h3);
5801   __ Usqadd(s25, s5);
5802   __ Usqadd(d26, d7);
5803 
5804   END();
5805 
5806   if (CAN_RUN()) {
5807     RUN();
5808     ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16);
5809     ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17);
5810     ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18);
5811     ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19);
5812     ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20);
5813     ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21);
5814     ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22);
5815 
5816     ASSERT_EQUAL_128(0, 0xff, q23);
5817     ASSERT_EQUAL_128(0, 0x7ffe, q24);
5818     ASSERT_EQUAL_128(0, 0xffffffff, q25);
5819     ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
5820   }
5821 }
5822 
// XTN and XTN2 for each narrowing step (H to B, S to H, D to S).
TEST(neon_2regmisc_xtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source data for the narrowing operations.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each destination is written by Xtn and then by Xtn2 from a second source.
  __ Xtn(v16.V8B(), v0.V8H());
  __ Xtn2(v16.V16B(), v1.V8H());
  __ Xtn(v17.V4H(), v1.V4S());
  __ Xtn2(v17.V8H(), v2.V4S());
  __ Xtn(v18.V2S(), v3.V2D());
  __ Xtn2(v18.V4S(), v4.V2D());

  END();

  if (!CAN_RUN()) return;
  RUN();

  // Each result lane is expected to hold the low half of its source lane.
  ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18);
}
5850 
5851 
// Exercises SQXTN/SQXTN2 (vector) and SQXTN (scalar). Per the expected values,
// each source lane is treated as signed and narrowed to half its width, with
// out-of-range values clamped to the most positive (0x7f...) or most negative
// (0x80...) value of the narrower lane.
TEST(neon_2regmisc_sqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: SQXTN writes the lower half of the destination, then SQXTN2
  // writes the upper half of the same register.
  __ Sqxtn(v16.V8B(), v0.V8H());
  __ Sqxtn2(v16.V16B(), v1.V8H());
  __ Sqxtn(v17.V4H(), v1.V4S());
  __ Sqxtn2(v17.V8H(), v2.V4S());
  __ Sqxtn(v18.V2S(), v3.V2D());
  __ Sqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the lowest lane of v0.
  __ Sqxtn(b19, h0);
  __ Sqxtn(h20, s0);
  __ Sqxtn(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16);
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17);
    ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18);
    // Scalar results: h0 = 0x8081 and d0 are negative and saturate to the
    // minimum; s0 = 0x017f8081 is too large and saturates to the maximum.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
5885 
5886 
// Exercises UQXTN/UQXTN2 (vector) and UQXTN (scalar). Per the expected values,
// each source lane is treated as unsigned and narrowed to half its width, with
// values that do not fit clamped to all-ones.
TEST(neon_2regmisc_uqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: UQXTN writes the lower half of the destination, then UQXTN2
  // writes the upper half of the same register.
  __ Uqxtn(v16.V8B(), v0.V8H());
  __ Uqxtn2(v16.V16B(), v1.V8H());
  __ Uqxtn(v17.V4H(), v1.V4S());
  __ Uqxtn2(v17.V8H(), v2.V4S());
  __ Uqxtn(v18.V2S(), v3.V2D());
  __ Uqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the lowest lane of v0.
  __ Uqxtn(b19, h0);
  __ Uqxtn(h20, s0);
  __ Uqxtn(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
    // All three scalar sources exceed the narrower range and saturate.
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
  }
}
5920 
5921 
// Exercises SQXTUN/SQXTUN2 (vector) and SQXTUN (scalar). Per the expected
// values, each source lane is treated as signed and narrowed to an unsigned
// lane of half the width: negative inputs become zero and inputs that are too
// large become all-ones.
TEST(neon_2regmisc_sqxtun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: SQXTUN writes the lower half of the destination, then
  // SQXTUN2 writes the upper half of the same register.
  __ Sqxtun(v16.V8B(), v0.V8H());
  __ Sqxtun2(v16.V16B(), v1.V8H());
  __ Sqxtun(v17.V4H(), v1.V4S());
  __ Sqxtun2(v17.V8H(), v2.V4S());
  __ Sqxtun(v18.V2S(), v3.V2D());
  __ Sqxtun2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the lowest lane of v0.
  __ Sqxtun(b19, h0);
  __ Sqxtun(h20, s0);
  __ Sqxtun(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
    // h0 and d0 are negative (result 0); s0 is positive and too large
    // (result all-ones).
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
  }
}
5955 
// Bitwise AND on the 16B and 8B arrangements: a register ANDed with itself,
// and with a second register holding a different byte mix.
TEST(neon_3same_and) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first operand repeats one 64-bit pattern in both halves.
  const uint64_t kLhs = 0xff00aa5500ff55aa;
  const uint64_t kRhsHigh = 0x00aa55aaff55ff00;
  const uint64_t kRhsLow = 0xaa55ff00555500ff;
  // kLhs & kRhsHigh and kLhs & kRhsLow, respectively.
  const uint64_t kMixedHigh = 0x0000000000555500;
  const uint64_t kMixedLow = 0xaa00aa00005500aa;

  START();

  __ Movi(v0.V2D(), kLhs, kLhs);
  __ Movi(v1.V2D(), kRhsHigh, kRhsLow);

  // 128-bit forms: self, then mixed operand.
  __ And(v16.V16B(), v0.V16B(), v0.V16B());
  __ And(v17.V16B(), v0.V16B(), v1.V16B());
  // 64-bit forms of the same two cases.
  __ And(v24.V8B(), v0.V8B(), v0.V8B());
  __ And(v25.V8B(), v0.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(kLhs, kLhs, q16);
    ASSERT_EQUAL_128(kMixedHigh, kMixedLow, q17);
    // The 8B forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, kLhs, q24);
    ASSERT_EQUAL_128(0, kMixedLow, q25);
  }
}
5978 
// Bitwise BIC (first operand AND NOT second operand) on the 16B and 8B
// arrangements: a register cleared by itself, and by a second register.
TEST(neon_3same_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first operand repeats one 64-bit pattern in both halves.
  const uint64_t kLhs = 0xff00aa5500ff55aa;
  const uint64_t kRhsHigh = 0x00ffaa00aa55aaff;
  const uint64_t kRhsLow = 0xffff005500ff00ff;
  // kLhs & ~kRhsHigh and kLhs & ~kRhsLow, respectively.
  const uint64_t kMixedHigh = 0xff00005500aa5500;
  const uint64_t kMixedLow = 0x0000aa0000005500;

  START();

  __ Movi(v0.V2D(), kLhs, kLhs);
  __ Movi(v1.V2D(), kRhsHigh, kRhsLow);

  // 128-bit forms: self (clears everything), then mixed operand.
  __ Bic(v16.V16B(), v0.V16B(), v0.V16B());
  __ Bic(v17.V16B(), v0.V16B(), v1.V16B());
  // 64-bit forms of the same two cases.
  __ Bic(v24.V8B(), v0.V8B(), v0.V8B());
  __ Bic(v25.V8B(), v0.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(kMixedHigh, kMixedLow, q17);
    // The 8B forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, kMixedLow, q25);
  }
}
6001 
// Bitwise ORR on the 16B and 8B arrangements: a register ORed with itself,
// and with a second register holding a different byte mix.
TEST(neon_3same_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first operand repeats one 64-bit pattern in both halves.
  const uint64_t kLhs = 0xff00aa5500ff55aa;
  const uint64_t kRhsHigh = 0x00aa55aaff55ff00;
  const uint64_t kRhsLow = 0xaa55ff00555500ff;
  // kLhs | kRhsHigh and kLhs | kRhsLow, respectively.
  const uint64_t kMixedHigh = 0xffaaffffffffffaa;
  const uint64_t kMixedLow = 0xff55ff5555ff55ff;

  START();

  __ Movi(v0.V2D(), kLhs, kLhs);
  __ Movi(v1.V2D(), kRhsHigh, kRhsLow);

  // 128-bit forms: self, then mixed operand.
  __ Orr(v16.V16B(), v0.V16B(), v0.V16B());
  __ Orr(v17.V16B(), v0.V16B(), v1.V16B());
  // 64-bit forms of the same two cases.
  __ Orr(v24.V8B(), v0.V8B(), v0.V8B());
  __ Orr(v25.V8B(), v0.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(kLhs, kLhs, q16);
    ASSERT_EQUAL_128(kMixedHigh, kMixedLow, q17);
    // The 8B forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, kLhs, q24);
    ASSERT_EQUAL_128(0, kMixedLow, q25);
  }
}
6024 
// Vector-to-vector Mov for every 128-bit and 64-bit arrangement. Whatever the
// lane size, the destination must hold the same bits as the source.
TEST(neon_3same_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The source repeats this 64-bit pattern in both halves.
  const uint64_t kPattern = 0xff00aa5500ff55aa;

  START();

  __ Movi(v0.V2D(), kPattern, kPattern);

  // 128-bit arrangements.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V8H(), v0.V8H());
  __ Mov(v18.V4S(), v0.V4S());
  __ Mov(v19.V2D(), v0.V2D());

  // 64-bit arrangements.
  __ Mov(v24.V8B(), v0.V8B());
  __ Mov(v25.V4H(), v0.V4H());
  __ Mov(v26.V2S(), v0.V2S());
  END();

  if (CAN_RUN()) {
    RUN();

    // Full-width copies.
    ASSERT_EQUAL_128(kPattern, kPattern, q16);
    ASSERT_EQUAL_128(kPattern, kPattern, q17);
    ASSERT_EQUAL_128(kPattern, kPattern, q18);
    ASSERT_EQUAL_128(kPattern, kPattern, q19);

    // Half-width copies: only the lower 64 bits are expected to be set.
    ASSERT_EQUAL_128(0x0, kPattern, q24);
    ASSERT_EQUAL_128(0x0, kPattern, q25);
    ASSERT_EQUAL_128(0x0, kPattern, q26);
  }
}
6055 
// Bitwise ORN (first operand OR NOT second operand) on the 16B and 8B
// arrangements: a register combined with itself, and with a second register.
TEST(neon_3same_orn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first operand repeats one 64-bit pattern in both halves.
  const uint64_t kLhs = 0xff00aa5500ff55aa;
  const uint64_t kRhsHigh = 0x00aa55aaff55ff00;
  const uint64_t kRhsLow = 0xaa55ff00555500ff;
  // x | ~x sets every bit.
  const uint64_t kAllOnes = 0xffffffffffffffff;
  // kLhs | ~kRhsHigh and kLhs | ~kRhsLow, respectively.
  const uint64_t kMixedHigh = 0xff55aa5500ff55ff;
  const uint64_t kMixedLow = 0xffaaaaffaaffffaa;

  START();

  __ Movi(v0.V2D(), kLhs, kLhs);
  __ Movi(v1.V2D(), kRhsHigh, kRhsLow);

  // 128-bit forms: self, then mixed operand.
  __ Orn(v16.V16B(), v0.V16B(), v0.V16B());
  __ Orn(v17.V16B(), v0.V16B(), v1.V16B());
  // 64-bit forms of the same two cases.
  __ Orn(v24.V8B(), v0.V8B(), v0.V8B());
  __ Orn(v25.V8B(), v0.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(kAllOnes, kAllOnes, q16);
    ASSERT_EQUAL_128(kMixedHigh, kMixedLow, q17);
    // The 8B forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, kAllOnes, q24);
    ASSERT_EQUAL_128(0, kMixedLow, q25);
  }
}
6078 
// Bitwise EOR on the 16B and 8B arrangements: a register XORed with itself,
// and with a second register holding a different byte mix.
TEST(neon_3same_eor) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The first operand repeats one 64-bit pattern in both halves.
  const uint64_t kLhs = 0xff00aa5500ff55aa;
  const uint64_t kRhsHigh = 0x00ffaa00aa55aaff;
  const uint64_t kRhsLow = 0xffff005500ff00ff;
  // kLhs ^ kRhsHigh and kLhs ^ kRhsLow, respectively.
  const uint64_t kMixedHigh = 0xffff0055aaaaff55;
  const uint64_t kMixedLow = 0x00ffaa0000005555;

  START();

  __ Movi(v0.V2D(), kLhs, kLhs);
  __ Movi(v1.V2D(), kRhsHigh, kRhsLow);

  // 128-bit forms: self (x ^ x == 0), then mixed operand.
  __ Eor(v16.V16B(), v0.V16B(), v0.V16B());
  __ Eor(v17.V16B(), v0.V16B(), v1.V16B());
  // 64-bit forms of the same two cases.
  __ Eor(v24.V8B(), v0.V8B(), v0.V8B());
  __ Eor(v25.V8B(), v0.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(kMixedHigh, kMixedLow, q17);
    // The 8B forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, kMixedLow, q25);
  }
}
6101 
// Exercises BIF (bitwise insert if false). The destination is also an input:
// per the expected values, Bif(vd, vn, vm) yields (vd & vm) | (vn & ~vm),
// i.e. bits of vn replace bits of vd wherever vm has a zero bit.
TEST(neon_3same_bif) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group of three Movi sets up: initial destination, vn, vm.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  // Operands for the 64-bit (8B) form; upper halves are zero.
  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
    ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
  }
}
6132 
// Exercises BIT (bitwise insert if true). The destination is also an input:
// per the expected values, Bit(vd, vn, vm) yields (vd & ~vm) | (vn & vm),
// i.e. bits of vn replace bits of vd wherever vm has a one bit.
TEST(neon_3same_bit) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group of three Movi sets up: initial destination, vn, vm.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  // Operands for the 64-bit (8B) form; upper halves are zero.
  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
    ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
  }
}
6163 
// Exercises BSL (bitwise select). The destination doubles as the selection
// mask: per the expected values, Bsl(vd, vn, vm) yields (vd & vn) | (~vd & vm),
// i.e. a one bit in the original vd picks vn and a zero bit picks vm.
TEST(neon_3same_bsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group of three Movi sets up: initial destination (mask), vn, vm.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  // Operands for the 64-bit (8B) form; upper halves are zero.
  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
    ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
    ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
  }
}
6194 
6195 
// SMAX (signed per-lane maximum) for the B, H and S lane sizes, in both the
// 64-bit and 128-bit arrangements.
TEST(neon_3same_smax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Operands, as upper and lower 64-bit halves.
  const uint64_t kLhsHigh = 0xaaaaaaaa55555555;
  const uint64_t kLhsLow = 0xffffffff0000aa55;
  const uint64_t kRhsHigh = 0x55aa5555aaaaaaaa;
  const uint64_t kRhsLow = 0x00000000ffaa55ff;

  // Expected lower 64 bits per lane size; the 64-bit and 128-bit forms of a
  // given lane size share them.
  const uint64_t kByteLanesLow = 0x0000000000005555;
  const uint64_t kHalfLanesLow = 0x00000000000055ff;
  const uint64_t kWordLanesLow = 0x000000000000aa55;
  // Expected upper 64 bits; identical for all three 128-bit results.
  const uint64_t kAnyLanesHigh = 0x55aa555555555555;

  START();

  __ Movi(v0.V2D(), kLhsHigh, kLhsLow);
  __ Movi(v1.V2D(), kRhsHigh, kRhsLow);

  // 64-bit arrangements.
  __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smax(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements.
  __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, kByteLanesLow, q16);
    ASSERT_EQUAL_128(0x0, kHalfLanesLow, q18);
    ASSERT_EQUAL_128(0x0, kWordLanesLow, q20);
    ASSERT_EQUAL_128(kAnyLanesHigh, kByteLanesLow, q17);
    ASSERT_EQUAL_128(kAnyLanesHigh, kHalfLanesLow, q19);
    ASSERT_EQUAL_128(kAnyLanesHigh, kWordLanesLow, q21);
  }
}
6224 
6225 
// Exercises SMAXP (signed pairwise maximum) for the B, H and S lane sizes.
// Per the expected values, the signed maxima of adjacent lane pairs of the
// first source fill the lower half of the result and those of the second
// source fill the upper half.
TEST(neon_3same_smaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements: only the lower halves of v0 and v1 take part.
  __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements.
  __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x0000ff55ffff0055, q16);
    ASSERT_EQUAL_128(0x0, 0x000055ffffff0000, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
    ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
    ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
  }
}
6254 
6255 
// Exercises the scalar form of ADDP: the two 64-bit lanes of a 2D vector are
// added into a D register. Per the expected values the sum wraps modulo 2^64
// (see v2) and the upper 64 bits of the destination read as zero.
TEST(neon_addp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addp(d16, v0.V2D());
  __ Addp(d17, v1.V2D());
  __ Addp(d18, v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x00224466ef66fa80, q16);
    ASSERT_EQUAL_128(0x0, 0x55aa5556aa5500a9, q17);
    // The carry out of bit 63 is discarded here.
    ASSERT_EQUAL_128(0x0, 0xaaaaaaa96655ff55, q18);
  }
}
6279 
// Exercises ADDV: all lanes of the source are summed into a scalar of the same
// lane width. Per the expected values the sum is truncated to that width and
// the rest of the destination register reads as zero.
TEST(neon_acrosslanes_addv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  // The 8B and 4H forms reduce only the lower 64 bits of the source.
  __ Addv(b16, v0.V8B());
  __ Addv(b17, v0.V16B());
  __ Addv(h18, v1.V4H());
  __ Addv(h19, v1.V8H());
  __ Addv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // These are the low bits of the full sums checked in the UADDLV test
    // (0x02c7, 0x0599, 0x155a9, 0x355fc, 0x21100a9fe).
    ASSERT_EQUAL_128(0x0, 0xc7, q16);
    ASSERT_EQUAL_128(0x0, 0x99, q17);
    ASSERT_EQUAL_128(0x0, 0x55a9, q18);
    ASSERT_EQUAL_128(0x0, 0x55fc, q19);
    ASSERT_EQUAL_128(0x0, 0x1100a9fe, q20);
  }
}
6307 
6308 
// Exercises SADDLV: all lanes of the source are summed as signed values into a
// scalar twice as wide as a source lane. Negative sums show up sign-extended
// to the destination width only (e.g. 0xffc7 in an H register).
TEST(neon_acrosslanes_saddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  // The 8B and 4H forms reduce only the lower 64 bits of the source.
  __ Saddlv(h16, v0.V8B());
  __ Saddlv(h17, v0.V16B());
  __ Saddlv(s18, v1.V4H());
  __ Saddlv(s19, v1.V8H());
  __ Saddlv(d20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // 0xffc7 is -57 and 0xff99 is -103 as 16-bit signed values.
    ASSERT_EQUAL_128(0x0, 0xffc7, q16);
    ASSERT_EQUAL_128(0x0, 0xff99, q17);
    ASSERT_EQUAL_128(0x0, 0x000055a9, q18);
    ASSERT_EQUAL_128(0x0, 0x000055fc, q19);
    ASSERT_EQUAL_128(0x0, 0x0000001100a9fe, q20);
  }
}
6336 
6337 
// Exercises UADDLV: all lanes of the source are summed as unsigned values into
// a scalar twice as wide as a source lane, so the carries lost by ADDV are
// kept.
TEST(neon_acrosslanes_uaddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  // The 8B and 4H forms reduce only the lower 64 bits of the source.
  __ Uaddlv(h16, v0.V8B());
  __ Uaddlv(h17, v0.V16B());
  __ Uaddlv(s18, v1.V4H());
  __ Uaddlv(s19, v1.V8H());
  __ Uaddlv(d20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x02c7, q16);
    ASSERT_EQUAL_128(0x0, 0x0599, q17);
    ASSERT_EQUAL_128(0x0, 0x000155a9, q18);
    ASSERT_EQUAL_128(0x0, 0x000355fc, q19);
    ASSERT_EQUAL_128(0x0, 0x000000021100a9fe, q20);
  }
}
6365 
6366 
// Exercises SMAXV: the signed maximum over all lanes of the source is written
// to a scalar of the same lane width.
TEST(neon_acrosslanes_smaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  // The 8B and 4H forms reduce only the lower 64 bits of the source.
  __ Smaxv(b16, v0.V8B());
  __ Smaxv(b17, v0.V16B());
  __ Smaxv(h18, v1.V4H());
  __ Smaxv(h19, v1.V8H());
  __ Smaxv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Lanes with the top bit set (e.g. 0xfc, 0xffaa, 0xffffffff) are negative
    // and therefore never selected here.
    ASSERT_EQUAL_128(0x0, 0x33, q16);
    ASSERT_EQUAL_128(0x0, 0x44, q17);
    ASSERT_EQUAL_128(0x0, 0x55ff, q18);
    ASSERT_EQUAL_128(0x0, 0x55ff, q19);
    ASSERT_EQUAL_128(0x0, 0x66555555, q20);
  }
}
6394 
6395 
// Exercises SMINV: the signed minimum over all lanes of the source is written
// to a scalar of the same lane width.
TEST(neon_acrosslanes_sminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  // The 8B and 4H forms reduce only the lower 64 bits of the source.
  __ Sminv(b16, v0.V8B());
  __ Sminv(b17, v0.V16B());
  __ Sminv(h18, v1.V4H());
  __ Sminv(h19, v1.V8H());
  __ Sminv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // The most negative lane wins, e.g. 0xaa over 0xbb and 0xfc in the lower
    // half of v0, and 0x80 once the upper half is included.
    ASSERT_EQUAL_128(0x0, 0xaa, q16);
    ASSERT_EQUAL_128(0x0, 0x80, q17);
    ASSERT_EQUAL_128(0x0, 0xffaa, q18);
    ASSERT_EQUAL_128(0x0, 0xaaaa, q19);
    ASSERT_EQUAL_128(0x0, 0xaaaaaaaa, q20);
  }
}
6423 
// Exercises UMAXV: the unsigned maximum over all lanes of the source is written
// to a scalar of the same lane width.
TEST(neon_acrosslanes_umaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  // The 8B and 4H forms reduce only the lower 64 bits of the source.
  __ Umaxv(b16, v0.V8B());
  __ Umaxv(b17, v0.V16B());
  __ Umaxv(h18, v1.V4H());
  __ Umaxv(h19, v1.V8H());
  __ Umaxv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // The full-width forms pick a larger lane from the upper half (0xfe and
    // 0xffab) than the half-width forms can see (0xfc and 0xffaa).
    ASSERT_EQUAL_128(0x0, 0xfc, q16);
    ASSERT_EQUAL_128(0x0, 0xfe, q17);
    ASSERT_EQUAL_128(0x0, 0xffaa, q18);
    ASSERT_EQUAL_128(0x0, 0xffab, q19);
    ASSERT_EQUAL_128(0x0, 0xffffffff, q20);
  }
}
6451 
6452 
// Exercises UMINV: the unsigned minimum over all lanes of the source is written
// to a scalar of the same lane width.
TEST(neon_acrosslanes_uminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors. Movi takes the upper 64 bits first, then the lower 64 bits.
  // The lower halves of v0 and v1 contain no zero lane, while the upper halves
  // do, so the half-width and full-width forms give different answers.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
  __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  // The 8B and 4H forms reduce only the lower 64 bits of the source.
  __ Uminv(b16, v0.V8B());
  __ Uminv(b17, v0.V16B());
  __ Uminv(h18, v1.V4H());
  __ Uminv(h19, v1.V8H());
  __ Uminv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x01, q16);
    ASSERT_EQUAL_128(0x0, 0x00, q17);
    ASSERT_EQUAL_128(0x0, 0x0001, q18);
    ASSERT_EQUAL_128(0x0, 0x0000, q19);
    ASSERT_EQUAL_128(0x0, 0x0000aa00, q20);
  }
}
6480 
6481 
// SMIN (signed per-lane minimum) for the B, H and S lane sizes, in both the
// 64-bit and 128-bit arrangements.
TEST(neon_3same_smin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Operands, as upper and lower 64-bit halves.
  const uint64_t kLhsHigh = 0xaaaaaaaa55555555;
  const uint64_t kLhsLow = 0xffffffff0000aa55;
  const uint64_t kRhsHigh = 0x55aa5555aaaaaaaa;
  const uint64_t kRhsLow = 0x00000000ffaa55ff;

  // Expected lower 64 bits per lane size; the 64-bit and 128-bit forms of a
  // given lane size share them.
  const uint64_t kByteLanesLow = 0xffffffffffaaaaff;
  const uint64_t kHalfLanesLow = 0xffffffffffaaaa55;
  const uint64_t kWordLanesLow = 0xffffffffffaa55ff;
  // Expected upper 64 bits; identical for all three 128-bit results.
  const uint64_t kAnyLanesHigh = 0xaaaaaaaaaaaaaaaa;

  START();

  __ Movi(v0.V2D(), kLhsHigh, kLhsLow);
  __ Movi(v1.V2D(), kRhsHigh, kRhsLow);

  // 64-bit arrangements.
  __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smin(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements.
  __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, kByteLanesLow, q16);
    ASSERT_EQUAL_128(0x0, kHalfLanesLow, q18);
    ASSERT_EQUAL_128(0x0, kWordLanesLow, q20);
    ASSERT_EQUAL_128(kAnyLanesHigh, kByteLanesLow, q17);
    ASSERT_EQUAL_128(kAnyLanesHigh, kHalfLanesLow, q19);
    ASSERT_EQUAL_128(kAnyLanesHigh, kWordLanesLow, q21);
  }
}
6510 
6511 
// UMAX (unsigned per-lane maximum) for the B, H and S lane sizes, in both the
// 64-bit and 128-bit arrangements.
TEST(neon_3same_umax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Operands, as upper and lower 64-bit halves.
  const uint64_t kLhsHigh = 0xaaaaaaaa55555555;
  const uint64_t kLhsLow = 0xffffffff0000aa55;
  const uint64_t kRhsHigh = 0x55aa5555aaaaaaaa;
  const uint64_t kRhsLow = 0x00000000ffaa55ff;

  // Expected lower 64 bits per lane size; the 64-bit and 128-bit forms of a
  // given lane size share them.
  const uint64_t kByteLanesLow = 0xffffffffffaaaaff;
  const uint64_t kHalfLanesLow = 0xffffffffffaaaa55;
  const uint64_t kWordLanesLow = 0xffffffffffaa55ff;
  // Expected upper 64 bits; identical for all three 128-bit results.
  const uint64_t kAnyLanesHigh = 0xaaaaaaaaaaaaaaaa;

  START();

  __ Movi(v0.V2D(), kLhsHigh, kLhsLow);
  __ Movi(v1.V2D(), kRhsHigh, kRhsLow);

  // 64-bit arrangements.
  __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umax(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements.
  __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, kByteLanesLow, q16);
    ASSERT_EQUAL_128(0x0, kHalfLanesLow, q18);
    ASSERT_EQUAL_128(0x0, kWordLanesLow, q20);
    ASSERT_EQUAL_128(kAnyLanesHigh, kByteLanesLow, q17);
    ASSERT_EQUAL_128(kAnyLanesHigh, kHalfLanesLow, q19);
    ASSERT_EQUAL_128(kAnyLanesHigh, kWordLanesLow, q21);
  }
}
6540 
6541 
// UMIN (unsigned per-lane minimum) for the B, H and S lane sizes, in both the
// 64-bit and 128-bit arrangements.
TEST(neon_3same_umin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Operands, as upper and lower 64-bit halves.
  const uint64_t kLhsHigh = 0xaaaaaaaa55555555;
  const uint64_t kLhsLow = 0xffffffff0000aa55;
  const uint64_t kRhsHigh = 0x55aa5555aaaaaaaa;
  const uint64_t kRhsLow = 0x00000000ffaa55ff;

  // Expected lower 64 bits per lane size; the 64-bit and 128-bit forms of a
  // given lane size share them.
  const uint64_t kByteLanesLow = 0x0000000000005555;
  const uint64_t kHalfLanesLow = 0x00000000000055ff;
  const uint64_t kWordLanesLow = 0x000000000000aa55;
  // Expected upper 64 bits; identical for all three 128-bit results.
  const uint64_t kAnyLanesHigh = 0x55aa555555555555;

  START();

  __ Movi(v0.V2D(), kLhsHigh, kLhsLow);
  __ Movi(v1.V2D(), kRhsHigh, kRhsLow);

  // 64-bit arrangements.
  __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umin(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements.
  __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, kByteLanesLow, q16);
    ASSERT_EQUAL_128(0x0, kHalfLanesLow, q18);
    ASSERT_EQUAL_128(0x0, kWordLanesLow, q20);
    ASSERT_EQUAL_128(kAnyLanesHigh, kByteLanesLow, q17);
    ASSERT_EQUAL_128(kAnyLanesHigh, kHalfLanesLow, q19);
    ASSERT_EQUAL_128(kAnyLanesHigh, kWordLanesLow, q21);
  }
}
6570 
6571 
// Exercises FCADD (floating-point complex add with rotation) on the 2D, 2S and
// 4S arrangements. Complex numbers occupy adjacent lane pairs with the real
// part in the lower lane; comments write them as (imaginary i, real). Per the
// expected values, a rotation of 90 computes a + i*b (re = a.re - b.im,
// im = a.im + b.re) and 270 computes a - i*b (re = a.re + b.im,
// im = a.im - b.re).
TEST(neon_3same_extra_fcadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // (0i, 5) (d)
  __ Movi(v0.V2D(), 0x0, 0x4014000000000000);
  // (5i, 0) (d)
  __ Movi(v1.V2D(), 0x4014000000000000, 0x0);
  // (10i, 10) (d)
  __ Movi(v2.V2D(), 0x4024000000000000, 0x4024000000000000);
  // (5i, 5), (5i, 5) (f)
  __ Movi(v3.V2D(), 0x40A0000040A00000, 0x40A0000040A00000);
  // (5i, 5), (0i, 0) (f)
  __ Movi(v4.V2D(), 0x40A0000040A00000, 0x0);
  // 324567i, 16000 (f)
  __ Movi(v5.V2D(), 0x0, 0x489E7AE0467A0000);

  // Subtraction (10, 10) - (5, 5) == (5, 5)
  // Built from two rotated adds; the second reads the result of the first.
  __ Fcadd(v31.V2D(), v2.V2D(), v1.V2D(), 90);
  __ Fcadd(v31.V2D(), v31.V2D(), v0.V2D(), 270);

  // Addition (10, 10) + (5, 5) == (15, 15)
  // Same scheme with the two rotations swapped.
  __ Fcadd(v30.V2D(), v2.V2D(), v1.V2D(), 270);
  __ Fcadd(v30.V2D(), v30.V2D(), v0.V2D(), 90);

  // 2S
  // The lower half of v4 is zero, so these yield +i*v5 and -i*v5.
  __ Fcadd(v29.V2S(), v4.V2S(), v5.V2S(), 90);
  __ Fcadd(v28.V2S(), v4.V2S(), v5.V2S(), 270);

  // 4S
  __ Fcadd(v27.V4S(), v3.V4S(), v4.V4S(), 90);
  __ Fcadd(v26.V4S(), v3.V4S(), v4.V4S(), 270);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x4014000000000000, 0x4014000000000000, q31);
    ASSERT_EQUAL_128(0x402E000000000000, 0x402E000000000000, q30);
    ASSERT_EQUAL_128(0x0, 0x467a0000c89e7ae0, q29);  // (16000i, -324567)
    ASSERT_EQUAL_128(0x0, 0xc67a0000489e7ae0, q28);  // (-16000i, 324567)
    // (10i, 0), (5i, 5)
    ASSERT_EQUAL_128(0x4120000000000000, 0x40A0000040A00000, q27);
    // (0i, 10), (5i, 5)
    ASSERT_EQUAL_128(0x0000000041200000, 0x40A0000040A00000, q26);
  }
}
6618 
6619 
// Exercises the vector form of FCMLA (floating-point complex multiply-
// accumulate with rotation) on the 2S, 4S and 2D arrangements. Complex numbers
// occupy adjacent lane pairs with the real part in the lower lane; comments
// write them as (imaginary i, real). Each FCMLA adds half of a complex product
// to its destination, so a full product takes a rotation-0 and a rotation-90
// instruction on the same accumulator. Per the expected values:
//   rot 0:   re += a.re * b.re;  im += a.re * b.im
//   rot 90:  re -= a.im * b.im;  im += a.im * b.re
//   rot 180: re -= a.re * b.re;  im -= a.re * b.im
//   rot 270: re += a.im * b.im;  im -= a.im * b.re
TEST(neon_3same_extra_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  __ Movi(v1.V2D(), 0x0, 0x40A0000040400000);  // (5i, 3) (f)
  __ Movi(v2.V2D(), 0x0, 0x4040000040A00000);  // (3i, 5) (f)

  __ Movi(v3.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
  __ Movi(v4.V2D(), 0x0, 0x40E000003F800000);  // (7i, 1) (f)

  __ Movi(v5.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
  __ Movi(v6.V2D(), 0x0, 0x408000003F800000);  // (4i, 1) (f)

  // (1.5i, 2.5), (31.5i, 1024) (f)
  __ Movi(v7.V2D(), 0x3FC0000040200000, 0x41FC000044800000);
  // (2048i, 412.75), (3645i, 0) (f)
  __ Movi(v8.V2D(), 0x4500000043CE6000, 0x4563D00000000000);
  // (2000i, 450,000) (d)
  __ Movi(v9.V2D(), 0x409F400000000000, 0x411B774000000000);
  // (30,000i, 1250) (d)
  __ Movi(v10.V2D(), 0x40DD4C0000000000, 0x4093880000000000);

  // DST
  // Accumulators start at zero because FCMLA adds to its destination.
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Full calculations
  // Each pair accumulates both halves of one complex product.
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 90);
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 0);

  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 0);
  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 90);

  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 90);
  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 0);

  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 0);
  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 90);

  // Partial checks
  // A single rotation each, i.e. only one half of the product.
  __ Fcmla(v27.V2S(), v1.V2S(), v2.V2S(), 0);
  __ Fcmla(v26.V2S(), v2.V2S(), v1.V2S(), 0);

  __ Fcmla(v25.V4S(), v7.V4S(), v8.V4S(), 270);
  __ Fcmla(v24.V4S(), v7.V4S(), v8.V4S(), 180);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x4208000000000000, q31);  // (34i, 0)
    ASSERT_EQUAL_128(0x0, 0x41B80000C1300000, q30);  // (23i, -11)
    ASSERT_EQUAL_128(0x0, 0x41600000C0A00000, q29);  // (14i, -5)

    // (13502500000i, 502500000)
    ASSERT_EQUAL_128(0x4209267E65000000, 0x41BDF38AA0000000, q28);
    ASSERT_EQUAL_128(0x0, 0x4110000041700000, q27);  //  (9i, 15)
    ASSERT_EQUAL_128(0x0, 0x41C8000041700000, q26);  // (25i, 15)
    // (-619.125i, 3072), (0i, 114817.5)
    ASSERT_EQUAL_128(0xc41ac80045400000, 0x0000000047e040c0, q25);
    // (-5120i, -1031.875), (-3732480i, 0)
    ASSERT_EQUAL_128(0xc5a00000c480fc00, 0xca63d00000000000, q24);
  }
}
6692 
6693 
// Exercises the by-element form of FCMLA on the 4S arrangement. The index
// selects one complex number (an adjacent lane pair) from the last operand,
// which is then combined with every complex number of the first source:
// index 0 is the lower pair and index 1 the upper pair. Complex numbers are
// written in comments as (imaginary i, real), with the real part in the lower
// lane. Per the expected values:
//   rot 0:   re += a.re * b.re;  im += a.re * b.im
//   rot 90:  re -= a.im * b.im;  im += a.im * b.re
//   rot 180: re -= a.re * b.re;  im -= a.re * b.im
//   rot 270: re += a.im * b.im;  im -= a.im * b.re
TEST(neon_byelement_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // (5i, 3), (5i, 3) (f)
  __ Movi(v1.V2D(), 0x40A0000040400000, 0x40A0000040400000);
  // (3i, 5), (3i, 5) (f)
  __ Movi(v2.V2D(), 0x4040000040A00000, 0x4040000040A00000);
  // (7i, 1), (5i, 3) (f)
  __ Movi(v3.V2D(), 0x40E000003F800000, 0x40A0000040400000);
  // (4i, 1), (3i, 5) (f)
  __ Movi(v4.V2D(), 0x408000003F800000, 0x4040000040A00000);
  // (4i, 1), (7i, 1) (f)
  __ Movi(v5.V2D(), 0x408000003F800000, 0x40E000003F800000);
  // (2i, 3), (0, 0) (f)
  __ Movi(v6.V2D(), 0x4000000040400000, 0x0);

  // DST
  // Accumulators start at zero because FCMLA adds to its destination.
  __ Movi(v22.V2D(), 0x0, 0x0);
  __ Movi(v23.V2D(), 0x0, 0x0);
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Full calculation (pairs)
  // Rotations 90 and 0 on the same accumulator give a complete product.
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 90);
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 0);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 90);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 0);

  // Rotations
  // Every rotation with both element indices, each into a fresh accumulator.
  __ Fcmla(v29.V4S(), v3.V4S(), v4.S(), 1, 0);
  __ Fcmla(v28.V4S(), v3.V4S(), v4.S(), 1, 90);
  __ Fcmla(v27.V4S(), v3.V4S(), v4.S(), 1, 180);
  __ Fcmla(v26.V4S(), v3.V4S(), v4.S(), 1, 270);
  __ Fcmla(v25.V4S(), v3.V4S(), v4.S(), 0, 270);
  __ Fcmla(v24.V4S(), v3.V4S(), v4.S(), 0, 180);
  __ Fcmla(v23.V4S(), v3.V4S(), v4.S(), 0, 90);
  __ Fcmla(v22.V4S(), v3.V4S(), v4.S(), 0, 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // (34i, 0), (34i, 0)
    ASSERT_EQUAL_128(0x4208000000000000, 0x4208000000000000, q31);
    // (14i, -5), (23i, -11)
    ASSERT_EQUAL_128(0x41600000C0A00000, 0x41B80000C1300000, q30);
    // (4i, 1), (12i, 3)
    ASSERT_EQUAL_128(0x408000003f800000, 0x4140000040400000, q29);
    // (7i, -28), (5i, -20)
    ASSERT_EQUAL_128(0x40e00000c1e00000, 0x40a00000c1a00000, q28);
    // (-4i, -1), (-12i, -3)
    ASSERT_EQUAL_128(0xc0800000bf800000, 0xc1400000c0400000, q27);
    // (-7i, 28), (-5i, 20)
    ASSERT_EQUAL_128(0xc0e0000041e00000, 0xc0a0000041a00000, q26);
    // (-35i, 21), (-25i, 15)
    ASSERT_EQUAL_128(0xc20c000041a80000, 0xc1c8000041700000, q25);
    // (-3i, -5), (-9i, -15)
    ASSERT_EQUAL_128(0xc0400000c0a00000, 0xc1100000c1700000, q24);
    // (35i, -21), (25i, -15)
    ASSERT_EQUAL_128(0x420c0000c1a80000, 0x41c80000c1700000, q23);
    // (3i, 5), (9i, 15)
    ASSERT_EQUAL_128(0x4040000040a00000, 0x4110000041700000, q22);
  }
}
6766 
6767 
// Mvn (bitwise complement) for every 128-bit and 64-bit arrangement. The
// result bits must not depend on the lane size used.
TEST(neon_2regmisc_mvn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source halves and their bitwise complements.
  const uint64_t kSrcHigh = 0x00ff00ffff0055aa;
  const uint64_t kSrcLow = 0x55aa55aa55aa55aa;
  const uint64_t kInvHigh = 0xff00ff0000ffaa55;
  const uint64_t kInvLow = 0xaa55aa55aa55aa55;

  START();

  __ Movi(v0.V2D(), kSrcHigh, kSrcLow);

  // 128-bit arrangements.
  __ Mvn(v16.V16B(), v0.V16B());
  __ Mvn(v17.V8H(), v0.V8H());
  __ Mvn(v18.V4S(), v0.V4S());
  __ Mvn(v19.V2D(), v0.V2D());

  // 64-bit arrangements.
  __ Mvn(v24.V8B(), v0.V8B());
  __ Mvn(v25.V4H(), v0.V4H());
  __ Mvn(v26.V2S(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Full-width results.
    ASSERT_EQUAL_128(kInvHigh, kInvLow, q16);
    ASSERT_EQUAL_128(kInvHigh, kInvLow, q17);
    ASSERT_EQUAL_128(kInvHigh, kInvLow, q18);
    ASSERT_EQUAL_128(kInvHigh, kInvLow, q19);

    // Half-width results: the upper 64 bits are expected to be zero.
    ASSERT_EQUAL_128(0x0, kInvLow, q24);
    ASSERT_EQUAL_128(0x0, kInvLow, q25);
    ASSERT_EQUAL_128(0x0, kInvLow, q26);
  }
}
6799 
6800 
// Checks Not on the 16B and 8B arrangements against the complement of the
// source bits.
TEST(neon_2regmisc_not) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // v0 is a full 128-bit pattern; v1 only has its low 64 bits populated.
  const uint64_t kWideHigh = 0x00ff00ffff0055aa;
  const uint64_t kWideLow = 0x55aa55aa55aa55aa;
  const uint64_t kNarrowLow = 0x00ffff0000ffff00;

  START();

  __ Movi(v0.V2D(), kWideHigh, kWideLow);
  __ Movi(v1.V2D(), 0, kNarrowLow);

  __ Not(v16.V16B(), v0.V16B());  // 128-bit form.
  __ Not(v17.V8B(), v1.V8B());    // 64-bit form.
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
    // The 8B form leaves the upper 64 bits of the destination at zero.
    ASSERT_EQUAL_128(0x0, 0xff0000ffff0000ff, q17);
  }
}
6820 
6821 
// Exercises the per-lane counting instructions Cls, Clz and Cnt over the
// arrangements used below and checks each destination register.
TEST(neon_2regmisc_cls_clz_cnt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // v0 holds the bytes 0x00..0x0f; v1 holds a mixed-sign nibble pattern.
  const uint64_t kBytesHigh = 0x0001020304050607;
  const uint64_t kBytesLow = 0x08090a0b0c0d0e0f;
  const uint64_t kNibblesHigh = 0xfedcba9876543210;
  const uint64_t kNibblesLow = 0x0123456789abcdef;

  START();

  __ Movi(v0.V2D(), kBytesHigh, kBytesLow);
  __ Movi(v1.V2D(), kNibblesHigh, kNibblesLow);

  // Cls, reading v1.
  __ Cls(v16.V8B(), v1.V8B());
  __ Cls(v17.V16B(), v1.V16B());
  __ Cls(v18.V4H(), v1.V4H());
  __ Cls(v19.V8H(), v1.V8H());
  __ Cls(v20.V2S(), v1.V2S());
  __ Cls(v21.V4S(), v1.V4S());

  // Clz, reading v0.
  __ Clz(v22.V8B(), v0.V8B());
  __ Clz(v23.V16B(), v0.V16B());
  __ Clz(v24.V4H(), v0.V4H());
  __ Clz(v25.V8H(), v0.V8H());
  __ Clz(v26.V2S(), v0.V2S());
  __ Clz(v27.V4S(), v0.V4S());

  // Cnt: the 8B form reads v0, the 16B form reads v1.
  __ Cnt(v28.V8B(), v0.V8B());
  __ Cnt(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Cls results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);
    ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);
    ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);
    ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);

    // Clz results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);
    ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);
    ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);
    ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);

    // Cnt results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);
    ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);
  }
}
6870 
// Exercises the element-reversal instructions Rev16, Rev32 and Rev64, plus
// Rbit, and checks the resulting byte (or bit) order.
TEST(neon_2regmisc_rev) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // v0 holds the bytes 0x00..0x0f so that any reordering is easy to read off;
  // v1 is the input for Rbit.
  const uint64_t kBytesHigh = 0x0001020304050607;
  const uint64_t kBytesLow = 0x08090a0b0c0d0e0f;
  const uint64_t kBitsHigh = 0xfedcba9876543210;
  const uint64_t kBitsLow = 0x0123456789abcdef;

  START();

  __ Movi(v0.V2D(), kBytesHigh, kBytesLow);
  __ Movi(v1.V2D(), kBitsHigh, kBitsLow);

  // Reversal within 16-bit containers.
  __ Rev16(v16.V8B(), v0.V8B());
  __ Rev16(v17.V16B(), v0.V16B());

  // Reversal within 32-bit containers.
  __ Rev32(v18.V8B(), v0.V8B());
  __ Rev32(v19.V16B(), v0.V16B());
  __ Rev32(v20.V4H(), v0.V4H());
  __ Rev32(v21.V8H(), v0.V8H());

  // Reversal within 64-bit containers.
  __ Rev64(v22.V8B(), v0.V8B());
  __ Rev64(v23.V16B(), v0.V16B());
  __ Rev64(v24.V4H(), v0.V4H());
  __ Rev64(v25.V8H(), v0.V8H());
  __ Rev64(v26.V2S(), v0.V2S());
  __ Rev64(v27.V4S(), v0.V4S());

  // Bit reversal within each byte.
  __ Rbit(v28.V8B(), v1.V8B());
  __ Rbit(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Rev16.
    ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);

    // Rev32.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);
    ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);

    // Rev64.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);
    ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);
    ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);

    // Rbit.
    ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);
    ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);
  }
}
6921 
6922 
// Checks Sli (shift left and insert) for each vector arrangement and for the
// scalar D form. Every destination is preloaded with v0 so that the bits kept
// from the destination are visible in the result.
TEST(neon_sli) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Destinations that start out as a copy of v0.
  const VRegister dests[] = {v16, v17, v18, v19, v20, v21, v22, v23};
  const int dest_count = sizeof(dests) / sizeof(dests[0]);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  for (int i = 0; i < dest_count; i++) {
    __ Mov(dests[i].V2D(), v0.V2D());
  }

  // Vector forms, all inserting from v1.
  __ Sli(v16.V8B(), v1.V8B(), 4);
  __ Sli(v17.V16B(), v1.V16B(), 7);
  __ Sli(v18.V4H(), v1.V4H(), 8);
  __ Sli(v19.V8H(), v1.V8H(), 15);
  __ Sli(v20.V2S(), v1.V2S(), 0);
  __ Sli(v21.V4S(), v1.V4S(), 31);
  __ Sli(v22.V2D(), v1.V2D(), 48);

  // Scalar form.
  __ Sli(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);
    // A shift of zero replaces the whole lane: the low half equals v1's.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);
    ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);

    // Scalar result: same low half as the 2D form, upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);
  }
}
6966 
6967 
// Checks Sri (shift right and insert) for each vector arrangement and for the
// scalar D form. Every destination is preloaded with v0 so that the bits kept
// from the destination are visible in the result.
TEST(neon_sri) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Destinations that start out as a copy of v0.
  const VRegister dests[] = {v16, v17, v18, v19, v20, v21, v22, v23};
  const int dest_count = sizeof(dests) / sizeof(dests[0]);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  for (int i = 0; i < dest_count; i++) {
    __ Mov(dests[i].V2D(), v0.V2D());
  }

  // Vector forms, all inserting from v1.
  __ Sri(v16.V8B(), v1.V8B(), 4);
  __ Sri(v17.V16B(), v1.V16B(), 7);
  __ Sri(v18.V4H(), v1.V4H(), 8);
  __ Sri(v19.V8H(), v1.V8H(), 15);
  __ Sri(v20.V2S(), v1.V2S(), 1);
  __ Sri(v21.V4S(), v1.V4S(), 31);
  __ Sri(v22.V2D(), v1.V2D(), 48);

  // Scalar form.
  __ Sri(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);
    ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);
    ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);
    ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);
    ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);

    // Scalar result: same low half as the 2D form, upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);
  }
}
7011 
7012 
// Checks Shrn/Shrn2 (shift right and narrow) for the B, H and S destination
// lane sizes.
TEST(neon_shrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source vectors for v0-v4, one row each, as {high, low} 64-bit halves.
  // v4 is initialised but not read by any instruction in this test.
  static const uint64_t kSources[][2] = {
      {0x7f0100ff81807f01, 0x8081ff00017f8081},
      {0x80008001ffff0000, 0xffff000000017fff},
      {0x80000000ffffffff, 0x000000007fffffff},
      {0x8000000000000001, 0x7fffffffffffffff},
      {0x8000000000000000, 0x0000000000000000}};
  const VRegister source_regs[] = {v0, v1, v2, v3, v4};
  const int source_count = sizeof(source_regs) / sizeof(source_regs[0]);

  START();

  for (int i = 0; i < source_count; i++) {
    __ Movi(source_regs[i].V2D(), kSources[i][0], kSources[i][1]);
  }

  // Each destination is written by the base form (low 64 bits) and then by
  // the "2" form (high 64 bits).
  __ Shrn(v16.V8B(), v0.V8H(), 8);
  __ Shrn2(v16.V16B(), v1.V8H(), 1);
  __ Shrn(v17.V4H(), v1.V4S(), 16);
  __ Shrn2(v17.V8H(), v2.V4S(), 1);
  __ Shrn(v18.V2S(), v3.V2D(), 32);
  __ Shrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);
  }
}
7040 
7041 
// Checks Rshrn/Rshrn2 (rounding shift right and narrow) for the B, H and S
// destination lane sizes.
TEST(neon_rshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source vectors for v0-v4, one row each, as {high, low} 64-bit halves.
  // v4 is initialised but not read by any instruction in this test.
  static const uint64_t kSources[][2] = {
      {0x7f0100ff81807f01, 0x8081ff00017f8081},
      {0x80008001ffff0000, 0xffff000000017fff},
      {0x80000000ffffffff, 0x000000007fffffff},
      {0x8000000000000001, 0x7fffffffffffffff},
      {0x8000000000000000, 0x0000000000000000}};
  const VRegister source_regs[] = {v0, v1, v2, v3, v4};
  const int source_count = sizeof(source_regs) / sizeof(source_regs[0]);

  START();

  for (int i = 0; i < source_count; i++) {
    __ Movi(source_regs[i].V2D(), kSources[i][0], kSources[i][1]);
  }

  // Each destination is written by the base form (64-bit arrangement) and
  // then by the "2" form (128-bit arrangement).
  __ Rshrn(v16.V8B(), v0.V8H(), 8);
  __ Rshrn2(v16.V16B(), v1.V8H(), 1);
  __ Rshrn(v17.V4H(), v1.V4S(), 16);
  __ Rshrn2(v17.V8H(), v2.V4S(), 1);
  __ Rshrn(v18.V2S(), v3.V2D(), 32);
  __ Rshrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);
  }
}
7069 
7070 
// Checks Uqshrn/Uqshrn2 (unsigned saturating shift right and narrow) in both
// vector and scalar forms.
TEST(neon_uqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source vectors for v0-v4, one row each, as {high, low} 64-bit halves.
  // v4 is initialised but not read by any instruction in this test.
  static const uint64_t kSources[][2] = {
      {0x7f0100ff81807f01, 0x8081ff00017f8081},
      {0x80008001ffff0000, 0xffff000000017fff},
      {0x80000000ffffffff, 0x000000007fffffff},
      {0x8000000000000001, 0x7fffffffffffffff},
      {0x8000000000000000, 0x0000000000000000}};
  const VRegister source_regs[] = {v0, v1, v2, v3, v4};
  const int source_count = sizeof(source_regs) / sizeof(source_regs[0]);

  START();

  for (int i = 0; i < source_count; i++) {
    __ Movi(source_regs[i].V2D(), kSources[i][0], kSources[i][1]);
  }

  // Vector forms: each destination is written by the base form (64-bit
  // arrangement) and then by the "2" form (128-bit arrangement).
  __ Uqshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Uqshrn(b19, h0, 8);
  __ Uqshrn(h20, s1, 16);
  __ Uqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Vector results.
    ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);
    // Scalar results: everything above the written lane is zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7105 
7106 
// Checks Uqrshrn/Uqrshrn2 (unsigned saturating rounding shift right and
// narrow) in both vector and scalar forms.
TEST(neon_uqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source vectors for v0-v4, one row each, as {high, low} 64-bit halves.
  // v4 is initialised but not read by any instruction in this test.
  static const uint64_t kSources[][2] = {
      {0x7f0100ff81807f01, 0x8081ff00017f8081},
      {0x80008001ffff0000, 0xffff000000017fff},
      {0x80000000ffffffff, 0x000000007fffffff},
      {0x8000000000000001, 0x7fffffffffffffff},
      {0x8000000000000000, 0x0000000000000000}};
  const VRegister source_regs[] = {v0, v1, v2, v3, v4};
  const int source_count = sizeof(source_regs) / sizeof(source_regs[0]);

  START();

  for (int i = 0; i < source_count; i++) {
    __ Movi(source_regs[i].V2D(), kSources[i][0], kSources[i][1]);
  }

  // Vector forms: each destination is written by the base form (64-bit
  // arrangement) and then by the "2" form (128-bit arrangement).
  __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Uqrshrn(b19, h0, 8);
  __ Uqrshrn(h20, s1, 16);
  __ Uqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Vector results.
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);
    // Scalar results: everything above the written lane is zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
7141 
7142 
// Checks Sqshrn/Sqshrn2 (signed saturating shift right and narrow) in both
// vector and scalar forms.
TEST(neon_sqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source vectors for v0-v4, one row each, as {high, low} 64-bit halves.
  // v4 is initialised but not read by any instruction in this test.
  static const uint64_t kSources[][2] = {
      {0x7f0100ff81807f01, 0x8081ff00017f8081},
      {0x80008001ffff0000, 0xffff000000017fff},
      {0x80000000ffffffff, 0x000000007fffffff},
      {0x8000000000000001, 0x7fffffffffffffff},
      {0x8000000000000000, 0x0000000000000000}};
  const VRegister source_regs[] = {v0, v1, v2, v3, v4};
  const int source_count = sizeof(source_regs) / sizeof(source_regs[0]);

  START();

  for (int i = 0; i < source_count; i++) {
    __ Movi(source_regs[i].V2D(), kSources[i][0], kSources[i][1]);
  }

  // Vector forms: each destination is written by the base form (64-bit
  // arrangement) and then by the "2" form (128-bit arrangement).
  __ Sqshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqshrn(b19, h0, 8);
  __ Sqshrn(h20, s1, 16);
  __ Sqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Vector results.
    ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
    // Scalar results: everything above the written lane is zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7177 
7178 
// Checks Sqrshrn/Sqrshrn2 (signed saturating rounding shift right and narrow)
// in both vector and scalar forms.
TEST(neon_sqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source vectors for v0-v4, one row each, as {high, low} 64-bit halves.
  // v4 is initialised but not read by any instruction in this test.
  static const uint64_t kSources[][2] = {
      {0x7f0100ff81807f01, 0x8081ff00017f8081},
      {0x80008001ffff0000, 0xffff000000017fff},
      {0x80000000ffffffff, 0x000000007fffffff},
      {0x8000000000000001, 0x7fffffffffffffff},
      {0x8000000000000000, 0x0000000000000000}};
  const VRegister source_regs[] = {v0, v1, v2, v3, v4};
  const int source_count = sizeof(source_regs) / sizeof(source_regs[0]);

  START();

  for (int i = 0; i < source_count; i++) {
    __ Movi(source_regs[i].V2D(), kSources[i][0], kSources[i][1]);
  }

  // Vector forms: each destination is written by the base form (64-bit
  // arrangement) and then by the "2" form (128-bit arrangement).
  __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqrshrn(b19, h0, 8);
  __ Sqrshrn(h20, s1, 16);
  __ Sqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Vector results.
    ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
    // Scalar results: everything above the written lane is zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7213 
7214 
// Checks Sqshrun/Sqshrun2 (signed saturating shift right, unsigned narrow) in
// both vector and scalar forms.
TEST(neon_sqshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source vectors for v0-v4, one row each, as {high, low} 64-bit halves.
  // v4 is initialised but not read by any instruction in this test.
  static const uint64_t kSources[][2] = {
      {0x7f0100ff81807f01, 0x8081ff00017f8081},
      {0x80008001ffff0000, 0xffff000000017fff},
      {0x80000000ffffffff, 0x000000007fffffff},
      {0x8000000000000001, 0x7fffffffffffffff},
      {0x8000000000000000, 0x0000000000000000}};
  const VRegister source_regs[] = {v0, v1, v2, v3, v4};
  const int source_count = sizeof(source_regs) / sizeof(source_regs[0]);

  START();

  for (int i = 0; i < source_count; i++) {
    __ Movi(source_regs[i].V2D(), kSources[i][0], kSources[i][1]);
  }

  // Vector forms: each destination is written by the base form (64-bit
  // arrangement) and then by the "2" form (128-bit arrangement).
  __ Sqshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqshrun2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqshrun(b19, h0, 8);
  __ Sqshrun(h20, s1, 16);
  __ Sqshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Vector results.
    ASSERT_EQUAL_128(0x00000000000000ff, 0x7f00007f00000100, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);
    // Scalar results: everything above the written lane is zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7249 
7250 
// Checks Sqrshrun/Sqrshrun2 (signed saturating rounding shift right, unsigned
// narrow) in both vector and scalar forms.
TEST(neon_sqrshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source vectors for v0-v4, one row each, as {high, low} 64-bit halves.
  // v4 is initialised but not read by any instruction in this test.
  static const uint64_t kSources[][2] = {
      {0x7f0100ff81807f01, 0x8081ff00017f8081},
      {0x80008001ffff0000, 0xffff000000017fff},
      {0x80000000ffffffff, 0x000000007fffffff},
      {0x8000000000000001, 0x7fffffffffffffff},
      {0x8000000000000000, 0x0000000000000000}};
  const VRegister source_regs[] = {v0, v1, v2, v3, v4};
  const int source_count = sizeof(source_regs) / sizeof(source_regs[0]);

  START();

  for (int i = 0; i < source_count; i++) {
    __ Movi(source_regs[i].V2D(), kSources[i][0], kSources[i][1]);
  }

  // Vector forms: each destination is written by the base form (64-bit
  // arrangement) and then by the "2" form (128-bit arrangement).
  __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqrshrun(b19, h0, 8);
  __ Sqrshrun(h20, s1, 16);
  __ Sqrshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Vector results.
    ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);
    // Scalar results: everything above the written lane is zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
7285 
// Checks the immediate form of Bic (clear the bits given by a shifted 8-bit
// immediate) on H and S lanes, for both 64-bit and 128-bit arrangements.
TEST(neon_modimm_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every destination starts from the same 128-bit pattern.
  const uint64_t kInitialHigh = 0x00aaff55ff0055aa;
  const uint64_t kInitialLow = 0x5555ffff0000aaaa;
  const VRegister dests[] =
      {v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27};
  const int dest_count = sizeof(dests) / sizeof(dests[0]);

  START();

  for (int i = 0; i < dest_count; i++) {
    __ Movi(dests[i].V2D(), kInitialHigh, kInitialLow);
  }

  // Halfword lanes.
  __ Bic(v16.V4H(), 0x00, 0);
  __ Bic(v17.V4H(), 0xff, 8);
  __ Bic(v18.V8H(), 0x00, 0);
  __ Bic(v19.V8H(), 0xff, 8);

  // Word lanes, 64-bit arrangement.
  __ Bic(v20.V2S(), 0x00, 0);
  __ Bic(v21.V2S(), 0xff, 8);
  __ Bic(v22.V2S(), 0x00, 16);
  __ Bic(v23.V2S(), 0xff, 24);

  // Word lanes, 128-bit arrangement.
  __ Bic(v24.V4S(), 0xff, 0);
  __ Bic(v25.V4S(), 0x00, 8);
  __ Bic(v26.V4S(), 0xff, 16);
  __ Bic(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    // A zero immediate leaves the lanes as they were; the 64-bit arrangements
    // additionally clear the upper half of the register.
    ASSERT_EQUAL_128(0x0, kInitialLow, q16);
    ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
    ASSERT_EQUAL_128(kInitialHigh, kInitialLow, q18);
    ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);

    ASSERT_EQUAL_128(0x0, kInitialLow, q20);
    ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
    ASSERT_EQUAL_128(0x0, kInitialLow, q22);
    ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);

    ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
    ASSERT_EQUAL_128(kInitialHigh, kInitialLow, q25);
    ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
    ASSERT_EQUAL_128(kInitialHigh, kInitialLow, q27);
  }
}
7340 
7341 
// Checks the MacroAssembler's Movi with a range of 16-bit immediates on H
// lanes: each lane of the destination must hold the requested value.
TEST(neon_modimm_movi_16bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // 64-bit arrangement.
  __ Movi(v0.V4H(), 0xabab);  // Both bytes equal.
  __ Movi(v1.V4H(), 0xab00);  // Low byte clear.
  __ Movi(v2.V4H(), 0xabff);  // Low byte all ones.

  // 128-bit arrangement.
  __ Movi(v3.V8H(), 0x00ab);  // High byte clear.
  __ Movi(v4.V8H(), 0xffab);  // High byte all ones.
  __ Movi(v5.V8H(), 0xabcd);  // Two unrelated bytes.

  END();

  if (CAN_RUN()) {
    RUN();

    // V4H destinations: the upper 64 bits are zero.
    ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
    ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
    ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);

    // V8H destinations: the immediate is replicated across all 128 bits.
    ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
    ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);
  }
}
7367 
7368 
// Checks the MacroAssembler's Movi with a range of 32-bit immediates on S
// lanes: each lane of the destination must hold the requested value.
TEST(neon_modimm_movi_32bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One non-zero byte, all other bytes zero.
  __ Movi(v0.V2S(), 0x000000ab);
  __ Movi(v1.V2S(), 0x0000ab00);
  __ Movi(v2.V4S(), 0x00ab0000);
  __ Movi(v3.V4S(), 0xab000000);

  // One arbitrary byte, all other bytes 0xff.
  __ Movi(v4.V2S(), 0xffffffab);
  __ Movi(v5.V2S(), 0xffffabff);
  __ Movi(v6.V4S(), 0xffabffff);
  __ Movi(v7.V4S(), 0xabffffff);

  // One arbitrary byte with 0xff bytes on one side and zero bytes on the
  // other.
  __ Movi(v16.V2S(), 0x0000abff);
  __ Movi(v17.V2S(), 0x00abffff);
  __ Movi(v18.V4S(), 0xffab0000);
  __ Movi(v19.V4S(), 0xffffab00);

  // Repeated byte, repeated halfword, unrelated bytes, and a 0x00/0xff mix.
  __ Movi(v20.V4S(), 0xabababab);
  __ Movi(v21.V4S(), 0xabcdabcd);
  __ Movi(v22.V4S(), 0xabcdef01);
  __ Movi(v23.V4S(), 0x00ffff00);

  END();

  if (CAN_RUN()) {
    RUN();

    // V2S destinations have a zero upper half; V4S destinations repeat the
    // immediate in all four lanes.
    ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
    ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
    ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
    ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);

    ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
    ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
    ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
    ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);

    ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
    ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
    ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
    ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);

    ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
    ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
    ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
  }
}
7420 
7421 
// Checks the MacroAssembler's Movi with a range of 64-bit immediates on D
// lanes (V1D and V2D): each lane must hold the requested value.
TEST(neon_modimm_movi_64bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The immediates under test, named after the shape of their bit pattern.
  const uint64_t kByteMask = 0x00ffff0000ffffff;       // Bytes 0x00 or 0xff.
  const uint64_t kRepeatedByte = 0xabababababababab;
  const uint64_t kRepeatedHalf = 0xabcdabcdabcdabcd;
  const uint64_t kRepeatedWord = 0xabcdef01abcdef01;
  const uint64_t kUnstructured = 0xabcdef0123456789;

  START();

  __ Movi(v0.V1D(), kByteMask);
  __ Movi(v1.V2D(), kRepeatedByte);
  __ Movi(v2.V2D(), kRepeatedHalf);
  __ Movi(v3.V2D(), kRepeatedWord);
  __ Movi(v4.V1D(), kUnstructured);
  __ Movi(v5.V2D(), kUnstructured);

  END();

  if (CAN_RUN()) {
    RUN();

    // V1D destinations are checked as D registers; V2D destinations must hold
    // the immediate in both lanes.
    ASSERT_EQUAL_64(kByteMask, d0);
    ASSERT_EQUAL_128(kRepeatedByte, kRepeatedByte, q1);
    ASSERT_EQUAL_128(kRepeatedHalf, kRepeatedHalf, q2);
    ASSERT_EQUAL_128(kRepeatedWord, kRepeatedWord, q3);
    ASSERT_EQUAL_64(kUnstructured, d4);
    ASSERT_EQUAL_128(kUnstructured, kUnstructured, q5);
  }
}
7447 
7448 
TEST(neon_modimm_movi)7449 TEST(neon_modimm_movi) {
7450   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
7451 
7452   START();
7453 
7454   __ Movi(v0.V8B(), 0xaa);
7455   __ Movi(v1.V16B(), 0x55);
7456 
7457   __ Movi(d2, 0x00ffff0000ffffff);
7458   __ Movi(v3.V2D(), 0x00ffff0000ffffff);
7459 
7460   __ Movi(v16.V4H(), 0x00, LSL, 0);
7461   __ Movi(v17.V4H(), 0xff, LSL, 8);
7462   __ Movi(v18.V8H(), 0x00, LSL, 0);
7463   __ Movi(v19.V8H(), 0xff, LSL, 8);
7464 
7465   __ Movi(v20.V2S(), 0x00, LSL, 0);
7466   __ Movi(v21.V2S(), 0xff, LSL, 8);
7467   __ Movi(v22.V2S(), 0x00, LSL, 16);
7468   __ Movi(v23.V2S(), 0xff, LSL, 24);
7469 
7470   __ Movi(v24.V4S(), 0xff, LSL, 0);
7471   __ Movi(v25.V4S(), 0x00, LSL, 8);
7472   __ Movi(v26.V4S(), 0xff, LSL, 16);
7473   __ Movi(v27.V4S(), 0x00, LSL, 24);
7474 
7475   __ Movi(v28.V2S(), 0xaa, MSL, 8);
7476   __ Movi(v29.V2S(), 0x55, MSL, 16);
7477   __ Movi(v30.V4S(), 0xff, MSL, 8);
7478   __ Movi(v31.V4S(), 0x00, MSL, 16);
7479 
7480   END();
7481 
7482   if (CAN_RUN()) {
7483     RUN();
7484 
7485     ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
7486     ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);
7487 
7488     ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q2);
7489     ASSERT_EQUAL_128(0x00ffff0000ffffff, 0x00ffff0000ffffff, q3);
7490 
7491     ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
7492     ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
7493     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
7494     ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);
7495 
7496     ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
7497     ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
7498     ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
7499     ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);
7500 
7501     ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
7502     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
7503     ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
7504     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
7505 
7506     ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
7507     ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
7508     ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
7509     ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);
7510   }
7511 }
7512 
7513 
// Checks Mvni (move inverted immediate) with 8-bit immediates combined with
// an LSL or MSL shift, on H and S lanes.
TEST(neon_modimm_mvni) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Halfword lanes, LSL.
  __ Mvni(v16.V4H(), 0x00, LSL, 0);
  __ Mvni(v17.V4H(), 0xff, LSL, 8);
  __ Mvni(v18.V8H(), 0x00, LSL, 0);
  __ Mvni(v19.V8H(), 0xff, LSL, 8);

  // Word lanes (64-bit arrangement), LSL.
  __ Mvni(v20.V2S(), 0x00, LSL, 0);
  __ Mvni(v21.V2S(), 0xff, LSL, 8);
  __ Mvni(v22.V2S(), 0x00, LSL, 16);
  __ Mvni(v23.V2S(), 0xff, LSL, 24);

  // Word lanes (128-bit arrangement), LSL.
  __ Mvni(v24.V4S(), 0xff, LSL, 0);
  __ Mvni(v25.V4S(), 0x00, LSL, 8);
  __ Mvni(v26.V4S(), 0xff, LSL, 16);
  __ Mvni(v27.V4S(), 0x00, LSL, 24);

  // Word lanes, MSL.
  __ Mvni(v28.V2S(), 0xaa, MSL, 8);
  __ Mvni(v29.V2S(), 0x55, MSL, 16);
  __ Mvni(v30.V4S(), 0xff, MSL, 8);
  __ Mvni(v31.V4S(), 0x00, MSL, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // Each lane holds the complement of the shifted immediate; 64-bit
    // arrangements leave the upper half of the register at zero.
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);

    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);

    ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);

    ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
    ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);
  }
}
7565 
7566 
// Checks the immediate form of Orr (set the bits given by a shifted 8-bit
// immediate) on H and S lanes, for both 64-bit and 128-bit arrangements.
TEST(neon_modimm_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every destination starts from the same 128-bit pattern.
  const uint64_t kInitialHigh = 0x00aaff55ff0055aa;
  const uint64_t kInitialLow = 0x5555ffff0000aaaa;
  const VRegister dests[] =
      {v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27};
  const int dest_count = sizeof(dests) / sizeof(dests[0]);

  START();

  for (int i = 0; i < dest_count; i++) {
    __ Movi(dests[i].V2D(), kInitialHigh, kInitialLow);
  }

  // Halfword lanes.
  __ Orr(v16.V4H(), 0x00, 0);
  __ Orr(v17.V4H(), 0xff, 8);
  __ Orr(v18.V8H(), 0x00, 0);
  __ Orr(v19.V8H(), 0xff, 8);

  // Word lanes, 64-bit arrangement.
  __ Orr(v20.V2S(), 0x00, 0);
  __ Orr(v21.V2S(), 0xff, 8);
  __ Orr(v22.V2S(), 0x00, 16);
  __ Orr(v23.V2S(), 0xff, 24);

  // Word lanes, 128-bit arrangement.
  __ Orr(v24.V4S(), 0xff, 0);
  __ Orr(v25.V4S(), 0x00, 8);
  __ Orr(v26.V4S(), 0xff, 16);
  __ Orr(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    // A zero immediate leaves the lanes as they were; the 64-bit arrangements
    // additionally clear the upper half of the register.
    ASSERT_EQUAL_128(0x0, kInitialLow, q16);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
    ASSERT_EQUAL_128(kInitialHigh, kInitialLow, q18);
    ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);

    ASSERT_EQUAL_128(0x0, kInitialLow, q20);
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
    ASSERT_EQUAL_128(0x0, kInitialLow, q22);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);

    ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
    ASSERT_EQUAL_128(kInitialHigh, kInitialLow, q25);
    ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
    ASSERT_EQUAL_128(kInitialHigh, kInitialLow, q27);
  }
}
7621 
// Loads every ordered (high, low) pair of kHalfValues as a 128-bit literal
// into a Q register and checks that both halves read back unchanged.
TEST(ldr_literal_values_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  static const uint64_t kHalfValues[] = {0x8000000000000000,
                                         0x7fffffffffffffff,
                                         0x0000000000000000,
                                         0xffffffffffffffff,
                                         0x00ff00ff00ff00ff,
                                         0x1234567890abcdef};
  const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
  const Register& ref_low64 = x1;
  const Register& ref_high64 = x2;
  const Register& loaded_low64 = x3;
  const Register& loaded_high64 = x4;
  const VRegister& tgt = q0;

  START();
  // x0 counts the literals that did not read back as expected.
  __ Mov(x0, 0);

  for (int i = 0; i < card; i++) {
    __ Mov(ref_low64, kHalfValues[i]);
    for (int j = 0; j < card; j++) {
      __ Mov(ref_high64, kHalfValues[j]);
      __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
      __ Mov(loaded_low64, tgt.V2D(), 0);
      __ Mov(loaded_high64, tgt.V2D(), 1);
      // Compare the low halves; only if they match, compare the high halves.
      // Otherwise Ccmp installs NoFlag, so `ne` holds when either half
      // differs.
      __ Cmp(loaded_low64, ref_low64);
      __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
      // Accumulate instead of overwriting: with Cset, a later matching load
      // would reset x0 and hide an earlier mismatch.
      __ Cinc(x0, x0, ne);
    }
  }
  END();

  if (CAN_RUN()) {
    RUN();

    // If one of the values differs, the trace can be used to identify which
    // one.
    ASSERT_EQUAL_64(0, x0);
  }
}
7663 
TEST(fmov_vec_imm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Immediates written to the single-precision lanes (given as doubles).
  const double kTwenty = 20.0;
  const double kTenTwentyFour = 1024.0;

  START();

  // Single-precision lanes, 64-bit and 128-bit vector forms.
  __ Fmov(v0.V2S(), kTwenty);
  __ Fmov(v1.V4S(), kTenTwentyFour);

  // Half-precision lanes, built from their raw encodings.
  __ Fmov(v2.V4H(), RawbitsToFloat16(0xC500U));
  __ Fmov(v3.V8H(), RawbitsToFloat16(0x4A80U));

  END();
  if (CAN_RUN()) {
    RUN();

    // Every lane of each destination must hold the encoded immediate.
    ASSERT_EQUAL_64(0x41A0000041A00000, d0);
    ASSERT_EQUAL_128(0x4480000044800000, 0x4480000044800000, q1);
    ASSERT_EQUAL_64(0xC500C500C500C500, d2);
    ASSERT_EQUAL_128(0x4A804A804A804A80, 0x4A804A804A804A80, q3);
  }
}
7687 
7688 // TODO: add arbitrary values once load literal to Q registers is supported.
TEST(neon_modimm_fmov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Values that the instruction can encode directly as an immediate.
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const double kMinusThirteen = -13.0;
  // Values that have no immediate encoding.
  const float kNonImmFP32 = 255.0f;
  const double kNonImmFP64 = 12.3456;

  START();
  // Encodable immediates.
  __ Fmov(v11.V2S(), kOne);
  __ Fmov(v12.V4S(), kPointFive);
  __ Fmov(v22.V2D(), kMinusThirteen);
  // Non-encodable immediates.
  __ Fmov(v13.V2S(), kNonImmFP32);
  __ Fmov(v14.V4S(), kNonImmFP32);
  __ Fmov(v23.V2D(), kNonImmFP64);
  // Zero.
  __ Fmov(v1.V2S(), 0.0);
  __ Fmov(v2.V4S(), 0.0);
  __ Fmov(v3.V2D(), 0.0);
  // Positive infinity.
  __ Fmov(v4.V2S(), kFP32PositiveInfinity);
  __ Fmov(v5.V4S(), kFP32PositiveInfinity);
  __ Fmov(v6.V2D(), kFP64PositiveInfinity);
  END();

  if (CAN_RUN()) {
    RUN();

    // Raw bit patterns of each immediate. The "Pair" values hold the 32-bit
    // pattern replicated into both halves of a 64-bit word.
    const uint64_t one_bits = FloatToRawbits(kOne);
    const uint64_t one_pair = one_bits | (one_bits << 32);
    const uint64_t half_bits = FloatToRawbits(kPointFive);
    const uint64_t half_pair = half_bits | (half_bits << 32);
    const uint64_t minus_thirteen_bits = DoubleToRawbits(kMinusThirteen);
    const uint64_t non_imm32_bits = FloatToRawbits(kNonImmFP32);
    const uint64_t non_imm32_pair = non_imm32_bits | (non_imm32_bits << 32);
    const uint64_t non_imm64_bits = DoubleToRawbits(kNonImmFP64);
    const uint64_t inf32_bits = FloatToRawbits(kFP32PositiveInfinity);
    const uint64_t inf32_pair = inf32_bits | (inf32_bits << 32);
    const uint64_t inf64_bits = DoubleToRawbits(kFP64PositiveInfinity);

    // The 2S forms are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0x0, one_pair, q11);
    ASSERT_EQUAL_128(half_pair, half_pair, q12);
    ASSERT_EQUAL_128(minus_thirteen_bits, minus_thirteen_bits, q22);
    ASSERT_EQUAL_128(0x0, non_imm32_pair, q13);
    ASSERT_EQUAL_128(non_imm32_pair, non_imm32_pair, q14);
    ASSERT_EQUAL_128(non_imm64_bits, non_imm64_bits, q23);
    ASSERT_EQUAL_128(0x0, 0x0, q1);
    ASSERT_EQUAL_128(0x0, 0x0, q2);
    ASSERT_EQUAL_128(0x0, 0x0, q3);
    ASSERT_EQUAL_128(0x0, inf32_pair, q4);
    ASSERT_EQUAL_128(inf32_pair, inf32_pair, q5);
    ASSERT_EQUAL_128(inf64_bits, inf64_bits, q6);
  }
}
7744 
7745 
TEST(neon_perm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each source byte is distinct (0x00-0x0f in the first operand, 0x10-0x1f
  // in the second), so every result byte identifies the lane it came from.
  const uint64_t kFirstHigh = 0x0001020304050607;
  const uint64_t kFirstLow = 0x08090a0b0c0d0e0f;
  const uint64_t kSecondHigh = 0x1011121314151617;
  const uint64_t kSecondLow = 0x18191a1b1c1d1e1f;

  START();

  __ Movi(v0.V2D(), kFirstHigh, kFirstLow);
  __ Movi(v1.V2D(), kSecondHigh, kSecondLow);

  // Transpose.
  __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
  __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
  // Zip.
  __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
  __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
  // Unzip.
  __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
  __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
    ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
    ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
    ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
    ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
    ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);
  }
}
7774 
7775 
TEST(neon_copy_dup_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64-bit patterns from which the source vectors are assembled.
  const uint64_t kPat0011 = 0x0011223344556677;
  const uint64_t kPat8899 = 0x8899aabbccddeeff;
  const uint64_t kPatFfed = 0xffeddccbbaae9988;
  const uint64_t kPat7766 = 0x7766554433221100;
  const uint64_t kPat0123 = 0x0123456789abcdef;

  START();

  __ Movi(v0.V2D(), kPat0011, kPat8899);
  __ Movi(v1.V2D(), kPatFfed, kPat7766);
  __ Movi(v2.V2D(), kPatFfed, kPat0011);
  __ Movi(v3.V2D(), kPat7766, kPat8899);
  __ Movi(v4.V2D(), kPat7766, kPat0123);
  __ Movi(v5.V2D(), kPat0011, kPat0123);

  // 128-bit vector destinations.
  __ Dup(v16.V16B(), v0.B(), 0);
  __ Dup(v17.V8H(), v1.H(), 7);
  __ Dup(v18.V4S(), v1.S(), 3);
  __ Dup(v19.V2D(), v0.D(), 0);

  // 64-bit vector destinations.
  __ Dup(v20.V8B(), v0.B(), 0);
  __ Dup(v21.V4H(), v1.H(), 7);
  __ Dup(v22.V2S(), v1.S(), 3);

  // Scalar destinations.
  __ Dup(v23.B(), v0.B(), 0);
  __ Dup(v24.H(), v1.H(), 7);
  __ Dup(v25.S(), v1.S(), 3);
  __ Dup(v26.D(), v0.D(), 0);

  // Destination is also the source register.
  __ Dup(v2.V16B(), v2.B(), 0);
  __ Dup(v3.V8H(), v3.H(), 7);
  __ Dup(v4.V4S(), v4.S(), 0);
  __ Dup(v5.V2D(), v5.D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
    ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
    ASSERT_EQUAL_128(kPat8899, kPat8899, q19);

    // 64-bit forms are expected to leave the upper half clear.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
    ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);

    // Scalar forms are expected to hold just the selected element.
    ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
    ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
    ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
    ASSERT_EQUAL_128(0, kPat8899, q26);

    ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
    ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
    ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
    ASSERT_EQUAL_128(kPat0011, kPat0011, q5);
  }
}
7832 
7833 
TEST(neon_copy_dup_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Value placed in x0 (and therefore w0) before it is broadcast.
  const uint64_t kSource = 0x0011223344556677;

  START();

  __ Mov(x0, kSource);

  // 128-bit vector destinations.
  __ Dup(v16.V16B(), w0);
  __ Dup(v17.V8H(), w0);
  __ Dup(v18.V4S(), w0);
  __ Dup(v19.V2D(), x0);

  // 64-bit vector destinations.
  __ Dup(v20.V8B(), w0);
  __ Dup(v21.V4H(), w0);
  __ Dup(v22.V2S(), w0);

  // Broadcast from the zero register.
  __ Dup(v2.V16B(), wzr);
  __ Dup(v3.V8H(), wzr);
  __ Dup(v4.V4S(), wzr);
  __ Dup(v5.V2D(), xzr);

  END();

  if (CAN_RUN()) {
    RUN();

    // Each lane is expected to hold the low bits of the source register.
    ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
    ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
    ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
    ASSERT_EQUAL_128(kSource, kSource, q19);

    ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
    ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
    ASSERT_EQUAL_128(0, 0x4455667744556677, q22);

    ASSERT_EQUAL_128(0, 0, q2);
    ASSERT_EQUAL_128(0, 0, q3);
    ASSERT_EQUAL_128(0, 0, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7875 
7876 
TEST(neon_copy_ins_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64-bit patterns from which the source and destination vectors are built.
  const uint64_t kPat0011 = 0x0011223344556677;
  const uint64_t kPat8899 = 0x8899aabbccddeeff;
  const uint64_t kPatFfed = 0xffeddccbbaae9988;
  const uint64_t kPat7766 = 0x7766554433221100;
  const uint64_t kPat0123 = 0x0123456789abcdef;
  const uint64_t kPatFedc = 0xfedcba9876543210;

  START();

  // Sources.
  __ Movi(v0.V2D(), kPat0011, kPat8899);
  __ Movi(v1.V2D(), kPatFfed, kPat7766);
  // Destinations that receive an element from another register.
  __ Movi(v16.V2D(), kPat0123, kPatFedc);
  __ Movi(v17.V2D(), kPatFedc, kPat0123);
  __ Movi(v18.V2D(), kPat0011, kPat8899);
  __ Movi(v19.V2D(), kPat0011, kPat8899);

  // Registers used as both source and destination.
  __ Movi(v2.V2D(), 0, kPat0011);
  __ Movi(v3.V2D(), 0, kPat8899);
  __ Movi(v4.V2D(), 0, kPat0123);
  __ Movi(v5.V2D(), 0, kPat0123);

  __ Ins(v16.V16B(), 15, v0.V16B(), 0);
  __ Ins(v17.V8H(), 0, v1.V8H(), 7);
  __ Ins(v18.V4S(), 3, v1.V4S(), 0);
  __ Ins(v19.V2D(), 1, v0.V2D(), 0);

  __ Ins(v2.V16B(), 2, v2.V16B(), 0);
  __ Ins(v3.V8H(), 0, v3.V8H(), 7);
  __ Ins(v4.V4S(), 3, v4.V4S(), 0);
  __ Ins(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the addressed destination element is expected to change.
    ASSERT_EQUAL_128(0xff23456789abcdef, kPatFedc, q16);
    ASSERT_EQUAL_128(kPatFedc, 0x0123456789abffed, q17);
    ASSERT_EQUAL_128(0x3322110044556677, kPat8899, q18);
    ASSERT_EQUAL_128(kPat8899, kPat8899, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
    ASSERT_EQUAL_128(0x89abcdef00000000, kPat0123, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7920 
7921 
TEST(neon_copy_mov_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64-bit patterns from which the source and destination vectors are built.
  const uint64_t kPat0011 = 0x0011223344556677;
  const uint64_t kPat8899 = 0x8899aabbccddeeff;
  const uint64_t kPatFfed = 0xffeddccbbaae9988;
  const uint64_t kPat7766 = 0x7766554433221100;
  const uint64_t kPat0123 = 0x0123456789abcdef;
  const uint64_t kPatFedc = 0xfedcba9876543210;

  START();

  // Sources.
  __ Movi(v0.V2D(), kPat0011, kPat8899);
  __ Movi(v1.V2D(), kPatFfed, kPat7766);
  // Destinations that receive an element from another register.
  __ Movi(v16.V2D(), kPat0123, kPatFedc);
  __ Movi(v17.V2D(), kPatFedc, kPat0123);
  __ Movi(v18.V2D(), kPat0011, kPat8899);
  __ Movi(v19.V2D(), kPat0011, kPat8899);

  // Registers used as both source and destination.
  __ Movi(v2.V2D(), 0, kPat0011);
  __ Movi(v3.V2D(), 0, kPat8899);
  __ Movi(v4.V2D(), 0, kPat0123);
  __ Movi(v5.V2D(), 0, kPat0123);

  // Element-to-element moves; the expected results below match those of the
  // equivalent Ins sequence.
  __ Mov(v16.V16B(), 15, v0.V16B(), 0);
  __ Mov(v17.V8H(), 0, v1.V8H(), 7);
  __ Mov(v18.V4S(), 3, v1.V4S(), 0);
  __ Mov(v19.V2D(), 1, v0.V2D(), 0);

  __ Mov(v2.V16B(), 2, v2.V16B(), 0);
  __ Mov(v3.V8H(), 0, v3.V8H(), 7);
  __ Mov(v4.V4S(), 3, v4.V4S(), 0);
  __ Mov(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the addressed destination element is expected to change.
    ASSERT_EQUAL_128(0xff23456789abcdef, kPatFedc, q16);
    ASSERT_EQUAL_128(kPatFedc, 0x0123456789abffed, q17);
    ASSERT_EQUAL_128(0x3322110044556677, kPat8899, q18);
    ASSERT_EQUAL_128(kPat8899, kPat8899, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
    ASSERT_EQUAL_128(0x89abcdef00000000, kPat0123, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7965 
7966 
TEST(neon_copy_smov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source vector halves. The elements read below include some with the top
  // bit set and some with it clear.
  const uint64_t kSrcHigh = 0x0123456789abcdef;
  const uint64_t kSrcLow = 0xfedcba9876543210;

  START();

  __ Movi(v0.V2D(), kSrcHigh, kSrcLow);

  // Byte and halfword elements into W registers.
  __ Smov(w0, v0.B(), 7);
  __ Smov(w1, v0.B(), 15);

  __ Smov(w2, v0.H(), 0);
  __ Smov(w3, v0.H(), 3);

  // Byte, halfword and word elements into X registers.
  __ Smov(x4, v0.B(), 7);
  __ Smov(x5, v0.B(), 15);

  __ Smov(x6, v0.H(), 0);
  __ Smov(x7, v0.H(), 3);

  __ Smov(x16, v0.S(), 0);
  __ Smov(x17, v0.S(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // Results are expected to be sign-extended to the destination width.
    ASSERT_EQUAL_32(0xfffffffe, w0);
    ASSERT_EQUAL_32(0x00000001, w1);
    ASSERT_EQUAL_32(0x00003210, w2);
    ASSERT_EQUAL_32(0xfffffedc, w3);
    ASSERT_EQUAL_64(0xfffffffffffffffe, x4);
    ASSERT_EQUAL_64(0x0000000000000001, x5);
    ASSERT_EQUAL_64(0x0000000000003210, x6);
    ASSERT_EQUAL_64(0xfffffffffffffedc, x7);
    ASSERT_EQUAL_64(0x0000000076543210, x16);
    ASSERT_EQUAL_64(0xfffffffffedcba98, x17);
  }
}
8006 
8007 
TEST(neon_copy_umov_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source vector halves.
  const uint64_t kSrcHigh = 0x0123456789abcdef;
  const uint64_t kSrcLow = 0xfedcba9876543210;

  START();

  __ Movi(v0.V2D(), kSrcHigh, kSrcLow);

  // One extraction per element size.
  __ Umov(w0, v0.B(), 15);
  __ Umov(w1, v0.H(), 0);
  __ Umov(w2, v0.S(), 3);
  __ Umov(x3, v0.D(), 1);

  // The same S and D extractions through Mov; identical results are expected.
  __ Mov(w4, v0.S(), 3);
  __ Mov(x5, v0.D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_32(0x00000001, w0);
    ASSERT_EQUAL_32(0x00003210, w1);
    ASSERT_EQUAL_32(0x01234567, w2);
    ASSERT_EQUAL_64(kSrcHigh, x3);
    ASSERT_EQUAL_32(0x01234567, w4);
    ASSERT_EQUAL_64(kSrcHigh, x5);
  }
}
8036 
8037 
TEST(neon_copy_ins_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Value held in the general-purpose source register.
  const uint64_t kGeneral = 0x0011223344556677;
  // 64-bit patterns used to pre-fill the destination vectors.
  const uint64_t kPat8899 = 0x8899aabbccddeeff;
  const uint64_t kPat0123 = 0x0123456789abcdef;
  const uint64_t kPatFedc = 0xfedcba9876543210;

  START();

  __ Mov(x0, kGeneral);
  __ Movi(v16.V2D(), kPat0123, kPatFedc);
  __ Movi(v17.V2D(), kPatFedc, kPat0123);
  __ Movi(v18.V2D(), kGeneral, kPat8899);
  __ Movi(v19.V2D(), kGeneral, kPat8899);

  // Destinations whose upper half starts out as zero.
  __ Movi(v2.V2D(), 0, kGeneral);
  __ Movi(v3.V2D(), 0, kPat8899);
  __ Movi(v4.V2D(), 0, kPat0123);
  __ Movi(v5.V2D(), 0, kPat0123);

  __ Ins(v16.V16B(), 15, w0);
  __ Ins(v17.V8H(), 0, w0);
  __ Ins(v18.V4S(), 3, w0);
  __ Ins(v19.V2D(), 0, x0);

  __ Ins(v2.V16B(), 2, w0);
  __ Ins(v3.V8H(), 0, w0);
  __ Ins(v4.V4S(), 3, w0);
  __ Ins(v5.V2D(), 1, x0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the addressed element is expected to change.
    ASSERT_EQUAL_128(0x7723456789abcdef, kPatFedc, q16);
    ASSERT_EQUAL_128(kPatFedc, 0x0123456789ab6677, q17);
    ASSERT_EQUAL_128(0x4455667744556677, kPat8899, q18);
    ASSERT_EQUAL_128(kGeneral, kGeneral, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
    ASSERT_EQUAL_128(0x4455667700000000, kPat0123, q4);
    ASSERT_EQUAL_128(kGeneral, kPat0123, q5);
  }
}
8080 
8081 
TEST(neon_extract_ext) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64-bit patterns from which the source vectors are assembled.
  const uint64_t kPat0011 = 0x0011223344556677;
  const uint64_t kPat8899 = 0x8899aabbccddeeff;
  const uint64_t kPatFfed = 0xffeddccbbaae9988;
  const uint64_t kPat7766 = 0x7766554433221100;

  START();

  // Sources for the 16B forms.
  __ Movi(v0.V2D(), kPat0011, kPat8899);
  __ Movi(v1.V2D(), kPatFfed, kPat7766);

  // Sources for the 8B forms.
  __ Movi(v2.V2D(), 0, kPat0011);
  __ Movi(v3.V2D(), 0, kPat8899);

  // The aliasing cases come last in each group because they overwrite the
  // sources used by the preceding instructions.
  __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
  __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
  __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8);  // Dest is same as one Src
  __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8);  // All reg are the same

  __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
  __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
  __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4);  // Dest is same as one Src
  __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4);  // All reg are the same

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(kPat0011, kPat8899, q16);
    ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
    ASSERT_EQUAL_128(kPat7766, kPat0011, q1);
    ASSERT_EQUAL_128(kPat8899, kPat0011, q0);

    ASSERT_EQUAL_128(0, kPat0011, q18);
    ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
    ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
    ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
  }
}
8119 
8120 
TEST(neon_3different_uaddl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kZero = 0;
  // Byte-lane operands.
  const uint64_t kBytesLhs = 0x00010280810e0fff;
  const uint64_t kBytesRhs = 0x0101010101010101;
  // Halfword-lane operands.
  const uint64_t kHalvesLhsA = 0x0000000180008001;
  const uint64_t kHalvesLhsB = 0x000e000ff000ffff;
  const uint64_t kHalvesRhs = 0x0001000100010001;
  // Word-lane operands.
  const uint64_t kWordsLhsA = 0x0000000000000001;
  const uint64_t kWordsLhsB = 0x80000001ffffffff;
  const uint64_t kWordsRhs = 0x0000000100000001;

  START();

  // Destinations are cleared first; operands live in the low 64 bits only.
  __ Movi(v0.V2D(), kZero, kZero);
  __ Movi(v1.V2D(), 0, kBytesLhs);
  __ Movi(v2.V2D(), 0, kBytesRhs);

  __ Movi(v3.V2D(), kZero, kZero);
  __ Movi(v4.V2D(), kZero, kZero);
  __ Movi(v5.V2D(), 0, kHalvesLhsA);
  __ Movi(v6.V2D(), 0, kHalvesLhsB);
  __ Movi(v7.V2D(), 0, kHalvesRhs);

  __ Movi(v16.V2D(), kZero, kZero);
  __ Movi(v17.V2D(), kZero, kZero);
  __ Movi(v18.V2D(), 0, kWordsLhsA);
  __ Movi(v19.V2D(), 0, kWordsLhsB);
  __ Movi(v20.V2D(), 0, kWordsRhs);

  // 8B -> 8H.
  __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());

  // 4H -> 4S.
  __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
  __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());

  // 2S -> 2D.
  __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
  __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());


  END();

  if (CAN_RUN()) {
    RUN();

    // Sums that overflow the source lane width are expected to carry into the
    // wider destination lane.
    ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0);
    ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3);
    ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4);
    ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16);
    ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17);
  }
}
8163 
8164 
TEST(neon_3different_addhn_subhn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Operands for the instructions that write the lower half of the result.
  const uint64_t kLhsLowerHigh = 0x7f0100ff81807f01;
  const uint64_t kLhsLowerLow = 0x8081ff00017f8081;
  const uint64_t kRhsLowerHigh = 0x80008001ffff0000;
  const uint64_t kRhsLowerLow = 0xffff000000017fff;
  // Operands for the "2" variants, which write the upper half.
  const uint64_t kLhsUpperHigh = 0x80000000ffffffff;
  const uint64_t kLhsUpperLow = 0x000000007fffffff;
  const uint64_t kRhsUpperHigh = 0x8000000000000001;
  const uint64_t kRhsUpperLow = 0x7fffffffffffffff;

  START();

  __ Movi(v0.V2D(), kLhsLowerHigh, kLhsLowerLow);
  __ Movi(v1.V2D(), kRhsLowerHigh, kRhsLowerLow);
  __ Movi(v2.V2D(), kLhsUpperHigh, kLhsUpperLow);
  __ Movi(v3.V2D(), kRhsUpperHigh, kRhsUpperLow);
  // v4 is loaded here but is not referenced by any instruction below.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each pair fills one destination: first the lower half, then the upper.
  __ Addhn(v16.V8B(), v0.V8H(), v1.V8H());
  __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
  __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H());
  __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
  __ Subhn(v18.V8B(), v0.V8H(), v1.V8H());
  __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
  __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H());
  __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16);
    ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17);
    ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18);
    ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19);
  }
}
8196 
TEST(neon_d_only_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The upper halves are deliberately non-zero: only the D (lower) part takes
  // part in the operations and every result is expected to have a clear upper
  // half.
  const uint64_t kUpperA = 0xaaaaaaaaaaaaaaaa;
  const uint64_t kUpperB = 0x5555555555555555;
  const uint64_t kUpperOnes = 0xffffffffffffffff;
  // D-sized operands.
  const uint64_t kOperand0 = 0xf0000000f000f0f0;
  const uint64_t kOperand1 = 0x7fffffff7fff7f7f;
  const uint64_t kOperand2 = 0x1000000010001010;

  START();

  __ Movi(v0.V2D(), kUpperA, kOperand0);
  __ Movi(v1.V2D(), kUpperB, kOperand1);
  __ Movi(v2.V2D(), kUpperA, kOperand2);
  // Register shift amounts: +2 and -2.
  __ Movi(v3.V2D(), kUpperOnes, 2);
  __ Movi(v4.V2D(), kUpperOnes, -2);

  // Addition and subtraction.
  __ Add(d16, d0, d0);
  __ Add(d17, d1, d1);
  __ Add(d18, d2, d2);
  __ Sub(d19, d0, d0);
  __ Sub(d20, d0, d1);
  __ Sub(d21, d1, d0);
  // Shifts by register.
  __ Ushl(d22, d0, d3);
  __ Ushl(d23, d0, d4);
  __ Sshl(d24, d0, d3);
  __ Sshl(d25, d0, d4);
  // Shifts by immediate.
  __ Ushr(d26, d0, 1);
  __ Sshr(d27, d0, 3);
  __ Shl(d28, d0, 0);
  __ Shl(d29, d0, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q18);
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x7000000170017171, q20);
    ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21);
    ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22);
    ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23);
    ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24);
    ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q26);
    ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27);
    ASSERT_EQUAL_128(0, kOperand0, q28);
    ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29);
  }
}
8244 
8245 
TEST(neon_sqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // For each lane size the three inputs are: all bits set except the top one,
  // only the top bit set, and the value one.
  const uint64_t kOne = 1;
  const uint64_t kAllButTopB = 0x7f;
  const uint64_t kTopOnlyB = 0x80;
  const uint64_t kAllButTopH = 0x7fff;
  const uint64_t kTopOnlyH = 0x8000;
  const uint64_t kAllButTopS = 0x7fffffff;
  const uint64_t kTopOnlyS = 0x80000000;
  const uint64_t kAllButTopD = 0x7fffffffffffffff;
  const uint64_t kTopOnlyD = 0x8000000000000000;

  START();

  // v0-v2 are reloaded before each lane size.
  // B lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopB);
  __ Movi(v1.V2D(), 0x0, kTopOnlyB);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Sqshl(b16, b0, 1);
  __ Sqshl(b17, b1, 1);
  __ Sqshl(b18, b2, 1);

  // H lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopH);
  __ Movi(v1.V2D(), 0x0, kTopOnlyH);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Sqshl(h19, h0, 1);
  __ Sqshl(h20, h1, 1);
  __ Sqshl(h21, h2, 1);

  // S lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopS);
  __ Movi(v1.V2D(), 0x0, kTopOnlyS);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Sqshl(s22, s0, 1);
  __ Sqshl(s23, s1, 1);
  __ Sqshl(s24, s2, 1);

  // D lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopD);
  __ Movi(v1.V2D(), 0x0, kTopOnlyD);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Sqshl(d25, d0, 1);
  __ Sqshl(d26, d1, 1);
  __ Sqshl(d27, d2, 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // The first two inputs of each size are expected to come back unchanged;
    // only the value one is expected to be doubled.
    ASSERT_EQUAL_128(0, 0x7f, q16);
    ASSERT_EQUAL_128(0, 0x80, q17);
    ASSERT_EQUAL_128(0, 0x02, q18);

    ASSERT_EQUAL_128(0, 0x7fff, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0x0002, q21);

    ASSERT_EQUAL_128(0, 0x7fffffff, q22);
    ASSERT_EQUAL_128(0, 0x80000000, q23);
    ASSERT_EQUAL_128(0, 0x00000002, q24);

    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
  }
}
8301 
8302 
TEST(neon_uqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // For each lane size the three inputs are: all bits set except the top one,
  // only the top bit set, and the value one.
  const uint64_t kOne = 1;
  const uint64_t kAllButTopB = 0x7f;
  const uint64_t kTopOnlyB = 0x80;
  const uint64_t kAllButTopH = 0x7fff;
  const uint64_t kTopOnlyH = 0x8000;
  const uint64_t kAllButTopS = 0x7fffffff;
  const uint64_t kTopOnlyS = 0x80000000;
  const uint64_t kAllButTopD = 0x7fffffffffffffff;
  const uint64_t kTopOnlyD = 0x8000000000000000;

  START();

  // v0-v2 are reloaded before each lane size.
  // B lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopB);
  __ Movi(v1.V2D(), 0x0, kTopOnlyB);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Uqshl(b16, b0, 1);
  __ Uqshl(b17, b1, 1);
  __ Uqshl(b18, b2, 1);

  // H lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopH);
  __ Movi(v1.V2D(), 0x0, kTopOnlyH);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Uqshl(h19, h0, 1);
  __ Uqshl(h20, h1, 1);
  __ Uqshl(h21, h2, 1);

  // S lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopS);
  __ Movi(v1.V2D(), 0x0, kTopOnlyS);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Uqshl(s22, s0, 1);
  __ Uqshl(s23, s1, 1);
  __ Uqshl(s24, s2, 1);

  // D lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopD);
  __ Movi(v1.V2D(), 0x0, kTopOnlyD);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Uqshl(d25, d0, 1);
  __ Uqshl(d26, d1, 1);
  __ Uqshl(d27, d2, 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // The first input is expected to be doubled, the second to come back as
    // all ones for the lane, and the value one to be doubled.
    ASSERT_EQUAL_128(0, 0xfe, q16);
    ASSERT_EQUAL_128(0, 0xff, q17);
    ASSERT_EQUAL_128(0, 0x02, q18);

    ASSERT_EQUAL_128(0, 0xfffe, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x0002, q21);

    ASSERT_EQUAL_128(0, 0xfffffffe, q22);
    ASSERT_EQUAL_128(0, 0xffffffff, q23);
    ASSERT_EQUAL_128(0, 0x00000002, q24);

    ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
  }
}
8358 
8359 
TEST(neon_sqshlu_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // For each lane size the three inputs are: all bits set except the top one,
  // only the top bit set, and the value one.
  const uint64_t kOne = 1;
  const uint64_t kAllButTopB = 0x7f;
  const uint64_t kTopOnlyB = 0x80;
  const uint64_t kAllButTopH = 0x7fff;
  const uint64_t kTopOnlyH = 0x8000;
  const uint64_t kAllButTopS = 0x7fffffff;
  const uint64_t kTopOnlyS = 0x80000000;
  const uint64_t kAllButTopD = 0x7fffffffffffffff;
  const uint64_t kTopOnlyD = 0x8000000000000000;

  START();

  // v0-v2 are reloaded before each lane size.
  // B lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopB);
  __ Movi(v1.V2D(), 0x0, kTopOnlyB);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Sqshlu(b16, b0, 2);
  __ Sqshlu(b17, b1, 2);
  __ Sqshlu(b18, b2, 2);

  // H lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopH);
  __ Movi(v1.V2D(), 0x0, kTopOnlyH);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Sqshlu(h19, h0, 2);
  __ Sqshlu(h20, h1, 2);
  __ Sqshlu(h21, h2, 2);

  // S lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopS);
  __ Movi(v1.V2D(), 0x0, kTopOnlyS);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Sqshlu(s22, s0, 2);
  __ Sqshlu(s23, s1, 2);
  __ Sqshlu(s24, s2, 2);

  // D lanes.
  __ Movi(v0.V2D(), 0x0, kAllButTopD);
  __ Movi(v1.V2D(), 0x0, kTopOnlyD);
  __ Movi(v2.V2D(), 0x0, kOne);
  __ Sqshlu(d25, d0, 2);
  __ Sqshlu(d26, d1, 2);
  __ Sqshlu(d27, d2, 2);

  END();

  if (CAN_RUN()) {
    RUN();

    // Expected per size: all ones for the first input, zero for the second,
    // and four for the value one.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x00, q17);
    ASSERT_EQUAL_128(0, 0x04, q18);

    ASSERT_EQUAL_128(0, 0xffff, q19);
    ASSERT_EQUAL_128(0, 0x0000, q20);
    ASSERT_EQUAL_128(0, 0x0004, q21);

    ASSERT_EQUAL_128(0, 0xffffffff, q22);
    ASSERT_EQUAL_128(0, 0x00000000, q23);
    ASSERT_EQUAL_128(0, 0x00000004, q24);

    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000004, q27);
  }
}
8415 
8416 
TEST(neon_sshll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // One source vector per source lane size (bytes, halfwords, words).
  const uint64_t kBytesHigh = 0x7f0100ff81807f01;
  const uint64_t kBytesLow = 0x8081ff00017f8081;
  const uint64_t kHalvesHigh = 0x80008001ffff0000;
  const uint64_t kHalvesLow = 0xffff000000017fff;
  const uint64_t kWordsHigh = 0x80000000ffffffff;
  const uint64_t kWordsLow = 0x000000007fffffff;

  START();

  __ Movi(v0.V2D(), kBytesHigh, kBytesLow);
  __ Movi(v1.V2D(), kHalvesHigh, kHalvesLow);
  __ Movi(v2.V2D(), kWordsHigh, kWordsLow);

  // Each pair covers the lower-half form and the upper-half ("2") form.
  __ Sshll(v16.V8H(), v0.V8B(), 4);
  __ Sshll2(v17.V8H(), v0.V16B(), 4);

  __ Sshll(v18.V4S(), v1.V4H(), 8);
  __ Sshll2(v19.V4S(), v1.V8H(), 8);

  __ Sshll(v20.V2D(), v2.V2S(), 16);
  __ Sshll2(v21.V2D(), v2.V4S(), 16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16);
    ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17);
    ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18);
    ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
    ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21);
  }
}
8448 
TEST(neon_shll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // One source vector per source lane size (bytes, halfwords, words).
  const uint64_t kBytesHigh = 0x7f0100ff81807f01;
  const uint64_t kBytesLow = 0x8081ff00017f8081;
  const uint64_t kHalvesHigh = 0x80008001ffff0000;
  const uint64_t kHalvesLow = 0xffff000000017fff;
  const uint64_t kWordsHigh = 0x80000000ffffffff;
  const uint64_t kWordsLow = 0x000000007fffffff;

  START();

  __ Movi(v0.V2D(), kBytesHigh, kBytesLow);
  __ Movi(v1.V2D(), kHalvesHigh, kHalvesLow);
  __ Movi(v2.V2D(), kWordsHigh, kWordsLow);

  // The shift amount used for each form equals the source lane width in bits.
  __ Shll(v16.V8H(), v0.V8B(), 8);
  __ Shll2(v17.V8H(), v0.V16B(), 8);

  __ Shll(v18.V4S(), v1.V4H(), 16);
  __ Shll2(v19.V4S(), v1.V8H(), 16);

  __ Shll(v20.V2D(), v2.V2S(), 32);
  __ Shll2(v21.V2D(), v2.V4S(), 32);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16);
    ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17);
    ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18);
    ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20);
    ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21);
  }
}
8480 
TEST(neon_ushll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // One source vector per source lane size (bytes, halfwords, words).
  const uint64_t kBytesHigh = 0x7f0100ff81807f01;
  const uint64_t kBytesLow = 0x8081ff00017f8081;
  const uint64_t kHalvesHigh = 0x80008001ffff0000;
  const uint64_t kHalvesLow = 0xffff000000017fff;
  const uint64_t kWordsHigh = 0x80000000ffffffff;
  const uint64_t kWordsLow = 0x000000007fffffff;

  START();

  __ Movi(v0.V2D(), kBytesHigh, kBytesLow);
  __ Movi(v1.V2D(), kHalvesHigh, kHalvesLow);
  __ Movi(v2.V2D(), kWordsHigh, kWordsLow);

  // Each pair covers the lower-half form and the upper-half ("2") form.
  __ Ushll(v16.V8H(), v0.V8B(), 4);
  __ Ushll2(v17.V8H(), v0.V16B(), 4);

  __ Ushll(v18.V4S(), v1.V4H(), 8);
  __ Ushll2(v19.V4S(), v1.V8H(), 8);

  __ Ushll(v20.V2D(), v2.V2S(), 16);
  __ Ushll2(v21.V2D(), v2.V4S(), 16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16);
    ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17);
    ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18);
    ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
    ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21);
  }
}
8512 
8513 
TEST(neon_sxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // One source vector per source lane size (bytes, halfwords, words).
  const uint64_t kBytesHigh = 0x7f0100ff81807f01;
  const uint64_t kBytesLow = 0x8081ff00017f8081;
  const uint64_t kHalvesHigh = 0x80008001ffff0000;
  const uint64_t kHalvesLow = 0xffff000000017fff;
  const uint64_t kWordsHigh = 0x80000000ffffffff;
  const uint64_t kWordsLow = 0x000000007fffffff;

  START();

  __ Movi(v0.V2D(), kBytesHigh, kBytesLow);
  __ Movi(v1.V2D(), kHalvesHigh, kHalvesLow);
  __ Movi(v2.V2D(), kWordsHigh, kWordsLow);

  // Each pair covers the lower-half form and the upper-half ("2") form.
  __ Sxtl(v16.V8H(), v0.V8B());
  __ Sxtl2(v17.V8H(), v0.V16B());

  __ Sxtl(v18.V4S(), v1.V4H());
  __ Sxtl2(v19.V4S(), v1.V8H());

  __ Sxtl(v20.V2D(), v2.V2S());
  __ Sxtl2(v21.V2D(), v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Each source lane is expected to be sign-extended to twice its width.
    ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16);
    ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21);
  }
}
8545 
8546 
TEST(neon_uxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // One source vector per source lane size (bytes, halfwords, words).
  const uint64_t kBytesHigh = 0x7f0100ff81807f01;
  const uint64_t kBytesLow = 0x8081ff00017f8081;
  const uint64_t kHalvesHigh = 0x80008001ffff0000;
  const uint64_t kHalvesLow = 0xffff000000017fff;
  const uint64_t kWordsHigh = 0x80000000ffffffff;
  const uint64_t kWordsLow = 0x000000007fffffff;

  START();

  __ Movi(v0.V2D(), kBytesHigh, kBytesLow);
  __ Movi(v1.V2D(), kHalvesHigh, kHalvesLow);
  __ Movi(v2.V2D(), kWordsHigh, kWordsLow);

  // Each pair covers the lower-half form and the upper-half ("2") form.
  __ Uxtl(v16.V8H(), v0.V8B());
  __ Uxtl2(v17.V8H(), v0.V16B());

  __ Uxtl(v18.V4S(), v1.V4H());
  __ Uxtl2(v19.V4S(), v1.V8H());

  __ Uxtl(v20.V2D(), v2.V2S());
  __ Uxtl2(v21.V2D(), v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Each source lane is expected to be zero-extended to twice its width.
    ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16);
    ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17);
    ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21);
  }
}
8578 
8579 
TEST(neon_ssra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source vectors; each one also seeds the accumulators it is added into.
  const uint64_t kBytesHigh = 0x7f0100ff81807f01;
  const uint64_t kBytesLow = 0x8081ff00017f8081;
  const uint64_t kHalvesHigh = 0x80008001ffff0000;
  const uint64_t kHalvesLow = 0xffff000000017fff;
  const uint64_t kWordsHigh = 0x80000000ffffffff;
  const uint64_t kWordsLow = 0x000000007fffffff;
  const uint64_t kDoublesAHigh = 0x8000000000000001;
  const uint64_t kDoublesALow = 0x7fffffffffffffff;
  const uint64_t kDoublesBHigh = 0x8000000000000000;
  const uint64_t kDoublesBLow = 0x0000000000000000;

  START();

  __ Movi(v0.V2D(), kBytesHigh, kBytesLow);
  __ Movi(v1.V2D(), kHalvesHigh, kHalvesLow);
  __ Movi(v2.V2D(), kWordsHigh, kWordsLow);
  __ Movi(v3.V2D(), kDoublesAHigh, kDoublesALow);
  __ Movi(v4.V2D(), kDoublesBHigh, kDoublesBLow);

  // Seed the accumulators with a copy of the matching source. v25 is seeded
  // as well but is not used by any instruction below.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // 64-bit and 128-bit vector forms for each lane size.
  __ Ssra(v16.V8B(), v0.V8B(), 4);
  __ Ssra(v17.V16B(), v0.V16B(), 4);

  __ Ssra(v18.V4H(), v1.V4H(), 8);
  __ Ssra(v19.V8H(), v1.V8H(), 8);

  __ Ssra(v20.V2S(), v2.V2S(), 16);
  __ Ssra(v21.V4S(), v2.V4S(), 16);

  __ Ssra(v22.V2D(), v3.V2D(), 32);
  __ Ssra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Ssra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector and scalar forms are expected to clear the upper half.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16);
    ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18);
    ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8632 
TEST(neon_srsra) {
  // SRSRA: the rounding form of the signed shift-right-and-accumulate. Every
  // destination starts as a copy of its source register.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source patterns, one register per element size (B, H, S, D, D).
  const uint64_t bytes_hi = 0x7f0100ff81807f01;
  const uint64_t bytes_lo = 0x8081ff00017f8081;
  const uint64_t halves_hi = 0x80008001ffff0000;
  const uint64_t halves_lo = 0xffff000000017fff;
  const uint64_t words_hi = 0x80000000ffffffff;
  const uint64_t words_lo = 0x000000007fffffff;
  const uint64_t dwords_hi = 0x8000000000000001;
  const uint64_t dwords_lo = 0x7fffffffffffffff;
  const uint64_t signbit_hi = 0x8000000000000000;
  const uint64_t signbit_lo = 0x0000000000000000;

  __ Movi(v0.V2D(), bytes_hi, bytes_lo);
  __ Movi(v1.V2D(), halves_hi, halves_lo);
  __ Movi(v2.V2D(), words_hi, words_lo);
  __ Movi(v3.V2D(), dwords_hi, dwords_lo);
  __ Movi(v4.V2D(), signbit_hi, signbit_lo);

  // Seed the accumulators with the source values.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  // v25 is initialised here but is not used or checked by the rest of the
  // test.
  __ Mov(v25.V2D(), v4.V2D());

  // Byte lanes, shift by 4.
  __ Srsra(v16.V8B(), v0.V8B(), 4);
  __ Srsra(v17.V16B(), v0.V16B(), 4);

  // Halfword lanes, shift by 8.
  __ Srsra(v18.V4H(), v1.V4H(), 8);
  __ Srsra(v19.V8H(), v1.V8H(), 8);

  // Word lanes, shift by 16.
  __ Srsra(v20.V2S(), v2.V2S(), 16);
  __ Srsra(v21.V4S(), v2.V4S(), 16);

  // Doubleword lanes, shift by 32.
  __ Srsra(v22.V2D(), v3.V2D(), 32);
  __ Srsra(v23.V2D(), v4.V2D(), 32);

  // Scalar D form, shift by 48.
  __ Srsra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector forms and the scalar form leave the upper half of the
    // destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16);
    ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18);
    ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8685 
TEST(neon_usra) {
  // USRA: shift each unsigned source lane right by an immediate and add the
  // result to the matching destination lane. Every destination starts as a
  // copy of its source register.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source patterns, one register per element size (B, H, S, D, D).
  const uint64_t bytes_hi = 0x7f0100ff81807f01;
  const uint64_t bytes_lo = 0x8081ff00017f8081;
  const uint64_t halves_hi = 0x80008001ffff0000;
  const uint64_t halves_lo = 0xffff000000017fff;
  const uint64_t words_hi = 0x80000000ffffffff;
  const uint64_t words_lo = 0x000000007fffffff;
  const uint64_t dwords_hi = 0x8000000000000001;
  const uint64_t dwords_lo = 0x7fffffffffffffff;
  const uint64_t signbit_hi = 0x8000000000000000;
  const uint64_t signbit_lo = 0x0000000000000000;

  __ Movi(v0.V2D(), bytes_hi, bytes_lo);
  __ Movi(v1.V2D(), halves_hi, halves_lo);
  __ Movi(v2.V2D(), words_hi, words_lo);
  __ Movi(v3.V2D(), dwords_hi, dwords_lo);
  __ Movi(v4.V2D(), signbit_hi, signbit_lo);

  // Seed the accumulators with the source values.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  // v25 is initialised here but is not used or checked by the rest of the
  // test.
  __ Mov(v25.V2D(), v4.V2D());

  // Byte lanes, shift by 4.
  __ Usra(v16.V8B(), v0.V8B(), 4);
  __ Usra(v17.V16B(), v0.V16B(), 4);

  // Halfword lanes, shift by 8.
  __ Usra(v18.V4H(), v1.V4H(), 8);
  __ Usra(v19.V8H(), v1.V8H(), 8);

  // Word lanes, shift by 16.
  __ Usra(v20.V2S(), v2.V2S(), 16);
  __ Usra(v21.V4S(), v2.V4S(), 16);

  // Doubleword lanes, shift by 32.
  __ Usra(v22.V2D(), v3.V2D(), 32);
  __ Usra(v23.V2D(), v4.V2D(), 32);

  // Scalar D form, shift by 48.
  __ Usra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector forms and the scalar form leave the upper half of the
    // destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16);
    ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18);
    ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8738 
TEST(neon_ursra) {
  // URSRA: the rounding form of the unsigned shift-right-and-accumulate.
  // Every destination starts as a copy of its source register.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source patterns, one register per element size (B, H, S, D, D).
  const uint64_t bytes_hi = 0x7f0100ff81807f01;
  const uint64_t bytes_lo = 0x8081ff00017f8081;
  const uint64_t halves_hi = 0x80008001ffff0000;
  const uint64_t halves_lo = 0xffff000000017fff;
  const uint64_t words_hi = 0x80000000ffffffff;
  const uint64_t words_lo = 0x000000007fffffff;
  const uint64_t dwords_hi = 0x8000000000000001;
  const uint64_t dwords_lo = 0x7fffffffffffffff;
  const uint64_t signbit_hi = 0x8000000000000000;
  const uint64_t signbit_lo = 0x0000000000000000;

  __ Movi(v0.V2D(), bytes_hi, bytes_lo);
  __ Movi(v1.V2D(), halves_hi, halves_lo);
  __ Movi(v2.V2D(), words_hi, words_lo);
  __ Movi(v3.V2D(), dwords_hi, dwords_lo);
  __ Movi(v4.V2D(), signbit_hi, signbit_lo);

  // Seed the accumulators with the source values.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  // v25 is initialised here but is not used or checked by the rest of the
  // test.
  __ Mov(v25.V2D(), v4.V2D());

  // Byte lanes, shift by 4.
  __ Ursra(v16.V8B(), v0.V8B(), 4);
  __ Ursra(v17.V16B(), v0.V16B(), 4);

  // Halfword lanes, shift by 8.
  __ Ursra(v18.V4H(), v1.V4H(), 8);
  __ Ursra(v19.V8H(), v1.V8H(), 8);

  // Word lanes, shift by 16.
  __ Ursra(v20.V2S(), v2.V2S(), 16);
  __ Ursra(v21.V4S(), v2.V4S(), 16);

  // Doubleword lanes, shift by 32.
  __ Ursra(v22.V2D(), v3.V2D(), 32);
  __ Ursra(v23.V2D(), v4.V2D(), 32);

  // Scalar D form, shift by 48.
  __ Ursra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector forms and the scalar form leave the upper half of the
    // destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16);
    ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18);
    ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8791 
8792 
TEST(neon_uqshl_scalar) {
  // Scalar UQSHL (register): unsigned saturating shift, exercised at every
  // scalar width with a shift of +1 (left) and -1 (right).
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of each register feed the scalar operations; the
  // upper halves hold filler patterns.
  const uint64_t fill_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t fill_5 = 0x5555555555555555;
  const uint64_t top_bits_set = 0xf0000000f000f0f0;    // Saturates on << 1.
  const uint64_t top_bits_clear = 0x7fffffff7fff7f7f;  // Fits after << 1.
  const uint64_t shift_plus_one = 0x0000000000000001;
  const uint64_t shift_minus_one = 0xffffffffffffffff;

  __ Movi(v0.V2D(), fill_a, top_bits_set);
  __ Movi(v1.V2D(), fill_5, top_bits_clear);
  __ Movi(v2.V2D(), fill_a, shift_plus_one);
  __ Movi(v3.V2D(), fill_a, shift_minus_one);

  // For each width: {v0, v1} shifted by {+1, -1}.
  __ Uqshl(b16, b0, b2);
  __ Uqshl(b17, b0, b3);
  __ Uqshl(b18, b1, b2);
  __ Uqshl(b19, b1, b3);
  __ Uqshl(h20, h0, h2);
  __ Uqshl(h21, h0, h3);
  __ Uqshl(h22, h1, h2);
  __ Uqshl(h23, h1, h3);
  __ Uqshl(s24, s0, s2);
  __ Uqshl(s25, s0, s3);
  __ Uqshl(s26, s1, s2);
  __ Uqshl(s27, s1, s3);
  __ Uqshl(d28, d0, d2);
  __ Uqshl(d29, d0, d3);
  __ Uqshl(d30, d1, d2);
  __ Uqshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // B results.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x3f, q19);
    // H results.
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fbf, q23);
    // S results.
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfbf, q27);
    // D results.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31);
  }
}
8843 
8844 
TEST(neon_sqshl_scalar) {
  // Scalar SQSHL (register): signed saturating shift, exercised at every
  // scalar width with a shift of +1 (left) and -1 (right).
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of each register feed the scalar operations; the
  // upper halves hold filler patterns.
  const uint64_t fill_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t fill_5 = 0x5555555555555555;
  const uint64_t negative_input = 0xbfffffffbfffbfbf;  // Saturates low on << 1.
  const uint64_t positive_input = 0x4000000040004040;  // Saturates high on << 1.
  const uint64_t shift_plus_one = 0x0000000000000001;
  const uint64_t shift_minus_one = 0xffffffffffffffff;

  __ Movi(v0.V2D(), fill_a, negative_input);
  __ Movi(v1.V2D(), fill_5, positive_input);
  __ Movi(v2.V2D(), fill_a, shift_plus_one);
  __ Movi(v3.V2D(), fill_a, shift_minus_one);

  // For each width: {v0, v1} shifted by {+1, -1}.
  __ Sqshl(b16, b0, b2);
  __ Sqshl(b17, b0, b3);
  __ Sqshl(b18, b1, b2);
  __ Sqshl(b19, b1, b3);
  __ Sqshl(h20, h0, h2);
  __ Sqshl(h21, h0, h3);
  __ Sqshl(h22, h1, h2);
  __ Sqshl(h23, h1, h3);
  __ Sqshl(s24, s0, s2);
  __ Sqshl(s25, s0, s3);
  __ Sqshl(s26, s1, s2);
  __ Sqshl(s27, s1, s3);
  __ Sqshl(d28, d0, d2);
  __ Sqshl(d29, d0, d3);
  __ Sqshl(d30, d1, d2);
  __ Sqshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // B results.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xdf, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);
    // H results.
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfdf, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);
    // S results.
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfdf, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);
    // D results.
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
8895 
8896 
TEST(neon_urshl_scalar) {
  // Scalar URSHL (register): unsigned rounding shift on D registers with a
  // shift of +1 (left) and -1 (right). The expected values show no
  // saturation on the left shifts.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of each register feed the scalar operations; the
  // upper halves hold filler patterns.
  const uint64_t fill_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t fill_5 = 0x5555555555555555;
  const uint64_t top_bits_set = 0xf0000000f000f0f0;
  const uint64_t top_bits_clear = 0x7fffffff7fff7f7f;
  const uint64_t shift_plus_one = 0x0000000000000001;
  const uint64_t shift_minus_one = 0xffffffffffffffff;

  __ Movi(v0.V2D(), fill_a, top_bits_set);
  __ Movi(v1.V2D(), fill_5, top_bits_clear);
  __ Movi(v2.V2D(), fill_a, shift_plus_one);
  __ Movi(v3.V2D(), fill_a, shift_minus_one);

  // {v0, v1} shifted by {+1, -1}.
  __ Urshl(d28, d0, d2);
  __ Urshl(d29, d0, d3);
  __ Urshl(d30, d1, d2);
  __ Urshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
8923 
8924 
TEST(neon_srshl_scalar) {
  // Scalar SRSHL (register): signed rounding shift on D registers with a
  // shift of +1 (left) and -1 (right). The expected values show no
  // saturation on the left shifts.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of each register feed the scalar operations; the
  // upper halves hold filler patterns.
  const uint64_t fill_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t fill_5 = 0x5555555555555555;
  const uint64_t negative_input = 0xbfffffffbfffbfbf;
  const uint64_t positive_input = 0x4000000040004040;
  const uint64_t shift_plus_one = 0x0000000000000001;
  const uint64_t shift_minus_one = 0xffffffffffffffff;

  __ Movi(v0.V2D(), fill_a, negative_input);
  __ Movi(v1.V2D(), fill_5, positive_input);
  __ Movi(v2.V2D(), fill_a, shift_plus_one);
  __ Movi(v3.V2D(), fill_a, shift_minus_one);

  // {v0, v1} shifted by {+1, -1}.
  __ Srshl(d28, d0, d2);
  __ Srshl(d29, d0, d3);
  __ Srshl(d30, d1, d2);
  __ Srshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x8000000080008080, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
8951 
8952 
TEST(neon_uqrshl_scalar) {
  // Scalar UQRSHL (register): unsigned saturating rounding shift, exercised
  // at every scalar width with a shift of +1 (left) and -1 (right).
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of each register feed the scalar operations; the
  // upper halves hold filler patterns.
  const uint64_t fill_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t fill_5 = 0x5555555555555555;
  const uint64_t top_bits_set = 0xf0000000f000f0f0;    // Saturates on << 1.
  const uint64_t top_bits_clear = 0x7fffffff7fff7f7f;  // Fits after << 1.
  const uint64_t shift_plus_one = 0x0000000000000001;
  const uint64_t shift_minus_one = 0xffffffffffffffff;

  __ Movi(v0.V2D(), fill_a, top_bits_set);
  __ Movi(v1.V2D(), fill_5, top_bits_clear);
  __ Movi(v2.V2D(), fill_a, shift_plus_one);
  __ Movi(v3.V2D(), fill_a, shift_minus_one);

  // For each width: {v0, v1} shifted by {+1, -1}.
  __ Uqrshl(b16, b0, b2);
  __ Uqrshl(b17, b0, b3);
  __ Uqrshl(b18, b1, b2);
  __ Uqrshl(b19, b1, b3);
  __ Uqrshl(h20, h0, h2);
  __ Uqrshl(h21, h0, h3);
  __ Uqrshl(h22, h1, h2);
  __ Uqrshl(h23, h1, h3);
  __ Uqrshl(s24, s0, s2);
  __ Uqrshl(s25, s0, s3);
  __ Uqrshl(s26, s1, s2);
  __ Uqrshl(s27, s1, s3);
  __ Uqrshl(d28, d0, d2);
  __ Uqrshl(d29, d0, d3);
  __ Uqrshl(d30, d1, d2);
  __ Uqrshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // B results.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x40, q19);
    // H results.
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fc0, q23);
    // S results.
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfc0, q27);
    // D results.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
9003 
9004 
TEST(neon_sqrshl_scalar) {
  // Scalar SQRSHL (register): signed saturating rounding shift, exercised at
  // every scalar width with a shift of +1 (left) and -1 (right).
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of each register feed the scalar operations; the
  // upper halves hold filler patterns.
  const uint64_t fill_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t fill_5 = 0x5555555555555555;
  const uint64_t negative_input = 0xbfffffffbfffbfbf;  // Saturates low on << 1.
  const uint64_t positive_input = 0x4000000040004040;  // Saturates high on << 1.
  const uint64_t shift_plus_one = 0x0000000000000001;
  const uint64_t shift_minus_one = 0xffffffffffffffff;

  __ Movi(v0.V2D(), fill_a, negative_input);
  __ Movi(v1.V2D(), fill_5, positive_input);
  __ Movi(v2.V2D(), fill_a, shift_plus_one);
  __ Movi(v3.V2D(), fill_a, shift_minus_one);

  // For each width: {v0, v1} shifted by {+1, -1}.
  __ Sqrshl(b16, b0, b2);
  __ Sqrshl(b17, b0, b3);
  __ Sqrshl(b18, b1, b2);
  __ Sqrshl(b19, b1, b3);
  __ Sqrshl(h20, h0, h2);
  __ Sqrshl(h21, h0, h3);
  __ Sqrshl(h22, h1, h2);
  __ Sqrshl(h23, h1, h3);
  __ Sqrshl(s24, s0, s2);
  __ Sqrshl(s25, s0, s3);
  __ Sqrshl(s26, s1, s2);
  __ Sqrshl(s27, s1, s3);
  __ Sqrshl(d28, d0, d2);
  __ Sqrshl(d29, d0, d3);
  __ Sqrshl(d30, d1, d2);
  __ Sqrshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // B results.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xe0, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);
    // H results.
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfe0, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);
    // S results.
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfe0, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);
    // D results.
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
9055 
9056 
TEST(neon_uqadd_scalar) {
  // Scalar UQADD: unsigned saturating add. Each input is added to itself at
  // every scalar width.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of each register feed the scalar operations; the
  // upper halves hold filler patterns.
  const uint64_t fill_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t fill_5 = 0x5555555555555555;
  const uint64_t overflowing = 0xf0000000f000f0f0;  // Doubling saturates.
  const uint64_t near_limit = 0x7fffffff7fff7f7f;   // Doubling just fits.
  const uint64_t small = 0x1000000010001010;        // Doubling fits easily.

  __ Movi(v0.V2D(), fill_a, overflowing);
  __ Movi(v1.V2D(), fill_5, near_limit);
  __ Movi(v2.V2D(), fill_a, small);

  // B.
  __ Uqadd(b16, b0, b0);
  __ Uqadd(b17, b1, b1);
  __ Uqadd(b18, b2, b2);
  // H.
  __ Uqadd(h19, h0, h0);
  __ Uqadd(h20, h1, h1);
  __ Uqadd(h21, h2, h2);
  // S.
  __ Uqadd(s22, s0, s0);
  __ Uqadd(s23, s1, s1);
  __ Uqadd(s24, s2, s2);
  // D.
  __ Uqadd(d25, d0, d0);
  __ Uqadd(d26, d1, d1);
  __ Uqadd(d27, d2, d2);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0xfe, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);

    ASSERT_EQUAL_128(0, 0xffff, q19);
    ASSERT_EQUAL_128(0, 0xfefe, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);

    ASSERT_EQUAL_128(0, 0xffffffff, q22);
    ASSERT_EQUAL_128(0, 0xfffefefe, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);

    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9098 
9099 
TEST(neon_sqadd_scalar) {
  // Scalar SQADD: signed saturating add. Each input is added to itself at
  // every scalar width.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of each register feed the scalar operations; the
  // upper halves hold filler patterns.
  const uint64_t fill_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t fill_5 = 0x5555555555555555;
  const uint64_t very_negative = 0x8000000180018181;  // Saturates to minimum.
  const uint64_t very_positive = 0x7fffffff7fff7f7f;  // Saturates to maximum.
  const uint64_t small = 0x1000000010001010;          // Doubling fits.

  __ Movi(v0.V2D(), fill_a, very_negative);
  __ Movi(v1.V2D(), fill_5, very_positive);
  __ Movi(v2.V2D(), fill_a, small);

  // B.
  __ Sqadd(b16, b0, b0);
  __ Sqadd(b17, b1, b1);
  __ Sqadd(b18, b2, b2);
  // H.
  __ Sqadd(h19, h0, h0);
  __ Sqadd(h20, h1, h1);
  __ Sqadd(h21, h2, h2);
  // S.
  __ Sqadd(s22, s0, s0);
  __ Sqadd(s23, s1, s1);
  __ Sqadd(s24, s2, s2);
  // D.
  __ Sqadd(d25, d0, d0);
  __ Sqadd(d26, d1, d1);
  __ Sqadd(d27, d2, d2);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0x7f, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);

    ASSERT_EQUAL_128(0, 0x8000, q19);
    ASSERT_EQUAL_128(0, 0x7fff, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);

    ASSERT_EQUAL_128(0, 0x80000000, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);

    ASSERT_EQUAL_128(0, 0x8000000000000000, q25);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9141 
9142 
TEST(neon_uqsub_scalar) {
  // Scalar UQSUB: unsigned saturating subtract. For each width the test
  // computes x - x, larger - smaller, and smaller - larger (which saturates
  // to zero).
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of each register feed the scalar operations; the
  // upper halves hold filler patterns.
  const uint64_t fill_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t fill_5 = 0x5555555555555555;
  const uint64_t larger = 0xf0000000f000f0f0;
  const uint64_t smaller = 0x7fffffff7fff7f7f;

  __ Movi(v0.V2D(), fill_a, larger);
  __ Movi(v1.V2D(), fill_5, smaller);

  // B.
  __ Uqsub(b16, b0, b0);
  __ Uqsub(b17, b0, b1);
  __ Uqsub(b18, b1, b0);
  // H.
  __ Uqsub(h19, h0, h0);
  __ Uqsub(h20, h0, h1);
  __ Uqsub(h21, h1, h0);
  // S.
  __ Uqsub(s22, s0, s0);
  __ Uqsub(s23, s0, s1);
  __ Uqsub(s24, s1, s0);
  // D.
  __ Uqsub(d25, d0, d0);
  __ Uqsub(d26, d0, d1);
  __ Uqsub(d27, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // B results.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x71, q17);
    ASSERT_EQUAL_128(0, 0, q18);

    // H results.
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x7171, q20);
    ASSERT_EQUAL_128(0, 0, q21);

    // S results.
    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x70017171, q23);
    ASSERT_EQUAL_128(0, 0, q24);

    // D results.
    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x7000000170017171, q26);
    ASSERT_EQUAL_128(0, 0, q27);
  }
}
9186 
9187 
TEST(neon_sqsub_scalar) {
  // Scalar SQSUB: signed saturating subtract. For each width the test
  // computes x - x, negative - positive (saturating to the minimum) and
  // positive - negative (saturating to the maximum).
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of each register feed the scalar operations; the
  // upper halves hold filler patterns.
  const uint64_t fill_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t fill_5 = 0x5555555555555555;
  const uint64_t negative_input = 0xf0000000f000f0f0;
  const uint64_t positive_input = 0x7eeeeeee7eee7e7e;

  __ Movi(v0.V2D(), fill_a, negative_input);
  __ Movi(v1.V2D(), fill_5, positive_input);

  // B.
  __ Sqsub(b16, b0, b0);
  __ Sqsub(b17, b0, b1);
  __ Sqsub(b18, b1, b0);
  // H.
  __ Sqsub(h19, h0, h0);
  __ Sqsub(h20, h0, h1);
  __ Sqsub(h21, h1, h0);
  // S.
  __ Sqsub(s22, s0, s0);
  __ Sqsub(s23, s0, s1);
  __ Sqsub(s24, s1, s0);
  // D.
  __ Sqsub(d25, d0, d0);
  __ Sqsub(d26, d0, d1);
  __ Sqsub(d27, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // B results.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x80, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);

    // H results.
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0x7fff, q21);

    // S results.
    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x80000000, q23);
    ASSERT_EQUAL_128(0, 0x7fffffff, q24);

    // D results.
    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
  }
}
9231 
9232 
TEST(neon_fmla_fmls) {
  // Vector FMLA/FMLS in the 2S, 4S and 2D arrangements. Every accumulator
  // starts as a copy of v0; the multiplicands are v1 and v2.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // Raw bit patterns; the same registers are reinterpreted as S or D lanes
  // depending on the arrangement under test.
  const uint64_t acc_hi = 0x3f80000040000000;
  const uint64_t acc_lo = 0x4100000000000000;
  const uint64_t lhs_hi = 0x400000003f800000;
  const uint64_t lhs_lo = 0x000000003f800000;
  const uint64_t rhs_hi = 0x3f800000ffffffff;
  const uint64_t rhs_lo = 0x7f800000ff800000;

  __ Movi(v0.V2D(), acc_hi, acc_lo);
  __ Movi(v1.V2D(), lhs_hi, lhs_lo);
  __ Movi(v2.V2D(), rhs_hi, rhs_lo);

  // Seed the six accumulators.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V16B(), v0.V16B());
  __ Mov(v18.V16B(), v0.V16B());
  __ Mov(v19.V16B(), v0.V16B());
  __ Mov(v20.V16B(), v0.V16B());
  __ Mov(v21.V16B(), v0.V16B());

  // Multiply-accumulate.
  __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
  __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
  __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());

  // Multiply-subtract.
  __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
  __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
  __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // Fmla results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16);
    ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17);
    ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18);
    // Fmls results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19);
    ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20);
    ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21);
  }
}
9266 
9267 
TEST(neon_fmla_h) {
  // Half-precision vector FMLA in the 8H and 4H arrangements, covering a
  // finite case, an infinite product, and NaN accumulators.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  // Each constant replicates one half-precision value across four H lanes.
  const uint64_t two = 0x4000400040004000;            // 2.0
  const uint64_t forty_five = 0x51a051a051a051a0;     // 45.0
  const uint64_t plus_inf = 0x7c007c007c007c00;       // +Inf
  const uint64_t minus_inf = 0xfc00fc00fc00fc00;      // -Inf
  const uint64_t quiet_nan = 0x7e007e007e007e00;      // Quiet NaN
  const uint64_t signalling_nan = 0x7c017c017c017c01; // Signalling NaN
  const uint64_t zero = 0x0000000000000000;           // +0.0

  __ Movi(v0.V2D(), two, two);
  __ Movi(v1.V2D(), forty_five, forty_five);
  __ Movi(v2.V2D(), plus_inf, plus_inf);
  __ Movi(v3.V2D(), minus_inf, minus_inf);
  __ Movi(v4.V2D(), quiet_nan, quiet_nan);
  __ Movi(v5.V2D(), signalling_nan, signalling_nan);
  __ Movi(v6.V2D(), zero, zero);

  // Accumulators: 2.0, 2.0, quiet NaN, signalling NaN, for each arrangement.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  // 8H arrangement.
  __ Fmla(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmla(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmla(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmla(v19.V8H(), v3.V8H(), v6.V8H());

  // 4H arrangement.
  __ Fmla(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmla(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmla(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmla(v23.V4H(), v3.V4H(), v6.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // 8H results: 92.0, -Inf, quiet NaN, and the quietened signalling NaN.
    ASSERT_EQUAL_128(0x55c055c055c055c0, 0x55c055c055c055c0, v16);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v17);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
    // 4H results: same values in the low half, upper half zero.
    ASSERT_EQUAL_128(0, 0x55c055c055c055c0, v20);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v21);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
  }
}
9313 
9314 
TEST(neon_fmls_h) {
  // Half-precision vector FMLS in the 8H and 4H arrangements, covering a
  // finite case, an infinite product, and NaN accumulators.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  // Each constant replicates one half-precision value across four H lanes.
  const uint64_t two = 0x4000400040004000;            // 2.0
  const uint64_t forty_five = 0x51a051a051a051a0;     // 45.0
  const uint64_t plus_inf = 0x7c007c007c007c00;       // +Inf
  const uint64_t minus_inf = 0xfc00fc00fc00fc00;      // -Inf
  const uint64_t quiet_nan = 0x7e007e007e007e00;      // Quiet NaN
  const uint64_t signalling_nan = 0x7c017c017c017c01; // Signalling NaN
  const uint64_t zero = 0x0000000000000000;           // +0.0

  __ Movi(v0.V2D(), two, two);
  __ Movi(v1.V2D(), forty_five, forty_five);
  __ Movi(v2.V2D(), plus_inf, plus_inf);
  __ Movi(v3.V2D(), minus_inf, minus_inf);
  __ Movi(v4.V2D(), quiet_nan, quiet_nan);
  __ Movi(v5.V2D(), signalling_nan, signalling_nan);
  __ Movi(v6.V2D(), zero, zero);

  // Accumulators: 2.0, 2.0, quiet NaN, signalling NaN, for each arrangement.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  // 8H arrangement.
  __ Fmls(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmls(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmls(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmls(v19.V8H(), v3.V8H(), v6.V8H());

  // 4H arrangement.
  __ Fmls(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmls(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmls(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmls(v23.V4H(), v3.V4H(), v6.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // 8H results: -88.0, +Inf, quiet NaN, and the quietened signalling NaN.
    ASSERT_EQUAL_128(0xd580d580d580d580, 0xd580d580d580d580, v16);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v17);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
    // 4H results: same values in the low half, upper half zero.
    ASSERT_EQUAL_128(0, 0xd580d580d580d580, v20);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v21);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
  }
}
9360 
9361 
TEST(neon_fhm)9362 TEST(neon_fhm) {
9363   // Test basic operation of fmlal{2} and fmlsl{2}. The simulator tests have
9364   // more comprehensive input sets.
9365   SETUP_WITH_FEATURES(CPUFeatures::kFP,
9366                       CPUFeatures::kNEON,
9367                       CPUFeatures::kNEONHalf,
9368                       CPUFeatures::kFHM);
9369 
9370   START();
9371   // Test multiplications:
9372   //        v30                               v31
9373   //  [0]   65504 (max normal)          *     65504 (max normal)
9374   //  [1]   -1                          *     0
9375   //  [2]   2^-24 (min subnormal)       *     2^-24 (min subnormal)
9376   //  [3]   -2^-24 (min subnormal)      *     65504 (max normal)
9377   //  [4]   6.10e-5 (min normal)        *     0.99...
9378   //  [5]   0                           *     -0
9379   //  [6]   -0                          *     0
9380   //  [7]   -Inf                        *     -Inf
9381   __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);
9382   __ Movi(v31.V8H(), 0xfc00000080003bff, 0x7bff000100007bff);
9383 
9384   // Accumulators for use with Fmlal{2}:
9385   // v0.S[0] = 384
9386   // v0.S[1] = -0
9387   __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x8000000043c00000);
9388   // v1.S[0] = -(2^-48 + 2^-71)
9389   // v1.S[1] = 0
9390   __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7800001);
9391   // v2.S[0] = 128
9392   // v2.S[1] = 0
9393   // v2.S[2] = 1
9394   // v2.S[3] = 1
9395   __ Movi(v2.V4S(), 0x3f8000003f800000, 0x0000000043000000);
9396   // v3.S[0] = 0
9397   // v3.S[1] = -0
9398   // v3.S[2] = -0
9399   // v3.S[3] = 0
9400   __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
9401   // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
9402   // results are just the negation of the Fmlal{2} results.
9403   __ Fneg(v4.V4S(), v0.V4S());
9404   __ Fneg(v5.V4S(), v1.V4S());
9405   __ Fneg(v6.V4S(), v2.V4S());
9406   __ Fneg(v7.V4S(), v3.V4S());
9407 
9408   __ Fmlal(v0.V2S(), v30.V2H(), v31.V2H());
9409   __ Fmlal2(v1.V2S(), v30.V2H(), v31.V2H());
9410   __ Fmlal(v2.V4S(), v30.V4H(), v31.V4H());
9411   __ Fmlal2(v3.V4S(), v30.V4H(), v31.V4H());
9412 
9413   __ Fmlsl(v4.V2S(), v30.V2H(), v31.V2H());
9414   __ Fmlsl2(v5.V2S(), v30.V2H(), v31.V2H());
9415   __ Fmlsl(v6.V4S(), v30.V4H(), v31.V4H());
9416   __ Fmlsl2(v7.V4S(), v30.V4H(), v31.V4H());
9417   END();
9418 
9419   if (CAN_RUN()) {
9420     RUN();
9421 
9422     // Fmlal(2S)
9423     // v0.S[0] = 384 + (65504 * 65504) = 4290774528 (rounded from 4290774400)
9424     // v0.S[1] = -0 + (-1 * 0) = -0
9425     ASSERT_EQUAL_128(0x0000000000000000, 0x800000004f7fc006, v0);
9426     // Fmlal2(2S)
9427     // v1.S[0] = -(2^-48 + 2^-71) + (2^-24 * 2^-24) = -2^-71
9428     // v1.S[1] = 0 + (-2^-24 * 65504) = -0.003904...
9429     ASSERT_EQUAL_128(0x0000000000000000, 0xbb7fe0009c000000, v1);
9430     // Fmlal(4S)
9431     // v2.S[0] = 128 + (65504 * 65504) = 4290774016 (rounded from 4290774144)
9432     // v2.S[1] = 0 + (-1 * 0) = 0
9433     // v2.S[2] = 1 + (2^-24 * 2^-24) = 1 (rounded)
9434     // v2.S[3] = 1 + (-2^-24 * 65504) = 0.996...
9435     ASSERT_EQUAL_128(0x3f7f00203f800000, 0x000000004f7fc004, v2);
9436     // Fmlal2(4S)
9437     // v3.S[0] = 0 + (6.103516e-5 * 0.99...) = 6.100535e-5
9438     // v3.S[1] = -0 + (0 * -0) = -0
9439     // v3.S[2] = -0 + (-0 * 0) = -0
9440     // v3.S[3] = 0 + (-Inf * -Inf) = Inf
9441     ASSERT_EQUAL_128(0x7f80000080000000, 0x80000000387fe000, v3);
9442 
9443     // Fmlsl results are mostly the same, but negated.
9444     ASSERT_EQUAL_128(0x0000000000000000, 0x00000000cf7fc006, v4);
9445     ASSERT_EQUAL_128(0x0000000000000000, 0x3b7fe0001c000000, v5);
9446     // In this case: v6.S[1] = 0 - (0 * -0) = 0
9447     ASSERT_EQUAL_128(0xbf7f0020bf800000, 0x00000000cf7fc004, v6);
9448     ASSERT_EQUAL_128(0xff80000000000000, 0x00000000b87fe000, v7);
9449   }
9450 }
9451 
9452 
TEST(neon_byelement_fhm)9453 TEST(neon_byelement_fhm) {
9454   // Test basic operation of fmlal{2} and fmlsl{2} (by element). The simulator
9455   // tests have more comprehensive input sets.
9456   SETUP_WITH_FEATURES(CPUFeatures::kFP,
9457                       CPUFeatures::kNEON,
9458                       CPUFeatures::kNEONHalf,
9459                       CPUFeatures::kFHM);
9460 
9461   START();
9462   // Set up multiplication inputs.
9463   //
9464   // v30.H[0] = 65504 (max normal)
9465   // v30.H[1] = -1
9466   // v30.H[2] = 2^-24 (min subnormal)
9467   // v30.H[3] = -2^-24 (min subnormal)
9468   // v30.H[4] = 6.10e-5 (min normal)
9469   // v30.H[5] = 0
9470   // v30.H[6] = -0
9471   // v30.H[7] = -Inf
9472   __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);
9473 
9474   // Each test instruction should only use one lane of vm, so set up unique
9475   // registers with poison values in other lanes. The poison NaN avoids the
9476   // default NaN (so it shouldn't be encountered accidentally), but is otherwise
9477   // arbitrary.
9478   VRegister poison = v29;
9479   __ Movi(v29.V8H(), 0x7f417f417f417f41, 0x7f417f417f417f41);
9480   // v31.H[0,2,4,...]: 0.9995117 (the value just below 1)
9481   // v31.H[1,3,5,...]: 1.000977 (the value just above 1)
9482   __ Movi(v31.V8H(), 0x3bff3c013bff3c01, 0x3bff3c013bff3c01);
9483   // Set up [v8,v15] as vm inputs.
9484   for (int i = 0; i <= 7; i++) {
9485     VRegister vm(i + 8);
9486     __ Mov(vm, poison);
9487     __ Ins(vm.V8H(), i, v31.V8H(), i);
9488   }
9489 
9490   // Accumulators for use with Fmlal{2}:
9491   // v0.S[0] = 2^-8
9492   // v0.S[1] = 1
9493   __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x3f8000003b800000);
9494   // v1.S[0] = -1.5 * 2^-49
9495   // v1.S[1] = 0
9496   __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7400000);
9497   // v2.S[0] = 0
9498   // v2.S[1] = 2^14
9499   // v2.S[2] = 1.5 * 2^-48
9500   // v2.S[3] = Inf
9501   __ Movi(v2.V4S(), 0x7f80000027c00000, 0xc680000000000000);
9502   // v3.S[0] = 0
9503   // v3.S[1] = -0
9504   // v3.S[2] = -0
9505   // v3.S[3] = 0
9506   __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
9507   // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
9508   // results are just the negation of the Fmlal{2} results.
9509   __ Fneg(v4.V4S(), v0.V4S());
9510   __ Fneg(v5.V4S(), v1.V4S());
9511   __ Fneg(v6.V4S(), v2.V4S());
9512   __ Fneg(v7.V4S(), v3.V4S());
9513 
9514   __ Fmlal(v0.V2S(), v30.V2H(), v8.H(), 0);
9515   __ Fmlal2(v1.V2S(), v30.V2H(), v9.H(), 1);
9516   __ Fmlal(v2.V4S(), v30.V4H(), v10.H(), 2);
9517   __ Fmlal2(v3.V4S(), v30.V4H(), v11.H(), 3);
9518 
9519   __ Fmlsl(v4.V2S(), v30.V2H(), v12.H(), 4);
9520   __ Fmlsl2(v5.V2S(), v30.V2H(), v13.H(), 5);
9521   __ Fmlsl(v6.V4S(), v30.V4H(), v14.H(), 6);
9522   __ Fmlsl2(v7.V4S(), v30.V4H(), v15.H(), 7);
9523   END();
9524 
9525   if (CAN_RUN()) {
9526     RUN();
9527 
9528     // Fmlal(2S)
9529     // v0.S[0] = 2^-8 + (65504 * 1.000977) = 65567.96875 (rounded)
9530     // v0.S[1] = 1 + (-1 * 1.000977) = -0.000976...
9531     ASSERT_EQUAL_128(0x0000000000000000, 0xba80000047800ffc, v0);
9532     // Fmlal2(2S)
9533     // v1.S[0] = (-1.5 * 2^-49) + (2^-24 * 0.9995117) = 5.958e-8 (rounded)
9534     // v1.S[1] = 0 + (-2^-24 * 0.9995117) = -5.958e-8
9535     ASSERT_EQUAL_128(0x0000000000000000, 0xb37fe000337fdfff, v1);
9536     // Fmlal(4S)
9537     // v2.S[0] = 0 + (65504 * 1.000977) = 65566.96875
9538     // v2.S[1] = 2^14 + (-1 * 1.000977) = -16385 (rounded from -16385.5)
9539     // v2.S[2] = (1.5 * 2^-48) + (2^-24 * 1.000977) = 5.966e-8 (rounded up)
9540     // v2.S[3] = Inf + (-2^-24 * 1.000977) = Inf
9541     ASSERT_EQUAL_128(0x7f80000033802001, 0xc680020047800ffc, v2);
9542     // Fmlal2(4S)
9543     // v3.S[0] = 0 + (6.103516e-5 * 0.9995117) = 6.100535e-5
9544     // v3.S[1] = -0 + (0 * 0.9995117) = 0
9545     // v3.S[2] = -0 + (-0 * 0.9995117) = -0
9546     // v3.S[3] = 0 + (-Inf * 0.9995117) = -Inf
9547     ASSERT_EQUAL_128(0xff80000080000000, 0x00000000387fe000, v3);
9548 
9549     // Fmlsl results are mostly the same, but negated.
9550     ASSERT_EQUAL_128(0x0000000000000000, 0x3a800000c7800ffc, v4);
9551     ASSERT_EQUAL_128(0x0000000000000000, 0x337fe000b37fdfff, v5);
9552     ASSERT_EQUAL_128(0xff800000b3802001, 0x46800200c7800ffc, v6);
9553     // In this case: v7.S[2] = 0 - (-0 * 0.9995117) = 0
9554     ASSERT_EQUAL_128(0x7f80000000000000, 0x00000000b87fe000, v7);
9555   }
9556 }
9557 
9558 
TEST(neon_fmulx_scalar) {
  // Check scalar Fmulx in single and double precision: one ordinary product
  // (2.0 * 0.5) followed by every sign combination of zero times infinity.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision inputs.
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmulx(s16, s0, s1);
  // (+/-0) * (+/-Inf), all four sign combinations.
  __ Fmulx(s17, s2, s4);
  __ Fmulx(s18, s2, s5);
  __ Fmulx(s19, s3, s4);
  __ Fmulx(s20, s3, s5);

  // The same sequence in double precision.
  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fmulx(d27, d21, d22);
  __ Fmulx(d28, d23, d25);
  __ Fmulx(d29, d23, d26);
  __ Fmulx(d30, d24, d25);
  __ Fmulx(d31, d24, d26);
  END();

  if (CAN_RUN()) {
    RUN();

    // The zero-times-infinity cases are expected to produce 2.0, negative
    // when exactly one of the two operands is negative.
    ASSERT_EQUAL_FP32(1.0, s16);
    ASSERT_EQUAL_FP32(2.0, s17);
    ASSERT_EQUAL_FP32(-2.0, s18);
    ASSERT_EQUAL_FP32(-2.0, s19);
    ASSERT_EQUAL_FP32(2.0, s20);
    ASSERT_EQUAL_FP64(1.0, d27);
    ASSERT_EQUAL_FP64(2.0, d28);
    ASSERT_EQUAL_FP64(-2.0, d29);
    ASSERT_EQUAL_FP64(-2.0, d30);
    ASSERT_EQUAL_FP64(2.0, d31);
  }
}
9603 
9604 
TEST(neon_fmulx_h) {
  // Check the half-precision vector forms (8H and 4H) of Fmulx using the same
  // cases as the scalar test: 2.0 * 0.5 and zero times infinity.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input has one half-precision value replicated in every lane:
  // v0 = 2.0, v1 = 0.5, v2 = 0, v3 = -0, v4 = Inf, v5 = -Inf.
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
  __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
  __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Fmulx(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmulx(v7.V8H(), v2.V8H(), v4.V8H());
  __ Fmulx(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fmulx(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fmulx(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fmulx(v11.V4H(), v0.V4H(), v1.V4H());
  __ Fmulx(v12.V4H(), v2.V4H(), v4.V4H());
  __ Fmulx(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fmulx(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fmulx(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();
    // Expected lanes: 0x3c00 = 1.0, 0x4000 = 2.0, 0xc000 = -2.0.
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v7);
    ASSERT_EQUAL_128(0xc000c000c000c000, 0xc000c000c000c000, v8);
    ASSERT_EQUAL_128(0xc000c000c000c000, 0xc000c000c000c000, v9);
    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v10);
    // The 4H results are expected to have a zero upper 64 bits.
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v11);
    ASSERT_EQUAL_128(0, 0x4000400040004000, v12);
    ASSERT_EQUAL_128(0, 0xc000c000c000c000, v13);
    ASSERT_EQUAL_128(0, 0xc000c000c000c000, v14);
    ASSERT_EQUAL_128(0, 0x4000400040004000, v15);
  }
}
9643 
9644 
TEST(neon_fmulx_h_scalar) {
  // Check the half-precision scalar form of Fmulx: 2.0 * 0.5, then every sign
  // combination of zero times infinity.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fmulx(h6, h0, h1);
  // (+/-0) * (+/-Inf), all four sign combinations.
  __ Fmulx(h7, h2, h4);
  __ Fmulx(h8, h2, h5);
  __ Fmulx(h9, h3, h4);
  __ Fmulx(h10, h3, h5);
  END();

  if (CAN_RUN()) {
    RUN();
    // The zero-times-infinity cases are expected to produce 2.0, negative
    // when exactly one of the two operands is negative.
    ASSERT_EQUAL_FP16(Float16(1.0), h6);
    ASSERT_EQUAL_FP16(Float16(2.0), h7);
    ASSERT_EQUAL_FP16(Float16(-2.0), h8);
    ASSERT_EQUAL_FP16(Float16(-2.0), h9);
    ASSERT_EQUAL_FP16(Float16(2.0), h10);
  }
}
9674 
TEST(neon_fabd_h) {
  // Check the half-precision vector forms (8H and 4H) of Fabd on ordinary
  // values, signed zeroes and infinities.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input has one half-precision value replicated in every lane:
  // v0 = 2.0, v1 = 0.5, v2 = 0, v3 = -0, v4 = Inf, v5 = -Inf.
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
  __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
  __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Fabd(v6.V8H(), v1.V8H(), v0.V8H());
  __ Fabd(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fabd(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fabd(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fabd(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fabd(v11.V4H(), v1.V4H(), v0.V4H());
  __ Fabd(v12.V4H(), v2.V4H(), v3.V4H());
  __ Fabd(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fabd(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fabd(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected lanes: 0x3e00 = 1.5 (|0.5 - 2.0|), 0x0000 = 0 (|0 - -0|) and
    // 0x7c00 = Inf whenever one operand is infinite.
    ASSERT_EQUAL_128(0x3e003e003e003e00, 0x3e003e003e003e00, v6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v9);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v10);
    // The 4H results are expected to have a zero upper 64 bits.
    ASSERT_EQUAL_128(0, 0x3e003e003e003e00, v11);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v12);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v13);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v14);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v15);
  }
}
9715 
9716 
TEST(neon_fabd_h_scalar) {
  // Check the half-precision scalar form of Fabd on ordinary values, signed
  // zeroes and infinities.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fabd(h16, h1, h0);
  __ Fabd(h17, h2, h3);
  __ Fabd(h18, h2, h5);
  __ Fabd(h19, h3, h4);
  __ Fabd(h20, h3, h5);
  END();

  if (CAN_RUN()) {
    RUN();
    // |0.5 - 2.0| = 1.5, |0 - -0| = 0, and any difference involving an
    // infinity is expected to be +Inf.
    ASSERT_EQUAL_FP16(Float16(1.5), h16);
    ASSERT_EQUAL_FP16(Float16(0.0), h17);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h18);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h19);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h20);
  }
}
9746 
9747 
TEST(neon_fabd_scalar) {
  // Check scalar Fabd in single and double precision on ordinary values,
  // signed zeroes and infinities.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision inputs.
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fabd(s16, s1, s0);
  __ Fabd(s17, s2, s3);
  __ Fabd(s18, s2, s5);
  __ Fabd(s19, s3, s4);
  __ Fabd(s20, s3, s5);

  // The same cases in double precision. Note that the first case uses the
  // operands in the opposite order (2.0, 0.5) to the single-precision one.
  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fabd(d27, d21, d22);
  __ Fabd(d28, d23, d24);
  __ Fabd(d29, d23, d26);
  __ Fabd(d30, d24, d25);
  __ Fabd(d31, d24, d26);
  END();

  if (CAN_RUN()) {
    RUN();

    // 1.5 is expected for both operand orders of (0.5, 2.0); any difference
    // involving an infinity is expected to be +Inf.
    ASSERT_EQUAL_FP32(1.5, s16);
    ASSERT_EQUAL_FP32(0.0, s17);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20);
    ASSERT_EQUAL_FP64(1.5, d27);
    ASSERT_EQUAL_FP64(0.0, d28);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31);
  }
}
9792 
9793 
TEST(neon_frecps_h) {
  // Check the half-precision vector forms (8H and 4H) of Frecps on finite and
  // infinite inputs.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input has one half-precision value replicated in every lane:
  // v0 = 2.0, v1 = -1.0, v2 = 45.0, v3 = Inf, v4 = -Inf.
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
  __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Frecps(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frecps(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frecps(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frecps(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frecps(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frecps(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frecps(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frecps(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected lanes, consistent with 2.0 - (n * m):
    //   0xd580 = -88.0 = 2.0 - (2.0 * 45.0)
    //   0x51e0 = 47.0 = 2.0 - (-1.0 * 45.0)
    //   0xfc00 = -Inf for (2.0, Inf), 0x7c00 = Inf for (2.0, -Inf)
    ASSERT_EQUAL_128(0xd580d580d580d580, 0xd580d580d580d580, v5);
    ASSERT_EQUAL_128(0x51e051e051e051e0, 0x51e051e051e051e0, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
    // The 4H results are expected to have a zero upper 64 bits.
    ASSERT_EQUAL_128(0, 0xd580d580d580d580, v9);
    ASSERT_EQUAL_128(0, 0x51e051e051e051e0, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v12);
  }
}
9829 
9830 
TEST(neon_frecps_h_scalar) {
  // Check the half-precision scalar form of Frecps on finite and infinite
  // inputs.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frecps(h5, h0, h2);
  __ Frecps(h6, h1, h2);
  __ Frecps(h7, h0, h3);
  __ Frecps(h8, h0, h4);
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected values are consistent with 2.0 - (n * m):
    //   2.0 - (2.0 * 45.0) = -88.0
    //   2.0 - (-1.0 * 45.0) = 47.0
    ASSERT_EQUAL_FP16(Float16(-88.0), h5);
    ASSERT_EQUAL_FP16(Float16(47.0), h6);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9859 
9860 
TEST(neon_frsqrts_h) {
  // Check the half-precision vector forms (8H and 4H) of Frsqrts on finite
  // and infinite inputs.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input has one half-precision value replicated in every lane:
  // v0 = 2.0, v1 = -1.0, v2 = 45.0, v3 = Inf, v4 = -Inf.
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
  __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Frsqrts(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frsqrts(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frsqrts(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frsqrts(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frsqrts(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frsqrts(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frsqrts(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frsqrts(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected lanes, consistent with (3.0 - (n * m)) / 2.0:
    //   0xd170 = -43.5 = (3.0 - (2.0 * 45.0)) / 2.0
    //   0x4e00 = 24.0 = (3.0 - (-1.0 * 45.0)) / 2.0
    //   0xfc00 = -Inf for (2.0, Inf), 0x7c00 = Inf for (2.0, -Inf)
    ASSERT_EQUAL_128(0xd170d170d170d170, 0xd170d170d170d170, v5);
    ASSERT_EQUAL_128(0x4e004e004e004e00, 0x4e004e004e004e00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
    // The 4H results are expected to have a zero upper 64 bits.
    ASSERT_EQUAL_128(0, 0xd170d170d170d170, v9);
    ASSERT_EQUAL_128(0, 0x4e004e004e004e00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v12);
  }
}
9896 
9897 
TEST(neon_frsqrts_h_scalar) {
  // Check the half-precision scalar form of Frsqrts on finite and infinite
  // inputs.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frsqrts(h5, h0, h2);
  __ Frsqrts(h6, h1, h2);
  __ Frsqrts(h7, h0, h3);
  __ Frsqrts(h8, h0, h4);
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected values are consistent with (3.0 - (n * m)) / 2.0:
    //   (3.0 - (2.0 * 45.0)) / 2.0 = -43.5
    //   (3.0 - (-1.0 * 45.0)) / 2.0 = 24.0
    ASSERT_EQUAL_FP16(Float16(-43.5), h5);
    ASSERT_EQUAL_FP16(Float16(24.0), h6);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9926 
9927 
TEST(neon_faddp_h) {
  // Check the half-precision vector forms (8H and 4H) of Faddp, including
  // opposite infinities, opposite zeroes and NaN inputs.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input repeats one (even lane, odd lane) pair:
  // v0 = (2.0, 1.0)
  // v1 = (Inf, -Inf)
  // v2 = (-0, 0)
  // v3 = (0x7c01: signalling NaN, 0x7e00: quiet NaN)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x0000800000008000, 0x0000800000008000);
  __ Movi(v3.V2D(), 0x7e007c017e007c01, 0x7e007c017e007c01);

  __ Faddp(v4.V8H(), v1.V8H(), v0.V8H());
  __ Faddp(v5.V8H(), v3.V8H(), v2.V8H());
  __ Faddp(v6.V4H(), v1.V4H(), v0.V4H());
  __ Faddp(v7.V4H(), v3.V4H(), v2.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low lanes of each result hold the pair sums of the first source and
    // the high lanes those of the second source:
    //   Inf + -Inf = 0x7e00 (NaN), 2.0 + 1.0 = 0x4200 (3.0)
    //   sNaN + qNaN = 0x7e01 (the signalling NaN, quietened), -0 + 0 = 0
    ASSERT_EQUAL_128(0x4200420042004200, 0x7e007e007e007e00, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e017e017e017e01, v5);
    // The 4H results are expected to have a zero upper 64 bits.
    ASSERT_EQUAL_128(0, 0x420042007e007e00, v6);
    ASSERT_EQUAL_128(0, 0x000000007e017e01, v7);
  }
}
9954 
9955 
TEST(neon_faddp_scalar) {
  // Check the scalar (pairwise-reduce) form of Faddp for single and double
  // precision, including opposite infinities, NaNs and opposite zeroes.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs, written as (S[0], S[1]):
  // d0 = (2.0, 1.0), d1 = (Inf, -Inf), d2 = (-0, 0).
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x0000000080000000);
  __ Faddp(s0, v0.V2S());
  __ Faddp(s1, v1.V2S());
  __ Faddp(s2, v2.V2S());

  // Double-precision pairs, written as (D[0], D[1]):
  // v3 = (2.0, -2.0), v4 = (0x7ff8..., 0xfff8...: quiet NaNs), v5 = (-0, 0).
  __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Faddp(d3, v3.V2D());
  __ Faddp(d4, v4.V2D());
  __ Faddp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(3.0, s0);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1);
    ASSERT_EQUAL_FP32(0.0, s2);
    ASSERT_EQUAL_FP64(0.0, d3);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4);
    ASSERT_EQUAL_FP64(0.0, d5);
  }
}
9986 
9987 
TEST(neon_faddp_h_scalar) {
  // Check the half-precision scalar (pairwise-reduce) form of Faddp.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision pairs, written as (H[0], H[1]):
  // s0 = (2.0, 1.0), s1 = (Inf, -Inf), s2 = (-0, 0).
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x00008000);
  __ Faddp(h0, v0.V2H());
  __ Faddp(h1, v1.V2H());
  __ Faddp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(3.0), h0);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h1);
    ASSERT_EQUAL_FP16(Float16(0.0), h2);
  }
}
10010 
10011 
TEST(neon_fmaxp_scalar) {
  // Check the scalar (pairwise-reduce) form of Fmaxp for single and double
  // precision, including infinities and a quiet NaN.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs, written as (S[0], S[1]):
  // d0 = (2.0, 1.0), d1 = (Inf, -Inf), d2 = (-Inf, quiet NaN).
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxp(s0, v0.V2S());
  __ Fmaxp(s1, v1.V2S());
  __ Fmaxp(s2, v2.V2S());

  // Double-precision pairs, written as (D[0], D[1]):
  // v3 = (2.0, 1.0), v4 = (Inf, -Inf), v5 = (quiet NaN, Inf).
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fmaxp(d3, v3.V2D());
  __ Fmaxp(d4, v4.V2D());
  __ Fmaxp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // A NaN in either element is expected to give a NaN result.
    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10042 
10043 
TEST(neon_fmaxp_h_scalar) {
  // Check the half-precision scalar (pairwise-reduce) form of Fmaxp.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision pairs, written as (H[0], H[1]):
  // s0 = (2.0, 1.0), s1 = (Inf, -Inf), s2 = (-Inf, quiet NaN).
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxp(h0, v0.V2H());
  __ Fmaxp(h1, v1.V2H());
  __ Fmaxp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // A NaN in either element is expected to give a NaN result.
    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10066 
10067 
TEST(neon_fmax_h) {
  // Check the half-precision vector forms (8H and 4H) of Fmax on ordinary
  // values, infinities, a quiet NaN and a signalling NaN.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input has one half-precision value replicated in every lane:
  // v0 = 1.0, v1 = 2.0, v2 = -Inf, v3 = Inf,
  // v4 = 0x7e00 (quiet NaN), v5 = 0x7c01 (signalling NaN).
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmax(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmax(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmax(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmax(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmax(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmax(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Both NaN kinds are expected to propagate; the signalling NaN comes back
    // with its quiet bit set (0x7c01 -> 0x7e01).
    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v6);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v7);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // The 4H results are expected to have a zero upper 64 bits.
    ASSERT_EQUAL_128(0, 0x4000400040004000, v10);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v11);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10104 
10105 
TEST(neon_fmaxp_h) {
  // Check the half-precision vector forms (8H and 4H) of Fmaxp, including
  // infinities and NaN inputs.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input repeats one (even lane, odd lane) pair:
  // v0 = (2.0, 1.0)
  // v1 = (Inf, -Inf)
  // v2 = (1.0, 0x7e00: quiet NaN)
  // v3 = (2.0, 0x7c01: signalling NaN)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fmaxp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low lanes of each result hold the pair maxima of the first source
    // and the high lanes those of the second source. Both NaN kinds are
    // expected to propagate (the signalling NaN as 0x7e01).
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x4000400040004000, v6);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
    // The 4H results are expected to have a zero upper 64 bits.
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10132 
10133 
TEST(neon_fmaxnm_h) {
  // Check the half-precision vector forms (8H and 4H) of Fmaxnm on ordinary
  // values, infinities, a quiet NaN and a signalling NaN.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input has one half-precision value replicated in every lane:
  // v0 = 1.0, v1 = 2.0, v2 = -Inf, v3 = Inf,
  // v4 = 0x7e00 (quiet NaN), v5 = 0x7c01 (signalling NaN).
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmaxnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmaxnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmaxnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmaxnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmaxnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Against a quiet NaN the numeric operand (1.0) is expected; the
    // signalling NaN is still expected to propagate, quietened to 0x7e01.
    ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v6);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v7);
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v8);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // The 4H results are expected to have a zero upper 64 bits.
    ASSERT_EQUAL_128(0, 0x4000400040004000, v10);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v11);
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10170 
10171 
TEST(neon_fmaxnmp_h) {
  // Check the half-precision vector forms (8H and 4H) of Fmaxnmp, including
  // infinities and NaN inputs.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input repeats one (even lane, odd lane) pair:
  // v0 = (2.0, 1.0)
  // v1 = (Inf, -Inf)
  // v2 = (1.0, 0x7e00: quiet NaN)
  // v3 = (2.0, 0x7c01: signalling NaN)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fmaxnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low lanes of each result hold the pair results of the first source
    // and the high lanes those of the second source. The (1.0, quiet NaN)
    // pair is expected to give 1.0; the (2.0, signalling NaN) pair is
    // expected to give the quietened NaN 0x7e01.
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x4000400040004000, v6);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
    // The 4H results are expected to have a zero upper 64 bits.
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
  }
}
10198 
10199 
TEST(neon_fmaxnmp_scalar) {
  // Check the scalar (pairwise-reduce) form of Fmaxnmp for single and double
  // precision, including infinities and a quiet NaN.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs, written as (S[0], S[1]):
  // d0 = (2.0, 1.0), d1 = (Inf, -Inf), d2 = (-Inf, quiet NaN).
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxnmp(s0, v0.V2S());
  __ Fmaxnmp(s1, v1.V2S());
  __ Fmaxnmp(s2, v2.V2S());

  // Double-precision pairs, written as (D[0], D[1]):
  // v3 = (2.0, 1.0), v4 = (Inf, -Inf), v5 = (-Inf, quiet NaN).
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fmaxnmp(d3, v3.V2D());
  __ Fmaxnmp(d4, v4.V2D());
  __ Fmaxnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // When paired with a quiet NaN, the numeric element (-Inf) is expected.
    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10230 
10231 
TEST(neon_fmaxnmp_h_scalar) {
  // Check the half-precision scalar (pairwise-reduce) form of Fmaxnmp.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision pairs, written as (H[0], H[1]):
  // s0 = (2.0, 1.0), s1 = (Inf, -Inf), s2 = (-Inf, quiet NaN).
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxnmp(h0, v0.V2H());
  __ Fmaxnmp(h1, v1.V2H());
  __ Fmaxnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // When paired with a quiet NaN, the numeric element (-Inf) is expected.
    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10254 
10255 
TEST(neon_fminp_scalar) {
  // Check the scalar (pairwise-reduce) form of Fminp for single and double
  // precision, including infinities and a quiet NaN.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs, written as (S[0], S[1]):
  // d0 = (2.0, 1.0), d1 = (Inf, -Inf), d2 = (-Inf, quiet NaN).
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminp(s0, v0.V2S());
  __ Fminp(s1, v1.V2S());
  __ Fminp(s2, v2.V2S());

  // Double-precision pairs, written as (D[0], D[1]):
  // v3 = (2.0, 1.0), v4 = (Inf, -Inf), v5 = (quiet NaN, Inf).
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fminp(d3, v3.V2D());
  __ Fminp(d4, v4.V2D());
  __ Fminp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // A NaN in either element is expected to give a NaN result.
    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10286 
10287 
TEST(neon_fminp_h_scalar) {
  // Check the half-precision scalar (pairwise-reduce) form of Fminp.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision pairs, written as (H[0], H[1]):
  // s0 = (2.0, 1.0), s1 = (Inf, -Inf), s2 = (-Inf, quiet NaN).
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fminp(h0, v0.V2H());
  __ Fminp(h1, v1.V2H());
  __ Fminp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // A NaN in either element is expected to give a NaN result.
    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10310 
10311 
TEST(neon_fmin_h) {
  // Check the half-precision vector forms (8H and 4H) of Fmin on ordinary
  // values, infinities, a quiet NaN and a signalling NaN.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input has one half-precision value replicated in every lane:
  // v0 = 1.0, v1 = 2.0, v2 = -Inf, v3 = Inf,
  // v4 = 0x7e00 (quiet NaN), v5 = 0x7c01 (signalling NaN).
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmin(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmin(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmin(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmin(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmin(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmin(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmin(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Both NaN kinds are expected to propagate; the signalling NaN comes back
    // with its quiet bit set (0x7c01 -> 0x7e01).
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // The 4H results are expected to have a zero upper 64 bits.
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10348 
10349 
TEST(neon_fminp_h) {
  // Check the half-precision vector forms (8H and 4H) of Fminp, including
  // infinities and NaN inputs.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input repeats one (even lane, odd lane) pair:
  // v0 = (2.0, 1.0)
  // v1 = (Inf, -Inf)
  // v2 = (1.0, 0x7e00: quiet NaN)
  // v3 = (2.0, 0x7c01: signalling NaN)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low lanes of each result hold the pair minima of the first source
    // and the high lanes those of the second source. Both NaN kinds are
    // expected to propagate (the signalling NaN as 0x7e01).
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
    // The 4H results are expected to have a zero upper 64 bits.
    ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10376 
10377 
// Element-wise FP16 minimum-number (Fminnm) on 8H and 4H vectors.
TEST(neon_fminnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // One FP16 encoding replicated into four H lanes.
  const uint64_t one_x4 = 0x3c003c003c003c00;           // 1.0
  const uint64_t two_x4 = 0x4000400040004000;           // 2.0
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;       // -infinity
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;       // +infinity
  const uint64_t qnan_x4 = 0x7e007e007e007e00;          // quiet NaN
  const uint64_t snan_x4 = 0x7c017c017c017c01;          // signalling NaN
  const uint64_t quieted_snan_x4 = 0x7e017e017e017e01;  // snan_x4, quiet bit set

  START();
  __ Movi(v0.V2D(), one_x4, one_x4);
  __ Movi(v1.V2D(), two_x4, two_x4);
  __ Movi(v2.V2D(), neg_inf_x4, neg_inf_x4);
  __ Movi(v3.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v4.V2D(), qnan_x4, qnan_x4);
  __ Movi(v5.V2D(), snan_x4, snan_x4);

  // Full-width (8H) forms.
  __ Fminnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fminnm(v9.V8H(), v5.V8H(), v1.V8H());
  // Half-width (4H) forms of the same operations.
  __ Fminnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fminnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fminnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // A quiet NaN paired with a number yields the number (v8, v12); a
    // signalling NaN yields its quietened form (v9, v13).
    ASSERT_EQUAL_128(one_x4, one_x4, v6);
    ASSERT_EQUAL_128(neg_inf_x4, neg_inf_x4, v7);
    ASSERT_EQUAL_128(one_x4, one_x4, v8);
    ASSERT_EQUAL_128(quieted_snan_x4, quieted_snan_x4, v9);
    ASSERT_EQUAL_128(0, one_x4, v10);
    ASSERT_EQUAL_128(0, neg_inf_x4, v11);
    ASSERT_EQUAL_128(0, one_x4, v12);
    ASSERT_EQUAL_128(0, quieted_snan_x4, v13);
  }
}
10414 
10415 
// Pairwise FP16 minimum-number (Fminnmp) on 8H and 4H vectors.
TEST(neon_fminnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Inputs: each 64-bit pattern alternates two FP16 encodings.
  const uint64_t one_and_two = 0x3c0040003c004000;        // 1.0, 2.0
  const uint64_t neg_and_pos_inf = 0xfc007c00fc007c00;    // -inf, +inf
  const uint64_t qnan_and_one = 0x7e003c007e003c00;       // quiet NaN, 1.0
  const uint64_t snan_and_two = 0x7c0140007c014000;       // signalling NaN, 2.0

  // Expected outputs. Unlike the plain pairwise minimum, a quiet NaN paired
  // with 1.0 reduces to 1.0; the signalling NaN is still returned quietened.
  const uint64_t all_one = 0x3c003c003c003c00;
  const uint64_t all_neg_inf = 0xfc00fc00fc00fc00;
  const uint64_t all_quieted_snan = 0x7e017e017e017e01;
  const uint64_t neg_inf_then_one = 0xfc00fc003c003c00;
  const uint64_t quieted_snan_then_one = 0x7e017e013c003c00;

  START();
  __ Movi(v0.V2D(), one_and_two, one_and_two);
  __ Movi(v1.V2D(), neg_and_pos_inf, neg_and_pos_inf);
  __ Movi(v2.V2D(), qnan_and_one, qnan_and_one);
  __ Movi(v3.V2D(), snan_and_two, snan_and_two);

  __ Fminnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(all_neg_inf, all_one, v6);
    ASSERT_EQUAL_128(all_quieted_snan, all_one, v7);
    ASSERT_EQUAL_128(0, neg_inf_then_one, v8);
    ASSERT_EQUAL_128(0, quieted_snan_then_one, v9);
  }
}
10442 
10443 
// Scalar pairwise minimum-number (Fminnmp) reducing a 2S or 2D vector to a
// single S or D register.
TEST(neon_fminnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Single-precision pairs, packed into one 64-bit immediate.
  const uint64_t s_one_and_two = 0x3f80000040000000;      // 1.0f, 2.0f
  const uint64_t s_neg_and_pos_inf = 0xff8000007f800000;  // -inf, +inf
  const uint64_t s_qnan_and_neg_inf = 0x7fc00000ff800000; // quiet NaN, -inf

  // Double-precision elements.
  const uint64_t d_one = 0x3ff0000000000000;
  const uint64_t d_two = 0x4000000000000000;
  const uint64_t d_neg_inf = 0xfff0000000000000;
  const uint64_t d_pos_inf = 0x7ff0000000000000;
  const uint64_t d_qnan = 0x7ff8000000000000;

  START();
  __ Movi(d0, s_one_and_two);
  __ Movi(d1, s_neg_and_pos_inf);
  __ Movi(d2, s_qnan_and_neg_inf);
  __ Fminnmp(s0, v0.V2S());
  __ Fminnmp(s1, v1.V2S());
  __ Fminnmp(s2, v2.V2S());

  __ Movi(v3.V2D(), d_one, d_two);
  __ Movi(v4.V2D(), d_neg_inf, d_pos_inf);
  __ Movi(v5.V2D(), d_qnan, d_neg_inf);
  __ Fminnmp(d3, v3.V2D());
  __ Fminnmp(d4, v4.V2D());
  __ Fminnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // In the third case of each width the quiet NaN loses to -infinity.
    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10474 
10475 
// Scalar pairwise minimum-number (Fminnmp) reducing a 2H vector to a single
// H register.
TEST(neon_fminnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Two FP16 encodings packed into each 32-bit immediate.
  const uint64_t one_and_two = 0x3c004000;       // 1.0, 2.0
  const uint64_t neg_and_pos_inf = 0xfc007c00;   // -inf, +inf
  const uint64_t qnan_and_neg_inf = 0x7e00fc00;  // quiet NaN, -inf

  START();
  __ Movi(s0, one_and_two);
  __ Movi(s1, neg_and_pos_inf);
  __ Movi(s2, qnan_and_neg_inf);
  __ Fminnmp(h0, v0.V2H());
  __ Fminnmp(h1, v1.V2H());
  __ Fminnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // In the last case the quiet NaN loses to -infinity.
    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10498 
Float16ToV4H(Float16 f)10499 static uint64_t Float16ToV4H(Float16 f) {
10500   uint64_t bits = static_cast<uint64_t>(Float16ToRawbits(f));
10501   return (bits << 48) | (bits << 32) | (bits << 16) | bits;
10502 }
10503 
10504 
FminFmaxFloat16Helper(Float16 n,Float16 m,Float16 min,Float16 max,Float16 minnm,Float16 maxnm)10505 static void FminFmaxFloat16Helper(Float16 n,
10506                                   Float16 m,
10507                                   Float16 min,
10508                                   Float16 max,
10509                                   Float16 minnm,
10510                                   Float16 maxnm) {
10511   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
10512                       CPUFeatures::kFP,
10513                       CPUFeatures::kNEONHalf,
10514                       CPUFeatures::kFPHalf);
10515 
10516   START();
10517   __ Fmov(h0, n);
10518   __ Fmov(h1, m);
10519   __ Fmov(v0.V8H(), n);
10520   __ Fmov(v1.V8H(), m);
10521   __ Fmin(h28, h0, h1);
10522   __ Fmin(v2.V4H(), v0.V4H(), v1.V4H());
10523   __ Fmin(v3.V8H(), v0.V8H(), v1.V8H());
10524   __ Fmax(h29, h0, h1);
10525   __ Fmax(v4.V4H(), v0.V4H(), v1.V4H());
10526   __ Fmax(v5.V8H(), v0.V8H(), v1.V8H());
10527   __ Fminnm(h30, h0, h1);
10528   __ Fminnm(v6.V4H(), v0.V4H(), v1.V4H());
10529   __ Fminnm(v7.V8H(), v0.V8H(), v1.V8H());
10530   __ Fmaxnm(h31, h0, h1);
10531   __ Fmaxnm(v8.V4H(), v0.V4H(), v1.V4H());
10532   __ Fmaxnm(v9.V8H(), v0.V8H(), v1.V8H());
10533   END();
10534 
10535   uint64_t min_vec = Float16ToV4H(min);
10536   uint64_t max_vec = Float16ToV4H(max);
10537   uint64_t minnm_vec = Float16ToV4H(minnm);
10538   uint64_t maxnm_vec = Float16ToV4H(maxnm);
10539 
10540   if (CAN_RUN()) {
10541     RUN();
10542 
10543     ASSERT_EQUAL_FP16(min, h28);
10544     ASSERT_EQUAL_FP16(max, h29);
10545     ASSERT_EQUAL_FP16(minnm, h30);
10546     ASSERT_EQUAL_FP16(maxnm, h31);
10547 
10548 
10549     ASSERT_EQUAL_128(0, min_vec, v2);
10550     ASSERT_EQUAL_128(min_vec, min_vec, v3);
10551     ASSERT_EQUAL_128(0, max_vec, v4);
10552     ASSERT_EQUAL_128(max_vec, max_vec, v5);
10553     ASSERT_EQUAL_128(0, minnm_vec, v6);
10554     ASSERT_EQUAL_128(minnm_vec, minnm_vec, v7);
10555     ASSERT_EQUAL_128(0, maxnm_vec, v8);
10556     ASSERT_EQUAL_128(maxnm_vec, maxnm_vec, v9);
10557   }
10558 }
10559 
MinMaxHelper(Float16 n,Float16 m,bool min,Float16 quiet_nan_substitute=Float16 (0.0))10560 static Float16 MinMaxHelper(Float16 n,
10561                             Float16 m,
10562                             bool min,
10563                             Float16 quiet_nan_substitute = Float16(0.0)) {
10564   const uint64_t kFP16QuietNaNMask = 0x0200;
10565   uint16_t raw_n = Float16ToRawbits(n);
10566   uint16_t raw_m = Float16ToRawbits(m);
10567 
10568   if (IsSignallingNaN(n)) {
10569     // n is signalling NaN.
10570     return RawbitsToFloat16(raw_n | kFP16QuietNaNMask);
10571   } else if (IsSignallingNaN(m)) {
10572     // m is signalling NaN.
10573     return RawbitsToFloat16(raw_m | kFP16QuietNaNMask);
10574   } else if (IsZero(quiet_nan_substitute)) {
10575     if (IsNaN(n)) {
10576       // n is quiet NaN.
10577       return n;
10578     } else if (IsNaN(m)) {
10579       // m is quiet NaN.
10580       return m;
10581     }
10582   } else {
10583     // Substitute n or m if one is quiet, but not both.
10584     if (IsNaN(n) && !IsNaN(m)) {
10585       // n is quiet NaN: replace with substitute.
10586       n = quiet_nan_substitute;
10587     } else if (!IsNaN(n) && IsNaN(m)) {
10588       // m is quiet NaN: replace with substitute.
10589       m = quiet_nan_substitute;
10590     }
10591   }
10592 
10593   uint16_t sign_mask = 0x8000;
10594   if (IsZero(n) && IsZero(m) && ((raw_n & sign_mask) != (raw_m & sign_mask))) {
10595     return min ? Float16(-0.0) : Float16(0.0);
10596   }
10597 
10598   if (FPToDouble(n, kIgnoreDefaultNaN) < FPToDouble(m, kIgnoreDefaultNaN)) {
10599     return min ? n : m;
10600   }
10601   return min ? m : n;
10602 }
10603 
TEST(fmax_fmin_h)10604 TEST(fmax_fmin_h) {
10605   // Use non-standard NaNs to check that the payload bits are preserved.
10606   Float16 snan = RawbitsToFloat16(0x7c12);
10607   Float16 qnan = RawbitsToFloat16(0x7e34);
10608 
10609   Float16 snan_processed = RawbitsToFloat16(0x7e12);
10610   Float16 qnan_processed = qnan;
10611 
10612   VIXL_ASSERT(IsSignallingNaN(snan));
10613   VIXL_ASSERT(IsQuietNaN(qnan));
10614   VIXL_ASSERT(IsQuietNaN(snan_processed));
10615   VIXL_ASSERT(IsQuietNaN(qnan_processed));
10616 
10617   // Bootstrap tests.
10618   FminFmaxFloat16Helper(Float16(0),
10619                         Float16(0),
10620                         Float16(0),
10621                         Float16(0),
10622                         Float16(0),
10623                         Float16(0));
10624   FminFmaxFloat16Helper(Float16(0),
10625                         Float16(1),
10626                         Float16(0),
10627                         Float16(1),
10628                         Float16(0),
10629                         Float16(1));
10630   FminFmaxFloat16Helper(kFP16PositiveInfinity,
10631                         kFP16NegativeInfinity,
10632                         kFP16NegativeInfinity,
10633                         kFP16PositiveInfinity,
10634                         kFP16NegativeInfinity,
10635                         kFP16PositiveInfinity);
10636   FminFmaxFloat16Helper(snan,
10637                         Float16(0),
10638                         snan_processed,
10639                         snan_processed,
10640                         snan_processed,
10641                         snan_processed);
10642   FminFmaxFloat16Helper(Float16(0),
10643                         snan,
10644                         snan_processed,
10645                         snan_processed,
10646                         snan_processed,
10647                         snan_processed);
10648   FminFmaxFloat16Helper(qnan,
10649                         Float16(0),
10650                         qnan_processed,
10651                         qnan_processed,
10652                         Float16(0),
10653                         Float16(0));
10654   FminFmaxFloat16Helper(Float16(0),
10655                         qnan,
10656                         qnan_processed,
10657                         qnan_processed,
10658                         Float16(0),
10659                         Float16(0));
10660   FminFmaxFloat16Helper(qnan,
10661                         snan,
10662                         snan_processed,
10663                         snan_processed,
10664                         snan_processed,
10665                         snan_processed);
10666   FminFmaxFloat16Helper(snan,
10667                         qnan,
10668                         snan_processed,
10669                         snan_processed,
10670                         snan_processed,
10671                         snan_processed);
10672 
10673   // Iterate over all combinations of inputs.
10674   Float16 inputs[] = {RawbitsToFloat16(0x7bff),
10675                       RawbitsToFloat16(0x0400),
10676                       Float16(1.0),
10677                       Float16(0.0),
10678                       RawbitsToFloat16(0xfbff),
10679                       RawbitsToFloat16(0x8400),
10680                       Float16(-1.0),
10681                       Float16(-0.0),
10682                       kFP16PositiveInfinity,
10683                       kFP16NegativeInfinity,
10684                       kFP16QuietNaN,
10685                       kFP16SignallingNaN};
10686 
10687   const int count = sizeof(inputs) / sizeof(inputs[0]);
10688 
10689   for (int in = 0; in < count; in++) {
10690     Float16 n = inputs[in];
10691     for (int im = 0; im < count; im++) {
10692       Float16 m = inputs[im];
10693       FminFmaxFloat16Helper(n,
10694                             m,
10695                             MinMaxHelper(n, m, true),
10696                             MinMaxHelper(n, m, false),
10697                             MinMaxHelper(n, m, true, kFP16PositiveInfinity),
10698                             MinMaxHelper(n, m, false, kFP16NegativeInfinity));
10699     }
10700   }
10701 }
10702 
// Vector Frint32x/Frint64x/Frint32z/Frint64z: round to an integral
// floating-point value that fits a 32- or 64-bit integer.
TEST(neon_frint_saturating) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kFrintToFixedSizedInt);

  // Inputs.
  const uint64_t s_1p0_1p1 = 0x3f8000003f8ccccd;        // 1.0f, 1.1f
  const uint64_t s_1p5_1p9 = 0x3fc000003ff33333;        // 1.5f, 1.9f
  const uint64_t s_small_pos_neg = 0x3e200000be200000;  // 0.15625f, -0.15625f
  const uint64_t s_pos_neg_inf = 0x7f800000ff800000;    // +inf, -inf
  const uint64_t d_neg_inf = 0xfff0000000000000;
  const uint64_t d_pos_inf = 0x7ff0000000000000;

  // Expected outputs.
  const uint64_t zero = 0x0000000000000000;
  const uint64_t s_two_x2 = 0x4000000040000000;         // 2.0f, 2.0f
  const uint64_t s_one_x2 = 0x3f8000003f800000;         // 1.0f, 1.0f
  const uint64_t s_pos_neg_zero = 0x0000000080000000;   // +0.0f, -0.0f
  const uint64_t s_int32_min_x2 = 0xcf000000cf000000;   // -2^31 as float, twice
  const uint64_t s_int64_min_x2 = 0xdf000000df000000;   // -2^63 as float, twice
  const uint64_t d_int32_min = 0xc1e0000000000000;      // -2^31 as double
  const uint64_t d_int64_min = 0xc3e0000000000000;      // -2^63 as double

  START();

  __ Movi(v0.V2D(), s_1p0_1p1, s_1p5_1p9);
  __ Movi(v1.V2D(), s_small_pos_neg, s_pos_neg_inf);
  __ Movi(v2.V2D(), d_neg_inf, d_pos_inf);
  __ Frint32x(v16.V2S(), v0.V2S());
  __ Frint32x(v17.V4S(), v1.V4S());
  __ Frint32x(v18.V2D(), v2.V2D());
  __ Frint64x(v19.V2S(), v0.V2S());
  __ Frint64x(v20.V4S(), v1.V4S());
  __ Frint64x(v21.V2D(), v2.V2D());
  __ Frint32z(v22.V2S(), v0.V2S());
  __ Frint32z(v23.V4S(), v1.V4S());
  __ Frint32z(v24.V2D(), v2.V2D());
  __ Frint64z(v25.V2S(), v0.V2S());
  __ Frint64z(v26.V4S(), v1.V4S());
  __ Frint64z(v27.V2D(), v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // Infinities of either sign are expected to produce the most negative
    // value representable in the target integer width.
    // Frint32x.
    ASSERT_EQUAL_128(zero, s_two_x2, q16);
    ASSERT_EQUAL_128(s_pos_neg_zero, s_int32_min_x2, q17);
    ASSERT_EQUAL_128(d_int32_min, d_int32_min, q18);
    // Frint64x.
    ASSERT_EQUAL_128(zero, s_two_x2, q19);
    ASSERT_EQUAL_128(s_pos_neg_zero, s_int64_min_x2, q20);
    ASSERT_EQUAL_128(d_int64_min, d_int64_min, q21);
    // Frint32z: 1.5f and 1.9f become 1.0f here, rather than 2.0f as above.
    ASSERT_EQUAL_128(zero, s_one_x2, q22);
    ASSERT_EQUAL_128(s_pos_neg_zero, s_int32_min_x2, q23);
    ASSERT_EQUAL_128(d_int32_min, d_int32_min, q24);
    // Frint64z.
    ASSERT_EQUAL_128(zero, s_one_x2, q25);
    ASSERT_EQUAL_128(s_pos_neg_zero, s_int64_min_x2, q26);
    ASSERT_EQUAL_128(d_int64_min, d_int64_min, q27);
  }
}
10745 
10746 
// Table lookup (Tbl) and table lookup extension (Tbx) with one to four table
// registers, in both 16B and 8B destination forms.
TEST(neon_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // Table registers. Multi-register tables use consecutive registers, wrapping
  // from v31 to v0: {v1}, {v0, v1}, {v31, v0, v1} and {v30, v31, v0, v1}.
  __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
  __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
  __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
  __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);

  // Index vectors, one per table size (v4 for one register ... v7 for four).
  // Each byte is an index into the table.
  __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
  __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
  __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
  __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);

  // Pre-fill the Tbl destinations with non-zero data, so that bytes the
  // instruction is expected to zero can be told apart from bytes it left
  // untouched.
  __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  // Tbl: 16B forms (v8-v11), then 8B forms (v12-v15), each with 1-4 tables.
  __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
  __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
  __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());

  // Pre-fill the Tbx destinations with the same data as was used for Tbl
  // above. This must happen before the Tbx instructions, whose expected
  // results below retain some of these bytes.
  __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  // Tbx: the same table/index combinations as the Tbl sequence above.
  __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
  __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
  __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    // Tbl: result bytes whose index falls outside the table are zero. The 8B
    // forms (v12-v15) match the low halves of the 16B results and have a zero
    // upper half.
    ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8);
    ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9);
    ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10);
    ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15);

    // Tbx: as for Tbl, except that result bytes whose index falls outside the
    // table keep the value the destination was pre-filled with. The 8B forms
    // (v20-v23) still have a zero upper half.
    ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16);
    ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17);
    ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18);
    ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23);
  }
}
10820 
10821 
10822 }  // namespace aarch64
10823 }  // namespace vixl
10824