1 // Copyright 2015, ARM Limited
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include <stdio.h>
28 #include <float.h>
29 
30 #include "test-runner.h"
31 #include "test-utils-a64.h"
32 #include "test-simulator-inputs-a64.h"
33 #include "test-simulator-traces-a64.h"
34 #include "vixl/a64/macro-assembler-a64.h"
35 #include "vixl/a64/simulator-a64.h"
36 
37 namespace vixl {
38 
39 // ==== Simulator Tests ====
40 //
41 // These simulator tests check instruction behaviour against a trace taken from
42 // real AArch64 hardware. The same test code is used to generate the trace; the
43 // results are printed to stdout when the test is run with --sim_test_trace.
44 //
45 // The input lists and expected results are stored in test/traces. The expected
46 // results can be regenerated using tools/generate_simulator_traces.py. Adding
47 // a test for a new instruction is described at the top of
48 // test-simulator-traces-a64.h.
49 
// Shorthand that lets emitted instructions read like assembly: `__ Foo(...)`
// expands to `masm.Foo(...)`, so a variable named `masm` must be in scope.
#define __ masm.
// Forwards to TEST_ with `SIM_` pasted in front of the given test name.
#define TEST(name)  TEST_(SIM_##name)

// Value passed to the MacroAssembler constructor by SETUP().
#define BUF_SIZE (256)
54 
55 #ifdef VIXL_INCLUDE_SIMULATOR
56 
// Simulator build: declares the locals `masm`, `decoder` and `simulator` in
// the enclosing scope; the macros below refer to them by name. A Debugger is
// created instead of a plain Simulator when Test::run_debugger() is set, and
// the trace-colour and instruction-stats options are copied from Test.
// NOTE: the last line of this definition ends with a line continuation, so the
// blank line that follows is consumed as part of the macro. Keep it blank.
#define SETUP()                                                               \
  MacroAssembler masm(BUF_SIZE);                                              \
  Decoder decoder;                                                            \
  Simulator* simulator = Test::run_debugger() ? new Debugger(&decoder)        \
                                              : new Simulator(&decoder);      \
  simulator->set_coloured_trace(Test::coloured_trace());                      \
  simulator->set_instruction_stats(Test::instruction_stats());                \

// Resets the assembler and the simulator state, then emits the test prologue:
// callee-saved registers are pushed and, depending on the Test:: options, the
// corresponding Trace(..., TRACE_ENABLE) calls and EnableInstrumentation() are
// emitted. The expansion ends with a closing brace, so no trailing semicolon
// is required at the use site.
#define START()                                                               \
  masm.Reset();                                                               \
  simulator->ResetState();                                                    \
  __ PushCalleeSavedRegisters();                                              \
  if (Test::trace_reg()) {                                                    \
    __ Trace(LOG_STATE, TRACE_ENABLE);                                        \
  }                                                                           \
  if (Test::trace_write()) {                                                  \
    __ Trace(LOG_WRITE, TRACE_ENABLE);                                        \
  }                                                                           \
  if (Test::trace_sim()) {                                                    \
    __ Trace(LOG_DISASM, TRACE_ENABLE);                                       \
  }                                                                           \
  if (Test::instruction_stats()) {                                            \
    __ EnableInstrumentation();                                               \
  }

// Emits the test epilogue, mirroring START(): instrumentation is disabled if
// it was enabled, all tracing is disabled, callee-saved registers are popped
// and a return is emitted. Finally the generated code is finalized. The use
// site supplies the terminating semicolon.
#define END()                                                                 \
  if (Test::instruction_stats()) {                                            \
    __ DisableInstrumentation();                                              \
  }                                                                           \
  __ Trace(LOG_ALL, TRACE_DISABLE);                                           \
  __ PopCalleeSavedRegisters();                                               \
  __ Ret();                                                                   \
  masm.FinalizeCode()

// Runs the generated code on the simulator, starting at the beginning of the
// assembler's buffer.
#define RUN()                                                                 \
  simulator->RunFrom(masm.GetStartAddress<Instruction*>())

// Releases the simulator allocated with `new` in SETUP().
#define TEARDOWN()                                                            \
  delete simulator;
96 
97 #else     // VIXL_INCLUDE_SIMULATOR
98 
// Native build (no simulator): declares the local `masm` in the enclosing
// scope and calls CPU::SetUp(). The use site supplies the final semicolon.
#define SETUP()                                                               \
  MacroAssembler masm(BUF_SIZE);                                              \
  CPU::SetUp()

// Resets the assembler and emits the prologue that saves callee-saved
// registers. No tracing or instrumentation is emitted in this configuration.
#define START()                                                               \
  masm.Reset();                                                               \
  __ PushCalleeSavedRegisters()

// Emits the epilogue (restore callee-saved registers, return) and finalizes
// the generated code.
#define END()                                                                 \
  __ PopCalleeSavedRegisters();                                               \
  __ Ret();                                                                   \
  masm.FinalizeCode()

// Calls the generated code directly as a `void (*)(void)` function.
// CPU::EnsureIAndDCacheCoherency() is invoked on the emitted range first. The
// buffer address is copied into the function pointer with memcpy rather than
// a cast; the static assert checks that both pointer types have the same size.
#define RUN()                                                                  \
  {                                                                            \
    byte* buffer_start = masm.GetStartAddress<byte*>();                        \
    size_t buffer_length = masm.CursorOffset();                                \
    void (*test_function)(void);                                               \
                                                                               \
    CPU::EnsureIAndDCacheCoherency(buffer_start, buffer_length);               \
    VIXL_STATIC_ASSERT(sizeof(buffer_start) == sizeof(test_function));         \
    memcpy(&test_function, &buffer_start, sizeof(buffer_start));               \
    test_function();                                                           \
  }

// Nothing to release in the native configuration.
#define TEARDOWN()
125 
126 #endif    // VIXL_INCLUDE_SIMULATOR
127 
128 
129 // The maximum number of errors to report in detail for each test.
130 static const unsigned kErrorReportLimit = 8;
131 
132 
133 // Overloaded versions of rawbits_to_double and rawbits_to_float for use in the
134 // templated test functions.
rawbits_to_fp(uint32_t bits)135 static float rawbits_to_fp(uint32_t bits) {
136   return rawbits_to_float(bits);
137 }
138 
rawbits_to_fp(uint64_t bits)139 static double rawbits_to_fp(uint64_t bits) {
140   return rawbits_to_double(bits);
141 }
142 
143 
// MacroAssembler member function pointers to pass to the test dispatchers.
// Each typedef fixes the operand list of the MacroAssembler method under test;
// the dispatchers invoke it as (masm.*helper)(...).

// FP helper with operands (fd, fn); Test1Op_Helper stores fd as the result.
typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd,
                                                  const FPRegister& fn);
// FP helper with operands (fd, fn, fm); Test2Op_Helper stores fd as the result.
typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd,
                                                  const FPRegister& fn,
                                                  const FPRegister& fm);
// FP helper with operands (fd, fn, fm, fa); Test3Op_Helper stores fd as the
// result.
typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd,
                                                  const FPRegister& fn,
                                                  const FPRegister& fm,
                                                  const FPRegister& fa);
// FP comparison of two registers; TestCmp_Helper reads NZCV afterwards.
typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn,
                                                  const FPRegister& fm);
// FP comparison of a register against a literal; TestCmpZero_Helper passes 0.0
// and reads NZCV afterwards.
typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn,
                                                      double value);
// Helper taking a general-purpose register `rd` and an FP register `fn`.
typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
                                                    const FPRegister& fn);
// As TestFPToIntHelper_t, with an additional integer `fbits` argument
// (presumably the number of fractional bits -- confirm against the callers).
typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
                                                      const FPRegister& fn,
                                                      int fbits);
// Helper taking an FP register `fd`, a general-purpose register `rn` and an
// integer `fbits` argument.
typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd,
                                                      const Register& rn,
                                                      int fbits);
// TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
//       consolidated into one routine.
// NEON helper with operands (vd, vn).
typedef void (MacroAssembler::*Test1OpNEONHelper_t)(
  const VRegister& vd, const VRegister& vn);
// NEON helper with operands (vd, vn, vm).
typedef void (MacroAssembler::*Test2OpNEONHelper_t)(
  const VRegister& vd, const VRegister& vn, const VRegister& vm);
// NEON helper with operands (vd, vn, vm) plus an integer index applied to vm.
typedef void (MacroAssembler::*TestByElementNEONHelper_t)(
  const VRegister& vd, const VRegister& vn, const VRegister& vm, int vm_index);
// NEON helper whose arguments interleave registers and integer immediates:
// (vd, imm1, vn, imm2).
typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
  const VRegister& vd, int imm1, const VRegister& vn, int imm2);
176 
177 // This helps using the same typename for both the function pointer
178 // and the array of immediates passed to helper routines.
179 template <typename T>
180 class Test2OpImmediateNEONHelper_t {
181  public:
182     typedef void (MacroAssembler::*mnemonic)(
183       const VRegister& vd, const VRegister& vn, T imm);
184 };
185 
186 
// Maximum number of hex characters required to represent values of either
// templated type, i.e. two hex digits for every byte of the larger type.
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  // Select the wider type with a plain conditional instead of std::max, so
  // this helper does not depend on <algorithm> being included transitively.
  const unsigned count = static_cast<unsigned>(
      (sizeof(Ta) > sizeof(Tb)) ? sizeof(Ta) : sizeof(Tb));
  // 8 bits per byte, 4 bits per hex digit.
  return (count * 8) / 4;
}
194 
195 
196 // Standard test dispatchers.
197 
198 
Test1Op_Helper(Test1OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size)199 static void Test1Op_Helper(Test1OpFPHelper_t helper, uintptr_t inputs,
200                            unsigned inputs_length, uintptr_t results,
201                            unsigned d_size, unsigned n_size) {
202   VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize));
203   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
204 
205   SETUP();
206   START();
207 
208   // Roll up the loop to keep the code size down.
209   Label loop_n;
210 
211   Register out = x0;
212   Register inputs_base = x1;
213   Register length = w2;
214   Register index_n = w3;
215 
216   const int n_index_shift =
217       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
218 
219   FPRegister fd = (d_size == kDRegSize) ? d0 : s0;
220   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
221 
222   __ Mov(out, results);
223   __ Mov(inputs_base, inputs);
224   __ Mov(length, inputs_length);
225 
226   __ Mov(index_n, 0);
227   __ Bind(&loop_n);
228   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
229 
230   {
231     SingleEmissionCheckScope guard(&masm);
232     (masm.*helper)(fd, fn);
233   }
234   __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
235 
236   __ Add(index_n, index_n, 1);
237   __ Cmp(index_n, inputs_length);
238   __ B(lo, &loop_n);
239 
240   END();
241   RUN();
242   TEARDOWN();
243 }
244 
245 
246 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
247 // rawbits representations of doubles or floats. This ensures that exact bit
248 // comparisons can be performed.
249 template <typename Tn, typename Td>
Test1Op(const char * name,Test1OpFPHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)250 static void Test1Op(const char * name, Test1OpFPHelper_t helper,
251                     const Tn inputs[], unsigned inputs_length,
252                     const Td expected[], unsigned expected_length) {
253   VIXL_ASSERT(inputs_length > 0);
254 
255   const unsigned results_length = inputs_length;
256   Td * results = new Td[results_length];
257 
258   const unsigned d_bits = sizeof(Td) * 8;
259   const unsigned n_bits = sizeof(Tn) * 8;
260 
261   Test1Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
262                  reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
263 
264   if (Test::sim_test_trace()) {
265     // Print the results.
266     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
267     for (unsigned d = 0; d < results_length; d++) {
268       printf("  0x%0*" PRIx64 ",\n",
269              d_bits / 4, static_cast<uint64_t>(results[d]));
270     }
271     printf("};\n");
272     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
273   } else {
274     // Check the results.
275     VIXL_CHECK(expected_length == results_length);
276     unsigned error_count = 0;
277     unsigned d = 0;
278     for (unsigned n = 0; n < inputs_length; n++, d++) {
279       if (results[d] != expected[d]) {
280         if (++error_count > kErrorReportLimit) continue;
281 
282         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
283                name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
284                name, rawbits_to_fp(inputs[n]));
285         printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
286                d_bits / 4, static_cast<uint64_t>(expected[d]),
287                rawbits_to_fp(expected[d]));
288         printf("  Found:    0x%0*" PRIx64 " (%g)\n",
289                d_bits / 4, static_cast<uint64_t>(results[d]),
290                rawbits_to_fp(results[d]));
291         printf("\n");
292       }
293     }
294     VIXL_ASSERT(d == expected_length);
295     if (error_count > kErrorReportLimit) {
296       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
297     }
298     VIXL_CHECK(error_count == 0);
299   }
300   delete[] results;
301 }
302 
303 
Test2Op_Helper(Test2OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size)304 static void Test2Op_Helper(Test2OpFPHelper_t helper,
305                            uintptr_t inputs, unsigned inputs_length,
306                            uintptr_t results, unsigned reg_size) {
307   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
308 
309   SETUP();
310   START();
311 
312   // Roll up the loop to keep the code size down.
313   Label loop_n, loop_m;
314 
315   Register out = x0;
316   Register inputs_base = x1;
317   Register length = w2;
318   Register index_n = w3;
319   Register index_m = w4;
320 
321   bool double_op = reg_size == kDRegSize;
322   const int index_shift =
323       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
324 
325   FPRegister fd = double_op ? d0 : s0;
326   FPRegister fn = double_op ? d1 : s1;
327   FPRegister fm = double_op ? d2 : s2;
328 
329   __ Mov(out, results);
330   __ Mov(inputs_base, inputs);
331   __ Mov(length, inputs_length);
332 
333   __ Mov(index_n, 0);
334   __ Bind(&loop_n);
335   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
336 
337   __ Mov(index_m, 0);
338   __ Bind(&loop_m);
339   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
340 
341   {
342     SingleEmissionCheckScope guard(&masm);
343     (masm.*helper)(fd, fn, fm);
344   }
345     __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
346 
347   __ Add(index_m, index_m, 1);
348   __ Cmp(index_m, inputs_length);
349   __ B(lo, &loop_m);
350 
351   __ Add(index_n, index_n, 1);
352   __ Cmp(index_n, inputs_length);
353   __ B(lo, &loop_n);
354 
355   END();
356   RUN();
357   TEARDOWN();
358 }
359 
360 
361 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
362 // rawbits representations of doubles or floats. This ensures that exact bit
363 // comparisons can be performed.
364 template <typename T>
Test2Op(const char * name,Test2OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)365 static void Test2Op(const char * name, Test2OpFPHelper_t helper,
366                     const T inputs[], unsigned inputs_length,
367                     const T expected[], unsigned expected_length) {
368   VIXL_ASSERT(inputs_length > 0);
369 
370   const unsigned results_length = inputs_length * inputs_length;
371   T * results = new T[results_length];
372 
373   const unsigned bits = sizeof(T) * 8;
374 
375   Test2Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
376                  reinterpret_cast<uintptr_t>(results), bits);
377 
378   if (Test::sim_test_trace()) {
379     // Print the results.
380     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
381     for (unsigned d = 0; d < results_length; d++) {
382       printf("  0x%0*" PRIx64 ",\n",
383              bits / 4, static_cast<uint64_t>(results[d]));
384     }
385     printf("};\n");
386     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
387   } else {
388     // Check the results.
389     VIXL_CHECK(expected_length == results_length);
390     unsigned error_count = 0;
391     unsigned d = 0;
392     for (unsigned n = 0; n < inputs_length; n++) {
393       for (unsigned m = 0; m < inputs_length; m++, d++) {
394         if (results[d] != expected[d]) {
395           if (++error_count > kErrorReportLimit) continue;
396 
397           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
398                  name,
399                  bits / 4, static_cast<uint64_t>(inputs[n]),
400                  bits / 4, static_cast<uint64_t>(inputs[m]),
401                  name,
402                  rawbits_to_fp(inputs[n]),
403                  rawbits_to_fp(inputs[m]));
404           printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
405                  bits / 4, static_cast<uint64_t>(expected[d]),
406                  rawbits_to_fp(expected[d]));
407           printf("  Found:    0x%0*" PRIx64 " (%g)\n",
408                  bits / 4, static_cast<uint64_t>(results[d]),
409                  rawbits_to_fp(results[d]));
410           printf("\n");
411         }
412       }
413     }
414     VIXL_ASSERT(d == expected_length);
415     if (error_count > kErrorReportLimit) {
416       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
417     }
418     VIXL_CHECK(error_count == 0);
419   }
420   delete[] results;
421 }
422 
423 
Test3Op_Helper(Test3OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size)424 static void Test3Op_Helper(Test3OpFPHelper_t helper,
425                            uintptr_t inputs, unsigned inputs_length,
426                            uintptr_t results, unsigned reg_size) {
427   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
428 
429   SETUP();
430   START();
431 
432   // Roll up the loop to keep the code size down.
433   Label loop_n, loop_m, loop_a;
434 
435   Register out = x0;
436   Register inputs_base = x1;
437   Register length = w2;
438   Register index_n = w3;
439   Register index_m = w4;
440   Register index_a = w5;
441 
442   bool double_op = reg_size == kDRegSize;
443   const int index_shift =
444       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
445 
446   FPRegister fd = double_op ? d0 : s0;
447   FPRegister fn = double_op ? d1 : s1;
448   FPRegister fm = double_op ? d2 : s2;
449   FPRegister fa = double_op ? d3 : s3;
450 
451   __ Mov(out, results);
452   __ Mov(inputs_base, inputs);
453   __ Mov(length, inputs_length);
454 
455   __ Mov(index_n, 0);
456   __ Bind(&loop_n);
457   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
458 
459   __ Mov(index_m, 0);
460   __ Bind(&loop_m);
461   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
462 
463   __ Mov(index_a, 0);
464   __ Bind(&loop_a);
465   __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
466 
467   {
468     SingleEmissionCheckScope guard(&masm);
469     (masm.*helper)(fd, fn, fm, fa);
470   }
471   __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
472 
473   __ Add(index_a, index_a, 1);
474   __ Cmp(index_a, inputs_length);
475   __ B(lo, &loop_a);
476 
477   __ Add(index_m, index_m, 1);
478   __ Cmp(index_m, inputs_length);
479   __ B(lo, &loop_m);
480 
481   __ Add(index_n, index_n, 1);
482   __ Cmp(index_n, inputs_length);
483   __ B(lo, &loop_n);
484 
485   END();
486   RUN();
487   TEARDOWN();
488 }
489 
490 
491 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
492 // rawbits representations of doubles or floats. This ensures that exact bit
493 // comparisons can be performed.
494 template <typename T>
Test3Op(const char * name,Test3OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)495 static void Test3Op(const char * name, Test3OpFPHelper_t helper,
496                     const T inputs[], unsigned inputs_length,
497                     const T expected[], unsigned expected_length) {
498   VIXL_ASSERT(inputs_length > 0);
499 
500   const unsigned results_length = inputs_length * inputs_length * inputs_length;
501   T * results = new T[results_length];
502 
503   const unsigned bits = sizeof(T) * 8;
504 
505   Test3Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
506                  reinterpret_cast<uintptr_t>(results), bits);
507 
508   if (Test::sim_test_trace()) {
509     // Print the results.
510     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
511     for (unsigned d = 0; d < results_length; d++) {
512       printf("  0x%0*" PRIx64 ",\n",
513              bits / 4, static_cast<uint64_t>(results[d]));
514     }
515     printf("};\n");
516     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
517   } else {
518     // Check the results.
519     VIXL_CHECK(expected_length == results_length);
520     unsigned error_count = 0;
521     unsigned d = 0;
522     for (unsigned n = 0; n < inputs_length; n++) {
523       for (unsigned m = 0; m < inputs_length; m++) {
524         for (unsigned a = 0; a < inputs_length; a++, d++) {
525           if (results[d] != expected[d]) {
526             if (++error_count > kErrorReportLimit) continue;
527 
528             printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
529                    " (%s %g %g %g):\n",
530                    name,
531                    bits / 4, static_cast<uint64_t>(inputs[n]),
532                    bits / 4, static_cast<uint64_t>(inputs[m]),
533                    bits / 4, static_cast<uint64_t>(inputs[a]),
534                    name,
535                    rawbits_to_fp(inputs[n]),
536                    rawbits_to_fp(inputs[m]),
537                    rawbits_to_fp(inputs[a]));
538             printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
539                    bits / 4, static_cast<uint64_t>(expected[d]),
540                    rawbits_to_fp(expected[d]));
541             printf("  Found:    0x%0*" PRIx64 " (%g)\n",
542                    bits / 4, static_cast<uint64_t>(results[d]),
543                    rawbits_to_fp(results[d]));
544             printf("\n");
545           }
546         }
547       }
548     }
549     VIXL_ASSERT(d == expected_length);
550     if (error_count > kErrorReportLimit) {
551       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
552     }
553     VIXL_CHECK(error_count == 0);
554   }
555   delete[] results;
556 }
557 
558 
TestCmp_Helper(TestFPCmpHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size)559 static void TestCmp_Helper(TestFPCmpHelper_t helper,
560                            uintptr_t inputs, unsigned inputs_length,
561                            uintptr_t results, unsigned reg_size) {
562   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
563 
564   SETUP();
565   START();
566 
567   // Roll up the loop to keep the code size down.
568   Label loop_n, loop_m;
569 
570   Register out = x0;
571   Register inputs_base = x1;
572   Register length = w2;
573   Register index_n = w3;
574   Register index_m = w4;
575   Register flags = x5;
576 
577   bool double_op = reg_size == kDRegSize;
578   const int index_shift =
579       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
580 
581   FPRegister fn = double_op ? d1 : s1;
582   FPRegister fm = double_op ? d2 : s2;
583 
584   __ Mov(out, results);
585   __ Mov(inputs_base, inputs);
586   __ Mov(length, inputs_length);
587 
588   __ Mov(index_n, 0);
589   __ Bind(&loop_n);
590   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
591 
592   __ Mov(index_m, 0);
593   __ Bind(&loop_m);
594   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
595 
596   {
597     SingleEmissionCheckScope guard(&masm);
598     (masm.*helper)(fn, fm);
599   }
600   __ Mrs(flags, NZCV);
601   __ Ubfx(flags, flags, 28, 4);
602   __ Strb(flags, MemOperand(out, 1, PostIndex));
603 
604   __ Add(index_m, index_m, 1);
605   __ Cmp(index_m, inputs_length);
606   __ B(lo, &loop_m);
607 
608   __ Add(index_n, index_n, 1);
609   __ Cmp(index_n, inputs_length);
610   __ B(lo, &loop_n);
611 
612   END();
613   RUN();
614   TEARDOWN();
615 }
616 
617 
618 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
619 // rawbits representations of doubles or floats. This ensures that exact bit
620 // comparisons can be performed.
621 template <typename T>
TestCmp(const char * name,TestFPCmpHelper_t helper,const T inputs[],unsigned inputs_length,const uint8_t expected[],unsigned expected_length)622 static void TestCmp(const char * name, TestFPCmpHelper_t helper,
623                     const T inputs[], unsigned inputs_length,
624                     const uint8_t expected[], unsigned expected_length) {
625   VIXL_ASSERT(inputs_length > 0);
626 
627   const unsigned results_length = inputs_length * inputs_length;
628   uint8_t * results = new uint8_t[results_length];
629 
630   const unsigned bits = sizeof(T) * 8;
631 
632   TestCmp_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
633                  reinterpret_cast<uintptr_t>(results), bits);
634 
635   if (Test::sim_test_trace()) {
636     // Print the results.
637     printf("const uint8_t kExpected_%s[] = {\n", name);
638     for (unsigned d = 0; d < results_length; d++) {
639       // Each NZCV result only requires 4 bits.
640       VIXL_ASSERT((results[d] & 0xf) == results[d]);
641       printf("  0x%" PRIx8 ",\n", results[d]);
642     }
643     printf("};\n");
644     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
645   } else {
646     // Check the results.
647     VIXL_CHECK(expected_length == results_length);
648     unsigned error_count = 0;
649     unsigned d = 0;
650     for (unsigned n = 0; n < inputs_length; n++) {
651       for (unsigned m = 0; m < inputs_length; m++, d++) {
652         if (results[d] != expected[d]) {
653           if (++error_count > kErrorReportLimit) continue;
654 
655           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
656                  name,
657                  bits / 4, static_cast<uint64_t>(inputs[n]),
658                  bits / 4, static_cast<uint64_t>(inputs[m]),
659                  name,
660                  rawbits_to_fp(inputs[n]),
661                  rawbits_to_fp(inputs[m]));
662           printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
663                  (expected[d] & 0x8) ? 'N' : 'n',
664                  (expected[d] & 0x4) ? 'Z' : 'z',
665                  (expected[d] & 0x2) ? 'C' : 'c',
666                  (expected[d] & 0x1) ? 'V' : 'v',
667                  expected[d]);
668           printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
669                  (results[d] & 0x8) ? 'N' : 'n',
670                  (results[d] & 0x4) ? 'Z' : 'z',
671                  (results[d] & 0x2) ? 'C' : 'c',
672                  (results[d] & 0x1) ? 'V' : 'v',
673                  results[d]);
674           printf("\n");
675         }
676       }
677     }
678     VIXL_ASSERT(d == expected_length);
679     if (error_count > kErrorReportLimit) {
680       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
681     }
682     VIXL_CHECK(error_count == 0);
683   }
684   delete[] results;
685 }
686 
687 
TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size)688 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
689                                uintptr_t inputs, unsigned inputs_length,
690                                uintptr_t results, unsigned reg_size) {
691   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
692 
693   SETUP();
694   START();
695 
696   // Roll up the loop to keep the code size down.
697   Label loop_n, loop_m;
698 
699   Register out = x0;
700   Register inputs_base = x1;
701   Register length = w2;
702   Register index_n = w3;
703   Register flags = x4;
704 
705   bool double_op = reg_size == kDRegSize;
706   const int index_shift =
707       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
708 
709   FPRegister fn = double_op ? d1 : s1;
710 
711   __ Mov(out, results);
712   __ Mov(inputs_base, inputs);
713   __ Mov(length, inputs_length);
714 
715   __ Mov(index_n, 0);
716   __ Bind(&loop_n);
717   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
718 
719   {
720     SingleEmissionCheckScope guard(&masm);
721     (masm.*helper)(fn, 0.0);
722   }
723   __ Mrs(flags, NZCV);
724   __ Ubfx(flags, flags, 28, 4);
725   __ Strb(flags, MemOperand(out, 1, PostIndex));
726 
727   __ Add(index_n, index_n, 1);
728   __ Cmp(index_n, inputs_length);
729   __ B(lo, &loop_n);
730 
731   END();
732   RUN();
733   TEARDOWN();
734 }
735 
736 
737 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
738 // rawbits representations of doubles or floats. This ensures that exact bit
739 // comparisons can be performed.
740 template <typename T>
TestCmpZero(const char * name,TestFPCmpZeroHelper_t helper,const T inputs[],unsigned inputs_length,const uint8_t expected[],unsigned expected_length)741 static void TestCmpZero(const char * name, TestFPCmpZeroHelper_t helper,
742                         const T inputs[], unsigned inputs_length,
743                         const uint8_t expected[], unsigned expected_length) {
744   VIXL_ASSERT(inputs_length > 0);
745 
746   const unsigned results_length = inputs_length;
747   uint8_t * results = new uint8_t[results_length];
748 
749   const unsigned bits = sizeof(T) * 8;
750 
751   TestCmpZero_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
752                      reinterpret_cast<uintptr_t>(results), bits);
753 
754   if (Test::sim_test_trace()) {
755     // Print the results.
756     printf("const uint8_t kExpected_%s[] = {\n", name);
757     for (unsigned d = 0; d < results_length; d++) {
758       // Each NZCV result only requires 4 bits.
759       VIXL_ASSERT((results[d] & 0xf) == results[d]);
760       printf("  0x%" PRIx8 ",\n", results[d]);
761     }
762     printf("};\n");
763     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
764   } else {
765     // Check the results.
766     VIXL_CHECK(expected_length == results_length);
767     unsigned error_count = 0;
768     unsigned d = 0;
769     for (unsigned n = 0; n < inputs_length; n++, d++) {
770       if (results[d] != expected[d]) {
771         if (++error_count > kErrorReportLimit) continue;
772 
773         printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
774                name,
775                bits / 4, static_cast<uint64_t>(inputs[n]),
776                bits / 4, 0,
777                name,
778                rawbits_to_fp(inputs[n]));
779         printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
780                (expected[d] & 0x8) ? 'N' : 'n',
781                (expected[d] & 0x4) ? 'Z' : 'z',
782                (expected[d] & 0x2) ? 'C' : 'c',
783                (expected[d] & 0x1) ? 'V' : 'v',
784                expected[d]);
785         printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
786                (results[d] & 0x8) ? 'N' : 'n',
787                (results[d] & 0x4) ? 'Z' : 'z',
788                (results[d] & 0x2) ? 'C' : 'c',
789                (results[d] & 0x1) ? 'V' : 'v',
790                results[d]);
791         printf("\n");
792       }
793     }
794     VIXL_ASSERT(d == expected_length);
795     if (error_count > kErrorReportLimit) {
796       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
797     }
798     VIXL_CHECK(error_count == 0);
799   }
800   delete[] results;
801 }
802 
803 
TestFPToFixed_Helper(TestFPToFixedHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size)804 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
805                                  uintptr_t inputs, unsigned inputs_length,
806                                  uintptr_t results,
807                                  unsigned d_size, unsigned n_size) {
808   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
809   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
810 
811   SETUP();
812   START();
813 
814   // Roll up the loop to keep the code size down.
815   Label loop_n;
816 
817   Register out = x0;
818   Register inputs_base = x1;
819   Register length = w2;
820   Register index_n = w3;
821 
822   const int n_index_shift =
823       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
824 
825   Register rd = (d_size == kXRegSize) ? x10 : w10;
826   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
827 
828   __ Mov(out, results);
829   __ Mov(inputs_base, inputs);
830   __ Mov(length, inputs_length);
831 
832   __ Mov(index_n, 0);
833   __ Bind(&loop_n);
834   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
835 
836   for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
837     {
838       SingleEmissionCheckScope guard(&masm);
839       (masm.*helper)(rd, fn, fbits);
840     }
841     __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
842   }
843 
844   __ Add(index_n, index_n, 1);
845   __ Cmp(index_n, inputs_length);
846   __ B(lo, &loop_n);
847 
848   END();
849   RUN();
850   TEARDOWN();
851 }
852 
853 
TestFPToInt_Helper(TestFPToIntHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size)854 static void TestFPToInt_Helper(TestFPToIntHelper_t helper, uintptr_t inputs,
855                                unsigned inputs_length, uintptr_t results,
856                                unsigned d_size, unsigned n_size) {
857   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
858   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
859 
860   SETUP();
861   START();
862 
863   // Roll up the loop to keep the code size down.
864   Label loop_n;
865 
866   Register out = x0;
867   Register inputs_base = x1;
868   Register length = w2;
869   Register index_n = w3;
870 
871   const int n_index_shift =
872       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
873 
874   Register rd = (d_size == kXRegSize) ? x10 : w10;
875   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
876 
877   __ Mov(out, results);
878   __ Mov(inputs_base, inputs);
879   __ Mov(length, inputs_length);
880 
881   __ Mov(index_n, 0);
882   __ Bind(&loop_n);
883   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
884 
885   {
886     SingleEmissionCheckScope guard(&masm);
887     (masm.*helper)(rd, fn);
888   }
889   __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
890 
891   __ Add(index_n, index_n, 1);
892   __ Cmp(index_n, inputs_length);
893   __ B(lo, &loop_n);
894 
895   END();
896   RUN();
897   TEARDOWN();
898 }
899 
900 
901 // Test FP instructions.
902 //  - The inputs[] array should be an array of rawbits representations of
903 //    doubles or floats. This ensures that exact bit comparisons can be
904 //    performed.
905 //  - The expected[] array should be an array of signed integers.
906 template <typename Tn, typename Td>
TestFPToS(const char * name,TestFPToIntHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)907 static void TestFPToS(const char * name, TestFPToIntHelper_t helper,
908                       const Tn inputs[], unsigned inputs_length,
909                       const Td expected[], unsigned expected_length) {
910   VIXL_ASSERT(inputs_length > 0);
911 
912   const unsigned results_length = inputs_length;
913   Td * results = new Td[results_length];
914 
915   const unsigned d_bits = sizeof(Td) * 8;
916   const unsigned n_bits = sizeof(Tn) * 8;
917 
918   TestFPToInt_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
919                      reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
920 
921   if (Test::sim_test_trace()) {
922     // Print the results.
923     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
924     // There is no simple C++ literal for INT*_MIN that doesn't produce
925     // warnings, so we use an appropriate constant in that case instead.
926     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
927     // the like) avoids warnings about comparing values with differing ranges.
928     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
929     const int64_t int_d_min = -(int_d_max) - 1;
930     for (unsigned d = 0; d < results_length; d++) {
931       if (results[d] == int_d_min) {
932         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
933       } else {
934         // Some constants (such as those between INT32_MAX and UINT32_MAX)
935         // trigger compiler warnings. To avoid these warnings, use an
936         // appropriate macro to make the type explicit.
937         int64_t result_int64 = static_cast<int64_t>(results[d]);
938         if (result_int64 >= 0) {
939           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
940         } else {
941           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
942         }
943       }
944     }
945     printf("};\n");
946     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
947   } else {
948     // Check the results.
949     VIXL_CHECK(expected_length == results_length);
950     unsigned error_count = 0;
951     unsigned d = 0;
952     for (unsigned n = 0; n < inputs_length; n++, d++) {
953       if (results[d] != expected[d]) {
954         if (++error_count > kErrorReportLimit) continue;
955 
956         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
957                name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
958                name, rawbits_to_fp(inputs[n]));
959         printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
960                d_bits / 4, static_cast<uint64_t>(expected[d]),
961                static_cast<int64_t>(expected[d]));
962         printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
963                d_bits / 4, static_cast<uint64_t>(results[d]),
964                static_cast<int64_t>(results[d]));
965         printf("\n");
966       }
967     }
968     VIXL_ASSERT(d == expected_length);
969     if (error_count > kErrorReportLimit) {
970       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
971     }
972     VIXL_CHECK(error_count == 0);
973   }
974   delete[] results;
975 }
976 
977 
978 // Test FP instructions.
979 //  - The inputs[] array should be an array of rawbits representations of
980 //    doubles or floats. This ensures that exact bit comparisons can be
981 //    performed.
982 //  - The expected[] array should be an array of unsigned integers.
983 template <typename Tn, typename Td>
TestFPToU(const char * name,TestFPToIntHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)984 static void TestFPToU(const char * name, TestFPToIntHelper_t helper,
985                       const Tn inputs[], unsigned inputs_length,
986                       const Td expected[], unsigned expected_length) {
987   VIXL_ASSERT(inputs_length > 0);
988 
989   const unsigned results_length = inputs_length;
990   Td * results = new Td[results_length];
991 
992   const unsigned d_bits = sizeof(Td) * 8;
993   const unsigned n_bits = sizeof(Tn) * 8;
994 
995   TestFPToInt_Helper(helper,
996                      reinterpret_cast<uintptr_t>(inputs), inputs_length,
997                      reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
998 
999   if (Test::sim_test_trace()) {
1000     // Print the results.
1001     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1002     for (unsigned d = 0; d < results_length; d++) {
1003       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1004     }
1005     printf("};\n");
1006     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1007   } else {
1008     // Check the results.
1009     VIXL_CHECK(expected_length == results_length);
1010     unsigned error_count = 0;
1011     unsigned d = 0;
1012     for (unsigned n = 0; n < inputs_length; n++, d++) {
1013       if (results[d] != expected[d]) {
1014         if (++error_count > kErrorReportLimit) continue;
1015 
1016         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1017                name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
1018                name, rawbits_to_fp(inputs[n]));
1019         printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1020                d_bits / 4, static_cast<uint64_t>(expected[d]),
1021                static_cast<uint64_t>(expected[d]));
1022         printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1023                d_bits / 4, static_cast<uint64_t>(results[d]),
1024                static_cast<uint64_t>(results[d]));
1025         printf("\n");
1026       }
1027     }
1028     VIXL_ASSERT(d == expected_length);
1029     if (error_count > kErrorReportLimit) {
1030       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1031     }
1032     VIXL_CHECK(error_count == 0);
1033   }
1034   delete[] results;
1035 }
1036 
1037 
1038 // Test FP instructions.
1039 //  - The inputs[] array should be an array of rawbits representations of
1040 //    doubles or floats. This ensures that exact bit comparisons can be
1041 //    performed.
1042 //  - The expected[] array should be an array of signed integers.
1043 template <typename Tn, typename Td>
TestFPToFixedS(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1044 static void TestFPToFixedS(const char * name, TestFPToFixedHelper_t helper,
1045                            const Tn inputs[], unsigned inputs_length,
1046                            const Td expected[], unsigned expected_length) {
1047   VIXL_ASSERT(inputs_length > 0);
1048 
1049   const unsigned d_bits = sizeof(Td) * 8;
1050   const unsigned n_bits = sizeof(Tn) * 8;
1051 
1052   const unsigned results_length = inputs_length * (d_bits + 1);
1053   Td * results = new Td[results_length];
1054 
1055   TestFPToFixed_Helper(helper,
1056                        reinterpret_cast<uintptr_t>(inputs), inputs_length,
1057                        reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
1058 
1059   if (Test::sim_test_trace()) {
1060     // Print the results.
1061     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1062     // There is no simple C++ literal for INT*_MIN that doesn't produce
1063     // warnings, so we use an appropriate constant in that case instead.
1064     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1065     // the like) avoids warnings about comparing values with differing ranges.
1066     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1067     const int64_t int_d_min = -(int_d_max) - 1;
1068     for (unsigned d = 0; d < results_length; d++) {
1069       if (results[d] == int_d_min) {
1070         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1071       } else {
1072         // Some constants (such as those between INT32_MAX and UINT32_MAX)
1073         // trigger compiler warnings. To avoid these warnings, use an
1074         // appropriate macro to make the type explicit.
1075         int64_t result_int64 = static_cast<int64_t>(results[d]);
1076         if (result_int64 >= 0) {
1077           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1078         } else {
1079           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1080         }
1081       }
1082     }
1083     printf("};\n");
1084     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1085   } else {
1086     // Check the results.
1087     VIXL_CHECK(expected_length == results_length);
1088     unsigned error_count = 0;
1089     unsigned d = 0;
1090     for (unsigned n = 0; n < inputs_length; n++) {
1091       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1092         if (results[d] != expected[d]) {
1093           if (++error_count > kErrorReportLimit) continue;
1094 
1095           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1096                  name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
1097                  name, rawbits_to_fp(inputs[n]), fbits);
1098           printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1099                  d_bits / 4, static_cast<uint64_t>(expected[d]),
1100                  static_cast<int64_t>(expected[d]));
1101           printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1102                  d_bits / 4, static_cast<uint64_t>(results[d]),
1103                  static_cast<int64_t>(results[d]));
1104           printf("\n");
1105         }
1106       }
1107     }
1108     VIXL_ASSERT(d == expected_length);
1109     if (error_count > kErrorReportLimit) {
1110       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1111     }
1112     VIXL_CHECK(error_count == 0);
1113   }
1114   delete[] results;
1115 }
1116 
1117 
1118 // Test FP instructions.
1119 //  - The inputs[] array should be an array of rawbits representations of
1120 //    doubles or floats. This ensures that exact bit comparisons can be
1121 //    performed.
1122 //  - The expected[] array should be an array of unsigned integers.
1123 template <typename Tn, typename Td>
TestFPToFixedU(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1124 static void TestFPToFixedU(const char * name, TestFPToFixedHelper_t helper,
1125                            const Tn inputs[], unsigned inputs_length,
1126                            const Td expected[], unsigned expected_length) {
1127   VIXL_ASSERT(inputs_length > 0);
1128 
1129   const unsigned d_bits = sizeof(Td) * 8;
1130   const unsigned n_bits = sizeof(Tn) * 8;
1131 
1132   const unsigned results_length = inputs_length * (d_bits + 1);
1133   Td * results = new Td[results_length];
1134 
1135   TestFPToFixed_Helper(helper,
1136                        reinterpret_cast<uintptr_t>(inputs), inputs_length,
1137                        reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
1138 
1139   if (Test::sim_test_trace()) {
1140     // Print the results.
1141     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1142     for (unsigned d = 0; d < results_length; d++) {
1143       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1144     }
1145     printf("};\n");
1146     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1147   } else {
1148     // Check the results.
1149     VIXL_CHECK(expected_length == results_length);
1150     unsigned error_count = 0;
1151     unsigned d = 0;
1152     for (unsigned n = 0; n < inputs_length; n++) {
1153       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1154         if (results[d] != expected[d]) {
1155           if (++error_count > kErrorReportLimit) continue;
1156 
1157           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1158                  name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
1159                  name, rawbits_to_fp(inputs[n]), fbits);
1160           printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1161                  d_bits / 4, static_cast<uint64_t>(expected[d]),
1162                  static_cast<uint64_t>(expected[d]));
1163           printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1164                  d_bits / 4, static_cast<uint64_t>(results[d]),
1165                  static_cast<uint64_t>(results[d]));
1166           printf("\n");
1167         }
1168       }
1169     }
1170     VIXL_ASSERT(d == expected_length);
1171     if (error_count > kErrorReportLimit) {
1172       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1173     }
1174     VIXL_CHECK(error_count == 0);
1175   }
1176   delete[] results;
1177 }
1178 
1179 
1180 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
1181 
1182 
Test1OpNEON_Helper(Test1OpNEONHelper_t helper,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form)1183 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
1184                                uintptr_t inputs_n, unsigned inputs_n_length,
1185                                uintptr_t results,
1186                                VectorFormat vd_form,
1187                                VectorFormat vn_form) {
1188   VIXL_ASSERT(vd_form != kFormatUndefined);
1189   VIXL_ASSERT(vn_form != kFormatUndefined);
1190 
1191   SETUP();
1192   START();
1193 
1194   // Roll up the loop to keep the code size down.
1195   Label loop_n;
1196 
1197   Register out = x0;
1198   Register inputs_n_base = x1;
1199   Register inputs_n_last_16bytes = x3;
1200   Register index_n = x5;
1201 
1202   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1203   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1204   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1205 
1206   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1207   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1208   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1209   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1210   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1211 
1212 
1213   // These will be either a D- or a Q-register form, with a single lane
1214   // (for use in scalar load and store operations).
1215   VRegister vd = VRegister(0, vd_bits);
1216   VRegister vn = v1.V16B();
1217   VRegister vntmp = v3.V16B();
1218 
1219   // These will have the correct format for use when calling 'helper'.
1220   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
1221   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1222 
1223   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1224   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1225 
1226   __ Mov(out, results);
1227 
1228   __ Mov(inputs_n_base, inputs_n);
1229   __ Mov(inputs_n_last_16bytes,
1230          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1231 
1232   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1233 
1234   __ Mov(index_n, 0);
1235   __ Bind(&loop_n);
1236 
1237   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1238                                   vn_lane_bytes_log2));
1239   __ Ext(vn, vn, vntmp, vn_lane_bytes);
1240 
1241   // Set the destination to zero.
1242   // TODO: Setting the destination to values other than zero
1243   //       might be a better test for instructions such as sqxtn2
1244   //       which may leave parts of V registers unchanged.
1245   __ Movi(vd.V16B(), 0);
1246 
1247   {
1248     SingleEmissionCheckScope guard(&masm);
1249     (masm.*helper)(vd_helper, vn_helper);
1250   }
1251   __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
1252 
1253   __ Add(index_n, index_n, 1);
1254   __ Cmp(index_n, inputs_n_length);
1255   __ B(lo, &loop_n);
1256 
1257   END();
1258   RUN();
1259   TEARDOWN();
1260 }
1261 
1262 
1263 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1264 // arrays of rawbit representation of input values. This ensures that
1265 // exact bit comparisons can be performed.
1266 template <typename Td, typename Tn>
Test1OpNEON(const char * name,Test1OpNEONHelper_t helper,const Tn inputs_n[],unsigned inputs_n_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)1267 static void Test1OpNEON(const char * name, Test1OpNEONHelper_t helper,
1268                         const Tn inputs_n[], unsigned inputs_n_length,
1269                         const Td expected[], unsigned expected_length,
1270                         VectorFormat vd_form,
1271                         VectorFormat vn_form) {
1272   VIXL_ASSERT(inputs_n_length > 0);
1273 
1274   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1275   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1276   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1277 
1278   const unsigned results_length = inputs_n_length;
1279   Td* results = new Td[results_length * vd_lane_count];
1280   const unsigned lane_bit = sizeof(Td) * 8;
1281   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1282 
1283   Test1OpNEON_Helper(helper,
1284                      reinterpret_cast<uintptr_t>(inputs_n),
1285                      inputs_n_length,
1286                      reinterpret_cast<uintptr_t>(results),
1287                      vd_form, vn_form);
1288 
1289   if (Test::sim_test_trace()) {
1290     // Print the results.
1291     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1292     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1293       printf(" ");
1294       // Output a separate result for each element of the result vector.
1295       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1296         unsigned index = lane + (iteration * vd_lane_count);
1297         printf(" 0x%0*" PRIx64 ",",
1298                lane_len_in_hex,
1299                static_cast<uint64_t>(results[index]));
1300       }
1301       printf("\n");
1302     }
1303 
1304     printf("};\n");
1305     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1306            name,
1307            results_length);
1308   } else {
1309     // Check the results.
1310     VIXL_CHECK(expected_length == results_length);
1311     unsigned error_count = 0;
1312     unsigned d = 0;
1313     const char* padding = "                    ";
1314     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1315     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1316       bool error_in_vector = false;
1317 
1318       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1319         unsigned output_index = (n * vd_lane_count) + lane;
1320 
1321         if (results[output_index] != expected[output_index]) {
1322           error_in_vector = true;
1323           break;
1324         }
1325       }
1326 
1327       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1328         printf("%s\n", name);
1329         printf(" Vn%.*s| Vd%.*s| Expected\n",
1330                 lane_len_in_hex+1, padding,
1331                 lane_len_in_hex+1, padding);
1332 
1333         const unsigned first_index_n =
1334           inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1335 
1336         for (unsigned lane = 0;
1337              lane < std::max(vd_lane_count, vn_lane_count);
1338              lane++) {
1339           unsigned output_index = (n * vd_lane_count) + lane;
1340           unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1341 
1342           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1343                   "| 0x%0*" PRIx64 "\n",
1344                   results[output_index] != expected[output_index] ? '*' : ' ',
1345                   lane_len_in_hex,
1346                   static_cast<uint64_t>(inputs_n[input_index_n]),
1347                   lane_len_in_hex,
1348                   static_cast<uint64_t>(results[output_index]),
1349                   lane_len_in_hex,
1350                   static_cast<uint64_t>(expected[output_index]));
1351         }
1352       }
1353     }
1354     VIXL_ASSERT(d == expected_length);
1355     if (error_count > kErrorReportLimit) {
1356       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1357     }
1358     VIXL_CHECK(error_count == 0);
1359   }
1360   delete[] results;
1361 }
1362 
1363 
1364 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1365 //      where <V> is one of B, H, S or D registers.
1366 //      e.g. saddlv H1, v0.8B
1367 
1368 // TODO: Change tests to store all lanes of the resulting V register.
1369 //       Some tests store all 128 bits of the resulting V register to
1370 //       check the simulator's behaviour on the rest of the register.
1371 //       This is better than storing the affected lanes only.
1372 //       Change any tests such as the 'Across' template to do the same.
1373 
Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form)1374 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
1375                                      uintptr_t inputs_n,
1376                                      unsigned inputs_n_length,
1377                                      uintptr_t results,
1378                                      VectorFormat vd_form,
1379                                      VectorFormat vn_form) {
1380   VIXL_ASSERT(vd_form != kFormatUndefined);
1381   VIXL_ASSERT(vn_form != kFormatUndefined);
1382 
1383   SETUP();
1384   START();
1385 
1386   // Roll up the loop to keep the code size down.
1387   Label loop_n;
1388 
1389   Register out = x0;
1390   Register inputs_n_base = x1;
1391   Register inputs_n_last_vector = x3;
1392   Register index_n = x5;
1393 
1394   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1395   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1396 
1397   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1398   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1399   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1400   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1401   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1402 
1403 
1404   // These will be either a D- or a Q-register form, with a single lane
1405   // (for use in scalar load and store operations).
1406   VRegister vd = VRegister(0, vd_bits);
1407   VRegister vn = VRegister(1, vn_bits);
1408   VRegister vntmp = VRegister(3, vn_bits);
1409 
1410   // These will have the correct format for use when calling 'helper'.
1411   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1412 
1413   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1414   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1415 
1416   // Same registers for use in the 'ext' instructions.
1417   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1418   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1419 
1420   __ Mov(out, results);
1421 
1422   __ Mov(inputs_n_base, inputs_n);
1423   __ Mov(inputs_n_last_vector,
1424          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1425 
1426   __ Ldr(vn, MemOperand(inputs_n_last_vector));
1427 
1428   __ Mov(index_n, 0);
1429   __ Bind(&loop_n);
1430 
1431   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1432                                   vn_lane_bytes_log2));
1433   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1434 
1435   // Set the destination to zero for tests such as '[r]shrn2'.
1436   // TODO: Setting the destination to values other than zero
1437   //       might be a better test for instructions such as sqxtn2
1438   //       which may leave parts of V registers unchanged.
1439   __ Movi(vd.V16B(), 0);
1440 
1441   {
1442     SingleEmissionCheckScope guard(&masm);
1443     (masm.*helper)(vd, vn_helper);
1444   }
1445   __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
1446 
1447   __ Add(index_n, index_n, 1);
1448   __ Cmp(index_n, inputs_n_length);
1449   __ B(lo, &loop_n);
1450 
1451   END();
1452   RUN();
1453   TEARDOWN();
1454 }
1455 
1456 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1457 // arrays of rawbit representation of input values. This ensures that
1458 // exact bit comparisons can be performed.
1459 template <typename Td, typename Tn>
Test1OpAcrossNEON(const char * name,Test1OpNEONHelper_t helper,const Tn inputs_n[],unsigned inputs_n_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)1460 static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper,
1461                               const Tn inputs_n[], unsigned inputs_n_length,
1462                               const Td expected[], unsigned expected_length,
1463                               VectorFormat vd_form,
1464                               VectorFormat vn_form) {
1465   VIXL_ASSERT(inputs_n_length > 0);
1466 
1467   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1468 
1469   const unsigned results_length = inputs_n_length;
1470   Td* results = new Td[results_length * vd_lane_count];
1471   const unsigned lane_bit = sizeof(Td) * 8;
1472   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1473 
1474   Test1OpAcrossNEON_Helper(helper,
1475                            reinterpret_cast<uintptr_t>(inputs_n),
1476                            inputs_n_length,
1477                            reinterpret_cast<uintptr_t>(results),
1478                            vd_form, vn_form);
1479 
1480   if (Test::sim_test_trace()) {
1481     // Print the results.
1482     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1483     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1484       printf(" ");
1485       // Output a separate result for each element of the result vector.
1486       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1487         unsigned index = lane + (iteration * vd_lane_count);
1488         printf(" 0x%0*" PRIx64 ",",
1489                lane_len_in_hex,
1490                static_cast<uint64_t>(results[index]));
1491       }
1492       printf("\n");
1493     }
1494 
1495     printf("};\n");
1496     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1497            name,
1498            results_length);
1499   } else {
1500     // Check the results.
1501     VIXL_CHECK(expected_length == results_length);
1502     unsigned error_count = 0;
1503     unsigned d = 0;
1504     const char* padding = "                    ";
1505     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1506     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1507       bool error_in_vector = false;
1508 
1509       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1510         unsigned output_index = (n * vd_lane_count) + lane;
1511 
1512         if (results[output_index] != expected[output_index]) {
1513           error_in_vector = true;
1514           break;
1515         }
1516       }
1517 
1518       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1519         const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1520 
1521         printf("%s\n", name);
1522         printf(" Vn%.*s| Vd%.*s| Expected\n",
1523                 lane_len_in_hex+1, padding,
1524                 lane_len_in_hex+1, padding);
1525 
1526         // TODO: In case of an error, all tests print out as many elements as
1527         //       there are lanes in the output or input vectors. This way
1528         //       the viewer can read all the values that were needed for the
1529         //       operation but the output contains also unnecessary values.
1530         //       These prints can be improved according to the arguments
1531         //       passed to test functions.
1532         //       This output for the 'Across' category has the required
1533         //       modifications.
1534         for (unsigned lane = 0; lane < vn_lane_count; lane++) {
1535           unsigned output_index = n * vd_lane_count;
1536           unsigned input_index_n = (inputs_n_length - vn_lane_count +
1537               n + 1 + lane) % inputs_n_length;
1538 
1539           if (vn_lane_count-1 == lane) {  // Is this the last lane?
1540             // Print the result element(s) in the last lane only.
1541             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1542                   "| 0x%0*" PRIx64 "\n",
1543                   results[output_index] != expected[output_index] ? '*' : ' ',
1544                   lane_len_in_hex,
1545                   static_cast<uint64_t>(inputs_n[input_index_n]),
1546                   lane_len_in_hex,
1547                   static_cast<uint64_t>(results[output_index]),
1548                   lane_len_in_hex,
1549                   static_cast<uint64_t>(expected[output_index]));
1550           } else {
1551             printf(" 0x%0*" PRIx64 " |   %.*s|   %.*s\n",
1552                   lane_len_in_hex,
1553                   static_cast<uint64_t>(inputs_n[input_index_n]),
1554                   lane_len_in_hex+1, padding,
1555                   lane_len_in_hex+1, padding);
1556           }
1557         }
1558       }
1559     }
1560     VIXL_ASSERT(d == expected_length);
1561     if (error_count > kErrorReportLimit) {
1562       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1563     }
1564     VIXL_CHECK(error_count == 0);
1565   }
1566   delete[] results;
1567 }
1568 
1569 
1570 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1571 
1572 // TODO: Iterate over inputs_d once the traces file is split.
1573 
Test2OpNEON_Helper(Test2OpNEONHelper_t helper,uintptr_t inputs_d,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t inputs_m,unsigned inputs_m_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)1574 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
1575                                uintptr_t inputs_d,
1576                                uintptr_t inputs_n, unsigned inputs_n_length,
1577                                uintptr_t inputs_m, unsigned inputs_m_length,
1578                                uintptr_t results,
1579                                VectorFormat vd_form,
1580                                VectorFormat vn_form,
1581                                VectorFormat vm_form) {
1582   VIXL_ASSERT(vd_form != kFormatUndefined);
1583   VIXL_ASSERT(vn_form != kFormatUndefined);
1584   VIXL_ASSERT(vm_form != kFormatUndefined);
1585 
1586   SETUP();
1587   START();
1588 
1589   // Roll up the loop to keep the code size down.
1590   Label loop_n, loop_m;
1591 
1592   Register out = x0;
1593   Register inputs_n_base = x1;
1594   Register inputs_m_base = x2;
1595   Register inputs_d_base = x3;
1596   Register inputs_n_last_16bytes = x4;
1597   Register inputs_m_last_16bytes = x5;
1598   Register index_n = x6;
1599   Register index_m = x7;
1600 
1601   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1602   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1603   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1604 
1605   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1606   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1607   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1608   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1609   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1610 
1611   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1612   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1613   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1614   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1615   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1616 
1617 
1618   // Always load and store 128 bits regardless of the format.
1619   VRegister vd = v0.V16B();
1620   VRegister vn = v1.V16B();
1621   VRegister vm = v2.V16B();
1622   VRegister vntmp = v3.V16B();
1623   VRegister vmtmp = v4.V16B();
1624   VRegister vres = v5.V16B();
1625 
1626   // These will have the correct format for calling the 'helper'.
1627   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1628   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1629   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1630 
1631   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1632   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1633   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1634 
1635   __ Mov(out, results);
1636 
1637   __ Mov(inputs_d_base, inputs_d);
1638 
1639   __ Mov(inputs_n_base, inputs_n);
1640   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1641   __ Mov(inputs_m_base, inputs_m);
1642   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1643 
1644   __ Ldr(vd, MemOperand(inputs_d_base));
1645   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1646   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1647 
1648   __ Mov(index_n, 0);
1649   __ Bind(&loop_n);
1650 
1651   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1652                                   vn_lane_bytes_log2));
1653   __ Ext(vn, vn, vntmp, vn_lane_bytes);
1654 
1655   __ Mov(index_m, 0);
1656   __ Bind(&loop_m);
1657 
1658   __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
1659                                   vm_lane_bytes_log2));
1660   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1661 
1662   __ Mov(vres, vd);
1663   {
1664     SingleEmissionCheckScope guard(&masm);
1665     (masm.*helper)(vres_helper, vn_helper, vm_helper);
1666   }
1667   __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
1668 
1669   __ Add(index_m, index_m, 1);
1670   __ Cmp(index_m, inputs_m_length);
1671   __ B(lo, &loop_m);
1672 
1673   __ Add(index_n, index_n, 1);
1674   __ Cmp(index_n, inputs_n_length);
1675   __ B(lo, &loop_n);
1676 
1677   END();
1678   RUN();
1679   TEARDOWN();
1680 }
1681 
1682 
1683 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1684 // arrays of rawbit representation of input values. This ensures that
1685 // exact bit comparisons can be performed.
1686 template <typename Td, typename Tn, typename Tm>
Test2OpNEON(const char * name,Test2OpNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)1687 static void Test2OpNEON(const char * name, Test2OpNEONHelper_t helper,
1688                         const Td inputs_d[],
1689                         const Tn inputs_n[], unsigned inputs_n_length,
1690                         const Tm inputs_m[], unsigned inputs_m_length,
1691                         const Td expected[], unsigned expected_length,
1692                         VectorFormat vd_form,
1693                         VectorFormat vn_form,
1694                         VectorFormat vm_form) {
1695   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
1696 
1697   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
1698 
1699   const unsigned results_length = inputs_n_length * inputs_m_length;
1700   Td* results = new Td[results_length * vd_lane_count];
1701   const unsigned lane_bit = sizeof(Td) * 8;
1702   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
1703 
1704   Test2OpNEON_Helper(helper,
1705                      reinterpret_cast<uintptr_t>(inputs_d),
1706                      reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
1707                      reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
1708                      reinterpret_cast<uintptr_t>(results),
1709                      vd_form, vn_form, vm_form);
1710 
1711   if (Test::sim_test_trace()) {
1712     // Print the results.
1713     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1714     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1715       printf(" ");
1716       // Output a separate result for each element of the result vector.
1717       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1718         unsigned index = lane + (iteration * vd_lane_count);
1719         printf(" 0x%0*" PRIx64 ",",
1720                lane_len_in_hex,
1721                static_cast<uint64_t>(results[index]));
1722       }
1723       printf("\n");
1724     }
1725 
1726     printf("};\n");
1727     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1728            name,
1729            results_length);
1730   } else {
1731     // Check the results.
1732     VIXL_CHECK(expected_length == results_length);
1733     unsigned error_count = 0;
1734     unsigned d = 0;
1735     const char* padding = "                    ";
1736     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1737     for (unsigned n = 0; n < inputs_n_length; n++) {
1738       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
1739         bool error_in_vector = false;
1740 
1741         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1742           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1743               (m * vd_lane_count) + lane;
1744 
1745           if (results[output_index] != expected[output_index]) {
1746             error_in_vector = true;
1747             break;
1748           }
1749         }
1750 
1751         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1752           printf("%s\n", name);
1753           printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
1754                  lane_len_in_hex+1, padding,
1755                  lane_len_in_hex+1, padding,
1756                  lane_len_in_hex+1, padding,
1757                  lane_len_in_hex+1, padding);
1758 
1759           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1760             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1761                 (m * vd_lane_count) + lane;
1762             unsigned input_index_n = (inputs_n_length - vd_lane_count +
1763                 n + 1 + lane) % inputs_n_length;
1764             unsigned input_index_m = (inputs_m_length - vd_lane_count +
1765                 m + 1 + lane) % inputs_m_length;
1766 
1767             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1768                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1769                    results[output_index] != expected[output_index] ? '*' : ' ',
1770                    lane_len_in_hex,
1771                    static_cast<uint64_t>(inputs_d[lane]),
1772                    lane_len_in_hex,
1773                    static_cast<uint64_t>(inputs_n[input_index_n]),
1774                    lane_len_in_hex,
1775                    static_cast<uint64_t>(inputs_m[input_index_m]),
1776                    lane_len_in_hex,
1777                    static_cast<uint64_t>(results[output_index]),
1778                    lane_len_in_hex,
1779                    static_cast<uint64_t>(expected[output_index]));
1780           }
1781         }
1782       }
1783     }
1784     VIXL_ASSERT(d == expected_length);
1785     if (error_count > kErrorReportLimit) {
1786       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1787     }
1788     VIXL_CHECK(error_count == 0);
1789   }
1790   delete[] results;
1791 }
1792 
1793 
1794 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
1795 
TestByElementNEON_Helper(TestByElementNEONHelper_t helper,uintptr_t inputs_d,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t inputs_m,unsigned inputs_m_length,const int indices[],unsigned indices_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)1796 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
1797                                      uintptr_t inputs_d,
1798                                      uintptr_t inputs_n,
1799                                      unsigned inputs_n_length,
1800                                      uintptr_t inputs_m,
1801                                      unsigned inputs_m_length,
1802                                      const int indices[],
1803                                      unsigned indices_length,
1804                                      uintptr_t results,
1805                                      VectorFormat vd_form,
1806                                      VectorFormat vn_form,
1807                                      VectorFormat vm_form) {
1808   VIXL_ASSERT(vd_form != kFormatUndefined);
1809   VIXL_ASSERT(vn_form != kFormatUndefined);
1810   VIXL_ASSERT(vm_form != kFormatUndefined);
1811 
1812   SETUP();
1813   START();
1814 
1815   // Roll up the loop to keep the code size down.
1816   Label loop_n, loop_m;
1817 
1818   Register out = x0;
1819   Register inputs_n_base = x1;
1820   Register inputs_m_base = x2;
1821   Register inputs_d_base = x3;
1822   Register inputs_n_last_16bytes = x4;
1823   Register inputs_m_last_16bytes = x5;
1824   Register index_n = x6;
1825   Register index_m = x7;
1826 
1827   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1828   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1829   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1830 
1831   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1832   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1833   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1834   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1835   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1836 
1837   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1838   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1839   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1840   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1841   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1842 
1843 
1844   // Always load and store 128 bits regardless of the format.
1845   VRegister vd = v0.V16B();
1846   VRegister vn = v1.V16B();
1847   VRegister vm = v2.V16B();
1848   VRegister vntmp = v3.V16B();
1849   VRegister vmtmp = v4.V16B();
1850   VRegister vres = v5.V16B();
1851 
1852   // These will have the correct format for calling the 'helper'.
1853   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1854   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1855   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1856 
1857   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1858   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1859   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1860 
1861   __ Mov(out, results);
1862 
1863   __ Mov(inputs_d_base, inputs_d);
1864 
1865   __ Mov(inputs_n_base, inputs_n);
1866   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1867   __ Mov(inputs_m_base, inputs_m);
1868   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1869 
1870   __ Ldr(vd, MemOperand(inputs_d_base));
1871   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1872   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1873 
1874   __ Mov(index_n, 0);
1875   __ Bind(&loop_n);
1876 
1877   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1878                                   vn_lane_bytes_log2));
1879   __ Ext(vn, vn, vntmp, vn_lane_bytes);
1880 
1881   __ Mov(index_m, 0);
1882   __ Bind(&loop_m);
1883 
1884   __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
1885                                   vm_lane_bytes_log2));
1886   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1887 
1888   __ Mov(vres, vd);
1889   {
1890     for (unsigned i = 0; i < indices_length; i++) {
1891       {
1892         SingleEmissionCheckScope guard(&masm);
1893         (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
1894       }
1895       __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
1896     }
1897   }
1898 
1899   __ Add(index_m, index_m, 1);
1900   __ Cmp(index_m, inputs_m_length);
1901   __ B(lo, &loop_m);
1902 
1903   __ Add(index_n, index_n, 1);
1904   __ Cmp(index_n, inputs_n_length);
1905   __ B(lo, &loop_n);
1906 
1907   END();
1908   RUN();
1909   TEARDOWN();
1910 }
1911 
1912 
1913 
1914 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1915 // arrays of rawbit representation of input values. This ensures that
1916 // exact bit comparisons can be performed.
1917 template <typename Td, typename Tn, typename Tm>
TestByElementNEON(const char * name,TestByElementNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const int indices[],unsigned indices_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)1918 static void TestByElementNEON(const char *name,
1919                               TestByElementNEONHelper_t helper,
1920                               const Td inputs_d[],
1921                               const Tn inputs_n[], unsigned inputs_n_length,
1922                               const Tm inputs_m[], unsigned inputs_m_length,
1923                               const int indices[], unsigned indices_length,
1924                               const Td expected[], unsigned expected_length,
1925                               VectorFormat vd_form,
1926                               VectorFormat vn_form,
1927                               VectorFormat vm_form) {
1928   VIXL_ASSERT(inputs_n_length > 0);
1929   VIXL_ASSERT(inputs_m_length > 0);
1930   VIXL_ASSERT(indices_length > 0);
1931 
1932   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
1933 
1934   const unsigned results_length = inputs_n_length * inputs_m_length *
1935                                   indices_length;
1936   Td* results = new Td[results_length * vd_lane_count];
1937   const unsigned lane_bit = sizeof(Td) * 8;
1938   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
1939 
1940   TestByElementNEON_Helper(helper,
1941     reinterpret_cast<uintptr_t>(inputs_d),
1942     reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
1943     reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
1944     indices, indices_length,
1945     reinterpret_cast<uintptr_t>(results),
1946     vd_form, vn_form, vm_form);
1947 
1948   if (Test::sim_test_trace()) {
1949     // Print the results.
1950     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1951     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1952       printf(" ");
1953       // Output a separate result for each element of the result vector.
1954       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1955         unsigned index = lane + (iteration * vd_lane_count);
1956         printf(" 0x%0*" PRIx64 ",",
1957                lane_len_in_hex,
1958                static_cast<uint64_t>(results[index]));
1959       }
1960       printf("\n");
1961     }
1962 
1963     printf("};\n");
1964     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1965            name,
1966            results_length);
1967   } else {
1968     // Check the results.
1969     VIXL_CHECK(expected_length == results_length);
1970     unsigned error_count = 0;
1971     unsigned d = 0;
1972     const char* padding = "                    ";
1973     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1974     for (unsigned n = 0; n < inputs_n_length; n++) {
1975       for (unsigned m = 0; m < inputs_m_length; m++) {
1976         for (unsigned index = 0; index < indices_length; index++, d++) {
1977           bool error_in_vector = false;
1978 
1979           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1980             unsigned output_index =
1981                 (n * inputs_m_length * indices_length * vd_lane_count) +
1982                 (m * indices_length * vd_lane_count) +
1983                 (index * vd_lane_count) + lane;
1984 
1985             if (results[output_index] != expected[output_index]) {
1986               error_in_vector = true;
1987               break;
1988             }
1989           }
1990 
1991           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1992             printf("%s\n", name);
1993             printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
1994                   lane_len_in_hex+1, padding,
1995                   lane_len_in_hex+1, padding,
1996                   lane_len_in_hex+1, padding,
1997                   lane_len_in_hex+1, padding);
1998 
1999             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2000               unsigned output_index =
2001                   (n * inputs_m_length * indices_length * vd_lane_count) +
2002                   (m * indices_length * vd_lane_count) +
2003                   (index * vd_lane_count) + lane;
2004               unsigned input_index_n = (inputs_n_length - vd_lane_count +
2005                   n + 1 + lane) % inputs_n_length;
2006               unsigned input_index_m = (inputs_m_length - vd_lane_count +
2007                   m + 1 + lane) % inputs_m_length;
2008 
2009               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2010                 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2011                 results[output_index] != expected[output_index] ? '*' : ' ',
2012                 lane_len_in_hex,
2013                 static_cast<uint64_t>(inputs_d[lane]),
2014                 lane_len_in_hex,
2015                 static_cast<uint64_t>(inputs_n[input_index_n]),
2016                 lane_len_in_hex,
2017                 static_cast<uint64_t>(inputs_m[input_index_m]),
2018                 indices[index],
2019                 lane_len_in_hex,
2020                 static_cast<uint64_t>(results[output_index]),
2021                 lane_len_in_hex,
2022                 static_cast<uint64_t>(expected[output_index]));
2023             }
2024           }
2025         }
2026       }
2027     }
2028     VIXL_ASSERT(d == expected_length);
2029     if (error_count > kErrorReportLimit) {
2030       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2031     }
2032     VIXL_CHECK(error_count == 0);
2033   }
2034   delete[] results;
2035 }
2036 
2037 
2038 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2039 
2040 
2041 template <typename Tm>
Test2OpImmNEON_Helper(typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,uintptr_t inputs_n,unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form)2042 void Test2OpImmNEON_Helper(
2043     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2044     uintptr_t inputs_n,
2045     unsigned inputs_n_length,
2046     const Tm inputs_m[],
2047     unsigned inputs_m_length,
2048     uintptr_t results,
2049     VectorFormat vd_form,
2050     VectorFormat vn_form) {
2051   VIXL_ASSERT(vd_form != kFormatUndefined &&
2052               vn_form != kFormatUndefined);
2053 
2054   SETUP();
2055   START();
2056 
2057   // Roll up the loop to keep the code size down.
2058   Label loop_n;
2059 
2060   Register out = x0;
2061   Register inputs_n_base = x1;
2062   Register inputs_n_last_16bytes = x3;
2063   Register index_n = x5;
2064 
2065   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2066   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2067   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2068 
2069   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2070   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2071   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2072   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2073   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2074 
2075 
2076   // These will be either a D- or a Q-register form, with a single lane
2077   // (for use in scalar load and store operations).
2078   VRegister vd = VRegister(0, vd_bits);
2079   VRegister vn = v1.V16B();
2080   VRegister vntmp = v3.V16B();
2081 
2082   // These will have the correct format for use when calling 'helper'.
2083   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2084   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2085 
2086   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2087   VRegister vntmp_single = VRegister(3, vn_lane_bits);
2088 
2089   __ Mov(out, results);
2090 
2091   __ Mov(inputs_n_base, inputs_n);
2092   __ Mov(inputs_n_last_16bytes,
2093          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2094 
2095   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2096 
2097   __ Mov(index_n, 0);
2098   __ Bind(&loop_n);
2099 
2100   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
2101                                   vn_lane_bytes_log2));
2102   __ Ext(vn, vn, vntmp, vn_lane_bytes);
2103 
2104   // Set the destination to zero for tests such as '[r]shrn2'.
2105   // TODO: Setting the destination to values other than zero might be a better
2106   //       test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2107   __ Movi(vd.V16B(), 0);
2108 
2109   {
2110     for (unsigned i = 0; i < inputs_m_length; i++) {
2111       {
2112         SingleEmissionCheckScope guard(&masm);
2113         (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2114       }
2115       __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
2116     }
2117   }
2118 
2119   __ Add(index_n, index_n, 1);
2120   __ Cmp(index_n, inputs_n_length);
2121   __ B(lo, &loop_n);
2122 
2123   END();
2124   RUN();
2125   TEARDOWN();
2126 }
2127 
2128 
2129 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2130 // arrays of rawbit representation of input values. This ensures that
2131 // exact bit comparisons can be performed.
2132 template <typename Td, typename Tn, typename Tm>
Test2OpImmNEON(const char * name,typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)2133 static void Test2OpImmNEON(
2134     const char * name,
2135     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2136     const Tn inputs_n[], unsigned inputs_n_length,
2137     const Tm inputs_m[], unsigned inputs_m_length,
2138     const Td expected[], unsigned expected_length,
2139     VectorFormat vd_form,
2140     VectorFormat vn_form) {
2141   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2142 
2143   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2144   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2145   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2146 
2147   const unsigned results_length = inputs_n_length * inputs_m_length;
2148   Td* results = new Td[results_length * vd_lane_count];
2149   const unsigned lane_bit = sizeof(Td) * 8;
2150   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2151 
2152   Test2OpImmNEON_Helper(helper,
2153                         reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
2154                         inputs_m, inputs_m_length,
2155                         reinterpret_cast<uintptr_t>(results),
2156                         vd_form, vn_form);
2157 
2158   if (Test::sim_test_trace()) {
2159     // Print the results.
2160     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2161     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2162       printf(" ");
2163       // Output a separate result for each element of the result vector.
2164       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2165         unsigned index = lane + (iteration * vd_lane_count);
2166         printf(" 0x%0*" PRIx64 ",",
2167                lane_len_in_hex,
2168                static_cast<uint64_t>(results[index]));
2169       }
2170       printf("\n");
2171     }
2172 
2173     printf("};\n");
2174     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2175            name,
2176            results_length);
2177   } else {
2178     // Check the results.
2179     VIXL_CHECK(expected_length == results_length);
2180     unsigned error_count = 0;
2181     unsigned d = 0;
2182     const char* padding = "                    ";
2183     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2184     for (unsigned n = 0; n < inputs_n_length; n++) {
2185       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2186         bool error_in_vector = false;
2187 
2188         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2189           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2190               (m * vd_lane_count) + lane;
2191 
2192           if (results[output_index] != expected[output_index]) {
2193             error_in_vector = true;
2194             break;
2195           }
2196         }
2197 
2198         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2199           printf("%s\n", name);
2200           printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2201                  lane_len_in_hex+1, padding,
2202                  lane_len_in_hex, padding,
2203                  lane_len_in_hex+1, padding);
2204 
2205         const unsigned first_index_n =
2206           inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2207 
2208         for (unsigned lane = 0;
2209              lane < std::max(vd_lane_count, vn_lane_count);
2210              lane++) {
2211             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2212                 (m * vd_lane_count) + lane;
2213             unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2214             unsigned input_index_m = m;
2215 
2216             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2217                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2218                    results[output_index] != expected[output_index] ? '*' : ' ',
2219                    lane_len_in_hex,
2220                    static_cast<uint64_t>(inputs_n[input_index_n]),
2221                    lane_len_in_hex,
2222                    static_cast<uint64_t>(inputs_m[input_index_m]),
2223                    lane_len_in_hex,
2224                    static_cast<uint64_t>(results[output_index]),
2225                    lane_len_in_hex,
2226                    static_cast<uint64_t>(expected[output_index]));
2227           }
2228         }
2229       }
2230     }
2231     VIXL_ASSERT(d == expected_length);
2232     if (error_count > kErrorReportLimit) {
2233       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2234     }
2235     VIXL_CHECK(error_count == 0);
2236   }
2237   delete[] results;
2238 }
2239 
2240 
2241 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2242 
2243 
TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,uintptr_t inputs_d,const int inputs_imm1[],unsigned inputs_imm1_length,uintptr_t inputs_n,unsigned inputs_n_length,const int inputs_imm2[],unsigned inputs_imm2_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form)2244 static void TestOpImmOpImmNEON_Helper(
2245   TestOpImmOpImmVdUpdateNEONHelper_t helper,
2246   uintptr_t inputs_d,
2247   const int inputs_imm1[], unsigned inputs_imm1_length,
2248   uintptr_t inputs_n, unsigned inputs_n_length,
2249   const int inputs_imm2[], unsigned inputs_imm2_length,
2250   uintptr_t results,
2251   VectorFormat vd_form, VectorFormat vn_form) {
2252   VIXL_ASSERT(vd_form != kFormatUndefined);
2253   VIXL_ASSERT(vn_form != kFormatUndefined);
2254 
2255   SETUP();
2256   START();
2257 
2258   // Roll up the loop to keep the code size down.
2259   Label loop_n;
2260 
2261   Register out = x0;
2262   Register inputs_d_base = x1;
2263   Register inputs_n_base = x2;
2264   Register inputs_n_last_vector = x4;
2265   Register index_n = x6;
2266 
2267   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2268   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2269   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2270 
2271   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2272   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2273   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2274   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2275   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2276 
2277 
2278   // These will be either a D- or a Q-register form, with a single lane
2279   // (for use in scalar load and store operations).
2280   VRegister vd = VRegister(0, vd_bits);
2281   VRegister vn = VRegister(1, vn_bits);
2282   VRegister vntmp = VRegister(4, vn_bits);
2283   VRegister vres = VRegister(5, vn_bits);
2284 
2285   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2286   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2287 
2288   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2289   VRegister vntmp_single = VRegister(4, vn_lane_bits);
2290 
2291   // Same registers for use in the 'ext' instructions.
2292   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
2293   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
2294 
2295   __ Mov(out, results);
2296 
2297   __ Mov(inputs_d_base, inputs_d);
2298 
2299   __ Mov(inputs_n_base, inputs_n);
2300   __ Mov(inputs_n_last_vector,
2301          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
2302 
2303   __ Ldr(vd, MemOperand(inputs_d_base));
2304 
2305   __ Ldr(vn, MemOperand(inputs_n_last_vector));
2306 
2307   __ Mov(index_n, 0);
2308   __ Bind(&loop_n);
2309 
2310   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
2311                                   vn_lane_bytes_log2));
2312   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
2313 
2314   {
2315     EmissionCheckScope guard(&masm,
2316         kInstructionSize * inputs_imm1_length * inputs_imm2_length * 3);
2317     for (unsigned i = 0; i < inputs_imm1_length; i++) {
2318       for (unsigned j = 0; j < inputs_imm2_length; j++) {
2319         __ Mov(vres, vd);
2320         (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
2321         __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
2322       }
2323     }
2324   }
2325 
2326   __ Add(index_n, index_n, 1);
2327   __ Cmp(index_n, inputs_n_length);
2328   __ B(lo, &loop_n);
2329 
2330   END();
2331   RUN();
2332   TEARDOWN();
2333 }
2334 
2335 
2336 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2337 // arrays of rawbit representation of input values. This ensures that
2338 // exact bit comparisons can be performed.
2339 template <typename Td, typename Tn>
TestOpImmOpImmNEON(const char * name,TestOpImmOpImmVdUpdateNEONHelper_t helper,const Td inputs_d[],const int inputs_imm1[],unsigned inputs_imm1_length,const Tn inputs_n[],unsigned inputs_n_length,const int inputs_imm2[],unsigned inputs_imm2_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)2340 static void TestOpImmOpImmNEON(const char * name,
2341                                TestOpImmOpImmVdUpdateNEONHelper_t helper,
2342                                const Td inputs_d[],
2343                                const int inputs_imm1[],
2344                                unsigned inputs_imm1_length,
2345                                const Tn inputs_n[],
2346                                unsigned inputs_n_length,
2347                                const int inputs_imm2[],
2348                                unsigned inputs_imm2_length,
2349                                const Td expected[],
2350                                unsigned expected_length,
2351                                VectorFormat vd_form,
2352                                VectorFormat vn_form) {
2353   VIXL_ASSERT(inputs_n_length > 0);
2354   VIXL_ASSERT(inputs_imm1_length > 0);
2355   VIXL_ASSERT(inputs_imm2_length > 0);
2356 
2357   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2358 
2359   const unsigned results_length = inputs_n_length *
2360       inputs_imm1_length * inputs_imm2_length;
2361 
2362   Td* results = new Td[results_length * vd_lane_count];
2363   const unsigned lane_bit = sizeof(Td) * 8;
2364   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2365 
2366   TestOpImmOpImmNEON_Helper(helper,
2367                             reinterpret_cast<uintptr_t>(inputs_d),
2368                             inputs_imm1,
2369                             inputs_imm1_length,
2370                             reinterpret_cast<uintptr_t>(inputs_n),
2371                             inputs_n_length,
2372                             inputs_imm2,
2373                             inputs_imm2_length,
2374                             reinterpret_cast<uintptr_t>(results),
2375                             vd_form, vn_form);
2376 
2377   if (Test::sim_test_trace()) {
2378     // Print the results.
2379     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2380     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2381       printf(" ");
2382       // Output a separate result for each element of the result vector.
2383       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2384         unsigned index = lane + (iteration * vd_lane_count);
2385         printf(" 0x%0*" PRIx64 ",",
2386                lane_len_in_hex,
2387                static_cast<uint64_t>(results[index]));
2388       }
2389       printf("\n");
2390     }
2391 
2392     printf("};\n");
2393     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2394            name,
2395            results_length);
2396   } else {
2397     // Check the results.
2398     VIXL_CHECK(expected_length == results_length);
2399     unsigned error_count = 0;
2400     unsigned counted_length = 0;
2401     const char* padding = "                    ";
2402     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2403     for (unsigned n = 0; n < inputs_n_length; n++) {
2404       for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2405         for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2406           bool error_in_vector = false;
2407 
2408           counted_length++;
2409 
2410           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2411             unsigned output_index =
2412                 (n * inputs_imm1_length *
2413                  inputs_imm2_length * vd_lane_count) +
2414                 (imm1 * inputs_imm2_length * vd_lane_count) +
2415                 (imm2 * vd_lane_count) + lane;
2416 
2417             if (results[output_index] != expected[output_index]) {
2418               error_in_vector = true;
2419               break;
2420             }
2421           }
2422 
2423           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2424             printf("%s\n", name);
2425             printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2426                    lane_len_in_hex+1, padding,
2427                    lane_len_in_hex, padding,
2428                    lane_len_in_hex+1, padding,
2429                    lane_len_in_hex, padding,
2430                    lane_len_in_hex+1, padding);
2431 
2432             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2433               unsigned output_index =
2434                 (n * inputs_imm1_length *
2435                  inputs_imm2_length * vd_lane_count) +
2436                 (imm1 * inputs_imm2_length * vd_lane_count) +
2437                 (imm2 * vd_lane_count) + lane;
2438               unsigned input_index_n = (inputs_n_length - vd_lane_count +
2439                   n + 1 + lane) % inputs_n_length;
2440               unsigned input_index_imm1 = imm1;
2441               unsigned input_index_imm2 = imm2;
2442 
2443               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2444                 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2445                 results[output_index] !=
2446                   expected[output_index] ? '*' : ' ',
2447                 lane_len_in_hex,
2448                 static_cast<uint64_t>(inputs_d[lane]),
2449                 lane_len_in_hex,
2450                 static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2451                 lane_len_in_hex,
2452                 static_cast<uint64_t>(inputs_n[input_index_n]),
2453                 lane_len_in_hex,
2454                 static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2455                 lane_len_in_hex,
2456                 static_cast<uint64_t>(results[output_index]),
2457                 lane_len_in_hex,
2458                 static_cast<uint64_t>(expected[output_index]));
2459             }
2460           }
2461         }
2462       }
2463     }
2464     VIXL_ASSERT(counted_length == expected_length);
2465     if (error_count > kErrorReportLimit) {
2466       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2467     }
2468     VIXL_CHECK(error_count == 0);
2469   }
2470   delete[] results;
2471 }
2472 
2473 
2474 // ==== Floating-point tests. ====
2475 
2476 
// Standard floating-point test expansion for both double- and single-precision
// operations.
//
// STRINGIFY turns its argument, exactly as spelled, into a string literal. The
// helper macros below use it to build test names.
#define STRINGIFY(token) #token
2480 
// Expands to one Test<kind>() call for a floating-point test. The test is
// named "<insn>_<suffix>", the instruction is passed as
// &MacroAssembler::<insn>, and the reference data is
// kExpected_<insn>_<suffix> together with kExpectedCount_<insn>_<suffix>.
// 'inputs' must be an array, since its length is computed with sizeof.
#define CALL_TEST_FP_HELPER(insn, suffix, kind, inputs) \
    Test##kind(STRINGIFY(insn) "_" STRINGIFY(suffix), \
               &MacroAssembler::insn, \
               inputs, \
               sizeof(inputs) / sizeof(inputs[0]), \
               kExpected_##insn##_##suffix, \
               kExpectedCount_##insn##_##suffix)
2487 
// Defines the pair of tests <insn>_d and <insn>_s. The _d test uses the
// kInputDouble<set> array and the _s test uses kInputFloat<set>; both are
// expanded through CALL_TEST_FP_HELPER with the given 'kind'.
#define DEFINE_TEST_FP(insn, kind, set) \
    TEST(insn##_d) { \
      CALL_TEST_FP_HELPER(insn, d, kind, kInputDouble##set); \
    } \
    TEST(insn##_s) { \
      CALL_TEST_FP_HELPER(insn, s, kind, kInputFloat##set); \
    }
2495 
// Each DEFINE_TEST_FP(mnemonic, type, input) line below defines two tests,
// <mnemonic>_d and <mnemonic>_s, which call Test<type>() with the
// kInputDouble<input> and kInputFloat<input> arrays respectively.

// TODO: Test with a newer version of valgrind.
//
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting through an ASSERT and thus leaking
// memory.
DEFINE_TEST_FP(fmadd, 3Op, Basic)
DEFINE_TEST_FP(fmsub, 3Op, Basic)
DEFINE_TEST_FP(fnmadd, 3Op, Basic)
DEFINE_TEST_FP(fnmsub, 3Op, Basic)

// Tests dispatched to Test2Op(), all on the "Basic" inputs.
DEFINE_TEST_FP(fadd, 2Op, Basic)
DEFINE_TEST_FP(fdiv, 2Op, Basic)
DEFINE_TEST_FP(fmax, 2Op, Basic)
DEFINE_TEST_FP(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP(fmin, 2Op, Basic)
DEFINE_TEST_FP(fminnm, 2Op, Basic)
DEFINE_TEST_FP(fmul, 2Op, Basic)
DEFINE_TEST_FP(fsub, 2Op, Basic)
DEFINE_TEST_FP(fnmul, 2Op, Basic)

// Tests dispatched to Test1Op(). The frint* tests use the "Conversions"
// inputs; the others use the "Basic" inputs.
DEFINE_TEST_FP(fabs, 1Op, Basic)
DEFINE_TEST_FP(fmov, 1Op, Basic)
DEFINE_TEST_FP(fneg, 1Op, Basic)
DEFINE_TEST_FP(fsqrt, 1Op, Basic)
DEFINE_TEST_FP(frinta, 1Op, Conversions)
DEFINE_TEST_FP(frinti, 1Op, Conversions)
DEFINE_TEST_FP(frintm, 1Op, Conversions)
DEFINE_TEST_FP(frintn, 1Op, Conversions)
DEFINE_TEST_FP(frintp, 1Op, Conversions)
DEFINE_TEST_FP(frintx, 1Op, Conversions)
DEFINE_TEST_FP(frintz, 1Op, Conversions)
2527 
// fcmp tests. The d/s variants are dispatched to TestCmp() and the dz/sz
// variants to TestCmpZero(); all of them use the "Basic" inputs.
TEST(fcmp_d) {
  CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic);
}
TEST(fcmp_s) {
  CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic);
}
TEST(fcmp_dz) {
  CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic);
}
TEST(fcmp_sz) {
  CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic);
}

// fcvt tests, dispatched to Test1Op(). fcvt_sd reads the double-precision
// "Conversions" inputs and fcvt_ds the single-precision ones.
TEST(fcvt_sd) {
  CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions);
}
TEST(fcvt_ds) {
  CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions);
}
2535 
// Defines four tests for <insn>: _xd, _xs, _wd and _ws. The _xd and _wd tests
// use the kInputDouble<set> array, the _xs and _ws tests use kInputFloat<set>;
// all are expanded through CALL_TEST_FP_HELPER with the given 'kind'.
#define DEFINE_TEST_FP_TO_INT(insn, kind, set) \
    TEST(insn##_xd) { \
      CALL_TEST_FP_HELPER(insn, xd, kind, kInputDouble##set); \
    } \
    TEST(insn##_xs) { \
      CALL_TEST_FP_HELPER(insn, xs, kind, kInputFloat##set); \
    } \
    TEST(insn##_wd) { \
      CALL_TEST_FP_HELPER(insn, wd, kind, kInputDouble##set); \
    } \
    TEST(insn##_ws) { \
      CALL_TEST_FP_HELPER(insn, ws, kind, kInputFloat##set); \
    }
2549 
// Each line defines the _xd, _xs, _wd and _ws tests for one instruction, all
// on the "Conversions" inputs, dispatched to the named Test<type>() routine
// (TestFPToS / TestFPToU, or TestFPToFixedS / TestFPToFixedU for fcvtz*).
DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
2558 
2559 // TODO: Scvtf-fixed-point
2560 // TODO: Scvtf-integer
2561 // TODO: Ucvtf-fixed-point
2562 // TODO: Ucvtf-integer
2563 
2564 // TODO: Fccmp
2565 // TODO: Fcsel
2566 
2567 
2568 // ==== NEON Tests. ====
2569 
// Expands to a Test1OpNEON() call for MacroAssembler::<insn>. The test name
// and the kExpected_NEON_* / kExpectedCount_NEON_* reference data are keyed on
// <insn>_<vd>; kFormat<vd> and kFormat<vn> are passed as the format
// arguments. 'src_n' must be an array (its length is taken with sizeof).
#define CALL_TEST_NEON_HELPER_1Op(insn, vd, vn, src_n) \
    Test1OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd), \
                &MacroAssembler::insn, \
                src_n, \
                (sizeof(src_n) / sizeof(src_n[0])), \
                kExpected_NEON_##insn##_##vd, \
                kExpectedCount_NEON_##insn##_##vd, \
                kFormat##vd, \
                kFormat##vn)
2581 
// Expands to a Test1OpAcrossNEON() call for MacroAssembler::<insn>. Unlike the
// 1Op helper, the test name and reference data are keyed on both formats:
// <insn>_<vd>_<vn>. kFormat<vd> and kFormat<vn> are passed as the format
// arguments. 'src_n' must be an array (its length is taken with sizeof).
#define CALL_TEST_NEON_HELPER_1OpAcross(insn, vd, vn, src_n) \
    Test1OpAcrossNEON(STRINGIFY(insn) "_" STRINGIFY(vd) "_" STRINGIFY(vn), \
                      &MacroAssembler::insn, \
                      src_n, \
                      (sizeof(src_n) / sizeof(src_n[0])), \
                      kExpected_NEON_##insn##_##vd##_##vn, \
                      kExpectedCount_NEON_##insn##_##vd##_##vn, \
                      kFormat##vd, \
                      kFormat##vn)
2594 
// Expands to a Test2OpNEON() call for MacroAssembler::<insn>. The test name
// and reference data are keyed on <insn>_<vd>; kFormat<vd>, kFormat<vn> and
// kFormat<vm> are passed as the format arguments. 'src_n' and 'src_m' must be
// arrays (their lengths are taken with sizeof); 'src_d' is passed without a
// length.
#define CALL_TEST_NEON_HELPER_2Op(insn, vd, vn, vm, src_d, src_n, src_m) \
    Test2OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd), \
                &MacroAssembler::insn, \
                src_d, \
                src_n, \
                (sizeof(src_n) / sizeof(src_n[0])), \
                src_m, \
                (sizeof(src_m) / sizeof(src_m[0])), \
                kExpected_NEON_##insn##_##vd, \
                kExpectedCount_NEON_##insn##_##vd, \
                kFormat##vd, \
                kFormat##vn, \
                kFormat##vm)
2610 
// Expands to a Test2OpImmNEON() call for MacroAssembler::<insn>. The test name
// and reference data are keyed on <insn>_<vd>_2OPIMM; kFormat<vd> and
// kFormat<vn> are passed as the format arguments. 'src_n' and 'src_m' must be
// arrays (their lengths are taken with sizeof).
#define CALL_TEST_NEON_HELPER_2OpImm(insn, vd, vn, src_n, src_m) \
    Test2OpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd) "_2OPIMM", \
                   &MacroAssembler::insn, \
                   src_n, \
                   (sizeof(src_n) / sizeof(src_n[0])), \
                   src_m, \
                   (sizeof(src_m) / sizeof(src_m[0])), \
                   kExpected_NEON_##insn##_##vd##_2OPIMM, \
                   kExpectedCount_NEON_##insn##_##vd##_2OPIMM, \
                   kFormat##vd, \
                   kFormat##vn)
2624 
// Expands to a TestByElementNEON() call for MacroAssembler::<insn>. The test
// name and reference data are keyed on all three formats:
// <insn>_<vd>_<vn>_<vm>. kFormat<vd>, kFormat<vn> and kFormat<vm> are passed
// as the format arguments. 'src_n', 'src_m' and 'elem_idx' must be arrays
// (their lengths are taken with sizeof); 'src_d' is passed without a length.
#define CALL_TEST_NEON_HELPER_ByElement(insn, vd, vn, vm, \
                                        src_d, src_n, src_m, elem_idx) \
    TestByElementNEON( \
        STRINGIFY(insn) "_" STRINGIFY(vd) "_" STRINGIFY(vn) "_" STRINGIFY(vm), \
        &MacroAssembler::insn, \
        src_d, \
        src_n, \
        (sizeof(src_n) / sizeof(src_n[0])), \
        src_m, \
        (sizeof(src_m) / sizeof(src_m[0])), \
        elem_idx, \
        (sizeof(elem_idx) / sizeof(elem_idx[0])), \
        kExpected_NEON_##insn##_##vd##_##vn##_##vm, \
        kExpectedCount_NEON_##insn##_##vd##_##vn##_##vm, \
        kFormat##vd, \
        kFormat##vn, \
        kFormat##vm)
2643 
// Expands to a TestOpImmOpImmNEON() call. Unlike the other helpers, the
// function that emits the instruction is given explicitly ('emit'); <insn> is
// only used, with <vd>, to build the test name and the names of the reference
// data. kFormat<vd> and kFormat<vn> are passed as the format arguments.
// 'src_imm1', 'src_n' and 'src_imm2' must be arrays (their lengths are taken
// with sizeof); 'src_d' is passed without a length.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(emit, insn, vd, vn, \
                                         src_d, src_imm1, src_n, src_imm2) \
    TestOpImmOpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd), \
                       emit, \
                       src_d, \
                       src_imm1, \
                       (sizeof(src_imm1) / sizeof(src_imm1[0])), \
                       src_n, \
                       (sizeof(src_n) / sizeof(src_n[0])), \
                       src_imm2, \
                       (sizeof(src_imm2) / sizeof(src_imm2[0])), \
                       kExpected_NEON_##insn##_##vd, \
                       kExpectedCount_NEON_##insn##_##vd, \
                       kFormat##vd, \
                       kFormat##vn)
2662 
// Forwards to CALL_TEST_NEON_HELPER_1Op, passing 'form' for both of its format
// parameters.
#define CALL_TEST_NEON_HELPER_2SAME(insn, form, src_n) \
    CALL_TEST_NEON_HELPER_1Op(insn, form, form, src_n)
2667 
// Each of the three macros below defines a pair of tests through
// CALL_TEST_NEON_HELPER_2SAME, one per vector form named in the macro
// (8B/16B, 4H/8H or 2S/4S). The tests are named <insn>_<form> and use the
// kInput8bits<set>, kInput16bits<set> or kInput32bits<set> array respectively.
#define DEFINE_TEST_NEON_2SAME_8B_16B(insn, set) \
    TEST(insn##_8B) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, 8B, kInput8bits##set); \
    } \
    TEST(insn##_16B) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, 16B, kInput8bits##set); \
    }

#define DEFINE_TEST_NEON_2SAME_4H_8H(insn, set) \
    TEST(insn##_4H) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, 4H, kInput16bits##set); \
    } \
    TEST(insn##_8H) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, 8H, kInput16bits##set); \
    }

#define DEFINE_TEST_NEON_2SAME_2S_4S(insn, set) \
    TEST(insn##_2S) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInput32bits##set); \
    } \
    TEST(insn##_4S) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInput32bits##set); \
    }
2691 
// Combinations of the per-size 2SAME test macros:
//   _BH    8B, 16B, 4H and 8H.
//   _NO2D  _BH plus 2S and 4S.
//   (all)  _NO2D plus a 2D test using kInput64bits<set>.
//   _SD    2S and 4S plus the same 2D test.
#define DEFINE_TEST_NEON_2SAME_BH(insn, set) \
    DEFINE_TEST_NEON_2SAME_8B_16B(insn, set) \
    DEFINE_TEST_NEON_2SAME_4H_8H(insn, set)

#define DEFINE_TEST_NEON_2SAME_NO2D(insn, set) \
    DEFINE_TEST_NEON_2SAME_BH(insn, set) \
    DEFINE_TEST_NEON_2SAME_2S_4S(insn, set)

#define DEFINE_TEST_NEON_2SAME(insn, set) \
    DEFINE_TEST_NEON_2SAME_NO2D(insn, set) \
    TEST(insn##_2D) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##set); \
    }
#define DEFINE_TEST_NEON_2SAME_SD(insn, set) \
    DEFINE_TEST_NEON_2SAME_2S_4S(insn, set) \
    TEST(insn##_2D) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##set); \
    }
2710 
// Floating-point 2SAME tests. The vector macro defines <insn>_2S, <insn>_4S
// and <insn>_2D; the scalar macro defines <insn>_S and <insn>_D. The S-sized
// tests use kInputFloat<set> and the D-sized tests use kInputDouble<set>.
#define DEFINE_TEST_NEON_2SAME_FP(insn, set) \
    TEST(insn##_2S) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInputFloat##set); \
    } \
    TEST(insn##_4S) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInputFloat##set); \
    } \
    TEST(insn##_2D) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInputDouble##set); \
    }

#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(insn, set) \
    TEST(insn##_S) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, S, kInputFloat##set); \
    } \
    TEST(insn##_D) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, D, kInputDouble##set); \
    }
2729 
// Scalar 2SAME tests. The _B, _H, _S and _D macros each define one test named
// <insn>_<size>, using the kInput8bits<set>, kInput16bits<set>,
// kInput32bits<set> or kInput64bits<set> array respectively.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, set) \
    TEST(insn##_B) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, B, kInput8bits##set); \
    }
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, set) \
    TEST(insn##_H) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, H, kInput16bits##set); \
    }
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, set) \
    TEST(insn##_S) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, S, kInput32bits##set); \
    }
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, set) \
    TEST(insn##_D) { \
      CALL_TEST_NEON_HELPER_2SAME(insn, D, kInput64bits##set); \
    }

// All four scalar sizes.
#define DEFINE_TEST_NEON_2SAME_SCALAR(insn, set) \
    DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, set) \
    DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, set) \
    DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, set) \
    DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, set)

// Only the S and D scalar sizes.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(insn, set) \
    DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, set) \
    DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, set)
2756 
2757 
// Forwards all of its arguments, unchanged and in the same order, to
// CALL_TEST_NEON_HELPER_1OpAcross.
#define CALL_TEST_NEON_HELPER_ACROSS(insn, vd, vn, src_n) \
    CALL_TEST_NEON_HELPER_1OpAcross(insn, vd, vn, src_n)
2762 
// "Across" test definitions. Every test is named <insn>_<vd>_<vn> and is
// expanded through CALL_TEST_NEON_HELPER_ACROSS; the input array is chosen by
// the source form (kInput8bits<set> for 8B/16B, kInput16bits<set> for 4H/8H,
// kInput32bits<set> for 4S).
//
// Here the destination has the same lane size letter as the source.
#define DEFINE_TEST_NEON_ACROSS(insn, set) \
    TEST(insn##_B_8B) { \
      CALL_TEST_NEON_HELPER_ACROSS(insn, B, 8B, kInput8bits##set); \
    } \
    TEST(insn##_B_16B) { \
      CALL_TEST_NEON_HELPER_ACROSS(insn, B, 16B, kInput8bits##set); \
    } \
    TEST(insn##_H_4H) { \
      CALL_TEST_NEON_HELPER_ACROSS(insn, H, 4H, kInput16bits##set); \
    } \
    TEST(insn##_H_8H) { \
      CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8H, kInput16bits##set); \
    } \
    TEST(insn##_S_4S) { \
      CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInput32bits##set); \
    }

// Here the destination is the next size up from the source lanes
// (B -> H, H -> S, S -> D).
#define DEFINE_TEST_NEON_ACROSS_LONG(insn, set) \
    TEST(insn##_H_8B) { \
      CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8B, kInput8bits##set); \
    } \
    TEST(insn##_H_16B) { \
      CALL_TEST_NEON_HELPER_ACROSS(insn, H, 16B, kInput8bits##set); \
    } \
    TEST(insn##_S_4H) { \
      CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4H, kInput16bits##set); \
    } \
    TEST(insn##_S_8H) { \
      CALL_TEST_NEON_HELPER_ACROSS(insn, S, 8H, kInput16bits##set); \
    } \
    TEST(insn##_D_4S) { \
      CALL_TEST_NEON_HELPER_ACROSS(insn, D, 4S, kInput32bits##set); \
    }

// Floating-point variant: a single S <- 4S test using kInputFloat<set>.
#define DEFINE_TEST_NEON_ACROSS_FP(insn, set) \
    TEST(insn##_S_4S) { \
      CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInputFloat##set); \
    }
2801 
// Adapter used by the DEFINE_TEST_NEON_2DIFF_* macros: forwards its four
// arguments, unchanged and in the same order, to CALL_TEST_NEON_HELPER_1Op.
#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)
2808 
// Defines six TESTs named after their vdform: <mnemonic>_4H, _8H, _2S, _4S,
// _1D and _2D. The (vdform, vnform) pairs handed to
// CALL_TEST_NEON_HELPER_2DIFF are (4H, 8B), (8H, 16B), (2S, 4H), (4S, 8H),
// (1D, 2S) and (2D, 4S). The input identifier is kInput8bits<input>,
// kInput16bits<input> or kInput32bits<input> for B, H and S vnforms.
#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
  } \
  TEST(mnemonic##_1D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
  }
2828 
// Defines three TESTs for `mnemonic` (<mnemonic>_8B, _4H, _2S) and three for
// the pasted name <mnemonic>2 (<mnemonic>2_16B, 2_8H, 2_4S). The
// (vdform, vnform) pairs are (8B, 8H), (4H, 4S), (2S, 2D) for the plain name
// and (16B, 8H), (8H, 4S), (4S, 2D) for the "2" name. The input identifier is
// kInput16bits<input>, kInput32bits<input> or kInput64bits<input> for H, S and
// D vnforms.
#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \
  } \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \
  } \
  TEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \
  }
2848 
// Defines <mnemonic>_4S and <mnemonic>_2D, plus <mnemonic>2_4S and
// <mnemonic>2_2D for the pasted name <mnemonic>2. The 4S tests read from H
// vnforms (4H, or 8H for the "2" name) with kInputFloat16<input>; the 2D tests
// read from S vnforms (2S, or 4S for the "2" name) with kInputFloat<input>.
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \
  }
2862 
// Defines <mnemonic>_4H and <mnemonic>_2S, plus <mnemonic>2_8H and
// <mnemonic>2_4S for the pasted name <mnemonic>2. Tests whose vnform is 4S use
// kInputFloat<input>; tests whose vnform is 2D use kInputDouble<input>.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }
2876 
// Reduced form of the FP narrowing tests that only covers a 2D vnform:
// defines <mnemonic>_2S (vdform 2S) and <mnemonic>2_4S (vdform 4S, pasted name
// <mnemonic>2), both using kInputDouble<input>.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }
2884 
// Scalar narrowing tests: defines <mnemonic>_B, _H and _S with the
// (vdform, vnform) pairs (B, H), (H, S) and (S, D). The input identifier is
// kInput16bits<input>, kInput32bits<input> or kInput64bits<input>, matching
// the vnform.
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \
  }
2895 
// Defines <mnemonic>_S with (vdform, vnform) = (S, 2S) using
// kInputFloat<input>, and <mnemonic>_D with (D, 2D) using
// kInputDouble<input>.
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \
  }
2903 
// Adapter that reuses one form token for all three form arguments of
// CALL_TEST_NEON_HELPER_2Op, and `input_nm` for both of its trailing input
// arguments; `input_d` is passed through in between. The expansion is a
// brace-enclosed block, so a `;` written after an invocation is an additional
// empty statement.
#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
  { \
    CALL_TEST_NEON_HELPER_2Op(mnemonic, variant, variant, variant, input_d, \
                              input_nm, input_nm); \
  }
2909 
// Defines <mnemonic>_8B and <mnemonic>_16B. Both invoke
// CALL_TEST_NEON_HELPER_3SAME with kInput8bitsAccDestination as input_d and
// kInput8bits<input> as input_nm; only the variant token (8B or 16B) differs.
//
// The last line of the definition deliberately carries no line continuation,
// so the macro ends at its closing brace and does not rely on a blank line
// following it.
#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 8B, kInput8bitsAccDestination, kInput8bits##input); \
  } \
  TEST(mnemonic##_16B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 16B, kInput8bitsAccDestination, kInput8bits##input); \
  }
2921 
// Defines <mnemonic>_4H, _8H, _2S and _4S. The H variants use
// kInput16bitsAccDestination / kInput16bits<input>; the S variants use
// kInput32bitsAccDestination / kInput32bits<input>.
#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 8H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2S, kInput32bitsAccDestination, kInput32bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4S, kInput32bitsAccDestination, kInput32bits##input); \
  }
2943 
// Every 3SAME vector test except 2D: expands DEFINE_TEST_NEON_3SAME_8B_16B
// followed by DEFINE_TEST_NEON_3SAME_HS with the same arguments.
#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
2947 
// Full set of 3SAME vector tests: everything DEFINE_TEST_NEON_3SAME_NO2D
// produces, followed by <mnemonic>_2D, which uses kInput64bitsAccDestination
// and kInput64bits<input>.
#define DEFINE_TEST_NEON_3SAME(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2D, kInput64bitsAccDestination, kInput64bits##input); \
  }
2955 
// FP 3SAME vector tests: defines <mnemonic>_2S, _4S and _2D. The S variants
// use kInputFloatAccDestination / kInputFloat<input>; the 2D variant uses
// kInputDoubleAccDestination / kInputDouble<input>.
#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2D, kInputDoubleAccDestination, kInputDouble##input); \
  }
2972 
// Scalar 3SAME test restricted to the D variant: defines <mnemonic>_D, using
// kInput64bitsAccDestination and kInput64bits<input>.
#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInput64bitsAccDestination, kInput64bits##input); \
  }
2979 
// Scalar 3SAME tests restricted to the H and S variants: defines <mnemonic>_H
// (kInput16bitsAccDestination / kInput16bits<input>) and <mnemonic>_S
// (kInput32bitsAccDestination / kInput32bits<input>).
//
// The last line of the definition deliberately carries no line continuation,
// so the macro ends at its closing brace and does not rely on a blank line
// following it.
#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInput32bitsAccDestination, kInput32bits##input); \
  }
2991 
// Scalar 3SAME tests for every integer width: defines <mnemonic>_B, _H, _S
// and _D. Each uses kInput<N>bitsAccDestination and kInput<N>bits<input>,
// with N = 8, 16, 32 and 64 respectively.
#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, B, kInput8bitsAccDestination, kInput8bits##input); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInput32bitsAccDestination, kInput32bits##input); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInput64bitsAccDestination, kInput64bits##input); \
  }
3013 
// FP scalar 3SAME tests: defines <mnemonic>_S (kInputFloatAccDestination /
// kInputFloat<input>) and <mnemonic>_D (kInputDoubleAccDestination /
// kInputDouble<input>).
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInputDoubleAccDestination, kInputDouble##input); \
  }
3025 
// Adapter used by the DEFINE_TEST_NEON_3DIFF_* macros: forwards all seven
// arguments, unchanged and in the same order, to CALL_TEST_NEON_HELPER_2Op.
// The expansion is a brace-enclosed block, so a `;` written after an
// invocation is an additional empty statement.
#define CALL_TEST_NEON_HELPER_3DIFF( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
  { \
    CALL_TEST_NEON_HELPER_2Op( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m); \
  }
3033 
// Long 3DIFF tests with an 8H vdform: defines <mnemonic>_8H (vnform and
// vmform 8B) and <mnemonic>2_8H (pasted name <mnemonic>2, vnform and vmform
// 16B). Both use kInput16bitsAccDestination as input_d and kInput8bits<input>
// for input_n and input_m.
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \
                                kInput16bitsAccDestination, \
                                kInput8bits##input, \
                                kInput8bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \
                                kInput16bitsAccDestination, \
                                kInput8bits##input, \
                                kInput8bits##input); \
  }
3045 
// Long 3DIFF tests with a 4S vdform: defines <mnemonic>_4S (vnform and vmform
// 4H) and <mnemonic>2_4S (pasted name <mnemonic>2, vnform and vmform 8H).
// Both use kInput32bitsAccDestination as input_d and kInput16bits<input> for
// input_n and input_m.
#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \
                                kInput32bitsAccDestination, \
                                kInput16bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \
                                kInput32bitsAccDestination, \
                                kInput16bits##input, \
                                kInput16bits##input); \
  }
3057 
// Long 3DIFF tests with a 2D vdform: defines <mnemonic>_2D (vnform and vmform
// 2S) and <mnemonic>2_2D (pasted name <mnemonic>2, vnform and vmform 4S).
// Both use kInput64bitsAccDestination as input_d and kInput32bits<input> for
// input_n and input_m.
#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \
                                kInput64bitsAccDestination, \
                                kInput32bits##input, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \
                                kInput64bitsAccDestination, \
                                kInput32bits##input, \
                                kInput32bits##input); \
  }
3069 
// Long 3DIFF tests without the 8H vdform: expands
// DEFINE_TEST_NEON_3DIFF_LONG_4S followed by DEFINE_TEST_NEON_3DIFF_LONG_2D
// with the same arguments.
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3073 
// Complete set of long 3DIFF tests: the 8H tests from
// DEFINE_TEST_NEON_3DIFF_LONG_8H, then the 4S and 2D tests, which
// DEFINE_TEST_NEON_3DIFF_LONG_SD emits in that order.
#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input)
3078 
// Scalar long 3DIFF test with an S vdform: defines <mnemonic>_S with vnform
// and vmform H, kInput32bitsAccDestination as input_d and
// kInput16bits<input> for input_n and input_m.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, kInput32bitsAccDestination, \
                                kInput16bits##input, kInput16bits##input); \
  }
3086 
// Scalar long 3DIFF test with a D vdform: defines <mnemonic>_D with vnform
// and vmform S, kInput64bitsAccDestination as input_d and
// kInput32bits<input> for input_n and input_m.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, kInput64bitsAccDestination, \
                                kInput32bits##input, kInput32bits##input); \
  }
3094 
// Both scalar long 3DIFF tests: expands DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S
// followed by DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D with the same arguments.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3098 
// Wide 3DIFF tests, where vdform and vnform are the same token and vmform is
// different. Defines <mnemonic>_8H, _4S and _2D with vmform 8B, 4H and 2S,
// then <mnemonic>2_8H, 2_4S and 2_2D (pasted name <mnemonic>2) with vmform
// 16B, 8H and 4S. input_d is kInput<N>bitsAccDestination and input_n is
// kInput<N>bits<input>, with N = 16, 32 or 64 for H, S and D vdforms.
// input_m is kInput8bits<input>, kInput16bits<input> or kInput32bits<input>
// for B, H and S vmforms.
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \
                                kInput16bitsAccDestination, \
                                kInput16bits##input, \
                                kInput8bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \
                                kInput32bitsAccDestination, \
                                kInput32bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \
                                kInput64bitsAccDestination, \
                                kInput64bits##input, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \
                                kInput16bitsAccDestination, \
                                kInput16bits##input, \
                                kInput8bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \
                                kInput32bitsAccDestination, \
                                kInput32bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \
                                kInput64bitsAccDestination, \
                                kInput64bits##input, \
                                kInput32bits##input); \
  }
3130 
// Narrow 3DIFF tests, where vnform and vmform are the same token and vdform
// is different. Defines <mnemonic>_8B, _4H and _2S with source forms 8H, 4S
// and 2D, then <mnemonic>2_16B, 2_8H and 2_4S (pasted name <mnemonic>2) with
// the same source forms. input_d is kInput<N>bitsAccDestination with N = 8,
// 16 or 32 for B, H and S vdforms. input_n and input_m are both
// kInput<M>bits<input>, with M = 16, 32 or 64 for H, S and D source forms.
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \
                                kInput8bitsAccDestination, \
                                kInput16bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \
                                kInput16bitsAccDestination, \
                                kInput32bits##input, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \
                                kInput32bitsAccDestination, \
                                kInput64bits##input, \
                                kInput64bits##input); \
  } \
  TEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \
                                kInput8bitsAccDestination, \
                                kInput16bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \
                                kInput16bitsAccDestination, \
                                kInput32bits##input, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \
                                kInput32bitsAccDestination, \
                                kInput64bits##input, \
                                kInput64bits##input); \
  }
3162 
// Adapter used by the DEFINE_TEST_NEON_2OPIMM* macros: forwards all five
// arguments, unchanged and in the same order, to
// CALL_TEST_NEON_HELPER_2OpImm. The expansion is a brace-enclosed block, so a
// `;` written after an invocation is an additional empty statement.
#define CALL_TEST_NEON_HELPER_2OPIMM( \
    mnemonic, vdform, vnform, input_n, input_imm) \
  { \
    CALL_TEST_NEON_HELPER_2OpImm( \
        mnemonic, vdform, vnform, input_n, input_imm); \
  }
3171 
// Two-operand-plus-immediate vector tests in which vdform and vnform are the
// same token. Defines <mnemonic>_<form>_2OPIMM for the forms 8B, 16B, 4H, 8H,
// 2S, 4S and 2D. Each test passes kInput<N>bits<input> as input_n and
// kInput<N>bitsImm<input_imm> as input_imm, with N = 8, 16, 32 or 64 for B,
// H, S and D forms.
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, 16B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3215 
// Stamps out seven TEST cases, one per vector arrangement (8B, 16B, 4H, 8H,
// 2S, 4S, 2D), each named <mnemonic>_<arrangement>_2OPIMM.
//
// Every case forwards to CALL_TEST_NEON_HELPER_2OPIMM with the full
// arrangement as the first format argument and the bare lane size (B, H, S or
// D) as the second. The input and immediate identifiers are built by token
// pasting from the lane width: kInput<N>bits<input> and
// kInput<N>bitsImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, D, kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3259 
// Stamps out six TEST cases for an instruction whose first format argument is
// half the lane width of the second: 8B<-8H, 4H<-4S, 2S<-2D for <mnemonic>,
// and 16B<-8H, 8H<-4S, 4S<-2D for <mnemonic>2 (the name is formed by pasting
// a literal 2 onto the mnemonic).
//
// Test names use the first format argument (<mnemonic>_8B_2OPIMM, ...,
// <mnemonic>2_4S_2OPIMM). The input identifier is chosen by the wider lane
// size (kInput16/32/64bits<input>) and the immediate identifier by the
// narrower one (kInput8/16/32bitsImm<input_imm>).
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8H, kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4S, kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2D, kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 16B, 8H, kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 4S, kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 2D, kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
3297 
// Scalar counterpart of the narrowing two-operand-plus-immediate tests.
// Generates <mnemonic>_B_2OPIMM, <mnemonic>_H_2OPIMM and <mnemonic>_S_2OPIMM,
// pairing each first format argument (B, H, S) with the next wider one
// (H, S, D). The input identifier follows the wider size
// (kInput16/32/64bits<input>); the immediate identifier follows the narrower
// one (kInput8/16/32bitsImm<input_imm>).
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, H, kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, S, kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, D, kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
3317 
// Generates <mnemonic>_2S_2OPIMM, <mnemonic>_4S_2OPIMM and
// <mnemonic>_2D_2OPIMM. Each case passes the same arrangement for both format
// arguments and always takes its immediate from kInputDoubleImm<input_imm>,
// including for the single-precision (2S, 4S) forms.
//
// Every helper invocation is terminated with ';' so that the macro does not
// rely on CALL_TEST_NEON_HELPER_2OPIMM expanding to a braced block.
//
// NOTE(review): the 2S case uses kInputFloatBasic regardless of `input`,
// whereas the 4S and 2D cases honour `input`. This is preserved as-is because
// changing the input set would presumably invalidate any recorded reference
// traces for the 2S tests -- confirm before altering.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##Basic, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \
                                 kInputDoubleImm##input_imm); \
  }
3340 
// Generates <mnemonic>_2S_2OPIMM, <mnemonic>_4S_2OPIMM and
// <mnemonic>_2D_2OPIMM for an instruction taking floating-point inputs and an
// integer-width immediate. Inputs come from kInputFloat<...> (2S, 4S) or
// kInputDouble<input> (2D); immediates come from kInput32bitsImm<input_imm>
// (2S, 4S) or kInput64bitsImm<input_imm> (2D).
//
// Every helper invocation is terminated with ';' so that the macro does not
// rely on CALL_TEST_NEON_HELPER_2OPIMM expanding to a braced block.
//
// NOTE(review): the 2S case uses kInputFloatBasic regardless of `input`,
// whereas the 4S and 2D cases honour `input`. This is preserved as-is because
// changing the input set would presumably invalidate any recorded reference
// traces for the 2S tests -- confirm before altering.
#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##Basic, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \
                                 kInput64bitsImm##input_imm); \
  }
3363 
// Scalar floating-point variant: generates <mnemonic>_S_2OPIMM and
// <mnemonic>_D_2OPIMM. The S case pairs a kInputFloat<...> input with
// kInput32bitsImm<input_imm>; the D case pairs kInputDouble<input> with
// kInput64bitsImm<input_imm>.
//
// Every helper invocation is terminated with ';' so that the macro does not
// rely on CALL_TEST_NEON_HELPER_2OPIMM expanding to a braced block.
//
// NOTE(review): the S case uses kInputFloatBasic regardless of `input`,
// whereas the D case honours `input`. This is preserved as-is because
// changing the input set would presumably invalidate any recorded reference
// traces for the S tests -- confirm before altering.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##Basic, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \
                                 kInput64bitsImm##input_imm); \
  }
3379 
// Generates <mnemonic>_2S_2OPIMM, <mnemonic>_4S_2OPIMM and
// <mnemonic>_2D_2OPIMM only (no byte or halfword arrangements). Both format
// arguments are the same arrangement; inputs and immediates are the integer
// identifiers kInput32bits<input>/kInput32bitsImm<input_imm> for the S forms
// and kInput64bits<input>/kInput64bitsImm<input_imm> for the D form.
#define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3399 
// Generates the single test <mnemonic>_D_2OPIMM, which invokes
// CALL_TEST_NEON_HELPER_2OPIMM with D for both format arguments,
// kInput64bits<input> as the input and kInput64bitsImm<input_imm> as the
// immediate.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3407 
// Generates <mnemonic>_S_2OPIMM (S for both format arguments, with
// kInput32bits<input> and kInput32bitsImm<input_imm>) and then expands
// DEFINE_TEST_NEON_2OPIMM_SCALAR_D with the same three arguments.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
3416 
// Generates the single test <mnemonic>_D_2OPIMM for a floating-point scalar
// form: D for both format arguments, kInputDouble<input> as the input and
// kInputDoubleImm<input_imm> as the immediate.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \
                                 kInputDoubleImm##input_imm); \
  }
3424 
// Generates <mnemonic>_S_2OPIMM (S for both format arguments, with
// kInputFloat<input> and kInputDoubleImm<input_imm>) and then expands
// DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D with the same three arguments.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
3433 
// Generates <mnemonic>_B_2OPIMM and <mnemonic>_H_2OPIMM (same lane size for
// both format arguments, with the 8-bit and 16-bit input/immediate
// identifiers respectively) and then expands
// DEFINE_TEST_NEON_2OPIMM_SCALAR_SD with the same three arguments.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)
3448 
// Stamps out six TEST cases for an instruction whose first format argument is
// twice the lane width of the second: 8H<-8B, 4S<-4H, 2D<-2S for <mnemonic>,
// and 8H<-16B, 4S<-8H, 2D<-4S for <mnemonic>2 (the name is formed by pasting
// a literal 2 onto the mnemonic).
//
// Test names use the first format argument. Both the input and the immediate
// identifiers are chosen by the narrower lane size
// (kInput8/16/32bits<input>, kInput8/16/32bitsImm<input_imm>).
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 16B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 8H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 2D, 4S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
3486 
// Upper-case alias for the by-element helper: expands to a braced block that
// forwards all eight arguments, unchanged and in order, to
// CALL_TEST_NEON_HELPER_ByElement.
#define CALL_TEST_NEON_HELPER_BYELEMENT( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) { \
  CALL_TEST_NEON_HELPER_ByElement(mnemonic, vdform, vnform, vmform, \
                                  input_d, input_n, input_m, indices); \
}
3496 
// Generates four by-element TEST cases named <mnemonic>_<vd>_<vn>_<vm>:
// 4H/4H/H and 8H/8H/H (16-bit inputs, kInputHIndices), then 2S/2S/S and
// 4S/4S/S (32-bit inputs, kInputSIndices).
//
// For each case the three operand inputs are kInput<N>bits<input_d>,
// kInput<N>bits<input_n> and kInput<N>bits<input_m>, passed through
// CALL_TEST_NEON_HELPER_BYELEMENT.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4H, 4H, H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 8H, 8H, H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, kInputSIndices); \
  }
3530 
// Scalar by-element tests: generates <mnemonic>_H_H_H (16-bit inputs,
// kInputHIndices) and <mnemonic>_S_S_S (32-bit inputs, kInputSIndices). The
// three operand inputs are kInput<N>bits<input_d>, kInput<N>bits<input_n>
// and kInput<N>bits<input_m>.
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR( \
    mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_H_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, H, H, H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, kInputSIndices); \
  }
3549 
// Floating-point by-element tests: generates <mnemonic>_2S_2S_S and
// <mnemonic>_4S_4S_S (kInputFloat<...> inputs, kInputSIndices) and
// <mnemonic>_2D_2D_D (kInputDouble<...> inputs, kInputDIndices). The three
// operand inputs are suffixed with input_d, input_n and input_m respectively.
//
// The definition ends on the final closing brace with no trailing line
// continuation, so the line that follows the macro can never be spliced into
// its replacement list.
#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, \
                                    kInputFloat##input_d, \
                                    kInputFloat##input_n, \
                                    kInputFloat##input_m, kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, \
                                    kInputFloat##input_d, \
                                    kInputFloat##input_n, \
                                    kInputFloat##input_m, kInputSIndices); \
  } \
  TEST(mnemonic##_2D_2D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2D, 2D, D, \
                                    kInputDouble##input_d, \
                                    kInputDouble##input_n, \
                                    kInputDouble##input_m, kInputDIndices); \
  }
3575 
// Scalar floating-point by-element tests: generates <mnemonic>_S_S_S
// (kInputFloat<...> inputs, kInputSIndices) and <mnemonic>_D_D_D
// (kInputDouble<...> inputs, kInputDIndices). The three operand inputs are
// suffixed with inp_d, inp_n and inp_m respectively.
//
// The definition ends on the final closing brace with no trailing line
// continuation, so the line that follows the macro can never be spliced into
// its replacement list.
#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, \
                                    kInputFloat##inp_d, \
                                    kInputFloat##inp_n, \
                                    kInputFloat##inp_m, kInputSIndices); \
  } \
  TEST(mnemonic##_D_D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, D, D, \
                                    kInputDouble##inp_d, \
                                    kInputDouble##inp_n, \
                                    kInputDouble##inp_m, kInputDIndices); \
  }
3593 
3594 
// By-element tests where the vd form is twice the lane width of the vn/vm
// forms. Generates, in this order:
//   <mnemonic>_4S_4H_H   and <mnemonic>2_4S_8H_H  (kInputHIndices)
//   <mnemonic>_2D_2S_S   and <mnemonic>2_2D_4S_S  (kInputSIndices)
// The input_d identifier uses the wider lane size (kInput32bits / kInput64bits)
// while input_n and input_m use the narrower one (kInput16bits / kInput32bits).
// The "2" variants invoke the helper with a literal 2 pasted onto the
// mnemonic.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4S_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4H, H, \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##2_4S_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, 4S, 8H, H, \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_2D_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2D, 2S, S, \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, kInputSIndices); \
  } \
  TEST(mnemonic##2_2D_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, 2D, 4S, S, \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, kInputSIndices); \
  }
3628 
// Scalar by-element tests where the vd form is twice the width of the vn/vm
// forms: generates <mnemonic>_S_H_H (kInput32bits<input_d>, 16-bit
// input_n/input_m, kInputHIndices) and <mnemonic>_D_S_S
// (kInput64bits<input_d>, 32-bit input_n/input_m, kInputSIndices).
#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR( \
    mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_S_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, H, H, \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_D_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, S, S, \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, kInputSIndices); \
  }
3647 
3648 
// Convenience wrapper around CALL_TEST_NEON_HELPER_OpImmOpImm for
// instructions whose two vector operands share one arrangement: `variant` is
// forwarded twice. The mnemonic is forwarded both as a pointer to the
// MacroAssembler member of that name and as the bare token.
//
// The expansion is a braced block, so the ';' written after an invocation is
// just an empty statement.
#define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, variant,                     \
                                      input_d, input_imm1,                   \
                                      input_n, input_imm2)                   \
  {                                                                          \
    CALL_TEST_NEON_HELPER_OpImmOpImm(                                        \
        &MacroAssembler::mnemonic, mnemonic, variant, variant,               \
        input_d, input_imm1, input_n, input_imm2);                           \
  }
3661 
// Defines four tests for an instruction that takes two vector operands, each
// paired with an immediate: <mnemonic>_B, <mnemonic>_H, <mnemonic>_S and
// <mnemonic>_D, run on the 16B, 8H, 4S and 2D arrangements respectively.
//
// The input_* arguments are suffixes rather than identifiers. Register input
// suffixes are pasted onto kInput<N>bits and immediate suffixes onto
// kInput<N>bitsImm, where <N> is the lane width of the test being defined
// (8, 16, 32 or 64).
#define DEFINE_TEST_NEON_2OP2IMM(mnemonic, input_d, input_imm1,              \
                                 input_n, input_imm2)                        \
  TEST(mnemonic##_B) {                                                       \
    CALL_TEST_NEON_HELPER_2OP2IMM(                                           \
        mnemonic, 16B,                                                       \
        kInput8bits##input_d, kInput8bitsImm##input_imm1,                    \
        kInput8bits##input_n, kInput8bitsImm##input_imm2);                   \
  }                                                                          \
  TEST(mnemonic##_H) {                                                       \
    CALL_TEST_NEON_HELPER_2OP2IMM(                                           \
        mnemonic, 8H,                                                        \
        kInput16bits##input_d, kInput16bitsImm##input_imm1,                  \
        kInput16bits##input_n, kInput16bitsImm##input_imm2);                 \
  }                                                                          \
  TEST(mnemonic##_S) {                                                       \
    CALL_TEST_NEON_HELPER_2OP2IMM(                                           \
        mnemonic, 4S,                                                        \
        kInput32bits##input_d, kInput32bitsImm##input_imm1,                  \
        kInput32bits##input_n, kInput32bitsImm##input_imm2);                 \
  }                                                                          \
  TEST(mnemonic##_D) {                                                       \
    CALL_TEST_NEON_HELPER_2OP2IMM(                                           \
        mnemonic, 2D,                                                        \
        kInput64bits##input_d, kInput64bitsImm##input_imm1,                  \
        kInput64bits##input_n, kInput64bitsImm##input_imm2);                 \
  }
3697 
3698 
3699 // Advanced SIMD copy.
// `ins` goes through DEFINE_TEST_NEON_2OP2IMM, which defines ins_B, ins_H,
// ins_S and ins_D (16B, 8H, 4S and 2D arrangements). Both register operands
// use the "Basic" input lists and both immediates use the
// "LaneCountFromZero" immediate lists of the matching lane width.
// DEFINE_TEST_NEON_2OPIMM_COPY is defined outside this excerpt.
// NOTE(review): presumably it generates the per-arrangement `dup` tests in
// the same suffix-pasting way - confirm against its definition.
3700 DEFINE_TEST_NEON_2OP2IMM(ins,
3701                          Basic, LaneCountFromZero,
3702                          Basic, LaneCountFromZero)
3703 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
3704 
3705 
3706 // Advanced SIMD scalar copy.
// Scalar counterpart of the `dup` test above, with the same input-set and
// immediate-list suffixes. DEFINE_TEST_NEON_2OPIMM_SCALAR is defined outside
// this excerpt.
3707 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
3708 
3709 
3710 // Advanced SIMD three same.
// One generator-macro invocation per instruction; every entry in this group
// uses the "Basic" input set. The generator macros are defined outside this
// excerpt.
// NOTE(review): judging by their names only, _NO2D leaves out the 2D
// arrangement, _HS is limited to H and S lanes, _FP covers the
// floating-point arrangements and _8B_16B is limited to byte arrangements -
// confirm against the macro definitions.
// `and_` is spelled with a trailing underscore, presumably because `and` is
// an alternative operator token in C++ and cannot be used as an identifier.
3711 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
3712 DEFINE_TEST_NEON_3SAME(sqadd, Basic)
3713 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
3714 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
3715 DEFINE_TEST_NEON_3SAME(sqsub, Basic)
3716 DEFINE_TEST_NEON_3SAME(cmgt, Basic)
3717 DEFINE_TEST_NEON_3SAME(cmge, Basic)
3718 DEFINE_TEST_NEON_3SAME(sshl, Basic)
3719 DEFINE_TEST_NEON_3SAME(sqshl, Basic)
3720 DEFINE_TEST_NEON_3SAME(srshl, Basic)
3721 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
3722 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
3723 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
3724 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
3725 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
3726 DEFINE_TEST_NEON_3SAME(add, Basic)
3727 DEFINE_TEST_NEON_3SAME(cmtst, Basic)
3728 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
3729 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
3730 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
3731 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
3732 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
3733 DEFINE_TEST_NEON_3SAME(addp, Basic)
3734 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
3735 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
3736 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
3737 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
3738 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
3739 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
3740 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
3741 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
3742 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
3743 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
3744 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
3745 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
3746 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
3747 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
3748 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
3749 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
3750 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
3751 DEFINE_TEST_NEON_3SAME(uqadd, Basic)
3752 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
3753 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
3754 DEFINE_TEST_NEON_3SAME(uqsub, Basic)
3755 DEFINE_TEST_NEON_3SAME(cmhi, Basic)
3756 DEFINE_TEST_NEON_3SAME(cmhs, Basic)
3757 DEFINE_TEST_NEON_3SAME(ushl, Basic)
3758 DEFINE_TEST_NEON_3SAME(uqshl, Basic)
3759 DEFINE_TEST_NEON_3SAME(urshl, Basic)
3760 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
3761 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
3762 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
3763 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
3764 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
3765 DEFINE_TEST_NEON_3SAME(sub, Basic)
3766 DEFINE_TEST_NEON_3SAME(cmeq, Basic)
3767 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
3768 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
3769 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
3770 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
3771 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
3772 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
3773 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
3774 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
3775 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
3776 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
3777 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
3778 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
3779 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
3780 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
3781 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
3782 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
3783 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
3784 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
3785 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
3786 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
3787 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
3788 
3789 
3790 // Advanced SIMD scalar three same.
// Scalar counterparts of the three-same tests; every entry uses the "Basic"
// input set. The generator macros are defined outside this excerpt.
// NOTE(review): judging by their names only, _SCALAR_D is limited to the D
// register size, _SCALAR_HS to H and S, and _FP_SCALAR to the floating-point
// scalar sizes - confirm against the macro definitions.
3791 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
3792 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
3793 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
3794 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
3795 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
3796 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
3797 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
3798 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
3799 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
3800 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
3801 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
3802 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
3803 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
3804 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
3805 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
3806 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
3807 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
3808 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
3809 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
3810 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
3811 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
3812 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
3813 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
3814 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
3815 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
3816 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
3817 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
3818 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
3819 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
3820 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
3821 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
3822 
3823 
3824 // Advanced SIMD three different.
// Instructions whose destination and source lane widths differ; every entry
// uses the "Basic" input set. The generator macros are defined outside this
// excerpt.
// NOTE(review): judging by their names only, _LONG widens the result, _WIDE
// takes one wide and one narrow source, _NARROW narrows the result, and the
// _SD / _8H suffixes restrict the destination arrangements - confirm against
// the macro definitions.
3825 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
3826 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
3827 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
3828 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
3829 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
3830 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
3831 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
3832 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
3833 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
3834 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
3835 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
3836 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
3837 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
3838 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
3839 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
3840 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
3841 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
3842 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
3843 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
3844 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
3845 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
3846 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
3847 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
3848 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
3849 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
3850 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
3851 
3852 
3853 // Advanced SIMD scalar three different.
// Scalar forms of the saturating doubling multiply-long family, all on the
// "Basic" input set. DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD is defined outside
// this excerpt.
3854 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
3855 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
3856 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
3857 
3858 
3859 // Advanced SIMD scalar pairwise.
// The addp test is written out by hand (D destination, 2D source, 64-bit
// "Basic" inputs) instead of going through a generator macro. The
// floating-point pairwise instructions use
// DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD (defined outside this excerpt), one
// invocation per mnemonic. Each mnemonic must be listed exactly once:
// a repeated invocation would presumably define the same tests twice.
3860 TEST(addp_SCALAR) {
3861   CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
3862 }
3863 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
3864 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
3865 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
3866 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
3867 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
3868 
3869 
3870 // Advanced SIMD shift by immediate.
// Arguments are (mnemonic, register input set, immediate list). In this
// group the right shifts and narrowing shifts use the "TypeWidth" list, the
// left shifts use "TypeWidthFromZero", and the fixed-point conversions use
// "TypeWidthFromZeroToWidth". The generator macros are defined outside this
// excerpt.
// NOTE(review): the list names suggest the immediate ranges 1..width,
// 0..width-1 and 0..width respectively - confirm against the input tables.
// The trailing backslashes on the scvtf/ucvtf entries only join two source
// lines; they are not part of a macro definition.
3871 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
3872 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
3873 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
3874 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
3875 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
3876 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
3877 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
3878 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
3879 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
3880 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
3881 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
3882 DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, \
3883                            TypeWidthFromZeroToWidth)
3884 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
3885 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
3886 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
3887 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
3888 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
3889 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
3890 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
3891 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
3892 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
3893 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
3894 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
3895 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
3896 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
3897 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
3898 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, \
3899                            TypeWidthFromZeroToWidth)
3900 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
3901 
3902 
3903 // Advanced SIMD scalar shift by immediate.
// Scalar counterparts of the shift-by-immediate tests, with the same
// (mnemonic, register input set, immediate list) argument order and the same
// pairing of instructions with immediate lists. The generator macros are
// defined outside this excerpt.
// The trailing backslashes on the scvtf/ucvtf entries only join two source
// lines; they are not part of a macro definition.
3904 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
3905 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
3906 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
3907 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
3908 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
3909 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
3910 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
3911 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
3912 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, \
3913                                   TypeWidthFromZeroToWidth)
3914 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
3915 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
3916 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
3917 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
3918 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
3919 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
3920 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
3921 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
3922 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
3923 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
3924 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
3925 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
3926 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
3927 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, \
3928                                   TypeWidthFromZeroToWidth)
3929 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
3930 
3931 
3932 // Advanced SIMD two-register miscellaneous.
// Single-source instructions. Rounding and conversion instructions use the
// "Conversions" input set; the rest use "Basic". The compare-against-zero
// forms are generated through the *_2OPIMM* macros with the "Zero" immediate
// list, and shll uses its own "SHLL" immediate list. The generator macros
// are defined outside this excerpt.
// Instructions that have no entry here because another test already covers
// them are called out by the "covered by" comments below.
// `not_` is spelled with a trailing underscore, presumably because `not` is
// an alternative operator token in C++ and cannot be used as an identifier.
3933 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
3934 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
3935 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
3936 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
3937 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
3938 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
3939 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
3940 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
3941 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
3942 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
3943 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
3944 DEFINE_TEST_NEON_2SAME(abs, Basic)
3945 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
3946 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
3947 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
3948 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
3949 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
3950 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
3951 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
3952 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
3953 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
3954 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
3955 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
3956 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
3957 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
3958 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
3959 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
3960 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
3961 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
3962 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
3963 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
3964 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
3965 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
3966 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
3967 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
3968 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
3969 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
3970 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
3971 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
3972 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
3973 DEFINE_TEST_NEON_2SAME(neg, Basic)
3974 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
3975 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
3976 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
3977 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
3978 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
3979 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
3980 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
3981 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
3982 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
3983 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
3984 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
3985 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
3986 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
3987 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
3988 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
3989 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
3990 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
3991 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
3992 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
3993 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
3994 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)
3995 
3996 
3997 // Advanced SIMD scalar two-register miscellaneous.
// Scalar counterparts of the two-register miscellaneous tests. Conversion
// instructions use the "Conversions" input set; the rest use "Basic". The
// compare-against-zero forms take the "Zero" immediate list. The generator
// macros are defined outside this excerpt.
// fcvtxn is written out by hand (S destination, D source, double
// "Conversions" inputs) instead of going through a generator macro.
3998 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
3999 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
4000 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
4001 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
4002 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
4003 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
4004 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
4005 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
4006 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
4007 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
4008 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
4009 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
4010 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
4011 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
4012 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
4013 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
4014 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
4015 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
4016 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
4017 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
4018 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
4019 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
4020 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
4021 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
4022 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
4023 TEST(fcvtxn_SCALAR) {
4024   CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
4025 }
4026 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
4027 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
4028 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
4029 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
4030 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
4031 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
4032 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
4033 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
4034 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)
4035 
4036 
4037 // Advanced SIMD across lanes.
// Reductions over all lanes of one source vector, all on the "Basic" input
// set. The generator macros are defined outside this excerpt.
// NOTE(review): judging by their names only, _ACROSS_LONG is for reductions
// with a widened result and _ACROSS_FP for the floating-point reductions -
// confirm against the macro definitions.
4038 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
4039 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
4040 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
4041 DEFINE_TEST_NEON_ACROSS(addv, Basic)
4042 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
4043 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
4044 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
4045 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
4046 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
4047 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
4048 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
4049 
4050 
4051 // Advanced SIMD permute.
// The permute instructions reuse the same generator macro as the full
// three-same tests (DEFINE_TEST_NEON_3SAME, defined outside this excerpt),
// all on the "Basic" input set.
4052 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
4053 DEFINE_TEST_NEON_3SAME(trn1, Basic)
4054 DEFINE_TEST_NEON_3SAME(zip1, Basic)
4055 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
4056 DEFINE_TEST_NEON_3SAME(trn2, Basic)
4057 DEFINE_TEST_NEON_3SAME(zip2, Basic)
4058 
4059 
4060 // Advanced SIMD vector x indexed element.
// Arguments are (mnemonic, input_d, input_n, input_m): the input-set
// suffixes for the destination/accumulator, the vector source and the
// indexed-element source. Every entry uses "Basic" for all three.
// Only the tail of DEFINE_TEST_NEON_BYELEMENT_DIFF is visible in this
// excerpt; it shows that macro also defining a test for the second-half
// form (<mnemonic>2, 2D from 4S by S element). The other generator macros
// are defined outside this excerpt.
4061 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
4062 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
4063 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
4064 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
4065 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
4066 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
4067 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
4068 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
4069 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
4070 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
4071 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
4072 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
4073 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
4074 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
4075 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
4076 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
4077 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
4078 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
4079 
4080 
4081 // Advanced SIMD scalar x indexed element.
// Arguments are (mnemonic, input_d, input_n, input_m), all "Basic" here.
// DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR defines <mnemonic>_S_H_H and
// <mnemonic>_D_S_S for each widening instruction. The
// DEFINE_TEST_NEON_BYELEMENT_SCALAR and DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR
// macros are defined outside this excerpt.
4082 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
4083 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
4084 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
4085 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
4086 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
4087 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
4088 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
4089 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
4090 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
4091 
4092 }  // namespace vixl
4093