1 // Copyright 2015, ARM Limited
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include <stdio.h>
28 #include <float.h>
29
30 #include "test-runner.h"
31 #include "test-utils-a64.h"
32 #include "test-simulator-inputs-a64.h"
33 #include "test-simulator-traces-a64.h"
34 #include "vixl/a64/macro-assembler-a64.h"
35 #include "vixl/a64/simulator-a64.h"
36
37 namespace vixl {
38
39 // ==== Simulator Tests ====
40 //
41 // These simulator tests check instruction behaviour against a trace taken from
42 // real AArch64 hardware. The same test code is used to generate the trace; the
43 // results are printed to stdout when the test is run with --sim_test_trace.
44 //
45 // The input lists and expected results are stored in test/traces. The expected
46 // results can be regenerated using tools/generate_simulator_traces.py. Adding
47 // a test for a new instruction is described at the top of
48 // test-simulator-traces-a64.h.
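//
// For illustration only (the mnemonic and array names below are hypothetical,
// chosen to match the naming scheme used in this file rather than taken from
// the real trace files): a trace test for a two-operand FP instruction pairs
// one of the Test*Op dispatchers defined below with a generated expected-output
// array, roughly like this:
//
//   TEST(fadd_s) {
//     Test2Op("fadd_s", &MacroAssembler::Fadd,
//             kInputFloatBasic, kInputFloatBasicCount,
//             kExpected_fadd_s, kExpectedCount_fadd_s);
//   }
//
// Running such a test with --sim_test_trace prints the kExpected_fadd_s and
// kExpectedCount_fadd_s definitions, which tools/generate_simulator_traces.py
// can use to regenerate the files in test/traces.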
49
50 #define __ masm.
51 #define TEST(name) TEST_(SIM_##name)
52
53 #define BUF_SIZE (256)
54
55 #ifdef VIXL_INCLUDE_SIMULATOR
56
57 #define SETUP() \
58 MacroAssembler masm(BUF_SIZE); \
59 Decoder decoder; \
60 Simulator* simulator = Test::run_debugger() ? new Debugger(&decoder) \
61 : new Simulator(&decoder); \
62 simulator->set_coloured_trace(Test::coloured_trace()); \
63 simulator->set_instruction_stats(Test::instruction_stats()); \
64
65 #define START() \
66 masm.Reset(); \
67 simulator->ResetState(); \
68 __ PushCalleeSavedRegisters(); \
69 if (Test::trace_reg()) { \
70 __ Trace(LOG_STATE, TRACE_ENABLE); \
71 } \
72 if (Test::trace_write()) { \
73 __ Trace(LOG_WRITE, TRACE_ENABLE); \
74 } \
75 if (Test::trace_sim()) { \
76 __ Trace(LOG_DISASM, TRACE_ENABLE); \
77 } \
78 if (Test::instruction_stats()) { \
79 __ EnableInstrumentation(); \
80 }
81
82 #define END() \
83 if (Test::instruction_stats()) { \
84 __ DisableInstrumentation(); \
85 } \
86 __ Trace(LOG_ALL, TRACE_DISABLE); \
87 __ PopCalleeSavedRegisters(); \
88 __ Ret(); \
89 masm.FinalizeCode()
90
91 #define RUN() \
92 simulator->RunFrom(masm.GetStartAddress<Instruction*>())
93
94 #define TEARDOWN() \
95 delete simulator;
96
97 #else // VIXL_INCLUDE_SIMULATOR
98
99 #define SETUP() \
100 MacroAssembler masm(BUF_SIZE); \
101 CPU::SetUp()
102
103 #define START() \
104 masm.Reset(); \
105 __ PushCalleeSavedRegisters()
106
107 #define END() \
108 __ PopCalleeSavedRegisters(); \
109 __ Ret(); \
110 masm.FinalizeCode()
111
112 #define RUN() \
113 { \
114 byte* buffer_start = masm.GetStartAddress<byte*>(); \
115 size_t buffer_length = masm.CursorOffset(); \
116 void (*test_function)(void); \
117 \
118 CPU::EnsureIAndDCacheCoherency(buffer_start, buffer_length); \
119 VIXL_STATIC_ASSERT(sizeof(buffer_start) == sizeof(test_function)); \
120 memcpy(&test_function, &buffer_start, sizeof(buffer_start)); \
121 test_function(); \
122 }
123
124 #define TEARDOWN()
125
126 #endif // VIXL_INCLUDE_SIMULATOR
127
128
129 // The maximum number of errors to report in detail for each test.
130 static const unsigned kErrorReportLimit = 8;
131
132
133 // Overloaded versions of rawbits_to_double and rawbits_to_float for use in the
134 // templated test functions.
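// For example, rawbits_to_fp(UINT32_C(0x3f800000)) returns 1.0f, and
// rawbits_to_fp(UINT64_C(0x3ff0000000000000)) returns 1.0.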
135 static float rawbits_to_fp(uint32_t bits) {
136 return rawbits_to_float(bits);
137 }
138
139 static double rawbits_to_fp(uint64_t bits) {
140 return rawbits_to_double(bits);
141 }
142
143
144 // MacroAssembler member function pointers to pass to the test dispatchers.
145 typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd,
146 const FPRegister& fn);
147 typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd,
148 const FPRegister& fn,
149 const FPRegister& fm);
150 typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd,
151 const FPRegister& fn,
152 const FPRegister& fm,
153 const FPRegister& fa);
154 typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn,
155 const FPRegister& fm);
156 typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn,
157 double value);
158 typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
159 const FPRegister& fn);
160 typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
161 const FPRegister& fn,
162 int fbits);
163 typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd,
164 const Register& rn,
165 int fbits);
166 // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
167 // consolidated into a single typedef.
168 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(
169 const VRegister& vd, const VRegister& vn);
170 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(
171 const VRegister& vd, const VRegister& vn, const VRegister& vm);
172 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(
173 const VRegister& vd, const VRegister& vn, const VRegister& vm, int vm_index);
174 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
175 const VRegister& vd, int imm1, const VRegister& vn, int imm2);
176
177 // This makes it possible to use the same typename for both the function
178 // pointer and the array of immediates passed to the helper routines.
179 template <typename T>
180 class Test2OpImmediateNEONHelper_t {
181 public:
182 typedef void (MacroAssembler::*mnemonic)(
183 const VRegister& vd, const VRegister& vn, T imm);
184 };
185
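// For illustration (hypothetical usage): instantiating the class with the
// immediate type yields the member-function-pointer type for a given helper,
// for example:
//
//   Test2OpImmediateNEONHelper_t<int>::mnemonic helper = &MacroAssembler::Shl;
//
// where 'Shl' stands in for any "vd, vn, #imm" style MacroAssembler method.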
186
187 // Maximum number of hex characters required to represent values of either
188 // templated type.
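// For example, with Ta = uint32_t and Tb = uint64_t this returns 16, since the
// larger type is eight bytes and each byte needs two hex characters.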
189 template <typename Ta, typename Tb>
190 static unsigned MaxHexCharCount() {
191 unsigned count = static_cast<unsigned>(std::max(sizeof(Ta), sizeof(Tb)));
192 return (count * 8) / 4;
193 }
194
195
196 // Standard test dispatchers.
197
198
199 static void Test1Op_Helper(Test1OpFPHelper_t helper, uintptr_t inputs,
200 unsigned inputs_length, uintptr_t results,
201 unsigned d_size, unsigned n_size) {
202 VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize));
203 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
204
205 SETUP();
206 START();
207
208 // Roll up the loop to keep the code size down.
209 Label loop_n;
210
211 Register out = x0;
212 Register inputs_base = x1;
213 Register length = w2;
214 Register index_n = w3;
215
216 const int n_index_shift =
217 (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
218
219 FPRegister fd = (d_size == kDRegSize) ? d0 : s0;
220 FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
221
222 __ Mov(out, results);
223 __ Mov(inputs_base, inputs);
224 __ Mov(length, inputs_length);
225
226 __ Mov(index_n, 0);
227 __ Bind(&loop_n);
228 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
229
230 {
231 SingleEmissionCheckScope guard(&masm);
232 (masm.*helper)(fd, fn);
233 }
234 __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
235
236 __ Add(index_n, index_n, 1);
237 __ Cmp(index_n, inputs_length);
238 __ B(lo, &loop_n);
239
240 END();
241 RUN();
242 TEARDOWN();
243 }
244
245
246 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
247 // rawbits representations of doubles or floats. This ensures that exact bit
248 // comparisons can be performed.
249 template <typename Tn, typename Td>
250 static void Test1Op(const char * name, Test1OpFPHelper_t helper,
251 const Tn inputs[], unsigned inputs_length,
252 const Td expected[], unsigned expected_length) {
253 VIXL_ASSERT(inputs_length > 0);
254
255 const unsigned results_length = inputs_length;
256 Td * results = new Td[results_length];
257
258 const unsigned d_bits = sizeof(Td) * 8;
259 const unsigned n_bits = sizeof(Tn) * 8;
260
261 Test1Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
262 reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
263
264 if (Test::sim_test_trace()) {
265 // Print the results.
266 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
267 for (unsigned d = 0; d < results_length; d++) {
268 printf(" 0x%0*" PRIx64 ",\n",
269 d_bits / 4, static_cast<uint64_t>(results[d]));
270 }
271 printf("};\n");
272 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
273 } else {
274 // Check the results.
275 VIXL_CHECK(expected_length == results_length);
276 unsigned error_count = 0;
277 unsigned d = 0;
278 for (unsigned n = 0; n < inputs_length; n++, d++) {
279 if (results[d] != expected[d]) {
280 if (++error_count > kErrorReportLimit) continue;
281
282 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
283 name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
284 name, rawbits_to_fp(inputs[n]));
285 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
286 d_bits / 4, static_cast<uint64_t>(expected[d]),
287 rawbits_to_fp(expected[d]));
288 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
289 d_bits / 4, static_cast<uint64_t>(results[d]),
290 rawbits_to_fp(results[d]));
291 printf("\n");
292 }
293 }
294 VIXL_ASSERT(d == expected_length);
295 if (error_count > kErrorReportLimit) {
296 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
297 }
298 VIXL_CHECK(error_count == 0);
299 }
300 delete[] results;
301 }
302
303
304 static void Test2Op_Helper(Test2OpFPHelper_t helper,
305 uintptr_t inputs, unsigned inputs_length,
306 uintptr_t results, unsigned reg_size) {
307 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
308
309 SETUP();
310 START();
311
312 // Roll up the loop to keep the code size down.
313 Label loop_n, loop_m;
314
315 Register out = x0;
316 Register inputs_base = x1;
317 Register length = w2;
318 Register index_n = w3;
319 Register index_m = w4;
320
321 bool double_op = reg_size == kDRegSize;
322 const int index_shift =
323 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
324
325 FPRegister fd = double_op ? d0 : s0;
326 FPRegister fn = double_op ? d1 : s1;
327 FPRegister fm = double_op ? d2 : s2;
328
329 __ Mov(out, results);
330 __ Mov(inputs_base, inputs);
331 __ Mov(length, inputs_length);
332
333 __ Mov(index_n, 0);
334 __ Bind(&loop_n);
335 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
336
337 __ Mov(index_m, 0);
338 __ Bind(&loop_m);
339 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
340
341 {
342 SingleEmissionCheckScope guard(&masm);
343 (masm.*helper)(fd, fn, fm);
344 }
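// The inner loop advances fastest over the second operand, so the results
// buffer is effectively indexed as results[n * inputs_length + m] (the
// three-operand helper below appends a further "* inputs_length + a" term).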
345 __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
346
347 __ Add(index_m, index_m, 1);
348 __ Cmp(index_m, inputs_length);
349 __ B(lo, &loop_m);
350
351 __ Add(index_n, index_n, 1);
352 __ Cmp(index_n, inputs_length);
353 __ B(lo, &loop_n);
354
355 END();
356 RUN();
357 TEARDOWN();
358 }
359
360
361 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
362 // rawbits representations of doubles or floats. This ensures that exact bit
363 // comparisons can be performed.
364 template <typename T>
365 static void Test2Op(const char * name, Test2OpFPHelper_t helper,
366 const T inputs[], unsigned inputs_length,
367 const T expected[], unsigned expected_length) {
368 VIXL_ASSERT(inputs_length > 0);
369
370 const unsigned results_length = inputs_length * inputs_length;
371 T * results = new T[results_length];
372
373 const unsigned bits = sizeof(T) * 8;
374
375 Test2Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
376 reinterpret_cast<uintptr_t>(results), bits);
377
378 if (Test::sim_test_trace()) {
379 // Print the results.
380 printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
381 for (unsigned d = 0; d < results_length; d++) {
382 printf(" 0x%0*" PRIx64 ",\n",
383 bits / 4, static_cast<uint64_t>(results[d]));
384 }
385 printf("};\n");
386 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
387 } else {
388 // Check the results.
389 VIXL_CHECK(expected_length == results_length);
390 unsigned error_count = 0;
391 unsigned d = 0;
392 for (unsigned n = 0; n < inputs_length; n++) {
393 for (unsigned m = 0; m < inputs_length; m++, d++) {
394 if (results[d] != expected[d]) {
395 if (++error_count > kErrorReportLimit) continue;
396
397 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
398 name,
399 bits / 4, static_cast<uint64_t>(inputs[n]),
400 bits / 4, static_cast<uint64_t>(inputs[m]),
401 name,
402 rawbits_to_fp(inputs[n]),
403 rawbits_to_fp(inputs[m]));
404 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
405 bits / 4, static_cast<uint64_t>(expected[d]),
406 rawbits_to_fp(expected[d]));
407 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
408 bits / 4, static_cast<uint64_t>(results[d]),
409 rawbits_to_fp(results[d]));
410 printf("\n");
411 }
412 }
413 }
414 VIXL_ASSERT(d == expected_length);
415 if (error_count > kErrorReportLimit) {
416 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
417 }
418 VIXL_CHECK(error_count == 0);
419 }
420 delete[] results;
421 }
422
423
424 static void Test3Op_Helper(Test3OpFPHelper_t helper,
425 uintptr_t inputs, unsigned inputs_length,
426 uintptr_t results, unsigned reg_size) {
427 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
428
429 SETUP();
430 START();
431
432 // Roll up the loop to keep the code size down.
433 Label loop_n, loop_m, loop_a;
434
435 Register out = x0;
436 Register inputs_base = x1;
437 Register length = w2;
438 Register index_n = w3;
439 Register index_m = w4;
440 Register index_a = w5;
441
442 bool double_op = reg_size == kDRegSize;
443 const int index_shift =
444 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
445
446 FPRegister fd = double_op ? d0 : s0;
447 FPRegister fn = double_op ? d1 : s1;
448 FPRegister fm = double_op ? d2 : s2;
449 FPRegister fa = double_op ? d3 : s3;
450
451 __ Mov(out, results);
452 __ Mov(inputs_base, inputs);
453 __ Mov(length, inputs_length);
454
455 __ Mov(index_n, 0);
456 __ Bind(&loop_n);
457 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
458
459 __ Mov(index_m, 0);
460 __ Bind(&loop_m);
461 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
462
463 __ Mov(index_a, 0);
464 __ Bind(&loop_a);
465 __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
466
467 {
468 SingleEmissionCheckScope guard(&masm);
469 (masm.*helper)(fd, fn, fm, fa);
470 }
471 __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
472
473 __ Add(index_a, index_a, 1);
474 __ Cmp(index_a, inputs_length);
475 __ B(lo, &loop_a);
476
477 __ Add(index_m, index_m, 1);
478 __ Cmp(index_m, inputs_length);
479 __ B(lo, &loop_m);
480
481 __ Add(index_n, index_n, 1);
482 __ Cmp(index_n, inputs_length);
483 __ B(lo, &loop_n);
484
485 END();
486 RUN();
487 TEARDOWN();
488 }
489
490
491 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
492 // rawbits representations of doubles or floats. This ensures that exact bit
493 // comparisons can be performed.
494 template <typename T>
495 static void Test3Op(const char * name, Test3OpFPHelper_t helper,
496 const T inputs[], unsigned inputs_length,
497 const T expected[], unsigned expected_length) {
498 VIXL_ASSERT(inputs_length > 0);
499
500 const unsigned results_length = inputs_length * inputs_length * inputs_length;
501 T * results = new T[results_length];
502
503 const unsigned bits = sizeof(T) * 8;
504
505 Test3Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
506 reinterpret_cast<uintptr_t>(results), bits);
507
508 if (Test::sim_test_trace()) {
509 // Print the results.
510 printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
511 for (unsigned d = 0; d < results_length; d++) {
512 printf(" 0x%0*" PRIx64 ",\n",
513 bits / 4, static_cast<uint64_t>(results[d]));
514 }
515 printf("};\n");
516 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
517 } else {
518 // Check the results.
519 VIXL_CHECK(expected_length == results_length);
520 unsigned error_count = 0;
521 unsigned d = 0;
522 for (unsigned n = 0; n < inputs_length; n++) {
523 for (unsigned m = 0; m < inputs_length; m++) {
524 for (unsigned a = 0; a < inputs_length; a++, d++) {
525 if (results[d] != expected[d]) {
526 if (++error_count > kErrorReportLimit) continue;
527
528 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
529 " (%s %g %g %g):\n",
530 name,
531 bits / 4, static_cast<uint64_t>(inputs[n]),
532 bits / 4, static_cast<uint64_t>(inputs[m]),
533 bits / 4, static_cast<uint64_t>(inputs[a]),
534 name,
535 rawbits_to_fp(inputs[n]),
536 rawbits_to_fp(inputs[m]),
537 rawbits_to_fp(inputs[a]));
538 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
539 bits / 4, static_cast<uint64_t>(expected[d]),
540 rawbits_to_fp(expected[d]));
541 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
542 bits / 4, static_cast<uint64_t>(results[d]),
543 rawbits_to_fp(results[d]));
544 printf("\n");
545 }
546 }
547 }
548 }
549 VIXL_ASSERT(d == expected_length);
550 if (error_count > kErrorReportLimit) {
551 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
552 }
553 VIXL_CHECK(error_count == 0);
554 }
555 delete[] results;
556 }
557
558
559 static void TestCmp_Helper(TestFPCmpHelper_t helper,
560 uintptr_t inputs, unsigned inputs_length,
561 uintptr_t results, unsigned reg_size) {
562 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
563
564 SETUP();
565 START();
566
567 // Roll up the loop to keep the code size down.
568 Label loop_n, loop_m;
569
570 Register out = x0;
571 Register inputs_base = x1;
572 Register length = w2;
573 Register index_n = w3;
574 Register index_m = w4;
575 Register flags = x5;
576
577 bool double_op = reg_size == kDRegSize;
578 const int index_shift =
579 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
580
581 FPRegister fn = double_op ? d1 : s1;
582 FPRegister fm = double_op ? d2 : s2;
583
584 __ Mov(out, results);
585 __ Mov(inputs_base, inputs);
586 __ Mov(length, inputs_length);
587
588 __ Mov(index_n, 0);
589 __ Bind(&loop_n);
590 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
591
592 __ Mov(index_m, 0);
593 __ Bind(&loop_m);
594 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
595
596 {
597 SingleEmissionCheckScope guard(&masm);
598 (masm.*helper)(fn, fm);
599 }
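// Read the comparison result back from NZCV (bits 31:28 of the flags
// register) and store it as a 4-bit value. For example, an ordered 'equal'
// comparison produces nZCv (0b0110), while an unordered comparison involving
// a NaN produces nzCV (0b0011).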
600 __ Mrs(flags, NZCV);
601 __ Ubfx(flags, flags, 28, 4);
602 __ Strb(flags, MemOperand(out, 1, PostIndex));
603
604 __ Add(index_m, index_m, 1);
605 __ Cmp(index_m, inputs_length);
606 __ B(lo, &loop_m);
607
608 __ Add(index_n, index_n, 1);
609 __ Cmp(index_n, inputs_length);
610 __ B(lo, &loop_n);
611
612 END();
613 RUN();
614 TEARDOWN();
615 }
616
617
618 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
619 // rawbits representations of doubles or floats. This ensures that exact bit
620 // comparisons can be performed.
621 template <typename T>
622 static void TestCmp(const char * name, TestFPCmpHelper_t helper,
623 const T inputs[], unsigned inputs_length,
624 const uint8_t expected[], unsigned expected_length) {
625 VIXL_ASSERT(inputs_length > 0);
626
627 const unsigned results_length = inputs_length * inputs_length;
628 uint8_t * results = new uint8_t[results_length];
629
630 const unsigned bits = sizeof(T) * 8;
631
632 TestCmp_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
633 reinterpret_cast<uintptr_t>(results), bits);
634
635 if (Test::sim_test_trace()) {
636 // Print the results.
637 printf("const uint8_t kExpected_%s[] = {\n", name);
638 for (unsigned d = 0; d < results_length; d++) {
639 // Each NZCV result only requires 4 bits.
640 VIXL_ASSERT((results[d] & 0xf) == results[d]);
641 printf(" 0x%" PRIx8 ",\n", results[d]);
642 }
643 printf("};\n");
644 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
645 } else {
646 // Check the results.
647 VIXL_CHECK(expected_length == results_length);
648 unsigned error_count = 0;
649 unsigned d = 0;
650 for (unsigned n = 0; n < inputs_length; n++) {
651 for (unsigned m = 0; m < inputs_length; m++, d++) {
652 if (results[d] != expected[d]) {
653 if (++error_count > kErrorReportLimit) continue;
654
655 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
656 name,
657 bits / 4, static_cast<uint64_t>(inputs[n]),
658 bits / 4, static_cast<uint64_t>(inputs[m]),
659 name,
660 rawbits_to_fp(inputs[n]),
661 rawbits_to_fp(inputs[m]));
662 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n",
663 (expected[d] & 0x8) ? 'N' : 'n',
664 (expected[d] & 0x4) ? 'Z' : 'z',
665 (expected[d] & 0x2) ? 'C' : 'c',
666 (expected[d] & 0x1) ? 'V' : 'v',
667 expected[d]);
668 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n",
669 (results[d] & 0x8) ? 'N' : 'n',
670 (results[d] & 0x4) ? 'Z' : 'z',
671 (results[d] & 0x2) ? 'C' : 'c',
672 (results[d] & 0x1) ? 'V' : 'v',
673 results[d]);
674 printf("\n");
675 }
676 }
677 }
678 VIXL_ASSERT(d == expected_length);
679 if (error_count > kErrorReportLimit) {
680 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
681 }
682 VIXL_CHECK(error_count == 0);
683 }
684 delete[] results;
685 }
686
687
688 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
689 uintptr_t inputs, unsigned inputs_length,
690 uintptr_t results, unsigned reg_size) {
691 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
692
693 SETUP();
694 START();
695
696 // Roll up the loop to keep the code size down.
697 Label loop_n, loop_m;
698
699 Register out = x0;
700 Register inputs_base = x1;
701 Register length = w2;
702 Register index_n = w3;
703 Register flags = x4;
704
705 bool double_op = reg_size == kDRegSize;
706 const int index_shift =
707 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
708
709 FPRegister fn = double_op ? d1 : s1;
710
711 __ Mov(out, results);
712 __ Mov(inputs_base, inputs);
713 __ Mov(length, inputs_length);
714
715 __ Mov(index_n, 0);
716 __ Bind(&loop_n);
717 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
718
719 {
720 SingleEmissionCheckScope guard(&masm);
721 (masm.*helper)(fn, 0.0);
722 }
723 __ Mrs(flags, NZCV);
724 __ Ubfx(flags, flags, 28, 4);
725 __ Strb(flags, MemOperand(out, 1, PostIndex));
726
727 __ Add(index_n, index_n, 1);
728 __ Cmp(index_n, inputs_length);
729 __ B(lo, &loop_n);
730
731 END();
732 RUN();
733 TEARDOWN();
734 }
735
736
737 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
738 // rawbits representations of doubles or floats. This ensures that exact bit
739 // comparisons can be performed.
740 template <typename T>
741 static void TestCmpZero(const char * name, TestFPCmpZeroHelper_t helper,
742 const T inputs[], unsigned inputs_length,
743 const uint8_t expected[], unsigned expected_length) {
744 VIXL_ASSERT(inputs_length > 0);
745
746 const unsigned results_length = inputs_length;
747 uint8_t * results = new uint8_t[results_length];
748
749 const unsigned bits = sizeof(T) * 8;
750
751 TestCmpZero_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
752 reinterpret_cast<uintptr_t>(results), bits);
753
754 if (Test::sim_test_trace()) {
755 // Print the results.
756 printf("const uint8_t kExpected_%s[] = {\n", name);
757 for (unsigned d = 0; d < results_length; d++) {
758 // Each NZCV result only requires 4 bits.
759 VIXL_ASSERT((results[d] & 0xf) == results[d]);
760 printf(" 0x%" PRIx8 ",\n", results[d]);
761 }
762 printf("};\n");
763 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
764 } else {
765 // Check the results.
766 VIXL_CHECK(expected_length == results_length);
767 unsigned error_count = 0;
768 unsigned d = 0;
769 for (unsigned n = 0; n < inputs_length; n++, d++) {
770 if (results[d] != expected[d]) {
771 if (++error_count > kErrorReportLimit) continue;
772
773 printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
774 name,
775 bits / 4, static_cast<uint64_t>(inputs[n]),
776 bits / 4, 0,
777 name,
778 rawbits_to_fp(inputs[n]));
779 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n",
780 (expected[d] & 0x8) ? 'N' : 'n',
781 (expected[d] & 0x4) ? 'Z' : 'z',
782 (expected[d] & 0x2) ? 'C' : 'c',
783 (expected[d] & 0x1) ? 'V' : 'v',
784 expected[d]);
785 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n",
786 (results[d] & 0x8) ? 'N' : 'n',
787 (results[d] & 0x4) ? 'Z' : 'z',
788 (results[d] & 0x2) ? 'C' : 'c',
789 (results[d] & 0x1) ? 'V' : 'v',
790 results[d]);
791 printf("\n");
792 }
793 }
794 VIXL_ASSERT(d == expected_length);
795 if (error_count > kErrorReportLimit) {
796 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
797 }
798 VIXL_CHECK(error_count == 0);
799 }
800 delete[] results;
801 }
802
803
804 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
805 uintptr_t inputs, unsigned inputs_length,
806 uintptr_t results,
807 unsigned d_size, unsigned n_size) {
808 VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
809 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
810
811 SETUP();
812 START();
813
814 // Roll up the loop to keep the code size down.
815 Label loop_n;
816
817 Register out = x0;
818 Register inputs_base = x1;
819 Register length = w2;
820 Register index_n = w3;
821
822 const int n_index_shift =
823 (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
824
825 Register rd = (d_size == kXRegSize) ? x10 : w10;
826 FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
827
828 __ Mov(out, results);
829 __ Mov(inputs_base, inputs);
830 __ Mov(length, inputs_length);
831
832 __ Mov(index_n, 0);
833 __ Bind(&loop_n);
834 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
835
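// Convert each input with every valid number of fraction bits (0 to d_size
// inclusive), so the results buffer receives (d_size + 1) entries per input.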
836 for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
837 {
838 SingleEmissionCheckScope guard(&masm);
839 (masm.*helper)(rd, fn, fbits);
840 }
841 __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
842 }
843
844 __ Add(index_n, index_n, 1);
845 __ Cmp(index_n, inputs_length);
846 __ B(lo, &loop_n);
847
848 END();
849 RUN();
850 TEARDOWN();
851 }
852
853
854 static void TestFPToInt_Helper(TestFPToIntHelper_t helper, uintptr_t inputs,
855 unsigned inputs_length, uintptr_t results,
856 unsigned d_size, unsigned n_size) {
857 VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
858 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
859
860 SETUP();
861 START();
862
863 // Roll up the loop to keep the code size down.
864 Label loop_n;
865
866 Register out = x0;
867 Register inputs_base = x1;
868 Register length = w2;
869 Register index_n = w3;
870
871 const int n_index_shift =
872 (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
873
874 Register rd = (d_size == kXRegSize) ? x10 : w10;
875 FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
876
877 __ Mov(out, results);
878 __ Mov(inputs_base, inputs);
879 __ Mov(length, inputs_length);
880
881 __ Mov(index_n, 0);
882 __ Bind(&loop_n);
883 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
884
885 {
886 SingleEmissionCheckScope guard(&masm);
887 (masm.*helper)(rd, fn);
888 }
889 __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
890
891 __ Add(index_n, index_n, 1);
892 __ Cmp(index_n, inputs_length);
893 __ B(lo, &loop_n);
894
895 END();
896 RUN();
897 TEARDOWN();
898 }
899
900
901 // Test FP instructions.
902 // - The inputs[] array should be an array of rawbits representations of
903 // doubles or floats. This ensures that exact bit comparisons can be
904 // performed.
905 // - The expected[] array should be an array of signed integers.
906 template <typename Tn, typename Td>
907 static void TestFPToS(const char * name, TestFPToIntHelper_t helper,
908 const Tn inputs[], unsigned inputs_length,
909 const Td expected[], unsigned expected_length) {
910 VIXL_ASSERT(inputs_length > 0);
911
912 const unsigned results_length = inputs_length;
913 Td * results = new Td[results_length];
914
915 const unsigned d_bits = sizeof(Td) * 8;
916 const unsigned n_bits = sizeof(Tn) * 8;
917
918 TestFPToInt_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
919 reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
920
921 if (Test::sim_test_trace()) {
922 // Print the results.
923 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
924 // There is no simple C++ literal for INT*_MIN that doesn't produce
925 // warnings, so we use an appropriate constant in that case instead.
926 // Deriving int_d_min in this way (rather than just checking INT64_MIN and
927 // the like) avoids warnings about comparing values with differing ranges.
928 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
929 const int64_t int_d_min = -(int_d_max) - 1;
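    // For example, with d_bits == 32, int_d_max is 0x7fffffff and INT32_MIN
    // is printed as "-INT32_C(2147483647) - 1".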
930 for (unsigned d = 0; d < results_length; d++) {
931 if (results[d] == int_d_min) {
932 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
933 } else {
934 // Some constants (such as those between INT32_MAX and UINT32_MAX)
935 // trigger compiler warnings. To avoid these warnings, use an
936 // appropriate macro to make the type explicit.
937 int64_t result_int64 = static_cast<int64_t>(results[d]);
938 if (result_int64 >= 0) {
939 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
940 } else {
941 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
942 }
943 }
944 }
945 printf("};\n");
946 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
947 } else {
948 // Check the results.
949 VIXL_CHECK(expected_length == results_length);
950 unsigned error_count = 0;
951 unsigned d = 0;
952 for (unsigned n = 0; n < inputs_length; n++, d++) {
953 if (results[d] != expected[d]) {
954 if (++error_count > kErrorReportLimit) continue;
955
956 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
957 name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
958 name, rawbits_to_fp(inputs[n]));
959 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
960 d_bits / 4, static_cast<uint64_t>(expected[d]),
961 static_cast<int64_t>(expected[d]));
962 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
963 d_bits / 4, static_cast<uint64_t>(results[d]),
964 static_cast<int64_t>(results[d]));
965 printf("\n");
966 }
967 }
968 VIXL_ASSERT(d == expected_length);
969 if (error_count > kErrorReportLimit) {
970 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
971 }
972 VIXL_CHECK(error_count == 0);
973 }
974 delete[] results;
975 }
976
977
978 // Test FP instructions.
979 // - The inputs[] array should be an array of rawbits representations of
980 // doubles or floats. This ensures that exact bit comparisons can be
981 // performed.
982 // - The expected[] array should be an array of unsigned integers.
983 template <typename Tn, typename Td>
984 static void TestFPToU(const char * name, TestFPToIntHelper_t helper,
985 const Tn inputs[], unsigned inputs_length,
986 const Td expected[], unsigned expected_length) {
987 VIXL_ASSERT(inputs_length > 0);
988
989 const unsigned results_length = inputs_length;
990 Td * results = new Td[results_length];
991
992 const unsigned d_bits = sizeof(Td) * 8;
993 const unsigned n_bits = sizeof(Tn) * 8;
994
995 TestFPToInt_Helper(helper,
996 reinterpret_cast<uintptr_t>(inputs), inputs_length,
997 reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
998
999 if (Test::sim_test_trace()) {
1000 // Print the results.
1001 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1002 for (unsigned d = 0; d < results_length; d++) {
1003 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1004 }
1005 printf("};\n");
1006 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1007 } else {
1008 // Check the results.
1009 VIXL_CHECK(expected_length == results_length);
1010 unsigned error_count = 0;
1011 unsigned d = 0;
1012 for (unsigned n = 0; n < inputs_length; n++, d++) {
1013 if (results[d] != expected[d]) {
1014 if (++error_count > kErrorReportLimit) continue;
1015
1016 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1017 name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
1018 name, rawbits_to_fp(inputs[n]));
1019 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1020 d_bits / 4, static_cast<uint64_t>(expected[d]),
1021 static_cast<uint64_t>(expected[d]));
1022 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1023 d_bits / 4, static_cast<uint64_t>(results[d]),
1024 static_cast<uint64_t>(results[d]));
1025 printf("\n");
1026 }
1027 }
1028 VIXL_ASSERT(d == expected_length);
1029 if (error_count > kErrorReportLimit) {
1030 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1031 }
1032 VIXL_CHECK(error_count == 0);
1033 }
1034 delete[] results;
1035 }
1036
1037
1038 // Test FP instructions.
1039 // - The inputs[] array should be an array of rawbits representations of
1040 // doubles or floats. This ensures that exact bit comparisons can be
1041 // performed.
1042 // - The expected[] array should be an array of signed integers.
1043 template <typename Tn, typename Td>
1044 static void TestFPToFixedS(const char * name, TestFPToFixedHelper_t helper,
1045 const Tn inputs[], unsigned inputs_length,
1046 const Td expected[], unsigned expected_length) {
1047 VIXL_ASSERT(inputs_length > 0);
1048
1049 const unsigned d_bits = sizeof(Td) * 8;
1050 const unsigned n_bits = sizeof(Tn) * 8;
1051
1052 const unsigned results_length = inputs_length * (d_bits + 1);
1053 Td * results = new Td[results_length];
1054
1055 TestFPToFixed_Helper(helper,
1056 reinterpret_cast<uintptr_t>(inputs), inputs_length,
1057 reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
1058
1059 if (Test::sim_test_trace()) {
1060 // Print the results.
1061 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1062 // There is no simple C++ literal for INT*_MIN that doesn't produce
1063 // warnings, so we use an appropriate constant in that case instead.
1064 // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1065 // the like) avoids warnings about comparing values with differing ranges.
1066 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1067 const int64_t int_d_min = -(int_d_max) - 1;
1068 for (unsigned d = 0; d < results_length; d++) {
1069 if (results[d] == int_d_min) {
1070 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1071 } else {
1072 // Some constants (such as those between INT32_MAX and UINT32_MAX)
1073 // trigger compiler warnings. To avoid these warnings, use an
1074 // appropriate macro to make the type explicit.
1075 int64_t result_int64 = static_cast<int64_t>(results[d]);
1076 if (result_int64 >= 0) {
1077 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1078 } else {
1079 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1080 }
1081 }
1082 }
1083 printf("};\n");
1084 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1085 } else {
1086 // Check the results.
1087 VIXL_CHECK(expected_length == results_length);
1088 unsigned error_count = 0;
1089 unsigned d = 0;
1090 for (unsigned n = 0; n < inputs_length; n++) {
1091 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1092 if (results[d] != expected[d]) {
1093 if (++error_count > kErrorReportLimit) continue;
1094
1095 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1096 name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
1097 name, rawbits_to_fp(inputs[n]), fbits);
1098 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1099 d_bits / 4, static_cast<uint64_t>(expected[d]),
1100 static_cast<int64_t>(expected[d]));
1101 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1102 d_bits / 4, static_cast<uint64_t>(results[d]),
1103 static_cast<int64_t>(results[d]));
1104 printf("\n");
1105 }
1106 }
1107 }
1108 VIXL_ASSERT(d == expected_length);
1109 if (error_count > kErrorReportLimit) {
1110 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1111 }
1112 VIXL_CHECK(error_count == 0);
1113 }
1114 delete[] results;
1115 }
1116
1117
1118 // Test FP instructions.
1119 // - The inputs[] array should be an array of rawbits representations of
1120 // doubles or floats. This ensures that exact bit comparisons can be
1121 // performed.
1122 // - The expected[] array should be an array of unsigned integers.
1123 template <typename Tn, typename Td>
1124 static void TestFPToFixedU(const char * name, TestFPToFixedHelper_t helper,
1125 const Tn inputs[], unsigned inputs_length,
1126 const Td expected[], unsigned expected_length) {
1127 VIXL_ASSERT(inputs_length > 0);
1128
1129 const unsigned d_bits = sizeof(Td) * 8;
1130 const unsigned n_bits = sizeof(Tn) * 8;
1131
1132 const unsigned results_length = inputs_length * (d_bits + 1);
1133 Td * results = new Td[results_length];
1134
1135 TestFPToFixed_Helper(helper,
1136 reinterpret_cast<uintptr_t>(inputs), inputs_length,
1137 reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
1138
1139 if (Test::sim_test_trace()) {
1140 // Print the results.
1141 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1142 for (unsigned d = 0; d < results_length; d++) {
1143 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1144 }
1145 printf("};\n");
1146 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1147 } else {
1148 // Check the results.
1149 VIXL_CHECK(expected_length == results_length);
1150 unsigned error_count = 0;
1151 unsigned d = 0;
1152 for (unsigned n = 0; n < inputs_length; n++) {
1153 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1154 if (results[d] != expected[d]) {
1155 if (++error_count > kErrorReportLimit) continue;
1156
1157 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1158 name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
1159 name, rawbits_to_fp(inputs[n]), fbits);
1160 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1161 d_bits / 4, static_cast<uint64_t>(expected[d]),
1162 static_cast<uint64_t>(expected[d]));
1163 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1164 d_bits / 4, static_cast<uint64_t>(results[d]),
1165 static_cast<uint64_t>(results[d]));
1166 printf("\n");
1167 }
1168 }
1169 }
1170 VIXL_ASSERT(d == expected_length);
1171 if (error_count > kErrorReportLimit) {
1172 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1173 }
1174 VIXL_CHECK(error_count == 0);
1175 }
1176 delete[] results;
1177 }
1178
1179
1180 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
1181
1182
1183 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
1184 uintptr_t inputs_n, unsigned inputs_n_length,
1185 uintptr_t results,
1186 VectorFormat vd_form,
1187 VectorFormat vn_form) {
1188 VIXL_ASSERT(vd_form != kFormatUndefined);
1189 VIXL_ASSERT(vn_form != kFormatUndefined);
1190
1191 SETUP();
1192 START();
1193
1194 // Roll up the loop to keep the code size down.
1195 Label loop_n;
1196
1197 Register out = x0;
1198 Register inputs_n_base = x1;
1199 Register inputs_n_last_16bytes = x3;
1200 Register index_n = x5;
1201
1202 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1203 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1204 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1205
1206 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1207 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1208 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1209 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1210 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1211
1212
1213 // These will be either a D- or a Q-register form, with a single lane
1214 // (for use in scalar load and store operations).
1215 VRegister vd = VRegister(0, vd_bits);
1216 VRegister vn = v1.V16B();
1217 VRegister vntmp = v3.V16B();
1218
1219 // These will have the correct format for use when calling 'helper'.
1220 VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
1221 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1222
1223 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1224 VRegister vntmp_single = VRegister(3, vn_lane_bits);
1225
1226 __ Mov(out, results);
1227
1228 __ Mov(inputs_n_base, inputs_n);
1229 __ Mov(inputs_n_last_16bytes,
1230 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1231
1232 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
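// 'vn' starts out holding the last 16 bytes of the input array. On each
// iteration, the Ext below shifts 'vn' down by one lane and inserts the newly
// loaded lane at the top, so 'vn' acts as a sliding window over the inputs
// that initially wraps around the end of the array.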
1233
1234 __ Mov(index_n, 0);
1235 __ Bind(&loop_n);
1236
1237 __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1238 vn_lane_bytes_log2));
1239 __ Ext(vn, vn, vntmp, vn_lane_bytes);
1240
1241 // Set the destination to zero.
1242 // TODO: Setting the destination to values other than zero
1243 // might be a better test for instructions such as sqxtn2
1244 // which may leave parts of V registers unchanged.
1245 __ Movi(vd.V16B(), 0);
1246
1247 {
1248 SingleEmissionCheckScope guard(&masm);
1249 (masm.*helper)(vd_helper, vn_helper);
1250 }
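// The full destination register is stored, so the results buffer holds
// vd_lane_count lanes per input, indexed as results[n * vd_lane_count + lane].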
1251 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
1252
1253 __ Add(index_n, index_n, 1);
1254 __ Cmp(index_n, inputs_n_length);
1255 __ B(lo, &loop_n);
1256
1257 END();
1258 RUN();
1259 TEARDOWN();
1260 }
1261
1262
1263 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1264 // arrays of rawbit representation of input values. This ensures that
1265 // exact bit comparisons can be performed.
1266 template <typename Td, typename Tn>
1267 static void Test1OpNEON(const char * name, Test1OpNEONHelper_t helper,
1268 const Tn inputs_n[], unsigned inputs_n_length,
1269 const Td expected[], unsigned expected_length,
1270 VectorFormat vd_form,
1271 VectorFormat vn_form) {
1272 VIXL_ASSERT(inputs_n_length > 0);
1273
1274 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1275 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1276 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1277
1278 const unsigned results_length = inputs_n_length;
1279 Td* results = new Td[results_length * vd_lane_count];
1280 const unsigned lane_bit = sizeof(Td) * 8;
1281 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1282
1283 Test1OpNEON_Helper(helper,
1284 reinterpret_cast<uintptr_t>(inputs_n),
1285 inputs_n_length,
1286 reinterpret_cast<uintptr_t>(results),
1287 vd_form, vn_form);
1288
1289 if (Test::sim_test_trace()) {
1290 // Print the results.
1291 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1292 for (unsigned iteration = 0; iteration < results_length; iteration++) {
1293 printf(" ");
1294 // Output a separate result for each element of the result vector.
1295 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1296 unsigned index = lane + (iteration * vd_lane_count);
1297 printf(" 0x%0*" PRIx64 ",",
1298 lane_len_in_hex,
1299 static_cast<uint64_t>(results[index]));
1300 }
1301 printf("\n");
1302 }
1303
1304 printf("};\n");
1305 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1306 name,
1307 results_length);
1308 } else {
1309 // Check the results.
1310 VIXL_CHECK(expected_length == results_length);
1311 unsigned error_count = 0;
1312 unsigned d = 0;
1313 const char* padding = " ";
1314 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1315 for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1316 bool error_in_vector = false;
1317
1318 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1319 unsigned output_index = (n * vd_lane_count) + lane;
1320
1321 if (results[output_index] != expected[output_index]) {
1322 error_in_vector = true;
1323 break;
1324 }
1325 }
1326
1327 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1328 printf("%s\n", name);
1329 printf(" Vn%.*s| Vd%.*s| Expected\n",
1330 lane_len_in_hex+1, padding,
1331 lane_len_in_hex+1, padding);
1332
1333 const unsigned first_index_n =
1334 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1335
1336 for (unsigned lane = 0;
1337 lane < std::max(vd_lane_count, vn_lane_count);
1338 lane++) {
1339 unsigned output_index = (n * vd_lane_count) + lane;
1340 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1341
1342 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1343 "| 0x%0*" PRIx64 "\n",
1344 results[output_index] != expected[output_index] ? '*' : ' ',
1345 lane_len_in_hex,
1346 static_cast<uint64_t>(inputs_n[input_index_n]),
1347 lane_len_in_hex,
1348 static_cast<uint64_t>(results[output_index]),
1349 lane_len_in_hex,
1350 static_cast<uint64_t>(expected[output_index]));
1351 }
1352 }
1353 }
1354 VIXL_ASSERT(d == expected_length);
1355 if (error_count > kErrorReportLimit) {
1356 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1357 }
1358 VIXL_CHECK(error_count == 0);
1359 }
1360 delete[] results;
1361 }
1362
1363
1364 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1365 // where <V> is one of B, H, S or D registers.
1366 // e.g. saddlv H1, v0.8B
1367
1368 // TODO: Change tests to store all lanes of the resulting V register.
1369 // Some tests store all 128 bits of the resulting V register to
1370 // check the simulator's behaviour on the rest of the register.
1371 // This is better than storing the affected lanes only.
1372 // Change any tests such as the 'Across' template to do the same.
1373
1374 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
1375 uintptr_t inputs_n,
1376 unsigned inputs_n_length,
1377 uintptr_t results,
1378 VectorFormat vd_form,
1379 VectorFormat vn_form) {
1380 VIXL_ASSERT(vd_form != kFormatUndefined);
1381 VIXL_ASSERT(vn_form != kFormatUndefined);
1382
1383 SETUP();
1384 START();
1385
1386 // Roll up the loop to keep the code size down.
1387 Label loop_n;
1388
1389 Register out = x0;
1390 Register inputs_n_base = x1;
1391 Register inputs_n_last_vector = x3;
1392 Register index_n = x5;
1393
1394 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1395 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1396
1397 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1398 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1399 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1400 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1401 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1402
1403
1404 // These will be either a D- or a Q-register form, with a single lane
1405 // (for use in scalar load and store operations).
1406 VRegister vd = VRegister(0, vd_bits);
1407 VRegister vn = VRegister(1, vn_bits);
1408 VRegister vntmp = VRegister(3, vn_bits);
1409
1410 // These will have the correct format for use when calling 'helper'.
1411 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1412
1413 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1414 VRegister vntmp_single = VRegister(3, vn_lane_bits);
1415
1416 // Same registers for use in the 'ext' instructions.
1417 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1418 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1419
1420 __ Mov(out, results);
1421
1422 __ Mov(inputs_n_base, inputs_n);
1423 __ Mov(inputs_n_last_vector,
1424 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1425
1426 __ Ldr(vn, MemOperand(inputs_n_last_vector));
1427
1428 __ Mov(index_n, 0);
1429 __ Bind(&loop_n);
1430
1431 __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1432 vn_lane_bytes_log2));
1433 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1434
1435 // Set the destination to zero for tests such as '[r]shrn2'.
1436 // TODO: Setting the destination to values other than zero
1437 // might be a better test for instructions such as sqxtn2
1438 // which may leave parts of V registers unchanged.
1439 __ Movi(vd.V16B(), 0);
1440
1441 {
1442 SingleEmissionCheckScope guard(&masm);
1443 (masm.*helper)(vd, vn_helper);
1444 }
1445 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
1446
1447 __ Add(index_n, index_n, 1);
1448 __ Cmp(index_n, inputs_n_length);
1449 __ B(lo, &loop_n);
1450
1451 END();
1452 RUN();
1453 TEARDOWN();
1454 }
1455
1456 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1457 // arrays of rawbit representation of input values. This ensures that
1458 // exact bit comparisons can be performed.
1459 template <typename Td, typename Tn>
1460 static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper,
1461 const Tn inputs_n[], unsigned inputs_n_length,
1462 const Td expected[], unsigned expected_length,
1463 VectorFormat vd_form,
1464 VectorFormat vn_form) {
1465 VIXL_ASSERT(inputs_n_length > 0);
1466
1467 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1468
1469 const unsigned results_length = inputs_n_length;
1470 Td* results = new Td[results_length * vd_lane_count];
1471 const unsigned lane_bit = sizeof(Td) * 8;
1472 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1473
1474 Test1OpAcrossNEON_Helper(helper,
1475 reinterpret_cast<uintptr_t>(inputs_n),
1476 inputs_n_length,
1477 reinterpret_cast<uintptr_t>(results),
1478 vd_form, vn_form);
1479
1480 if (Test::sim_test_trace()) {
1481 // Print the results.
1482 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1483 for (unsigned iteration = 0; iteration < results_length; iteration++) {
1484 printf(" ");
1485 // Output a separate result for each element of the result vector.
1486 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1487 unsigned index = lane + (iteration * vd_lane_count);
1488 printf(" 0x%0*" PRIx64 ",",
1489 lane_len_in_hex,
1490 static_cast<uint64_t>(results[index]));
1491 }
1492 printf("\n");
1493 }
1494
1495 printf("};\n");
1496 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1497 name,
1498 results_length);
1499 } else {
1500 // Check the results.
1501 VIXL_CHECK(expected_length == results_length);
1502 unsigned error_count = 0;
1503 unsigned d = 0;
1504 const char* padding = " ";
1505 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1506 for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1507 bool error_in_vector = false;
1508
1509 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1510 unsigned output_index = (n * vd_lane_count) + lane;
1511
1512 if (results[output_index] != expected[output_index]) {
1513 error_in_vector = true;
1514 break;
1515 }
1516 }
1517
1518 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1519 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1520
1521 printf("%s\n", name);
1522 printf(" Vn%.*s| Vd%.*s| Expected\n",
1523 lane_len_in_hex+1, padding,
1524 lane_len_in_hex+1, padding);
1525
1526 // TODO: In case of an error, all tests print out as many elements as
1527 // there are lanes in the output or input vectors. This way
1528 // the reader can see all the values that were needed for the
1529 // operation, but the output also contains unnecessary values.
1530 // These prints can be improved according to the arguments
1531 // passed to test functions.
1532 // This output for the 'Across' category has the required
1533 // modifications.
1534 for (unsigned lane = 0; lane < vn_lane_count; lane++) {
1535 unsigned output_index = n * vd_lane_count;
1536 unsigned input_index_n = (inputs_n_length - vn_lane_count +
1537 n + 1 + lane) % inputs_n_length;
1538
1539 if (vn_lane_count-1 == lane) { // Is this the last lane?
1540 // Print the result element(s) in the last lane only.
1541 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1542 "| 0x%0*" PRIx64 "\n",
1543 results[output_index] != expected[output_index] ? '*' : ' ',
1544 lane_len_in_hex,
1545 static_cast<uint64_t>(inputs_n[input_index_n]),
1546 lane_len_in_hex,
1547 static_cast<uint64_t>(results[output_index]),
1548 lane_len_in_hex,
1549 static_cast<uint64_t>(expected[output_index]));
1550 } else {
1551 printf(" 0x%0*" PRIx64 " | %.*s| %.*s\n",
1552 lane_len_in_hex,
1553 static_cast<uint64_t>(inputs_n[input_index_n]),
1554 lane_len_in_hex+1, padding,
1555 lane_len_in_hex+1, padding);
1556 }
1557 }
1558 }
1559 }
1560 VIXL_ASSERT(d == expected_length);
1561 if (error_count > kErrorReportLimit) {
1562 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1563 }
1564 VIXL_CHECK(error_count == 0);
1565 }
1566 delete[] results;
1567 }
1568
1569
1570 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1571
1572 // TODO: Iterate over inputs_d once the traces file is split.
1573
1574 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
1575 uintptr_t inputs_d,
1576 uintptr_t inputs_n, unsigned inputs_n_length,
1577 uintptr_t inputs_m, unsigned inputs_m_length,
1578 uintptr_t results,
1579 VectorFormat vd_form,
1580 VectorFormat vn_form,
1581 VectorFormat vm_form) {
1582 VIXL_ASSERT(vd_form != kFormatUndefined);
1583 VIXL_ASSERT(vn_form != kFormatUndefined);
1584 VIXL_ASSERT(vm_form != kFormatUndefined);
1585
1586 SETUP();
1587 START();
1588
1589 // Roll up the loop to keep the code size down.
1590 Label loop_n, loop_m;
1591
1592 Register out = x0;
1593 Register inputs_n_base = x1;
1594 Register inputs_m_base = x2;
1595 Register inputs_d_base = x3;
1596 Register inputs_n_last_16bytes = x4;
1597 Register inputs_m_last_16bytes = x5;
1598 Register index_n = x6;
1599 Register index_m = x7;
1600
1601 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1602 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1603 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1604
1605 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1606 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1607 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1608 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1609 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1610
1611 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1612 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1613 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1614 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1615 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1616
1617
1618 // Always load and store 128 bits regardless of the format.
1619 VRegister vd = v0.V16B();
1620 VRegister vn = v1.V16B();
1621 VRegister vm = v2.V16B();
1622 VRegister vntmp = v3.V16B();
1623 VRegister vmtmp = v4.V16B();
1624 VRegister vres = v5.V16B();
1625
1626 // These will have the correct format for calling the 'helper'.
1627 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1628 VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1629 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1630
1631 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1632 VRegister vntmp_single = VRegister(3, vn_lane_bits);
1633 VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1634
1635 __ Mov(out, results);
1636
1637 __ Mov(inputs_d_base, inputs_d);
1638
1639 __ Mov(inputs_n_base, inputs_n);
1640 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1641 __ Mov(inputs_m_base, inputs_m);
1642 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1643
1644 __ Ldr(vd, MemOperand(inputs_d_base));
1645 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1646 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1647
1648 __ Mov(index_n, 0);
1649 __ Bind(&loop_n);
1650
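  // Load the next element of the 'n' inputs into the low lane of 'vntmp', then
  // use Ext to drop the lowest lane of 'vn' and insert the new element at the
  // top. 'vn' therefore acts as a sliding window over the input array; it was
  // pre-loaded above from the last 16 bytes of the inputs so that the first
  // iterations already see well-defined data.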
1651 __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1652 vn_lane_bytes_log2));
1653 __ Ext(vn, vn, vntmp, vn_lane_bytes);
1654
1655 __ Mov(index_m, 0);
1656 __ Bind(&loop_m);
1657
1658 __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
1659 vm_lane_bytes_log2));
1660 __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1661
1662 __ Mov(vres, vd);
1663 {
1664 SingleEmissionCheckScope guard(&masm);
1665 (masm.*helper)(vres_helper, vn_helper, vm_helper);
1666 }
1667 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
1668
1669 __ Add(index_m, index_m, 1);
1670 __ Cmp(index_m, inputs_m_length);
1671 __ B(lo, &loop_m);
1672
1673 __ Add(index_n, index_n, 1);
1674 __ Cmp(index_n, inputs_n_length);
1675 __ B(lo, &loop_n);
1676
1677 END();
1678 RUN();
1679 TEARDOWN();
1680 }
1681
1682
1683 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1684 // arrays of the raw-bit representations of the input values. This ensures
1685 // that exact bit comparisons can be performed.
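//
// For illustration only (this array is not one of the real input tables used
// by these tests), a single-precision input list in this raw-bit form might
// look like:
//
//   static const uint32_t kExampleFloatInputs[] = {
//     0x3f800000,  // 1.0f
//     0x40000000,  // 2.0f
//     0x80000000,  // -0.0f
//     0x7fc00000,  // default quiet NaN
//   };
//
// Each lane of each result is then compared bit-for-bit with the expected
// value, so NaN payloads and the sign of zero are checked exactly.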
1686 template <typename Td, typename Tn, typename Tm>
1687 static void Test2OpNEON(const char * name, Test2OpNEONHelper_t helper,
1688 const Td inputs_d[],
1689 const Tn inputs_n[], unsigned inputs_n_length,
1690 const Tm inputs_m[], unsigned inputs_m_length,
1691 const Td expected[], unsigned expected_length,
1692 VectorFormat vd_form,
1693 VectorFormat vn_form,
1694 VectorFormat vm_form) {
1695 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
1696
1697 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
1698
1699 const unsigned results_length = inputs_n_length * inputs_m_length;
1700 Td* results = new Td[results_length * vd_lane_count];
1701 const unsigned lane_bit = sizeof(Td) * 8;
1702 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
1703
1704 Test2OpNEON_Helper(helper,
1705 reinterpret_cast<uintptr_t>(inputs_d),
1706 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
1707 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
1708 reinterpret_cast<uintptr_t>(results),
1709 vd_form, vn_form, vm_form);
1710
1711 if (Test::sim_test_trace()) {
1712 // Print the results.
1713 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1714 for (unsigned iteration = 0; iteration < results_length; iteration++) {
1715 printf(" ");
1716 // Output a separate result for each element of the result vector.
1717 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1718 unsigned index = lane + (iteration * vd_lane_count);
1719 printf(" 0x%0*" PRIx64 ",",
1720 lane_len_in_hex,
1721 static_cast<uint64_t>(results[index]));
1722 }
1723 printf("\n");
1724 }
1725
1726 printf("};\n");
1727 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1728 name,
1729 results_length);
1730 } else {
1731 // Check the results.
1732 VIXL_CHECK(expected_length == results_length);
1733 unsigned error_count = 0;
1734 unsigned d = 0;
1735 const char* padding = " ";
1736 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1737 for (unsigned n = 0; n < inputs_n_length; n++) {
1738 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
1739 bool error_in_vector = false;
1740
1741 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1742 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1743 (m * vd_lane_count) + lane;
1744
1745 if (results[output_index] != expected[output_index]) {
1746 error_in_vector = true;
1747 break;
1748 }
1749 }
1750
1751 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1752 printf("%s\n", name);
1753 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
1754 lane_len_in_hex+1, padding,
1755 lane_len_in_hex+1, padding,
1756 lane_len_in_hex+1, padding,
1757 lane_len_in_hex+1, padding);
1758
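          // The index arithmetic below reconstructs the sliding window the
          // helper used for this iteration: the highest printed lane maps to
          // inputs_n[n] and inputs_m[m], and lower lanes map to the elements
          // processed just before them, wrapping around the input arrays.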
1759 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1760 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1761 (m * vd_lane_count) + lane;
1762 unsigned input_index_n = (inputs_n_length - vd_lane_count +
1763 n + 1 + lane) % inputs_n_length;
1764 unsigned input_index_m = (inputs_m_length - vd_lane_count +
1765 m + 1 + lane) % inputs_m_length;
1766
1767 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1768 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1769 results[output_index] != expected[output_index] ? '*' : ' ',
1770 lane_len_in_hex,
1771 static_cast<uint64_t>(inputs_d[lane]),
1772 lane_len_in_hex,
1773 static_cast<uint64_t>(inputs_n[input_index_n]),
1774 lane_len_in_hex,
1775 static_cast<uint64_t>(inputs_m[input_index_m]),
1776 lane_len_in_hex,
1777 static_cast<uint64_t>(results[output_index]),
1778 lane_len_in_hex,
1779 static_cast<uint64_t>(expected[output_index]));
1780 }
1781 }
1782 }
1783 }
1784 VIXL_ASSERT(d == expected_length);
1785 if (error_count > kErrorReportLimit) {
1786 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1787 }
1788 VIXL_CHECK(error_count == 0);
1789 }
1790 delete[] results;
1791 }
1792
1793
1794 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
1795
1796 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
1797 uintptr_t inputs_d,
1798 uintptr_t inputs_n,
1799 unsigned inputs_n_length,
1800 uintptr_t inputs_m,
1801 unsigned inputs_m_length,
1802 const int indices[],
1803 unsigned indices_length,
1804 uintptr_t results,
1805 VectorFormat vd_form,
1806 VectorFormat vn_form,
1807 VectorFormat vm_form) {
1808 VIXL_ASSERT(vd_form != kFormatUndefined);
1809 VIXL_ASSERT(vn_form != kFormatUndefined);
1810 VIXL_ASSERT(vm_form != kFormatUndefined);
1811
1812 SETUP();
1813 START();
1814
1815 // Roll up the loop to keep the code size down.
1816 Label loop_n, loop_m;
1817
1818 Register out = x0;
1819 Register inputs_n_base = x1;
1820 Register inputs_m_base = x2;
1821 Register inputs_d_base = x3;
1822 Register inputs_n_last_16bytes = x4;
1823 Register inputs_m_last_16bytes = x5;
1824 Register index_n = x6;
1825 Register index_m = x7;
1826
1827 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1828 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1829 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1830
1831 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1832 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1833 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1834 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1835 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1836
1837 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1838 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1839 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1840 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1841 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1842
1843
1844 // Always load and store 128 bits regardless of the format.
1845 VRegister vd = v0.V16B();
1846 VRegister vn = v1.V16B();
1847 VRegister vm = v2.V16B();
1848 VRegister vntmp = v3.V16B();
1849 VRegister vmtmp = v4.V16B();
1850 VRegister vres = v5.V16B();
1851
1852 // These will have the correct format for calling the 'helper'.
1853 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1854 VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1855 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1856
1857 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1858 VRegister vntmp_single = VRegister(3, vn_lane_bits);
1859 VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1860
1861 __ Mov(out, results);
1862
1863 __ Mov(inputs_d_base, inputs_d);
1864
1865 __ Mov(inputs_n_base, inputs_n);
1866 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1867 __ Mov(inputs_m_base, inputs_m);
1868 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1869
1870 __ Ldr(vd, MemOperand(inputs_d_base));
1871 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1872 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1873
1874 __ Mov(index_n, 0);
1875 __ Bind(&loop_n);
1876
1877 __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1878 vn_lane_bytes_log2));
1879 __ Ext(vn, vn, vntmp, vn_lane_bytes);
1880
1881 __ Mov(index_m, 0);
1882 __ Bind(&loop_m);
1883
1884 __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
1885 vm_lane_bytes_log2));
1886 __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1887
1888 __ Mov(vres, vd);
1889 {
1890 for (unsigned i = 0; i < indices_length; i++) {
1891 {
1892 SingleEmissionCheckScope guard(&masm);
1893 (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
1894 }
1895 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
1896 }
1897 }
1898
1899 __ Add(index_m, index_m, 1);
1900 __ Cmp(index_m, inputs_m_length);
1901 __ B(lo, &loop_m);
1902
1903 __ Add(index_n, index_n, 1);
1904 __ Cmp(index_n, inputs_n_length);
1905 __ B(lo, &loop_n);
1906
1907 END();
1908 RUN();
1909 TEARDOWN();
1910 }
1911
1912
1913
1914 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1915 // arrays of the raw-bit representations of the input values. This ensures
1916 // that exact bit comparisons can be performed.
1917 template <typename Td, typename Tn, typename Tm>
1918 static void TestByElementNEON(const char *name,
1919 TestByElementNEONHelper_t helper,
1920 const Td inputs_d[],
1921 const Tn inputs_n[], unsigned inputs_n_length,
1922 const Tm inputs_m[], unsigned inputs_m_length,
1923 const int indices[], unsigned indices_length,
1924 const Td expected[], unsigned expected_length,
1925 VectorFormat vd_form,
1926 VectorFormat vn_form,
1927 VectorFormat vm_form) {
1928 VIXL_ASSERT(inputs_n_length > 0);
1929 VIXL_ASSERT(inputs_m_length > 0);
1930 VIXL_ASSERT(indices_length > 0);
1931
1932 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
1933
1934 const unsigned results_length = inputs_n_length * inputs_m_length *
1935 indices_length;
1936 Td* results = new Td[results_length * vd_lane_count];
1937 const unsigned lane_bit = sizeof(Td) * 8;
1938 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
1939
1940 TestByElementNEON_Helper(helper,
1941 reinterpret_cast<uintptr_t>(inputs_d),
1942 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
1943 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
1944 indices, indices_length,
1945 reinterpret_cast<uintptr_t>(results),
1946 vd_form, vn_form, vm_form);
1947
1948 if (Test::sim_test_trace()) {
1949 // Print the results.
1950 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1951 for (unsigned iteration = 0; iteration < results_length; iteration++) {
1952 printf(" ");
1953 // Output a separate result for each element of the result vector.
1954 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1955 unsigned index = lane + (iteration * vd_lane_count);
1956 printf(" 0x%0*" PRIx64 ",",
1957 lane_len_in_hex,
1958 static_cast<uint64_t>(results[index]));
1959 }
1960 printf("\n");
1961 }
1962
1963 printf("};\n");
1964 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1965 name,
1966 results_length);
1967 } else {
1968 // Check the results.
1969 VIXL_CHECK(expected_length == results_length);
1970 unsigned error_count = 0;
1971 unsigned d = 0;
1972 const char* padding = " ";
1973 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1974 for (unsigned n = 0; n < inputs_n_length; n++) {
1975 for (unsigned m = 0; m < inputs_m_length; m++) {
1976 for (unsigned index = 0; index < indices_length; index++, d++) {
1977 bool error_in_vector = false;
1978
1979 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1980 unsigned output_index =
1981 (n * inputs_m_length * indices_length * vd_lane_count) +
1982 (m * indices_length * vd_lane_count) +
1983 (index * vd_lane_count) + lane;
1984
1985 if (results[output_index] != expected[output_index]) {
1986 error_in_vector = true;
1987 break;
1988 }
1989 }
1990
1991 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1992 printf("%s\n", name);
1993 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
1994 lane_len_in_hex+1, padding,
1995 lane_len_in_hex+1, padding,
1996 lane_len_in_hex+1, padding,
1997 lane_len_in_hex+1, padding);
1998
1999 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2000 unsigned output_index =
2001 (n * inputs_m_length * indices_length * vd_lane_count) +
2002 (m * indices_length * vd_lane_count) +
2003 (index * vd_lane_count) + lane;
2004 unsigned input_index_n = (inputs_n_length - vd_lane_count +
2005 n + 1 + lane) % inputs_n_length;
2006 unsigned input_index_m = (inputs_m_length - vd_lane_count +
2007 m + 1 + lane) % inputs_m_length;
2008
2009 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2010 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2011 results[output_index] != expected[output_index] ? '*' : ' ',
2012 lane_len_in_hex,
2013 static_cast<uint64_t>(inputs_d[lane]),
2014 lane_len_in_hex,
2015 static_cast<uint64_t>(inputs_n[input_index_n]),
2016 lane_len_in_hex,
2017 static_cast<uint64_t>(inputs_m[input_index_m]),
2018 indices[index],
2019 lane_len_in_hex,
2020 static_cast<uint64_t>(results[output_index]),
2021 lane_len_in_hex,
2022 static_cast<uint64_t>(expected[output_index]));
2023 }
2024 }
2025 }
2026 }
2027 }
2028 VIXL_ASSERT(d == expected_length);
2029 if (error_count > kErrorReportLimit) {
2030 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2031 }
2032 VIXL_CHECK(error_count == 0);
2033 }
2034 delete[] results;
2035 }
2036
2037
2038 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2039
2040
2041 template <typename Tm>
2042 void Test2OpImmNEON_Helper(
2043 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2044 uintptr_t inputs_n,
2045 unsigned inputs_n_length,
2046 const Tm inputs_m[],
2047 unsigned inputs_m_length,
2048 uintptr_t results,
2049 VectorFormat vd_form,
2050 VectorFormat vn_form) {
2051 VIXL_ASSERT(vd_form != kFormatUndefined &&
2052 vn_form != kFormatUndefined);
2053
2054 SETUP();
2055 START();
2056
2057 // Roll up the loop to keep the code size down.
2058 Label loop_n;
2059
2060 Register out = x0;
2061 Register inputs_n_base = x1;
2062 Register inputs_n_last_16bytes = x3;
2063 Register index_n = x5;
2064
2065 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2066 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2067 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2068
2069 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2070 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2071 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2072 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2073 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2074
2075
2076 // These will be either a D- or a Q-register form, with a single lane
2077 // (for use in scalar load and store operations).
2078 VRegister vd = VRegister(0, vd_bits);
2079 VRegister vn = v1.V16B();
2080 VRegister vntmp = v3.V16B();
2081
2082 // These will have the correct format for use when calling 'helper'.
2083 VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2084 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2085
2086 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2087 VRegister vntmp_single = VRegister(3, vn_lane_bits);
2088
2089 __ Mov(out, results);
2090
2091 __ Mov(inputs_n_base, inputs_n);
2092 __ Mov(inputs_n_last_16bytes,
2093 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2094
2095 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2096
2097 __ Mov(index_n, 0);
2098 __ Bind(&loop_n);
2099
2100 __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
2101 vn_lane_bytes_log2));
2102 __ Ext(vn, vn, vntmp, vn_lane_bytes);
2103
2104 // Set the destination to zero for tests such as '[r]shrn2'.
2105 // TODO: Setting the destination to values other than zero might be a better
2106 // test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2107 __ Movi(vd.V16B(), 0);
2108
2109 {
2110 for (unsigned i = 0; i < inputs_m_length; i++) {
2111 {
2112 SingleEmissionCheckScope guard(&masm);
2113 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2114 }
2115 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
2116 }
2117 }
2118
2119 __ Add(index_n, index_n, 1);
2120 __ Cmp(index_n, inputs_n_length);
2121 __ B(lo, &loop_n);
2122
2123 END();
2124 RUN();
2125 TEARDOWN();
2126 }
2127
2128
2129 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2130 // arrays of the raw-bit representations of the input values. This ensures
2131 // that exact bit comparisons can be performed.
2132 template <typename Td, typename Tn, typename Tm>
2133 static void Test2OpImmNEON(
2134 const char * name,
2135 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2136 const Tn inputs_n[], unsigned inputs_n_length,
2137 const Tm inputs_m[], unsigned inputs_m_length,
2138 const Td expected[], unsigned expected_length,
2139 VectorFormat vd_form,
2140 VectorFormat vn_form) {
2141 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2142
2143 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2144 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2145 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2146
2147 const unsigned results_length = inputs_n_length * inputs_m_length;
2148 Td* results = new Td[results_length * vd_lane_count];
2149 const unsigned lane_bit = sizeof(Td) * 8;
2150 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2151
2152 Test2OpImmNEON_Helper(helper,
2153 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
2154 inputs_m, inputs_m_length,
2155 reinterpret_cast<uintptr_t>(results),
2156 vd_form, vn_form);
2157
2158 if (Test::sim_test_trace()) {
2159 // Print the results.
2160 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2161 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2162 printf(" ");
2163 // Output a separate result for each element of the result vector.
2164 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2165 unsigned index = lane + (iteration * vd_lane_count);
2166 printf(" 0x%0*" PRIx64 ",",
2167 lane_len_in_hex,
2168 static_cast<uint64_t>(results[index]));
2169 }
2170 printf("\n");
2171 }
2172
2173 printf("};\n");
2174 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2175 name,
2176 results_length);
2177 } else {
2178 // Check the results.
2179 VIXL_CHECK(expected_length == results_length);
2180 unsigned error_count = 0;
2181 unsigned d = 0;
2182 const char* padding = " ";
2183 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2184 for (unsigned n = 0; n < inputs_n_length; n++) {
2185 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2186 bool error_in_vector = false;
2187
2188 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2189 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2190 (m * vd_lane_count) + lane;
2191
2192 if (results[output_index] != expected[output_index]) {
2193 error_in_vector = true;
2194 break;
2195 }
2196 }
2197
2198 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2199 printf("%s\n", name);
2200 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2201 lane_len_in_hex+1, padding,
2202 lane_len_in_hex, padding,
2203 lane_len_in_hex+1, padding);
2204
2205 const unsigned first_index_n =
2206 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2207
2208 for (unsigned lane = 0;
2209 lane < std::max(vd_lane_count, vn_lane_count);
2210 lane++) {
2211 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2212 (m * vd_lane_count) + lane;
2213 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2214 unsigned input_index_m = m;
2215
2216 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2217 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2218 results[output_index] != expected[output_index] ? '*' : ' ',
2219 lane_len_in_hex,
2220 static_cast<uint64_t>(inputs_n[input_index_n]),
2221 lane_len_in_hex,
2222 static_cast<uint64_t>(inputs_m[input_index_m]),
2223 lane_len_in_hex,
2224 static_cast<uint64_t>(results[output_index]),
2225 lane_len_in_hex,
2226 static_cast<uint64_t>(expected[output_index]));
2227 }
2228 }
2229 }
2230 }
2231 VIXL_ASSERT(d == expected_length);
2232 if (error_count > kErrorReportLimit) {
2233 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2234 }
2235 VIXL_CHECK(error_count == 0);
2236 }
2237 delete[] results;
2238 }
2239
2240
2241 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2242
2243
2244 static void TestOpImmOpImmNEON_Helper(
2245 TestOpImmOpImmVdUpdateNEONHelper_t helper,
2246 uintptr_t inputs_d,
2247 const int inputs_imm1[], unsigned inputs_imm1_length,
2248 uintptr_t inputs_n, unsigned inputs_n_length,
2249 const int inputs_imm2[], unsigned inputs_imm2_length,
2250 uintptr_t results,
2251 VectorFormat vd_form, VectorFormat vn_form) {
2252 VIXL_ASSERT(vd_form != kFormatUndefined);
2253 VIXL_ASSERT(vn_form != kFormatUndefined);
2254
2255 SETUP();
2256 START();
2257
2258 // Roll up the loop to keep the code size down.
2259 Label loop_n;
2260
2261 Register out = x0;
2262 Register inputs_d_base = x1;
2263 Register inputs_n_base = x2;
2264 Register inputs_n_last_vector = x4;
2265 Register index_n = x6;
2266
2267 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2268 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2269 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2270
2271 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2272 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2273 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2274 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2275 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2276
2277
2278 // These will be either a D- or a Q-register form, with a single lane
2279 // (for use in scalar load and store operations).
2280 VRegister vd = VRegister(0, vd_bits);
2281 VRegister vn = VRegister(1, vn_bits);
2282 VRegister vntmp = VRegister(4, vn_bits);
2283 VRegister vres = VRegister(5, vn_bits);
2284
2285 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2286 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2287
2288 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2289 VRegister vntmp_single = VRegister(4, vn_lane_bits);
2290
2291 // Same registers for use in the 'ext' instructions.
2292 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
2293 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
2294
2295 __ Mov(out, results);
2296
2297 __ Mov(inputs_d_base, inputs_d);
2298
2299 __ Mov(inputs_n_base, inputs_n);
2300 __ Mov(inputs_n_last_vector,
2301 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
2302
2303 __ Ldr(vd, MemOperand(inputs_d_base));
2304
2305 __ Ldr(vn, MemOperand(inputs_n_last_vector));
2306
2307 __ Mov(index_n, 0);
2308 __ Bind(&loop_n);
2309
2310 __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
2311 vn_lane_bytes_log2));
2312 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
2313
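  // Each (imm1, imm2) combination below emits three instructions (the vector
  // Mov, the instruction under test and the Str), which is what the
  // EmissionCheckScope size budgets for.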
2314 {
2315 EmissionCheckScope guard(&masm,
2316 kInstructionSize * inputs_imm1_length * inputs_imm2_length * 3);
2317 for (unsigned i = 0; i < inputs_imm1_length; i++) {
2318 for (unsigned j = 0; j < inputs_imm2_length; j++) {
2319 __ Mov(vres, vd);
2320 (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
2321 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
2322 }
2323 }
2324 }
2325
2326 __ Add(index_n, index_n, 1);
2327 __ Cmp(index_n, inputs_n_length);
2328 __ B(lo, &loop_n);
2329
2330 END();
2331 RUN();
2332 TEARDOWN();
2333 }
2334
2335
2336 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2337 // arrays of the raw-bit representations of the input values. This ensures
2338 // that exact bit comparisons can be performed.
2339 template <typename Td, typename Tn>
2340 static void TestOpImmOpImmNEON(const char * name,
2341 TestOpImmOpImmVdUpdateNEONHelper_t helper,
2342 const Td inputs_d[],
2343 const int inputs_imm1[],
2344 unsigned inputs_imm1_length,
2345 const Tn inputs_n[],
2346 unsigned inputs_n_length,
2347 const int inputs_imm2[],
2348 unsigned inputs_imm2_length,
2349 const Td expected[],
2350 unsigned expected_length,
2351 VectorFormat vd_form,
2352 VectorFormat vn_form) {
2353 VIXL_ASSERT(inputs_n_length > 0);
2354 VIXL_ASSERT(inputs_imm1_length > 0);
2355 VIXL_ASSERT(inputs_imm2_length > 0);
2356
2357 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2358
2359 const unsigned results_length = inputs_n_length *
2360 inputs_imm1_length * inputs_imm2_length;
2361
2362 Td* results = new Td[results_length * vd_lane_count];
2363 const unsigned lane_bit = sizeof(Td) * 8;
2364 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2365
2366 TestOpImmOpImmNEON_Helper(helper,
2367 reinterpret_cast<uintptr_t>(inputs_d),
2368 inputs_imm1,
2369 inputs_imm1_length,
2370 reinterpret_cast<uintptr_t>(inputs_n),
2371 inputs_n_length,
2372 inputs_imm2,
2373 inputs_imm2_length,
2374 reinterpret_cast<uintptr_t>(results),
2375 vd_form, vn_form);
2376
2377 if (Test::sim_test_trace()) {
2378 // Print the results.
2379 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2380 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2381 printf(" ");
2382 // Output a separate result for each element of the result vector.
2383 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2384 unsigned index = lane + (iteration * vd_lane_count);
2385 printf(" 0x%0*" PRIx64 ",",
2386 lane_len_in_hex,
2387 static_cast<uint64_t>(results[index]));
2388 }
2389 printf("\n");
2390 }
2391
2392 printf("};\n");
2393 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2394 name,
2395 results_length);
2396 } else {
2397 // Check the results.
2398 VIXL_CHECK(expected_length == results_length);
2399 unsigned error_count = 0;
2400 unsigned counted_length = 0;
2401 const char* padding = " ";
2402 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2403 for (unsigned n = 0; n < inputs_n_length; n++) {
2404 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2405 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2406 bool error_in_vector = false;
2407
2408 counted_length++;
2409
2410 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2411 unsigned output_index =
2412 (n * inputs_imm1_length *
2413 inputs_imm2_length * vd_lane_count) +
2414 (imm1 * inputs_imm2_length * vd_lane_count) +
2415 (imm2 * vd_lane_count) + lane;
2416
2417 if (results[output_index] != expected[output_index]) {
2418 error_in_vector = true;
2419 break;
2420 }
2421 }
2422
2423 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2424 printf("%s\n", name);
2425 printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2426 lane_len_in_hex+1, padding,
2427 lane_len_in_hex, padding,
2428 lane_len_in_hex+1, padding,
2429 lane_len_in_hex, padding,
2430 lane_len_in_hex+1, padding);
2431
2432 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2433 unsigned output_index =
2434 (n * inputs_imm1_length *
2435 inputs_imm2_length * vd_lane_count) +
2436 (imm1 * inputs_imm2_length * vd_lane_count) +
2437 (imm2 * vd_lane_count) + lane;
2438 unsigned input_index_n = (inputs_n_length - vd_lane_count +
2439 n + 1 + lane) % inputs_n_length;
2440 unsigned input_index_imm1 = imm1;
2441 unsigned input_index_imm2 = imm2;
2442
2443 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2444 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2445 results[output_index] !=
2446 expected[output_index] ? '*' : ' ',
2447 lane_len_in_hex,
2448 static_cast<uint64_t>(inputs_d[lane]),
2449 lane_len_in_hex,
2450 static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2451 lane_len_in_hex,
2452 static_cast<uint64_t>(inputs_n[input_index_n]),
2453 lane_len_in_hex,
2454 static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2455 lane_len_in_hex,
2456 static_cast<uint64_t>(results[output_index]),
2457 lane_len_in_hex,
2458 static_cast<uint64_t>(expected[output_index]));
2459 }
2460 }
2461 }
2462 }
2463 }
2464 VIXL_ASSERT(counted_length == expected_length);
2465 if (error_count > kErrorReportLimit) {
2466 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2467 }
2468 VIXL_CHECK(error_count == 0);
2469 }
2470 delete[] results;
2471 }
2472
2473
2474 // ==== Floating-point tests. ====
2475
2476
2477 // Standard floating-point test expansion for both double- and single-precision
2478 // operations.
2479 #define STRINGIFY(s) #s
2480
2481 #define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \
2482 Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant), \
2483 &MacroAssembler::mnemonic, \
2484 input, sizeof(input) / sizeof(input[0]), \
2485 kExpected_##mnemonic##_##variant, \
2486 kExpectedCount_##mnemonic##_##variant)
2487
2488 #define DEFINE_TEST_FP(mnemonic, type, input) \
2489 TEST(mnemonic##_d) { \
2490 CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \
2491 } \
2492 TEST(mnemonic##_s) { \
2493 CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input); \
2494 }
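
// As a rough sketch of what these macros generate, the invocation
// DEFINE_TEST_FP(fadd, 2Op, Basic) below defines two tests whose bodies are
// equivalent to:
//
//   TEST(fadd_d) {
//     Test2Op("fadd_d",
//             &MacroAssembler::fadd,
//             kInputDoubleBasic,
//             sizeof(kInputDoubleBasic) / sizeof(kInputDoubleBasic[0]),
//             kExpected_fadd_d,
//             kExpectedCount_fadd_d);
//   }
//
// and the corresponding 'fadd_s' test using the kInputFloatBasic inputs.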
2495
2496 // TODO: Test with a newer version of valgrind.
2497 //
2498 // Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
2499 // Therefore these tests will exit through an ASSERT and thus leak
2500 // memory.
2501 DEFINE_TEST_FP(fmadd, 3Op, Basic)
2502 DEFINE_TEST_FP(fmsub, 3Op, Basic)
2503 DEFINE_TEST_FP(fnmadd, 3Op, Basic)
2504 DEFINE_TEST_FP(fnmsub, 3Op, Basic)
2505
2506 DEFINE_TEST_FP(fadd, 2Op, Basic)
2507 DEFINE_TEST_FP(fdiv, 2Op, Basic)
2508 DEFINE_TEST_FP(fmax, 2Op, Basic)
2509 DEFINE_TEST_FP(fmaxnm, 2Op, Basic)
2510 DEFINE_TEST_FP(fmin, 2Op, Basic)
2511 DEFINE_TEST_FP(fminnm, 2Op, Basic)
2512 DEFINE_TEST_FP(fmul, 2Op, Basic)
2513 DEFINE_TEST_FP(fsub, 2Op, Basic)
2514 DEFINE_TEST_FP(fnmul, 2Op, Basic)
2515
2516 DEFINE_TEST_FP(fabs, 1Op, Basic)
2517 DEFINE_TEST_FP(fmov, 1Op, Basic)
2518 DEFINE_TEST_FP(fneg, 1Op, Basic)
2519 DEFINE_TEST_FP(fsqrt, 1Op, Basic)
2520 DEFINE_TEST_FP(frinta, 1Op, Conversions)
2521 DEFINE_TEST_FP(frinti, 1Op, Conversions)
2522 DEFINE_TEST_FP(frintm, 1Op, Conversions)
2523 DEFINE_TEST_FP(frintn, 1Op, Conversions)
2524 DEFINE_TEST_FP(frintp, 1Op, Conversions)
2525 DEFINE_TEST_FP(frintx, 1Op, Conversions)
2526 DEFINE_TEST_FP(frintz, 1Op, Conversions)
2527
2528 TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
2529 TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
2530 TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
2531 TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }
2532
2533 TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
2534 TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
2535
2536 #define DEFINE_TEST_FP_TO_INT(mnemonic, type, input) \
2537 TEST(mnemonic##_xd) { \
2538 CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input); \
2539 } \
2540 TEST(mnemonic##_xs) { \
2541 CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input); \
2542 } \
2543 TEST(mnemonic##_wd) { \
2544 CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
2545 } \
2546 TEST(mnemonic##_ws) { \
2547 CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input); \
2548 }
2549
2550 DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
2551 DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
2552 DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
2553 DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
2554 DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
2555 DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
2556 DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
2557 DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
2558
2559 // TODO: Scvtf-fixed-point
2560 // TODO: Scvtf-integer
2561 // TODO: Ucvtf-fixed-point
2562 // TODO: Ucvtf-integer
2563
2564 // TODO: Fccmp
2565 // TODO: Fcsel
2566
2567
2568 // ==== NEON Tests. ====
2569
2570 #define CALL_TEST_NEON_HELPER_1Op(mnemonic, \
2571 vdform, vnform, \
2572 input_n) \
2573 Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
2574 &MacroAssembler::mnemonic, \
2575 input_n, \
2576 (sizeof(input_n) / sizeof(input_n[0])), \
2577 kExpected_NEON_##mnemonic##_##vdform, \
2578 kExpectedCount_NEON_##mnemonic##_##vdform, \
2579 kFormat##vdform, \
2580 kFormat##vnform)
2581
2582 #define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, \
2583 vdform, vnform, \
2584 input_n) \
2585 Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) \
2586 "_" STRINGIFY(vnform), \
2587 &MacroAssembler::mnemonic, \
2588 input_n, \
2589 (sizeof(input_n) / sizeof(input_n[0])), \
2590 kExpected_NEON_##mnemonic##_##vdform##_##vnform, \
2591 kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform, \
2592 kFormat##vdform, \
2593 kFormat##vnform)
2594
2595 #define CALL_TEST_NEON_HELPER_2Op(mnemonic, \
2596 vdform, vnform, vmform, \
2597 input_d, input_n, input_m) \
2598 Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
2599 &MacroAssembler::mnemonic, \
2600 input_d, \
2601 input_n, \
2602 (sizeof(input_n) / sizeof(input_n[0])), \
2603 input_m, \
2604 (sizeof(input_m) / sizeof(input_m[0])), \
2605 kExpected_NEON_##mnemonic##_##vdform, \
2606 kExpectedCount_NEON_##mnemonic##_##vdform, \
2607 kFormat##vdform, \
2608 kFormat##vnform, \
2609 kFormat##vmform)
2610
2611 #define CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \
2612 vdform, vnform, \
2613 input_n, input_m) \
2614 Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \
2615 &MacroAssembler::mnemonic, \
2616 input_n, \
2617 (sizeof(input_n) / sizeof(input_n[0])), \
2618 input_m, \
2619 (sizeof(input_m) / sizeof(input_m[0])), \
2620 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM, \
2621 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM, \
2622 kFormat##vdform, \
2623 kFormat##vnform)
2624
2625 #define CALL_TEST_NEON_HELPER_ByElement(mnemonic, \
2626 vdform, vnform, vmform, \
2627 input_d, input_n, input_m, indices) \
2628 TestByElementNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) \
2629 "_" STRINGIFY(vnform) "_" STRINGIFY(vmform), \
2630 &MacroAssembler::mnemonic, \
2631 input_d, \
2632 input_n, \
2633 (sizeof(input_n) / sizeof(input_n[0])), \
2634 input_m, \
2635 (sizeof(input_m) / sizeof(input_m[0])), \
2636 indices, \
2637 (sizeof(indices) / sizeof(indices[0])), \
2638 kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
2639 kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
2640 kFormat##vdform, \
2641 kFormat##vnform, \
2642 kFormat##vmform)
2643
2644 #define CALL_TEST_NEON_HELPER_OpImmOpImm(helper, \
2645 mnemonic, \
2646 vdform, vnform, \
2647 input_d, input_imm1, \
2648 input_n, input_imm2) \
2649 TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
2650 helper, \
2651 input_d, \
2652 input_imm1, \
2653 (sizeof(input_imm1) / sizeof(input_imm1[0])), \
2654 input_n, \
2655 (sizeof(input_n) / sizeof(input_n[0])), \
2656 input_imm2, \
2657 (sizeof(input_imm2) / sizeof(input_imm2[0])), \
2658 kExpected_NEON_##mnemonic##_##vdform, \
2659 kExpectedCount_NEON_##mnemonic##_##vdform, \
2660 kFormat##vdform, \
2661 kFormat##vnform)
2662
2663 #define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \
2664 CALL_TEST_NEON_HELPER_1Op(mnemonic, \
2665 variant, variant, \
2666 input)
2667
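// As an illustration (the mnemonic and input names here are only examples,
// not necessarily ones used below), CALL_TEST_NEON_HELPER_2SAME(abs, 8B,
// kInput8bitsBasic) expands to roughly:
//
//   Test1OpNEON("abs_8B",
//               &MacroAssembler::abs,
//               kInput8bitsBasic,
//               sizeof(kInput8bitsBasic) / sizeof(kInput8bitsBasic[0]),
//               kExpected_NEON_abs_8B,
//               kExpectedCount_NEON_abs_8B,
//               kFormat8B,
//               kFormat8B);
//
// The DEFINE_TEST_NEON_* macros below simply wrap such calls in TEST() bodies
// for each supported register arrangement.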
2668 #define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \
2669 TEST(mnemonic##_8B) { \
2670 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input); \
2671 } \
2672 TEST(mnemonic##_16B) { \
2673 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \
2674 }
2675
2676 #define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) \
2677 TEST(mnemonic##_4H) { \
2678 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \
2679 } \
2680 TEST(mnemonic##_8H) { \
2681 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \
2682 }
2683
2684 #define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \
2685 TEST(mnemonic##_2S) { \
2686 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \
2687 } \
2688 TEST(mnemonic##_4S) { \
2689 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \
2690 }
2691
2692 #define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
2693 DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \
2694 DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)
2695
2696 #define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
2697 DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
2698 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)
2699
2700 #define DEFINE_TEST_NEON_2SAME(mnemonic, input) \
2701 DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
2702 TEST(mnemonic##_2D) { \
2703 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
2704 }
2705 #define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input) \
2706 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \
2707 TEST(mnemonic##_2D) { \
2708 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
2709 }
2710
2711 #define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input) \
2712 TEST(mnemonic##_2S) { \
2713 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input); \
2714 } \
2715 TEST(mnemonic##_4S) { \
2716 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input); \
2717 } \
2718 TEST(mnemonic##_2D) { \
2719 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \
2720 }
2721
2722 #define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input) \
2723 TEST(mnemonic##_S) { \
2724 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input); \
2725 } \
2726 TEST(mnemonic##_D) { \
2727 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \
2728 }
2729
2730 #define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \
2731 TEST(mnemonic##_B) { \
2732 CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \
2733 }
2734 #define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \
2735 TEST(mnemonic##_H) { \
2736 CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \
2737 }
2738 #define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
2739 TEST(mnemonic##_S) { \
2740 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \
2741 }
2742 #define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) \
2743 TEST(mnemonic##_D) { \
2744 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \
2745 }
2746
2747 #define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \
2748 DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \
2749 DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \
2750 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
2751 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
2752
2753 #define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \
2754 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
2755 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
2756
2757
2758 #define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
2759 CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, \
2760 vd_form, vn_form, \
2761 input_n)
2762
2763 #define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \
2764 TEST(mnemonic##_B_8B) { \
2765 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \
2766 } \
2767 TEST(mnemonic##_B_16B) { \
2768 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
2769 } \
2770 TEST(mnemonic##_H_4H) { \
2771 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
2772 } \
2773 TEST(mnemonic##_H_8H) { \
2774 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
2775 } \
2776 TEST(mnemonic##_S_4S) { \
2777 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
2778 }
2779
2780 #define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \
2781 TEST(mnemonic##_H_8B) { \
2782 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \
2783 } \
2784 TEST(mnemonic##_H_16B) { \
2785 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
2786 } \
2787 TEST(mnemonic##_S_4H) { \
2788 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
2789 } \
2790 TEST(mnemonic##_S_8H) { \
2791 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
2792 } \
2793 TEST(mnemonic##_D_4S) { \
2794 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
2795 }
2796
2797 #define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \
2798 TEST(mnemonic##_S_4S) { \
2799 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \
2800 }
2801
2802 #define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, \
2803 vdform, vnform, \
2804 input_n) \
2805 CALL_TEST_NEON_HELPER_1Op(mnemonic, \
2806 vdform, vnform, \
2807 input_n)
2808
2809 #define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \
2810 TEST(mnemonic##_4H) { \
2811 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \
2812 } \
2813 TEST(mnemonic##_8H) { \
2814 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
2815 } \
2816 TEST(mnemonic##_2S) { \
2817 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
2818 } \
2819 TEST(mnemonic##_4S) { \
2820 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
2821 } \
2822 TEST(mnemonic##_1D) { \
2823 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
2824 } \
2825 TEST(mnemonic##_2D) { \
2826 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
2827 }
2828
2829 #define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \
2830 TEST(mnemonic##_8B) { \
2831 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \
2832 } \
2833 TEST(mnemonic##_4H) { \
2834 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \
2835 } \
2836 TEST(mnemonic##_2S) { \
2837 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \
2838 } \
2839 TEST(mnemonic##2_16B) { \
2840 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input);\
2841 } \
2842 TEST(mnemonic##2_8H) { \
2843 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \
2844 } \
2845 TEST(mnemonic##2_4S) { \
2846 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \
2847 }
2848
2849 #define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \
2850 TEST(mnemonic##_4S) { \
2851 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \
2852 } \
2853 TEST(mnemonic##_2D) { \
2854 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \
2855 } \
2856 TEST(mnemonic##2_4S) { \
2857 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input);\
2858 } \
2859 TEST(mnemonic##2_2D) { \
2860 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \
2861 }
2862
2863 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \
2864 TEST(mnemonic##_4H) { \
2865 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \
2866 } \
2867 TEST(mnemonic##_2S) { \
2868 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
2869 } \
2870 TEST(mnemonic##2_8H) { \
2871 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \
2872 } \
2873 TEST(mnemonic##2_4S) { \
2874 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
2875 }
2876
2877 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \
2878 TEST(mnemonic##_2S) { \
2879 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
2880 } \
2881 TEST(mnemonic##2_4S) { \
2882 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
2883 }
2884
2885 #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \
2886 TEST(mnemonic##_B) { \
2887 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \
2888 } \
2889 TEST(mnemonic##_H) { \
2890 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \
2891 } \
2892 TEST(mnemonic##_S) { \
2893 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \
2894 }
2895
2896 #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \
2897 TEST(mnemonic##_S) { \
2898 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \
2899 } \
2900 TEST(mnemonic##_D) { \
2901 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \
2902 }
2903
2904 #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) { \
2905 CALL_TEST_NEON_HELPER_2Op(mnemonic, \
2906 variant, variant, variant, \
2907 input_d, input_nm, input_nm); \
2908 }
2909
2910 #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
2911 TEST(mnemonic##_8B) { \
2912 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B, \
2913 kInput8bitsAccDestination, \
2914 kInput8bits##input); \
2915 } \
2916 TEST(mnemonic##_16B) { \
2917 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B, \
2918 kInput8bitsAccDestination, \
2919 kInput8bits##input); \
2920 } \
2921
2922 #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \
2923 TEST(mnemonic##_4H) { \
2924 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, \
2925 kInput16bitsAccDestination, \
2926 kInput16bits##input); \
2927 } \
2928 TEST(mnemonic##_8H) { \
2929 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, \
2930 kInput16bitsAccDestination, \
2931 kInput16bits##input); \
2932 } \
2933 TEST(mnemonic##_2S) { \
2934 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, \
2935 kInput32bitsAccDestination, \
2936 kInput32bits##input); \
2937 } \
2938 TEST(mnemonic##_4S) { \
2939 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, \
2940 kInput32bitsAccDestination, \
2941 kInput32bits##input); \
2942 }
2943
2944 #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
2945 DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
2946 DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
2947
2948 #define DEFINE_TEST_NEON_3SAME(mnemonic, input) \
2949 DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
2950 TEST(mnemonic##_2D) { \
2951 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, \
2952 kInput64bitsAccDestination, \
2953 kInput64bits##input); \
2954 }
2955
2956 #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \
2957 TEST(mnemonic##_2S) { \
2958 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, \
2959 kInputFloatAccDestination, \
2960 kInputFloat##input); \
2961 } \
2962 TEST(mnemonic##_4S) { \
2963 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, \
2964 kInputFloatAccDestination, \
2965 kInputFloat##input); \
2966 } \
2967 TEST(mnemonic##_2D) { \
2968 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, \
2969 kInputDoubleAccDestination, \
2970 kInputDouble##input); \
2971 }
2972
2973 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \
2974 TEST(mnemonic##_D) { \
2975 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \
2976 kInput64bitsAccDestination, \
2977 kInput64bits##input); \
2978 }
2979
2980 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
2981 TEST(mnemonic##_H) { \
2982 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, \
2983 kInput16bitsAccDestination, \
2984 kInput16bits##input); \
2985 } \
2986 TEST(mnemonic##_S) { \
2987 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \
2988 kInput32bitsAccDestination, \
2989 kInput32bits##input); \
2990 } \
2991
2992 #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
2993 TEST(mnemonic##_B) { \
2994 CALL_TEST_NEON_HELPER_3SAME(mnemonic, B, \
2995 kInput8bitsAccDestination, \
2996 kInput8bits##input); \
2997 } \
2998 TEST(mnemonic##_H) { \
2999 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, \
3000 kInput16bitsAccDestination, \
3001 kInput16bits##input); \
3002 } \
3003 TEST(mnemonic##_S) { \
3004 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \
3005 kInput32bitsAccDestination, \
3006 kInput32bits##input); \
3007 } \
3008 TEST(mnemonic##_D) { \
3009 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \
3010 kInput64bitsAccDestination, \
3011 kInput64bits##input); \
3012 }
3013
3014 #define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
3015 TEST(mnemonic##_S) { \
3016 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \
3017 kInputFloatAccDestination, \
3018 kInputFloat##input); \
3019 } \
3020 TEST(mnemonic##_D) { \
3021 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \
3022 kInputDoubleAccDestination, \
3023 kInputDouble##input); \
3024 }
3025
3026 #define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3027 vdform, vnform, vmform, \
3028 input_d, input_n, input_m) { \
3029 CALL_TEST_NEON_HELPER_2Op(mnemonic, \
3030 vdform, vnform, vmform, \
3031 input_d, input_n, input_m); \
3032 }
3033
3034 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
3035 TEST(mnemonic##_8H) { \
3036 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \
3037 kInput16bitsAccDestination, \
3038 kInput8bits##input, kInput8bits##input); \
3039 } \
3040 TEST(mnemonic##2_8H) { \
3041 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \
3042 kInput16bitsAccDestination, \
3043 kInput8bits##input, kInput8bits##input); \
3044 }
3045
3046 #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
3047 TEST(mnemonic##_4S) { \
3048 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \
3049 kInput32bitsAccDestination, \
3050 kInput16bits##input, kInput16bits##input); \
3051 } \
3052 TEST(mnemonic##2_4S) { \
3053 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \
3054 kInput32bitsAccDestination, \
3055 kInput16bits##input, kInput16bits##input); \
3056 }
3057
3058 #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
3059 TEST(mnemonic##_2D) { \
3060 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \
3061 kInput64bitsAccDestination, \
3062 kInput32bits##input, kInput32bits##input); \
3063 } \
3064 TEST(mnemonic##2_2D) { \
3065 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \
3066 kInput64bitsAccDestination, \
3067 kInput32bits##input, kInput32bits##input); \
3068 }
3069
3070 #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
3071 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
3072 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3073
3074 #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
3075 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
3076 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
3077 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3078
3079 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
3080 TEST(mnemonic##_S) { \
3081 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, \
3082 kInput32bitsAccDestination, \
3083 kInput16bits##input, \
3084 kInput16bits##input); \
3085 }
3086
3087 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
3088 TEST(mnemonic##_D) { \
3089 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, \
3090 kInput64bitsAccDestination, \
3091 kInput32bits##input, \
3092 kInput32bits##input); \
3093 }
3094
3095 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
3096 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
3097 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3098
3099 #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
3100 TEST(mnemonic##_8H) { \
3101 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \
3102 kInput16bitsAccDestination, \
3103 kInput16bits##input, kInput8bits##input); \
3104 } \
3105 TEST(mnemonic##_4S) { \
3106 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \
3107 kInput32bitsAccDestination, \
3108 kInput32bits##input, kInput16bits##input); \
3109 } \
3110 TEST(mnemonic##_2D) { \
3111 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \
3112 kInput64bitsAccDestination, \
3113 kInput64bits##input, kInput32bits##input); \
3114 } \
3115 TEST(mnemonic##2_8H) { \
3116 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \
3117 kInput16bitsAccDestination, \
3118 kInput16bits##input, kInput8bits##input); \
3119 } \
3120 TEST(mnemonic##2_4S) { \
3121 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \
3122 kInput32bitsAccDestination, \
3123 kInput32bits##input, kInput16bits##input); \
3124 } \
3125 TEST(mnemonic##2_2D) { \
3126 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \
3127 kInput64bitsAccDestination, \
3128 kInput64bits##input, kInput32bits##input); \
3129 }
3130
3131 #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
3132 TEST(mnemonic##_8B) { \
3133 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \
3134 kInput8bitsAccDestination, \
3135 kInput16bits##input, kInput16bits##input); \
3136 } \
3137 TEST(mnemonic##_4H) { \
3138 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \
3139 kInput16bitsAccDestination, \
3140 kInput32bits##input, kInput32bits##input); \
3141 } \
3142 TEST(mnemonic##_2S) { \
3143 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \
3144 kInput32bitsAccDestination, \
3145 kInput64bits##input, kInput64bits##input); \
3146 } \
3147 TEST(mnemonic##2_16B) { \
3148 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \
3149 kInput8bitsAccDestination, \
3150 kInput16bits##input, kInput16bits##input); \
3151 } \
3152 TEST(mnemonic##2_8H) { \
3153 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \
3154 kInput16bitsAccDestination, \
3155 kInput32bits##input, kInput32bits##input); \
3156 } \
3157 TEST(mnemonic##2_4S) { \
3158 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \
3159 kInput32bitsAccDestination, \
3160 kInput64bits##input, kInput64bits##input); \
3161 }
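// The "narrow" three-different form is the opposite: wide sources, narrow
// destination, with mnemonic##2 variants writing the upper half of the
// destination (16B, 8H or 4S).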
3162
3163 #define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3164 vdform, vnform, \
3165 input_n, \
3166 input_imm) { \
3167 CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \
3168 vdform, vnform, \
3169 input_n, input_imm); \
3170 }
3171
3172 #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
3173 TEST(mnemonic##_8B_2OPIMM) { \
3174 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3175 8B, 8B, \
3176 kInput8bits##input, \
3177 kInput8bitsImm##input_imm); \
3178 } \
3179 TEST(mnemonic##_16B_2OPIMM) { \
3180 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3181 16B, 16B, \
3182 kInput8bits##input, \
3183 kInput8bitsImm##input_imm); \
3184 } \
3185 TEST(mnemonic##_4H_2OPIMM) { \
3186 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3187 4H, 4H, \
3188 kInput16bits##input, \
3189 kInput16bitsImm##input_imm); \
3190 } \
3191 TEST(mnemonic##_8H_2OPIMM) { \
3192 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3193 8H, 8H, \
3194 kInput16bits##input, \
3195 kInput16bitsImm##input_imm); \
3196 } \
3197 TEST(mnemonic##_2S_2OPIMM) { \
3198 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3199 2S, 2S, \
3200 kInput32bits##input, \
3201 kInput32bitsImm##input_imm); \
3202 } \
3203 TEST(mnemonic##_4S_2OPIMM) { \
3204 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3205 4S, 4S, \
3206 kInput32bits##input, \
3207 kInput32bitsImm##input_imm); \
3208 } \
3209 TEST(mnemonic##_2D_2OPIMM) { \
3210 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3211 2D, 2D, \
3212 kInput64bits##input, \
3213 kInput64bitsImm##input_imm); \
3214 }
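// DEFINE_TEST_NEON_2OPIMM covers all seven integer vector shapes of a
// two-operand-plus-immediate instruction, pairing each element size with its
// kInput<size>bitsImm<input_imm> list. For illustration, the 2D test generated
// by DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth) below is roughly:
//
//   TEST(sshr_2D_2OPIMM) {
//     CALL_TEST_NEON_HELPER_2OPIMM(sshr, 2D, 2D,
//                                  kInput64bitsBasic,
//                                  kInput64bitsImmTypeWidth);
//   }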
3215
3216 #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
3217 TEST(mnemonic##_8B_2OPIMM) { \
3218 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3219 8B, B, \
3220 kInput8bits##input, \
3221 kInput8bitsImm##input_imm); \
3222 } \
3223 TEST(mnemonic##_16B_2OPIMM) { \
3224 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3225 16B, B, \
3226 kInput8bits##input, \
3227 kInput8bitsImm##input_imm); \
3228 } \
3229 TEST(mnemonic##_4H_2OPIMM) { \
3230 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3231 4H, H, \
3232 kInput16bits##input, \
3233 kInput16bitsImm##input_imm); \
3234 } \
3235 TEST(mnemonic##_8H_2OPIMM) { \
3236 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3237 8H, H, \
3238 kInput16bits##input, \
3239 kInput16bitsImm##input_imm); \
3240 } \
3241 TEST(mnemonic##_2S_2OPIMM) { \
3242 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3243 2S, S, \
3244 kInput32bits##input, \
3245 kInput32bitsImm##input_imm); \
3246 } \
3247 TEST(mnemonic##_4S_2OPIMM) { \
3248 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3249 4S, S, \
3250 kInput32bits##input, \
3251 kInput32bitsImm##input_imm); \
3252 } \
3253 TEST(mnemonic##_2D_2OPIMM) { \
3254 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3255 2D, D, \
3256 kInput64bits##input, \
3257 kInput64bitsImm##input_imm); \
3258 }
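// The _COPY variant uses a single lane specifier (B/H/S/D) as the second form
// instead of a full vector shape, matching the register forms used by the dup
// tests below.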
3259
3260 #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
3261 TEST(mnemonic##_8B_2OPIMM) { \
3262 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3263 8B, 8H, \
3264 kInput16bits##input, \
3265 kInput8bitsImm##input_imm); \
3266 } \
3267 TEST(mnemonic##_4H_2OPIMM) { \
3268 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3269 4H, 4S, \
3270 kInput32bits##input, \
3271 kInput16bitsImm##input_imm); \
3272 } \
3273 TEST(mnemonic##_2S_2OPIMM) { \
3274 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3275 2S, 2D, \
3276 kInput64bits##input, \
3277 kInput32bitsImm##input_imm); \
3278 } \
3279 TEST(mnemonic##2_16B_2OPIMM) { \
3280 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3281 16B, 8H, \
3282 kInput16bits##input, \
3283 kInput8bitsImm##input_imm); \
3284 } \
3285 TEST(mnemonic##2_8H_2OPIMM) { \
3286 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3287 8H, 4S, \
3288 kInput32bits##input, \
3289 kInput16bitsImm##input_imm); \
3290 } \
3291 TEST(mnemonic##2_4S_2OPIMM) { \
3292 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3293 4S, 2D, \
3294 kInput64bits##input, \
3295 kInput32bitsImm##input_imm); \
3296 }
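// Narrowing shift-by-immediate forms: the destination elements are half the
// source width, and the mnemonic##2 variants target the upper destination half
// (16B, 8H or 4S).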
3297
3298 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
3299 TEST(mnemonic##_B_2OPIMM) { \
3300 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3301 B, H, \
3302 kInput16bits##input, \
3303 kInput8bitsImm##input_imm); \
3304 } \
3305 TEST(mnemonic##_H_2OPIMM) { \
3306 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3307 H, S, \
3308 kInput32bits##input, \
3309 kInput16bitsImm##input_imm); \
3310 } \
3311 TEST(mnemonic##_S_2OPIMM) { \
3312 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3313 S, D, \
3314 kInput64bits##input, \
3315 kInput32bitsImm##input_imm); \
3316 }
3317
3318 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
3319 TEST(mnemonic##_2S_2OPIMM) { \
3320 CALL_TEST_NEON_HELPER_2OPIMM( \
3321 mnemonic, \
3322 2S, 2S, \
3323 kInputFloat##Basic, \
3324 kInputDoubleImm##input_imm) \
3325 } \
3326 TEST(mnemonic##_4S_2OPIMM) { \
3327 CALL_TEST_NEON_HELPER_2OPIMM( \
3328 mnemonic, \
3329 4S, 4S, \
3330 kInputFloat##input, \
3331 kInputDoubleImm##input_imm); \
3332 } \
3333 TEST(mnemonic##_2D_2OPIMM) { \
3334 CALL_TEST_NEON_HELPER_2OPIMM( \
3335 mnemonic, \
3336 2D, 2D, \
3337 kInputDouble##input, \
3338 kInputDoubleImm##input_imm); \
3339 }
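// Floating-point compare-against-zero forms. The immediates always come from a
// kInputDoubleImm list; note that the 2S case uses kInputFloatBasic regardless
// of the 'input' argument, while the 4S and 2D cases honour it.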
3340
3341 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
3342 TEST(mnemonic##_2S_2OPIMM) { \
3343 CALL_TEST_NEON_HELPER_2OPIMM( \
3344 mnemonic, \
3345 2S, 2S, \
3346 kInputFloat##Basic, \
3347 kInput32bitsImm##input_imm) \
3348 } \
3349 TEST(mnemonic##_4S_2OPIMM) { \
3350 CALL_TEST_NEON_HELPER_2OPIMM( \
3351 mnemonic, \
3352 4S, 4S, \
3353 kInputFloat##input, \
3354 kInput32bitsImm##input_imm) \
3355 } \
3356 TEST(mnemonic##_2D_2OPIMM) { \
3357 CALL_TEST_NEON_HELPER_2OPIMM( \
3358 mnemonic, \
3359 2D, 2D, \
3360 kInputDouble##input, \
3361 kInput64bitsImm##input_imm) \
3362 }
3363
3364 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
3365 TEST(mnemonic##_S_2OPIMM) { \
3366 CALL_TEST_NEON_HELPER_2OPIMM( \
3367 mnemonic, \
3368 S, S, \
3369 kInputFloat##Basic, \
3370 kInput32bitsImm##input_imm) \
3371 } \
3372 TEST(mnemonic##_D_2OPIMM) { \
3373 CALL_TEST_NEON_HELPER_2OPIMM( \
3374 mnemonic, \
3375 D, D, \
3376 kInputDouble##input, \
3377 kInput64bitsImm##input_imm) \
3378 }
3379
3380 #define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
3381 TEST(mnemonic##_2S_2OPIMM) { \
3382 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3383 2S, 2S, \
3384 kInput32bits##input, \
3385 kInput32bitsImm##input_imm); \
3386 } \
3387 TEST(mnemonic##_4S_2OPIMM) { \
3388 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3389 4S, 4S, \
3390 kInput32bits##input, \
3391 kInput32bitsImm##input_imm); \
3392 } \
3393 TEST(mnemonic##_2D_2OPIMM) { \
3394 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3395 2D, 2D, \
3396 kInput64bits##input, \
3397 kInput64bitsImm##input_imm); \
3398 }
3399
3400 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
3401 TEST(mnemonic##_D_2OPIMM) { \
3402 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3403 D, D, \
3404 kInput64bits##input, \
3405 kInput64bitsImm##input_imm); \
3406 }
3407
3408 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
3409 TEST(mnemonic##_S_2OPIMM) { \
3410 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3411 S, S, \
3412 kInput32bits##input, \
3413 kInput32bitsImm##input_imm); \
3414 } \
3415 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
3416
3417 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
3418 TEST(mnemonic##_D_2OPIMM) { \
3419 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3420 D, D, \
3421 kInputDouble##input, \
3422 kInputDoubleImm##input_imm); \
3423 }
3424
3425 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
3426 TEST(mnemonic##_S_2OPIMM) { \
3427 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3428 S, S, \
3429 kInputFloat##input, \
3430 kInputDoubleImm##input_imm); \
3431 } \
3432 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
3433
3434 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
3435 TEST(mnemonic##_B_2OPIMM) { \
3436 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3437 B, B, \
3438 kInput8bits##input, \
3439 kInput8bitsImm##input_imm); \
3440 } \
3441 TEST(mnemonic##_H_2OPIMM) { \
3442 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3443 H, H, \
3444 kInput16bits##input, \
3445 kInput16bitsImm##input_imm); \
3446 } \
3447 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)
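// The scalar shift-by-immediate chain nests: _SCALAR adds B and H on top of
// _SCALAR_SD (S plus D), which itself builds on _SCALAR_D (D only), so each
// mnemonic below picks up only the scalar sizes it supports.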
3448
3449 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
3450 TEST(mnemonic##_8H_2OPIMM) { \
3451 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3452 8H, 8B, \
3453 kInput8bits##input, \
3454 kInput8bitsImm##input_imm); \
3455 } \
3456 TEST(mnemonic##_4S_2OPIMM) { \
3457 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3458 4S, 4H, \
3459 kInput16bits##input, \
3460 kInput16bitsImm##input_imm); \
3461 } \
3462 TEST(mnemonic##_2D_2OPIMM) { \
3463 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3464 2D, 2S, \
3465 kInput32bits##input, \
3466 kInput32bitsImm##input_imm); \
3467 } \
3468 TEST(mnemonic##2_8H_2OPIMM) { \
3469 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3470 8H, 16B, \
3471 kInput8bits##input, \
3472 kInput8bitsImm##input_imm); \
3473 } \
3474 TEST(mnemonic##2_4S_2OPIMM) { \
3475 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3476 4S, 8H, \
3477 kInput16bits##input, \
3478 kInput16bitsImm##input_imm); \
3479 } \
3480 TEST(mnemonic##2_2D_2OPIMM) { \
3481 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3482 2D, 4S, \
3483 kInput32bits##input, \
3484 kInput32bitsImm##input_imm); \
3485 }
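// Long shift-by-immediate forms (sshll, ushll and shll below): destination
// elements are twice the source width, with mnemonic##2 variants reading the
// upper source half (16B, 8H or 4S).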
3486
3487 #define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3488 vdform, vnform, vmform, \
3489 input_d, input_n, \
3490 input_m, indices) { \
3491 CALL_TEST_NEON_HELPER_ByElement(mnemonic, \
3492 vdform, vnform, vmform, \
3493 input_d, input_n, \
3494 input_m, indices); \
3495 }
3496
3497 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
3498 TEST(mnemonic##_4H_4H_H) { \
3499 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3500 4H, 4H, H, \
3501 kInput16bits##input_d, \
3502 kInput16bits##input_n, \
3503 kInput16bits##input_m, \
3504 kInputHIndices); \
3505 } \
3506 TEST(mnemonic##_8H_8H_H) { \
3507 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3508 8H, 8H, H, \
3509 kInput16bits##input_d, \
3510 kInput16bits##input_n, \
3511 kInput16bits##input_m, \
3512 kInputHIndices); \
3513 } \
3514 TEST(mnemonic##_2S_2S_S) { \
3515 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3516 2S, 2S, S, \
3517 kInput32bits##input_d, \
3518 kInput32bits##input_n, \
3519 kInput32bits##input_m, \
3520 kInputSIndices); \
3521 } \
3522 TEST(mnemonic##_4S_4S_S) { \
3523 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3524 4S, 4S, S, \
3525 kInput32bits##input_d, \
3526 kInput32bits##input_n, \
3527 kInput32bits##input_m, \
3528 kInputSIndices); \
3529 }
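// By-element ("vector x indexed element") tests: each shape combines a
// destination input list, two source input lists and an index list
// (kInputHIndices for H lanes, kInputSIndices for S lanes).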
3530
3531 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, \
3532 input_d, input_n, input_m) \
3533 TEST(mnemonic##_H_H_H) { \
3534 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3535 H, H, H, \
3536 kInput16bits##input_d, \
3537 kInput16bits##input_n, \
3538 kInput16bits##input_m, \
3539 kInputHIndices); \
3540 } \
3541 TEST(mnemonic##_S_S_S) { \
3542 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3543 S, S, S, \
3544 kInput32bits##input_d, \
3545 kInput32bits##input_n, \
3546 kInput32bits##input_m, \
3547 kInputSIndices); \
3548 }
3549
3550 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
3551 TEST(mnemonic##_2S_2S_S) { \
3552 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3553 2S, 2S, S, \
3554 kInputFloat##input_d, \
3555 kInputFloat##input_n, \
3556 kInputFloat##input_m, \
3557 kInputSIndices); \
3558 } \
3559 TEST(mnemonic##_4S_4S_S) { \
3560 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3561 4S, 4S, S, \
3562 kInputFloat##input_d, \
3563 kInputFloat##input_n, \
3564 kInputFloat##input_m, \
3565 kInputSIndices); \
3566 } \
3567 TEST(mnemonic##_2D_2D_D) { \
3568 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3569 2D, 2D, D, \
3570 kInputDouble##input_d, \
3571 kInputDouble##input_n, \
3572 kInputDouble##input_m, \
3573 kInputDIndices); \
3574   }
3575
3576 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
3577 TEST(mnemonic##_S_S_S) { \
3578 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3579 S, S, S, \
3580 kInputFloat##inp_d, \
3581 kInputFloat##inp_n, \
3582 kInputFloat##inp_m, \
3583 kInputSIndices); \
3584 } \
3585 TEST(mnemonic##_D_D_D) { \
3586 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3587 D, D, D, \
3588 kInputDouble##inp_d, \
3589 kInputDouble##inp_n, \
3590 kInputDouble##inp_m, \
3591 kInputDIndices); \
3592   }
3593
3594
3595 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
3596 TEST(mnemonic##_4S_4H_H) { \
3597 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3598 4S, 4H, H, \
3599 kInput32bits##input_d, \
3600 kInput16bits##input_n, \
3601 kInput16bits##input_m, \
3602 kInputHIndices); \
3603 } \
3604 TEST(mnemonic##2_4S_8H_H) { \
3605 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \
3606 4S, 8H, H, \
3607 kInput32bits##input_d, \
3608 kInput16bits##input_n, \
3609 kInput16bits##input_m, \
3610 kInputHIndices); \
3611 } \
3612 TEST(mnemonic##_2D_2S_S) { \
3613 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3614 2D, 2S, S, \
3615 kInput64bits##input_d, \
3616 kInput32bits##input_n, \
3617 kInput32bits##input_m, \
3618 kInputSIndices); \
3619 } \
3620 TEST(mnemonic##2_2D_4S_S) { \
3621 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \
3622 2D, 4S, S, \
3623 kInput64bits##input_d, \
3624 kInput32bits##input_n, \
3625 kInput32bits##input_m, \
3626 kInputSIndices); \
3627 }
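// The _DIFF by-element forms are the long variants (4S <- 4H x H[index],
// 2D <- 2S x S[index]), with mnemonic##2 variants reading the upper half of
// the first source register.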
3628
3629 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, \
3630 input_d, input_n, input_m) \
3631 TEST(mnemonic##_S_H_H) { \
3632 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3633 S, H, H, \
3634 kInput32bits##input_d, \
3635 kInput16bits##input_n, \
3636 kInput16bits##input_m, \
3637 kInputHIndices); \
3638 } \
3639 TEST(mnemonic##_D_S_S) { \
3640 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3641 D, S, S, \
3642 kInput64bits##input_d, \
3643 kInput32bits##input_n, \
3644 kInput32bits##input_m, \
3645 kInputSIndices); \
3646 }
3647
3648
3649 #define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
3650 variant, \
3651 input_d, \
3652 input_imm1, \
3653 input_n, \
3654 input_imm2) { \
3655 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \
3656 mnemonic, \
3657 variant, variant, \
3658 input_d, input_imm1, \
3659 input_n, input_imm2); \
3660 }
3661
3662 #define DEFINE_TEST_NEON_2OP2IMM(mnemonic, \
3663 input_d, input_imm1, \
3664 input_n, input_imm2) \
3665 TEST(mnemonic##_B) { \
3666 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
3667 16B, \
3668 kInput8bits##input_d, \
3669 kInput8bitsImm##input_imm1, \
3670 kInput8bits##input_n, \
3671 kInput8bitsImm##input_imm2); \
3672 } \
3673 TEST(mnemonic##_H) { \
3674 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
3675 8H, \
3676 kInput16bits##input_d, \
3677 kInput16bitsImm##input_imm1, \
3678 kInput16bits##input_n, \
3679 kInput16bitsImm##input_imm2); \
3680 } \
3681 TEST(mnemonic##_S) { \
3682 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
3683 4S, \
3684 kInput32bits##input_d, \
3685 kInput32bitsImm##input_imm1, \
3686 kInput32bits##input_n, \
3687 kInput32bitsImm##input_imm2); \
3688 } \
3689 TEST(mnemonic##_D) { \
3690 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
3691 2D, \
3692 kInput64bits##input_d, \
3693 kInput64bitsImm##input_imm1, \
3694 kInput64bits##input_n, \
3695 kInput64bitsImm##input_imm2); \
3696 }
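// DEFINE_TEST_NEON_2OP2IMM drives instructions taking two registers and two
// immediates; in this section it is only used for ins, just below. As a rough
// sketch, the B-sized test it generates for ins is approximately:
//
//   TEST(ins_B) {
//     CALL_TEST_NEON_HELPER_2OP2IMM(ins, 16B,
//                                   kInput8bitsBasic,
//                                   kInput8bitsImmLaneCountFromZero,
//                                   kInput8bitsBasic,
//                                   kInput8bitsImmLaneCountFromZero);
//   }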
3697
3698
3699 // Advanced SIMD copy.
3700 DEFINE_TEST_NEON_2OP2IMM(ins,
3701 Basic, LaneCountFromZero,
3702 Basic, LaneCountFromZero)
3703 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
3704
3705
3706 // Advanced SIMD scalar copy.
3707 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
3708
3709
3710 // Advanced SIMD three same.
3711 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
3712 DEFINE_TEST_NEON_3SAME(sqadd, Basic)
3713 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
3714 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
3715 DEFINE_TEST_NEON_3SAME(sqsub, Basic)
3716 DEFINE_TEST_NEON_3SAME(cmgt, Basic)
3717 DEFINE_TEST_NEON_3SAME(cmge, Basic)
3718 DEFINE_TEST_NEON_3SAME(sshl, Basic)
3719 DEFINE_TEST_NEON_3SAME(sqshl, Basic)
3720 DEFINE_TEST_NEON_3SAME(srshl, Basic)
3721 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
3722 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
3723 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
3724 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
3725 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
3726 DEFINE_TEST_NEON_3SAME(add, Basic)
3727 DEFINE_TEST_NEON_3SAME(cmtst, Basic)
3728 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
3729 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
3730 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
3731 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
3732 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
3733 DEFINE_TEST_NEON_3SAME(addp, Basic)
3734 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
3735 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
3736 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
3737 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
3738 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
3739 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
3740 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
3741 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
3742 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
3743 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
3744 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
3745 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
3746 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
3747 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
3748 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
3749 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
3750 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
3751 DEFINE_TEST_NEON_3SAME(uqadd, Basic)
3752 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
3753 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
3754 DEFINE_TEST_NEON_3SAME(uqsub, Basic)
3755 DEFINE_TEST_NEON_3SAME(cmhi, Basic)
3756 DEFINE_TEST_NEON_3SAME(cmhs, Basic)
3757 DEFINE_TEST_NEON_3SAME(ushl, Basic)
3758 DEFINE_TEST_NEON_3SAME(uqshl, Basic)
3759 DEFINE_TEST_NEON_3SAME(urshl, Basic)
3760 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
3761 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
3762 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
3763 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
3764 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
3765 DEFINE_TEST_NEON_3SAME(sub, Basic)
3766 DEFINE_TEST_NEON_3SAME(cmeq, Basic)
3767 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
3768 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
3769 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
3770 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
3771 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
3772 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
3773 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
3774 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
3775 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
3776 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
3777 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
3778 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
3779 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
3780 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
3781 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
3782 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
3783 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
3784 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
3785 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
3786 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
3787 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
3788
3789
3790 // Advanced SIMD scalar three same.
3791 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
3792 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
3793 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
3794 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
3795 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
3796 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
3797 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
3798 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
3799 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
3800 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
3801 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
3802 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
3803 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
3804 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
3805 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
3806 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
3807 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
3808 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
3809 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
3810 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
3811 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
3812 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
3813 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
3814 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
3815 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
3816 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
3817 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
3818 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
3819 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
3820 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
3821 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
3822
3823
3824 // Advanced SIMD three different.
3825 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
3826 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
3827 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
3828 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
3829 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
3830 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
3831 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
3832 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
3833 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
3834 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
3835 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
3836 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
3837 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
3838 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
3839 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
3840 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
3841 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
3842 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
3843 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
3844 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
3845 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
3846 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
3847 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
3848 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
3849 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
3850 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
3851
3852
3853 // Advanced SIMD scalar three different.
3854 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
3855 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
3856 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
3857
3858
3859 // Advanced SIMD scalar pairwise.
3860 TEST(addp_SCALAR) {
3861 CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
3862 }
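// addp is the only integer instruction in the scalar pairwise group, so it
// gets a hand-written test; the floating-point pairwise reductions below reuse
// the 2DIFF FP scalar macro.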
3863 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
3864 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
3865 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
3866 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
3867 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
3868
3869
3870 // Advanced SIMD shift by immediate.
3871 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
3872 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
3873 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
3874 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
3875 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
3876 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
3877 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
3878 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
3879 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
3880 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
3881 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
3882 DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, \
3883 TypeWidthFromZeroToWidth)
3884 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
3885 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
3886 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
3887 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
3888 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
3889 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
3890 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
3891 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
3892 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
3893 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
3894 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
3895 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
3896 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
3897 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
3898 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, \
3899 TypeWidthFromZeroToWidth)
3900 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
3901
3902
3903 // Advanced SIMD scalar shift by immediate.
3904 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
3905 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
3906 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
3907 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
3908 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
3909 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
3910 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
3911 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
3912 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, \
3913 TypeWidthFromZeroToWidth)
3914 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
3915 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
3916 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
3917 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
3918 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
3919 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
3920 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
3921 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
3922 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
3923 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
3924 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
3925 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
3926 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
3927 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, \
3928 TypeWidthFromZeroToWidth)
3929 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
3930
3931
3932 // Advanced SIMD two-register miscellaneous.
3933 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
3934 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
3935 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
3936 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
3937 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
3938 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
3939 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
3940 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
3941 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
3942 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
3943 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
3944 DEFINE_TEST_NEON_2SAME(abs, Basic)
3945 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
3946 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
3947 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
3948 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
3949 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
3950 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
3951 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
3952 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
3953 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
3954 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
3955 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
3956 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
3957 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
3958 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
3959 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
3960 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
3961 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
3962 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
3963 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
3964 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
3965 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
3966 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
3967 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
3968 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
3969 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
3970 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
3971 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
3972 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
3973 DEFINE_TEST_NEON_2SAME(neg, Basic)
3974 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
3975 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
3976 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
3977 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
3978 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
3979 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
3980 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
3981 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
3982 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
3983 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
3984 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
3985 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
3986 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
3987 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
3988 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
3989 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
3990 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
3991 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
3992 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
3993 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
3994 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)
3995
3996
3997 // Advanced SIMD scalar two-register miscellaneous.
3998 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
3999 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
4000 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
4001 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
4002 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
4003 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
4004 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
4005 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
4006 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
4007 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
4008 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
4009 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
4010 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
4011 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
4012 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
4013 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
4014 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
4015 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
4016 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
4017 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
4018 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
4019 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
4020 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
4021 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
4022 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
4023 TEST(fcvtxn_SCALAR) {
4024 CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
4025 }
4026 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
4027 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
4028 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
4029 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
4030 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
4031 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
4032 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
4033 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
4034 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)
4035
4036
4037 // Advanced SIMD across lanes.
4038 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
4039 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
4040 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
4041 DEFINE_TEST_NEON_ACROSS(addv, Basic)
4042 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
4043 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
4044 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
4045 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
4046 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
4047 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
4048 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
4049
4050
4051 // Advanced SIMD permute.
4052 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
4053 DEFINE_TEST_NEON_3SAME(trn1, Basic)
4054 DEFINE_TEST_NEON_3SAME(zip1, Basic)
4055 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
4056 DEFINE_TEST_NEON_3SAME(trn2, Basic)
4057 DEFINE_TEST_NEON_3SAME(zip2, Basic)
4058
4059
4060 // Advanced SIMD vector x indexed element.
4061 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
4062 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
4063 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
4064 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
4065 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
4066 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
4067 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
4068 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
4069 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
4070 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
4071 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
4072 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
4073 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
4074 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
4075 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
4076 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
4077 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
4078 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
4079
4080
4081 // Advanced SIMD scalar x indexed element.
4082 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
4083 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
4084 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
4085 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
4086 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
4087 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
4088 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
4089 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
4090 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
4091
4092 } // namespace vixl
4093