; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
; arm64 has a separate copy due to intrinsics

; copyTuple.QPair: two chained two-register lane loads (lane index 1) on
; <4 x i32> vectors.
;   * The first ld2lane call reads through %a into a pair made of two constant
;     vectors (all -1 and all 2).
;   * Element 0 of that result becomes the first vector operand of the second
;     ld2lane call, which reads through %b; element 0 of the second result is
;     returned.
; The FileCheck directives expect two full-width (.16b) vector register moves
; followed by an ld2 single-lane load. Presumably the moves are the register
; tuple copy the test name refers to - confirm against the backend.
; The address operand may be either a general-purpose x register or sp.
define <4 x i32> @copyTuple.QPair(i32* %a, i32* %b) {
; CHECK-LABEL: copyTuple.QPair:
; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [{{x[0-9]+|sp}}]
entry:
  %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, i64 1, i32* %a)
  %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
  %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i64 1, i32* %b)
  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0
  ret <4 x i32> %vld1.fca.0.extract
}

; copyTuple.QTriple: two chained three-register lane loads (lane index 1) on
; <4 x i32> vectors.
;   * The first ld3lane call reads through %a into the triple
;     (all -1, %c, %c).
;   * Element 0 of that result becomes the first vector operand of the second
;     ld3lane call, whose triple is (%extract, all -1, %c) and which reads
;     through %b; element 0 of the second result is returned.
; The FileCheck directives expect three full-width (.16b) vector register
; moves followed by an ld3 single-lane load. Presumably the moves are the
; register tuple copy the test name refers to - confirm against the backend.
; The address operand may be either a general-purpose x register or sp.
define <4 x i32> @copyTuple.QTriple(i32* %a, i32* %b, <4 x i32> %c) {
; CHECK-LABEL: copyTuple.QTriple:
; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [{{x[0-9]+|sp}}]
entry:
  %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a)
  %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
  %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, i64 1, i32* %b)
  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
  ret <4 x i32> %vld1.fca.0.extract
}

; copyTuple.QQuad: two chained four-register lane loads (lane index 1) on
; <4 x i32> vectors.
;   * The first ld4lane call reads through %a into the quad
;     (all -1, %c, %c, %c).
;   * Element 0 of that result becomes the first vector operand of the second
;     ld4lane call, whose quad is (%extract, all -1, %c, %c) and which reads
;     through %b; element 0 of the second result is returned.
; The FileCheck directives expect four full-width (.16b) vector register
; moves followed by an ld4 single-lane load. Presumably the moves are the
; register tuple copy the test name refers to - confirm against the backend.
; The address operand may be either a general-purpose x register or sp.
define <4 x i32> @copyTuple.QQuad(i32* %a, i32* %b, <4 x i32> %c) {
; CHECK-LABEL: copyTuple.QQuad:
; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [{{x[0-9]+|sp}}]
entry:
  %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a)
  %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
  %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %b)
  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
  ret <4 x i32> %vld1.fca.0.extract
}

; Declarations of the lane-load intrinsics called by the tests in this file.
; Each takes N <4 x i32> vectors, an i64 lane index and an i32* address, and
; returns a struct of N <4 x i32> vectors (N = 2, 3, 4).
declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)
declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
