; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s

;;; Test vector merge intrinsic instructions
;;;
;;; Note:
;;;   We test VMRG*vvml, VMRG*vvml_v, VMRG*rvml, VMRG*rvml_v, VMRG*ivml, and
;;;   VMRG*ivml_v instructions.

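;;;   Suffix legend (our reading of the names below): in the intrinsic names,
;;;   the leading v is the vector result and the remaining letters name the
;;;   arguments in order: v = vector, s = scalar, m = 256-bit mask, M =
;;;   512-bit packed mask, a v between the mask and l = passthrough vector
;;;   for the unmasked elements, and l = the active vector length. In the
;;;   instruction names, r and i mark scalar-register and immediate operands.

; vvvml: vector-vector merge under mask at the full vector length of 256;
; with no passthrough operand, the result reuses %v0 directly.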
; Function Attrs: nounwind readnone
define fastcc <256 x double> @vmrg_vvvml(<256 x double> %0, <256 x double> %1, <256 x i1> %2) {
; CHECK-LABEL: vmrg_vvvml:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg %v0, %v0, %v1, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vvvml(<256 x double> %0, <256 x double> %1, <256 x i1> %2, i32 256)
  ret <256 x double> %4
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrg.vvvml(<256 x double>, <256 x double>, <256 x i1>, i32)

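; vvvmvl: merge at VL 128 into the passthrough register, leaving its
; remaining elements intact, then copy the result to %v0 with a full-length
; vor.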
; Function Attrs: nounwind readnone
define fastcc <256 x double> @vmrg_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
; CHECK-LABEL: vmrg_vvvmvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 128
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg %v2, %v0, %v1, %vm1
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v2
; CHECK-NEXT:    b.l.t (, %s10)
  %5 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
  ret <256 x double> %5
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrg.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)

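; vsvml: the first operand is an i64 scalar in %s0, exercising the
; scalar-register (rvml) instruction form.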
; Function Attrs: nounwind readnone
define fastcc <256 x double> @vmrg_vsvml(i64 %0, <256 x double> %1, <256 x i1> %2) {
; CHECK-LABEL: vmrg_vsvml:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vmrg %v0, %s0, %v0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvml(i64 %0, <256 x double> %1, <256 x i1> %2, i32 256)
  ret <256 x double> %4
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrg.vsvml(i64, <256 x double>, <256 x i1>, i32)

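; vsvmvl: the scalar-register form at VL 128 with a passthrough, again
; finished with a full-length vor copy.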
; Function Attrs: nounwind readnone
define fastcc <256 x double> @vmrg_vsvmvl(i64 %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
; CHECK-LABEL: vmrg_vsvmvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 128
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vmrg %v1, %s0, %v0, %vm1
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v1
; CHECK-NEXT:    b.l.t (, %s10)
  %5 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64 %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
  ret <256 x double> %5
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64, <256 x double>, <256 x i1>, <256 x double>, i32)

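; vsvml with a constant: the value 8 fits in the immediate field, so the
; immediate (ivml) instruction form is selected.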
; Function Attrs: nounwind readnone
define fastcc <256 x double> @vmrg_vsvml_imm(<256 x double> %0, <256 x i1> %1) {
; CHECK-LABEL: vmrg_vsvml_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg %v0, 8, %v0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvml(i64 8, <256 x double> %0, <256 x i1> %1, i32 256)
  ret <256 x double> %3
}

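; The immediate form combined with a passthrough (ivml_v).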
; Function Attrs: nounwind readnone
define fastcc <256 x double> @vmrg_vsvmvl_imm(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
; CHECK-LABEL: vmrg_vsvmvl_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 128
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg %v1, 8, %v0, %vm1
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v1
; CHECK-NEXT:    b.l.t (, %s10)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64 8, <256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
  ret <256 x double> %4
}

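; vvvMl: the capital M marks a 512-bit packed mask, which lowers to vmrg.w
; with the mask in %vm2.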
; Function Attrs: nounwind readnone
define fastcc <256 x double> @vmrgw_vvvMl(<256 x double> %0, <256 x double> %1, <512 x i1> %2) {
; CHECK-LABEL: vmrgw_vvvMl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg.w %v0, %v0, %v1, %vm2
; CHECK-NEXT:    b.l.t (, %s10)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vvvMl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, i32 256)
  ret <256 x double> %4
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrgw.vvvMl(<256 x double>, <256 x double>, <512 x i1>, i32)

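; vvvMvl: the packed-mask merge with a passthrough, following the same
; merge-then-vor pattern at VL 128.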
; Function Attrs: nounwind readnone
define fastcc <256 x double> @vmrgw_vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
; CHECK-LABEL: vmrgw_vvvMvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 128
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg.w %v2, %v0, %v1, %vm2
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v2
; CHECK-NEXT:    b.l.t (, %s10)
  %5 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
  ret <256 x double> %5
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrgw.vvvMvl(<256 x double>, <256 x double>, <512 x i1>, <256 x double>, i32)