1; RUN: llc -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s
2; REQUIRES: asserts
3
4target triple = "x86_64-pc-linux-gnu"
5
6; Can we lower a single vector?
; %obj is the eighth operand (index 7, counting from 0) of the statepoint call,
; and the relocate below passes 7 for both of its index operands.  The
; FileCheck patterns expect a 24-byte frame adjustment, a store of %xmm0 to
; (%rsp) before the call to do_safepoint, and a reload from (%rsp) into %xmm0
; after it.
7define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
8entry:
9; CHECK-LABEL: @test
10; CHECK: subq	$24, %rsp
11; CHECK: movaps	%xmm0, (%rsp)
12; CHECK: callq	do_safepoint
13; CHECK: movaps	(%rsp), %xmm0
14; CHECK: addq	$24, %rsp
15  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj)
16  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
17  ret <2 x i8 addrspace(1)*> %obj.relocated
18}
19
20; Can we lower the base, derived pairs if both are vectors?
; %derived is computed from %obj with a getelementptr by the scalar %offset.
; Both vectors are passed to the statepoint (%obj at operand index 7, %derived
; at index 8) and the relocate names the pair (7, 8).  The FileCheck patterns
; expect a 40-byte frame adjustment with %xmm0 stored at 16(%rsp) and %xmm1
; stored at (%rsp) before the call, and only the (%rsp) slot reloaded after
; it; the function returns %derived.relocated.
21define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" {
22entry:
23; CHECK-LABEL: @test2
24; CHECK: subq	$40, %rsp
25; CHECK: movd	%rdi, %xmm1
26; CHECK: pshufd	$68, %xmm1, %xmm1       # xmm1 = xmm1[0,1,0,1]
27; CHECK: paddq	%xmm0, %xmm1
28; CHECK: movdqa	%xmm0, 16(%rsp)
29; CHECK: movdqa	%xmm1, (%rsp)
30; CHECK: callq	do_safepoint
31; CHECK: movaps	(%rsp), %xmm0
32; CHECK: addq	$40, %rsp
33  %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset
34  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived)
35  %derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 8) ; (%obj, %derived)
36  ret <2 x i8 addrspace(1)*> %derived.relocated
37}
38
39; Originally, this was just a variant of @test2 above, but it ends up
40; covering a bunch of interesting missed optimizations.  Specifically:
41; - We waste a stack slot for a value that a backend transform pass
42;   CSEd to another spilled one.
43; - We don't remove the testb even though it serves no purpose
44; - We could in principle reuse the argument memory (%rsi) and do away
45;   with stack slots entirely.
; Shape of the input: both the taken and untaken blocks load the same vector
; type from the same %ptr, and the two phis in merge have identical incoming
; values.  The statepoint receives %obj at operand index 7 and %obj.base at
; index 8.  Two relocates are issued, but only %obj.relocated.casted is
; returned; %obj.base.relocated.casted has no user in this function.
; The FileCheck patterns expect the testb to remain, a single load from (%rsi),
; and the same %xmm0 stored to both 16(%rsp) and (%rsp) before the call.
46define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
47entry:
48; CHECK-LABEL: @test3
49; CHECK: subq	$40, %rsp
50; CHECK: testb	$1, %dil
51; CHECK: movaps	(%rsi), %xmm0
52; CHECK: movaps	%xmm0, 16(%rsp)
53; CHECK: movaps	%xmm0, (%rsp)
54; CHECK: callq	do_safepoint
55; CHECK: movaps	(%rsp), %xmm0
56; CHECK: addq	$40, %rsp
57  br i1 %cnd, label %taken, label %untaken
58
59taken:                                            ; preds = %entry
60  %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
61  br label %merge
62
63untaken:                                          ; preds = %entry
64  %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
65  br label %merge
66
67merge:                                            ; preds = %untaken, %taken
68  %obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
69  %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
70  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base)
71  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 7) ; (%obj.base, %obj)
72  %obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*>
73  %obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 8) ; (%obj.base, %obj.base)
74  %obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*>
75  ret <2 x i64 addrspace(1)*> %obj.relocated.casted
76}
77
78; Can we handle vector constants?  At the moment, we don't appear to actually
79; get selection dag nodes for these.
; The value at statepoint operand index 7 is the constant zeroinitializer
; vector rather than an SSA value, and the relocate names index 7 twice.  The
; FileCheck patterns expect the zero vector to be materialized with xorps,
; stored to (%rsp) before the call, and reloaded from (%rsp) afterwards.
80define <2 x i8 addrspace(1)*> @test4() gc "statepoint-example" {
81entry:
82; CHECK-LABEL: @test4
83; CHECK: subq	$24, %rsp
84; CHECK: xorps %xmm0, %xmm0
85; CHECK: movaps	%xmm0, (%rsp)
86; CHECK: callq	do_safepoint
87; CHECK: movaps	(%rsp), %xmm0
88; CHECK: addq	$24, %rsp
89  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> zeroinitializer)
90  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (zeroinitializer, zeroinitializer)
91  ret <2 x i8 addrspace(1)*> %obj.relocated
92}
93
94; Check that we can lower a constant typed as i128 correctly.  Note that the
95; actual value is representable in 64 bits.  We don't have a representation
96; of larger than 64 bit constant in the StackMap format.
; Unlike the other functions here, the statepoint operand at index 6 is i32 1
; instead of i32 0, and it is followed by a single i128 0 operand; no relocate
; is issued.  NOTE(review): presumably the i32 1 is the count that announces
; the trailing i128 operand -- confirm against the statepoint intrinsic
; documentation.  The FileCheck patterns for this function only match a push,
; the call to do_safepoint, and a pop.
97define void @test5() gc "statepoint-example" {
98entry:
99; CHECK-LABEL: @test5
100; CHECK: push
101; CHECK: callq	do_safepoint
102; CHECK: pop
103  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 1, i128 0)
104  ret void
105}
106
107; CHECK: __LLVM_StackMaps:
108
109; CHECK: .Ltmp1-test
110; Check for the two recorded locations (Loc 3 and Loc 4); both refer to the same spill slot at offset 0
111; Stack Maps: 		Loc 3: Indirect 7+0	[encoding: .byte 3, .byte 16, .short 7, .int 0]
112; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 16, .short 7, .int 0]
113; CHECK: .byte	3
114; CHECK: .byte	16
115; CHECK: .short	7
116; CHECK: .long	0
117; CHECK: .byte	3
118; CHECK: .byte	16
119; CHECK: .short	7
120; CHECK: .long	0
121
122; CHECK: .Ltmp3-test2
123; Check for the two spill slots
124; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16]
125; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 16, .short 7, .int 0]
126; CHECK: .byte	3
127; CHECK: .byte	16
128; CHECK: .short	7
129; CHECK: .long	16
130; CHECK: .byte	3
131; CHECK: .byte	16
132; CHECK: .short	7
133; CHECK: .long	0
134
135; CHECK: .Ltmp5-test3
136; Check for the four recorded locations (Loc 3 through Loc 6); the first three share the spill slot at offset 16
137; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16]
138; Stack Maps: 		Loc 4: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16]
139; Stack Maps: 		Loc 5: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16]
140; Stack Maps: 		Loc 6: Indirect 7+0		[encoding: .byte 3, .byte 16, .short 7, .int 0]
141; CHECK: .byte	3
142; CHECK: .byte	16
143; CHECK: .short	7
144; CHECK: .long	16
145; CHECK: .byte	3
146; CHECK: .byte	16
147; CHECK: .short	7
148; CHECK: .long	16
149; CHECK: .byte	3
150; CHECK: .byte	16
151; CHECK: .short	7
152; CHECK: .long	16
153; CHECK: .byte	3
154; CHECK: .byte	16
155; CHECK: .short	7
156; CHECK: .long	0
157
; Callee passed as the target operand of every statepoint call in this file.
158declare void @do_safepoint()
159
; Statepoint intrinsic, variadic after its five fixed parameters.
160declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
; Scalar-pointer relocate; not called by any function visible in this file.
161declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
; Two-element vector-of-pointers relocate used by @test through @test4.
162declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)
163