1; Tests basics and corner cases of x86-32 sandboxing, using -Om1 in
2; the hope that the output will remain stable.  When packing bundles,
3; we try to limit to a few instructions with well known sizes and
4; minimal use of registers and stack slots in the lowering sequence.
5
6; XFAIL: filtype=asm
7; RUN: %p2i -i %s --sandbox --filetype=obj --disassemble --args -Om1 \
8; RUN:   -allow-externally-defined-symbols \
9; RUN:   -ffunction-sections | FileCheck %s
10
11; RUN: %p2i -i %s --sandbox --filetype=obj --disassemble --target=x8664 \
12; RUN:   --args -Om1 -allow-externally-defined-symbols  \
13; RUN:   -ffunction-sections | FileCheck %s --check-prefix X8664
14
; External function that the tests below call; it has no body in this file.
15declare void @call_target()
; Zero-initialized byte arrays of 1, 2 and 4 bytes. The tests bitcast them to
; i8*, i16* and i32* and store 0 through the result; the encoded size of each
; such store is noted next to it where the layout matters.
16@global_byte = internal global [1 x i8] zeroinitializer
17@global_short = internal global [2 x i8] zeroinitializer
18@global_int = internal global [4 x i8] zeroinitializer
19
20; A direct call is nop-padded so that the call instruction ends exactly at
; a bundle boundary: the expected x86-32 output has the call at offset 0x1b
; and the following instruction at 0x20. Unlike the indirect case, no `and`
; mask appears in the expected output. On x86-64 the expected output is a
; push of a return address at offset 0x20 followed by a jmp to the target.
21define internal void @test_direct_call() {
22entry:
23  call void @call_target()
24  ret void
25}
26; CHECK-LABEL: test_direct_call
27; CHECK: nop
28; CHECK: 1b: {{.*}} call 1c
29; CHECK-NEXT: 20:
30; X8664-LABEL: test_direct_call
31; X8664: push {{.*}} R_X86_64_32S test_direct_call+{{.*}}20
32; X8664: jmp {{.*}} call_target
33
34; An indirect call sequence uses the right mask and register-call sequence:
; the target is loaded from the stack into a register, masked with
; 0xffffffe0, and called through that register. The and/call pair is
; nop-padded so that the instruction after the call starts at offset 0x20.
; On x86-64 the expected output masks the 32-bit register, adds r15 to the
; 64-bit register, and jumps through it after pushing the return address.
35define internal void @test_indirect_call(i32 %target) {
36entry:
37  %__1 = inttoptr i32 %target to void ()*
38  call void %__1()
39  ret void
40}
41; CHECK-LABEL: test_indirect_call
42; CHECK: mov [[REG:.*]],DWORD PTR [esp
43; CHECK-NEXT: nop
44; CHECK: 1b: {{.*}} and [[REG]],0xffffffe0
45; CHECK-NEXT: call [[REG]]
; The instruction after the call must start on the next bundle boundary.
46; CHECK-NEXT: 20:
47; X8664-LABEL: test_indirect_call
48; X8664: push {{.*}} R_X86_64_32S test_indirect_call+{{.*}}20
49; X8664: {{.*}} and e[[REG:..]],0xffffffe0
50; X8664: add r[[REG]],r15
51; X8664: jmp r[[REG]]
52
53; A return sequence uses the right pop / mask / jmp sequence.
; The return address is popped into ecx (rcx on x86-64), its low five bits
; are cleared with 0xffffffe0, and control jumps through the register; the
; x86-64 sequence additionally adds r15 before the jump.
54define internal void @test_ret() {
55entry:
56  ret void
57}
58; CHECK-LABEL: test_ret
59; CHECK: pop ecx
60; CHECK-NEXT: and ecx,0xffffffe0
61; CHECK-NEXT: jmp ecx
62; X8664-LABEL: test_ret
63; X8664: pop rcx
64; X8664: and ecx,0xffffffe0
65; X8664: add rcx,r15
66; X8664: jmp rcx
67
68; A perfectly packed bundle should not have nops at the end.
; The four stores between the two boundary markers are expected at offsets
; 0x20, 0x27, 0x30 and 0x37, with the next store starting at 0x40.
69define internal void @packed_bundle() {
70entry:
71  call void @call_target()
72  ; bundle boundary
73  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
74  %addr_short = bitcast [2 x i8]* @global_short to i16*
75  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
76  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
77  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
78  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ; 7 + 9 + 7 + 9 = 32 bytes: the bundle is exactly full, so no nop follows
79  ; bundle boundary
80  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
81  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
82  ret void
83}
84; CHECK-LABEL: packed_bundle
85; CHECK: call
86; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
87; CHECK-NEXT: 27: {{.*}} mov WORD PTR
88; CHECK-NEXT: 30: {{.*}} mov BYTE PTR
89; CHECK-NEXT: 37: {{.*}} mov WORD PTR
90; CHECK-NEXT: 40: {{.*}} mov BYTE PTR
91; CHECK-NEXT: 47: {{.*}} mov WORD PTR
92
93; An imperfectly packed bundle should have one or more nops at the end.
; Three 9-byte stores occupy 0x20-0x3a; the fourth is expected at 0x40,
; with nop padding starting at 0x3b.
94define internal void @nonpacked_bundle() {
95entry:
96  call void @call_target()
97  ; bundle boundary
98  %addr_short = bitcast [2 x i8]* @global_short to i16*
99  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
100  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
101  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ; 3 * 9 = 27 bytes used; another 9-byte store would not fit in the
  ; remaining 5 bytes of the bundle
102  ; nop padding
103  ; bundle boundary
104  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
105  ret void
106}
107; CHECK-LABEL: nonpacked_bundle
108; CHECK: call
109; CHECK-NEXT: 20: {{.*}} mov WORD PTR
110; CHECK-NEXT: 29: {{.*}} mov WORD PTR
111; CHECK-NEXT: 32: {{.*}} mov WORD PTR
112; CHECK-NEXT: 3b: {{.*}} nop
113; CHECK: 40: {{.*}} mov WORD PTR
114
115; A zero-byte instruction (e.g. local label definition) at a bundle
116; boundary should not trigger nop padding.
; The 10-byte store plus the 22-byte select lowering fill the bundle, so
; the branch target of the select lands at offset 0x40 and the following
; store is expected there with no nop in between.
; NOTE(review): %farg1 and %farg2 are not referenced in the body.
117define internal void @label_at_boundary(i32 %arg, float %farg1, float %farg2) {
118entry:
119  %argi8 = trunc i32 %arg to i8
120  call void @call_target()
121  ; bundle boundary
122  %addr_short = bitcast [2 x i8]* @global_short to i16*
123  %addr_int = bitcast [4 x i8]* @global_int to i32*
124  store i32 0, i32* %addr_int, align 1           ; 10-byte instruction
125  %blah = select i1 true, i8 %argi8, i8 %argi8   ; 22-byte lowering sequence
  ; 10 + 22 = 32 bytes, so the label falls exactly on the bundle boundary
126  ; label is here
127  store i16 0, i16* %addr_short, align 1         ; 9-byte instruction
128  ret void
129}
130; CHECK-LABEL: label_at_boundary
131; CHECK: call
132; We rely on a particular 7-instruction 22-byte Om1 lowering sequence
133; for select.
134; CHECK-NEXT: 20: {{.*}} mov DWORD PTR
135; CHECK-NEXT: 2a: {{.*}} mov {{.*}},0x1
136; CHECK-NEXT: 2c: {{.*}} cmp {{.*}},0x0
137; CHECK-NEXT: 2e: {{.*}} mov {{.*}},BYTE PTR
138; CHECK-NEXT: 32: {{.*}} mov BYTE PTR
139; CHECK-NEXT: 36: {{.*}} jne 40
140; CHECK-NEXT: 38: {{.*}} mov {{.*}},BYTE PTR
141; CHECK-NEXT: 3c: {{.*}} mov BYTE PTR
142; CHECK-NEXT: 40: {{.*}} mov WORD PTR
143
144; Bundle lock without padding.
; The store is expected to be followed directly by the pop / mask / jmp
; return sequence, with no nop between any of the four instructions.
145define internal void @bundle_lock_without_padding() {
146entry:
147  %addr_short = bitcast [2 x i8]* @global_short to i16*
148  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
149  ret void
150}
151; CHECK-LABEL: bundle_lock_without_padding
152; CHECK: mov WORD PTR
153; CHECK-NEXT: pop ecx
154; CHECK-NEXT: and ecx,0xffffffe0
155; CHECK-NEXT: jmp ecx
156
157; Bundle lock with padding.
; After the call, 7 + 9 + 9 bytes of stores plus the 3-byte stack restore
; and the 1-byte pop end at offset 0x3d, leaving 3 bytes in the bundle. The
; mask/jmp pair needs 5 bytes, so a nop is expected at 0x3d and the pair at
; 0x40 and 0x43.
158define internal void @bundle_lock_with_padding() {
159entry:
160  call void @call_target()
161  ; bundle boundary
162  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
163  %addr_short = bitcast [2 x i8]* @global_short to i16*
164  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
165  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
166  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
167  ret void
168  ; 3 bytes to restore stack pointer
169  ; 1 byte to pop ecx
170  ; bundle_lock
171  ; 3 bytes to mask ecx
172  ; This is now 32 bytes from the beginning of the bundle, so
173  ; a 3-byte nop will need to be emitted before the bundle_lock.
174  ; 2 bytes to jump to ecx
175  ; bundle_unlock
176}
177; CHECK-LABEL: bundle_lock_with_padding
178; CHECK: call
179; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
180; CHECK-NEXT: 27: {{.*}} mov WORD PTR
181; CHECK-NEXT: 30: {{.*}} mov WORD PTR
182; CHECK-NEXT: 39: {{.*}} add esp,
183; CHECK-NEXT: 3c: {{.*}} pop ecx
184; CHECK-NEXT: 3d: {{.*}} nop
185; CHECK-NEXT: 40: {{.*}} and ecx,0xffffffe0
186; CHECK-NEXT: 43: {{.*}} jmp ecx
187
188; Bundle lock align_to_end without any padding.
; Three 9-byte stores put the second call at offset 0x3b, where its 5 bytes
; already end at the bundle boundary.
189define internal void @bundle_lock_align_to_end_padding_0() {
190entry:
191  call void @call_target()
192  ; bundle boundary
193  %addr_short = bitcast [2 x i8]* @global_short to i16*
194  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
195  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
196  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ; 3 * 9 + 5 = 32 bytes: the call ends the bundle without any nop
197  call void @call_target()                 ; 5-byte instruction
198  ret void
199}
200; CHECK-LABEL: bundle_lock_align_to_end_padding_0
201; CHECK: call
202; CHECK-NEXT: 20: {{.*}} mov WORD PTR
203; CHECK-NEXT: 29: {{.*}} mov WORD PTR
204; CHECK-NEXT: 32: {{.*}} mov WORD PTR
205; CHECK-NEXT: 3b: {{.*}} call
206
207; Bundle lock align_to_end with one bunch of padding.
; Three 7-byte stores end at offset 0x35; nop padding is expected from
; there up to 0x3b so that the 5-byte call ends at the bundle boundary.
208define internal void @bundle_lock_align_to_end_padding_1() {
209entry:
210  call void @call_target()
211  ; bundle boundary
212  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
213  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
214  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
215  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  ; 3 * 7 = 21 bytes used; 6 bytes of nop precede the call
216  call void @call_target()                 ; 5-byte instruction
217  ret void
218}
219; CHECK-LABEL: bundle_lock_align_to_end_padding_1
220; CHECK: call
221; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
222; CHECK-NEXT: 27: {{.*}} mov BYTE PTR
223; CHECK-NEXT: 2e: {{.*}} mov BYTE PTR
224; CHECK-NEXT: 35: {{.*}} nop
225; CHECK: 3b: {{.*}} call
226
227; Bundle lock align_to_end with two bunches of padding.
; The first nop (at 0x3d) finishes the current bundle; the second (starting
; at 0x40) pushes the and/call pair to 0x5b/0x5e so that the call ends at
; the next bundle boundary.
228define internal void @bundle_lock_align_to_end_padding_2(i32 %target) {
229entry:
230  call void @call_target()
231  ; bundle boundary
232  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
233  %addr_short = bitcast [2 x i8]* @global_short to i16*
234  %__1 = inttoptr i32 %target to void ()*
235  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
236  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
237  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
238  call void %__1()
239  ; 4 bytes to load %target into a register
240  ; bundle_lock align_to_end
241  ; 3 bytes to mask the register
242  ; This is now 32 bytes from the beginning of the bundle, so
243  ; a 3-byte nop will need to be emitted before the bundle_lock,
244  ; followed by a 27-byte nop before the mask/call.
245  ; 2 bytes to call through the register
246  ; bundle_unlock
247  ret void
248}
249; CHECK-LABEL: bundle_lock_align_to_end_padding_2
250; CHECK: call
251; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
252; CHECK-NEXT: 27: {{.*}} mov WORD PTR
253; CHECK-NEXT: 30: {{.*}} mov WORD PTR
254; CHECK-NEXT: 39: {{.*}} mov [[REG:.*]],DWORD PTR [esp
255; CHECK-NEXT: 3d: {{.*}} nop
256; CHECK: 40: {{.*}} nop
257; CHECK: 5b: {{.*}} and [[REG]],0xffffffe0
258; CHECK-NEXT: 5e: {{.*}} call [[REG]]
259
260; Tests the pad_to_end bundle alignment with no padding bytes needed.
; Only the x86-64 output is verified for this function: the push/jmp call
; sequence is expected at 0x56 and 0x5b, with the next instruction at 0x60.
261define internal void @bundle_lock_pad_to_end_padding_0(i32 %arg0, i32 %arg1,
262                                                       i32 %arg3, i32 %arg4,
263                                                       i32 %arg5, i32 %arg6) {
264  call void @call_target()
265  ; bundle boundary
266  %x = add i32 %arg5, %arg6  ; 12 bytes
267  %y = trunc i32 %x to i16   ; 10 bytes
  ; 12 + 10 + 10 = 32 bytes: the call sequence ends exactly at the boundary
268  call void @call_target()   ; 10 bytes
269  ; bundle boundary
270  ret void
271}
272; X8664: 56: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_0+{{.*}}60
273; X8664: 5b: {{.*}} jmp {{.*}} call_target
274; X8664: 60: {{.*}} add
275
276; Tests the pad_to_end bundle alignment with 11 padding bytes needed, and some
277; instructions before the call.
; Only the x86-64 output is verified for this function: the push/jmp call
; sequence is expected at 0x4b and 0x50, followed by nops from 0x55 up to
; the next instruction at 0x60.
278define internal void @bundle_lock_pad_to_end_padding_11(i32 %arg0, i32 %arg1,
279                                                        i32 %arg3, i32 %arg4,
280                                                        i32 %arg5, i32 %arg6) {
281  call void @call_target()
282  ; bundle boundary
283  %x = add i32 %arg5, %arg6  ; 11 bytes
  ; 11 + 10 = 21 bytes used, leaving 11 bytes to pad after the call sequence
284  call void @call_target()   ; 10 bytes
285                             ; 11 bytes of nop
286  ; bundle boundary
287  ret void
288}
289; X8664: 4b: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_11+{{.*}}60
290; X8664: 50: {{.*}} jmp {{.*}} call_target
291; X8664: 55: {{.*}} nop
292; X8664: 5d: {{.*}} nop
293; X8664: 60: {{.*}} add
294
295; Tests the pad_to_end bundle alignment with 22 padding bytes needed, and no
296; instructions before the call.
; Only the x86-64 output is verified for this function: the push/jmp call
; sequence is expected at 0x40 and 0x45, followed by nops from 0x4a up to
; the next instruction at 0x60.
297define internal void @bundle_lock_pad_to_end_padding_22(i32 %arg0, i32 %arg1,
298                                                        i32 %arg3, i32 %arg4,
299                                                        i32 %arg5, i32 %arg6) {
300  call void @call_target()
301  ; bundle boundary
  ; the 10-byte call sequence starts the bundle, leaving 22 bytes to pad
302  call void @call_target()   ; 10 bytes
303                             ; 22 bytes of nop
304  ; bundle boundary
305  ret void
306}
307; X8664: 40: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_22+{{.*}}60
308; X8664: 45: {{.*}} jmp {{.*}} call_target
309; X8664: 4a: {{.*}} nop
310; X8664: 52: {{.*}} nop
311; X8664: 5a: {{.*}} nop
312; X8664: 60: {{.*}} add
313
314; Stack adjustment state during an argument push sequence gets
315; properly checkpointed and restored during the two passes, as
316; observed by the stack adjustment for accessing stack-allocated
317; variables.
; The function calls itself with its own argument, so the expected output
; loads %arg from the incoming-argument slot and stores it to the outgoing
; slot at [esp]; the load offset shows whether the adjustment was restored.
318define internal void @checkpoint_restore_stack_adjustment(i32 %arg) {
319entry:
320  call void @call_target()
321  ; bundle boundary
  ; recursive call that passes %arg through unchanged
322  call void @checkpoint_restore_stack_adjustment(i32 %arg)
323  ret void
324}
325; CHECK-LABEL: checkpoint_restore_stack_adjustment
326; CHECK: sub esp,0x1c
327; CHECK: call
328; The address of %arg should be [esp+0x20], not [esp+0x30].
329; CHECK-NEXT: mov [[REG:.*]],DWORD PTR [esp+0x20]
330; CHECK-NEXT: mov DWORD PTR [esp],[[REG]]
331; CHECK: call
332; CHECK: add esp,0x1c
333