1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
3; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
4; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
5; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512
6
; Module-level header. source_filename names the C file recorded for this
; module; the data layout string and the x86_64 Linux triple apply to every
; function below (each llc invocation at the top also passes its own -mtriple).
7; ModuleID = '../testSFB/testOverlapBlocks.c'
8source_filename = "../testSFB/testOverlapBlocks.c"
9target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10target triple = "x86_64-unknown-linux-gnu"
11
; test_overlap_1: two 16-byte memcpys that both read [A-16, A), each preceded
; by narrower stores that land inside that source range.
;   - before copy 1 (to A):    i32 7 into [A-8, A-4)
;   - before copy 2 (to A+16): sext(%x) as i64 into [A-9, A-1) and [A-16, A-8),
;                              then i8 0 into A-1
; Expected code for the three default-run prefixes (x86-64, core-avx2, skx):
; each 16-byte copy is emitted as several smaller load/store pairs
; (copy 1 as 8+4+4 bytes, copy 2 as 8+4+2+1+1 bytes).
; Expected code for the DISABLED prefix (--x86-disable-avoid-SFB): each copy
; stays a single 16-byte movups load/store pair.
; NOTE(review): "SFB" presumably stands for store-forwarding block -- confirm
; against the X86 pass that the flag controls.
12; Function Attrs: nounwind uwtable
13define dso_local void @test_overlap_1(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
14; CHECK-LABEL: test_overlap_1:
15; CHECK:       # %bb.0: # %entry
16; CHECK-NEXT:    movl $7, -8(%rdi)
17; CHECK-NEXT:    movq -16(%rdi), %rax
18; CHECK-NEXT:    movq %rax, (%rdi)
19; CHECK-NEXT:    movl -8(%rdi), %eax
20; CHECK-NEXT:    movl %eax, 8(%rdi)
21; CHECK-NEXT:    movl -4(%rdi), %eax
22; CHECK-NEXT:    movl %eax, 12(%rdi)
23; CHECK-NEXT:    movslq %esi, %rax
24; CHECK-NEXT:    movq %rax, -9(%rdi)
25; CHECK-NEXT:    movq %rax, -16(%rdi)
26; CHECK-NEXT:    movb $0, -1(%rdi)
27; CHECK-NEXT:    movq -16(%rdi), %rax
28; CHECK-NEXT:    movq %rax, 16(%rdi)
29; CHECK-NEXT:    movl -8(%rdi), %eax
30; CHECK-NEXT:    movl %eax, 24(%rdi)
31; CHECK-NEXT:    movzwl -4(%rdi), %eax
32; CHECK-NEXT:    movw %ax, 28(%rdi)
33; CHECK-NEXT:    movb -2(%rdi), %al
34; CHECK-NEXT:    movb %al, 30(%rdi)
35; CHECK-NEXT:    movb -1(%rdi), %al
36; CHECK-NEXT:    movb %al, 31(%rdi)
37; CHECK-NEXT:    retq
38;
39; DISABLED-LABEL: test_overlap_1:
40; DISABLED:       # %bb.0: # %entry
41; DISABLED-NEXT:    movl $7, -8(%rdi)
42; DISABLED-NEXT:    movups -16(%rdi), %xmm0
43; DISABLED-NEXT:    movups %xmm0, (%rdi)
44; DISABLED-NEXT:    movslq %esi, %rax
45; DISABLED-NEXT:    movq %rax, -9(%rdi)
46; DISABLED-NEXT:    movq %rax, -16(%rdi)
47; DISABLED-NEXT:    movb $0, -1(%rdi)
48; DISABLED-NEXT:    movups -16(%rdi), %xmm0
49; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
50; DISABLED-NEXT:    retq
51;
52; CHECK-AVX2-LABEL: test_overlap_1:
53; CHECK-AVX2:       # %bb.0: # %entry
54; CHECK-AVX2-NEXT:    movl $7, -8(%rdi)
55; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
56; CHECK-AVX2-NEXT:    movq %rax, (%rdi)
57; CHECK-AVX2-NEXT:    movl -8(%rdi), %eax
58; CHECK-AVX2-NEXT:    movl %eax, 8(%rdi)
59; CHECK-AVX2-NEXT:    movl -4(%rdi), %eax
60; CHECK-AVX2-NEXT:    movl %eax, 12(%rdi)
61; CHECK-AVX2-NEXT:    movslq %esi, %rax
62; CHECK-AVX2-NEXT:    movq %rax, -9(%rdi)
63; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
64; CHECK-AVX2-NEXT:    movb $0, -1(%rdi)
65; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
66; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
67; CHECK-AVX2-NEXT:    movl -8(%rdi), %eax
68; CHECK-AVX2-NEXT:    movl %eax, 24(%rdi)
69; CHECK-AVX2-NEXT:    movzwl -4(%rdi), %eax
70; CHECK-AVX2-NEXT:    movw %ax, 28(%rdi)
71; CHECK-AVX2-NEXT:    movb -2(%rdi), %al
72; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
73; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
74; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
75; CHECK-AVX2-NEXT:    retq
76;
77; CHECK-AVX512-LABEL: test_overlap_1:
78; CHECK-AVX512:       # %bb.0: # %entry
79; CHECK-AVX512-NEXT:    movl $7, -8(%rdi)
80; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
81; CHECK-AVX512-NEXT:    movq %rax, (%rdi)
82; CHECK-AVX512-NEXT:    movl -8(%rdi), %eax
83; CHECK-AVX512-NEXT:    movl %eax, 8(%rdi)
84; CHECK-AVX512-NEXT:    movl -4(%rdi), %eax
85; CHECK-AVX512-NEXT:    movl %eax, 12(%rdi)
86; CHECK-AVX512-NEXT:    movslq %esi, %rax
87; CHECK-AVX512-NEXT:    movq %rax, -9(%rdi)
88; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
89; CHECK-AVX512-NEXT:    movb $0, -1(%rdi)
90; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
91; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
92; CHECK-AVX512-NEXT:    movl -8(%rdi), %eax
93; CHECK-AVX512-NEXT:    movl %eax, 24(%rdi)
94; CHECK-AVX512-NEXT:    movzwl -4(%rdi), %eax
95; CHECK-AVX512-NEXT:    movw %ax, 28(%rdi)
96; CHECK-AVX512-NEXT:    movb -2(%rdi), %al
97; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
98; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
99; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
100; CHECK-AVX512-NEXT:    retq
101entry:
; %add.ptr (A-16) is the source of both memcpys below.
102  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
103  %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
104  %0 = bitcast i8* %add.ptr1 to i32*
; 4-byte store into the middle of the range the first memcpy reads.
105  store i32 7, i32* %0, align 4
106  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
107  %conv = sext i32 %x to i64
; Three more stores into [A-16, A) ahead of the second memcpy: 8 bytes at A-9,
; 8 bytes at A-16 and 1 byte at A-1.
108  %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
109  %1 = bitcast i8* %add.ptr2 to i64*
110  store i64 %conv, i64* %1, align 8
111  %2 = bitcast i8* %add.ptr to i64*
112  store i64 %conv, i64* %2, align 8
113  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
114  store i8 0, i8* %add.ptr5, align 1
115  %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
116  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
117  ret void
118}
119
; Declaration of the memcpy intrinsic called by every test function in this
; file. Operands are (dest, src, i64 byte count, i1 flag); every call here
; passes a byte count of 16 and i1 false. Per the LLVM LangRef the trailing i1
; is the is-volatile flag.
120; Function Attrs: argmemonly nounwind
121declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
122
; test_overlap_2: two 16-byte memcpys from [A-16, A) where the preceding
; stores are as wide as, or nested inside, 8-byte halves of the source.
;   - before copy 1 (to A):    sext(%x) as i64 into [A-16, A-8)
;   - before copy 2 (to A+16): sext(%x) as i64 into [A-8, A), then
;                              i32 7 into [A-12, A-8)
; Expected code for the three default-run prefixes: copy 1 is emitted as 8+8
; bytes and copy 2 as 4+4+8 bytes of scalar load/store pairs.
; Expected code for the DISABLED prefix: one movups load/store pair per copy.
123; Function Attrs: nounwind uwtable
124define dso_local void @test_overlap_2(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
125; CHECK-LABEL: test_overlap_2:
126; CHECK:       # %bb.0: # %entry
127; CHECK-NEXT:    movslq %esi, %rax
128; CHECK-NEXT:    movq %rax, -16(%rdi)
129; CHECK-NEXT:    movq -16(%rdi), %rcx
130; CHECK-NEXT:    movq %rcx, (%rdi)
131; CHECK-NEXT:    movq -8(%rdi), %rcx
132; CHECK-NEXT:    movq %rcx, 8(%rdi)
133; CHECK-NEXT:    movq %rax, -8(%rdi)
134; CHECK-NEXT:    movl $7, -12(%rdi)
135; CHECK-NEXT:    movl -16(%rdi), %eax
136; CHECK-NEXT:    movl %eax, 16(%rdi)
137; CHECK-NEXT:    movl -12(%rdi), %eax
138; CHECK-NEXT:    movl %eax, 20(%rdi)
139; CHECK-NEXT:    movq -8(%rdi), %rax
140; CHECK-NEXT:    movq %rax, 24(%rdi)
141; CHECK-NEXT:    retq
142;
143; DISABLED-LABEL: test_overlap_2:
144; DISABLED:       # %bb.0: # %entry
145; DISABLED-NEXT:    movslq %esi, %rax
146; DISABLED-NEXT:    movq %rax, -16(%rdi)
147; DISABLED-NEXT:    movups -16(%rdi), %xmm0
148; DISABLED-NEXT:    movups %xmm0, (%rdi)
149; DISABLED-NEXT:    movq %rax, -8(%rdi)
150; DISABLED-NEXT:    movl $7, -12(%rdi)
151; DISABLED-NEXT:    movups -16(%rdi), %xmm0
152; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
153; DISABLED-NEXT:    retq
154;
155; CHECK-AVX2-LABEL: test_overlap_2:
156; CHECK-AVX2:       # %bb.0: # %entry
157; CHECK-AVX2-NEXT:    movslq %esi, %rax
158; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
159; CHECK-AVX2-NEXT:    movq -16(%rdi), %rcx
160; CHECK-AVX2-NEXT:    movq %rcx, (%rdi)
161; CHECK-AVX2-NEXT:    movq -8(%rdi), %rcx
162; CHECK-AVX2-NEXT:    movq %rcx, 8(%rdi)
163; CHECK-AVX2-NEXT:    movq %rax, -8(%rdi)
164; CHECK-AVX2-NEXT:    movl $7, -12(%rdi)
165; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
166; CHECK-AVX2-NEXT:    movl %eax, 16(%rdi)
167; CHECK-AVX2-NEXT:    movl -12(%rdi), %eax
168; CHECK-AVX2-NEXT:    movl %eax, 20(%rdi)
169; CHECK-AVX2-NEXT:    movq -8(%rdi), %rax
170; CHECK-AVX2-NEXT:    movq %rax, 24(%rdi)
171; CHECK-AVX2-NEXT:    retq
172;
173; CHECK-AVX512-LABEL: test_overlap_2:
174; CHECK-AVX512:       # %bb.0: # %entry
175; CHECK-AVX512-NEXT:    movslq %esi, %rax
176; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
177; CHECK-AVX512-NEXT:    movq -16(%rdi), %rcx
178; CHECK-AVX512-NEXT:    movq %rcx, (%rdi)
179; CHECK-AVX512-NEXT:    movq -8(%rdi), %rcx
180; CHECK-AVX512-NEXT:    movq %rcx, 8(%rdi)
181; CHECK-AVX512-NEXT:    movq %rax, -8(%rdi)
182; CHECK-AVX512-NEXT:    movl $7, -12(%rdi)
183; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
184; CHECK-AVX512-NEXT:    movl %eax, 16(%rdi)
185; CHECK-AVX512-NEXT:    movl -12(%rdi), %eax
186; CHECK-AVX512-NEXT:    movl %eax, 20(%rdi)
187; CHECK-AVX512-NEXT:    movq -8(%rdi), %rax
188; CHECK-AVX512-NEXT:    movq %rax, 24(%rdi)
189; CHECK-AVX512-NEXT:    retq
190entry:
; %add.ptr (A-16) is the source of both memcpys below.
191  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
192  %conv = sext i32 %x to i64
193  %0 = bitcast i8* %add.ptr to i64*
; 8-byte store covering the low half of the first memcpy's source.
194  store i64 %conv, i64* %0, align 8
195  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
; 8-byte store to the high half, then a 4-byte store at A-12 that overwrites
; the upper 4 bytes of the earlier 8-byte store at A-16.
196  %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -8
197  %1 = bitcast i8* %add.ptr3 to i64*
198  store i64 %conv, i64* %1, align 8
199  %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -12
200  %2 = bitcast i8* %add.ptr4 to i32*
201  store i32 7, i32* %2, align 4
202  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
203  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
204  ret void
205}
206
; test_overlap_3: same store/copy sequence as test_overlap_1 except that the
; first i32 store is at A-10 instead of A-8.
;   - before copy 1 (to A):    i32 7 into [A-10, A-6)
;   - before copy 2 (to A+16): sext(%x) as i64 into [A-9, A-1) and [A-16, A-8),
;                              then i8 0 into A-1
; Expected code for the three default-run prefixes: copy 1 is emitted as
; 4+2+4+4+2 bytes and copy 2 as 8+2+4+1+1 bytes of scalar load/store pairs.
; Expected code for the DISABLED prefix: one movups load/store pair per copy.
207; Function Attrs: nounwind uwtable
208define dso_local void @test_overlap_3(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
209; CHECK-LABEL: test_overlap_3:
210; CHECK:       # %bb.0: # %entry
211; CHECK-NEXT:    movl $7, -10(%rdi)
212; CHECK-NEXT:    movl -16(%rdi), %eax
213; CHECK-NEXT:    movl %eax, (%rdi)
214; CHECK-NEXT:    movzwl -12(%rdi), %eax
215; CHECK-NEXT:    movw %ax, 4(%rdi)
216; CHECK-NEXT:    movl -10(%rdi), %eax
217; CHECK-NEXT:    movl %eax, 6(%rdi)
218; CHECK-NEXT:    movl -6(%rdi), %eax
219; CHECK-NEXT:    movl %eax, 10(%rdi)
220; CHECK-NEXT:    movzwl -2(%rdi), %eax
221; CHECK-NEXT:    movw %ax, 14(%rdi)
222; CHECK-NEXT:    movslq %esi, %rax
223; CHECK-NEXT:    movq %rax, -9(%rdi)
224; CHECK-NEXT:    movq %rax, -16(%rdi)
225; CHECK-NEXT:    movb $0, -1(%rdi)
226; CHECK-NEXT:    movq -16(%rdi), %rax
227; CHECK-NEXT:    movq %rax, 16(%rdi)
228; CHECK-NEXT:    movzwl -8(%rdi), %eax
229; CHECK-NEXT:    movw %ax, 24(%rdi)
230; CHECK-NEXT:    movl -6(%rdi), %eax
231; CHECK-NEXT:    movl %eax, 26(%rdi)
232; CHECK-NEXT:    movb -2(%rdi), %al
233; CHECK-NEXT:    movb %al, 30(%rdi)
234; CHECK-NEXT:    movb -1(%rdi), %al
235; CHECK-NEXT:    movb %al, 31(%rdi)
236; CHECK-NEXT:    retq
237;
238; DISABLED-LABEL: test_overlap_3:
239; DISABLED:       # %bb.0: # %entry
240; DISABLED-NEXT:    movl $7, -10(%rdi)
241; DISABLED-NEXT:    movups -16(%rdi), %xmm0
242; DISABLED-NEXT:    movups %xmm0, (%rdi)
243; DISABLED-NEXT:    movslq %esi, %rax
244; DISABLED-NEXT:    movq %rax, -9(%rdi)
245; DISABLED-NEXT:    movq %rax, -16(%rdi)
246; DISABLED-NEXT:    movb $0, -1(%rdi)
247; DISABLED-NEXT:    movups -16(%rdi), %xmm0
248; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
249; DISABLED-NEXT:    retq
250;
251; CHECK-AVX2-LABEL: test_overlap_3:
252; CHECK-AVX2:       # %bb.0: # %entry
253; CHECK-AVX2-NEXT:    movl $7, -10(%rdi)
254; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
255; CHECK-AVX2-NEXT:    movl %eax, (%rdi)
256; CHECK-AVX2-NEXT:    movzwl -12(%rdi), %eax
257; CHECK-AVX2-NEXT:    movw %ax, 4(%rdi)
258; CHECK-AVX2-NEXT:    movl -10(%rdi), %eax
259; CHECK-AVX2-NEXT:    movl %eax, 6(%rdi)
260; CHECK-AVX2-NEXT:    movl -6(%rdi), %eax
261; CHECK-AVX2-NEXT:    movl %eax, 10(%rdi)
262; CHECK-AVX2-NEXT:    movzwl -2(%rdi), %eax
263; CHECK-AVX2-NEXT:    movw %ax, 14(%rdi)
264; CHECK-AVX2-NEXT:    movslq %esi, %rax
265; CHECK-AVX2-NEXT:    movq %rax, -9(%rdi)
266; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
267; CHECK-AVX2-NEXT:    movb $0, -1(%rdi)
268; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
269; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
270; CHECK-AVX2-NEXT:    movzwl -8(%rdi), %eax
271; CHECK-AVX2-NEXT:    movw %ax, 24(%rdi)
272; CHECK-AVX2-NEXT:    movl -6(%rdi), %eax
273; CHECK-AVX2-NEXT:    movl %eax, 26(%rdi)
274; CHECK-AVX2-NEXT:    movb -2(%rdi), %al
275; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
276; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
277; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
278; CHECK-AVX2-NEXT:    retq
279;
280; CHECK-AVX512-LABEL: test_overlap_3:
281; CHECK-AVX512:       # %bb.0: # %entry
282; CHECK-AVX512-NEXT:    movl $7, -10(%rdi)
283; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
284; CHECK-AVX512-NEXT:    movl %eax, (%rdi)
285; CHECK-AVX512-NEXT:    movzwl -12(%rdi), %eax
286; CHECK-AVX512-NEXT:    movw %ax, 4(%rdi)
287; CHECK-AVX512-NEXT:    movl -10(%rdi), %eax
288; CHECK-AVX512-NEXT:    movl %eax, 6(%rdi)
289; CHECK-AVX512-NEXT:    movl -6(%rdi), %eax
290; CHECK-AVX512-NEXT:    movl %eax, 10(%rdi)
291; CHECK-AVX512-NEXT:    movzwl -2(%rdi), %eax
292; CHECK-AVX512-NEXT:    movw %ax, 14(%rdi)
293; CHECK-AVX512-NEXT:    movslq %esi, %rax
294; CHECK-AVX512-NEXT:    movq %rax, -9(%rdi)
295; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
296; CHECK-AVX512-NEXT:    movb $0, -1(%rdi)
297; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
298; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
299; CHECK-AVX512-NEXT:    movzwl -8(%rdi), %eax
300; CHECK-AVX512-NEXT:    movw %ax, 24(%rdi)
301; CHECK-AVX512-NEXT:    movl -6(%rdi), %eax
302; CHECK-AVX512-NEXT:    movl %eax, 26(%rdi)
303; CHECK-AVX512-NEXT:    movb -2(%rdi), %al
304; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
305; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
306; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
307; CHECK-AVX512-NEXT:    retq
308entry:
; %add.ptr (A-16) is the source of both memcpys below.
309  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
310  %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -10
311  %0 = bitcast i8* %add.ptr1 to i32*
; 4-byte store at A-10, i.e. not on a 4-byte boundary relative to the start of
; the copied range at A-16.
312  store i32 7, i32* %0, align 4
313  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
314  %conv = sext i32 %x to i64
; Three more stores into [A-16, A) ahead of the second memcpy: 8 bytes at A-9,
; 8 bytes at A-16 and 1 byte at A-1.
315  %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
316  %1 = bitcast i8* %add.ptr2 to i64*
317  store i64 %conv, i64* %1, align 8
318  %2 = bitcast i8* %add.ptr to i64*
319  store i64 %conv, i64* %2, align 8
320  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
321  store i8 0, i8* %add.ptr5, align 1
322  %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
323  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
324  ret void
325}
326
; test_overlap_4: the first 16-byte memcpy from [A-16, A) has no store before
; it inside this function; the second one follows three overlapping stores.
;   - before copy 1 (to A):    nothing
;   - before copy 2 (to A+16): sext(%x) as i64 into [A-8, A), %x as i32 into
;                              [A-16, A-12), then i32 0 into [A-11, A-7)
; Expected code for the three default-run prefixes: copy 1 stays one 16-byte
; vector load/store pair (movups, or vmovups on the AVX2/AVX512 runs), while
; copy 2 is emitted as 4+1+4+4+2+1 bytes of scalar load/store pairs.
; Expected code for the DISABLED prefix: one movups load/store pair per copy.
327; Function Attrs: nounwind uwtable
328define dso_local void @test_overlap_4(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
329; CHECK-LABEL: test_overlap_4:
330; CHECK:       # %bb.0: # %entry
331; CHECK-NEXT:    movups -16(%rdi), %xmm0
332; CHECK-NEXT:    movups %xmm0, (%rdi)
333; CHECK-NEXT:    movslq %esi, %rax
334; CHECK-NEXT:    movq %rax, -8(%rdi)
335; CHECK-NEXT:    movl %eax, -16(%rdi)
336; CHECK-NEXT:    movl $0, -11(%rdi)
337; CHECK-NEXT:    movl -16(%rdi), %eax
338; CHECK-NEXT:    movl %eax, 16(%rdi)
339; CHECK-NEXT:    movb -12(%rdi), %al
340; CHECK-NEXT:    movb %al, 20(%rdi)
341; CHECK-NEXT:    movl -11(%rdi), %eax
342; CHECK-NEXT:    movl %eax, 21(%rdi)
343; CHECK-NEXT:    movl -7(%rdi), %eax
344; CHECK-NEXT:    movl %eax, 25(%rdi)
345; CHECK-NEXT:    movzwl -3(%rdi), %eax
346; CHECK-NEXT:    movw %ax, 29(%rdi)
347; CHECK-NEXT:    movb -1(%rdi), %al
348; CHECK-NEXT:    movb %al, 31(%rdi)
349; CHECK-NEXT:    retq
350;
351; DISABLED-LABEL: test_overlap_4:
352; DISABLED:       # %bb.0: # %entry
353; DISABLED-NEXT:    movups -16(%rdi), %xmm0
354; DISABLED-NEXT:    movups %xmm0, (%rdi)
355; DISABLED-NEXT:    movslq %esi, %rax
356; DISABLED-NEXT:    movq %rax, -8(%rdi)
357; DISABLED-NEXT:    movl %eax, -16(%rdi)
358; DISABLED-NEXT:    movl $0, -11(%rdi)
359; DISABLED-NEXT:    movups -16(%rdi), %xmm0
360; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
361; DISABLED-NEXT:    retq
362;
363; CHECK-AVX2-LABEL: test_overlap_4:
364; CHECK-AVX2:       # %bb.0: # %entry
365; CHECK-AVX2-NEXT:    vmovups -16(%rdi), %xmm0
366; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
367; CHECK-AVX2-NEXT:    movslq %esi, %rax
368; CHECK-AVX2-NEXT:    movq %rax, -8(%rdi)
369; CHECK-AVX2-NEXT:    movl %eax, -16(%rdi)
370; CHECK-AVX2-NEXT:    movl $0, -11(%rdi)
371; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
372; CHECK-AVX2-NEXT:    movl %eax, 16(%rdi)
373; CHECK-AVX2-NEXT:    movb -12(%rdi), %al
374; CHECK-AVX2-NEXT:    movb %al, 20(%rdi)
375; CHECK-AVX2-NEXT:    movl -11(%rdi), %eax
376; CHECK-AVX2-NEXT:    movl %eax, 21(%rdi)
377; CHECK-AVX2-NEXT:    movl -7(%rdi), %eax
378; CHECK-AVX2-NEXT:    movl %eax, 25(%rdi)
379; CHECK-AVX2-NEXT:    movzwl -3(%rdi), %eax
380; CHECK-AVX2-NEXT:    movw %ax, 29(%rdi)
381; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
382; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
383; CHECK-AVX2-NEXT:    retq
384;
385; CHECK-AVX512-LABEL: test_overlap_4:
386; CHECK-AVX512:       # %bb.0: # %entry
387; CHECK-AVX512-NEXT:    vmovups -16(%rdi), %xmm0
388; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
389; CHECK-AVX512-NEXT:    movslq %esi, %rax
390; CHECK-AVX512-NEXT:    movq %rax, -8(%rdi)
391; CHECK-AVX512-NEXT:    movl %eax, -16(%rdi)
392; CHECK-AVX512-NEXT:    movl $0, -11(%rdi)
393; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
394; CHECK-AVX512-NEXT:    movl %eax, 16(%rdi)
395; CHECK-AVX512-NEXT:    movb -12(%rdi), %al
396; CHECK-AVX512-NEXT:    movb %al, 20(%rdi)
397; CHECK-AVX512-NEXT:    movl -11(%rdi), %eax
398; CHECK-AVX512-NEXT:    movl %eax, 21(%rdi)
399; CHECK-AVX512-NEXT:    movl -7(%rdi), %eax
400; CHECK-AVX512-NEXT:    movl %eax, 25(%rdi)
401; CHECK-AVX512-NEXT:    movzwl -3(%rdi), %eax
402; CHECK-AVX512-NEXT:    movw %ax, 29(%rdi)
403; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
404; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
405; CHECK-AVX512-NEXT:    retq
406entry:
; %add.ptr (A-16) is the source of both memcpys below.
407  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
; First copy: no earlier store in this function touches its source.
408  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
409  %conv = sext i32 %x to i64
410  %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
411  %0 = bitcast i8* %add.ptr1 to i64*
412  store i64 %conv, i64* %0, align 8
413  %1 = bitcast i8* %add.ptr to i32*
414  store i32 %x, i32* %1, align 4
; 4-byte store at A-11: it spans [A-11, A-7), so its last byte overlaps the
; first byte of the 8-byte store at A-8 above.
415  %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -11
416  %2 = bitcast i8* %add.ptr3 to i32*
417  store i32 0, i32* %2, align 4
418  %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 16
419  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr4, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
420  ret void
421}
422
; test_overlap_5: the first 16-byte memcpy from [A-16, A) has no store before
; it inside this function; the second one follows an 8-byte store and two
; single-byte stores, one of which lands inside the 8-byte store.
;   - before copy 1 (to A):    nothing
;   - before copy 2 (to A+16): sext(%x) as i64 into [A-16, A-8), trunc(%x) as
;                              i8 into A-14, then i8 0 into A-11
; Expected code for the three default-run prefixes: copy 1 stays one 16-byte
; vector load/store pair (movups, or vmovups on the AVX2/AVX512 runs), while
; copy 2 is emitted as 2+1+2+1+8+2 bytes of scalar load/store pairs.
; Expected code for the DISABLED prefix: one movups load/store pair per copy.
423; Function Attrs: nounwind uwtable
424define dso_local void @test_overlap_5(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
425; CHECK-LABEL: test_overlap_5:
426; CHECK:       # %bb.0: # %entry
427; CHECK-NEXT:    movups -16(%rdi), %xmm0
428; CHECK-NEXT:    movups %xmm0, (%rdi)
429; CHECK-NEXT:    movslq %esi, %rax
430; CHECK-NEXT:    movq %rax, -16(%rdi)
431; CHECK-NEXT:    movb %al, -14(%rdi)
432; CHECK-NEXT:    movb $0, -11(%rdi)
433; CHECK-NEXT:    movzwl -16(%rdi), %eax
434; CHECK-NEXT:    movw %ax, 16(%rdi)
435; CHECK-NEXT:    movb -14(%rdi), %al
436; CHECK-NEXT:    movb %al, 18(%rdi)
437; CHECK-NEXT:    movzwl -13(%rdi), %eax
438; CHECK-NEXT:    movw %ax, 19(%rdi)
439; CHECK-NEXT:    movb -11(%rdi), %al
440; CHECK-NEXT:    movb %al, 21(%rdi)
441; CHECK-NEXT:    movq -10(%rdi), %rax
442; CHECK-NEXT:    movq %rax, 22(%rdi)
443; CHECK-NEXT:    movzwl -2(%rdi), %eax
444; CHECK-NEXT:    movw %ax, 30(%rdi)
445; CHECK-NEXT:    retq
446;
447; DISABLED-LABEL: test_overlap_5:
448; DISABLED:       # %bb.0: # %entry
449; DISABLED-NEXT:    movups -16(%rdi), %xmm0
450; DISABLED-NEXT:    movups %xmm0, (%rdi)
451; DISABLED-NEXT:    movslq %esi, %rax
452; DISABLED-NEXT:    movq %rax, -16(%rdi)
453; DISABLED-NEXT:    movb %al, -14(%rdi)
454; DISABLED-NEXT:    movb $0, -11(%rdi)
455; DISABLED-NEXT:    movups -16(%rdi), %xmm0
456; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
457; DISABLED-NEXT:    retq
458;
459; CHECK-AVX2-LABEL: test_overlap_5:
460; CHECK-AVX2:       # %bb.0: # %entry
461; CHECK-AVX2-NEXT:    vmovups -16(%rdi), %xmm0
462; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
463; CHECK-AVX2-NEXT:    movslq %esi, %rax
464; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
465; CHECK-AVX2-NEXT:    movb %al, -14(%rdi)
466; CHECK-AVX2-NEXT:    movb $0, -11(%rdi)
467; CHECK-AVX2-NEXT:    movzwl -16(%rdi), %eax
468; CHECK-AVX2-NEXT:    movw %ax, 16(%rdi)
469; CHECK-AVX2-NEXT:    movb -14(%rdi), %al
470; CHECK-AVX2-NEXT:    movb %al, 18(%rdi)
471; CHECK-AVX2-NEXT:    movzwl -13(%rdi), %eax
472; CHECK-AVX2-NEXT:    movw %ax, 19(%rdi)
473; CHECK-AVX2-NEXT:    movb -11(%rdi), %al
474; CHECK-AVX2-NEXT:    movb %al, 21(%rdi)
475; CHECK-AVX2-NEXT:    movq -10(%rdi), %rax
476; CHECK-AVX2-NEXT:    movq %rax, 22(%rdi)
477; CHECK-AVX2-NEXT:    movzwl -2(%rdi), %eax
478; CHECK-AVX2-NEXT:    movw %ax, 30(%rdi)
479; CHECK-AVX2-NEXT:    retq
480;
481; CHECK-AVX512-LABEL: test_overlap_5:
482; CHECK-AVX512:       # %bb.0: # %entry
483; CHECK-AVX512-NEXT:    vmovups -16(%rdi), %xmm0
484; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
485; CHECK-AVX512-NEXT:    movslq %esi, %rax
486; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
487; CHECK-AVX512-NEXT:    movb %al, -14(%rdi)
488; CHECK-AVX512-NEXT:    movb $0, -11(%rdi)
489; CHECK-AVX512-NEXT:    movzwl -16(%rdi), %eax
490; CHECK-AVX512-NEXT:    movw %ax, 16(%rdi)
491; CHECK-AVX512-NEXT:    movb -14(%rdi), %al
492; CHECK-AVX512-NEXT:    movb %al, 18(%rdi)
493; CHECK-AVX512-NEXT:    movzwl -13(%rdi), %eax
494; CHECK-AVX512-NEXT:    movw %ax, 19(%rdi)
495; CHECK-AVX512-NEXT:    movb -11(%rdi), %al
496; CHECK-AVX512-NEXT:    movb %al, 21(%rdi)
497; CHECK-AVX512-NEXT:    movq -10(%rdi), %rax
498; CHECK-AVX512-NEXT:    movq %rax, 22(%rdi)
499; CHECK-AVX512-NEXT:    movzwl -2(%rdi), %eax
500; CHECK-AVX512-NEXT:    movw %ax, 30(%rdi)
501; CHECK-AVX512-NEXT:    retq
502entry:
; %add.ptr (A-16) is the source of both memcpys below.
503  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
; First copy: no earlier store in this function touches its source.
504  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
505  %conv = sext i32 %x to i64
506  %0 = bitcast i8* %add.ptr to i64*
507  store i64 %conv, i64* %0, align 8
; Two single-byte stores at A-14 and A-11, both inside the 8 bytes just
; written at [A-16, A-8).
508  %conv2 = trunc i32 %x to i8
509  %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -14
510  store i8 %conv2, i8* %add.ptr3, align 1
511  %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -11
512  store i8 0, i8* %add.ptr4, align 1
513  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
514  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
515  ret void
516}
517
; Attribute group #0 is attached to all five test_overlap_* definitions above.
; Its "target-features" string lists only fxsr, mmx, sse, sse2 and x87; the
; AVX2 and AVX512 runs select their CPU through -mcpu on the llc command line.
518attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
519
; Attribute group #1 is attached to the llvm.memcpy declaration.
520attributes #1 = { argmemonly nounwind }
521