; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

;; ----- memset -----

; Ensure 0-length memset is removed
define void @test_memset_zero_length(i8* %dest) {
; CHECK-LABEL: @test_memset_zero_length(
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 0, i32 1)
  ret void
}

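; memset of a single element (constant length equal to the element size, here 1 byte)
; is converted to a single unordered atomic store; the wider calls are left intact,
; only gaining nonnull/align annotations on the destination.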
define void @test_memset_to_store(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store(
; CHECK-NEXT:    store atomic i8 1, i8* [[DEST:%.*]] unordered, align 1
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 1 [[DEST]], i8 1, i32 2, i32 1)
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 1 [[DEST]], i8 1, i32 4, i32 1)
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 1 [[DEST]], i8 1, i32 8, i32 1)
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 1 [[DEST]], i8 1, i32 16, i32 1)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 1, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 2, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 4, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 8, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 16, i32 1)
  ret void
}

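; The same single-element fold applies at element sizes 2, 4, 8, and 16: each becomes
; a store of the matching integer width (i16/i32/i64 of the splatted byte pattern).
; The 16-byte element case stays a call, since it exceeds the 64-bit store limit.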
define void @test_memset_to_store_2(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_2(
; CHECK-NEXT:    store atomic i8 1, i8* [[DEST:%.*]] unordered, align 2
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    store atomic i16 257, i16* [[TMP1]] unordered, align 2
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 2 [[DEST]], i8 1, i32 4, i32 2)
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 2 [[DEST]], i8 1, i32 8, i32 2)
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 2 [[DEST]], i8 1, i32 16, i32 2)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 1, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 2, i32 2)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 4, i32 2)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 8, i32 2)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 16, i32 2)
  ret void
}

define void @test_memset_to_store_4(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_4(
; CHECK-NEXT:    store atomic i8 1, i8* [[DEST:%.*]] unordered, align 4
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    store atomic i16 257, i16* [[TMP1]] unordered, align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    store atomic i32 16843009, i32* [[TMP2]] unordered, align 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 4 [[DEST]], i8 1, i32 8, i32 4)
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 4 [[DEST]], i8 1, i32 16, i32 4)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 1, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 2, i32 2)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 4, i32 4)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 8, i32 4)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 16, i32 4)
  ret void
}

define void @test_memset_to_store_8(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_8(
; CHECK-NEXT:    store atomic i8 1, i8* [[DEST:%.*]] unordered, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    store atomic i16 257, i16* [[TMP1]] unordered, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    store atomic i32 16843009, i32* [[TMP2]] unordered, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 8
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 8 [[DEST]], i8 1, i32 16, i32 8)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 1, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 2, i32 2)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 4, i32 4)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 8, i32 8)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 16, i32 8)
  ret void
}

define void @test_memset_to_store_16(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_16(
; CHECK-NEXT:    store atomic i8 1, i8* [[DEST:%.*]] unordered, align 16
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    store atomic i16 257, i16* [[TMP1]] unordered, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    store atomic i32 16843009, i32* [[TMP2]] unordered, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 16
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 16 [[DEST]], i8 1, i32 16, i32 16)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 1, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 2, i32 2)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 4, i32 4)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 8, i32 8)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 16, i32 16)
  ret void
}

declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture writeonly, i8, i32, i32) nounwind argmemonly


;; =========================================
;; ----- memmove ------


@gconst = constant [32 x i8] c"0123456789012345678901234567890\00"
; Check that a memmove from a global constant is converted into a memcpy
define void @test_memmove_to_memcpy(i8* %dest) {
; CHECK-LABEL: @test_memmove_to_memcpy(
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST:%.*]], i8* align 16 getelementptr inbounds ([32 x i8], [32 x i8]* @gconst, i64 0, i64 0), i32 32, i32 1)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 getelementptr inbounds ([32 x i8], [32 x i8]* @gconst, i64 0, i64 0), i32 32, i32 1)
  ret void
}

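; Ensure 0-length memmove is removed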
define void @test_memmove_zero_length(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_zero_length(
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16)
  ret void
}

; memmove with src==dest is removed
define void @test_memmove_removed(i8* %srcdest, i32 %sz) {
; CHECK-LABEL: @test_memmove_removed(
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %srcdest, i8* align 1 %srcdest, i32 %sz, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %srcdest, i8* align 2 %srcdest, i32 %sz, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %srcdest, i8* align 4 %srcdest, i32 %sz, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %srcdest, i8* align 8 %srcdest, i32 %sz, i32 8)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %srcdest, i8* align 16 %srcdest, i32 %sz, i32 16)
  ret void
}

; memmove with a small constant length is converted to a load/store pair
define void @test_memmove_loadstore(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 1
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 1
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 2, i32 1)
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 4, i32 1)
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 8, i32 1)
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 16, i32 1)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 16, i32 1)
  ret void
}

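; As with memset, a single-element memmove at element sizes 2, 4, 8, and 16 becomes
; an atomic load/store pair of the matching width; the 16-byte element case exceeds
; the 64-bit limit and stays a call.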
define void @test_memmove_loadstore_2(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_2(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 2
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 2
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 2
; CHECK-NEXT:    store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 2
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 4, i32 2)
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 8, i32 2)
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 16, i32 2)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 1, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 2, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 4, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 8, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 16, i32 2)
  ret void
}

define void @test_memmove_loadstore_4(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_4(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 4
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 4
; CHECK-NEXT:    store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 4
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 4
; CHECK-NEXT:    store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 4
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 4 [[DEST]], i8* nonnull align 4 [[SRC]], i32 8, i32 4)
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 4 [[DEST]], i8* nonnull align 4 [[SRC]], i32 16, i32 4)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 1, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 2, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 4, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 8, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 16, i32 4)
  ret void
}

define void @test_memmove_loadstore_8(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_8(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 8
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 8
; CHECK-NEXT:    store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 8
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 8
; CHECK-NEXT:    store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 8
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 8
; CHECK-NEXT:    store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 8
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 8 [[DEST]], i8* nonnull align 8 [[SRC]], i32 16, i32 8)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 1, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 2, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 4, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 8, i32 8)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 16, i32 8)
  ret void
}

define void @test_memmove_loadstore_16(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_16(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 16
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 16
; CHECK-NEXT:    store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 16
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 16
; CHECK-NEXT:    store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 16
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 16
; CHECK-NEXT:    store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 16
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 16 [[DEST]], i8* nonnull align 16 [[SRC]], i32 16, i32 16)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 1, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 2, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 4, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 8, i32 8)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 16, i32 16)
  ret void
}

declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32) nounwind argmemonly

;; =========================================
;; ----- memcpy ------

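; Ensure 0-length memcpy is removed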
define void @test_memcpy_zero_length(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_zero_length(
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16)
  ret void
}

; memcpy with src==dest is removed
define void @test_memcpy_removed(i8* %srcdest, i32 %sz) {
; CHECK-LABEL: @test_memcpy_removed(
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %srcdest, i8* align 1 %srcdest, i32 %sz, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %srcdest, i8* align 2 %srcdest, i32 %sz, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %srcdest, i8* align 4 %srcdest, i32 %sz, i32 4)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %srcdest, i8* align 8 %srcdest, i32 %sz, i32 8)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %srcdest, i8* align 16 %srcdest, i32 %sz, i32 16)
  ret void
}

; memcpy with a small constant length is converted to a load/store pair
define void @test_memcpy_loadstore(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 1
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 1
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 2, i32 1)
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 4, i32 1)
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 8, i32 1)
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 16, i32 1)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 16, i32 1)
  ret void
}

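; Single-element memcpys at element sizes 2, 4, 8, and 16 likewise become atomic
; load/store pairs of the matching width, up to the 64-bit limit.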
define void @test_memcpy_loadstore_2(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_2(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 2
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 2
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 2
; CHECK-NEXT:    store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 2
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 4, i32 2)
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 8, i32 2)
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 16, i32 2)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 1, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 2, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 4, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 8, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 16, i32 2)
  ret void
}

define void @test_memcpy_loadstore_4(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_4(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 4
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 4
; CHECK-NEXT:    store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 4
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 4
; CHECK-NEXT:    store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 4
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 4 [[DEST]], i8* nonnull align 4 [[SRC]], i32 8, i32 4)
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 4 [[DEST]], i8* nonnull align 4 [[SRC]], i32 16, i32 4)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 1, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 2, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 4, i32 4)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 8, i32 4)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 16, i32 4)
  ret void
}

define void @test_memcpy_loadstore_8(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_8(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 8
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 8
; CHECK-NEXT:    store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 8
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 8
; CHECK-NEXT:    store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 8
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 8
; CHECK-NEXT:    store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 8
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 8 [[DEST]], i8* nonnull align 8 [[SRC]], i32 16, i32 8)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 1, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 2, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 4, i32 4)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 8, i32 8)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 16, i32 8)
  ret void
}

define void @test_memcpy_loadstore_16(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_16(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 16
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 16
; CHECK-NEXT:    store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 16
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 16
; CHECK-NEXT:    store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 16
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 16
; CHECK-NEXT:    store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 16
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 16 [[DEST]], i8* nonnull align 16 [[SRC]], i32 16, i32 16)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 1, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 2, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 4, i32 4)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 8, i32 8)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 16, i32 16)
  ret void
}

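; Constant lengths that violate the intrinsics' contract (e.g. 7 is not a multiple of
; the element size 4) make the calls immediate UB, so the block is folded to a store
; to undef.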
define void @test_undefined(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_undefined(
entry:
  br i1 undef, label %ok, label %undefined
undefined:
; CHECK: undefined:
; CHECK-NEXT:    store i1 true, i1* undef
; CHECK-NEXT:    br label %ok
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 7, i32 4)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 -8, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 7, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 -8, i32 4)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 7, i32 4)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 -8, i32 4)
  br label %ok
ok:
  ret void
}

declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32) nounwind argmemonly
