; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

;; ----- memset -----

; Ensure 0-length memset is removed
define void @test_memset_zero_length(i8* %dest) {
; CHECK-LABEL: @test_memset_zero_length(
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 0, i32 1)
  ret void
}

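; Small constant-length memsets with element size 1 (1, 2, 4 and 8 bytes) are
; lowered to single unordered atomic stores of the splatted byte pattern; the
; 16 byte case is left as a call.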
define void @test_memset_to_store(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store(
; CHECK-NEXT:    store atomic i8 1, i8* [[DEST:%.*]] unordered, align 1
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    store atomic i16 257, i16* [[TMP1]] unordered, align 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    store atomic i32 16843009, i32* [[TMP2]] unordered, align 1
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 1
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 16, i32 1)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 1, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 2, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 4, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 8, i32 1)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 16, i32 1)
  ret void
}

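; Same lowering with element size 2: lengths 2, 4 and 8 become i16/i32/i64 stores;
; length 16 stays a call.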
define void @test_memset_to_store_2(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_2(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DEST:%.*]] to i16*
; CHECK-NEXT:    store atomic i16 257, i16* [[TMP1]] unordered, align 2
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    store atomic i32 16843009, i32* [[TMP2]] unordered, align 2
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 2
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 [[DEST]], i8 1, i32 16, i32 2)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 2, i32 2)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 4, i32 2)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 8, i32 2)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 16, i32 2)
  ret void
}

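; Element size 4: lengths 4 and 8 become i32/i64 stores.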
define void @test_memset_to_store_4(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_4(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DEST:%.*]] to i32*
; CHECK-NEXT:    store atomic i32 16843009, i32* [[TMP1]] unordered, align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    store atomic i64 72340172838076673, i64* [[TMP2]] unordered, align 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 [[DEST]], i8 1, i32 16, i32 4)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 4, i32 4)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 8, i32 4)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 16, i32 4)
  ret void
}

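; Element size 8: only the single-element length 8 becomes an i64 store.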
define void @test_memset_to_store_8(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_8(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DEST:%.*]] to i64*
; CHECK-NEXT:    store atomic i64 72340172838076673, i64* [[TMP1]] unordered, align 8
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 [[DEST]], i8 1, i32 16, i32 8)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 8, i32 8)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 16, i32 8)
  ret void
}

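; A 16 byte element is wider than any single store this transform emits, so
; the call is expected to stay untouched.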
define void @test_memset_to_store_16(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_16(
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 [[DEST:%.*]], i8 1, i32 16, i32 16)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 16, i32 16)
  ret void
}

declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture writeonly, i8, i32, i32) nounwind argmemonly


;; =========================================
;; ----- memmove -----


@gconst = constant [32 x i8] c"0123456789012345678901234567890\00"
; Check that a memmove from a global constant is converted into a memcpy
define void @test_memmove_to_memcpy(i8* %dest) {
; CHECK-LABEL: @test_memmove_to_memcpy(
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST:%.*]], i8* align 16 getelementptr inbounds ([32 x i8], [32 x i8]* @gconst, i64 0, i64 0), i32 32, i32 1)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 getelementptr inbounds ([32 x i8], [32 x i8]* @gconst, i64 0, i64 0), i32 32, i32 1)
  ret void
}

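; Ensure 0-length memmove is removed for every element size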
define void @test_memmove_zero_length(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_zero_length(
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16)
  ret void
}

; memmove with src==dest is removed
define void @test_memmove_removed(i8* %srcdest, i32 %sz) {
; CHECK-LABEL: @test_memmove_removed(
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %srcdest, i8* align 1 %srcdest, i32 %sz, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %srcdest, i8* align 2 %srcdest, i32 %sz, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %srcdest, i8* align 4 %srcdest, i32 %sz, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %srcdest, i8* align 8 %srcdest, i32 %sz, i32 8)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %srcdest, i8* align 16 %srcdest, i32 %sz, i32 16)
  ret void
}

; memmove with a small constant length is converted to a load/store pair
define void @test_memmove_loadstore(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 1
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 1
; CHECK-NEXT:    store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 1
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 1
; CHECK-NEXT:    store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 1
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 1
; CHECK-NEXT:    store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 1
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 16, i32 1)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 16, i32 1)
  ret void
}

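; Element size 2: lengths 2, 4 and 8 become load/store pairs; 16 stays a call.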
define void @test_memmove_loadstore_2(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_2(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i16*
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = load atomic i16, i16* [[TMP1]] unordered, align 2
; CHECK-NEXT:    store atomic i16 [[TMP3]], i16* [[TMP2]] unordered, align 2
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    [[TMP6:%.*]] = load atomic i32, i32* [[TMP4]] unordered, align 2
; CHECK-NEXT:    store atomic i32 [[TMP6]], i32* [[TMP5]] unordered, align 2
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    [[TMP9:%.*]] = load atomic i64, i64* [[TMP7]] unordered, align 2
; CHECK-NEXT:    store atomic i64 [[TMP9]], i64* [[TMP8]] unordered, align 2
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 16, i32 2)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 2, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 4, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 8, i32 2)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 16, i32 2)
  ret void
}

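; Element size 4: lengths 4 and 8 become i32/i64 load/store pairs.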
define void @test_memmove_loadstore_4(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_4(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i32*
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i32*
; CHECK-NEXT:    [[TMP3:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
; CHECK-NEXT:    store atomic i32 [[TMP3]], i32* [[TMP2]] unordered, align 4
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    [[TMP6:%.*]] = load atomic i64, i64* [[TMP4]] unordered, align 4
; CHECK-NEXT:    store atomic i64 [[TMP6]], i64* [[TMP5]] unordered, align 4
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 [[DEST]], i8* align 4 [[SRC]], i32 16, i32 4)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 4, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 8, i32 4)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 16, i32 4)
  ret void
}

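; Element size 8: length 8 becomes an i64 load/store pair.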
define void @test_memmove_loadstore_8(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_8(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i64*
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i64*
; CHECK-NEXT:    [[TMP3:%.*]] = load atomic i64, i64* [[TMP1]] unordered, align 8
; CHECK-NEXT:    store atomic i64 [[TMP3]], i64* [[TMP2]] unordered, align 8
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 [[DEST]], i8* align 8 [[SRC]], i32 16, i32 8)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 8, i32 8)
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 16, i32 8)
  ret void
}

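; Element size 16 is not lowered; the call is expected to remain unchanged.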
define void @test_memmove_loadstore_16(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_16(
; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 [[DEST:%.*]], i8* align 16 [[SRC:%.*]], i32 16, i32 16)
; CHECK-NEXT:    ret void
;
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 16, i32 16)
  ret void
}

declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32) nounwind argmemonly

;; =========================================
;; ----- memcpy -----

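; Ensure 0-length memcpy is removed for every element size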
define void @test_memcpy_zero_length(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_zero_length(
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16)
  ret void
}

; memcpy with src==dest is removed
define void @test_memcpy_removed(i8* %srcdest, i32 %sz) {
; CHECK-LABEL: @test_memcpy_removed(
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %srcdest, i8* align 1 %srcdest, i32 %sz, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %srcdest, i8* align 2 %srcdest, i32 %sz, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %srcdest, i8* align 4 %srcdest, i32 %sz, i32 4)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %srcdest, i8* align 8 %srcdest, i32 %sz, i32 8)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %srcdest, i8* align 16 %srcdest, i32 %sz, i32 16)
  ret void
}

; memcpy with a small constant length is converted to a load/store pair
define void @test_memcpy_loadstore(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 1
; CHECK-NEXT:    store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 1
; CHECK-NEXT:    store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 1
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 1
; CHECK-NEXT:    store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 1
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 1
; CHECK-NEXT:    store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 1
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 16, i32 1)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 16, i32 1)
  ret void
}

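; Element size 2: lengths 2, 4 and 8 become load/store pairs; 16 stays a call.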
define void @test_memcpy_loadstore_2(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_2(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i16*
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i16*
; CHECK-NEXT:    [[TMP3:%.*]] = load atomic i16, i16* [[TMP1]] unordered, align 2
; CHECK-NEXT:    store atomic i16 [[TMP3]], i16* [[TMP2]] unordered, align 2
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT:    [[TMP6:%.*]] = load atomic i32, i32* [[TMP4]] unordered, align 2
; CHECK-NEXT:    store atomic i32 [[TMP6]], i32* [[TMP5]] unordered, align 2
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    [[TMP9:%.*]] = load atomic i64, i64* [[TMP7]] unordered, align 2
; CHECK-NEXT:    store atomic i64 [[TMP9]], i64* [[TMP8]] unordered, align 2
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 16, i32 2)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 2, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 4, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 8, i32 2)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 16, i32 2)
  ret void
}

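; Element size 4: lengths 4 and 8 become i32/i64 load/store pairs.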
define void @test_memcpy_loadstore_4(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_4(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i32*
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i32*
; CHECK-NEXT:    [[TMP3:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
; CHECK-NEXT:    store atomic i32 [[TMP3]], i32* [[TMP2]] unordered, align 4
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT:    [[TMP6:%.*]] = load atomic i64, i64* [[TMP4]] unordered, align 4
; CHECK-NEXT:    store atomic i64 [[TMP6]], i64* [[TMP5]] unordered, align 4
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 [[DEST]], i8* align 4 [[SRC]], i32 16, i32 4)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 4, i32 4)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 8, i32 4)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 16, i32 4)
  ret void
}

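; Element size 8: length 8 becomes an i64 load/store pair.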
define void @test_memcpy_loadstore_8(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_8(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i64*
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i64*
; CHECK-NEXT:    [[TMP3:%.*]] = load atomic i64, i64* [[TMP1]] unordered, align 8
; CHECK-NEXT:    store atomic i64 [[TMP3]], i64* [[TMP2]] unordered, align 8
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 [[DEST]], i8* align 8 [[SRC]], i32 16, i32 8)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 8, i32 8)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 16, i32 8)
  ret void
}

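; Element size 16 is not lowered; the call is expected to remain unchanged.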
define void @test_memcpy_loadstore_16(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_16(
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 [[DEST:%.*]], i8* align 16 [[SRC:%.*]], i32 16, i32 16)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 16, i32 16)
  ret void
}

declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32) nounwind argmemonly