1; RUN: not --crash llc > /dev/null < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt
2; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s
3
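4; Test that atomic loads, stores, and read-modify-writes are assembled properly, and that constant offsets are folded into the memory operand only when that is safe.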
5
6target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
7target triple = "wasm32-unknown-unknown"
8
9;===----------------------------------------------------------------------------
10; Atomic loads: 32-bit
11;===----------------------------------------------------------------------------
12
13; Basic load.
14
15; CHECK-LABEL: load_i32_no_offset:
16; CHECK: i32.atomic.load $push0=, 0($0){{$}}
17; CHECK-NEXT: return $pop0{{$}}
18define i32 @load_i32_no_offset(i32* %p) {
19  %val = load atomic i32, i32* %p seq_cst, align 4 ; seq_cst load straight through the argument pointer
20  ret i32 %val
21}
22
23; With an nuw add, we can fold an offset.
24
25; CHECK-LABEL: load_i32_with_folded_offset:
26; CHECK: i32.atomic.load $push0=, 24($0){{$}}
27define i32 @load_i32_with_folded_offset(i32* %p) {
28  %base = ptrtoint i32* %p to i32
29  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
30  %addr = inttoptr i32 %sum to i32*
31  %val = load atomic i32, i32* %addr seq_cst, align 4
32  ret i32 %val
33}
34
35; With an inbounds gep, we can fold an offset.
36
37; CHECK-LABEL: load_i32_with_folded_gep_offset:
38; CHECK: i32.atomic.load $push0=, 24($0){{$}}
39define i32 @load_i32_with_folded_gep_offset(i32* %p) {
40  %addr = getelementptr inbounds i32, i32* %p, i32 6 ; i32 element 6 = byte offset 24
41  %val = load atomic i32, i32* %addr seq_cst, align 4
42  ret i32 %val
43}
44
45; We can't fold a negative offset though, even with an inbounds gep.
46
47; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset:
48; CHECK: i32.const $push0=, -24{{$}}
49; CHECK: i32.add $push1=, $0, $pop0{{$}}
50; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
51define i32 @load_i32_with_unfolded_gep_negative_offset(i32* %p) {
52  %addr = getelementptr inbounds i32, i32* %p, i32 -6 ; byte offset -24, expected as an explicit i32.add
53  %val = load atomic i32, i32* %addr seq_cst, align 4
54  ret i32 %val
55}
56
57; Without nuw, and even with nsw, we can't fold an offset.
58
59; CHECK-LABEL: load_i32_with_unfolded_offset:
60; CHECK: i32.const $push0=, 24{{$}}
61; CHECK: i32.add $push1=, $0, $pop0{{$}}
62; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
63define i32 @load_i32_with_unfolded_offset(i32* %p) {
64  %base = ptrtoint i32* %p to i32
65  %sum = add nsw i32 %base, 24 ; nsw only (no nuw), so the 24 must not fold
66  %addr = inttoptr i32 %sum to i32*
67  %val = load atomic i32, i32* %addr seq_cst, align 4
68  ret i32 %val
69}
70
71; Without inbounds, we can't fold a gep offset.
72
73; CHECK-LABEL: load_i32_with_unfolded_gep_offset:
74; CHECK: i32.const $push0=, 24{{$}}
75; CHECK: i32.add $push1=, $0, $pop0{{$}}
76; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
77define i32 @load_i32_with_unfolded_gep_offset(i32* %p) {
78  %addr = getelementptr i32, i32* %p, i32 6 ; no inbounds, so the offset stays an explicit add
79  %val = load atomic i32, i32* %addr seq_cst, align 4
80  ret i32 %val
81}
82
83; When loading from a fixed address, materialize a zero.
84
85; CHECK-LABEL: load_i32_from_numeric_address:
86; CHECK: i32.const $push0=, 0{{$}}
87; CHECK: i32.atomic.load $push1=, 42($pop0){{$}}
88define i32 @load_i32_from_numeric_address() {
89  %s = inttoptr i32 42 to i32* ; constant address 42 is expected as the folded offset on a zero base
90  %t = load atomic i32, i32* %s seq_cst, align 4
91  ret i32 %t
92}
93
94; CHECK-LABEL: load_i32_from_global_address:
95; CHECK: i32.const $push0=, 0{{$}}
96; CHECK: i32.atomic.load $push1=, gv($pop0){{$}}
97@gv = global i32 0
98define i32 @load_i32_from_global_address() {
99  %t = load atomic i32, i32* @gv seq_cst, align 4 ; the symbol gv is expected as the offset on a zero base
100  ret i32 %t
101}
102
103;===----------------------------------------------------------------------------
104; Atomic loads: 64-bit
105;===----------------------------------------------------------------------------
106
107; Basic load.
108
109; CHECK-LABEL: load_i64_no_offset:
110; CHECK: i64.atomic.load $push0=, 0($0){{$}}
111; CHECK-NEXT: return $pop0{{$}}
112define i64 @load_i64_no_offset(i64* %p) {
113  %val = load atomic i64, i64* %p seq_cst, align 8 ; seq_cst load straight through the argument pointer
114  ret i64 %val
115}
116
117; With an nuw add, we can fold an offset.
118
119; CHECK-LABEL: load_i64_with_folded_offset:
120; CHECK: i64.atomic.load $push0=, 24($0){{$}}
121define i64 @load_i64_with_folded_offset(i64* %p) {
122  %base = ptrtoint i64* %p to i32
123  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
124  %addr = inttoptr i32 %sum to i64*
125  %val = load atomic i64, i64* %addr seq_cst, align 8
126  ret i64 %val
127}
128
129; With an inbounds gep, we can fold an offset.
130
131; CHECK-LABEL: load_i64_with_folded_gep_offset:
132; CHECK: i64.atomic.load $push0=, 24($0){{$}}
133define i64 @load_i64_with_folded_gep_offset(i64* %p) {
134  %addr = getelementptr inbounds i64, i64* %p, i32 3 ; i64 element 3 = byte offset 24
135  %val = load atomic i64, i64* %addr seq_cst, align 8
136  ret i64 %val
137}
138
139; We can't fold a negative offset though, even with an inbounds gep.
140
141; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset:
142; CHECK: i32.const $push0=, -24{{$}}
143; CHECK: i32.add $push1=, $0, $pop0{{$}}
144; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
145define i64 @load_i64_with_unfolded_gep_negative_offset(i64* %p) {
146  %addr = getelementptr inbounds i64, i64* %p, i32 -3 ; byte offset -24, expected as an explicit i32.add
147  %val = load atomic i64, i64* %addr seq_cst, align 8
148  ret i64 %val
149}
150
151; Without nuw, and even with nsw, we can't fold an offset.
152
153; CHECK-LABEL: load_i64_with_unfolded_offset:
154; CHECK: i32.const $push0=, 24{{$}}
155; CHECK: i32.add $push1=, $0, $pop0{{$}}
156; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
157define i64 @load_i64_with_unfolded_offset(i64* %p) {
158  %base = ptrtoint i64* %p to i32
159  %sum = add nsw i32 %base, 24 ; nsw only (no nuw), so the 24 must not fold
160  %addr = inttoptr i32 %sum to i64*
161  %val = load atomic i64, i64* %addr seq_cst, align 8
162  ret i64 %val
163}
164
165; Without inbounds, we can't fold a gep offset.
166
167; CHECK-LABEL: load_i64_with_unfolded_gep_offset:
168; CHECK: i32.const $push0=, 24{{$}}
169; CHECK: i32.add $push1=, $0, $pop0{{$}}
170; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
171define i64 @load_i64_with_unfolded_gep_offset(i64* %p) {
172  %addr = getelementptr i64, i64* %p, i32 3 ; no inbounds, so the offset stays an explicit add
173  %val = load atomic i64, i64* %addr seq_cst, align 8
174  ret i64 %val
175}
176
177;===----------------------------------------------------------------------------
178; Atomic stores: 32-bit
179;===----------------------------------------------------------------------------
180
181; Basic store.
182
183; CHECK-LABEL: store_i32_no_offset:
184; CHECK-NEXT: .functype store_i32_no_offset (i32, i32) -> (){{$}}
185; CHECK-NEXT: i32.atomic.store 0($0), $1{{$}}
186; CHECK-NEXT: return{{$}}
187define void @store_i32_no_offset(i32* %p, i32 %v) {
188  store atomic i32 %v, i32* %p seq_cst, align 4 ; seq_cst store of the argument value through the argument pointer
189  ret void
190}
191
192; With an nuw add, we can fold an offset.
193
194; CHECK-LABEL: store_i32_with_folded_offset:
195; CHECK: i32.atomic.store 24($0), $pop0{{$}}
196define void @store_i32_with_folded_offset(i32* %p) {
197  %base = ptrtoint i32* %p to i32
198  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
199  %addr = inttoptr i32 %sum to i32*
200  store atomic i32 0, i32* %addr seq_cst, align 4
201  ret void
202}
203
204; With an inbounds gep, we can fold an offset.
205
206; CHECK-LABEL: store_i32_with_folded_gep_offset:
207; CHECK: i32.atomic.store 24($0), $pop0{{$}}
208define void @store_i32_with_folded_gep_offset(i32* %p) {
209  %addr = getelementptr inbounds i32, i32* %p, i32 6 ; i32 element 6 = byte offset 24
210  store atomic i32 0, i32* %addr seq_cst, align 4
211  ret void
212}
213
214; We can't fold a negative offset though, even with an inbounds gep.
215
216; CHECK-LABEL: store_i32_with_unfolded_gep_negative_offset:
217; CHECK: i32.const $push0=, -24{{$}}
218; CHECK: i32.add $push1=, $0, $pop0{{$}}
219; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
220define void @store_i32_with_unfolded_gep_negative_offset(i32* %p) {
221  %addr = getelementptr inbounds i32, i32* %p, i32 -6 ; byte offset -24, expected as an explicit i32.add
222  store atomic i32 0, i32* %addr seq_cst, align 4
223  ret void
224}
225
226; Without nuw, and even with nsw, we can't fold an offset.
227
228; CHECK-LABEL: store_i32_with_unfolded_offset:
229; CHECK: i32.const $push0=, 24{{$}}
230; CHECK: i32.add $push1=, $0, $pop0{{$}}
231; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
232define void @store_i32_with_unfolded_offset(i32* %p) {
233  %base = ptrtoint i32* %p to i32
234  %sum = add nsw i32 %base, 24 ; nsw only (no nuw), so the 24 must not fold
235  %addr = inttoptr i32 %sum to i32*
236  store atomic i32 0, i32* %addr seq_cst, align 4
237  ret void
238}
239
240; Without inbounds, we can't fold a gep offset.
241
242; CHECK-LABEL: store_i32_with_unfolded_gep_offset:
243; CHECK: i32.const $push0=, 24{{$}}
244; CHECK: i32.add $push1=, $0, $pop0{{$}}
245; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
246define void @store_i32_with_unfolded_gep_offset(i32* %p) {
247  %addr = getelementptr i32, i32* %p, i32 6 ; no inbounds, so the offset stays an explicit add
248  store atomic i32 0, i32* %addr seq_cst, align 4
249  ret void
250}
251
252; When storing to a fixed address, materialize a zero.
253
254; CHECK-LABEL: store_i32_to_numeric_address:
255; CHECK:      i32.const $push0=, 0{{$}}
256; CHECK-NEXT: i32.const $push1=, 0{{$}}
257; CHECK-NEXT: i32.atomic.store 42($pop0), $pop1{{$}}
258define void @store_i32_to_numeric_address() {
259  %addr = inttoptr i32 42 to i32* ; constant address 42 is expected as the folded offset on a zero base
260  store atomic i32 0, i32* %addr seq_cst, align 4
261  ret void
262}
263
264; CHECK-LABEL: store_i32_to_global_address:
265; CHECK: i32.const $push0=, 0{{$}}
266; CHECK: i32.const $push1=, 0{{$}}
267; CHECK: i32.atomic.store gv($pop0), $pop1{{$}}
268define void @store_i32_to_global_address() {
269  store atomic i32 0, i32* @gv seq_cst, align 4 ; the symbol gv is expected as the offset on a zero base
270  ret void
271}
272
273;===----------------------------------------------------------------------------
274; Atomic stores: 64-bit
275;===----------------------------------------------------------------------------
276
277; Basic store.
278
279; CHECK-LABEL: store_i64_no_offset:
280; CHECK-NEXT: .functype store_i64_no_offset (i32, i64) -> (){{$}}
281; CHECK-NEXT: i64.atomic.store 0($0), $1{{$}}
282; CHECK-NEXT: return{{$}}
283define void @store_i64_no_offset(i64* %p, i64 %v) {
284  store atomic i64 %v, i64* %p seq_cst, align 8 ; seq_cst store of the argument value through the argument pointer
285  ret void
286}
287
288; With an nuw add, we can fold an offset.
289
290; CHECK-LABEL: store_i64_with_folded_offset:
291; CHECK: i64.atomic.store 24($0), $pop0{{$}}
292define void @store_i64_with_folded_offset(i64* %p) {
293  %base = ptrtoint i64* %p to i32
294  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
295  %addr = inttoptr i32 %sum to i64*
296  store atomic i64 0, i64* %addr seq_cst, align 8
297  ret void
298}
299
300; With an inbounds gep, we can fold an offset.
301
302; CHECK-LABEL: store_i64_with_folded_gep_offset:
303; CHECK: i64.atomic.store 24($0), $pop0{{$}}
304define void @store_i64_with_folded_gep_offset(i64* %p) {
305  %addr = getelementptr inbounds i64, i64* %p, i32 3 ; i64 element 3 = byte offset 24
306  store atomic i64 0, i64* %addr seq_cst, align 8
307  ret void
308}
309
310; We can't fold a negative offset though, even with an inbounds gep.
311
312; CHECK-LABEL: store_i64_with_unfolded_gep_negative_offset:
313; CHECK: i32.const $push0=, -24{{$}}
314; CHECK: i32.add $push1=, $0, $pop0{{$}}
315; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
316define void @store_i64_with_unfolded_gep_negative_offset(i64* %p) {
317  %addr = getelementptr inbounds i64, i64* %p, i32 -3 ; byte offset -24, expected as an explicit i32.add
318  store atomic i64 0, i64* %addr seq_cst, align 8
319  ret void
320}
321
322; Without nuw, and even with nsw, we can't fold an offset.
323
324; CHECK-LABEL: store_i64_with_unfolded_offset:
325; CHECK: i32.const $push0=, 24{{$}}
326; CHECK: i32.add $push1=, $0, $pop0{{$}}
327; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
328define void @store_i64_with_unfolded_offset(i64* %p) {
329  %base = ptrtoint i64* %p to i32
330  %sum = add nsw i32 %base, 24 ; nsw only (no nuw), so the 24 must not fold
331  %addr = inttoptr i32 %sum to i64*
332  store atomic i64 0, i64* %addr seq_cst, align 8
333  ret void
334}
335
336; Without inbounds, we can't fold a gep offset.
337
338; CHECK-LABEL: store_i64_with_unfolded_gep_offset:
339; CHECK: i32.const $push0=, 24{{$}}
340; CHECK: i32.add $push1=, $0, $pop0{{$}}
341; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
342define void @store_i64_with_unfolded_gep_offset(i64* %p) {
343  %addr = getelementptr i64, i64* %p, i32 3 ; no inbounds, so the offset stays an explicit add
344  store atomic i64 0, i64* %addr seq_cst, align 8
345  ret void
346}
347
348;===----------------------------------------------------------------------------
349; Atomic sign-extending loads
350;===----------------------------------------------------------------------------
351
352; Fold an offset into a sign-extending load.
353
354; CHECK-LABEL: load_i8_i32_s_with_folded_offset:
355; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
356; CHECK-NEXT: i32.extend8_s $push1=, $pop0
357define i32 @load_i8_i32_s_with_folded_offset(i8* %p) {
358  %base = ptrtoint i8* %p to i32
359  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
360  %addr = inttoptr i32 %sum to i8*
361  %val = load atomic i8, i8* %addr seq_cst, align 1
362  %wide = sext i8 %val to i32 ; expected as a separate i32.extend8_s after the load8_u
363  ret i32 %wide
364}
365
366; 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s
367; CHECK-LABEL: load_i32_i64_s_with_folded_offset:
368; CHECK: i32.atomic.load $push0=, 24($0){{$}}
369; CHECK-NEXT: i64.extend_i32_s $push1=, $pop0{{$}}
370define i64 @load_i32_i64_s_with_folded_offset(i32* %p) {
371  %base = ptrtoint i32* %p to i32
372  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
373  %addr = inttoptr i32 %sum to i32*
374  %val = load atomic i32, i32* %addr seq_cst, align 4
375  %wide = sext i32 %val to i64 ; expected as i64.extend_i32_s on the i32 load result
376  ret i64 %wide
377}
378
379; Fold a gep offset into a sign-extending load.
380
381; CHECK-LABEL: load_i8_i32_s_with_folded_gep_offset:
382; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
383; CHECK-NEXT: i32.extend8_s $push1=, $pop0
384define i32 @load_i8_i32_s_with_folded_gep_offset(i8* %p) {
385  %addr = getelementptr inbounds i8, i8* %p, i32 24 ; i8 element 24 = byte offset 24
386  %val = load atomic i8, i8* %addr seq_cst, align 1
387  %wide = sext i8 %val to i32
388  ret i32 %wide
389}
390
391; CHECK-LABEL: load_i16_i32_s_with_folded_gep_offset:
392; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}}
393; CHECK-NEXT: i32.extend16_s $push1=, $pop0
394define i32 @load_i16_i32_s_with_folded_gep_offset(i16* %p) {
395  %addr = getelementptr inbounds i16, i16* %p, i32 24 ; i16 element 24 = byte offset 48
396  %val = load atomic i16, i16* %addr seq_cst, align 2
397  %wide = sext i16 %val to i32
398  ret i32 %wide
399}
400
401; CHECK-LABEL: load_i16_i64_s_with_folded_gep_offset:
402; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}}
403; CHECK-NEXT: i64.extend16_s $push1=, $pop0
404define i64 @load_i16_i64_s_with_folded_gep_offset(i16* %p) {
405  %addr = getelementptr inbounds i16, i16* %p, i32 24 ; i16 element 24 = byte offset 48
406  %val = load atomic i16, i16* %addr seq_cst, align 2
407  %wide = sext i16 %val to i64
408  ret i64 %wide
409}
410
411; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
412; an 'add' if the or'ed bits are known to be zero.
413
414; CHECK-LABEL: load_i8_i32_s_with_folded_or_offset:
415; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
416; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
417define i32 @load_i8_i32_s_with_folded_or_offset(i32 %x) {
418  %masked = and i32 %x, -4 ; clears the low two bits, so adding 2 cannot carry
419  %base_ptr = inttoptr i32 %masked to i8*
420  %elt = getelementptr inbounds i8, i8* %base_ptr, i32 2
421  %byte = load atomic i8, i8* %elt seq_cst, align 1
422  %wide = sext i8 %byte to i32
423  ret i32 %wide
424}
425
426; CHECK-LABEL: load_i8_i64_s_with_folded_or_offset:
427; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
428; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
429define i64 @load_i8_i64_s_with_folded_or_offset(i32 %x) {
430  %masked = and i32 %x, -4 ; clears the low two bits, so adding 2 cannot carry
431  %base_ptr = inttoptr i32 %masked to i8*
432  %elt = getelementptr inbounds i8, i8* %base_ptr, i32 2
433  %byte = load atomic i8, i8* %elt seq_cst, align 1
434  %wide = sext i8 %byte to i64
435  ret i64 %wide
436}
437
438; When loading from a fixed address, materialize a zero.
439
440; CHECK-LABEL: load_i16_i32_s_from_numeric_address:
441; CHECK: i32.const $push0=, 0{{$}}
442; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}}
443; CHECK-NEXT: i32.extend16_s $push2=, $pop1
444define i32 @load_i16_i32_s_from_numeric_address() {
445  %s = inttoptr i32 42 to i16* ; constant address 42 is expected as the folded offset on a zero base
446  %t = load atomic i16, i16* %s seq_cst, align 2
447  %u = sext i16 %t to i32
448  ret i32 %u
449}
450
451; CHECK-LABEL: load_i8_i32_s_from_global_address:
452; CHECK: i32.const $push0=, 0{{$}}
453; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}}
454; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
455@gv8 = global i8 0
456define i32 @load_i8_i32_s_from_global_address() {
457  %t = load atomic i8, i8* @gv8 seq_cst, align 1 ; the symbol gv8 is expected as the offset on a zero base
458  %u = sext i8 %t to i32
459  ret i32 %u
460}
461
462;===----------------------------------------------------------------------------
463; Atomic zero-extending loads
464;===----------------------------------------------------------------------------
465
466; Fold an offset into a zero-extending load.
467
468; CHECK-LABEL: load_i8_i32_z_with_folded_offset:
469; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
470define i32 @load_i8_i32_z_with_folded_offset(i8* %p) {
471  %base = ptrtoint i8* %p to i32
472  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
473  %addr = inttoptr i32 %sum to i8*
474  %val = load atomic i8, i8* %addr seq_cst, align 1
475  %wide = zext i8 %val to i32 ; expected to be absorbed into load8_u
476  ret i32 %wide
477}
478
479; CHECK-LABEL: load_i32_i64_z_with_folded_offset:
480; CHECK: i64.atomic.load32_u $push0=, 24($0){{$}}
481define i64 @load_i32_i64_z_with_folded_offset(i32* %p) {
482  %base = ptrtoint i32* %p to i32
483  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
484  %addr = inttoptr i32 %sum to i32*
485  %val = load atomic i32, i32* %addr seq_cst, align 4
486  %wide = zext i32 %val to i64 ; expected to be absorbed into i64.atomic.load32_u
487  ret i64 %wide
488}
489
490; Fold a gep offset into a zero-extending load.
491
492; CHECK-LABEL: load_i8_i32_z_with_folded_gep_offset:
493; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
494define i32 @load_i8_i32_z_with_folded_gep_offset(i8* %p) {
495  %addr = getelementptr inbounds i8, i8* %p, i32 24 ; i8 element 24 = byte offset 24
496  %val = load atomic i8, i8* %addr seq_cst, align 1
497  %wide = zext i8 %val to i32
498  ret i32 %wide
499}
500
501; CHECK-LABEL: load_i16_i32_z_with_folded_gep_offset:
502; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}}
503define i32 @load_i16_i32_z_with_folded_gep_offset(i16* %p) {
504  %addr = getelementptr inbounds i16, i16* %p, i32 24 ; i16 element 24 = byte offset 48
505  %val = load atomic i16, i16* %addr seq_cst, align 2
506  %wide = zext i16 %val to i32
507  ret i32 %wide
508}
509
510; CHECK-LABEL: load_i16_i64_z_with_folded_gep_offset:
511; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}}
512define i64 @load_i16_i64_z_with_folded_gep_offset(i16* %p) {
513  %s = getelementptr inbounds i16, i16* %p, i32 24 ; i32 index matches the 32-bit pointers in the datalayout and the sibling tests; i16 element 24 = byte offset 48
514  %t = load atomic i16, i16* %s seq_cst, align 2
515  %u = zext i16 %t to i64
516  ret i64 %u
517}
518
519; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
520; an 'add' if the or'ed bits are known to be zero.
521
522; CHECK-LABEL: load_i8_i32_z_with_folded_or_offset:
523; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
524define i32 @load_i8_i32_z_with_folded_or_offset(i32 %x) {
525  %masked = and i32 %x, -4 ; clears the low two bits, so adding 2 cannot carry
526  %base_ptr = inttoptr i32 %masked to i8*
527  %elt = getelementptr inbounds i8, i8* %base_ptr, i32 2
528  %byte = load atomic i8, i8* %elt seq_cst, align 1
529  %wide = zext i8 %byte to i32
530  ret i32 %wide
531}
532
533; CHECK-LABEL: load_i8_i64_z_with_folded_or_offset:
534; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
535define i64 @load_i8_i64_z_with_folded_or_offset(i32 %x) {
536  %masked = and i32 %x, -4 ; clears the low two bits, so adding 2 cannot carry
537  %base_ptr = inttoptr i32 %masked to i8*
538  %elt = getelementptr inbounds i8, i8* %base_ptr, i32 2
539  %byte = load atomic i8, i8* %elt seq_cst, align 1
540  %wide = zext i8 %byte to i64
541  ret i64 %wide
542}
543
544; When loading from a fixed address, materialize a zero.
545
546; CHECK-LABEL: load_i16_i32_z_from_numeric_address:
547; CHECK: i32.const $push0=, 0{{$}}
548; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}}
549define i32 @load_i16_i32_z_from_numeric_address() {
550  %s = inttoptr i32 42 to i16* ; constant address 42 is expected as the folded offset on a zero base
551  %t = load atomic i16, i16* %s seq_cst, align 2
552  %u = zext i16 %t to i32
553  ret i32 %u
554}
555
556; CHECK-LABEL: load_i8_i32_z_from_global_address:
557; CHECK: i32.const $push0=, 0{{$}}
558; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}}
559define i32 @load_i8_i32_z_from_global_address() {
560  %t = load atomic i8, i8* @gv8 seq_cst, align 1 ; the symbol gv8 is expected as the offset on a zero base
561  %u = zext i8 %t to i32
562  ret i32 %u
563}
564
565; i8 return value should test anyext loads
566
567; CHECK-LABEL: load_i8_i32_retvalue:
568; CHECK: i32.atomic.load8_u $push0=, 0($0){{$}}
569; CHECK-NEXT: return $pop0{{$}}
570define i8 @load_i8_i32_retvalue(i8* %p) {
571  %val = load atomic i8, i8* %p seq_cst, align 1 ; returned as i8 with no explicit extension in the IR
572  ret i8 %val
573}
574
575;===----------------------------------------------------------------------------
576; Atomic truncating stores
577;===----------------------------------------------------------------------------
578
579; Fold an offset into a truncating store.
580
581; CHECK-LABEL: store_i8_i32_with_folded_offset:
582; CHECK: i32.atomic.store8 24($0), $1{{$}}
583define void @store_i8_i32_with_folded_offset(i8* %p, i32 %v) {
584  %base = ptrtoint i8* %p to i32
585  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
586  %addr = inttoptr i32 %sum to i8*
587  %narrow = trunc i32 %v to i8 ; expected to be absorbed into store8
588  store atomic i8 %narrow, i8* %addr seq_cst, align 1
589  ret void
590}
591
592; CHECK-LABEL: store_i32_i64_with_folded_offset:
593; CHECK: i64.atomic.store32 24($0), $1{{$}}
594define void @store_i32_i64_with_folded_offset(i32* %p, i64 %v) {
595  %base = ptrtoint i32* %p to i32
596  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
597  %addr = inttoptr i32 %sum to i32*
598  %narrow = trunc i64 %v to i32 ; expected to be absorbed into i64.atomic.store32
599  store atomic i32 %narrow, i32* %addr seq_cst, align 4
600  ret void
601}
602
603; Fold a gep offset into a truncating store.
604
605; CHECK-LABEL: store_i8_i32_with_folded_gep_offset:
606; CHECK: i32.atomic.store8 24($0), $1{{$}}
607define void @store_i8_i32_with_folded_gep_offset(i8* %p, i32 %v) {
608  %addr = getelementptr inbounds i8, i8* %p, i32 24 ; i8 element 24 = byte offset 24
609  %narrow = trunc i32 %v to i8
610  store atomic i8 %narrow, i8* %addr seq_cst, align 1
611  ret void
612}
613
614; CHECK-LABEL: store_i16_i32_with_folded_gep_offset:
615; CHECK: i32.atomic.store16 48($0), $1{{$}}
616define void @store_i16_i32_with_folded_gep_offset(i16* %p, i32 %v) {
617  %addr = getelementptr inbounds i16, i16* %p, i32 24 ; i16 element 24 = byte offset 48
618  %narrow = trunc i32 %v to i16
619  store atomic i16 %narrow, i16* %addr seq_cst, align 2
620  ret void
621}
622
623; CHECK-LABEL: store_i16_i64_with_folded_gep_offset:
624; CHECK: i64.atomic.store16 48($0), $1{{$}}
625define void @store_i16_i64_with_folded_gep_offset(i16* %p, i64 %v) {
626  %addr = getelementptr inbounds i16, i16* %p, i32 24 ; i16 element 24 = byte offset 48
627  %narrow = trunc i64 %v to i16
628  store atomic i16 %narrow, i16* %addr seq_cst, align 2
629  ret void
630}
631
632; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
633; an 'add' if the or'ed bits are known to be zero.
634
635; CHECK-LABEL: store_i8_i32_with_folded_or_offset:
636; CHECK: i32.atomic.store8 2($pop{{[0-9]+}}), $1{{$}}
637define void @store_i8_i32_with_folded_or_offset(i32 %x, i32 %v) {
638  %masked = and i32 %x, -4 ; clears the low two bits, so adding 2 cannot carry
639  %base_ptr = inttoptr i32 %masked to i8*
640  %elt = getelementptr inbounds i8, i8* %base_ptr, i32 2
641  %narrow = trunc i32 %v to i8
642  store atomic i8 %narrow, i8* %elt seq_cst, align 1
643  ret void
644}
645
646; CHECK-LABEL: store_i8_i64_with_folded_or_offset:
647; CHECK: i64.atomic.store8 2($pop{{[0-9]+}}), $1{{$}}
648define void @store_i8_i64_with_folded_or_offset(i32 %x, i64 %v) {
649  %masked = and i32 %x, -4 ; clears the low two bits, so adding 2 cannot carry
650  %base_ptr = inttoptr i32 %masked to i8*
651  %elt = getelementptr inbounds i8, i8* %base_ptr, i32 2
652  %narrow = trunc i64 %v to i8
653  store atomic i8 %narrow, i8* %elt seq_cst, align 1
654  ret void
655}
656
657;===----------------------------------------------------------------------------
658; Atomic binary read-modify-writes: 32-bit
659;===----------------------------------------------------------------------------
660
661; There are several RMW instructions, but here we only test 'add' as an example.
662
663; Basic RMW.
664
665; CHECK-LABEL: rmw_add_i32_no_offset:
666; CHECK-NEXT: .functype rmw_add_i32_no_offset (i32, i32) -> (i32){{$}}
667; CHECK: i32.atomic.rmw.add $push0=, 0($0), $1{{$}}
668; CHECK-NEXT: return $pop0{{$}}
669define i32 @rmw_add_i32_no_offset(i32* %p, i32 %v) {
670  %prev = atomicrmw add i32* %p, i32 %v seq_cst ; the result of the atomicrmw is what gets returned
671  ret i32 %prev
672}
673
674; With an nuw add, we can fold an offset.
675
676; CHECK-LABEL: rmw_add_i32_with_folded_offset:
677; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}}
678define i32 @rmw_add_i32_with_folded_offset(i32* %p, i32 %v) {
679  %base = ptrtoint i32* %p to i32
680  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
681  %addr = inttoptr i32 %sum to i32*
682  %prev = atomicrmw add i32* %addr, i32 %v seq_cst
683  ret i32 %prev
684}
685
686; With an inbounds gep, we can fold an offset.
687
688; CHECK-LABEL: rmw_add_i32_with_folded_gep_offset:
689; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}}
690define i32 @rmw_add_i32_with_folded_gep_offset(i32* %p, i32 %v) {
691  %addr = getelementptr inbounds i32, i32* %p, i32 6 ; i32 element 6 = byte offset 24
692  %prev = atomicrmw add i32* %addr, i32 %v seq_cst
693  ret i32 %prev
694}
695
696; We can't fold a negative offset though, even with an inbounds gep.
697
698; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_negative_offset:
699; CHECK: i32.const $push0=, -24{{$}}
700; CHECK: i32.add $push1=, $0, $pop0{{$}}
701; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
702define i32 @rmw_add_i32_with_unfolded_gep_negative_offset(i32* %p, i32 %v) {
703  %addr = getelementptr inbounds i32, i32* %p, i32 -6 ; byte offset -24, expected as an explicit i32.add
704  %prev = atomicrmw add i32* %addr, i32 %v seq_cst
705  ret i32 %prev
706}
707
708; Without nuw, and even with nsw, we can't fold an offset.
709
710; CHECK-LABEL: rmw_add_i32_with_unfolded_offset:
711; CHECK: i32.const $push0=, 24{{$}}
712; CHECK: i32.add $push1=, $0, $pop0{{$}}
713; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
714define i32 @rmw_add_i32_with_unfolded_offset(i32* %p, i32 %v) {
715  %base = ptrtoint i32* %p to i32
716  %sum = add nsw i32 %base, 24 ; nsw only (no nuw), so the 24 must not fold
717  %addr = inttoptr i32 %sum to i32*
718  %prev = atomicrmw add i32* %addr, i32 %v seq_cst
719  ret i32 %prev
720}
721
722; Without inbounds, we can't fold a gep offset.
723
724; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_offset:
725; CHECK: i32.const $push0=, 24{{$}}
726; CHECK: i32.add $push1=, $0, $pop0{{$}}
727; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
728define i32 @rmw_add_i32_with_unfolded_gep_offset(i32* %p, i32 %v) {
729  %addr = getelementptr i32, i32* %p, i32 6 ; no inbounds, so the offset stays an explicit add
730  %prev = atomicrmw add i32* %addr, i32 %v seq_cst
731  ret i32 %prev
732}
733
734; When performing an RMW on a fixed address, materialize a zero.
735
736; CHECK-LABEL: rmw_add_i32_from_numeric_address:
737; CHECK: i32.const $push0=, 0{{$}}
738; CHECK: i32.atomic.rmw.add $push1=, 42($pop0), $0{{$}}
739define i32 @rmw_add_i32_from_numeric_address(i32 %v) {
740  %s = inttoptr i32 42 to i32* ; constant address 42 is expected as the folded offset on a zero base
741  %old = atomicrmw add i32* %s, i32 %v seq_cst
742  ret i32 %old
743}
744
745; CHECK-LABEL: rmw_add_i32_from_global_address:
746; CHECK: i32.const $push0=, 0{{$}}
747; CHECK: i32.atomic.rmw.add $push1=, gv($pop0), $0{{$}}
748define i32 @rmw_add_i32_from_global_address(i32 %v) {
749  %old = atomicrmw add i32* @gv, i32 %v seq_cst ; the symbol gv is expected as the offset on a zero base
750  ret i32 %old
751}
752
753;===----------------------------------------------------------------------------
754; Atomic binary read-modify-writes: 64-bit
755;===----------------------------------------------------------------------------
756
757; Basic RMW.
758
759; CHECK-LABEL: rmw_add_i64_no_offset:
760; CHECK-NEXT: .functype rmw_add_i64_no_offset (i32, i64) -> (i64){{$}}
761; CHECK: i64.atomic.rmw.add $push0=, 0($0), $1{{$}}
762; CHECK-NEXT: return $pop0{{$}}
763define i64 @rmw_add_i64_no_offset(i64* %p, i64 %v) {
764  %prev = atomicrmw add i64* %p, i64 %v seq_cst ; the result of the atomicrmw is what gets returned
765  ret i64 %prev
766}
767
768; With an nuw add, we can fold an offset.
769
770; CHECK-LABEL: rmw_add_i64_with_folded_offset:
771; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}}
772define i64 @rmw_add_i64_with_folded_offset(i64* %p, i64 %v) {
773  %base = ptrtoint i64* %p to i32
774  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
775  %addr = inttoptr i32 %sum to i64*
776  %prev = atomicrmw add i64* %addr, i64 %v seq_cst
777  ret i64 %prev
778}
779
780; With an inbounds gep, we can fold an offset.
781
782; CHECK-LABEL: rmw_add_i64_with_folded_gep_offset:
783; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}}
784define i64 @rmw_add_i64_with_folded_gep_offset(i64* %p, i64 %v) {
785  %addr = getelementptr inbounds i64, i64* %p, i32 3 ; i64 element 3 = byte offset 24
786  %prev = atomicrmw add i64* %addr, i64 %v seq_cst
787  ret i64 %prev
788}
789
790; We can't fold a negative offset though, even with an inbounds gep.
791
792; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_negative_offset:
793; CHECK: i32.const $push0=, -24{{$}}
794; CHECK: i32.add $push1=, $0, $pop0{{$}}
795; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
796define i64 @rmw_add_i64_with_unfolded_gep_negative_offset(i64* %p, i64 %v) {
797  %addr = getelementptr inbounds i64, i64* %p, i32 -3 ; byte offset -24, expected as an explicit i32.add
798  %prev = atomicrmw add i64* %addr, i64 %v seq_cst
799  ret i64 %prev
800}
801
802; Without nuw, and even with nsw, we can't fold an offset.
803
804; CHECK-LABEL: rmw_add_i64_with_unfolded_offset:
805; CHECK: i32.const $push0=, 24{{$}}
806; CHECK: i32.add $push1=, $0, $pop0{{$}}
807; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
808define i64 @rmw_add_i64_with_unfolded_offset(i64* %p, i64 %v) {
809  %base = ptrtoint i64* %p to i32
810  %sum = add nsw i32 %base, 24 ; nsw only (no nuw), so the 24 must not fold
811  %addr = inttoptr i32 %sum to i64*
812  %prev = atomicrmw add i64* %addr, i64 %v seq_cst
813  ret i64 %prev
814}
815
816; Without inbounds, we can't fold a gep offset.
817
818; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_offset:
819; CHECK: i32.const $push0=, 24{{$}}
820; CHECK: i32.add $push1=, $0, $pop0{{$}}
821; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
822define i64 @rmw_add_i64_with_unfolded_gep_offset(i64* %p, i64 %v) {
823  %addr = getelementptr i64, i64* %p, i32 3 ; no inbounds, so the offset stays an explicit add
824  %prev = atomicrmw add i64* %addr, i64 %v seq_cst
825  ret i64 %prev
826}
827
828;===----------------------------------------------------------------------------
829; Atomic truncating & sign-extending binary RMWs
830;===----------------------------------------------------------------------------
831
832; Fold an offset into a sign-extending rmw.
833
834; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_offset:
835; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
836; CHECK-NEXT: i32.extend8_s $push1=, $pop0
837define i32 @rmw_add_i8_i32_s_with_folded_offset(i8* %p, i32 %v) {
838  %base = ptrtoint i8* %p to i32
839  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
840  %addr = inttoptr i32 %sum to i8*
841  %narrow = trunc i32 %v to i8
842  %prev = atomicrmw add i8* %addr, i8 %narrow seq_cst
843  %wide = sext i8 %prev to i32 ; expected as a separate i32.extend8_s after rmw8.add_u
844  ret i32 %wide
845}
846
847; 32->64 sext rmw gets selected as i32.atomic.rmw.add, i64.extend_i32_s
848; CHECK-LABEL: rmw_add_i32_i64_s_with_folded_offset:
849; CHECK: i32.wrap_i64 $push0=, $1
850; CHECK-NEXT: i32.atomic.rmw.add $push1=, 24($0), $pop0{{$}}
851; CHECK-NEXT: i64.extend_i32_s $push2=, $pop1{{$}}
852define i64 @rmw_add_i32_i64_s_with_folded_offset(i32* %p, i64 %v) {
853  %base = ptrtoint i32* %p to i32
854  %sum = add nuw i32 %base, 24 ; nuw on the add is what permits folding the 24
855  %addr = inttoptr i32 %sum to i32*
856  %narrow = trunc i64 %v to i32 ; expected as an explicit i32.wrap_i64
857  %prev = atomicrmw add i32* %addr, i32 %narrow seq_cst
858  %wide = sext i32 %prev to i64 ; expected as i64.extend_i32_s on the i32 RMW result
859  ret i64 %wide
860}
861
862; Fold a gep offset into a sign-extending rmw.
863
864; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_gep_offset:
865; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
866; CHECK-NEXT: i32.extend8_s $push1=, $pop0
867define i32 @rmw_add_i8_i32_s_with_folded_gep_offset(i8* %p, i32 %v) {
868  %addr = getelementptr inbounds i8, i8* %p, i32 24 ; i8 element 24 = byte offset 24
869  %narrow = trunc i32 %v to i8
870  %prev = atomicrmw add i8* %addr, i8 %narrow seq_cst
871  %wide = sext i8 %prev to i32
872  ret i32 %wide
873}
874
875; CHECK-LABEL: rmw_add_i16_i32_s_with_folded_gep_offset:
876; CHECK: i32.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
877; CHECK-NEXT: i32.extend16_s $push1=, $pop0
878define i32 @rmw_add_i16_i32_s_with_folded_gep_offset(i16* %p, i32 %v) {
879  %addr = getelementptr inbounds i16, i16* %p, i32 24 ; i16 element 24 = byte offset 48
880  %narrow = trunc i32 %v to i16
881  %prev = atomicrmw add i16* %addr, i16 %narrow seq_cst
882  %wide = sext i16 %prev to i32
883  ret i32 %wide
884}
885
886; CHECK-LABEL: rmw_add_i16_i64_s_with_folded_gep_offset:
887; CHECK: i64.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
888; CHECK-NEXT: i64.extend16_s $push1=, $pop0
define i64 @rmw_add_i16_i64_s_with_folded_gep_offset(i16* %p, i64 %v) {
  ; Inbounds gep of 24 i16 elements; the expected folded byte offset is 48.
  %ptr = getelementptr inbounds i16, i16* %p, i32 24
  %narrow = trunc i64 %v to i16
  %prev = atomicrmw add i16* %ptr, i16 %narrow seq_cst
  ; The old i16 value is sign-extended to i64.
  %wide = sext i16 %prev to i64
  ret i64 %wide
}
896
897; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
898; an 'add' if the or'ed bits are known to be zero.
899
900; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_or_offset:
901; CHECK: i32.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
902; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i32 @rmw_add_i8_i32_s_with_folded_or_offset(i32 %x, i32 %v) {
  ; Masking with -4 clears the two low bits of %x, so the +2 below only sets
  ; bits that are known to be zero.
  %base = and i32 %x, -4
  %base.ptr = inttoptr i32 %base to i8*
  %elt = getelementptr inbounds i8, i8* %base.ptr, i32 2
  %narrow = trunc i32 %v to i8
  %prev = atomicrmw add i8* %elt, i8 %narrow seq_cst
  %wide = sext i8 %prev to i32
  ret i32 %wide
}
912
913; CHECK-LABEL: rmw_add_i8_i64_s_with_folded_or_offset:
914; CHECK: i64.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
915; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i64 @rmw_add_i8_i64_s_with_folded_or_offset(i32 %x, i64 %v) {
  ; Masking with -4 clears the two low bits of %x, so the +2 below only sets
  ; bits that are known to be zero.
  %base = and i32 %x, -4
  %base.ptr = inttoptr i32 %base to i8*
  %elt = getelementptr inbounds i8, i8* %base.ptr, i32 2
  %narrow = trunc i64 %v to i8
  %prev = atomicrmw add i8* %elt, i8 %narrow seq_cst
  %wide = sext i8 %prev to i64
  ret i64 %wide
}
925
; When the RMW targets a fixed address, materialize a zero as the base.
927
; CHECK-LABEL: rmw_add_i16_i32_s_from_numeric_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.add_u $push1=, 42($pop0), $0{{$}}
; CHECK-NEXT: i32.extend16_s $push2=, $pop1{{$}}
define i32 @rmw_add_i16_i32_s_from_numeric_address(i32 %v) {
  ; The pointer is the integer constant 42; the directives above expect it to
  ; appear as the instruction offset on top of a materialized zero base.
  %s = inttoptr i32 42 to i16*
  %t = trunc i32 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  ; The old i16 value is sign-extended back to i32.
  %u = sext i16 %old to i32
  ret i32 %u
}
939
; CHECK-LABEL: rmw_add_i8_i32_s_from_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.add_u $push1=, gv8($pop0), $0{{$}}
; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
define i32 @rmw_add_i8_i32_s_from_global_address(i32 %v) {
  ; The operation targets the global @gv8 directly; the directives above expect
  ; its symbol to appear as the offset on top of a materialized zero base.
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* @gv8, i8 %t seq_cst
  ; The old i8 value is sign-extended back to i32.
  %u = sext i8 %old to i32
  ret i32 %u
}
950
951;===----------------------------------------------------------------------------
952; Atomic truncating & zero-extending binary RMWs
953;===----------------------------------------------------------------------------
954
955; Fold an offset into a zero-extending rmw.
956
957; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_offset:
958; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_offset(i8* %p, i32 %v) {
  ; Build the address p + 24 through an integer add carrying the nuw flag.
  %addr = ptrtoint i8* %p to i32
  %addr.off = add nuw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i8*
  ; 8-bit atomic add on the narrowed operand; the old value is zero-extended.
  %narrow = trunc i32 %v to i8
  %prev = atomicrmw add i8* %ptr, i8 %narrow seq_cst
  %wide = zext i8 %prev to i32
  ret i32 %wide
}
968
969; CHECK-LABEL: rmw_add_i32_i64_z_with_folded_offset:
970; CHECK: i64.atomic.rmw32.add_u $push0=, 24($0), $1{{$}}
define i64 @rmw_add_i32_i64_z_with_folded_offset(i32* %p, i64 %v) {
  ; Build the address p + 24 through an integer add carrying the nuw flag.
  %addr = ptrtoint i32* %p to i32
  %addr.off = add nuw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i32*
  ; 32-bit atomic add on the truncated i64 operand; old value zero-extended to i64.
  %narrow = trunc i64 %v to i32
  %prev = atomicrmw add i32* %ptr, i32 %narrow seq_cst
  %wide = zext i32 %prev to i64
  ret i64 %wide
}
980
981; Fold a gep offset into a zero-extending rmw.
982
983; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_gep_offset:
984; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_gep_offset(i8* %p, i32 %v) {
  ; Inbounds gep of 24 i8 elements, i.e. a byte offset of 24.
  %ptr = getelementptr inbounds i8, i8* %p, i32 24
  %narrow = trunc i32 %v to i8
  %prev = atomicrmw add i8* %ptr, i8 %narrow seq_cst
  ; The old i8 value is zero-extended back to i32.
  %wide = zext i8 %prev to i32
  ret i32 %wide
}
992
993; CHECK-LABEL: rmw_add_i16_i32_z_with_folded_gep_offset:
994; CHECK: i32.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
define i32 @rmw_add_i16_i32_z_with_folded_gep_offset(i16* %p, i32 %v) {
  ; Inbounds gep of 24 i16 elements; the expected folded byte offset is 48.
  %ptr = getelementptr inbounds i16, i16* %p, i32 24
  %narrow = trunc i32 %v to i16
  %prev = atomicrmw add i16* %ptr, i16 %narrow seq_cst
  ; The old i16 value is zero-extended back to i32.
  %wide = zext i16 %prev to i32
  ret i32 %wide
}
1002
1003; CHECK-LABEL: rmw_add_i16_i64_z_with_folded_gep_offset:
1004; CHECK: i64.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
define i64 @rmw_add_i16_i64_z_with_folded_gep_offset(i16* %p, i64 %v) {
  ; Inbounds gep of 24 i16 elements; the expected folded byte offset is 48.
  %ptr = getelementptr inbounds i16, i16* %p, i32 24
  %narrow = trunc i64 %v to i16
  %prev = atomicrmw add i16* %ptr, i16 %narrow seq_cst
  ; The old i16 value is zero-extended to i64.
  %wide = zext i16 %prev to i64
  ret i64 %wide
}
1012
1013; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
1014; an 'add' if the or'ed bits are known to be zero.
1015
1016; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_or_offset:
1017; CHECK: i32.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_or_offset(i32 %x, i32 %v) {
  ; Masking with -4 clears the two low bits of %x, so the +2 below only sets
  ; bits that are known to be zero.
  %base = and i32 %x, -4
  %base.ptr = inttoptr i32 %base to i8*
  %elt = getelementptr inbounds i8, i8* %base.ptr, i32 2
  %narrow = trunc i32 %v to i8
  %prev = atomicrmw add i8* %elt, i8 %narrow seq_cst
  %wide = zext i8 %prev to i32
  ret i32 %wide
}
1027
1028; CHECK-LABEL: rmw_add_i8_i64_z_with_folded_or_offset:
1029; CHECK: i64.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
define i64 @rmw_add_i8_i64_z_with_folded_or_offset(i32 %x, i64 %v) {
  ; Masking with -4 clears the two low bits of %x, so the +2 below only sets
  ; bits that are known to be zero.
  %base = and i32 %x, -4
  %base.ptr = inttoptr i32 %base to i8*
  %elt = getelementptr inbounds i8, i8* %base.ptr, i32 2
  %narrow = trunc i64 %v to i8
  %prev = atomicrmw add i8* %elt, i8 %narrow seq_cst
  %wide = zext i8 %prev to i64
  ret i64 %wide
}
1039
; When the RMW targets a fixed address, materialize a zero as the base.
1041
; CHECK-LABEL: rmw_add_i16_i32_z_from_numeric_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.add_u $push1=, 42($pop0), $0{{$}}
define i32 @rmw_add_i16_i32_z_from_numeric_address(i32 %v) {
  ; The pointer is the integer constant 42; the directives above expect it to
  ; appear as the instruction offset on top of a materialized zero base.
  %s = inttoptr i32 42 to i16*
  %t = trunc i32 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  ; The old i16 value is zero-extended back to i32.
  %u = zext i16 %old to i32
  ret i32 %u
}
1052
; CHECK-LABEL: rmw_add_i8_i32_z_from_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.add_u $push1=, gv8($pop0), $0{{$}}
define i32 @rmw_add_i8_i32_z_from_global_address(i32 %v) {
  ; The operation targets the global @gv8 directly; the directives above expect
  ; its symbol to appear as the offset on top of a materialized zero base.
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* @gv8, i8 %t seq_cst
  ; The old i8 value is zero-extended back to i32.
  %u = zext i8 %old to i32
  ret i32 %u
}
1062
1063; i8 return value should test anyext RMWs
1064
1065; CHECK-LABEL: rmw_add_i8_i32_retvalue:
1066; CHECK: i32.atomic.rmw8.add_u $push0=, 0($0), $1{{$}}
1067; CHECK-NEXT: return $pop0{{$}}
define i8 @rmw_add_i8_i32_retvalue(i8 *%p, i32 %v) {
  %narrow = trunc i32 %v to i8
  %prev = atomicrmw add i8* %p, i8 %narrow seq_cst
  ; The old value is returned as i8, with no explicit extension in the IR.
  ret i8 %prev
}
1073
1074;===----------------------------------------------------------------------------
1075; Atomic ternary read-modify-writes: 32-bit
1076;===----------------------------------------------------------------------------
1077
1078; Basic RMW.
1079
1080; CHECK-LABEL: cmpxchg_i32_no_offset:
1081; CHECK-NEXT: .functype cmpxchg_i32_no_offset (i32, i32, i32) -> (i32){{$}}
1082; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
1083; CHECK-NEXT: return $pop0{{$}}
define i32 @cmpxchg_i32_no_offset(i32* %p, i32 %exp, i32 %new) {
  ; cmpxchg produces { loaded value, success bit }; only field 0 is returned.
  %res = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst
  %prev = extractvalue { i32, i1 } %res, 0
  ret i32 %prev
}
1089
1090; With an nuw add, we can fold an offset.
1091
1092; CHECK-LABEL: cmpxchg_i32_with_folded_offset:
1093; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i32_with_folded_offset(i32* %p, i32 %exp, i32 %new) {
  ; Build the address p + 24 through an integer add carrying the nuw flag.
  %addr = ptrtoint i32* %p to i32
  %addr.off = add nuw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i32*
  ; Only the loaded value (field 0 of the result pair) is returned.
  %res = cmpxchg i32* %ptr, i32 %exp, i32 %new seq_cst seq_cst
  %prev = extractvalue { i32, i1 } %res, 0
  ret i32 %prev
}
1102
1103; With an inbounds gep, we can fold an offset.
1104
1105; CHECK-LABEL: cmpxchg_i32_with_folded_gep_offset:
1106; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i32_with_folded_gep_offset(i32* %p, i32 %exp, i32 %new) {
  ; Inbounds gep of 6 i32 elements; the expected folded byte offset is 24.
  %ptr = getelementptr inbounds i32, i32* %p, i32 6
  %res = cmpxchg i32* %ptr, i32 %exp, i32 %new seq_cst seq_cst
  %prev = extractvalue { i32, i1 } %res, 0
  ret i32 %prev
}
1113
1114; We can't fold a negative offset though, even with an inbounds gep.
1115
1116; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_negative_offset:
1117; CHECK: i32.const $push0=, -24{{$}}
1118; CHECK: i32.add $push1=, $0, $pop0{{$}}
1119; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i32 @cmpxchg_i32_with_unfolded_gep_negative_offset(i32* %p, i32 %exp, i32 %new) {
  ; Inbounds gep with index -6 on i32; expected as an explicit add of -24.
  %ptr = getelementptr inbounds i32, i32* %p, i32 -6
  %res = cmpxchg i32* %ptr, i32 %exp, i32 %new seq_cst seq_cst
  %prev = extractvalue { i32, i1 } %res, 0
  ret i32 %prev
}
1126
1127; Without nuw, and even with nsw, we can't fold an offset.
1128
1129; CHECK-LABEL: cmpxchg_i32_with_unfolded_offset:
1130; CHECK: i32.const $push0=, 24{{$}}
1131; CHECK: i32.add $push1=, $0, $pop0{{$}}
1132; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i32 @cmpxchg_i32_with_unfolded_offset(i32* %p, i32 %exp, i32 %new) {
  ; The add carries nsw only (no nuw), so it is expected to stay an explicit add.
  %addr = ptrtoint i32* %p to i32
  %addr.off = add nsw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i32*
  %res = cmpxchg i32* %ptr, i32 %exp, i32 %new seq_cst seq_cst
  %prev = extractvalue { i32, i1 } %res, 0
  ret i32 %prev
}
1141
1142; Without inbounds, we can't fold a gep offset.
1143
1144; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_offset:
1145; CHECK: i32.const $push0=, 24{{$}}
1146; CHECK: i32.add $push1=, $0, $pop0{{$}}
1147; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i32 @cmpxchg_i32_with_unfolded_gep_offset(i32* %p, i32 %exp, i32 %new) {
  ; Plain gep (no inbounds); expected to stay an explicit add of 24.
  %ptr = getelementptr i32, i32* %p, i32 6
  %res = cmpxchg i32* %ptr, i32 %exp, i32 %new seq_cst seq_cst
  %prev = extractvalue { i32, i1 } %res, 0
  ret i32 %prev
}
1154
; When the cmpxchg targets a fixed address, materialize a zero as the base.
1156
; CHECK-LABEL: cmpxchg_i32_from_numeric_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push1=, 42($pop0), $0, $1{{$}}
define i32 @cmpxchg_i32_from_numeric_address(i32 %exp, i32 %new) {
  ; The pointer is the integer constant 42; the directives above expect it to
  ; appear as the instruction offset on top of a materialized zero base.
  %s = inttoptr i32 42 to i32*
  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
  ; Only the loaded value (field 0 of the result pair) is returned.
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}
1166
; CHECK-LABEL: cmpxchg_i32_from_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push1=, gv($pop0), $0, $1{{$}}
define i32 @cmpxchg_i32_from_global_address(i32 %exp, i32 %new) {
  ; The operation targets the global @gv directly; the directives above expect
  ; its symbol to appear as the offset on top of a materialized zero base.
  %pair = cmpxchg i32* @gv, i32 %exp, i32 %new seq_cst seq_cst
  ; Only the loaded value (field 0 of the result pair) is returned.
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}
1175
1176;===----------------------------------------------------------------------------
1177; Atomic ternary read-modify-writes: 64-bit
1178;===----------------------------------------------------------------------------
1179
1180; Basic RMW.
1181
1182; CHECK-LABEL: cmpxchg_i64_no_offset:
1183; CHECK-NEXT: .functype cmpxchg_i64_no_offset (i32, i64, i64) -> (i64){{$}}
1184; CHECK: i64.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
1185; CHECK-NEXT: return $pop0{{$}}
define i64 @cmpxchg_i64_no_offset(i64* %p, i64 %exp, i64 %new) {
  ; cmpxchg produces { loaded value, success bit }; only field 0 is returned.
  %res = cmpxchg i64* %p, i64 %exp, i64 %new seq_cst seq_cst
  %prev = extractvalue { i64, i1 } %res, 0
  ret i64 %prev
}
1191
1192; With an nuw add, we can fold an offset.
1193
1194; CHECK-LABEL: cmpxchg_i64_with_folded_offset:
1195; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i64 @cmpxchg_i64_with_folded_offset(i64* %p, i64 %exp, i64 %new) {
  ; Build the address p + 24 through an integer add carrying the nuw flag.
  %addr = ptrtoint i64* %p to i32
  %addr.off = add nuw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i64*
  ; Only the loaded value (field 0 of the result pair) is returned.
  %res = cmpxchg i64* %ptr, i64 %exp, i64 %new seq_cst seq_cst
  %prev = extractvalue { i64, i1 } %res, 0
  ret i64 %prev
}
1204
1205; With an inbounds gep, we can fold an offset.
1206
1207; CHECK-LABEL: cmpxchg_i64_with_folded_gep_offset:
1208; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i64 @cmpxchg_i64_with_folded_gep_offset(i64* %p, i64 %exp, i64 %new) {
  ; Inbounds gep of 3 i64 elements; the expected folded byte offset is 24.
  %ptr = getelementptr inbounds i64, i64* %p, i32 3
  %res = cmpxchg i64* %ptr, i64 %exp, i64 %new seq_cst seq_cst
  %prev = extractvalue { i64, i1 } %res, 0
  ret i64 %prev
}
1215
1216; We can't fold a negative offset though, even with an inbounds gep.
1217
1218; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_negative_offset:
1219; CHECK: i32.const $push0=, -24{{$}}
1220; CHECK: i32.add $push1=, $0, $pop0{{$}}
1221; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i64 @cmpxchg_i64_with_unfolded_gep_negative_offset(i64* %p, i64 %exp, i64 %new) {
  ; Inbounds gep with index -3 on i64; expected as an explicit add of -24.
  %ptr = getelementptr inbounds i64, i64* %p, i32 -3
  %res = cmpxchg i64* %ptr, i64 %exp, i64 %new seq_cst seq_cst
  %prev = extractvalue { i64, i1 } %res, 0
  ret i64 %prev
}
1228
1229; Without nuw, and even with nsw, we can't fold an offset.
1230
1231; CHECK-LABEL: cmpxchg_i64_with_unfolded_offset:
1232; CHECK: i32.const $push0=, 24{{$}}
1233; CHECK: i32.add $push1=, $0, $pop0{{$}}
1234; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i64 @cmpxchg_i64_with_unfolded_offset(i64* %p, i64 %exp, i64 %new) {
  ; The add carries nsw only (no nuw), so it is expected to stay an explicit add.
  %addr = ptrtoint i64* %p to i32
  %addr.off = add nsw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i64*
  %res = cmpxchg i64* %ptr, i64 %exp, i64 %new seq_cst seq_cst
  %prev = extractvalue { i64, i1 } %res, 0
  ret i64 %prev
}
1243
1244; Without inbounds, we can't fold a gep offset.
1245
1246; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_offset:
1247; CHECK: i32.const $push0=, 24{{$}}
1248; CHECK: i32.add $push1=, $0, $pop0{{$}}
1249; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i64 @cmpxchg_i64_with_unfolded_gep_offset(i64* %p, i64 %exp, i64 %new) {
  ; Plain gep (no inbounds); expected to stay an explicit add of 24.
  %ptr = getelementptr i64, i64* %p, i32 3
  %res = cmpxchg i64* %ptr, i64 %exp, i64 %new seq_cst seq_cst
  %prev = extractvalue { i64, i1 } %res, 0
  ret i64 %prev
}
1256
1257;===----------------------------------------------------------------------------
1258; Atomic truncating & sign-extending ternary RMWs
1259;===----------------------------------------------------------------------------
1260
1261; Fold an offset into a sign-extending rmw.
1262
1263; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_offset:
1264; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
1265; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @cmpxchg_i8_i32_s_with_folded_offset(i8* %p, i32 %exp, i32 %new) {
  ; Build the address p + 24 through an integer add carrying the nuw flag.
  %addr = ptrtoint i8* %p to i32
  %addr.off = add nuw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i8*
  ; Both operands are narrowed to i8; the loaded value is sign-extended to i32.
  %exp.narrow = trunc i32 %exp to i8
  %new.narrow = trunc i32 %new to i8
  %res = cmpxchg i8* %ptr, i8 %exp.narrow, i8 %new.narrow seq_cst seq_cst
  %prev = extractvalue { i8, i1 } %res, 0
  %wide = sext i8 %prev to i32
  ret i32 %wide
}
1277
1278; 32->64 sext rmw gets selected as i32.atomic.rmw.cmpxchg, i64.extend_i32_s
1279; CHECK-LABEL: cmpxchg_i32_i64_s_with_folded_offset:
1280; CHECK: i32.wrap_i64 $push1=, $1
1281; CHECK-NEXT: i32.wrap_i64 $push0=, $2
1282; CHECK-NEXT: i32.atomic.rmw.cmpxchg $push2=, 24($0), $pop1, $pop0{{$}}
1283; CHECK-NEXT: i64.extend_i32_s $push3=, $pop2{{$}}
define i64 @cmpxchg_i32_i64_s_with_folded_offset(i32* %p, i64 %exp, i64 %new) {
  ; Build the address p + 24 through an integer add carrying the nuw flag.
  %addr = ptrtoint i32* %p to i32
  %addr.off = add nuw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i32*
  ; Both i64 operands are truncated to i32; the loaded value is sign-extended to i64.
  %exp.narrow = trunc i64 %exp to i32
  %new.narrow = trunc i64 %new to i32
  %res = cmpxchg i32* %ptr, i32 %exp.narrow, i32 %new.narrow seq_cst seq_cst
  %prev = extractvalue { i32, i1 } %res, 0
  %wide = sext i32 %prev to i64
  ret i64 %wide
}
1295
1296; Fold a gep offset into a sign-extending rmw.
1297
1298; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_gep_offset:
1299; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
1300; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @cmpxchg_i8_i32_s_with_folded_gep_offset(i8* %p, i32 %exp, i32 %new) {
  ; Inbounds gep of 24 i8 elements, i.e. a byte offset of 24.
  %ptr = getelementptr inbounds i8, i8* %p, i32 24
  %exp.narrow = trunc i32 %exp to i8
  %new.narrow = trunc i32 %new to i8
  %res = cmpxchg i8* %ptr, i8 %exp.narrow, i8 %new.narrow seq_cst seq_cst
  ; The loaded i8 value is sign-extended back to i32.
  %prev = extractvalue { i8, i1 } %res, 0
  %wide = sext i8 %prev to i32
  ret i32 %wide
}
1310
1311; CHECK-LABEL: cmpxchg_i16_i32_s_with_folded_gep_offset:
1312; CHECK: i32.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
1313; CHECK-NEXT: i32.extend16_s $push1=, $pop0
define i32 @cmpxchg_i16_i32_s_with_folded_gep_offset(i16* %p, i32 %exp, i32 %new) {
  ; Inbounds gep of 24 i16 elements; the expected folded byte offset is 48.
  %ptr = getelementptr inbounds i16, i16* %p, i32 24
  %exp.narrow = trunc i32 %exp to i16
  %new.narrow = trunc i32 %new to i16
  %res = cmpxchg i16* %ptr, i16 %exp.narrow, i16 %new.narrow seq_cst seq_cst
  ; The loaded i16 value is sign-extended back to i32.
  %prev = extractvalue { i16, i1 } %res, 0
  %wide = sext i16 %prev to i32
  ret i32 %wide
}
1323
1324; CHECK-LABEL: cmpxchg_i16_i64_s_with_folded_gep_offset:
1325; CHECK: i64.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
1326; CHECK-NEXT: i64.extend16_s $push1=, $pop0
define i64 @cmpxchg_i16_i64_s_with_folded_gep_offset(i16* %p, i64 %exp, i64 %new) {
  ; Inbounds gep of 24 i16 elements; the expected folded byte offset is 48.
  %ptr = getelementptr inbounds i16, i16* %p, i32 24
  %exp.narrow = trunc i64 %exp to i16
  %new.narrow = trunc i64 %new to i16
  %res = cmpxchg i16* %ptr, i16 %exp.narrow, i16 %new.narrow seq_cst seq_cst
  ; The loaded i16 value is sign-extended to i64.
  %prev = extractvalue { i16, i1 } %res, 0
  %wide = sext i16 %prev to i64
  ret i64 %wide
}
1336
1337; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
1338; an 'add' if the or'ed bits are known to be zero.
1339
1340; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_or_offset:
1341; CHECK: i32.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
1342; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i32 @cmpxchg_i8_i32_s_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) {
  ; Masking with -4 clears the two low bits of %x, so the +2 below only sets
  ; bits that are known to be zero.
  %base = and i32 %x, -4
  %base.ptr = inttoptr i32 %base to i8*
  %elt = getelementptr inbounds i8, i8* %base.ptr, i32 2
  %exp.narrow = trunc i32 %exp to i8
  %new.narrow = trunc i32 %new to i8
  %res = cmpxchg i8* %elt, i8 %exp.narrow, i8 %new.narrow seq_cst seq_cst
  %prev = extractvalue { i8, i1 } %res, 0
  %wide = sext i8 %prev to i32
  ret i32 %wide
}
1354
1355; CHECK-LABEL: cmpxchg_i8_i64_s_with_folded_or_offset:
1356; CHECK: i64.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
1357; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i64 @cmpxchg_i8_i64_s_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) {
  ; Masking with -4 clears the two low bits of %x, so the +2 below only sets
  ; bits that are known to be zero.
  %base = and i32 %x, -4
  %base.ptr = inttoptr i32 %base to i8*
  %elt = getelementptr inbounds i8, i8* %base.ptr, i32 2
  %exp.narrow = trunc i64 %exp to i8
  %new.narrow = trunc i64 %new to i8
  %res = cmpxchg i8* %elt, i8 %exp.narrow, i8 %new.narrow seq_cst seq_cst
  %prev = extractvalue { i8, i1 } %res, 0
  %wide = sext i8 %prev to i64
  ret i64 %wide
}
1369
; When the cmpxchg targets a fixed address, materialize a zero as the base.
1371
; CHECK-LABEL: cmpxchg_i16_i32_s_from_numeric_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.cmpxchg_u $push1=, 42($pop0), $0, $1{{$}}
; CHECK-NEXT: i32.extend16_s $push2=, $pop1{{$}}
define i32 @cmpxchg_i16_i32_s_from_numeric_address(i32 %exp, i32 %new) {
  ; The pointer is the integer constant 42; the directives above expect it to
  ; appear as the instruction offset on top of a materialized zero base.
  %s = inttoptr i32 42 to i16*
  %exp_t = trunc i32 %exp to i16
  %new_t = trunc i32 %new to i16
  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  ; The loaded i16 value (field 0) is sign-extended back to i32.
  %old = extractvalue { i16, i1 } %pair, 0
  %u = sext i16 %old to i32
  ret i32 %u
}
1385
; CHECK-LABEL: cmpxchg_i8_i32_s_from_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.cmpxchg_u $push1=, gv8($pop0), $0, $1{{$}}
; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
define i32 @cmpxchg_i8_i32_s_from_global_address(i32 %exp, i32 %new) {
  ; The operation targets the global @gv8 directly; the directives above expect
  ; its symbol to appear as the offset on top of a materialized zero base.
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg i8* @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst
  ; The loaded i8 value (field 0) is sign-extended back to i32.
  %old = extractvalue { i8, i1 } %pair, 0
  %u = sext i8 %old to i32
  ret i32 %u
}
1398
1399;===----------------------------------------------------------------------------
1400; Atomic truncating & zero-extending ternary RMWs
1401;===----------------------------------------------------------------------------
1402
; Fold an offset into a zero-extending rmw.
1404
1405; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_offset:
1406; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i8_i32_z_with_folded_offset(i8* %p, i32 %exp, i32 %new) {
  ; Build the address p + 24 through an integer add carrying the nuw flag.
  %addr = ptrtoint i8* %p to i32
  %addr.off = add nuw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i8*
  ; Both operands are narrowed to i8; the loaded value is zero-extended to i32.
  %exp.narrow = trunc i32 %exp to i8
  %new.narrow = trunc i32 %new to i8
  %res = cmpxchg i8* %ptr, i8 %exp.narrow, i8 %new.narrow seq_cst seq_cst
  %prev = extractvalue { i8, i1 } %res, 0
  %wide = zext i8 %prev to i32
  ret i32 %wide
}
1418
1419; CHECK-LABEL: cmpxchg_i32_i64_z_with_folded_offset:
1420; CHECK: i64.atomic.rmw32.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
define i64 @cmpxchg_i32_i64_z_with_folded_offset(i32* %p, i64 %exp, i64 %new) {
  ; Build the address p + 24 through an integer add carrying the nuw flag.
  %addr = ptrtoint i32* %p to i32
  %addr.off = add nuw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i32*
  ; Both i64 operands are truncated to i32; the loaded value is zero-extended to i64.
  %exp.narrow = trunc i64 %exp to i32
  %new.narrow = trunc i64 %new to i32
  %res = cmpxchg i32* %ptr, i32 %exp.narrow, i32 %new.narrow seq_cst seq_cst
  %prev = extractvalue { i32, i1 } %res, 0
  %wide = zext i32 %prev to i64
  ret i64 %wide
}
1432
; Fold a gep offset into a zero-extending rmw.
1434
1435; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_gep_offset:
1436; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i8_i32_z_with_folded_gep_offset(i8* %p, i32 %exp, i32 %new) {
  ; Inbounds gep of 24 i8 elements, i.e. a byte offset of 24.
  %ptr = getelementptr inbounds i8, i8* %p, i32 24
  %exp.narrow = trunc i32 %exp to i8
  %new.narrow = trunc i32 %new to i8
  %res = cmpxchg i8* %ptr, i8 %exp.narrow, i8 %new.narrow seq_cst seq_cst
  ; The loaded i8 value is zero-extended back to i32.
  %prev = extractvalue { i8, i1 } %res, 0
  %wide = zext i8 %prev to i32
  ret i32 %wide
}
1446
1447; CHECK-LABEL: cmpxchg_i16_i32_z_with_folded_gep_offset:
1448; CHECK: i32.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
define i32 @cmpxchg_i16_i32_z_with_folded_gep_offset(i16* %p, i32 %exp, i32 %new) {
  ; Inbounds gep of 24 i16 elements; the expected folded byte offset is 48.
  %ptr = getelementptr inbounds i16, i16* %p, i32 24
  %exp.narrow = trunc i32 %exp to i16
  %new.narrow = trunc i32 %new to i16
  %res = cmpxchg i16* %ptr, i16 %exp.narrow, i16 %new.narrow seq_cst seq_cst
  ; The loaded i16 value is zero-extended back to i32.
  %prev = extractvalue { i16, i1 } %res, 0
  %wide = zext i16 %prev to i32
  ret i32 %wide
}
1458
1459; CHECK-LABEL: cmpxchg_i16_i64_z_with_folded_gep_offset:
1460; CHECK: i64.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
define i64 @cmpxchg_i16_i64_z_with_folded_gep_offset(i16* %p, i64 %exp, i64 %new) {
  ; Inbounds gep of 24 i16 elements; the expected folded byte offset is 48.
  %ptr = getelementptr inbounds i16, i16* %p, i32 24
  %exp.narrow = trunc i64 %exp to i16
  %new.narrow = trunc i64 %new to i16
  %res = cmpxchg i16* %ptr, i16 %exp.narrow, i16 %new.narrow seq_cst seq_cst
  ; The loaded i16 value is zero-extended to i64.
  %prev = extractvalue { i16, i1 } %res, 0
  %wide = zext i16 %prev to i64
  ret i64 %wide
}
1470
1471; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
1472; an 'add' if the or'ed bits are known to be zero.
1473
1474; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_or_offset:
1475; CHECK: i32.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
define i32 @cmpxchg_i8_i32_z_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) {
  ; Masking with -4 clears the two low bits of %x, so the +2 below only sets
  ; bits that are known to be zero.
  %base = and i32 %x, -4
  %base.ptr = inttoptr i32 %base to i8*
  %elt = getelementptr inbounds i8, i8* %base.ptr, i32 2
  %exp.narrow = trunc i32 %exp to i8
  %new.narrow = trunc i32 %new to i8
  %res = cmpxchg i8* %elt, i8 %exp.narrow, i8 %new.narrow seq_cst seq_cst
  %prev = extractvalue { i8, i1 } %res, 0
  %wide = zext i8 %prev to i32
  ret i32 %wide
}
1487
1488; CHECK-LABEL: cmpxchg_i8_i64_z_with_folded_or_offset:
1489; CHECK: i64.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
define i64 @cmpxchg_i8_i64_z_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) {
  ; Masking with -4 clears the two low bits of %x, so the +2 below only sets
  ; bits that are known to be zero.
  %base = and i32 %x, -4
  %base.ptr = inttoptr i32 %base to i8*
  %elt = getelementptr inbounds i8, i8* %base.ptr, i32 2
  %exp.narrow = trunc i64 %exp to i8
  %new.narrow = trunc i64 %new to i8
  %res = cmpxchg i8* %elt, i8 %exp.narrow, i8 %new.narrow seq_cst seq_cst
  %prev = extractvalue { i8, i1 } %res, 0
  %wide = zext i8 %prev to i64
  ret i64 %wide
}
1501
; When the cmpxchg targets a fixed address, materialize a zero as the base.
1503
; CHECK-LABEL: cmpxchg_i16_i32_z_from_numeric_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.cmpxchg_u $push1=, 42($pop0), $0, $1{{$}}
define i32 @cmpxchg_i16_i32_z_from_numeric_address(i32 %exp, i32 %new) {
  ; The pointer is the integer constant 42; the directives above expect it to
  ; appear as the instruction offset on top of a materialized zero base.
  %s = inttoptr i32 42 to i16*
  %exp_t = trunc i32 %exp to i16
  %new_t = trunc i32 %new to i16
  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  ; The loaded i16 value (field 0) is zero-extended back to i32.
  %old = extractvalue { i16, i1 } %pair, 0
  %u = zext i16 %old to i32
  ret i32 %u
}
1516
; CHECK-LABEL: cmpxchg_i8_i32_z_from_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.cmpxchg_u $push1=, gv8($pop0), $0, $1{{$}}
define i32 @cmpxchg_i8_i32_z_from_global_address(i32 %exp, i32 %new) {
  ; The operation targets the global @gv8 directly; the directives above expect
  ; its symbol to appear as the offset on top of a materialized zero base.
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg i8* @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst
  ; The loaded i8 value (field 0) is zero-extended back to i32.
  %old = extractvalue { i8, i1 } %pair, 0
  %u = zext i8 %old to i32
  ret i32 %u
}
1528
1529;===----------------------------------------------------------------------------
1530; Waits: 32-bit
1531;===----------------------------------------------------------------------------
1532
1533declare i32 @llvm.wasm.memory.atomic.wait32(i32*, i32, i64)
1534
1535; Basic wait.
1536
1537; CHECK-LABEL: wait32_no_offset:
1538; CHECK: memory.atomic.wait32 $push0=, 0($0), $1, $2{{$}}
1539; CHECK-NEXT: return $pop0{{$}}
define i32 @wait32_no_offset(i32* %p, i32 %exp, i64 %timeout) {
  ; Pass the pointer through unchanged and return the intrinsic's i32 result.
  %ret = call i32 @llvm.wasm.memory.atomic.wait32(i32* %p, i32 %exp, i64 %timeout)
  ret i32 %ret
}
1544
1545; With an nuw add, we can fold an offset.
1546
1547; CHECK-LABEL: wait32_with_folded_offset:
1548; CHECK: memory.atomic.wait32 $push0=, 24($0), $1, $2{{$}}
define i32 @wait32_with_folded_offset(i32* %p, i32 %exp, i64 %timeout) {
  ; Build the address p + 24 through an integer add carrying the nuw flag.
  %addr = ptrtoint i32* %p to i32
  %addr.off = add nuw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i32*
  %ret = call i32 @llvm.wasm.memory.atomic.wait32(i32* %ptr, i32 %exp, i64 %timeout)
  ret i32 %ret
}
1556
1557; With an inbounds gep, we can fold an offset.
1558
1559; CHECK-LABEL: wait32_with_folded_gep_offset:
1560; CHECK: memory.atomic.wait32 $push0=, 24($0), $1, $2{{$}}
define i32 @wait32_with_folded_gep_offset(i32* %p, i32 %exp, i64 %timeout) {
  ; Inbounds gep of 6 i32 elements; the expected folded byte offset is 24.
  %ptr = getelementptr inbounds i32, i32* %p, i32 6
  %ret = call i32 @llvm.wasm.memory.atomic.wait32(i32* %ptr, i32 %exp, i64 %timeout)
  ret i32 %ret
}
1566
1567; We can't fold a negative offset though, even with an inbounds gep.
1568
1569; CHECK-LABEL: wait32_with_unfolded_gep_negative_offset:
1570; CHECK: i32.const $push0=, -24{{$}}
1571; CHECK: i32.add $push1=, $0, $pop0{{$}}
1572; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait32_with_unfolded_gep_negative_offset(i32* %p, i32 %exp, i64 %timeout) {
  ; Inbounds gep with index -6 on i32; expected as an explicit add of -24.
  %ptr = getelementptr inbounds i32, i32* %p, i32 -6
  %ret = call i32 @llvm.wasm.memory.atomic.wait32(i32* %ptr, i32 %exp, i64 %timeout)
  ret i32 %ret
}
1578
1579; Without nuw, and even with nsw, we can't fold an offset.
1580
1581; CHECK-LABEL: wait32_with_unfolded_offset:
1582; CHECK: i32.const $push0=, 24{{$}}
1583; CHECK: i32.add $push1=, $0, $pop0{{$}}
1584; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait32_with_unfolded_offset(i32* %p, i32 %exp, i64 %timeout) {
  ; The add carries nsw only (no nuw), so it is expected to stay an explicit add.
  %addr = ptrtoint i32* %p to i32
  %addr.off = add nsw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i32*
  %ret = call i32 @llvm.wasm.memory.atomic.wait32(i32* %ptr, i32 %exp, i64 %timeout)
  ret i32 %ret
}
1592
1593; Without inbounds, we can't fold a gep offset.
1594
1595; CHECK-LABEL: wait32_with_unfolded_gep_offset:
1596; CHECK: i32.const $push0=, 24{{$}}
1597; CHECK: i32.add $push1=, $0, $pop0{{$}}
1598; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait32_with_unfolded_gep_offset(i32* %p, i32 %exp, i64 %timeout) {
  ; Plain gep (no inbounds); expected to stay an explicit add of 24.
  %ptr = getelementptr i32, i32* %p, i32 6
  %ret = call i32 @llvm.wasm.memory.atomic.wait32(i32* %ptr, i32 %exp, i64 %timeout)
  ret i32 %ret
}
1604
1605; When waiting from a fixed address, materialize a zero.
1606
; CHECK-LABEL: wait32_from_numeric_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.wait32 $push1=, 42($pop0), $0, $1{{$}}
define i32 @wait32_from_numeric_address(i32 %exp, i64 %timeout) {
  ; The pointer is the integer constant 42; the directives above expect it to
  ; appear as the instruction offset on top of a materialized zero base.
  %s = inttoptr i32 42 to i32*
  %t = call i32 @llvm.wasm.memory.atomic.wait32(i32* %s, i32 %exp, i64 %timeout)
  ret i32 %t
}
1615
; CHECK-LABEL: wait32_from_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.wait32 $push1=, gv($pop0), $0, $1{{$}}
define i32 @wait32_from_global_address(i32 %exp, i64 %timeout) {
  ; The wait targets the global @gv directly; the directives above expect its
  ; symbol to appear as the offset on top of a materialized zero base.
  %t = call i32 @llvm.wasm.memory.atomic.wait32(i32* @gv, i32 %exp, i64 %timeout)
  ret i32 %t
}
1623
1624;===----------------------------------------------------------------------------
1625; Waits: 64-bit
1626;===----------------------------------------------------------------------------
1627
1628declare i32 @llvm.wasm.memory.atomic.wait64(i64*, i64, i64)
1629
1630; Basic wait.
1631
1632; CHECK-LABEL: wait64_no_offset:
1633; CHECK: memory.atomic.wait64 $push0=, 0($0), $1, $2{{$}}
1634; CHECK-NEXT: return $pop0{{$}}
define i32 @wait64_no_offset(i64* %p, i64 %exp, i64 %timeout) {
  ; Pass the pointer through unchanged and return the intrinsic's i32 result.
  %ret = call i32 @llvm.wasm.memory.atomic.wait64(i64* %p, i64 %exp, i64 %timeout)
  ret i32 %ret
}
1639
1640; With an nuw add, we can fold an offset.
1641
1642; CHECK-LABEL: wait64_with_folded_offset:
1643; CHECK: memory.atomic.wait64 $push0=, 24($0), $1, $2{{$}}
define i32 @wait64_with_folded_offset(i64* %p, i64 %exp, i64 %timeout) {
  ; Build the address p + 24 through an integer add carrying the nuw flag.
  %addr = ptrtoint i64* %p to i32
  %addr.off = add nuw i32 %addr, 24
  %ptr = inttoptr i32 %addr.off to i64*
  %ret = call i32 @llvm.wasm.memory.atomic.wait64(i64* %ptr, i64 %exp, i64 %timeout)
  ret i32 %ret
}
1651
1652; With an inbounds gep, we can fold an offset.
1653
1654; CHECK-LABEL: wait64_with_folded_gep_offset:
1655; CHECK: memory.atomic.wait64 $push0=, 24($0), $1, $2{{$}}
define i32 @wait64_with_folded_gep_offset(i64* %p, i64 %exp, i64 %timeout) {
  ; Inbounds gep of 3 i64 elements; the expected folded byte offset is 24.
  %ptr = getelementptr inbounds i64, i64* %p, i32 3
  %ret = call i32 @llvm.wasm.memory.atomic.wait64(i64* %ptr, i64 %exp, i64 %timeout)
  ret i32 %ret
}
1661
1662; We can't fold a negative offset though, even with an inbounds gep.
1663
1664; CHECK-LABEL: wait64_with_unfolded_gep_negative_offset:
1665; CHECK: i32.const $push0=, -24{{$}}
1666; CHECK: i32.add $push1=, $0, $pop0{{$}}
1667; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait64_with_unfolded_gep_negative_offset(i64* %p, i64 %exp, i64 %timeout) {
  ; Inbounds gep with index -3 on i64; expected as an explicit add of -24.
  %ptr = getelementptr inbounds i64, i64* %p, i32 -3
  %ret = call i32 @llvm.wasm.memory.atomic.wait64(i64* %ptr, i64 %exp, i64 %timeout)
  ret i32 %ret
}
1673
; Without nuw, and even with nsw, we can't fold an offset.
;
; Same address computation as the folded-offset test, except that the add
; carries nsw instead of nuw. The expectations require an explicit
; i32.const 24 / i32.add and a zero offset on the wait.

; CHECK-LABEL: wait64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait64_with_unfolded_offset(i64* %p, i64 %exp, i64 %timeout) {
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %t = call i32 @llvm.wasm.memory.atomic.wait64(i64* %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

; Without inbounds, we can't fold a gep offset.
;
; Same gep as the folded-gep test (index 3 on i64, i.e. 24 bytes) but without
; the inbounds keyword. The expectations require an explicit i32.const 24 /
; i32.add and a zero offset on the wait.

; CHECK-LABEL: wait64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait64_with_unfolded_gep_offset(i64* %p, i64 %exp, i64 %timeout) {
  %s = getelementptr i64, i64* %p, i32 3
  %t = call i32 @llvm.wasm.memory.atomic.wait64(i64* %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

;===----------------------------------------------------------------------------
; Notifies
;===----------------------------------------------------------------------------

; Intrinsic exercised by the notify tests in this section. Going by the
; argument names those tests use, the operands are (address, notify count);
; the call yields an i32.
declare i32 @llvm.wasm.memory.atomic.notify(i32*, i32)

; Basic notify.
;
; The pointer argument is handed to the intrinsic unchanged, so the notify is
; expected to address memory through $0 with a zero offset, take the count
; from $1, and be followed immediately by the return of its result.

; CHECK-LABEL: notify_no_offset:
; CHECK: memory.atomic.notify $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @notify_no_offset(i32* %p, i32 %notify_count) {
  %v = call i32 @llvm.wasm.memory.atomic.notify(i32* %p, i32 %notify_count)
  ret i32 %v
}

; With an nuw add, we can fold an offset.
;
; The address is built as ptrtoint + 24 with the nuw (no unsigned wrap) flag
; and converted back to a pointer. The constant is expected to appear as the
; instruction's offset immediate, 24($0), with the unmodified argument $0 as
; the base.

; CHECK-LABEL: notify_with_folded_offset:
; CHECK: memory.atomic.notify $push0=, 24($0), $1{{$}}
define i32 @notify_with_folded_offset(i32* %p, i32 %notify_count) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; With an inbounds gep, we can fold an offset.
;
; Index 6 on an i32 element type is a displacement of 6 * 4 = 24 bytes, which
; is expected to appear as the offset immediate 24($0).

; CHECK-LABEL: notify_with_folded_gep_offset:
; CHECK: memory.atomic.notify $push0=, 24($0), $1{{$}}
define i32 @notify_with_folded_gep_offset(i32* %p, i32 %notify_count) {
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; We can't fold a negative offset though, even with an inbounds gep.
;
; Index -6 on an i32 element type is a displacement of -24 bytes. The
; expectations require it to be materialized with i32.const / i32.add, and the
; notify to use the sum as its base with a zero offset.

; CHECK-LABEL: notify_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}}
define i32 @notify_with_unfolded_gep_negative_offset(i32* %p, i32 %notify_count) {
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; Without nuw, and even with nsw, we can't fold an offset.
;
; Same address computation as the folded-offset test, except that the add
; carries nsw instead of nuw. The expectations require an explicit
; i32.const 24 / i32.add and a zero offset on the notify.

; CHECK-LABEL: notify_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}}
define i32 @notify_with_unfolded_offset(i32* %p, i32 %notify_count) {
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; Without inbounds, we can't fold a gep offset.
;
; Same gep as the folded-gep test (index 6 on i32, i.e. 24 bytes) but without
; the inbounds keyword. The expectations require an explicit i32.const 24 /
; i32.add and a zero offset on the notify.

; CHECK-LABEL: notify_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}}
define i32 @notify_with_unfolded_gep_offset(i32* %p, i32 %notify_count) {
  %s = getelementptr i32, i32* %p, i32 6
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; When notifying from a fixed address, materialize a zero.
;
; The address is the literal 42 converted to a pointer. The expectations
; require a zero base produced by i32.const and the literal itself in the
; offset position, 42($pop0); the count is the function's only argument, $0.

; CHECK-LABEL: notify_from_numeric_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.notify $push1=, 42($pop0), $0{{$}}
define i32 @notify_from_numeric_address(i32 %notify_count) {
  %s = inttoptr i32 42 to i32*
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* %s, i32 %notify_count)
  ret i32 %t
}

; When notifying from a global's address, materialize a zero as well.
;
; The address operand is the global @gv. The expectations require a zero base
; produced by i32.const and the symbol itself in the offset position,
; gv($pop0); the count is the function's only argument, $0.
; NOTE(review): @gv is not defined in this section - presumably it is declared
; earlier in the file; confirm.

; CHECK-LABEL: notify_from_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.notify $push1=, gv($pop0), $0{{$}}
define i32 @notify_from_global_address(i32 %notify_count) {
  %t = call i32 @llvm.wasm.memory.atomic.notify(i32* @gv, i32 %notify_count)
  ret i32 %t
}
1794