1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-- -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
; Masking %x to 30 bits means (zext << 2) fits entirely in the low 32 bits,
; so the backend narrows the 64-bit shift to a single 32-bit s_lshl_b32 and
; materializes the high dword as a constant zero (v1 = 0).
define amdgpu_kernel void @zext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) {
; GCN-LABEL: zext_shl64_to_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshl_b32 s0, s0, 2
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %and = and i32 %x, 1073741823        ; 0x3fffffff: clear top 2 bits, value < 2^30
  %ext = zext i32 %and to i64
  %shl = shl i64 %ext, 2               ; 30-bit value << 2 still fits in 32 bits
  store i64 %shl, i64 addrspace(1)* %out, align 4
  ret void
}
23
; Same narrowing as the zext case, but via sext: the 29-bit mask keeps the
; sign bit clear, so sign-extension is equivalent to zero-extension and the
; 64-bit shift is again reduced to a 32-bit s_lshl_b32 with a zero high half.
; The s_and_b32 with 0x1fffffff from the source mask survives into codegen.
define amdgpu_kernel void @sext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) {
; GCN-LABEL: sext_shl64_to_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_and_b32 s0, s0, 0x1fffffff
; GCN-NEXT:    s_lshl_b32 s0, s0, 2
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %and = and i32 %x, 536870911         ; 0x1fffffff: value < 2^29, sign bit clear
  %ext = sext i32 %and to i64          ; equivalent to zext since bit 31 is 0
  %shl = shl i64 %ext, 2               ; 29-bit value << 2 fits in 32 bits
  store i64 %shl, i64 addrspace(1)* %out, align 4
  ret void
}
44
; Negative test for the narrowing: masking only to 31 bits means the shift
; result can need up to 33 bits, so the 64-bit shift must NOT be reduced to
; 32 bits — a full s_lshl_b64 on the register pair is required.
define amdgpu_kernel void @zext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) {
; GCN-LABEL: zext_shl64_overflow:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bitset0_b32 s0, 31
; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %and = and i32 %x, 2147483647        ; 0x7fffffff: value < 2^31
  %ext = zext i32 %and to i64
  %shl = shl i64 %ext, 2               ; may exceed 32 bits -> 64-bit shift kept
  store i64 %shl, i64 addrspace(1)* %out, align 4
  ret void
}
66
; sext variant of the overflow case: the 31-bit mask clears the sign bit, so
; the sext lowers like a zext (s_bitset0_b32 + zero high word), but the shift
; result can still need 33 bits, so the full 64-bit s_lshl_b64 is kept —
; identical codegen to @zext_shl64_overflow.
define amdgpu_kernel void @sext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) {
; GCN-LABEL: sext_shl64_overflow:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bitset0_b32 s0, 31
; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %and = and i32 %x, 2147483647        ; 0x7fffffff: sign bit cleared
  %ext = sext i32 %and to i64          ; equivalent to zext since bit 31 is 0
  %shl = shl i64 %ext, 2               ; may exceed 32 bits -> 64-bit shift kept
  store i64 %shl, i64 addrspace(1)* %out, align 4
  ret void
}
88
; workitem.id.x masked with 6 gives a value <= 6, so %tmp1 * 7 <= 42 fits in
; 24 bits and the nuw/nsw multiply selects v_mul_u32_u24. The zext + GEP
; scaling (i32 element, x4) lowers to a 32-bit v_lshlrev_b32 feeding the
; 64-bit addr64 addressing mode, with the high address dword zero (v1 = 0).
define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) {
; GCN-LABEL: mulu24_shl64:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-NEXT:    v_and_b32_e32 v0, 6, v0
; GCN-NEXT:    v_mul_u32_u24_e32 v0, 7, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, 0
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_store_dword v1, v[0:1], s[0:3], 0 addr64
; GCN-NEXT:    s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 6              ; value in {0,2,4,6}
  %mulconv = mul nuw nsw i32 %tmp1, 7  ; <= 42, well within 24 bits
  %tmp2 = zext i32 %mulconv to i64
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp2
  store i32 0, i32 addrspace(1)* %tmp3, align 4
  ret void
}
111
; Signed mul24 + shift case: %tmp4 | 0xff800000 is selected as v_mul_i32_i24
; by -7; note the OR immediate in codegen is truncated to 0x800000 because
; the i24 multiply only reads the low 24 bits of its operand. The zext + shl
; nuw nsw by 3 is then emitted as a 32-bit v_lshlrev_b32 with a zero high
; dword for the stored 64-bit value.
define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 addrspace(1)* nocapture readonly %arg1) {
; GCN-LABEL: muli24_shl64:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, 0
; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b64 s[0:1], s[6:7]
; GCN-NEXT:    buffer_load_dword v1, v[1:2], s[0:3], 0 addr64
; GCN-NEXT:    v_lshlrev_b32_e32 v3, 3, v0
; GCN-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GCN-NEXT:    v_mov_b32_e32 v4, v2
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_or_b32_e32 v0, 0x800000, v1
; GCN-NEXT:    v_mul_i32_i24_e32 v0, -7, v0
; GCN-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT:    buffer_store_dwordx2 v[1:2], v[3:4], s[4:7], 0 addr64
; GCN-NEXT:    s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp2
  %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4
  %tmp5 = or i32 %tmp4, -8388608       ; 0xff800000; only low 24 bits reach mul24
  %tmp6 = mul nsw i32 %tmp5, -7
  %tmp7 = zext i32 %tmp6 to i64
  %tmp8 = shl nuw nsw i64 %tmp7, 3
  %tmp9 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 %tmp2
  store i64 %tmp8, i64 addrspace(1)* %tmp9, align 8
  ret void
}
145
146declare i32 @llvm.amdgcn.workitem.id.x()
147