; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; The mask clears bits 30 and 31, so the zero-extended, shifted result fits in
; 32 bits: the 64-bit shift is reduced to a 32-bit s_lshl_b32 with a zero high
; half, and the now-redundant mask is folded away.
define amdgpu_kernel void @zext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) {
; GCN-LABEL: zext_shl64_to_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshl_b32 s0, s0, 2
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %and = and i32 %x, 1073741823
  %ext = zext i32 %and to i64
  %shl = shl i64 %ext, 2
  store i64 %shl, i64 addrspace(1)* %out, align 4
  ret void
}

; The mask leaves the sign bit clear, so the sext behaves like a zext and the
; shift is again reduced to 32 bits; here the mask itself must be kept.
define amdgpu_kernel void @sext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) {
; GCN-LABEL: sext_shl64_to_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_and_b32 s0, s0, 0x1fffffff
; GCN-NEXT:    s_lshl_b32 s0, s0, 2
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %and = and i32 %x, 536870911
  %ext = sext i32 %and to i64
  %shl = shl i64 %ext, 2
  store i64 %shl, i64 addrspace(1)* %out, align 4
  ret void
}

; Bit 30 of the masked value shifts past bit 31, so the result may not fit in
; 32 bits and the full 64-bit s_lshl_b64 must be kept.
define amdgpu_kernel void @zext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) {
; GCN-LABEL: zext_shl64_overflow:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bitset0_b32 s0, 31
; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %and = and i32 %x, 2147483647
  %ext = zext i32 %and to i64
  %shl = shl i64 %ext, 2
  store i64 %shl, i64 addrspace(1)* %out, align 4
  ret void
}

; Same as above with sext: the mask clears the sign bit, but the shifted
; result can still overflow 32 bits, so the 64-bit shift remains.
define amdgpu_kernel void @sext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) {
; GCN-LABEL: sext_shl64_overflow:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bitset0_b32 s0, 31
; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %and = and i32 %x, 2147483647
  %ext = sext i32 %and to i64
  %shl = shl i64 %ext, 2
  store i64 %shl, i64 addrspace(1)* %out, align 4
  ret void
}

; Both multiply operands are known small, so v_mul_u32_u24 is selected and the
; zero-extended, scaled index fits in 32 bits, feeding the addr64 address with
; a zero high half.
define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) {
; GCN-LABEL: mulu24_shl64:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-NEXT:    v_and_b32_e32 v0, 6, v0
; GCN-NEXT:    v_mul_u32_u24_e32 v0, 7, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, 0
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_store_dword v1, v[0:1], s[0:3], 0 addr64
; GCN-NEXT:    s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 6
  %mulconv = mul nuw nsw i32 %tmp1, 7
  %tmp2 = zext i32 %mulconv to i64
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp2
  store i32 0, i32 addrspace(1)* %tmp3, align 4
  ret void
}

; The signed 24-bit multiply only demands the low 24 bits, so the or is shrunk
; to setting bit 23 (v_mul_i32_i24 sign-extends from there), and the shifted
; result fits in 32 bits with a known-zero high half for the 64-bit store.
define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 addrspace(1)* nocapture readonly %arg1) {
; GCN-LABEL: muli24_shl64:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, 0
; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b64 s[0:1], s[6:7]
; GCN-NEXT:    buffer_load_dword v1, v[1:2], s[0:3], 0 addr64
; GCN-NEXT:    v_lshlrev_b32_e32 v3, 3, v0
; GCN-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GCN-NEXT:    v_mov_b32_e32 v4, v2
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_or_b32_e32 v0, 0x800000, v1
; GCN-NEXT:    v_mul_i32_i24_e32 v0, -7, v0
; GCN-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT:    buffer_store_dwordx2 v[1:2], v[3:4], s[4:7], 0 addr64
; GCN-NEXT:    s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp2
  %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4
  %tmp5 = or i32 %tmp4, -8388608
  %tmp6 = mul nsw i32 %tmp5, -7
  %tmp7 = zext i32 %tmp6 to i64
  %tmp8 = shl nuw nsw i64 %tmp7, 3
  %tmp9 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 %tmp2
  store i64 %tmp8, i64 addrspace(1)* %tmp9, align 8
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()