; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s

; FIXME:
; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s

; Overview of this test
; ---------------------
; Every kernel below stores a <3 x i32> value (%x) through an addrspace(3)
; pointer (%out); the kernels differ only in the alignment stated on the store.
; The expected-output blocks record the instruction sequence llc emits with
; -global-isel for two targets: -mcpu=gfx900 (checked under the GFX9 prefix)
; and -mcpu=hawaii (checked under the GFX7 prefix).
;
; The tahiti invocation above is deliberately spelled "XUN" so that it is not
; executed; the FIXME next to it does not record why it is disabled.
;
; Differences between the two targets that are visible in the expected output:
;   * the kernel-argument load offsets are 0x24/0x34 for gfx900 and 0x9/0xd
;     for hawaii;
;   * the hawaii sequences contain an extra "s_mov_b32 m0, -1" before the
;     s_waitcnt.
;
; The expected-output lines are autogenerated (see the first line of the file);
; regenerate them with the script instead of editing them by hand.

; Store with no explicit alignment.
; Both targets are expected to emit a single ds_write_b96.
define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v3, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s2
; GFX9-NEXT: ds_write_b96 v3, v[0:2]
; GFX9-NEXT: s_endpgm
;
; GFX7-LABEL: store_lds_v3i32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v3, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: ds_write_b96 v3, v[0:2]
; GFX7-NEXT: s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out
  ret void
}

; Store with "align 1".
; Both targets are expected to split the store into twelve ds_write_b8
; instructions (offsets 0 through 11); the upper bytes of each dword are
; extracted with s_lshr_b32 by 8, 16 and 24.
define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: s_lshr_b32 s3, s0, 8
; GFX9-NEXT: ds_write_b8 v1, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s3
; GFX9-NEXT: s_lshr_b32 s5, s0, 16
; GFX9-NEXT: s_lshr_b32 s6, s0, 24
; GFX9-NEXT: ds_write_b8 v1, v0 offset:1
; GFX9-NEXT: v_mov_b32_e32 v0, s5
; GFX9-NEXT: ds_write_b8 v1, v0 offset:2
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: ds_write_b8 v1, v0 offset:3
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: s_lshr_b32 s0, s1, 8
; GFX9-NEXT: ds_write_b8 v1, v0 offset:4
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: s_lshr_b32 s3, s1, 16
; GFX9-NEXT: s_lshr_b32 s4, s1, 24
; GFX9-NEXT: ds_write_b8 v1, v0 offset:5
; GFX9-NEXT: v_mov_b32_e32 v0, s3
; GFX9-NEXT: ds_write_b8 v1, v0 offset:6
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: ds_write_b8 v1, v0 offset:7
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: s_lshr_b32 s0, s2, 8
; GFX9-NEXT: ds_write_b8 v1, v0 offset:8
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: s_lshr_b32 s1, s2, 16
; GFX9-NEXT: ds_write_b8 v1, v0 offset:9
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: s_lshr_b32 s3, s2, 24
; GFX9-NEXT: ds_write_b8 v1, v0 offset:10
; GFX9-NEXT: v_mov_b32_e32 v0, s3
; GFX9-NEXT: ds_write_b8 v1, v0 offset:11
; GFX9-NEXT: s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: s_lshr_b32 s3, s0, 8
; GFX7-NEXT: ds_write_b8 v1, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s3
; GFX7-NEXT: s_lshr_b32 s5, s0, 16
; GFX7-NEXT: s_lshr_b32 s6, s0, 24
; GFX7-NEXT: ds_write_b8 v1, v0 offset:1
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: ds_write_b8 v1, v0 offset:2
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: ds_write_b8 v1, v0 offset:3
; GFX7-NEXT: v_mov_b32_e32 v0, s1
; GFX7-NEXT: s_lshr_b32 s0, s1, 8
; GFX7-NEXT: ds_write_b8 v1, v0 offset:4
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: s_lshr_b32 s3, s1, 16
; GFX7-NEXT: s_lshr_b32 s4, s1, 24
; GFX7-NEXT: ds_write_b8 v1, v0 offset:5
; GFX7-NEXT: v_mov_b32_e32 v0, s3
; GFX7-NEXT: ds_write_b8 v1, v0 offset:6
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: ds_write_b8 v1, v0 offset:7
; GFX7-NEXT: v_mov_b32_e32 v0, s2
; GFX7-NEXT: s_lshr_b32 s0, s2, 8
; GFX7-NEXT: ds_write_b8 v1, v0 offset:8
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: s_lshr_b32 s1, s2, 16
; GFX7-NEXT: ds_write_b8 v1, v0 offset:9
; GFX7-NEXT: v_mov_b32_e32 v0, s1
; GFX7-NEXT: s_lshr_b32 s3, s2, 24
; GFX7-NEXT: ds_write_b8 v1, v0 offset:10
; GFX7-NEXT: v_mov_b32_e32 v0, s3
; GFX7-NEXT: ds_write_b8 v1, v0 offset:11
; GFX7-NEXT: s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1
  ret void
}

; Store with "align 2".
; Both targets are expected to split the store into six ds_write_b16
; instructions (offsets 0, 2, 4, 6, 8 and 10); the upper half of each dword is
; extracted with s_lshr_b32 by 16.
define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: s_lshr_b32 s3, s0, 16
; GFX9-NEXT: ds_write_b16 v1, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s3
; GFX9-NEXT: ds_write_b16 v1, v0 offset:2
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: s_lshr_b32 s0, s1, 16
; GFX9-NEXT: ds_write_b16 v1, v0 offset:4
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: ds_write_b16 v1, v0 offset:6
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: s_lshr_b32 s0, s2, 16
; GFX9-NEXT: ds_write_b16 v1, v0 offset:8
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: ds_write_b16 v1, v0 offset:10
; GFX9-NEXT: s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: s_lshr_b32 s3, s0, 16
; GFX7-NEXT: ds_write_b16 v1, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s3
; GFX7-NEXT: ds_write_b16 v1, v0 offset:2
; GFX7-NEXT: v_mov_b32_e32 v0, s1
; GFX7-NEXT: s_lshr_b32 s0, s1, 16
; GFX7-NEXT: ds_write_b16 v1, v0 offset:4
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: ds_write_b16 v1, v0 offset:6
; GFX7-NEXT: v_mov_b32_e32 v0, s2
; GFX7-NEXT: s_lshr_b32 s0, s2, 16
; GFX7-NEXT: ds_write_b16 v1, v0 offset:8
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: ds_write_b16 v1, v0 offset:10
; GFX7-NEXT: s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 2
  ret void
}

; Store with "align 4".
; Both targets are expected to emit a ds_write2_b32 (offset1:1) for the first
; two elements followed by a ds_write_b32 at offset 8 for the third.
define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: ds_write2_b32 v2, v0, v1 offset1:1
; GFX9-NEXT: ds_write_b32 v2, v3 offset:8
; GFX9-NEXT: s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align4:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s2
; GFX7-NEXT: ds_write2_b32 v2, v0, v1 offset1:1
; GFX7-NEXT: ds_write_b32 v2, v3 offset:8
; GFX7-NEXT: s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 4
  ret void
}

; Store with "align 8".
; Both targets are expected to emit a ds_write_b64 for the first two elements
; followed by a ds_write_b32 at offset 8 for the third.
define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: ds_write_b64 v2, v[0:1]
; GFX9-NEXT: ds_write_b32 v2, v3 offset:8
; GFX9-NEXT: s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align8:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s2
; GFX7-NEXT: ds_write_b64 v2, v[0:1]
; GFX7-NEXT: ds_write_b32 v2, v3 offset:8
; GFX7-NEXT: s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 8
  ret void
}

; Store with "align 16".
; Both targets are expected to emit a single ds_write_b96, the same sequence
; that is expected for the store with no explicit alignment.
define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v3, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s2
; GFX9-NEXT: ds_write_b96 v3, v[0:2]
; GFX9-NEXT: s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v3, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: ds_write_b96 v3, v[0:2]
; GFX7-NEXT: s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 16
  ret void
}