; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s

; Checks how a 128-bit (<4 x i32>) load from addrspace(3) is selected on three
; subtargets (gfx900, hawaii, tahiti) as the alignment stated on the load
; varies: unspecified, 1, 2, 4, 8 and 16 bytes.
;
; Every invocation above declares only its own per-subtarget prefix. No check
; line in this file is shared between subtargets, so no common prefix is
; declared; FileCheck reports an error for a declared prefix that is never
; used when unused prefixes are disallowed.
;
; The check lines are generated output. Regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; No alignment given on the load. Expected: a single ds_read_b128 on gfx900
; and hawaii, two ds_read_b64 on tahiti.
define <4 x i32> @load_lds_v4i32(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b128 v[0:3], v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_b128 v[0:3], v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v4i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 8, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_b64 v[2:3], v1
; GFX6-NEXT:    ds_read_b64 v[0:1], v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr
  ret <4 x i32> %load
}

; align 1. Expected: sixteen ds_read_u8 on every subtarget, with the bytes
; merged back into four dwords by shift/or sequences.
define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32_align1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_u8 v1, v0
; GFX9-NEXT:    ds_read_u8 v2, v0 offset:1
; GFX9-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX9-NEXT:    ds_read_u8 v4, v0 offset:3
; GFX9-NEXT:    ds_read_u8 v5, v0 offset:4
; GFX9-NEXT:    ds_read_u8 v6, v0 offset:5
; GFX9-NEXT:    ds_read_u8 v7, v0 offset:6
; GFX9-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX9-NEXT:    ds_read_u8 v9, v0 offset:8
; GFX9-NEXT:    ds_read_u8 v10, v0 offset:9
; GFX9-NEXT:    ds_read_u8 v11, v0 offset:10
; GFX9-NEXT:    ds_read_u8 v12, v0 offset:11
; GFX9-NEXT:    ds_read_u8 v13, v0 offset:12
; GFX9-NEXT:    ds_read_u8 v14, v0 offset:13
; GFX9-NEXT:    ds_read_u8 v15, v0 offset:14
; GFX9-NEXT:    ds_read_u8 v16, v0 offset:15
; GFX9-NEXT:    s_waitcnt lgkmcnt(14)
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 8, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(12)
; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(10)
; GFX9-NEXT:    v_lshl_or_b32 v1, v6, 8, v5
; GFX9-NEXT:    s_waitcnt lgkmcnt(8)
; GFX9-NEXT:    v_lshl_or_b32 v2, v8, 8, v7
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
; GFX9-NEXT:    v_lshl_or_b32 v2, v10, 8, v9
; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
; GFX9-NEXT:    v_lshl_or_b32 v3, v12, 8, v11
; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
; GFX9-NEXT:    v_lshl_or_b32 v3, v14, 8, v13
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_lshl_or_b32 v4, v16, 8, v15
; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32_align1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_u8 v1, v0 offset:7
; GFX7-NEXT:    ds_read_u8 v2, v0 offset:6
; GFX7-NEXT:    ds_read_u8 v3, v0 offset:5
; GFX7-NEXT:    ds_read_u8 v5, v0 offset:4
; GFX7-NEXT:    ds_read_u8 v4, v0 offset:3
; GFX7-NEXT:    ds_read_u8 v6, v0 offset:2
; GFX7-NEXT:    ds_read_u8 v7, v0 offset:1
; GFX7-NEXT:    ds_read_u8 v8, v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(7)
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
; GFX7-NEXT:    v_or_b32_e32 v4, v4, v6
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 8, v7
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_or_b32_e32 v7, v7, v8
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v5
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v4, v4, v7
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
; GFX7-NEXT:    ds_read_u8 v3, v0 offset:15
; GFX7-NEXT:    ds_read_u8 v5, v0 offset:14
; GFX7-NEXT:    ds_read_u8 v6, v0 offset:13
; GFX7-NEXT:    ds_read_u8 v7, v0 offset:12
; GFX7-NEXT:    ds_read_u8 v2, v0 offset:11
; GFX7-NEXT:    ds_read_u8 v8, v0 offset:10
; GFX7-NEXT:    ds_read_u8 v9, v0 offset:9
; GFX7-NEXT:    ds_read_u8 v0, v0 offset:8
; GFX7-NEXT:    s_waitcnt lgkmcnt(7)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v8
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v9, 8, v9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_or_b32_e32 v0, v9, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v5
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v6
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v7
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, v4
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v4i32_align1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 5, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 7, v0
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 6, v0
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 9, v0
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 8, v0
; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 11, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_u8 v2, v2
; GFX6-NEXT:    ds_read_u8 v3, v3
; GFX6-NEXT:    ds_read_u8 v4, v4
; GFX6-NEXT:    ds_read_u8 v5, v5
; GFX6-NEXT:    ds_read_u8 v6, v6
; GFX6-NEXT:    ds_read_u8 v7, v7
; GFX6-NEXT:    ds_read_u8 v1, v1
; GFX6-NEXT:    ds_read_u8 v8, v0
; GFX6-NEXT:    v_add_i32_e32 v9, vcc, 14, v0
; GFX6-NEXT:    v_add_i32_e32 v10, vcc, 3, v0
; GFX6-NEXT:    v_add_i32_e32 v11, vcc, 2, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v3
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v5
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v6
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v7
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 10, v0
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 13, v0
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 12, v0
; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 15, v0
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; GFX6-NEXT:    ds_read_u8 v4, v4
; GFX6-NEXT:    ds_read_u8 v5, v5
; GFX6-NEXT:    ds_read_u8 v6, v6
; GFX6-NEXT:    ds_read_u8 v7, v7
; GFX6-NEXT:    ds_read_u8 v9, v9
; GFX6-NEXT:    ds_read_u8 v10, v10
; GFX6-NEXT:    ds_read_u8 v11, v11
; GFX6-NEXT:    ds_read_u8 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(7)
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX6-NEXT:    s_waitcnt lgkmcnt(4)
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 8, v7
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    s_waitcnt lgkmcnt(3)
; GFX6-NEXT:    v_or_b32_e32 v4, v4, v9
; GFX6-NEXT:    v_or_b32_e32 v2, v3, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v5
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v6
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
; GFX6-NEXT:    s_waitcnt lgkmcnt(2)
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 8, v10
; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
; GFX6-NEXT:    v_or_b32_e32 v4, v4, v11
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v8
; GFX6-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 1
  ret <4 x i32> %load
}

; align 2. Expected: eight ds_read_u16 on every subtarget, with the halves
; merged back into four dwords by shift/or sequences.
define <4 x i32> @load_lds_v4i32_align2(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_u16 v1, v0
; GFX9-NEXT:    ds_read_u16 v2, v0 offset:2
; GFX9-NEXT:    ds_read_u16 v3, v0 offset:4
; GFX9-NEXT:    ds_read_u16 v4, v0 offset:6
; GFX9-NEXT:    ds_read_u16 v5, v0 offset:8
; GFX9-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX9-NEXT:    ds_read_u16 v7, v0 offset:12
; GFX9-NEXT:    ds_read_u16 v8, v0 offset:14
; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
; GFX9-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_lshl_or_b32 v3, v8, 16, v7
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32_align2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_u16 v3, v0 offset:14
; GFX7-NEXT:    ds_read_u16 v4, v0 offset:12
; GFX7-NEXT:    ds_read_u16 v2, v0 offset:10
; GFX7-NEXT:    ds_read_u16 v5, v0 offset:8
; GFX7-NEXT:    ds_read_u16 v1, v0 offset:6
; GFX7-NEXT:    ds_read_u16 v6, v0 offset:4
; GFX7-NEXT:    ds_read_u16 v7, v0 offset:2
; GFX7-NEXT:    ds_read_u16 v0, v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(5)
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_or_b32_e32 v0, v7, v0
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v6
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v5
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v4i32_align2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 6, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 10, v0
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 8, v0
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 14, v0
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 12, v0
; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 2, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_u16 v2, v2
; GFX6-NEXT:    ds_read_u16 v3, v3
; GFX6-NEXT:    ds_read_u16 v4, v4
; GFX6-NEXT:    ds_read_u16 v5, v5
; GFX6-NEXT:    ds_read_u16 v6, v6
; GFX6-NEXT:    ds_read_u16 v7, v7
; GFX6-NEXT:    ds_read_u16 v1, v1
; GFX6-NEXT:    ds_read_u16 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v6
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 2
  ret <4 x i32> %load
}

; align 4. Expected: two ds_read2_b32 on gfx900 and hawaii, four ds_read_b32
; with separately computed addresses on tahiti.
define <4 x i32> @load_lds_v4i32_align4(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, v0
; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX9-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32_align4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, v0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX7-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v4i32_align4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v0
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 12, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    ds_read_b32 v3, v3
; GFX6-NEXT:    ds_read_b32 v1, v1
; GFX6-NEXT:    ds_read_b32 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 4
  ret <4 x i32> %load
}

; align 8. Expected: a single ds_read_b128 on gfx900, one ds_read2_b64 on
; hawaii, two ds_read_b64 on tahiti.
define <4 x i32> @load_lds_v4i32_align8(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b128 v[0:3], v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read2_b64 v[0:3], v0 offset1:1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v4i32_align8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 8, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_b64 v[2:3], v1
; GFX6-NEXT:    ds_read_b64 v[0:1], v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 8
  ret <4 x i32> %load
}

; align 16. Expected: a single ds_read_b128 on gfx900 and hawaii, two
; ds_read_b64 on tahiti.
define <4 x i32> @load_lds_v4i32_align16(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32_align16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b128 v[0:3], v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32_align16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_b128 v[0:3], v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v4i32_align16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 8, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_b64 v[2:3], v1
; GFX6-NEXT:    ds_read_b64 v[0:1], v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 16
  ret <4 x i32> %load
}