; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s

; Overview (hand-written, not generated):
;   This file pins the GlobalISel output for 96-bit loads (<3 x i32>, i96,
;   <6 x i16>, <12 x i8>) from addrspace(4) at alignments 1, 2, 4, 8 and 16.
;   The v_* functions take the pointer as an ordinary argument; the s_*
;   functions use the amdgpu_ps calling convention and take the pointer inreg.
;   Every case is compiled for gfx900 and hawaii, each with the
;   unaligned-access-mode feature switched on and off (see the llc lines above).
;   The assertion lines are autogenerated: regenerate them with
;   utils/update_llc_test_checks.py instead of editing them by hand.

; NOTE(review): the tahiti invocation below is spelled XUN, so lit does not
; execute it. The FIXME does not record why it is disabled -- confirm before
; re-enabling.
; FIXME:
; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s

; <3 x i32> load, align 1, pointer as a normal argument.
; Unaligned mode: one dwordx3 load. Otherwise the expectations show twelve
; byte loads that are masked, shifted and or-ed back into three dwords.
define <3 x i32> @v_load_constant_v3i32_align1(<3 x i32> addrspace(4)* %ptr) {
; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
; GFX9-UNALIGNED:       ; %bb.0:
; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-UNALIGNED-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX9-UNALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1:
; GFX9-NOUNALIGNED:       ; %bb.0:
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NOUNALIGNED-NEXT:    v_add_co_u32_e32 v2, vcc, 11, v0
; GFX9-NOUNALIGNED-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v1, v[2:3], off offset:-10
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v4, v[2:3], off offset:-9
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v5, v[2:3], off offset:-8
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v6, v[2:3], off offset:-7
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v7, v[2:3], off offset:-6
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v8, v[2:3], off offset:-5
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v9, v[2:3], off offset:-4
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v10, v[2:3], off offset:-3
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v11, v[2:3], off offset:-2
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v12, v[2:3], off offset:-1
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v2, v[2:3], off
; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v3, 0xff
; GFX9-NOUNALIGNED-NEXT:    s_movk_i32 s4, 0xff
; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v13, 8
; GFX9-NOUNALIGNED-NEXT:    s_mov_b32 s5, 8
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(10)
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(9)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s4, v4
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(8)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, s4, v5
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v0, v0, s4, v1
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(6)
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v7, v13, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v8, v8, v3
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v9, v9, v3
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 16, v4
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v11, v13, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(1)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v12, v12, v3
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, v2, v3
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v4, 24, v5
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v5, v6, v3, v7
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v6, 16, v8
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v7, 24, v9
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v3, v10, v3, v11
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v8, 16, v12
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v0, v0, v1, v4
; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v1, v5, v6, v7
; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v2, v3, v8, v2
; GFX9-NOUNALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
; GFX7-UNALIGNED:       ; %bb.0:
; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-UNALIGNED-NEXT:    s_mov_b32 s6, 0
; GFX7-UNALIGNED-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-UNALIGNED-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-UNALIGNED-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-UNALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1:
; GFX7-NOUNALIGNED:       ; %bb.0:
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s6, 0
; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NOUNALIGNED-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v2, v[0:1], s[4:7], 0 addr64
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v3, v[0:1], s[4:7], 0 addr64 offset:1
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v4, v[0:1], s[4:7], 0 addr64 offset:2
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v5, v[0:1], s[4:7], 0 addr64 offset:3
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v6, v[0:1], s[4:7], 0 addr64 offset:4
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v7, v[0:1], s[4:7], 0 addr64 offset:5
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v8, v[0:1], s[4:7], 0 addr64 offset:6
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v9, v[0:1], s[4:7], 0 addr64 offset:7
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v10, v[0:1], s[4:7], 0 addr64 offset:8
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v11, v[0:1], s[4:7], 0 addr64 offset:9
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v12, v[0:1], s[4:7], 0 addr64 offset:10
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:11
; GFX7-NOUNALIGNED-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX7-NOUNALIGNED-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(11)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(10)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(9)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s4, v4
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(8)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, s4, v5
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(7)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v6, s4, v6
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(6)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v7, v7, v1
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v8, v8, v1
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v9, v9, v1
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(3)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v10, v10, v1
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v11, v11, v1
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(1)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v12, v12, v1
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, v0, v1
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 8, v3
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v3, 16, v4
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v4, 24, v5
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v5, 8, v7
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v8, 24, v9
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v9, 8, v11
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v11, 16, v12
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v12, 24, v0
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v2, v1
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v6, v5
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v10, v9
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v3
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v1, v7
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v2, v11
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v1, v8
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v2, v12
; GFX7-NOUNALIGNED-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 1
  ret <3 x i32> %load
}

; <3 x i32> load, align 2, pointer as a normal argument.
; Unaligned mode: one dwordx3 load. Otherwise the expectations show six
; 16-bit loads that are masked, shifted and or-ed into three dwords.
define <3 x i32> @v_load_constant_v3i32_align2(<3 x i32> addrspace(4)* %ptr) {
; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align2:
; GFX9-UNALIGNED:       ; %bb.0:
; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-UNALIGNED-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX9-UNALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2:
; GFX9-NOUNALIGNED:       ; %bb.0:
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NOUNALIGNED-NEXT:    v_add_co_u32_e32 v2, vcc, 10, v0
; GFX9-NOUNALIGNED-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v0, v[0:1], off
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v1, v[2:3], off offset:-8
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v4, v[2:3], off offset:-6
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v5, v[2:3], off offset:-4
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v6, v[2:3], off offset:-2
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v2, v[2:3], off
; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NOUNALIGNED-NEXT:    s_mov_b32 s4, 0xffff
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, v5, v3
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, v2, v3
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v0, v0, s4, v1
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v1, v4, v3, v5
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v2, v6, v3, v2
; GFX9-NOUNALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align2:
; GFX7-UNALIGNED:       ; %bb.0:
; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-UNALIGNED-NEXT:    s_mov_b32 s6, 0
; GFX7-UNALIGNED-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-UNALIGNED-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-UNALIGNED-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-UNALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2:
; GFX7-NOUNALIGNED:       ; %bb.0:
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s6, 0
; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NOUNALIGNED-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:2
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:4
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:6
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:8
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:10
; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s4, 0xffff
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v1, s4, v2
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, s4, v3
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(3)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v3, s4, v4
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s4, v5
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(1)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, s4, v6
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v6, 16, v0
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v1, v2
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v3, v4
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v5, v6
; GFX7-NOUNALIGNED-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 2
  ret <3 x i32> %load
}

; <3 x i32> load, align 4, pointer as a normal argument.
; Both feature settings share one expectation per target: a single dwordx3 load.
define <3 x i32> @v_load_constant_v3i32_align4(<3 x i32> addrspace(4)* %ptr) {
; GFX9-LABEL: v_load_constant_v3i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_load_constant_v3i32_align4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4
  ret <3 x i32> %load
}

; i96 scalar-integer load, align 8, pointer as a normal argument.
; Expected output is a single dwordx3 load on both targets.
define i96 @v_load_constant_i96_align8(i96 addrspace(4)* %ptr) {
; GFX9-LABEL: v_load_constant_i96_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_load_constant_i96_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %load = load i96, i96 addrspace(4)* %ptr, align 8
  ret i96 %load
}

; <3 x i32> load, align 8, pointer as a normal argument.
; Expected output is a single dwordx3 load on both targets.
define <3 x i32> @v_load_constant_v3i32_align8(<3 x i32> addrspace(4)* %ptr) {
; GFX9-LABEL: v_load_constant_v3i32_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_load_constant_v3i32_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8
  ret <3 x i32> %load
}

; <6 x i16> load, align 8, pointer as a normal argument.
; gfx900 returns the three loaded dwords unchanged; the hawaii expectations
; additionally split every dword into separate 16-bit return registers.
define <6 x i16> @v_load_constant_v6i16_align8(<6 x i16> addrspace(4)* %ptr) {
; GFX9-LABEL: v_load_constant_v6i16_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_load_constant_v6i16_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx3 v[6:8], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v6
; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v7
; GFX7-NEXT:    v_lshrrev_b32_e32 v5, 16, v8
; GFX7-NEXT:    v_mov_b32_e32 v0, v6
; GFX7-NEXT:    v_mov_b32_e32 v2, v7
; GFX7-NEXT:    v_mov_b32_e32 v4, v8
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8
  ret <6 x i16> %load
}

; <12 x i8> load, align 8, pointer as a normal argument.
; One dwordx3 load followed by right shifts by 8/16/24 that spread the bytes
; over twelve return registers.
define <12 x i8> @v_load_constant_v12i8_align8(<12 x i8> addrspace(4)* %ptr) {
; GFX9-LABEL: v_load_constant_v12i8_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v13, 8, v0
; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 16, v0
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; GFX9-NEXT:    v_mov_b32_e32 v4, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
; GFX9-NEXT:    v_mov_b32_e32 v8, v2
; GFX9-NEXT:    v_mov_b32_e32 v1, v13
; GFX9-NEXT:    v_mov_b32_e32 v2, v12
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_load_constant_v12i8_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v13, 8, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v12, 16, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; GFX7-NEXT:    v_mov_b32_e32 v4, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
; GFX7-NEXT:    v_mov_b32_e32 v8, v2
; GFX7-NEXT:    v_mov_b32_e32 v1, v13
; GFX7-NEXT:    v_mov_b32_e32 v2, v12
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %load = load <12 x i8>, <12 x i8> addrspace(4)* %ptr, align 8
  ret <12 x i8> %load
}

; <3 x i32> load, align 16, pointer as a normal argument.
; Expected output is a single dwordx3 load on both targets.
define <3 x i32> @v_load_constant_v3i32_align16(<3 x i32> addrspace(4)* %ptr) {
; GFX9-LABEL: v_load_constant_v3i32_align16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_load_constant_v3i32_align16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 16
  ret <3 x i32> %load
}

; amdgpu_ps variant of the align 1 <3 x i32> load with an inreg pointer.
; The result is moved to s0..s2 with v_readfirstlane in every configuration.
; NOTE(review): in the hawaii unaligned expectations v2 is first written from
; s8 and then immediately overwritten from s0, and s8 is not written anywhere
; in the shown sequence. This is recorded compiler output; do not hand-edit it.
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(<3 x i32> addrspace(4)* inreg %ptr) {
; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
; GFX9-UNALIGNED:       ; %bb.0:
; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-UNALIGNED-NEXT:    global_load_dwordx3 v[0:2], v0, s[0:1]
; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
; GFX9-UNALIGNED-NEXT:    ; return to shader part epilog
;
; GFX9-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1:
; GFX9-NOUNALIGNED:       ; %bb.0:
; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v1, v0, s[0:1]
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v2, v0, s[0:1] offset:1
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v3, v0, s[0:1] offset:2
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v4, v0, s[0:1] offset:3
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v5, v0, s[0:1] offset:4
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v6, v0, s[0:1] offset:5
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v7, v0, s[0:1] offset:6
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v8, v0, s[0:1] offset:7
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v9, v0, s[0:1] offset:8
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v10, v0, s[0:1] offset:9
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v11, v0, s[0:1] offset:10
; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:11
; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v12, 0xff
; GFX9-NOUNALIGNED-NEXT:    s_movk_i32 s0, 0xff
; GFX9-NOUNALIGNED-NEXT:    s_mov_b32 s1, 8
; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v13, 8
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(10)
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v2, s1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(9)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v3, s0, v3
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(8)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s0, v4
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v1, v1, s0, v2
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(6)
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v6, s1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v7, v7, v12
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v8, v8, v12
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v10, v13, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(1)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v11, v11, v12
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, v0, v12
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v4, v5, s0, v6
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v5, 16, v7
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v6, 24, v8
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v7, v9, v12, v10
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v9, 24, v0
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v8, 16, v11
; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v0, v1, v2, v3
; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v1, v4, v5, v6
; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v2, v7, v8, v9
; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
; GFX9-NOUNALIGNED-NEXT:    ; return to shader part epilog
;
; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
; GFX7-UNALIGNED:       ; %bb.0:
; GFX7-UNALIGNED-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX7-UNALIGNED-NEXT:    s_load_dword s0, s[0:1], 0x2
; GFX7-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v2, s8
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v2, s0
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
; GFX7-UNALIGNED-NEXT:    ; return to shader part epilog
;
; GFX7-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1:
; GFX7-NOUNALIGNED:       ; %bb.0:
; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s2, -1
; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v1, off, s[0:3], 0 offset:1
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v2, off, s[0:3], 0 offset:2
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v3, off, s[0:3], 0 offset:3
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v4, off, s[0:3], 0 offset:4
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v5, off, s[0:3], 0 offset:5
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v6, off, s[0:3], 0 offset:6
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v7, off, s[0:3], 0 offset:7
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v8, off, s[0:3], 0 offset:8
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v9, off, s[0:3], 0 offset:9
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v10, off, s[0:3], 0 offset:10
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v11, off, s[0:3], 0 offset:11
; GFX7-NOUNALIGNED-NEXT:    v_mov_b32_e32 v12, 0xff
; GFX7-NOUNALIGNED-NEXT:    s_movk_i32 s0, 0xff
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(11)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(10)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v1, s0, v1
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(9)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, s0, v2
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(7)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s0, v4
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(6)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, v5, v12
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v6, v6, v12
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(3)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v8, v8, v12
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v9, v9, v12
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(1)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v10, v10, v12
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v9, 8, v9
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v3, s0, v3
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v7, v7, v12
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v11, v11, v12
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v4, v5
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v4, v8, v9
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v1, v6
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v11, 24, v11
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v4, v10
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v3
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v1, v7
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v2, v11
; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
; GFX7-NOUNALIGNED-NEXT:    ; return to shader part epilog
  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 1
  ret <3 x i32> %load
}

; amdgpu_ps variant of the align 2 <3 x i32> load with an inreg pointer.
; NOTE(review): the hawaii unaligned expectations contain the same s8 copy
; into v2 that is immediately overwritten from s0, as in the align 1 case.
; This is recorded compiler output; do not hand-edit it.
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(<3 x i32> addrspace(4)* inreg %ptr) {
; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align2:
; GFX9-UNALIGNED:       ; %bb.0:
; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-UNALIGNED-NEXT:    global_load_dwordx3 v[0:2], v0, s[0:1]
; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
; GFX9-UNALIGNED-NEXT:    ; return to shader part epilog
;
; GFX9-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2:
; GFX9-NOUNALIGNED:       ; %bb.0:
; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v1, v0, s[0:1]
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v2, v0, s[0:1] offset:2
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v3, v0, s[0:1] offset:4
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v4, v0, s[0:1] offset:6
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v5, v0, s[0:1] offset:8
; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v0, v0, s[0:1] offset:10
; GFX9-NOUNALIGNED-NEXT:    s_mov_b32 s0, 0xffff
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, s0, v2
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s0, v4
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v6, 16, v0
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v0, v1, s0, v2
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v1, v3, s0, v4
; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v2, v5, s0, v6
; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
; GFX9-NOUNALIGNED-NEXT:    ; return to shader part epilog
;
; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align2:
; GFX7-UNALIGNED:       ; %bb.0:
; GFX7-UNALIGNED-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX7-UNALIGNED-NEXT:    s_load_dword s0, s[0:1], 0x2
; GFX7-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v2, s8
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v2, s0
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
; GFX7-UNALIGNED-NEXT:    ; return to shader part epilog
;
; GFX7-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2:
; GFX7-NOUNALIGNED:       ; %bb.0:
; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s2, -1
; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v1, off, s[0:3], 0 offset:2
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v2, off, s[0:3], 0 offset:4
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v3, off, s[0:3], 0 offset:6
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v4, off, s[0:3], 0 offset:8
; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v5, off, s[0:3], 0 offset:10
; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s0, 0xffff
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v1, s0, v1
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v3, s0, v3
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, s0, v2
; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, s0, v5
; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s0, v4
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v2, v3
; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v4, v5
; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
; GFX7-NOUNALIGNED-NEXT:    ; return to shader part epilog
  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 2
  ret <3 x i32> %load
}

; amdgpu_ps <3 x i32> load, align 4, inreg pointer.
; Expected output splits the access into s_load_dwordx2 plus s_load_dword.
; The second load's offset operand is 0x8 on gfx900 and 0x2 on hawaii.
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) {
; GFX9-LABEL: s_load_constant_v3i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s2, s0
; GFX9-NEXT:    s_mov_b32 s3, s1
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: s_load_constant_v3i32_align4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s2, s0
; GFX7-NEXT:    s_mov_b32 s3, s1
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX7-NEXT:    s_load_dword s2, s[2:3], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4
  ret <3 x i32> %load
}

; amdgpu_ps i96 load, align 8, inreg pointer.
; Same dwordx2 + dword split as the align 4 vector case.
define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) {
; GFX9-LABEL: s_load_constant_i96_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s2, s0
; GFX9-NEXT:    s_mov_b32 s3, s1
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: s_load_constant_i96_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s2, s0
; GFX7-NEXT:    s_mov_b32 s3, s1
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX7-NEXT:    s_load_dword s2, s[2:3], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %load = load i96, i96 addrspace(4)* %ptr, align 8
  ret i96 %load
}

; amdgpu_ps <3 x i32> load, align 8, inreg pointer.
; Same dwordx2 + dword split as the align 4 case.
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* inreg %ptr) {
; GFX9-LABEL: s_load_constant_v3i32_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s2, s0
; GFX9-NEXT:    s_mov_b32 s3, s1
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: s_load_constant_v3i32_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s2, s0
; GFX7-NEXT:    s_mov_b32 s3, s1
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX7-NEXT:    s_load_dword s2, s[2:3], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8
  ret <3 x i32> %load
}

; amdgpu_ps <6 x i16> load, align 8, inreg pointer.
; The loaded value is bitcast to <3 x i32> before being returned, so the
; function's return type differs from the loaded type.
define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(<6 x i16> addrspace(4)* inreg %ptr) {
; GFX9-LABEL: s_load_constant_v6i16_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s2, s0
; GFX9-NEXT:    s_mov_b32 s3, s1
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: s_load_constant_v6i16_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s2, s0
; GFX7-NEXT:    s_mov_b32 s3, s1
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX7-NEXT:    s_load_dword s2, s[2:3], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8
  %cast = bitcast <6 x i16> %load to <3 x i32>
  ret <3 x i32> %cast
}

; amdgpu_ps <12 x i8> load, align 8, inreg pointer.
; dwordx2 + dword scalar loads followed by s_lshr_b32 by 8/16/24 that spread
; the bytes over s0..s11.
define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(<12 x i8> addrspace(4)* inreg %ptr) {
; GFX9-LABEL: s_load_constant_v12i8_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[12:13], s[0:1], 0x0
; GFX9-NEXT:    s_load_dword s8, s[0:1], 0x8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_lshr_b32 s1, s12, 8
; GFX9-NEXT:    s_lshr_b32 s2, s12, 16
; GFX9-NEXT:    s_lshr_b32 s3, s12, 24
; GFX9-NEXT:    s_lshr_b32 s5, s13, 8
; GFX9-NEXT:    s_lshr_b32 s6, s13, 16
; GFX9-NEXT:    s_lshr_b32 s7, s13, 24
; GFX9-NEXT:    s_lshr_b32 s9, s8, 8
; GFX9-NEXT:    s_lshr_b32 s10, s8, 16
; GFX9-NEXT:    s_lshr_b32 s11, s8, 24
; GFX9-NEXT:    s_mov_b32 s0, s12
; GFX9-NEXT:    s_mov_b32 s4, s13
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: s_load_constant_v12i8_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dwordx2 s[12:13], s[0:1], 0x0
; GFX7-NEXT:    s_load_dword s8, s[0:1], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_lshr_b32 s1, s12, 8
; GFX7-NEXT:    s_lshr_b32 s2, s12, 16
; GFX7-NEXT:    s_lshr_b32 s3, s12, 24
; GFX7-NEXT:    s_lshr_b32 s5, s13, 8
; GFX7-NEXT:    s_lshr_b32 s6, s13, 16
; GFX7-NEXT:    s_lshr_b32 s7, s13, 24
; GFX7-NEXT:    s_lshr_b32 s9, s8, 8
; GFX7-NEXT:    s_lshr_b32 s10, s8, 16
; GFX7-NEXT:    s_lshr_b32 s11, s8, 24
; GFX7-NEXT:    s_mov_b32 s0, s12
; GFX7-NEXT:    s_mov_b32 s4, s13
; GFX7-NEXT:    ; return to shader part epilog
  %load = load <12 x i8>, <12 x i8> addrspace(4)* %ptr, align 8
  ret <12 x i8> %load
}

; amdgpu_ps <3 x i32> load, align 16, inreg pointer.
; All four configurations share one expectation: a single s_load_dwordx4
; into s[0:3].
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(<3 x i32> addrspace(4)* inreg %ptr) {
; GCN-LABEL: s_load_constant_v3i32_align16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 16
  ret <3 x i32> %load
}