; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN-HSA %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN-NOHSA,GCN-NOHSA-VI %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,EG %s

; Every kernel below is compiled once per llc invocation listed above. The
; assertion lines inside each function come from the update script named in
; the NOTE line, so regenerate them with that script instead of editing them.

; Loads one i16 through the addrspace(4) pointer %in and stores it unchanged
; through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_i16:
; GCN-NOHSA-SI: ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_load_i16:
; GCN-HSA: ; %bb.0: ; %entry
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
; GCN-HSA-NEXT: flat_store_short v[0:1], v2
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_i16:
; GCN-NOHSA-VI: ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_load_i16:
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load i16, i16 addrspace(4)* %in
  store i16 %ld, i16 addrspace(1)* %out
  ret void
}

; Loads a <2 x i16> vector through %in (addrspace(4)) and stores it unchanged
; through %out (addrspace(1)).
define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_v2i16:
; GCN-NOHSA-SI: ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dword s4, s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_load_v2i16:
; GCN-HSA: ; %bb.0: ; %entry
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT: s_load_dword s0, s[2:3], 0x0
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v2i16:
; GCN-NOHSA-VI: ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT: s_load_dword s0, s[2:3], 0x0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0
; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v2i16:
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(4)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
  ret void
}

; Loads a <3 x i16> vector through %in and stores it unchanged through %out.
; In the expectations for the three amdgcn runs the store is emitted as one
; dword store plus one short store placed 4 bytes further on.
define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_v3i16:
; GCN-NOHSA-SI: ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5
; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_load_v3i16:
; GCN-HSA: ; %bb.0: ; %entry
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GCN-HSA-NEXT: s_add_u32 s4, s0, 4
; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s2
; GCN-HSA-NEXT: flat_store_short v[2:3], v4
; GCN-HSA-NEXT: flat_store_dword v[0:1], v5
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v3i16:
; GCN-NOHSA-VI: ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s3
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s2
; GCN-NOHSA-VI-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
; GCN-NOHSA-VI-NEXT: buffer_store_dword v1, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v3i16:
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 2 @6
; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
; EG-NEXT: ALU clause starting at 12:
; EG-NEXT: MOV * T5.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T5.X, T2.W, PV.W,
; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT: MOV T5.Y, 0.0,
; EG-NEXT: MOV * T5.Z, 0.0,
; EG-NEXT: LSHR T8.X, T0.W, literal.x,
; EG-NEXT: LSHL T0.W, T7.X, literal.y,
; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT: OR_INT T6.X, PV.W, PS,
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
  ret void
}

; Loads a <4 x i16> vector through %in and stores it unchanged through %out.
define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_v4i16:
; GCN-NOHSA-SI: ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_load_v4i16:
; GCN-HSA: ; %bb.0: ; %entry
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v4i16:
; GCN-NOHSA-VI: ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v4i16:
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(4)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
  ret void
}

; Loads an <8 x i16> vector through %in and stores it unchanged through %out.
define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_v8i16:
; GCN-NOHSA-SI: ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_load_v8i16:
; GCN-HSA: ; %bb.0: ; %entry
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v8i16:
; GCN-NOHSA-VI: ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s3
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v8i16:
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(4)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
  ret void
}

; Loads a <16 x i16> vector through %in and stores it unchanged through %out.
; The expectations for every run show the store emitted as two 128-bit halves
; (offsets 0 and 16).
define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_v16i16:
; GCN-NOHSA-SI: ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s0
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s1
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s2
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s3
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_load_v16i16:
; GCN-HSA: ; %bb.0: ; %entry
; GCN-HSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
; GCN-HSA-NEXT: s_add_u32 s10, s8, 16
; GCN-HSA-NEXT: s_addc_u32 s11, s9, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s10
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s11
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7
; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v16i16:
; GCN-NOHSA-VI: ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s1
; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[0:7], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s2
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s3
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v16i16:
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 3, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT: ALU 1, @17, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @10
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1
; EG-NEXT: Fetch clause starting at 10:
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 12:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: LSHR * T2.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: ALU clause starting at 17:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(4)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
  ret void
}

; Loads a <16 x i16> vector from %ptr0 with only 2-byte alignment (align 2)
; and stores it, with align 32, to an undef addrspace(1) pointer. Attribute
; group #0 is not defined in this part of the file.
define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 {
; GCN-NOHSA-SI-LABEL: constant_load_v16i16_align2:
; GCN-NOHSA-SI: ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:2
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:6
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:10
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v6, off, s[0:3], 0 offset:12
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v7, off, s[0:3], 0 offset:14
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v8, off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v9, off, s[0:3], 0 offset:18
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v10, off, s[0:3], 0 offset:20
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v11, off, s[0:3], 0 offset:22
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v12, off, s[0:3], 0 offset:24
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v13, off, s[0:3], 0 offset:26
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v14, off, s[0:3], 0 offset:28
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v15, off, s[0:3], 0 offset:30
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(8)
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v16, 16, v3
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v17, 16, v2
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v18, 16, v1
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v15, 16, v15
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v3, v7, v6
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v2, v16, v5
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v1, v17, v4
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v0, v18, v0
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v7, v15, v14
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v6, v13, v12
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v5, v11, v10
; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v4, v9, v8
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_load_v16i16_align2:
; GCN-HSA: ; %bb.0: ; %entry
; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[0:3]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v16i16_align2:
; GCN-NOHSA-VI: ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:2
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:6
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:10
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v6, off, s[0:3], 0 offset:12
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v7, off, s[0:3], 0 offset:14
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v8, off, s[0:3], 0 offset:16
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v9, off, s[0:3], 0 offset:18
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v10, off, s[0:3], 0 offset:20
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v11, off, s[0:3], 0 offset:22
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v12, off, s[0:3], 0 offset:24
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v13, off, s[0:3], 0 offset:26
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v14, off, s[0:3], 0 offset:28
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v15, off, s[0:3], 0 offset:30
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(14)
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v18, 16, v1
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v0, v18, v0
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(12)
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v17, 16, v2
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v1, v17, v4
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(10)
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v16, 16, v3
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v2, v16, v5
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8)
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v3, v7, v6
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(6)
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v4, v9, v8
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4)
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v5, v11, v10
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2)
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v6, v13, v12
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v15, 16, v15
; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v7, v15, v14
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v16i16_align2:
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 1, @13, KC0[], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT: TEX 0 @10
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1
; EG-NEXT: Fetch clause starting at 10:
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 12:
; EG-NEXT: MOV * T0.X, KC0[2].Y,
; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: MOV * T2.X, literal.x,
; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2
  store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32
  ret void
}

; Loads an i16 through %in, zero-extends it to i32 and stores the result
; through %out.
define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_zextload_i16_to_i32:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i32:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_i16_to_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %a = load i16, i16 addrspace(4)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Loads an i16 through %in, sign-extends it to i32 and stores the result
; through %out.
define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_sextload_i16_to_i32:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i32:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3
; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_i16_to_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
  %a = load i16, i16 addrspace(4)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Loads a <1 x i16> vector through %in, zero-extends it to <1 x i32> and
; stores the result through %out.
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i32:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i32:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v1i16_to_v1i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
  %ext = zext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i32:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v2,
s2 835; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 836; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3] 837; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 838; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 839; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 840; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 841; GCN-HSA-NEXT: s_endpgm 842; 843; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i32: 844; GCN-NOHSA-VI: ; %bb.0: 845; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 846; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 847; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 848; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 849; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 850; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 851; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 852; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 853; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 854; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 855; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[4:7], 0 856; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 857; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 858; GCN-NOHSA-VI-NEXT: s_endpgm 859; 860; EG-LABEL: constant_sextload_v1i16_to_v1i32: 861; EG: ; %bb.0: 862; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 863; EG-NEXT: TEX 0 @6 864; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 865; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 866; EG-NEXT: CF_END 867; EG-NEXT: PAD 868; EG-NEXT: Fetch clause starting at 6: 869; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 870; EG-NEXT: ALU clause starting at 8: 871; EG-NEXT: MOV * T0.X, KC0[2].Z, 872; EG-NEXT: ALU clause starting at 9: 873; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 874; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 875; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 876 %load = load <1 x i16>, <1 x i16> addrspace(4)* %in 877 %ext = sext <1 x i16> %load to <1 x i32> 878 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out 879 ret void 880} 881 882define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 883; GCN-NOHSA-SI-LABEL: 
constant_zextload_v2i16_to_v2i32: 884; GCN-NOHSA-SI: ; %bb.0: 885; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 886; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 887; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 888; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 889; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 890; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 891; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s2, 0xffff 892; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 893; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 894; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s4 895; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 896; GCN-NOHSA-SI-NEXT: s_endpgm 897; 898; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i32: 899; GCN-HSA: ; %bb.0: 900; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 901; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 902; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 903; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 904; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 905; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 906; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 907; GCN-HSA-NEXT: s_and_b32 s1, s2, 0xffff 908; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 909; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 910; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 911; GCN-HSA-NEXT: s_endpgm 912; 913; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i32: 914; GCN-NOHSA-VI: ; %bb.0: 915; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 916; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 917; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 918; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 919; GCN-NOHSA-VI-NEXT: s_load_dword s2, s[2:3], 0x0 920; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 921; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 922; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 923; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s2, 16 924; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s2, 0xffff 925; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s1 926; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s0 927; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 928; GCN-NOHSA-VI-NEXT: s_endpgm 929; 930; EG-LABEL: 
constant_zextload_v2i16_to_v2i32: 931; EG: ; %bb.0: 932; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 933; EG-NEXT: TEX 0 @6 934; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 935; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1 936; EG-NEXT: CF_END 937; EG-NEXT: PAD 938; EG-NEXT: Fetch clause starting at 6: 939; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 940; EG-NEXT: ALU clause starting at 8: 941; EG-NEXT: MOV * T4.X, KC0[2].Z, 942; EG-NEXT: ALU clause starting at 9: 943; EG-NEXT: LSHR * T4.Y, T4.X, literal.x, 944; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 945; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 946; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 947; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 948 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 949 %ext = zext <2 x i16> %load to <2 x i32> 950 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out 951 ret void 952} 953 954; TODO: We should use ASHR instead of LSHR + BFE 955define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 956; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i32: 957; GCN-NOHSA-SI: ; %bb.0: 958; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 959; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 960; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 961; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 962; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 963; GCN-NOHSA-SI-NEXT: s_ashr_i32 s4, s2, 16 964; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s2 965; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 966; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 967; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s4 968; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 969; GCN-NOHSA-SI-NEXT: s_endpgm 970; 971; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i32: 972; GCN-HSA: ; %bb.0: 973; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 974; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 975; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 976; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 977; GCN-HSA-NEXT: 
v_mov_b32_e32 v1, s1 978; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 979; GCN-HSA-NEXT: s_ashr_i32 s0, s2, 16 980; GCN-HSA-NEXT: s_sext_i32_i16 s1, s2 981; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 982; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 983; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 984; GCN-HSA-NEXT: s_endpgm 985; 986; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i32: 987; GCN-NOHSA-VI: ; %bb.0: 988; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 989; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 990; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 991; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 992; GCN-NOHSA-VI-NEXT: s_load_dword s2, s[2:3], 0x0 993; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 994; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 995; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 996; GCN-NOHSA-VI-NEXT: s_ashr_i32 s0, s2, 16 997; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s1, s2 998; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s1 999; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s0 1000; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1001; GCN-NOHSA-VI-NEXT: s_endpgm 1002; 1003; EG-LABEL: constant_sextload_v2i16_to_v2i32: 1004; EG: ; %bb.0: 1005; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1006; EG-NEXT: TEX 0 @6 1007; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1008; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1 1009; EG-NEXT: CF_END 1010; EG-NEXT: PAD 1011; EG-NEXT: Fetch clause starting at 6: 1012; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1013; EG-NEXT: ALU clause starting at 8: 1014; EG-NEXT: MOV * T4.X, KC0[2].Z, 1015; EG-NEXT: ALU clause starting at 9: 1016; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 1017; EG-NEXT: LSHR T0.W, T4.X, literal.x, 1018; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y, 1019; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1020; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x, 1021; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1022 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 1023 %ext = sext <2 x i16> %load to <2 x i32> 1024 store <2 x i32> %ext, <2 x i32> addrspace(1)* 
%out 1025 ret void 1026} 1027 1028define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) { 1029; GCN-NOHSA-SI-LABEL: constant_zextload_v3i16_to_v3i32: 1030; GCN-NOHSA-SI: ; %bb.0: ; %entry 1031; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1032; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1033; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1034; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1035; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1036; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, 0xffff 1037; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1038; GCN-NOHSA-SI-NEXT: s_lshr_b32 s7, s4, 16 1039; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s6 1040; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s6 1041; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 1042; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 1043; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1044; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1045; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 1046; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1047; GCN-NOHSA-SI-NEXT: s_endpgm 1048; 1049; GCN-HSA-LABEL: constant_zextload_v3i16_to_v3i32: 1050; GCN-HSA: ; %bb.0: ; %entry 1051; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1052; GCN-HSA-NEXT: s_mov_b32 s6, 0xffff 1053; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1054; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1055; GCN-HSA-NEXT: v_mov_b32_e32 v4, s1 1056; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1057; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1058; GCN-HSA-NEXT: s_lshr_b32 s2, s0, 16 1059; GCN-HSA-NEXT: s_and_b32 s1, s1, s6 1060; GCN-HSA-NEXT: s_and_b32 s0, s0, s6 1061; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1062; GCN-HSA-NEXT: v_mov_b32_e32 v1, s2 1063; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1064; GCN-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1065; GCN-HSA-NEXT: s_endpgm 1066; 1067; GCN-NOHSA-VI-LABEL: constant_zextload_v3i16_to_v3i32: 1068; GCN-NOHSA-VI: ; %bb.0: ; %entry 1069; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 
1070; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff 1071; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1072; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1073; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1074; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1075; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1076; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 1077; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1078; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s4, 16 1079; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s8 1080; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s8 1081; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1082; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s6 1083; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1084; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 1085; GCN-NOHSA-VI-NEXT: s_endpgm 1086; 1087; EG-LABEL: constant_zextload_v3i16_to_v3i32: 1088; EG: ; %bb.0: ; %entry 1089; EG-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[] 1090; EG-NEXT: TEX 2 @6 1091; EG-NEXT: ALU 2, @17, KC0[], KC1[] 1092; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0 1093; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1 1094; EG-NEXT: CF_END 1095; EG-NEXT: Fetch clause starting at 6: 1096; EG-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1 1097; EG-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1 1098; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1 1099; EG-NEXT: ALU clause starting at 12: 1100; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 1101; EG-NEXT: MOV * T1.X, KC0[2].Z, 1102; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1103; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1104; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1105; EG-NEXT: ALU clause starting at 17: 1106; EG-NEXT: LSHR T4.X, T0.W, literal.x, 1107; EG-NEXT: MOV * T3.Y, T1.X, 1108; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1109entry: 1110 %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in 1111 %ext = zext <3 x i16> %ld to <3 x i32> 1112 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 1113 ret void 1114} 1115 1116define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> 
addrspace(4)* %in) { 1117; GCN-NOHSA-SI-LABEL: constant_sextload_v3i16_to_v3i32: 1118; GCN-NOHSA-SI: ; %bb.0: ; %entry 1119; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1120; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1121; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1122; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1123; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1124; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1125; GCN-NOHSA-SI-NEXT: s_ashr_i32 s6, s4, 16 1126; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1127; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1128; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 1129; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 1130; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1131; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1132; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s6 1133; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1134; GCN-NOHSA-SI-NEXT: s_endpgm 1135; 1136; GCN-HSA-LABEL: constant_sextload_v3i16_to_v3i32: 1137; GCN-HSA: ; %bb.0: ; %entry 1138; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1139; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1140; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1141; GCN-HSA-NEXT: v_mov_b32_e32 v4, s1 1142; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1143; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1144; GCN-HSA-NEXT: s_ashr_i32 s2, s0, 16 1145; GCN-HSA-NEXT: s_sext_i32_i16 s1, s1 1146; GCN-HSA-NEXT: s_sext_i32_i16 s0, s0 1147; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1148; GCN-HSA-NEXT: v_mov_b32_e32 v1, s2 1149; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1150; GCN-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1151; GCN-HSA-NEXT: s_endpgm 1152; 1153; GCN-NOHSA-VI-LABEL: constant_sextload_v3i16_to_v3i32: 1154; GCN-NOHSA-VI: ; %bb.0: ; %entry 1155; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1156; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1157; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1158; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1159; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1160; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, 
s5 1161; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 1162; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1163; GCN-NOHSA-VI-NEXT: s_ashr_i32 s6, s4, 16 1164; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 1165; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 1166; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1167; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s6 1168; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1169; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 1170; GCN-NOHSA-VI-NEXT: s_endpgm 1171; 1172; EG-LABEL: constant_sextload_v3i16_to_v3i32: 1173; EG: ; %bb.0: ; %entry 1174; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1175; EG-NEXT: TEX 2 @6 1176; EG-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[] 1177; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 1178; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1179; EG-NEXT: CF_END 1180; EG-NEXT: Fetch clause starting at 6: 1181; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1 1182; EG-NEXT: VTX_READ_16 T2.X, T0.X, 4, #1 1183; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1184; EG-NEXT: ALU clause starting at 12: 1185; EG-NEXT: MOV * T0.X, KC0[2].Z, 1186; EG-NEXT: ALU clause starting at 13: 1187; EG-NEXT: BFE_INT * T0.Y, T1.X, 0.0, literal.x, 1188; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1189; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 1190; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 1191; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1192; EG-NEXT: BFE_INT T2.X, T2.X, 0.0, literal.x, 1193; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1194; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1195; EG-NEXT: LSHR * T3.X, PV.W, literal.x, 1196; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1197entry: 1198 %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in 1199 %ext = sext <3 x i16> %ld to <3 x i32> 1200 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 1201 ret void 1202} 1203 1204; v4i16 is naturally 8 byte aligned 1205; TODO: This should use LD, but for some reason there are redundant MOVs 1206define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x 
i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 { 1207; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i32: 1208; GCN-NOHSA-SI: ; %bb.0: 1209; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1210; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1211; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1212; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1213; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff 1214; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1215; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s5, 16 1216; GCN-NOHSA-SI-NEXT: s_lshr_b32 s7, s4, 16 1217; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s2 1218; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s2 1219; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1220; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1221; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 1222; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1223; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s6 1224; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1225; GCN-NOHSA-SI-NEXT: s_endpgm 1226; 1227; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i32: 1228; GCN-HSA: ; %bb.0: 1229; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1230; GCN-HSA-NEXT: s_mov_b32 s6, 0xffff 1231; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1232; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1233; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1234; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1235; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1236; GCN-HSA-NEXT: s_lshr_b32 s2, s1, 16 1237; GCN-HSA-NEXT: s_lshr_b32 s3, s0, 16 1238; GCN-HSA-NEXT: s_and_b32 s1, s1, s6 1239; GCN-HSA-NEXT: s_and_b32 s0, s0, s6 1240; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1241; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1242; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1243; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1244; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1245; GCN-HSA-NEXT: s_endpgm 1246; 1247; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i32: 1248; GCN-NOHSA-VI: ; %bb.0: 1249; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1250; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff 1251; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1252; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1253; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1254; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1255; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1256; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 1257; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1258; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s5, 16 1259; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s4, 16 1260; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s8 1261; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s8 1262; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1263; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 1264; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1265; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s6 1266; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1267; GCN-NOHSA-VI-NEXT: s_endpgm 1268; 1269; EG-LABEL: constant_zextload_v4i16_to_v4i32: 1270; EG: ; %bb.0: 1271; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1272; EG-NEXT: TEX 0 @6 1273; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] 1274; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1275; EG-NEXT: CF_END 1276; EG-NEXT: PAD 1277; EG-NEXT: Fetch clause starting at 6: 1278; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1279; EG-NEXT: ALU clause starting at 8: 1280; EG-NEXT: MOV * T5.X, KC0[2].Z, 1281; EG-NEXT: ALU clause starting at 9: 1282; EG-NEXT: MOV T2.X, T5.X, 1283; EG-NEXT: MOV * T3.X, T5.Y, 1284; EG-NEXT: MOV T0.Y, PV.X, 1285; EG-NEXT: MOV * T0.Z, PS, 1286; EG-NEXT: LSHR * T5.W, PV.Z, literal.x, 1287; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1288; EG-NEXT: AND_INT * T5.Z, T0.Z, literal.x, 1289; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1290; EG-NEXT: LSHR * T5.Y, T0.Y, literal.x, 1291; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1292; EG-NEXT: AND_INT T5.X, T0.Y, literal.x, 1293; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.y, 1294; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1295 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 1296 %ext = zext <4 x i16> %load to <4 x i32> 1297 store <4 x i32> %ext, <4 x i32> 
addrspace(1)* %out 1298 ret void 1299} 1300 1301; v4i16 is naturally 8 byte aligned 1302; TODO: This should use LD, but for some reason there are redundant MOVs 1303; TODO: We should use ASHR instead of LSHR + BFE 1304define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 { 1305; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i32: 1306; GCN-NOHSA-SI: ; %bb.0: 1307; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1308; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1309; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1310; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1311; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1312; GCN-NOHSA-SI-NEXT: s_ashr_i32 s8, s4, 16 1313; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[4:5], 48 1314; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1315; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1316; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1317; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1318; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s8 1319; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1320; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s6 1321; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1322; GCN-NOHSA-SI-NEXT: s_endpgm 1323; 1324; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i32: 1325; GCN-HSA: ; %bb.0: 1326; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1327; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1328; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1329; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1330; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1331; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1332; GCN-HSA-NEXT: s_ashr_i64 s[2:3], s[0:1], 48 1333; GCN-HSA-NEXT: s_ashr_i32 s4, s0, 16 1334; GCN-HSA-NEXT: s_sext_i32_i16 s1, s1 1335; GCN-HSA-NEXT: s_sext_i32_i16 s0, s0 1336; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1337; GCN-HSA-NEXT: v_mov_b32_e32 v1, s4 1338; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1339; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1340; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1341; GCN-HSA-NEXT: s_endpgm 1342; 
1343; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i32: 1344; GCN-NOHSA-VI: ; %bb.0: 1345; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1346; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1347; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1348; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1349; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1350; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1351; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 1352; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1353; GCN-NOHSA-VI-NEXT: s_ashr_i32 s6, s5, 16 1354; GCN-NOHSA-VI-NEXT: s_ashr_i32 s7, s4, 16 1355; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 1356; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 1357; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1358; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 1359; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1360; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s6 1361; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1362; GCN-NOHSA-VI-NEXT: s_endpgm 1363; 1364; EG-LABEL: constant_sextload_v4i16_to_v4i32: 1365; EG: ; %bb.0: 1366; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1367; EG-NEXT: TEX 0 @6 1368; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 1369; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1370; EG-NEXT: CF_END 1371; EG-NEXT: PAD 1372; EG-NEXT: Fetch clause starting at 6: 1373; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1374; EG-NEXT: ALU clause starting at 8: 1375; EG-NEXT: MOV * T5.X, KC0[2].Z, 1376; EG-NEXT: ALU clause starting at 9: 1377; EG-NEXT: MOV T2.X, T5.X, 1378; EG-NEXT: MOV * T3.X, T5.Y, 1379; EG-NEXT: MOV T0.Y, PV.X, 1380; EG-NEXT: MOV * T0.Z, PS, 1381; EG-NEXT: BFE_INT * T5.Z, PV.Z, 0.0, literal.x, 1382; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1383; EG-NEXT: BFE_INT T5.X, T0.Y, 0.0, literal.x, 1384; EG-NEXT: LSHR * T0.W, T0.Z, literal.x, 1385; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1386; EG-NEXT: BFE_INT T5.W, PV.W, 0.0, literal.x, 1387; EG-NEXT: LSHR * T0.W, T0.Y, literal.x, 1388; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1389; EG-NEXT: LSHR T6.X, 
KC0[2].Y, literal.x, 1390; EG-NEXT: BFE_INT * T5.Y, PS, 0.0, literal.y, 1391; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1392 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 1393 %ext = sext <4 x i16> %load to <4 x i32> 1394 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 1395 ret void 1396} 1397 1398; v8i16 is naturally 16 byte aligned 1399; TODO: These should use LSHR instead of BFE_UINT 1400; TODO: This should use DST, but for some reason there are redundant MOVs 1401define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 { 1402; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i32: 1403; GCN-NOHSA-SI: ; %bb.0: 1404; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1405; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1406; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1407; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1408; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1409; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, 0xffff 1410; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1411; GCN-NOHSA-SI-NEXT: s_lshr_b32 s9, s5, 16 1412; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s4, 16 1413; GCN-NOHSA-SI-NEXT: s_lshr_b32 s11, s7, 16 1414; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s6, 16 1415; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s8 1416; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s8 1417; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s8 1418; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s8 1419; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1420; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s12 1421; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1422; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11 1423; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1424; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1425; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1426; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s10 1427; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1428; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 1429; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1430; GCN-NOHSA-SI-NEXT: 
s_endpgm 1431; 1432; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i32: 1433; GCN-HSA: ; %bb.0: 1434; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1435; GCN-HSA-NEXT: s_mov_b32 s8, 0xffff 1436; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1437; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1438; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1439; GCN-HSA-NEXT: s_lshr_b32 s2, s7, 16 1440; GCN-HSA-NEXT: s_lshr_b32 s3, s6, 16 1441; GCN-HSA-NEXT: s_lshr_b32 s9, s5, 16 1442; GCN-HSA-NEXT: s_lshr_b32 s10, s4, 16 1443; GCN-HSA-NEXT: s_and_b32 s7, s7, s8 1444; GCN-HSA-NEXT: s_and_b32 s6, s6, s8 1445; GCN-HSA-NEXT: s_and_b32 s5, s5, s8 1446; GCN-HSA-NEXT: s_and_b32 s4, s4, s8 1447; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1448; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1449; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1450; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1451; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1452; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1453; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1454; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1455; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1456; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1457; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1458; GCN-HSA-NEXT: v_mov_b32_e32 v1, s10 1459; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1460; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9 1461; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1462; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1463; GCN-HSA-NEXT: s_endpgm 1464; 1465; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i32: 1466; GCN-NOHSA-VI: ; %bb.0: 1467; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1468; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff 1469; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1470; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1471; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1472; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1473; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1474; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 1475; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1476; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s7, 16 1477; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s6, 16 1478; 
GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, s8 1479; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, s8 1480; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s5, 16 1481; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s4, 16 1482; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s8 1483; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s8 1484; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 1485; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s12 1486; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 1487; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11 1488; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1489; GCN-NOHSA-VI-NEXT: s_nop 0 1490; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1491; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s10 1492; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1493; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s9 1494; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1495; GCN-NOHSA-VI-NEXT: s_endpgm 1496; 1497; EG-LABEL: constant_zextload_v8i16_to_v8i32: 1498; EG: ; %bb.0: 1499; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1500; EG-NEXT: TEX 0 @6 1501; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 1502; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 1503; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 1504; EG-NEXT: CF_END 1505; EG-NEXT: Fetch clause starting at 6: 1506; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1507; EG-NEXT: ALU clause starting at 8: 1508; EG-NEXT: MOV * T7.X, KC0[2].Z, 1509; EG-NEXT: ALU clause starting at 9: 1510; EG-NEXT: LSHR * T8.W, T7.Y, literal.x, 1511; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1512; EG-NEXT: AND_INT * T8.Z, T7.Y, literal.x, 1513; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1514; EG-NEXT: LSHR T8.Y, T7.X, literal.x, 1515; EG-NEXT: LSHR * T9.W, T7.W, literal.x, 1516; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1517; EG-NEXT: AND_INT T8.X, T7.X, literal.x, 1518; EG-NEXT: AND_INT T9.Z, T7.W, literal.x, 1519; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.y, 1520; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1521; EG-NEXT: LSHR * T9.Y, T7.Z, literal.x, 1522; EG-NEXT: 
16(2.242078e-44), 0(0.000000e+00) 1523; EG-NEXT: AND_INT T9.X, T7.Z, literal.x, 1524; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1525; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1526; EG-NEXT: LSHR * T10.X, PV.W, literal.x, 1527; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1528 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 1529 %ext = zext <8 x i16> %load to <8 x i32> 1530 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 1531 ret void 1532} 1533 1534; v8i16 is naturally 16 byte aligned 1535; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT 1536; TODO: This should use DST, but for some reason there are redundant MOVs 1537define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 { 1538; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i32: 1539; GCN-NOHSA-SI: ; %bb.0: 1540; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1541; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1542; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1543; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1544; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1545; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1546; GCN-NOHSA-SI-NEXT: s_ashr_i32 s8, s5, 16 1547; GCN-NOHSA-SI-NEXT: s_ashr_i32 s9, s4, 16 1548; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1549; GCN-NOHSA-SI-NEXT: s_ashr_i32 s10, s7, 16 1550; GCN-NOHSA-SI-NEXT: s_ashr_i32 s11, s6, 16 1551; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 1552; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 1553; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1554; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1555; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 1556; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1557; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s10 1558; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1559; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1560; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1561; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9 1562; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1563; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s8 1564; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1565; GCN-NOHSA-SI-NEXT: s_endpgm 1566; 1567; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i32: 1568; GCN-HSA: ; %bb.0: 1569; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1570; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1571; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1572; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1573; GCN-HSA-NEXT: s_ashr_i32 s2, s7, 16 1574; GCN-HSA-NEXT: s_ashr_i32 s3, s6, 16 1575; GCN-HSA-NEXT: s_ashr_i32 s8, s5, 16 1576; GCN-HSA-NEXT: s_ashr_i32 s9, s4, 16 1577; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1578; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1579; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1580; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1581; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 1582; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 1583; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1584; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1585; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1586; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1587; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1588; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 1589; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 1590; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1591; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1592; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 1593; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1594; GCN-HSA-NEXT: v_mov_b32_e32 v3, s8 1595; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1596; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1597; GCN-HSA-NEXT: s_endpgm 1598; 1599; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i32: 1600; GCN-NOHSA-VI: ; %bb.0: 1601; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1602; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1603; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1604; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1605; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1606; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1607; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 1608; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1609; GCN-NOHSA-VI-NEXT: s_ashr_i32 s10, s7, 16 
1610; GCN-NOHSA-VI-NEXT: s_ashr_i32 s11, s6, 16 1611; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 1612; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 1613; GCN-NOHSA-VI-NEXT: s_ashr_i32 s8, s5, 16 1614; GCN-NOHSA-VI-NEXT: s_ashr_i32 s9, s4, 16 1615; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 1616; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 1617; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 1618; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11 1619; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 1620; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s10 1621; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1622; GCN-NOHSA-VI-NEXT: s_nop 0 1623; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1624; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 1625; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1626; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s8 1627; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1628; GCN-NOHSA-VI-NEXT: s_endpgm 1629; 1630; EG-LABEL: constant_sextload_v8i16_to_v8i32: 1631; EG: ; %bb.0: 1632; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1633; EG-NEXT: TEX 0 @6 1634; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] 1635; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 1636; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 1637; EG-NEXT: CF_END 1638; EG-NEXT: Fetch clause starting at 6: 1639; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1640; EG-NEXT: ALU clause starting at 8: 1641; EG-NEXT: MOV * T7.X, KC0[2].Z, 1642; EG-NEXT: ALU clause starting at 9: 1643; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x, 1644; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1645; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x, 1646; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x, 1647; EG-NEXT: LSHR * T0.W, T7.Y, literal.x, 1648; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1649; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x, 1650; EG-NEXT: LSHR T0.Z, T7.W, literal.x, 1651; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x, 1652; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 1653; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 
1654; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 1655; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y, 1656; EG-NEXT: LSHR T1.Z, T7.Z, literal.y, 1657; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y, 1658; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1659; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1660; EG-NEXT: LSHR T10.X, PS, literal.x, 1661; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y, 1662; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1663 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 1664 %ext = sext <8 x i16> %load to <8 x i32> 1665 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 1666 ret void 1667} 1668 1669define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 1670; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i32: 1671; GCN-NOHSA-SI: ; %bb.0: 1672; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 1673; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1674; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1675; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000 1676; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1 1677; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, 0xffff 1678; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1679; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s1, 16 1680; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s0, 16 1681; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s3, 16 1682; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s2, 16 1683; GCN-NOHSA-SI-NEXT: s_lshr_b32 s17, s5, 16 1684; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s4, 16 1685; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s7, 16 1686; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s6, 16 1687; GCN-NOHSA-SI-NEXT: s_and_b32 s1, s1, s12 1688; GCN-NOHSA-SI-NEXT: s_and_b32 s0, s0, s12 1689; GCN-NOHSA-SI-NEXT: s_and_b32 s3, s3, s12 1690; GCN-NOHSA-SI-NEXT: s_and_b32 s2, s2, s12 1691; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s12 1692; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s12 1693; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s12 1694; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s12 1695; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 
1696; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s20 1697; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1698; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s19 1699; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48 1700; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1701; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1702; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s18 1703; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1704; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s17 1705; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32 1706; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1707; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s2 1708; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s16 1709; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s3 1710; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s15 1711; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 1712; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1713; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s0 1714; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s14 1715; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s1 1716; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13 1717; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 1718; GCN-NOHSA-SI-NEXT: s_endpgm 1719; 1720; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i32: 1721; GCN-HSA: ; %bb.0: 1722; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1723; GCN-HSA-NEXT: s_mov_b32 s12, 0xffff 1724; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1725; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1726; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1727; GCN-HSA-NEXT: s_lshr_b32 s2, s11, 16 1728; GCN-HSA-NEXT: s_lshr_b32 s3, s10, 16 1729; GCN-HSA-NEXT: s_lshr_b32 s13, s5, 16 1730; GCN-HSA-NEXT: s_lshr_b32 s14, s4, 16 1731; GCN-HSA-NEXT: s_lshr_b32 s15, s7, 16 1732; GCN-HSA-NEXT: s_lshr_b32 s16, s6, 16 1733; GCN-HSA-NEXT: s_lshr_b32 s17, s9, 16 1734; GCN-HSA-NEXT: s_lshr_b32 s18, s8, 16 1735; GCN-HSA-NEXT: s_and_b32 s11, s11, s12 1736; GCN-HSA-NEXT: s_and_b32 s10, s10, s12 1737; GCN-HSA-NEXT: s_and_b32 s5, s5, s12 1738; GCN-HSA-NEXT: 
s_and_b32 s4, s4, s12 1739; GCN-HSA-NEXT: s_and_b32 s7, s7, s12 1740; GCN-HSA-NEXT: s_and_b32 s6, s6, s12 1741; GCN-HSA-NEXT: s_and_b32 s9, s9, s12 1742; GCN-HSA-NEXT: s_and_b32 s8, s8, s12 1743; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1744; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 1745; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1746; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1747; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1748; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1749; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 1750; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 1751; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 1752; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1753; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1754; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1755; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1756; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1757; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 1758; GCN-HSA-NEXT: v_mov_b32_e32 v1, s18 1759; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 1760; GCN-HSA-NEXT: v_mov_b32_e32 v3, s17 1761; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1762; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1763; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1764; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1765; GCN-HSA-NEXT: v_mov_b32_e32 v1, s16 1766; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1767; GCN-HSA-NEXT: v_mov_b32_e32 v3, s15 1768; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1769; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1770; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1771; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1772; GCN-HSA-NEXT: v_mov_b32_e32 v1, s14 1773; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1774; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13 1775; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1776; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1777; GCN-HSA-NEXT: s_endpgm 1778; 1779; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i32: 1780; GCN-NOHSA-VI: ; %bb.0: 1781; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1782; GCN-NOHSA-VI-NEXT: s_mov_b32 s12, 0xffff 1783; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1784; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1785; GCN-NOHSA-VI-NEXT: 
s_waitcnt lgkmcnt(0) 1786; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1787; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1788; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[6:7], 0x0 1789; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1790; GCN-NOHSA-VI-NEXT: s_lshr_b32 s19, s11, 16 1791; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s10, 16 1792; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, s12 1793; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, s12 1794; GCN-NOHSA-VI-NEXT: s_lshr_b32 s17, s9, 16 1795; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s8, 16 1796; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, s12 1797; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, s12 1798; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 1799; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s20 1800; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 1801; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s19 1802; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 1803; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s7, 16 1804; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s6, 16 1805; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, s12 1806; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, s12 1807; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 1808; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s18 1809; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 1810; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 1811; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 1812; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s5, 16 1813; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s4, 16 1814; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s12 1815; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s12 1816; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 1817; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s16 1818; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 1819; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15 1820; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1821; GCN-NOHSA-VI-NEXT: s_nop 0 1822; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1823; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s14 1824; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1825; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13 1826; 
GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1827; GCN-NOHSA-VI-NEXT: s_endpgm 1828; 1829; EG-LABEL: constant_zextload_v16i16_to_v16i32: 1830; EG: ; %bb.0: 1831; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1832; EG-NEXT: TEX 1 @8 1833; EG-NEXT: ALU 35, @13, KC0[CB0:0-32], KC1[] 1834; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0 1835; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0 1836; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0 1837; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1 1838; EG-NEXT: CF_END 1839; EG-NEXT: Fetch clause starting at 8: 1840; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 1841; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 1842; EG-NEXT: ALU clause starting at 12: 1843; EG-NEXT: MOV * T11.X, KC0[2].Z, 1844; EG-NEXT: ALU clause starting at 13: 1845; EG-NEXT: LSHR * T13.W, T12.Y, literal.x, 1846; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1847; EG-NEXT: AND_INT * T13.Z, T12.Y, literal.x, 1848; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1849; EG-NEXT: LSHR T13.Y, T12.X, literal.x, 1850; EG-NEXT: LSHR * T14.W, T12.W, literal.x, 1851; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1852; EG-NEXT: AND_INT T13.X, T12.X, literal.x, 1853; EG-NEXT: AND_INT T14.Z, T12.W, literal.x, 1854; EG-NEXT: LSHR * T12.X, KC0[2].Y, literal.y, 1855; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1856; EG-NEXT: LSHR T14.Y, T12.Z, literal.x, 1857; EG-NEXT: LSHR * T15.W, T11.Y, literal.x, 1858; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1859; EG-NEXT: AND_INT T14.X, T12.Z, literal.x, 1860; EG-NEXT: AND_INT T15.Z, T11.Y, literal.x, 1861; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1862; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1863; EG-NEXT: LSHR T16.X, PV.W, literal.x, 1864; EG-NEXT: LSHR T15.Y, T11.X, literal.y, 1865; EG-NEXT: LSHR T17.W, T11.W, literal.y, 1866; EG-NEXT: AND_INT * T15.X, T11.X, literal.z, 1867; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1868; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 
1869; EG-NEXT: AND_INT T17.Z, T11.W, literal.x, 1870; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1871; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 1872; EG-NEXT: LSHR T11.X, PV.W, literal.x, 1873; EG-NEXT: LSHR T17.Y, T11.Z, literal.y, 1874; EG-NEXT: AND_INT * T17.X, T11.Z, literal.z, 1875; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1876; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1877; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1878; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 1879; EG-NEXT: LSHR * T18.X, PV.W, literal.x, 1880; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1881 %load = load <16 x i16>, <16 x i16> addrspace(4)* %in 1882 %ext = zext <16 x i16> %load to <16 x i32> 1883 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 1884 ret void 1885} 1886; Sign-extending load test - loads <16 x i16> from addrspace(4) %in, sign-extends it to <16 x i32> and stores the result to addrspace(1) %out. 1887define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 1888; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i32: 1889; GCN-NOHSA-SI: ; %bb.0: 1890; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 1891; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1892; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1893; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000 1894; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1 1895; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1896; GCN-NOHSA-SI-NEXT: s_ashr_i32 s12, s1, 16 1897; GCN-NOHSA-SI-NEXT: s_ashr_i32 s13, s0, 16 1898; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s1, s1 1899; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s0, s0 1900; GCN-NOHSA-SI-NEXT: s_ashr_i32 s14, s3, 16 1901; GCN-NOHSA-SI-NEXT: s_ashr_i32 s15, s2, 16 1902; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s3, s3 1903; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s2, s2 1904; GCN-NOHSA-SI-NEXT: s_ashr_i32 s16, s5, 16 1905; GCN-NOHSA-SI-NEXT: s_ashr_i32 s17, s4, 16 1906; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1907; GCN-NOHSA-SI-NEXT: s_ashr_i32 s18, s7, 16 1908; GCN-NOHSA-SI-NEXT: s_ashr_i32 s19, s6, 16 1909; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 1910; GCN-NOHSA-SI-NEXT: 
s_sext_i32_i16 s6, s6 1911; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1912; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1913; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 1914; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1915; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 1916; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48 1917; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1918; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1919; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s17 1920; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1921; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s16 1922; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32 1923; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1924; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s2 1925; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15 1926; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s3 1927; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s14 1928; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 1929; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1930; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s0 1931; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13 1932; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s1 1933; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s12 1934; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 1935; GCN-NOHSA-SI-NEXT: s_endpgm 1936; 1937; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i32: 1938; GCN-HSA: ; %bb.0: 1939; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1940; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1941; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1942; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1943; GCN-HSA-NEXT: s_ashr_i32 s2, s11, 16 1944; GCN-HSA-NEXT: s_ashr_i32 s3, s10, 16 1945; GCN-HSA-NEXT: s_ashr_i32 s12, s5, 16 1946; GCN-HSA-NEXT: s_ashr_i32 s13, s4, 16 1947; GCN-HSA-NEXT: s_ashr_i32 s14, s7, 16 1948; GCN-HSA-NEXT: s_ashr_i32 s15, s6, 16 1949; GCN-HSA-NEXT: s_ashr_i32 s16, s9, 16 1950; GCN-HSA-NEXT: s_ashr_i32 s17, s8, 16 1951; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1952; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 
1953; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1954; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1955; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1956; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11 1957; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10 1958; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1959; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 1960; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 1961; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 1962; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1963; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1964; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1965; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9 1966; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8 1967; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1968; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1969; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 1970; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 1971; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 1972; GCN-HSA-NEXT: v_mov_b32_e32 v3, s16 1973; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1974; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1975; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 1976; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 1977; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1978; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1979; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 1980; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1981; GCN-HSA-NEXT: v_mov_b32_e32 v3, s14 1982; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1983; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1984; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 1985; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 1986; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1987; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1988; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13 1989; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1990; GCN-HSA-NEXT: v_mov_b32_e32 v3, s12 1991; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1992; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1993; GCN-HSA-NEXT: s_endpgm 1994; 1995; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i32: 1996; GCN-NOHSA-VI: ; %bb.0: 1997; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1998; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1999; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2000; 
GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2001; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2002; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 2003; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[6:7], 0x0 2004; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2005; GCN-NOHSA-VI-NEXT: s_ashr_i32 s18, s11, 16 2006; GCN-NOHSA-VI-NEXT: s_ashr_i32 s19, s10, 16 2007; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 2008; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 2009; GCN-NOHSA-VI-NEXT: s_ashr_i32 s16, s9, 16 2010; GCN-NOHSA-VI-NEXT: s_ashr_i32 s17, s8, 16 2011; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9 2012; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 2013; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 2014; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 2015; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 2016; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s18 2017; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2018; GCN-NOHSA-VI-NEXT: s_ashr_i32 s14, s7, 16 2019; GCN-NOHSA-VI-NEXT: s_ashr_i32 s15, s6, 16 2020; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 2021; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 2022; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 2023; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s17 2024; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 2025; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s16 2026; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2027; GCN-NOHSA-VI-NEXT: s_ashr_i32 s12, s5, 16 2028; GCN-NOHSA-VI-NEXT: s_ashr_i32 s13, s4, 16 2029; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 2030; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 2031; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2032; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 2033; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2034; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s14 2035; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2036; GCN-NOHSA-VI-NEXT: s_nop 0 2037; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2038; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13 2039; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2040; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v3, s12 2041; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2042; GCN-NOHSA-VI-NEXT: s_endpgm 2043; 2044; EG-LABEL: constant_sextload_v16i16_to_v16i32: 2045; EG: ; %bb.0: 2046; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2047; EG-NEXT: TEX 1 @8 2048; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[] 2049; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0 2050; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0 2051; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0 2052; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1 2053; EG-NEXT: CF_END 2054; EG-NEXT: Fetch clause starting at 8: 2055; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2056; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 2057; EG-NEXT: ALU clause starting at 12: 2058; EG-NEXT: MOV * T11.X, KC0[2].Z, 2059; EG-NEXT: ALU clause starting at 13: 2060; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 2061; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2062; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2063; EG-NEXT: LSHR T14.X, PV.W, literal.x, 2064; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y, 2065; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2066; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x, 2067; EG-NEXT: LSHR T0.Y, T12.W, literal.x, 2068; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212 2069; EG-NEXT: LSHR T0.W, T12.Y, literal.x, 2070; EG-NEXT: LSHR * T1.W, T11.Y, literal.x, 2071; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2072; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x, 2073; EG-NEXT: LSHR T1.Y, T11.W, literal.x, 2074; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x, 2075; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x, 2076; EG-NEXT: LSHR * T1.W, T11.X, literal.x, 2077; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2078; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x, 2079; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x, 2080; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x, 2081; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x, 2082; EG-NEXT: LSHR * T1.W, 
T11.Z, literal.x, 2083; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2084; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x, 2085; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x, 2086; EG-NEXT: LSHR T0.Z, T12.X, literal.x, 2087; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x, 2088; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2089; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44) 2090; EG-NEXT: LSHR T11.X, PS, literal.x, 2091; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y, 2092; EG-NEXT: LSHR T0.Z, T12.Z, literal.y, 2093; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y, 2094; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2095; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2096; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2097; EG-NEXT: LSHR T12.X, PS, literal.x, 2098; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y, 2099; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2100 %load = load <16 x i16>, <16 x i16> addrspace(4)* %in 2101 %ext = sext <16 x i16> %load to <16 x i32> 2102 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 2103 ret void 2104} 2105 2106define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 { 2107; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i32: 2108; GCN-NOHSA-SI: ; %bb.0: 2109; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9 2110; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2111; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2112; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, 0xffff 2113; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2114; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s1, 16 2115; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s0, 16 2116; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s3, 16 2117; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s2, 16 2118; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s5, 16 2119; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s4, 16 2120; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s7, 16 2121; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s6, 16 2122; GCN-NOHSA-SI-NEXT: s_and_b32 s27, s1, s18 2123; GCN-NOHSA-SI-NEXT: s_and_b32 
s28, s0, s18 2124; GCN-NOHSA-SI-NEXT: s_and_b32 s29, s3, s18 2125; GCN-NOHSA-SI-NEXT: s_and_b32 s30, s2, s18 2126; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s18 2127; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s18 2128; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s18 2129; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s18 2130; GCN-NOHSA-SI-NEXT: s_and_b32 s31, s9, s18 2131; GCN-NOHSA-SI-NEXT: s_and_b32 s33, s8, s18 2132; GCN-NOHSA-SI-NEXT: s_and_b32 s34, s11, s18 2133; GCN-NOHSA-SI-NEXT: s_and_b32 s35, s10, s18 2134; GCN-NOHSA-SI-NEXT: s_and_b32 s36, s13, s18 2135; GCN-NOHSA-SI-NEXT: s_and_b32 s37, s12, s18 2136; GCN-NOHSA-SI-NEXT: s_and_b32 s38, s15, s18 2137; GCN-NOHSA-SI-NEXT: s_and_b32 s18, s14, s18 2138; GCN-NOHSA-SI-NEXT: s_lshr_b32 s9, s9, 16 2139; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s8, 16 2140; GCN-NOHSA-SI-NEXT: s_lshr_b32 s11, s11, 16 2141; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s10, 16 2142; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s13, 16 2143; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s12, 16 2144; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s15, 16 2145; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s14, 16 2146; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2147; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2148; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 2149; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 2150; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 2151; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s14 2152; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s38 2153; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s15 2154; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2155; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2156; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s37 2157; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s12 2158; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s36 2159; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13 2160; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2161; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2162; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s35 2163; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s10 2164; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v2, s34 2165; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11 2166; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2167; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2168; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s33 2169; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s8 2170; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s31 2171; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 2172; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2173; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2174; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 2175; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s26 2176; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 2177; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25 2178; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2179; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2180; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 2181; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s24 2182; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 2183; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s23 2184; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2185; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2186; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s30 2187; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s22 2188; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s29 2189; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21 2190; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2191; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2192; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s28 2193; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s20 2194; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s27 2195; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s19 2196; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2197; GCN-NOHSA-SI-NEXT: s_endpgm 2198; 2199; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i32: 2200; GCN-HSA: ; %bb.0: 2201; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2202; GCN-HSA-NEXT: s_mov_b32 s20, 0xffff 2203; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2204; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], 
s[2:3], 0x0 2205; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2206; GCN-HSA-NEXT: s_and_b32 s34, s17, s20 2207; GCN-HSA-NEXT: s_and_b32 s35, s16, s20 2208; GCN-HSA-NEXT: s_and_b32 s36, s19, s20 2209; GCN-HSA-NEXT: s_and_b32 s21, s5, s20 2210; GCN-HSA-NEXT: s_and_b32 s22, s4, s20 2211; GCN-HSA-NEXT: s_and_b32 s23, s7, s20 2212; GCN-HSA-NEXT: s_and_b32 s24, s6, s20 2213; GCN-HSA-NEXT: s_and_b32 s25, s9, s20 2214; GCN-HSA-NEXT: s_and_b32 s26, s8, s20 2215; GCN-HSA-NEXT: s_and_b32 s27, s11, s20 2216; GCN-HSA-NEXT: s_and_b32 s28, s10, s20 2217; GCN-HSA-NEXT: s_and_b32 s29, s13, s20 2218; GCN-HSA-NEXT: s_and_b32 s30, s12, s20 2219; GCN-HSA-NEXT: s_and_b32 s31, s15, s20 2220; GCN-HSA-NEXT: s_and_b32 s33, s14, s20 2221; GCN-HSA-NEXT: s_and_b32 s20, s18, s20 2222; GCN-HSA-NEXT: s_lshr_b32 s17, s17, 16 2223; GCN-HSA-NEXT: s_lshr_b32 s16, s16, 16 2224; GCN-HSA-NEXT: s_lshr_b32 s19, s19, 16 2225; GCN-HSA-NEXT: s_lshr_b32 s18, s18, 16 2226; GCN-HSA-NEXT: s_lshr_b32 s5, s5, 16 2227; GCN-HSA-NEXT: s_lshr_b32 s4, s4, 16 2228; GCN-HSA-NEXT: s_lshr_b32 s7, s7, 16 2229; GCN-HSA-NEXT: s_lshr_b32 s6, s6, 16 2230; GCN-HSA-NEXT: s_lshr_b32 s9, s9, 16 2231; GCN-HSA-NEXT: s_lshr_b32 s8, s8, 16 2232; GCN-HSA-NEXT: s_lshr_b32 s11, s11, 16 2233; GCN-HSA-NEXT: s_lshr_b32 s10, s10, 16 2234; GCN-HSA-NEXT: s_lshr_b32 s13, s13, 16 2235; GCN-HSA-NEXT: s_lshr_b32 s12, s12, 16 2236; GCN-HSA-NEXT: s_lshr_b32 s15, s15, 16 2237; GCN-HSA-NEXT: s_lshr_b32 s14, s14, 16 2238; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 2239; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2240; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 2241; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 2242; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 2243; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2244; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 2245; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 2246; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 2247; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20 2248; GCN-HSA-NEXT: v_mov_b32_e32 v1, s18 2249; GCN-HSA-NEXT: v_mov_b32_e32 v2, s36 2250; GCN-HSA-NEXT: v_mov_b32_e32 v3, s19 2251; 
GCN-HSA-NEXT: v_mov_b32_e32 v4, s35 2252; GCN-HSA-NEXT: v_mov_b32_e32 v5, s16 2253; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2254; GCN-HSA-NEXT: v_mov_b32_e32 v6, s34 2255; GCN-HSA-NEXT: v_mov_b32_e32 v7, s17 2256; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 2257; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 2258; GCN-HSA-NEXT: v_mov_b32_e32 v0, s33 2259; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2260; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2261; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 2262; GCN-HSA-NEXT: v_mov_b32_e32 v1, s14 2263; GCN-HSA-NEXT: v_mov_b32_e32 v2, s31 2264; GCN-HSA-NEXT: v_mov_b32_e32 v3, s15 2265; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2266; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2267; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2268; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2269; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2270; GCN-HSA-NEXT: v_mov_b32_e32 v0, s30 2271; GCN-HSA-NEXT: v_mov_b32_e32 v1, s12 2272; GCN-HSA-NEXT: v_mov_b32_e32 v2, s29 2273; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13 2274; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2275; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2276; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2277; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2278; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 2279; GCN-HSA-NEXT: v_mov_b32_e32 v0, s28 2280; GCN-HSA-NEXT: v_mov_b32_e32 v1, s10 2281; GCN-HSA-NEXT: v_mov_b32_e32 v2, s27 2282; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 2283; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2284; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2285; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2286; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2287; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2288; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26 2289; GCN-HSA-NEXT: v_mov_b32_e32 v1, s8 2290; GCN-HSA-NEXT: v_mov_b32_e32 v2, s25 2291; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9 2292; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2293; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2294; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2295; GCN-HSA-NEXT: v_mov_b32_e32 v0, s24 2296; GCN-HSA-NEXT: v_mov_b32_e32 v1, s6 2297; GCN-HSA-NEXT: 
v_mov_b32_e32 v2, s23 2298; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 2299; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2300; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2301; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2302; GCN-HSA-NEXT: v_mov_b32_e32 v0, s22 2303; GCN-HSA-NEXT: v_mov_b32_e32 v1, s4 2304; GCN-HSA-NEXT: v_mov_b32_e32 v2, s21 2305; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 2306; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2307; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2308; GCN-HSA-NEXT: s_endpgm 2309; 2310; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i32: 2311; GCN-NOHSA-VI: ; %bb.0: 2312; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 2313; GCN-NOHSA-VI-NEXT: s_mov_b32 s20, 0xffff 2314; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 2315; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2316; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2317; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2318; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 2319; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[4:19], s[6:7], 0x0 2320; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2321; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s19, 16 2322; GCN-NOHSA-VI-NEXT: s_lshr_b32 s37, s18, 16 2323; GCN-NOHSA-VI-NEXT: s_and_b32 s19, s19, s20 2324; GCN-NOHSA-VI-NEXT: s_and_b32 s18, s18, s20 2325; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s17, 16 2326; GCN-NOHSA-VI-NEXT: s_lshr_b32 s35, s16, 16 2327; GCN-NOHSA-VI-NEXT: s_and_b32 s17, s17, s20 2328; GCN-NOHSA-VI-NEXT: s_and_b32 s16, s16, s20 2329; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 2330; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37 2331; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 2332; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s36 2333; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2334; GCN-NOHSA-VI-NEXT: s_lshr_b32 s31, s15, 16 2335; GCN-NOHSA-VI-NEXT: s_lshr_b32 s33, s14, 16 2336; GCN-NOHSA-VI-NEXT: s_and_b32 s15, s15, s20 2337; GCN-NOHSA-VI-NEXT: s_and_b32 s14, s14, s20 2338; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 2339; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s35 2340; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v2, s17 2341; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s34 2342; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2343; GCN-NOHSA-VI-NEXT: s_lshr_b32 s29, s13, 16 2344; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s12, 16 2345; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s13, s20 2346; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s12, s20 2347; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 2348; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s33 2349; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 2350; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s31 2351; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2352; GCN-NOHSA-VI-NEXT: s_lshr_b32 s27, s11, 16 2353; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s10, 16 2354; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, s20 2355; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, s20 2356; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 2357; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s30 2358; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 2359; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s29 2360; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2361; GCN-NOHSA-VI-NEXT: s_lshr_b32 s25, s9, 16 2362; GCN-NOHSA-VI-NEXT: s_lshr_b32 s26, s8, 16 2363; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, s20 2364; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, s20 2365; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 2366; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s28 2367; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 2368; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s27 2369; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2370; GCN-NOHSA-VI-NEXT: s_lshr_b32 s23, s7, 16 2371; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s6, 16 2372; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, s20 2373; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, s20 2374; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 2375; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s26 2376; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 2377; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s25 2378; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2379; 
GCN-NOHSA-VI-NEXT: s_lshr_b32 s21, s5, 16 2380; GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s4, 16 2381; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s20 2382; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s20 2383; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2384; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s24 2385; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2386; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s23 2387; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2388; GCN-NOHSA-VI-NEXT: s_nop 0 2389; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2390; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s22 2391; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2392; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s21 2393; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2394; GCN-NOHSA-VI-NEXT: s_endpgm 2395; 2396; EG-LABEL: constant_zextload_v32i16_to_v32i32: 2397; EG: ; %bb.0: 2398; EG-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] 2399; EG-NEXT: TEX 3 @12 2400; EG-NEXT: ALU 71, @21, KC0[CB0:0-32], KC1[] 2401; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0 2402; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 2403; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T32.X, 0 2404; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0 2405; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T29.X, 0 2406; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T19.X, 0 2407; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0 2408; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T20.X, 1 2409; EG-NEXT: CF_END 2410; EG-NEXT: Fetch clause starting at 12: 2411; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 2412; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 48, #1 2413; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 2414; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1 2415; EG-NEXT: ALU clause starting at 20: 2416; EG-NEXT: MOV * T19.X, KC0[2].Z, 2417; EG-NEXT: ALU clause starting at 21: 2418; EG-NEXT: LSHR * T23.W, T20.Y, literal.x, 2419; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2420; EG-NEXT: AND_INT * T23.Z, T20.Y, 
literal.x, 2421; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2422; EG-NEXT: LSHR T23.Y, T20.X, literal.x, 2423; EG-NEXT: LSHR * T24.W, T20.W, literal.x, 2424; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2425; EG-NEXT: AND_INT T23.X, T20.X, literal.x, 2426; EG-NEXT: AND_INT T24.Z, T20.W, literal.x, 2427; EG-NEXT: LSHR * T20.X, KC0[2].Y, literal.y, 2428; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 2429; EG-NEXT: LSHR T24.Y, T20.Z, literal.x, 2430; EG-NEXT: LSHR * T25.W, T19.Y, literal.x, 2431; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2432; EG-NEXT: AND_INT T24.X, T20.Z, literal.x, 2433; EG-NEXT: AND_INT T25.Z, T19.Y, literal.x, 2434; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2435; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2436; EG-NEXT: LSHR T26.X, PV.W, literal.x, 2437; EG-NEXT: LSHR T25.Y, T19.X, literal.y, 2438; EG-NEXT: LSHR T27.W, T19.W, literal.y, 2439; EG-NEXT: AND_INT * T25.X, T19.X, literal.z, 2440; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2441; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2442; EG-NEXT: AND_INT T27.Z, T19.W, literal.x, 2443; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2444; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2445; EG-NEXT: LSHR T19.X, PV.W, literal.x, 2446; EG-NEXT: LSHR T27.Y, T19.Z, literal.y, 2447; EG-NEXT: LSHR T28.W, T22.Y, literal.y, 2448; EG-NEXT: AND_INT * T27.X, T19.Z, literal.z, 2449; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2450; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2451; EG-NEXT: AND_INT T28.Z, T22.Y, literal.x, 2452; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2453; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2454; EG-NEXT: LSHR T29.X, PV.W, literal.x, 2455; EG-NEXT: LSHR T28.Y, T22.X, literal.y, 2456; EG-NEXT: LSHR T30.W, T22.W, literal.y, 2457; EG-NEXT: AND_INT * T28.X, T22.X, literal.z, 2458; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2459; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2460; EG-NEXT: AND_INT T30.Z, T22.W, literal.x, 2461; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, 
literal.y, 2462; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 2463; EG-NEXT: LSHR T22.X, PV.W, literal.x, 2464; EG-NEXT: LSHR T30.Y, T22.Z, literal.y, 2465; EG-NEXT: LSHR T31.W, T21.Y, literal.y, 2466; EG-NEXT: AND_INT * T30.X, T22.Z, literal.z, 2467; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2468; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2469; EG-NEXT: AND_INT T31.Z, T21.Y, literal.x, 2470; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2471; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 2472; EG-NEXT: LSHR T32.X, PV.W, literal.x, 2473; EG-NEXT: LSHR T31.Y, T21.X, literal.y, 2474; EG-NEXT: LSHR T33.W, T21.W, literal.y, 2475; EG-NEXT: AND_INT * T31.X, T21.X, literal.z, 2476; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2477; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2478; EG-NEXT: AND_INT T33.Z, T21.W, literal.x, 2479; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2480; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 2481; EG-NEXT: LSHR T21.X, PV.W, literal.x, 2482; EG-NEXT: LSHR T33.Y, T21.Z, literal.y, 2483; EG-NEXT: AND_INT * T33.X, T21.Z, literal.z, 2484; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2485; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2486; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2487; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 2488; EG-NEXT: LSHR * T34.X, PV.W, literal.x, 2489; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2490 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 2491 %ext = zext <32 x i16> %load to <32 x i32> 2492 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 2493 ret void 2494} 2495 2496define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 { 2497; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i32: 2498; GCN-NOHSA-SI: ; %bb.0: 2499; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9 2500; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2501; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2502; GCN-NOHSA-SI-NEXT: s_waitcnt 
lgkmcnt(0) 2503; GCN-NOHSA-SI-NEXT: s_ashr_i32 s18, s1, 16 2504; GCN-NOHSA-SI-NEXT: s_ashr_i32 s19, s0, 16 2505; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s20, s1 2506; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s21, s0 2507; GCN-NOHSA-SI-NEXT: s_ashr_i32 s22, s3, 16 2508; GCN-NOHSA-SI-NEXT: s_ashr_i32 s23, s2, 16 2509; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s24, s3 2510; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s25, s2 2511; GCN-NOHSA-SI-NEXT: s_ashr_i32 s26, s5, 16 2512; GCN-NOHSA-SI-NEXT: s_ashr_i32 s27, s4, 16 2513; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 2514; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 2515; GCN-NOHSA-SI-NEXT: s_ashr_i32 s28, s7, 16 2516; GCN-NOHSA-SI-NEXT: s_ashr_i32 s29, s6, 16 2517; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 2518; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 2519; GCN-NOHSA-SI-NEXT: s_ashr_i32 s30, s9, 16 2520; GCN-NOHSA-SI-NEXT: s_ashr_i32 s31, s8, 16 2521; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9 2522; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8 2523; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s11, 16 2524; GCN-NOHSA-SI-NEXT: s_ashr_i32 s34, s10, 16 2525; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11 2526; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10 2527; GCN-NOHSA-SI-NEXT: s_ashr_i32 s35, s13, 16 2528; GCN-NOHSA-SI-NEXT: s_ashr_i32 s36, s12, 16 2529; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s13, s13 2530; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s12, s12 2531; GCN-NOHSA-SI-NEXT: s_ashr_i32 s37, s15, 16 2532; GCN-NOHSA-SI-NEXT: s_ashr_i32 s38, s14, 16 2533; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s15, s15 2534; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s14, s14 2535; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2536; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2537; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 2538; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 2539; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 2540; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s38 2541; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 2542; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s37 2543; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 
2544; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2545; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 2546; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s36 2547; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 2548; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s35 2549; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2550; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2551; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 2552; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 2553; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 2554; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 2555; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2556; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2557; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 2558; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s31 2559; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 2560; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s30 2561; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2562; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2563; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 2564; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29 2565; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 2566; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s28 2567; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2568; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2569; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 2570; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s27 2571; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 2572; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s26 2573; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2574; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2575; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s25 2576; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s23 2577; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24 2578; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s22 2579; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2580; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2581; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s21 2582; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 
v1, s19 2583; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 2584; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 2585; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2586; GCN-NOHSA-SI-NEXT: s_endpgm 2587; 2588; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i32: 2589; GCN-HSA: ; %bb.0: 2590; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2591; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2592; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0 2593; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2594; GCN-HSA-NEXT: s_ashr_i32 s33, s17, 16 2595; GCN-HSA-NEXT: s_ashr_i32 s34, s16, 16 2596; GCN-HSA-NEXT: s_ashr_i32 s35, s19, 16 2597; GCN-HSA-NEXT: s_ashr_i32 s36, s18, 16 2598; GCN-HSA-NEXT: s_ashr_i32 s20, s5, 16 2599; GCN-HSA-NEXT: s_ashr_i32 s21, s4, 16 2600; GCN-HSA-NEXT: s_ashr_i32 s22, s7, 16 2601; GCN-HSA-NEXT: s_ashr_i32 s23, s6, 16 2602; GCN-HSA-NEXT: s_ashr_i32 s24, s9, 16 2603; GCN-HSA-NEXT: s_ashr_i32 s25, s8, 16 2604; GCN-HSA-NEXT: s_ashr_i32 s26, s11, 16 2605; GCN-HSA-NEXT: s_ashr_i32 s27, s10, 16 2606; GCN-HSA-NEXT: s_ashr_i32 s28, s13, 16 2607; GCN-HSA-NEXT: s_ashr_i32 s29, s12, 16 2608; GCN-HSA-NEXT: s_ashr_i32 s30, s15, 16 2609; GCN-HSA-NEXT: s_ashr_i32 s31, s14, 16 2610; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 2611; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2612; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 2613; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 2614; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 2615; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2616; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 2617; GCN-HSA-NEXT: s_sext_i32_i16 s16, s16 2618; GCN-HSA-NEXT: s_sext_i32_i16 s19, s19 2619; GCN-HSA-NEXT: s_sext_i32_i16 s18, s18 2620; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 2621; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 2622; GCN-HSA-NEXT: s_sext_i32_i16 s17, s17 2623; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 2624; GCN-HSA-NEXT: v_mov_b32_e32 v1, s36 2625; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19 2626; GCN-HSA-NEXT: v_mov_b32_e32 v3, s35 2627; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 2628; GCN-HSA-NEXT: v_mov_b32_e32 v5, s34 2629; 
GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2630; GCN-HSA-NEXT: v_mov_b32_e32 v6, s17 2631; GCN-HSA-NEXT: v_mov_b32_e32 v7, s33 2632; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 2633; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 2634; GCN-HSA-NEXT: s_sext_i32_i16 s15, s15 2635; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2636; GCN-HSA-NEXT: s_sext_i32_i16 s14, s14 2637; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2638; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 2639; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 2640; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31 2641; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 2642; GCN-HSA-NEXT: v_mov_b32_e32 v3, s30 2643; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2644; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2645; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2646; GCN-HSA-NEXT: s_sext_i32_i16 s13, s13 2647; GCN-HSA-NEXT: s_sext_i32_i16 s12, s12 2648; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2649; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2650; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12 2651; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 2652; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13 2653; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 2654; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2655; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2656; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2657; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11 2658; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10 2659; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2660; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 2661; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 2662; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 2663; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 2664; GCN-HSA-NEXT: v_mov_b32_e32 v3, s26 2665; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2666; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2667; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2668; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9 2669; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8 2670; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2671; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2672; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 2673; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 2674; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 2675; 
GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 2676; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2677; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2678; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 2679; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 2680; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2681; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 2682; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 2683; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 2684; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22 2685; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2686; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2687; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 2688; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 2689; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2690; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2691; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 2692; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 2693; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 2694; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2695; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2696; GCN-HSA-NEXT: s_endpgm 2697; 2698; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i32: 2699; GCN-NOHSA-VI: ; %bb.0: 2700; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 2701; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 2702; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2703; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2704; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2705; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 2706; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[4:19], s[6:7], 0x0 2707; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2708; GCN-NOHSA-VI-NEXT: s_ashr_i32 s35, s19, 16 2709; GCN-NOHSA-VI-NEXT: s_ashr_i32 s36, s18, 16 2710; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s19, s19 2711; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s18, s18 2712; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s17, 16 2713; GCN-NOHSA-VI-NEXT: s_ashr_i32 s34, s16, 16 2714; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s17, s17 2715; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s16, s16 2716; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 2717; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s36 2718; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 2719; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 
2720; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2721; GCN-NOHSA-VI-NEXT: s_ashr_i32 s30, s15, 16 2722; GCN-NOHSA-VI-NEXT: s_ashr_i32 s31, s14, 16 2723; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s15, s15 2724; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s14, s14 2725; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 2726; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 2727; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17 2728; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 2729; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2730; GCN-NOHSA-VI-NEXT: s_ashr_i32 s28, s13, 16 2731; GCN-NOHSA-VI-NEXT: s_ashr_i32 s29, s12, 16 2732; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s13, s13 2733; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s12, s12 2734; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 2735; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 2736; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 2737; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s30 2738; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2739; GCN-NOHSA-VI-NEXT: s_ashr_i32 s26, s11, 16 2740; GCN-NOHSA-VI-NEXT: s_ashr_i32 s27, s10, 16 2741; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 2742; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 2743; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 2744; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 2745; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 2746; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s28 2747; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2748; GCN-NOHSA-VI-NEXT: s_ashr_i32 s24, s9, 16 2749; GCN-NOHSA-VI-NEXT: s_ashr_i32 s25, s8, 16 2750; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9 2751; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 2752; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 2753; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27 2754; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 2755; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s26 2756; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2757; GCN-NOHSA-VI-NEXT: s_ashr_i32 s22, s7, 16 2758; GCN-NOHSA-VI-NEXT: 
s_ashr_i32 s23, s6, 16 2759; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 2760; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 2761; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 2762; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 2763; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 2764; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s24 2765; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2766; GCN-NOHSA-VI-NEXT: s_ashr_i32 s20, s5, 16 2767; GCN-NOHSA-VI-NEXT: s_ashr_i32 s21, s4, 16 2768; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 2769; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 2770; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2771; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 2772; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2773; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s22 2774; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2775; GCN-NOHSA-VI-NEXT: s_nop 0 2776; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2777; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21 2778; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2779; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s20 2780; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2781; GCN-NOHSA-VI-NEXT: s_endpgm 2782; 2783; EG-LABEL: constant_sextload_v32i16_to_v32i32: 2784; EG: ; %bb.0: 2785; EG-NEXT: ALU 8, @20, KC0[CB0:0-32], KC1[] 2786; EG-NEXT: TEX 3 @12 2787; EG-NEXT: ALU 73, @29, KC0[CB0:0-32], KC1[] 2788; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T24.X, 0 2789; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T22.X, 0 2790; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T28.X, 0 2791; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0 2792; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T26.X, 0 2793; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 2794; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0 2795; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1 2796; EG-NEXT: CF_END 2797; EG-NEXT: Fetch clause starting at 12: 2798; EG-NEXT: VTX_READ_128 T23.XYZW, T22.X, 16, #1 2799; EG-NEXT: VTX_READ_128 
T24.XYZW, T22.X, 32, #1 2800; EG-NEXT: VTX_READ_128 T25.XYZW, T22.X, 0, #1 2801; EG-NEXT: VTX_READ_128 T22.XYZW, T22.X, 48, #1 2802; EG-NEXT: ALU clause starting at 20: 2803; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 2804; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2805; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2806; EG-NEXT: LSHR T20.X, PV.W, literal.x, 2807; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2808; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 2809; EG-NEXT: LSHR T21.X, PV.W, literal.x, 2810; EG-NEXT: MOV * T22.X, KC0[2].Z, 2811; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2812; EG-NEXT: ALU clause starting at 29: 2813; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2814; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2815; EG-NEXT: LSHR T26.X, PV.W, literal.x, 2816; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2817; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 2818; EG-NEXT: LSHR T27.X, PV.W, literal.x, 2819; EG-NEXT: LSHR T0.W, T22.W, literal.y, 2820; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 2821; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2822; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) 2823; EG-NEXT: LSHR T28.X, PS, literal.x, 2824; EG-NEXT: LSHR T0.Y, T22.Y, literal.y, 2825; EG-NEXT: BFE_INT T29.Z, T25.Y, 0.0, literal.y, BS:VEC_120/SCL_212 2826; EG-NEXT: LSHR T1.W, T24.W, literal.y, 2827; EG-NEXT: LSHR * T2.W, T24.Y, literal.y, 2828; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2829; EG-NEXT: BFE_INT T29.X, T25.X, 0.0, literal.x, 2830; EG-NEXT: LSHR T1.Y, T23.W, literal.x, 2831; EG-NEXT: BFE_INT T30.Z, T25.W, 0.0, literal.x, BS:VEC_120/SCL_212 2832; EG-NEXT: LSHR T3.W, T23.Y, literal.x, 2833; EG-NEXT: LSHR * T4.W, T25.Y, literal.x, 2834; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2835; EG-NEXT: BFE_INT T30.X, T25.Z, 0.0, literal.x, 2836; EG-NEXT: LSHR T2.Y, T25.W, literal.x, 2837; EG-NEXT: BFE_INT T31.Z, T23.Y, 0.0, literal.x, 2838; EG-NEXT: BFE_INT T29.W, PS, 0.0, literal.x, 2839; EG-NEXT: LSHR * T4.W, T25.X, literal.x, 2840; EG-NEXT: 
16(2.242078e-44), 0(0.000000e+00) 2841; EG-NEXT: BFE_INT T31.X, T23.X, 0.0, literal.x, 2842; EG-NEXT: BFE_INT T29.Y, PS, 0.0, literal.x, 2843; EG-NEXT: BFE_INT T32.Z, T23.W, 0.0, literal.x, 2844; EG-NEXT: BFE_INT T30.W, PV.Y, 0.0, literal.x, 2845; EG-NEXT: LSHR * T4.W, T25.Z, literal.x, 2846; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2847; EG-NEXT: BFE_INT T32.X, T23.Z, 0.0, literal.x, 2848; EG-NEXT: BFE_INT T30.Y, PS, 0.0, literal.x, 2849; EG-NEXT: BFE_INT T25.Z, T24.Y, 0.0, literal.x, 2850; EG-NEXT: BFE_INT T31.W, T3.W, 0.0, literal.x, 2851; EG-NEXT: LSHR * T3.W, T23.X, literal.x, 2852; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2853; EG-NEXT: BFE_INT T25.X, T24.X, 0.0, literal.x, 2854; EG-NEXT: BFE_INT T31.Y, PS, 0.0, literal.x, 2855; EG-NEXT: BFE_INT T33.Z, T24.W, 0.0, literal.x, 2856; EG-NEXT: BFE_INT T32.W, T1.Y, 0.0, literal.x, 2857; EG-NEXT: LSHR * T3.W, T23.Z, literal.x, 2858; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2859; EG-NEXT: BFE_INT T33.X, T24.Z, 0.0, literal.x, 2860; EG-NEXT: BFE_INT T32.Y, PS, 0.0, literal.x, 2861; EG-NEXT: BFE_INT T23.Z, T22.Y, 0.0, literal.x, 2862; EG-NEXT: BFE_INT T25.W, T2.W, 0.0, literal.x, 2863; EG-NEXT: LSHR * T2.W, T24.X, literal.x, 2864; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2865; EG-NEXT: BFE_INT T23.X, T22.X, 0.0, literal.x, 2866; EG-NEXT: BFE_INT T25.Y, PS, 0.0, literal.x, 2867; EG-NEXT: BFE_INT T34.Z, T22.W, 0.0, literal.x, 2868; EG-NEXT: BFE_INT T33.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212 2869; EG-NEXT: LSHR * T1.W, T24.Z, literal.x, 2870; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2871; EG-NEXT: BFE_INT T34.X, T22.Z, 0.0, literal.x, 2872; EG-NEXT: BFE_INT T33.Y, PS, 0.0, literal.x, 2873; EG-NEXT: LSHR T0.Z, T22.X, literal.x, 2874; EG-NEXT: BFE_INT T23.W, T0.Y, 0.0, literal.x, 2875; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 2876; EG-NEXT: 16(2.242078e-44), 96(1.345247e-43) 2877; EG-NEXT: LSHR T22.X, PS, literal.x, 2878; EG-NEXT: BFE_INT T23.Y, PV.Z, 0.0, literal.y, 2879; EG-NEXT: LSHR T0.Z, T22.Z, 
literal.y, 2880; EG-NEXT: BFE_INT T34.W, T0.W, 0.0, literal.y, 2881; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2882; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2883; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 2884; EG-NEXT: LSHR T24.X, PS, literal.x, 2885; EG-NEXT: BFE_INT * T34.Y, PV.Z, 0.0, literal.y, 2886; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2887 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 2888 %ext = sext <32 x i16> %load to <32 x i32> 2889 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 2890 ret void 2891} 2892 2893define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 2894; GCN-NOHSA-SI-LABEL: constant_zextload_v64i16_to_v64i32: 2895; GCN-NOHSA-SI: ; %bb.0: 2896; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 2897; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2898; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0 2899; GCN-NOHSA-SI-NEXT: s_mov_b32 s20, 0xffff 2900; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x10 2901; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2902; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s5, 16 2903; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s4, 16 2904; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s7, 16 2905; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s6, 16 2906; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s9, 16 2907; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s8, 16 2908; GCN-NOHSA-SI-NEXT: s_lshr_b32 s27, s11, 16 2909; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s10, 16 2910; GCN-NOHSA-SI-NEXT: s_lshr_b32 s29, s13, 16 2911; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s12, 16 2912; GCN-NOHSA-SI-NEXT: s_lshr_b32 s31, s15, 16 2913; GCN-NOHSA-SI-NEXT: s_lshr_b32 s33, s14, 16 2914; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s17, 16 2915; GCN-NOHSA-SI-NEXT: s_lshr_b32 s35, s16, 16 2916; GCN-NOHSA-SI-NEXT: s_lshr_b32 s52, s19, 16 2917; GCN-NOHSA-SI-NEXT: s_lshr_b32 s53, s18, 16 2918; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s20 2919; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s20 2920; GCN-NOHSA-SI-NEXT: 
s_and_b32 s7, s7, s20 2921; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s20 2922; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, s20 2923; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, s20 2924; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s20 2925; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, s20 2926; GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, s20 2927; GCN-NOHSA-SI-NEXT: s_and_b32 s12, s12, s20 2928; GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, s20 2929; GCN-NOHSA-SI-NEXT: s_and_b32 s14, s14, s20 2930; GCN-NOHSA-SI-NEXT: s_and_b32 s17, s17, s20 2931; GCN-NOHSA-SI-NEXT: s_and_b32 s16, s16, s20 2932; GCN-NOHSA-SI-NEXT: s_and_b32 s19, s19, s20 2933; GCN-NOHSA-SI-NEXT: s_and_b32 s18, s18, s20 2934; GCN-NOHSA-SI-NEXT: s_and_b32 s54, s37, s20 2935; GCN-NOHSA-SI-NEXT: s_and_b32 s55, s36, s20 2936; GCN-NOHSA-SI-NEXT: s_and_b32 s56, s39, s20 2937; GCN-NOHSA-SI-NEXT: s_and_b32 s57, s38, s20 2938; GCN-NOHSA-SI-NEXT: s_and_b32 s58, s41, s20 2939; GCN-NOHSA-SI-NEXT: s_and_b32 s59, s40, s20 2940; GCN-NOHSA-SI-NEXT: s_and_b32 s60, s43, s20 2941; GCN-NOHSA-SI-NEXT: s_and_b32 s61, s42, s20 2942; GCN-NOHSA-SI-NEXT: s_and_b32 s62, s45, s20 2943; GCN-NOHSA-SI-NEXT: s_and_b32 s63, s44, s20 2944; GCN-NOHSA-SI-NEXT: s_and_b32 s64, s47, s20 2945; GCN-NOHSA-SI-NEXT: s_and_b32 s65, s46, s20 2946; GCN-NOHSA-SI-NEXT: s_and_b32 s66, s49, s20 2947; GCN-NOHSA-SI-NEXT: s_and_b32 s67, s48, s20 2948; GCN-NOHSA-SI-NEXT: s_and_b32 s68, s51, s20 2949; GCN-NOHSA-SI-NEXT: s_and_b32 s20, s50, s20 2950; GCN-NOHSA-SI-NEXT: s_lshr_b32 s37, s37, 16 2951; GCN-NOHSA-SI-NEXT: s_lshr_b32 s36, s36, 16 2952; GCN-NOHSA-SI-NEXT: s_lshr_b32 s39, s39, 16 2953; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s38, 16 2954; GCN-NOHSA-SI-NEXT: s_lshr_b32 s41, s41, 16 2955; GCN-NOHSA-SI-NEXT: s_lshr_b32 s40, s40, 16 2956; GCN-NOHSA-SI-NEXT: s_lshr_b32 s42, s42, 16 2957; GCN-NOHSA-SI-NEXT: s_lshr_b32 s45, s45, 16 2958; GCN-NOHSA-SI-NEXT: s_lshr_b32 s44, s44, 16 2959; GCN-NOHSA-SI-NEXT: s_lshr_b32 s47, s47, 16 2960; GCN-NOHSA-SI-NEXT: s_lshr_b32 s46, s46, 16 2961; 
GCN-NOHSA-SI-NEXT: s_lshr_b32 s49, s49, 16 2962; GCN-NOHSA-SI-NEXT: s_lshr_b32 s48, s48, 16 2963; GCN-NOHSA-SI-NEXT: s_lshr_b32 s51, s51, 16 2964; GCN-NOHSA-SI-NEXT: s_lshr_b32 s50, s50, 16 2965; GCN-NOHSA-SI-NEXT: s_lshr_b32 s43, s43, 16 2966; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2967; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2968; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s20 2969; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s50 2970; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s68 2971; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s51 2972; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s67 2973; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s48 2974; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s66 2975; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s49 2976; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s65 2977; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s46 2978; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s64 2979; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s47 2980; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s63 2981; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s44 2982; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s62 2983; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s45 2984; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s61 2985; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s42 2986; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s60 2987; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s59 2988; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s43 2989; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s40 2990; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s58 2991; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s41 2992; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 2993; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 2994; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 2995; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 2996; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 2997; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160 2998; GCN-NOHSA-SI-NEXT: s_waitcnt 
expcnt(5) 2999; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s57 3000; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s38 3001; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s56 3002; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s39 3003; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 3004; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3005; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s55 3006; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s36 3007; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s54 3008; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s37 3009; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 3010; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3011; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 3012; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s53 3013; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 3014; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s52 3015; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3016; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3017; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16 3018; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s35 3019; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17 3020; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s34 3021; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3022; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3023; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 3024; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s33 3025; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 3026; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s31 3027; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3028; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3029; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 3030; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s30 3031; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 3032; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s29 3033; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 3034; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3035; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 3036; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s28 3037; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 3038; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s27 3039; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3040; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3041; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 3042; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s26 3043; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 3044; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25 3045; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3046; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3047; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 3048; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s24 3049; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 3050; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s23 3051; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3052; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3053; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 3054; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s22 3055; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 3056; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21 3057; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3058; GCN-NOHSA-SI-NEXT: s_endpgm 3059; 3060; GCN-HSA-LABEL: constant_zextload_v64i16_to_v64i32: 3061; GCN-HSA: ; %bb.0: 3062; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3063; GCN-HSA-NEXT: s_mov_b32 s37, 0xffff 3064; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3065; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0 3066; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3067; GCN-HSA-NEXT: s_lshr_b32 s20, s5, 16 3068; GCN-HSA-NEXT: s_lshr_b32 s21, s4, 16 3069; GCN-HSA-NEXT: s_lshr_b32 s22, s7, 16 3070; GCN-HSA-NEXT: s_lshr_b32 s23, s6, 16 3071; GCN-HSA-NEXT: s_lshr_b32 s24, s9, 16 3072; GCN-HSA-NEXT: s_lshr_b32 s25, s8, 16 3073; GCN-HSA-NEXT: s_lshr_b32 s26, s11, 16 3074; GCN-HSA-NEXT: s_lshr_b32 s27, s10, 16 3075; GCN-HSA-NEXT: s_lshr_b32 s28, s13, 16 3076; GCN-HSA-NEXT: s_lshr_b32 s29, s12, 16 3077; GCN-HSA-NEXT: s_lshr_b32 s30, s15, 16 3078; GCN-HSA-NEXT: s_lshr_b32 s31, s14, 16 3079; GCN-HSA-NEXT: s_lshr_b32 
s33, s17, 16 3080; GCN-HSA-NEXT: s_lshr_b32 s34, s16, 16 3081; GCN-HSA-NEXT: s_lshr_b32 s35, s19, 16 3082; GCN-HSA-NEXT: s_lshr_b32 s36, s18, 16 3083; GCN-HSA-NEXT: s_and_b32 s38, s5, s37 3084; GCN-HSA-NEXT: s_and_b32 s39, s4, s37 3085; GCN-HSA-NEXT: s_and_b32 s40, s7, s37 3086; GCN-HSA-NEXT: s_and_b32 s41, s6, s37 3087; GCN-HSA-NEXT: s_and_b32 s42, s9, s37 3088; GCN-HSA-NEXT: s_and_b32 s43, s8, s37 3089; GCN-HSA-NEXT: s_and_b32 s44, s11, s37 3090; GCN-HSA-NEXT: s_and_b32 s45, s10, s37 3091; GCN-HSA-NEXT: s_and_b32 s46, s13, s37 3092; GCN-HSA-NEXT: s_and_b32 s47, s12, s37 3093; GCN-HSA-NEXT: s_and_b32 s48, s15, s37 3094; GCN-HSA-NEXT: s_and_b32 s49, s14, s37 3095; GCN-HSA-NEXT: s_and_b32 s50, s17, s37 3096; GCN-HSA-NEXT: s_and_b32 s51, s16, s37 3097; GCN-HSA-NEXT: s_and_b32 s52, s19, s37 3098; GCN-HSA-NEXT: s_and_b32 s53, s18, s37 3099; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x10 3100; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3101; GCN-HSA-NEXT: s_and_b32 s59, s8, s37 3102; GCN-HSA-NEXT: s_and_b32 s60, s11, s37 3103; GCN-HSA-NEXT: s_and_b32 s61, s10, s37 3104; GCN-HSA-NEXT: s_and_b32 s62, s13, s37 3105; GCN-HSA-NEXT: s_and_b32 s63, s12, s37 3106; GCN-HSA-NEXT: s_and_b32 s64, s15, s37 3107; GCN-HSA-NEXT: s_and_b32 s65, s14, s37 3108; GCN-HSA-NEXT: s_and_b32 s66, s17, s37 3109; GCN-HSA-NEXT: s_and_b32 s67, s16, s37 3110; GCN-HSA-NEXT: s_and_b32 s68, s19, s37 3111; GCN-HSA-NEXT: s_and_b32 s54, s5, s37 3112; GCN-HSA-NEXT: s_and_b32 s55, s4, s37 3113; GCN-HSA-NEXT: s_and_b32 s56, s7, s37 3114; GCN-HSA-NEXT: s_and_b32 s57, s6, s37 3115; GCN-HSA-NEXT: s_and_b32 s58, s9, s37 3116; GCN-HSA-NEXT: s_and_b32 s37, s18, s37 3117; GCN-HSA-NEXT: s_lshr_b32 s9, s9, 16 3118; GCN-HSA-NEXT: s_lshr_b32 s11, s11, 16 3119; GCN-HSA-NEXT: s_lshr_b32 s10, s10, 16 3120; GCN-HSA-NEXT: s_lshr_b32 s13, s13, 16 3121; GCN-HSA-NEXT: s_lshr_b32 s12, s12, 16 3122; GCN-HSA-NEXT: s_lshr_b32 s15, s15, 16 3123; GCN-HSA-NEXT: s_lshr_b32 s14, s14, 16 3124; GCN-HSA-NEXT: s_lshr_b32 s17, s17, 16 3125; 
GCN-HSA-NEXT: s_lshr_b32 s16, s16, 16 3126; GCN-HSA-NEXT: s_lshr_b32 s19, s19, 16 3127; GCN-HSA-NEXT: s_lshr_b32 s18, s18, 16 3128; GCN-HSA-NEXT: s_lshr_b32 s5, s5, 16 3129; GCN-HSA-NEXT: s_lshr_b32 s4, s4, 16 3130; GCN-HSA-NEXT: s_lshr_b32 s7, s7, 16 3131; GCN-HSA-NEXT: s_lshr_b32 s6, s6, 16 3132; GCN-HSA-NEXT: s_lshr_b32 s8, s8, 16 3133; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 3134; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3135; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3 3136; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2 3137; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 3138; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3139; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 3140; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 3141; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0 3142; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3143; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3 3144; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2 3145; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 3146; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3147; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3 3148; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2 3149; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 3150; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3151; GCN-HSA-NEXT: v_mov_b32_e32 v31, s3 3152; GCN-HSA-NEXT: v_mov_b32_e32 v30, s2 3153; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 3154; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3155; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3 3156; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2 3157; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 3158; GCN-HSA-NEXT: v_mov_b32_e32 v4, s67 3159; GCN-HSA-NEXT: v_mov_b32_e32 v5, s16 3160; GCN-HSA-NEXT: v_mov_b32_e32 v6, s66 3161; GCN-HSA-NEXT: v_mov_b32_e32 v7, s17 3162; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3163; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 3164; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 3165; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 3166; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 3167; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3168; GCN-HSA-NEXT: v_mov_b32_e32 v35, s3 3169; GCN-HSA-NEXT: v_mov_b32_e32 v34, s2 3170; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 3171; GCN-HSA-NEXT: v_mov_b32_e32 v16, s61 3172; 
GCN-HSA-NEXT: v_mov_b32_e32 v17, s10 3173; GCN-HSA-NEXT: v_mov_b32_e32 v18, s60 3174; GCN-HSA-NEXT: v_mov_b32_e32 v19, s11 3175; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3176; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 3177; GCN-HSA-NEXT: v_mov_b32_e32 v0, s37 3178; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 3179; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 3180; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 3181; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3182; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 3183; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 3184; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 3185; GCN-HSA-NEXT: v_mov_b32_e32 v1, s18 3186; GCN-HSA-NEXT: v_mov_b32_e32 v2, s68 3187; GCN-HSA-NEXT: v_mov_b32_e32 v3, s19 3188; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[0:3] 3189; GCN-HSA-NEXT: v_mov_b32_e32 v8, s65 3190; GCN-HSA-NEXT: v_mov_b32_e32 v9, s14 3191; GCN-HSA-NEXT: v_mov_b32_e32 v10, s64 3192; GCN-HSA-NEXT: v_mov_b32_e32 v11, s15 3193; GCN-HSA-NEXT: v_mov_b32_e32 v12, s63 3194; GCN-HSA-NEXT: v_mov_b32_e32 v13, s12 3195; GCN-HSA-NEXT: v_mov_b32_e32 v14, s62 3196; GCN-HSA-NEXT: v_mov_b32_e32 v15, s13 3197; GCN-HSA-NEXT: v_mov_b32_e32 v20, s59 3198; GCN-HSA-NEXT: v_mov_b32_e32 v0, s57 3199; GCN-HSA-NEXT: v_mov_b32_e32 v21, s8 3200; GCN-HSA-NEXT: v_mov_b32_e32 v22, s58 3201; GCN-HSA-NEXT: v_mov_b32_e32 v23, s9 3202; GCN-HSA-NEXT: v_mov_b32_e32 v1, s6 3203; GCN-HSA-NEXT: v_mov_b32_e32 v2, s56 3204; GCN-HSA-NEXT: v_mov_b32_e32 v4, s55 3205; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 3206; GCN-HSA-NEXT: v_mov_b32_e32 v5, s4 3207; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3208; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 3209; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15] 3210; GCN-HSA-NEXT: v_mov_b32_e32 v6, s54 3211; GCN-HSA-NEXT: v_mov_b32_e32 v8, s53 3212; GCN-HSA-NEXT: v_mov_b32_e32 v7, s5 3213; GCN-HSA-NEXT: v_mov_b32_e32 v12, s51 3214; GCN-HSA-NEXT: v_mov_b32_e32 v9, s36 3215; GCN-HSA-NEXT: v_mov_b32_e32 v10, s52 3216; GCN-HSA-NEXT: v_mov_b32_e32 v11, s35 3217; GCN-HSA-NEXT: v_mov_b32_e32 v13, s34 
3218; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23] 3219; GCN-HSA-NEXT: v_mov_b32_e32 v14, s50 3220; GCN-HSA-NEXT: v_mov_b32_e32 v15, s33 3221; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 3222; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7] 3223; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 3224; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 3225; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3226; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3227; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 3228; GCN-HSA-NEXT: v_mov_b32_e32 v0, s49 3229; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31 3230; GCN-HSA-NEXT: v_mov_b32_e32 v2, s48 3231; GCN-HSA-NEXT: v_mov_b32_e32 v3, s30 3232; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3233; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3234; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3235; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3236; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 3237; GCN-HSA-NEXT: v_mov_b32_e32 v0, s47 3238; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 3239; GCN-HSA-NEXT: v_mov_b32_e32 v2, s46 3240; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 3241; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3242; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3243; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3244; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3245; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 3246; GCN-HSA-NEXT: v_mov_b32_e32 v0, s45 3247; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 3248; GCN-HSA-NEXT: v_mov_b32_e32 v2, s44 3249; GCN-HSA-NEXT: v_mov_b32_e32 v3, s26 3250; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3251; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3252; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3253; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3254; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 3255; GCN-HSA-NEXT: v_mov_b32_e32 v0, s43 3256; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 3257; GCN-HSA-NEXT: v_mov_b32_e32 v2, s42 3258; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 3259; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3260; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3261; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3262; GCN-HSA-NEXT: v_mov_b32_e32 v0, s41 
3263; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 3264; GCN-HSA-NEXT: v_mov_b32_e32 v2, s40 3265; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22 3266; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3267; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3268; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3269; GCN-HSA-NEXT: v_mov_b32_e32 v0, s39 3270; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 3271; GCN-HSA-NEXT: v_mov_b32_e32 v2, s38 3272; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 3273; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3274; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3275; GCN-HSA-NEXT: s_endpgm 3276; 3277; GCN-NOHSA-VI-LABEL: constant_zextload_v64i16_to_v64i32: 3278; GCN-NOHSA-VI: ; %bb.0: 3279; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 3280; GCN-NOHSA-VI-NEXT: s_mov_b32 s20, 0xffff 3281; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3282; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x40 3283; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x0 3284; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 3285; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 3286; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3287; GCN-NOHSA-VI-NEXT: s_and_b32 s68, s19, s20 3288; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s51, 16 3289; GCN-NOHSA-VI-NEXT: s_lshr_b32 s53, s50, 16 3290; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s5, 16 3291; GCN-NOHSA-VI-NEXT: s_lshr_b32 s55, s4, 16 3292; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s7, 16 3293; GCN-NOHSA-VI-NEXT: s_lshr_b32 s57, s6, 16 3294; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s9, 16 3295; GCN-NOHSA-VI-NEXT: s_lshr_b32 s59, s8, 16 3296; GCN-NOHSA-VI-NEXT: s_lshr_b32 s60, s11, 16 3297; GCN-NOHSA-VI-NEXT: s_lshr_b32 s61, s10, 16 3298; GCN-NOHSA-VI-NEXT: s_and_b32 s62, s13, s20 3299; GCN-NOHSA-VI-NEXT: s_and_b32 s63, s12, s20 3300; GCN-NOHSA-VI-NEXT: s_and_b32 s64, s15, s20 3301; GCN-NOHSA-VI-NEXT: s_and_b32 s65, s14, s20 3302; GCN-NOHSA-VI-NEXT: s_and_b32 s66, s17, s20 3303; GCN-NOHSA-VI-NEXT: s_and_b32 s67, s16, s20 3304; GCN-NOHSA-VI-NEXT: s_lshr_b32 s19, s19, 16 3305; GCN-NOHSA-VI-NEXT: s_lshr_b32 s21, s37, 16 3306; 
GCN-NOHSA-VI-NEXT: s_and_b32 s22, s37, s20 3307; GCN-NOHSA-VI-NEXT: s_lshr_b32 s23, s36, 16 3308; GCN-NOHSA-VI-NEXT: s_and_b32 s24, s36, s20 3309; GCN-NOHSA-VI-NEXT: s_lshr_b32 s25, s39, 16 3310; GCN-NOHSA-VI-NEXT: s_and_b32 s26, s39, s20 3311; GCN-NOHSA-VI-NEXT: s_lshr_b32 s27, s38, 16 3312; GCN-NOHSA-VI-NEXT: s_and_b32 s28, s38, s20 3313; GCN-NOHSA-VI-NEXT: s_lshr_b32 s29, s41, 16 3314; GCN-NOHSA-VI-NEXT: s_and_b32 s30, s41, s20 3315; GCN-NOHSA-VI-NEXT: s_lshr_b32 s31, s40, 16 3316; GCN-NOHSA-VI-NEXT: s_and_b32 s33, s40, s20 3317; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s43, 16 3318; GCN-NOHSA-VI-NEXT: s_and_b32 s35, s43, s20 3319; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s42, 16 3320; GCN-NOHSA-VI-NEXT: s_and_b32 s37, s42, s20 3321; GCN-NOHSA-VI-NEXT: s_lshr_b32 s38, s45, 16 3322; GCN-NOHSA-VI-NEXT: s_and_b32 s39, s45, s20 3323; GCN-NOHSA-VI-NEXT: s_lshr_b32 s40, s44, 16 3324; GCN-NOHSA-VI-NEXT: s_and_b32 s41, s44, s20 3325; GCN-NOHSA-VI-NEXT: s_lshr_b32 s42, s47, 16 3326; GCN-NOHSA-VI-NEXT: s_and_b32 s43, s47, s20 3327; GCN-NOHSA-VI-NEXT: s_lshr_b32 s44, s46, 16 3328; GCN-NOHSA-VI-NEXT: s_and_b32 s45, s46, s20 3329; GCN-NOHSA-VI-NEXT: s_lshr_b32 s46, s49, 16 3330; GCN-NOHSA-VI-NEXT: s_and_b32 s47, s49, s20 3331; GCN-NOHSA-VI-NEXT: s_lshr_b32 s49, s48, 16 3332; GCN-NOHSA-VI-NEXT: s_and_b32 s48, s48, s20 3333; GCN-NOHSA-VI-NEXT: s_and_b32 s51, s51, s20 3334; GCN-NOHSA-VI-NEXT: s_and_b32 s50, s50, s20 3335; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s20 3336; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s20 3337; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, s20 3338; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, s20 3339; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, s20 3340; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, s20 3341; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, s20 3342; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, s20 3343; GCN-NOHSA-VI-NEXT: s_and_b32 s20, s18, s20 3344; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s18, 16 3345; GCN-NOHSA-VI-NEXT: s_lshr_b32 s17, s17, 16 3346; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s16, 16 3347; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 3348; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s18 3349; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s68 3350; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s19 3351; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 3352; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s15, 16 3353; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s14, 16 3354; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s67 3355; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s16 3356; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s66 3357; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 3358; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 3359; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s13, 16 3360; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s12, 16 3361; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s65 3362; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s14 3363; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s64 3364; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15 3365; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 3366; GCN-NOHSA-VI-NEXT: s_nop 0 3367; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s63 3368; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s12 3369; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s62 3370; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13 3371; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 3372; GCN-NOHSA-VI-NEXT: s_nop 0 3373; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 3374; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s61 3375; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 3376; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s60 3377; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 3378; GCN-NOHSA-VI-NEXT: s_nop 0 3379; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 3380; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s59 3381; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 3382; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s58 3383; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 3384; GCN-NOHSA-VI-NEXT: s_nop 0 3385; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 3386; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v1, s57 3387; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 3388; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s56 3389; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 3390; GCN-NOHSA-VI-NEXT: s_nop 0 3391; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 3392; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s55 3393; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 3394; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s54 3395; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 3396; GCN-NOHSA-VI-NEXT: s_nop 0 3397; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s50 3398; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s53 3399; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s51 3400; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s52 3401; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3402; GCN-NOHSA-VI-NEXT: s_nop 0 3403; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48 3404; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49 3405; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s47 3406; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s46 3407; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3408; GCN-NOHSA-VI-NEXT: s_nop 0 3409; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s45 3410; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s44 3411; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s43 3412; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s42 3413; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3414; GCN-NOHSA-VI-NEXT: s_nop 0 3415; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s41 3416; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s40 3417; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s39 3418; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s38 3419; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 3420; GCN-NOHSA-VI-NEXT: s_nop 0 3421; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s37 3422; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s36 3423; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s35 3424; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s34 3425; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 
3426; GCN-NOHSA-VI-NEXT: s_nop 0 3427; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s33 3428; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 3429; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s30 3430; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s29 3431; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3432; GCN-NOHSA-VI-NEXT: s_nop 0 3433; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 3434; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27 3435; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26 3436; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s25 3437; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3438; GCN-NOHSA-VI-NEXT: s_nop 0 3439; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 3440; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 3441; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s22 3442; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s21 3443; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3444; GCN-NOHSA-VI-NEXT: s_endpgm 3445; 3446; EG-LABEL: constant_zextload_v64i16_to_v64i32: 3447; EG: ; %bb.0: 3448; EG-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[] 3449; EG-NEXT: TEX 3 @22 3450; EG-NEXT: ALU 55, @39, KC0[CB0:0-32], KC1[] 3451; EG-NEXT: TEX 3 @30 3452; EG-NEXT: ALU 87, @95, KC0[CB0:0-32], KC1[] 3453; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0 3454; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T49.X, 0 3455; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T64.X, 0 3456; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T50.X, 0 3457; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T61.X, 0 3458; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T51.X, 0 3459; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T58.X, 0 3460; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T52.X, 0 3461; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T55.X, 0 3462; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T37.X, 0 3463; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T48.X, 0 3464; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T38.X, 0 3465; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, 
T46.X, 0 3466; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T39.X, 0 3467; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T43.X, 0 3468; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T36.X, 1 3469; EG-NEXT: CF_END 3470; EG-NEXT: Fetch clause starting at 22: 3471; EG-NEXT: VTX_READ_128 T36.XYZW, T35.X, 0, #1 3472; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 48, #1 3473; EG-NEXT: VTX_READ_128 T38.XYZW, T35.X, 32, #1 3474; EG-NEXT: VTX_READ_128 T39.XYZW, T35.X, 16, #1 3475; EG-NEXT: Fetch clause starting at 30: 3476; EG-NEXT: VTX_READ_128 T49.XYZW, T35.X, 112, #1 3477; EG-NEXT: VTX_READ_128 T50.XYZW, T35.X, 96, #1 3478; EG-NEXT: VTX_READ_128 T51.XYZW, T35.X, 80, #1 3479; EG-NEXT: VTX_READ_128 T52.XYZW, T35.X, 64, #1 3480; EG-NEXT: ALU clause starting at 38: 3481; EG-NEXT: MOV * T35.X, KC0[2].Z, 3482; EG-NEXT: ALU clause starting at 39: 3483; EG-NEXT: LSHR * T40.W, T36.Y, literal.x, 3484; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3485; EG-NEXT: AND_INT * T40.Z, T36.Y, literal.x, 3486; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3487; EG-NEXT: LSHR T40.Y, T36.X, literal.x, 3488; EG-NEXT: LSHR * T41.W, T36.W, literal.x, 3489; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3490; EG-NEXT: AND_INT T40.X, T36.X, literal.x, 3491; EG-NEXT: AND_INT T41.Z, T36.W, literal.x, 3492; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.y, 3493; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 3494; EG-NEXT: LSHR T41.Y, T36.Z, literal.x, 3495; EG-NEXT: LSHR * T42.W, T39.Y, literal.x, 3496; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3497; EG-NEXT: AND_INT T41.X, T36.Z, literal.x, 3498; EG-NEXT: AND_INT T42.Z, T39.Y, literal.x, 3499; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3500; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 3501; EG-NEXT: LSHR T43.X, PV.W, literal.x, 3502; EG-NEXT: LSHR T42.Y, T39.X, literal.y, 3503; EG-NEXT: LSHR T44.W, T39.W, literal.y, 3504; EG-NEXT: AND_INT * T42.X, T39.X, literal.z, 3505; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3506; EG-NEXT: 65535(9.183409e-41), 
0(0.000000e+00) 3507; EG-NEXT: AND_INT T44.Z, T39.W, literal.x, 3508; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3509; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 3510; EG-NEXT: LSHR T39.X, PV.W, literal.x, 3511; EG-NEXT: LSHR T44.Y, T39.Z, literal.y, 3512; EG-NEXT: LSHR T45.W, T38.Y, literal.y, 3513; EG-NEXT: AND_INT * T44.X, T39.Z, literal.z, 3514; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3515; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3516; EG-NEXT: AND_INT T45.Z, T38.Y, literal.x, 3517; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3518; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 3519; EG-NEXT: LSHR T46.X, PV.W, literal.x, 3520; EG-NEXT: LSHR T45.Y, T38.X, literal.y, 3521; EG-NEXT: LSHR T47.W, T38.W, literal.y, 3522; EG-NEXT: AND_INT * T45.X, T38.X, literal.z, 3523; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3524; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3525; EG-NEXT: AND_INT T47.Z, T38.W, literal.x, 3526; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3527; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 3528; EG-NEXT: LSHR T38.X, PV.W, literal.x, 3529; EG-NEXT: LSHR T47.Y, T38.Z, literal.y, 3530; EG-NEXT: AND_INT * T47.X, T38.Z, literal.z, 3531; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3532; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3533; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, 3534; EG-NEXT: LSHR * T35.W, T37.Y, literal.y, 3535; EG-NEXT: 80(1.121039e-43), 16(2.242078e-44) 3536; EG-NEXT: LSHR T48.X, PV.W, literal.x, 3537; EG-NEXT: AND_INT * T35.Z, T37.Y, literal.y, 3538; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41) 3539; EG-NEXT: ALU clause starting at 95: 3540; EG-NEXT: LSHR T35.Y, T37.X, literal.x, 3541; EG-NEXT: LSHR * T53.W, T37.W, literal.x, 3542; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3543; EG-NEXT: AND_INT T35.X, T37.X, literal.x, 3544; EG-NEXT: AND_INT T53.Z, T37.W, literal.x, 3545; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3546; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 3547; EG-NEXT: LSHR T37.X, PV.W, 
literal.x, 3548; EG-NEXT: LSHR T53.Y, T37.Z, literal.y, 3549; EG-NEXT: LSHR T54.W, T52.Y, literal.y, 3550; EG-NEXT: AND_INT * T53.X, T37.Z, literal.z, 3551; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3552; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3553; EG-NEXT: AND_INT T54.Z, T52.Y, literal.x, 3554; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3555; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 3556; EG-NEXT: LSHR T55.X, PV.W, literal.x, 3557; EG-NEXT: LSHR T54.Y, T52.X, literal.y, 3558; EG-NEXT: LSHR T56.W, T52.W, literal.y, 3559; EG-NEXT: AND_INT * T54.X, T52.X, literal.z, 3560; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3561; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3562; EG-NEXT: AND_INT T56.Z, T52.W, literal.x, 3563; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3564; EG-NEXT: 65535(9.183409e-41), 128(1.793662e-43) 3565; EG-NEXT: LSHR T52.X, PV.W, literal.x, 3566; EG-NEXT: LSHR T56.Y, T52.Z, literal.y, 3567; EG-NEXT: LSHR T57.W, T51.Y, literal.y, 3568; EG-NEXT: AND_INT * T56.X, T52.Z, literal.z, 3569; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3570; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3571; EG-NEXT: AND_INT T57.Z, T51.Y, literal.x, 3572; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3573; EG-NEXT: 65535(9.183409e-41), 144(2.017870e-43) 3574; EG-NEXT: LSHR T58.X, PV.W, literal.x, 3575; EG-NEXT: LSHR T57.Y, T51.X, literal.y, 3576; EG-NEXT: LSHR T59.W, T51.W, literal.y, 3577; EG-NEXT: AND_INT * T57.X, T51.X, literal.z, 3578; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3579; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3580; EG-NEXT: AND_INT T59.Z, T51.W, literal.x, 3581; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3582; EG-NEXT: 65535(9.183409e-41), 160(2.242078e-43) 3583; EG-NEXT: LSHR T51.X, PV.W, literal.x, 3584; EG-NEXT: LSHR T59.Y, T51.Z, literal.y, 3585; EG-NEXT: LSHR T60.W, T50.Y, literal.y, 3586; EG-NEXT: AND_INT * T59.X, T51.Z, literal.z, 3587; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3588; EG-NEXT: 65535(9.183409e-41), 
0(0.000000e+00) 3589; EG-NEXT: AND_INT T60.Z, T50.Y, literal.x, 3590; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3591; EG-NEXT: 65535(9.183409e-41), 176(2.466285e-43) 3592; EG-NEXT: LSHR T61.X, PV.W, literal.x, 3593; EG-NEXT: LSHR T60.Y, T50.X, literal.y, 3594; EG-NEXT: LSHR T62.W, T50.W, literal.y, 3595; EG-NEXT: AND_INT * T60.X, T50.X, literal.z, 3596; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3597; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3598; EG-NEXT: AND_INT T62.Z, T50.W, literal.x, 3599; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3600; EG-NEXT: 65535(9.183409e-41), 192(2.690493e-43) 3601; EG-NEXT: LSHR T50.X, PV.W, literal.x, 3602; EG-NEXT: LSHR T62.Y, T50.Z, literal.y, 3603; EG-NEXT: LSHR T63.W, T49.Y, literal.y, 3604; EG-NEXT: AND_INT * T62.X, T50.Z, literal.z, 3605; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3606; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3607; EG-NEXT: AND_INT T63.Z, T49.Y, literal.x, 3608; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3609; EG-NEXT: 65535(9.183409e-41), 208(2.914701e-43) 3610; EG-NEXT: LSHR T64.X, PV.W, literal.x, 3611; EG-NEXT: LSHR T63.Y, T49.X, literal.y, 3612; EG-NEXT: LSHR T65.W, T49.W, literal.y, 3613; EG-NEXT: AND_INT * T63.X, T49.X, literal.z, 3614; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3615; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3616; EG-NEXT: AND_INT T65.Z, T49.W, literal.x, 3617; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3618; EG-NEXT: 65535(9.183409e-41), 224(3.138909e-43) 3619; EG-NEXT: LSHR T49.X, PV.W, literal.x, 3620; EG-NEXT: LSHR T65.Y, T49.Z, literal.y, 3621; EG-NEXT: AND_INT * T65.X, T49.Z, literal.z, 3622; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3623; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3624; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3625; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 3626; EG-NEXT: LSHR * T66.X, PV.W, literal.x, 3627; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3628 %load = load <64 x i16>, <64 x i16> addrspace(4)* %in 3629 %ext = zext 
<64 x i16> %load to <64 x i32> 3630 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 3631 ret void 3632} 3633 3634define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 3635; GCN-NOHSA-SI-LABEL: constant_sextload_v64i16_to_v64i32: 3636; GCN-NOHSA-SI: ; %bb.0: 3637; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 3638; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3639; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x0 3640; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x10 3641; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3642; GCN-NOHSA-SI-NEXT: s_ashr_i32 s20, s37, 16 3643; GCN-NOHSA-SI-NEXT: s_ashr_i32 s21, s36, 16 3644; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s22, s37 3645; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s23, s36 3646; GCN-NOHSA-SI-NEXT: s_ashr_i32 s24, s39, 16 3647; GCN-NOHSA-SI-NEXT: s_ashr_i32 s25, s38, 16 3648; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s26, s39 3649; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s27, s38 3650; GCN-NOHSA-SI-NEXT: s_ashr_i32 s28, s41, 16 3651; GCN-NOHSA-SI-NEXT: s_ashr_i32 s29, s40, 16 3652; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s30, s41 3653; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s31, s40 3654; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s43, 16 3655; GCN-NOHSA-SI-NEXT: s_ashr_i32 s34, s42, 16 3656; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s35, s43 3657; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s36, s42 3658; GCN-NOHSA-SI-NEXT: s_ashr_i32 s37, s45, 16 3659; GCN-NOHSA-SI-NEXT: s_ashr_i32 s38, s44, 16 3660; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s39, s45 3661; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s40, s44 3662; GCN-NOHSA-SI-NEXT: s_ashr_i32 s41, s47, 16 3663; GCN-NOHSA-SI-NEXT: s_ashr_i32 s42, s46, 16 3664; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s43, s47 3665; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s44, s46 3666; GCN-NOHSA-SI-NEXT: s_ashr_i32 s45, s49, 16 3667; GCN-NOHSA-SI-NEXT: s_ashr_i32 s46, s48, 16 3668; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s47, s49 3669; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s48, s48 3670; 
GCN-NOHSA-SI-NEXT: s_ashr_i32 s49, s51, 16 3671; GCN-NOHSA-SI-NEXT: s_ashr_i32 s52, s50, 16 3672; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s51, s51 3673; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s50, s50 3674; GCN-NOHSA-SI-NEXT: s_ashr_i32 s53, s5, 16 3675; GCN-NOHSA-SI-NEXT: s_ashr_i32 s54, s4, 16 3676; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 3677; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 3678; GCN-NOHSA-SI-NEXT: s_ashr_i32 s55, s7, 16 3679; GCN-NOHSA-SI-NEXT: s_ashr_i32 s56, s6, 16 3680; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 3681; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 3682; GCN-NOHSA-SI-NEXT: s_ashr_i32 s57, s9, 16 3683; GCN-NOHSA-SI-NEXT: s_ashr_i32 s58, s8, 16 3684; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9 3685; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8 3686; GCN-NOHSA-SI-NEXT: s_ashr_i32 s59, s10, 16 3687; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s60, s11 3688; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10 3689; GCN-NOHSA-SI-NEXT: s_ashr_i32 s61, s13, 16 3690; GCN-NOHSA-SI-NEXT: s_ashr_i32 s62, s12, 16 3691; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s13, s13 3692; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s12, s12 3693; GCN-NOHSA-SI-NEXT: s_ashr_i32 s63, s15, 16 3694; GCN-NOHSA-SI-NEXT: s_ashr_i32 s64, s14, 16 3695; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s15, s15 3696; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s14, s14 3697; GCN-NOHSA-SI-NEXT: s_ashr_i32 s65, s17, 16 3698; GCN-NOHSA-SI-NEXT: s_ashr_i32 s66, s16, 16 3699; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s17, s17 3700; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s16, s16 3701; GCN-NOHSA-SI-NEXT: s_ashr_i32 s67, s19, 16 3702; GCN-NOHSA-SI-NEXT: s_ashr_i32 s68, s18, 16 3703; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s19, s19 3704; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s18, s18 3705; GCN-NOHSA-SI-NEXT: s_ashr_i32 s11, s11, 16 3706; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 3707; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 3708; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 3709; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s68 3710; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 3711; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v3, s67 3712; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s16 3713; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s66 3714; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s17 3715; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s65 3716; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s14 3717; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s64 3718; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s15 3719; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s63 3720; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s12 3721; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s62 3722; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s13 3723; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s61 3724; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s10 3725; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s59 3726; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s60 3727; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s8 3728; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s11 3729; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s58 3730; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s9 3731; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s57 3732; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 3733; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 3734; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 3735; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 3736; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 3737; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160 3738; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5) 3739; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 3740; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s56 3741; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 3742; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s55 3743; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 3744; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3745; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 3746; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s54 3747; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 3748; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, 
s53 3749; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 3750; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3751; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s50 3752; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s52 3753; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s51 3754; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s49 3755; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3756; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3757; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s48 3758; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s46 3759; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s47 3760; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s45 3761; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3762; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3763; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s44 3764; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s42 3765; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s43 3766; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s41 3767; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3768; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3769; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s40 3770; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s38 3771; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s39 3772; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s37 3773; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 3774; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3775; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36 3776; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 3777; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s35 3778; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 3779; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3780; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3781; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s31 3782; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29 3783; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s30 3784; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s28 3785; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3786; GCN-NOHSA-SI-NEXT: s_waitcnt 
expcnt(0) 3787; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s27 3788; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s25 3789; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s26 3790; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s24 3791; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3792; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3793; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s23 3794; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s21 3795; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s22 3796; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s20 3797; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3798; GCN-NOHSA-SI-NEXT: s_endpgm 3799; 3800; GCN-HSA-LABEL: constant_sextload_v64i16_to_v64i32: 3801; GCN-HSA: ; %bb.0: 3802; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3803; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3804; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0 3805; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3806; GCN-HSA-NEXT: s_ashr_i32 s20, s5, 16 3807; GCN-HSA-NEXT: s_ashr_i32 s21, s4, 16 3808; GCN-HSA-NEXT: s_sext_i32_i16 s22, s5 3809; GCN-HSA-NEXT: s_sext_i32_i16 s23, s4 3810; GCN-HSA-NEXT: s_ashr_i32 s24, s7, 16 3811; GCN-HSA-NEXT: s_ashr_i32 s25, s6, 16 3812; GCN-HSA-NEXT: s_sext_i32_i16 s26, s7 3813; GCN-HSA-NEXT: s_sext_i32_i16 s27, s6 3814; GCN-HSA-NEXT: s_ashr_i32 s28, s9, 16 3815; GCN-HSA-NEXT: s_ashr_i32 s29, s8, 16 3816; GCN-HSA-NEXT: s_sext_i32_i16 s30, s9 3817; GCN-HSA-NEXT: s_sext_i32_i16 s31, s8 3818; GCN-HSA-NEXT: s_ashr_i32 s33, s11, 16 3819; GCN-HSA-NEXT: s_ashr_i32 s34, s10, 16 3820; GCN-HSA-NEXT: s_sext_i32_i16 s35, s11 3821; GCN-HSA-NEXT: s_sext_i32_i16 s36, s10 3822; GCN-HSA-NEXT: s_ashr_i32 s37, s13, 16 3823; GCN-HSA-NEXT: s_ashr_i32 s38, s12, 16 3824; GCN-HSA-NEXT: s_sext_i32_i16 s39, s13 3825; GCN-HSA-NEXT: s_sext_i32_i16 s40, s12 3826; GCN-HSA-NEXT: s_ashr_i32 s41, s15, 16 3827; GCN-HSA-NEXT: s_ashr_i32 s42, s14, 16 3828; GCN-HSA-NEXT: s_sext_i32_i16 s43, s15 3829; GCN-HSA-NEXT: s_sext_i32_i16 s44, s14 3830; GCN-HSA-NEXT: s_ashr_i32 s45, s17, 16 3831; 
GCN-HSA-NEXT: s_ashr_i32 s46, s16, 16 3832; GCN-HSA-NEXT: s_sext_i32_i16 s47, s17 3833; GCN-HSA-NEXT: s_sext_i32_i16 s48, s16 3834; GCN-HSA-NEXT: s_ashr_i32 s49, s19, 16 3835; GCN-HSA-NEXT: s_ashr_i32 s50, s18, 16 3836; GCN-HSA-NEXT: s_sext_i32_i16 s51, s19 3837; GCN-HSA-NEXT: s_sext_i32_i16 s52, s18 3838; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x10 3839; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3840; GCN-HSA-NEXT: s_ashr_i32 s57, s9, 16 3841; GCN-HSA-NEXT: s_ashr_i32 s59, s11, 16 3842; GCN-HSA-NEXT: s_ashr_i32 s60, s10, 16 3843; GCN-HSA-NEXT: s_ashr_i32 s61, s13, 16 3844; GCN-HSA-NEXT: s_ashr_i32 s62, s12, 16 3845; GCN-HSA-NEXT: s_ashr_i32 s63, s15, 16 3846; GCN-HSA-NEXT: s_ashr_i32 s64, s14, 16 3847; GCN-HSA-NEXT: s_ashr_i32 s65, s17, 16 3848; GCN-HSA-NEXT: s_ashr_i32 s66, s16, 16 3849; GCN-HSA-NEXT: s_ashr_i32 s67, s19, 16 3850; GCN-HSA-NEXT: s_ashr_i32 s68, s18, 16 3851; GCN-HSA-NEXT: s_ashr_i32 s53, s5, 16 3852; GCN-HSA-NEXT: s_ashr_i32 s54, s4, 16 3853; GCN-HSA-NEXT: s_ashr_i32 s55, s7, 16 3854; GCN-HSA-NEXT: s_ashr_i32 s56, s6, 16 3855; GCN-HSA-NEXT: s_ashr_i32 s58, s8, 16 3856; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 3857; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3858; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3 3859; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2 3860; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 3861; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3862; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 3863; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 3864; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0 3865; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3866; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3 3867; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2 3868; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 3869; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3870; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3 3871; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2 3872; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 3873; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3874; GCN-HSA-NEXT: v_mov_b32_e32 v31, s3 3875; GCN-HSA-NEXT: v_mov_b32_e32 v30, s2 3876; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 3877; 
GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3878; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3 3879; GCN-HSA-NEXT: s_sext_i32_i16 s17, s17 3880; GCN-HSA-NEXT: s_sext_i32_i16 s16, s16 3881; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2 3882; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 3883; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 3884; GCN-HSA-NEXT: v_mov_b32_e32 v5, s66 3885; GCN-HSA-NEXT: v_mov_b32_e32 v6, s17 3886; GCN-HSA-NEXT: v_mov_b32_e32 v7, s65 3887; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3888; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 3889; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 3890; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 3891; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 3892; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3893; GCN-HSA-NEXT: v_mov_b32_e32 v35, s3 3894; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11 3895; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10 3896; GCN-HSA-NEXT: v_mov_b32_e32 v34, s2 3897; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 3898; GCN-HSA-NEXT: v_mov_b32_e32 v16, s10 3899; GCN-HSA-NEXT: v_mov_b32_e32 v17, s60 3900; GCN-HSA-NEXT: v_mov_b32_e32 v18, s11 3901; GCN-HSA-NEXT: v_mov_b32_e32 v19, s59 3902; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3903; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 3904; GCN-HSA-NEXT: s_sext_i32_i16 s19, s19 3905; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 3906; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 3907; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 3908; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3909; GCN-HSA-NEXT: s_sext_i32_i16 s18, s18 3910; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 3911; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 3912; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8 3913; GCN-HSA-NEXT: s_sext_i32_i16 s13, s13 3914; GCN-HSA-NEXT: s_sext_i32_i16 s12, s12 3915; GCN-HSA-NEXT: s_sext_i32_i16 s15, s15 3916; GCN-HSA-NEXT: s_sext_i32_i16 s14, s14 3917; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 3918; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 3919; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 3920; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 3921; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9 3922; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 3923; GCN-HSA-NEXT: 
v_mov_b32_e32 v1, s68 3924; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19 3925; GCN-HSA-NEXT: v_mov_b32_e32 v3, s67 3926; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[0:3] 3927; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 3928; GCN-HSA-NEXT: v_mov_b32_e32 v8, s14 3929; GCN-HSA-NEXT: v_mov_b32_e32 v9, s64 3930; GCN-HSA-NEXT: v_mov_b32_e32 v10, s15 3931; GCN-HSA-NEXT: v_mov_b32_e32 v11, s63 3932; GCN-HSA-NEXT: v_mov_b32_e32 v12, s12 3933; GCN-HSA-NEXT: v_mov_b32_e32 v13, s62 3934; GCN-HSA-NEXT: v_mov_b32_e32 v14, s13 3935; GCN-HSA-NEXT: v_mov_b32_e32 v15, s61 3936; GCN-HSA-NEXT: v_mov_b32_e32 v20, s8 3937; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 3938; GCN-HSA-NEXT: v_mov_b32_e32 v21, s58 3939; GCN-HSA-NEXT: v_mov_b32_e32 v22, s9 3940; GCN-HSA-NEXT: v_mov_b32_e32 v23, s57 3941; GCN-HSA-NEXT: v_mov_b32_e32 v1, s56 3942; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 3943; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 3944; GCN-HSA-NEXT: v_mov_b32_e32 v3, s55 3945; GCN-HSA-NEXT: v_mov_b32_e32 v5, s54 3946; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3947; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 3948; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15] 3949; GCN-HSA-NEXT: v_mov_b32_e32 v6, s5 3950; GCN-HSA-NEXT: v_mov_b32_e32 v8, s52 3951; GCN-HSA-NEXT: v_mov_b32_e32 v7, s53 3952; GCN-HSA-NEXT: v_mov_b32_e32 v12, s48 3953; GCN-HSA-NEXT: v_mov_b32_e32 v9, s50 3954; GCN-HSA-NEXT: v_mov_b32_e32 v10, s51 3955; GCN-HSA-NEXT: v_mov_b32_e32 v11, s49 3956; GCN-HSA-NEXT: v_mov_b32_e32 v13, s46 3957; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23] 3958; GCN-HSA-NEXT: v_mov_b32_e32 v14, s47 3959; GCN-HSA-NEXT: v_mov_b32_e32 v15, s45 3960; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 3961; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7] 3962; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 3963; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 3964; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3965; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3966; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 3967; GCN-HSA-NEXT: v_mov_b32_e32 v0, s44 
3968; GCN-HSA-NEXT: v_mov_b32_e32 v1, s42 3969; GCN-HSA-NEXT: v_mov_b32_e32 v2, s43 3970; GCN-HSA-NEXT: v_mov_b32_e32 v3, s41 3971; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3972; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3973; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3974; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3975; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 3976; GCN-HSA-NEXT: v_mov_b32_e32 v0, s40 3977; GCN-HSA-NEXT: v_mov_b32_e32 v1, s38 3978; GCN-HSA-NEXT: v_mov_b32_e32 v2, s39 3979; GCN-HSA-NEXT: v_mov_b32_e32 v3, s37 3980; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3981; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3982; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3983; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3984; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 3985; GCN-HSA-NEXT: v_mov_b32_e32 v0, s36 3986; GCN-HSA-NEXT: v_mov_b32_e32 v1, s34 3987; GCN-HSA-NEXT: v_mov_b32_e32 v2, s35 3988; GCN-HSA-NEXT: v_mov_b32_e32 v3, s33 3989; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3990; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3991; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3992; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3993; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 3994; GCN-HSA-NEXT: v_mov_b32_e32 v0, s31 3995; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 3996; GCN-HSA-NEXT: v_mov_b32_e32 v2, s30 3997; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 3998; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3999; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4000; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4001; GCN-HSA-NEXT: v_mov_b32_e32 v0, s27 4002; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 4003; GCN-HSA-NEXT: v_mov_b32_e32 v2, s26 4004; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 4005; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4006; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4007; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4008; GCN-HSA-NEXT: v_mov_b32_e32 v0, s23 4009; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 4010; GCN-HSA-NEXT: v_mov_b32_e32 v2, s22 4011; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 4012; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4013; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4014; 
GCN-HSA-NEXT: s_endpgm 4015; 4016; GCN-NOHSA-VI-LABEL: constant_sextload_v64i16_to_v64i32: 4017; GCN-NOHSA-VI: ; %bb.0: 4018; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4019; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4020; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x40 4021; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x0 4022; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4023; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4024; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4025; GCN-NOHSA-VI-NEXT: s_ashr_i32 s67, s19, 16 4026; GCN-NOHSA-VI-NEXT: s_ashr_i32 s68, s18, 16 4027; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s19, s19 4028; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s18, s18 4029; GCN-NOHSA-VI-NEXT: s_ashr_i32 s65, s17, 16 4030; GCN-NOHSA-VI-NEXT: s_ashr_i32 s66, s16, 16 4031; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s17, s17 4032; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s16, s16 4033; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 4034; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s68 4035; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 4036; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s67 4037; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 4038; GCN-NOHSA-VI-NEXT: s_ashr_i32 s63, s15, 16 4039; GCN-NOHSA-VI-NEXT: s_ashr_i32 s64, s14, 16 4040; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s15, s15 4041; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s14, s14 4042; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 4043; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s66 4044; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17 4045; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s65 4046; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 4047; GCN-NOHSA-VI-NEXT: s_ashr_i32 s61, s13, 16 4048; GCN-NOHSA-VI-NEXT: s_ashr_i32 s62, s12, 16 4049; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s13, s13 4050; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s12, s12 4051; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 4052; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s64 4053; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 4054; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s63 
4055; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 4056; GCN-NOHSA-VI-NEXT: s_ashr_i32 s59, s11, 16 4057; GCN-NOHSA-VI-NEXT: s_ashr_i32 s60, s10, 16 4058; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 4059; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 4060; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 4061; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s62 4062; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 4063; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s61 4064; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 4065; GCN-NOHSA-VI-NEXT: s_ashr_i32 s57, s9, 16 4066; GCN-NOHSA-VI-NEXT: s_ashr_i32 s58, s8, 16 4067; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9 4068; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 4069; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 4070; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s60 4071; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 4072; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s59 4073; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 4074; GCN-NOHSA-VI-NEXT: s_ashr_i32 s55, s7, 16 4075; GCN-NOHSA-VI-NEXT: s_ashr_i32 s56, s6, 16 4076; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 4077; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 4078; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 4079; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s58 4080; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 4081; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s57 4082; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 4083; GCN-NOHSA-VI-NEXT: s_ashr_i32 s53, s5, 16 4084; GCN-NOHSA-VI-NEXT: s_ashr_i32 s54, s4, 16 4085; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 4086; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 4087; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 4088; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s56 4089; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 4090; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s55 4091; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 4092; GCN-NOHSA-VI-NEXT: s_ashr_i32 s52, s50, 16 4093; GCN-NOHSA-VI-NEXT: s_ashr_i32 
s20, s37, 16 4094; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s22, s37 4095; GCN-NOHSA-VI-NEXT: s_ashr_i32 s24, s39, 16 4096; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s26, s39 4097; GCN-NOHSA-VI-NEXT: s_ashr_i32 s28, s41, 16 4098; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s30, s41 4099; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s43, 16 4100; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s35, s43 4101; GCN-NOHSA-VI-NEXT: s_ashr_i32 s37, s45, 16 4102; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s39, s45 4103; GCN-NOHSA-VI-NEXT: s_ashr_i32 s41, s47, 16 4104; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s43, s47 4105; GCN-NOHSA-VI-NEXT: s_ashr_i32 s45, s49, 16 4106; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s47, s49 4107; GCN-NOHSA-VI-NEXT: s_ashr_i32 s49, s51, 16 4108; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s51, s51 4109; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s50, s50 4110; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 4111; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s54 4112; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 4113; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s53 4114; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 4115; GCN-NOHSA-VI-NEXT: s_ashr_i32 s21, s36, 16 4116; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s23, s36 4117; GCN-NOHSA-VI-NEXT: s_ashr_i32 s25, s38, 16 4118; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s27, s38 4119; GCN-NOHSA-VI-NEXT: s_ashr_i32 s29, s40, 16 4120; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s31, s40 4121; GCN-NOHSA-VI-NEXT: s_ashr_i32 s34, s42, 16 4122; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s36, s42 4123; GCN-NOHSA-VI-NEXT: s_ashr_i32 s38, s44, 16 4124; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s40, s44 4125; GCN-NOHSA-VI-NEXT: s_ashr_i32 s42, s46, 16 4126; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s44, s46 4127; GCN-NOHSA-VI-NEXT: s_ashr_i32 s46, s48, 16 4128; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s48, s48 4129; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s50 4130; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s52 4131; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s51 4132; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s49 4133; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, 
s[0:3], 0 offset:112 4134; GCN-NOHSA-VI-NEXT: s_nop 0 4135; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48 4136; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s46 4137; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s47 4138; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s45 4139; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 4140; GCN-NOHSA-VI-NEXT: s_nop 0 4141; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s44 4142; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s42 4143; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s43 4144; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s41 4145; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 4146; GCN-NOHSA-VI-NEXT: s_nop 0 4147; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s40 4148; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s38 4149; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s39 4150; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s37 4151; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 4152; GCN-NOHSA-VI-NEXT: s_nop 0 4153; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36 4154; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 4155; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s35 4156; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 4157; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 4158; GCN-NOHSA-VI-NEXT: s_nop 0 4159; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s31 4160; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 4161; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s30 4162; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s28 4163; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 4164; GCN-NOHSA-VI-NEXT: s_nop 0 4165; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s27 4166; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 4167; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26 4168; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s24 4169; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4170; GCN-NOHSA-VI-NEXT: s_nop 0 4171; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s23 4172; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21 4173; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s22 
4174; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s20 4175; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4176; GCN-NOHSA-VI-NEXT: s_endpgm 4177; 4178; EG-LABEL: constant_sextload_v64i16_to_v64i32: 4179; EG: ; %bb.0: 4180; EG-NEXT: ALU 17, @38, KC0[CB0:0-32], KC1[] 4181; EG-NEXT: TEX 7 @22 4182; EG-NEXT: ALU 75, @56, KC0[CB0:0-32], KC1[] 4183; EG-NEXT: ALU 71, @132, KC0[CB0:0-32], KC1[] 4184; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T48.X, 0 4185; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T41.X, 0 4186; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T56.X, 0 4187; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T55.X, 0 4188; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T54.X, 0 4189; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T53.X, 0 4190; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T52.X, 0 4191; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T51.X, 0 4192; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T50.X, 0 4193; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T49.X, 0 4194; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T40.X, 0 4195; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T39.X, 0 4196; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T38.X, 0 4197; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0 4198; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0 4199; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1 4200; EG-NEXT: CF_END 4201; EG-NEXT: PAD 4202; EG-NEXT: Fetch clause starting at 22: 4203; EG-NEXT: VTX_READ_128 T42.XYZW, T41.X, 16, #1 4204; EG-NEXT: VTX_READ_128 T43.XYZW, T41.X, 32, #1 4205; EG-NEXT: VTX_READ_128 T44.XYZW, T41.X, 0, #1 4206; EG-NEXT: VTX_READ_128 T45.XYZW, T41.X, 48, #1 4207; EG-NEXT: VTX_READ_128 T46.XYZW, T41.X, 64, #1 4208; EG-NEXT: VTX_READ_128 T47.XYZW, T41.X, 80, #1 4209; EG-NEXT: VTX_READ_128 T48.XYZW, T41.X, 96, #1 4210; EG-NEXT: VTX_READ_128 T41.XYZW, T41.X, 112, #1 4211; EG-NEXT: ALU clause starting at 38: 4212; EG-NEXT: LSHR T35.X, KC0[2].Y, literal.x, 4213; EG-NEXT: 
ADD_INT * T0.W, KC0[2].Y, literal.y, 4214; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4215; EG-NEXT: LSHR T36.X, PV.W, literal.x, 4216; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4217; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 4218; EG-NEXT: LSHR T37.X, PV.W, literal.x, 4219; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4220; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 4221; EG-NEXT: LSHR T38.X, PV.W, literal.x, 4222; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4223; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 4224; EG-NEXT: LSHR T39.X, PV.W, literal.x, 4225; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4226; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 4227; EG-NEXT: LSHR T40.X, PV.W, literal.x, 4228; EG-NEXT: MOV * T41.X, KC0[2].Z, 4229; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4230; EG-NEXT: ALU clause starting at 56: 4231; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4232; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 4233; EG-NEXT: LSHR T49.X, PV.W, literal.x, 4234; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4235; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 4236; EG-NEXT: LSHR T50.X, PV.W, literal.x, 4237; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4238; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 4239; EG-NEXT: LSHR T51.X, PV.W, literal.x, 4240; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4241; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 4242; EG-NEXT: LSHR T52.X, PV.W, literal.x, 4243; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4244; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 4245; EG-NEXT: LSHR T53.X, PV.W, literal.x, 4246; EG-NEXT: LSHR T0.Y, T41.W, literal.y, 4247; EG-NEXT: LSHR T0.Z, T41.Y, literal.y, 4248; EG-NEXT: LSHR T0.W, T48.W, literal.y, BS:VEC_120/SCL_212 4249; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4250; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4251; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00) 4252; EG-NEXT: LSHR T54.X, PS, literal.x, 4253; EG-NEXT: LSHR T1.Y, T48.Y, literal.y, 4254; EG-NEXT: LSHR T1.Z, T47.W, literal.y, 
4255; EG-NEXT: LSHR T1.W, T47.Y, literal.y, BS:VEC_120/SCL_212 4256; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.z, 4257; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4258; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) 4259; EG-NEXT: LSHR T55.X, PS, literal.x, 4260; EG-NEXT: LSHR T2.Y, T46.W, literal.y, 4261; EG-NEXT: LSHR T2.Z, T46.Y, literal.y, 4262; EG-NEXT: LSHR T2.W, T45.W, literal.y, BS:VEC_120/SCL_212 4263; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z, 4264; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4265; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00) 4266; EG-NEXT: LSHR T56.X, PS, literal.x, 4267; EG-NEXT: LSHR T3.Y, T45.Y, literal.y, 4268; EG-NEXT: BFE_INT T57.Z, T44.Y, 0.0, literal.y, BS:VEC_120/SCL_212 4269; EG-NEXT: LSHR T3.W, T43.W, literal.y, 4270; EG-NEXT: LSHR * T4.W, T43.Y, literal.y, 4271; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4272; EG-NEXT: BFE_INT T57.X, T44.X, 0.0, literal.x, 4273; EG-NEXT: LSHR T4.Y, T42.W, literal.x, 4274; EG-NEXT: BFE_INT T58.Z, T44.W, 0.0, literal.x, BS:VEC_120/SCL_212 4275; EG-NEXT: LSHR T5.W, T42.Y, literal.x, 4276; EG-NEXT: LSHR * T6.W, T44.Y, literal.x, 4277; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4278; EG-NEXT: BFE_INT T58.X, T44.Z, 0.0, literal.x, 4279; EG-NEXT: LSHR T5.Y, T44.W, literal.x, 4280; EG-NEXT: BFE_INT T59.Z, T42.Y, 0.0, literal.x, 4281; EG-NEXT: BFE_INT T57.W, PS, 0.0, literal.x, 4282; EG-NEXT: LSHR * T6.W, T44.X, literal.x, 4283; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4284; EG-NEXT: BFE_INT T59.X, T42.X, 0.0, literal.x, 4285; EG-NEXT: BFE_INT T57.Y, PS, 0.0, literal.x, 4286; EG-NEXT: BFE_INT T60.Z, T42.W, 0.0, literal.x, 4287; EG-NEXT: BFE_INT T58.W, PV.Y, 0.0, literal.x, 4288; EG-NEXT: LSHR * T6.W, T44.Z, literal.x, 4289; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4290; EG-NEXT: BFE_INT T60.X, T42.Z, 0.0, literal.x, 4291; EG-NEXT: BFE_INT T58.Y, PS, 0.0, literal.x, 4292; EG-NEXT: BFE_INT T44.Z, T43.Y, 0.0, literal.x, 4293; EG-NEXT: BFE_INT T59.W, T5.W, 0.0, literal.x, 4294; EG-NEXT: LSHR * T5.W, 
T42.X, literal.x, 4295; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4296; EG-NEXT: BFE_INT T44.X, T43.X, 0.0, literal.x, 4297; EG-NEXT: BFE_INT T59.Y, PS, 0.0, literal.x, 4298; EG-NEXT: BFE_INT T61.Z, T43.W, 0.0, literal.x, 4299; EG-NEXT: BFE_INT T60.W, T4.Y, 0.0, literal.x, 4300; EG-NEXT: LSHR * T5.W, T42.Z, literal.x, 4301; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4302; EG-NEXT: BFE_INT T61.X, T43.Z, 0.0, literal.x, 4303; EG-NEXT: BFE_INT T60.Y, PS, 0.0, literal.x, 4304; EG-NEXT: BFE_INT T42.Z, T45.Y, 0.0, literal.x, 4305; EG-NEXT: BFE_INT * T44.W, T4.W, 0.0, literal.x, 4306; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4307; EG-NEXT: ALU clause starting at 132: 4308; EG-NEXT: LSHR * T4.W, T43.X, literal.x, 4309; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4310; EG-NEXT: BFE_INT T42.X, T45.X, 0.0, literal.x, 4311; EG-NEXT: BFE_INT T44.Y, PV.W, 0.0, literal.x, 4312; EG-NEXT: BFE_INT T62.Z, T45.W, 0.0, literal.x, 4313; EG-NEXT: BFE_INT T61.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212 4314; EG-NEXT: LSHR * T3.W, T43.Z, literal.x, 4315; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4316; EG-NEXT: BFE_INT T62.X, T45.Z, 0.0, literal.x, 4317; EG-NEXT: BFE_INT T61.Y, PS, 0.0, literal.x, 4318; EG-NEXT: BFE_INT T43.Z, T46.Y, 0.0, literal.x, 4319; EG-NEXT: BFE_INT T42.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4320; EG-NEXT: LSHR * T3.W, T45.X, literal.x, 4321; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4322; EG-NEXT: BFE_INT T43.X, T46.X, 0.0, literal.x, 4323; EG-NEXT: BFE_INT T42.Y, PS, 0.0, literal.x, 4324; EG-NEXT: BFE_INT T63.Z, T46.W, 0.0, literal.x, 4325; EG-NEXT: BFE_INT T62.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212 4326; EG-NEXT: LSHR * T2.W, T45.Z, literal.x, 4327; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4328; EG-NEXT: BFE_INT T63.X, T46.Z, 0.0, literal.x, 4329; EG-NEXT: BFE_INT T62.Y, PS, 0.0, literal.x, 4330; EG-NEXT: BFE_INT T45.Z, T47.Y, 0.0, literal.x, 4331; EG-NEXT: BFE_INT T43.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4332; EG-NEXT: LSHR * T2.W, T46.X, 
literal.x, 4333; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4334; EG-NEXT: BFE_INT T45.X, T47.X, 0.0, literal.x, 4335; EG-NEXT: BFE_INT T43.Y, PS, 0.0, literal.x, 4336; EG-NEXT: BFE_INT T64.Z, T47.W, 0.0, literal.x, 4337; EG-NEXT: BFE_INT T63.W, T2.Y, 0.0, literal.x, 4338; EG-NEXT: LSHR * T2.W, T46.Z, literal.x, 4339; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4340; EG-NEXT: BFE_INT T64.X, T47.Z, 0.0, literal.x, 4341; EG-NEXT: BFE_INT T63.Y, PS, 0.0, literal.x, 4342; EG-NEXT: BFE_INT T46.Z, T48.Y, 0.0, literal.x, 4343; EG-NEXT: BFE_INT T45.W, T1.W, 0.0, literal.x, 4344; EG-NEXT: LSHR * T1.W, T47.X, literal.x, 4345; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4346; EG-NEXT: BFE_INT T46.X, T48.X, 0.0, literal.x, 4347; EG-NEXT: BFE_INT T45.Y, PS, 0.0, literal.x, 4348; EG-NEXT: BFE_INT T65.Z, T48.W, 0.0, literal.x, 4349; EG-NEXT: BFE_INT T64.W, T1.Z, 0.0, literal.x, 4350; EG-NEXT: LSHR * T1.W, T47.Z, literal.x, 4351; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4352; EG-NEXT: BFE_INT T65.X, T48.Z, 0.0, literal.x, 4353; EG-NEXT: BFE_INT T64.Y, PS, 0.0, literal.x, 4354; EG-NEXT: BFE_INT T47.Z, T41.Y, 0.0, literal.x, 4355; EG-NEXT: BFE_INT T46.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4356; EG-NEXT: LSHR * T1.W, T48.X, literal.x, 4357; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4358; EG-NEXT: BFE_INT T47.X, T41.X, 0.0, literal.x, 4359; EG-NEXT: BFE_INT T46.Y, PS, 0.0, literal.x, 4360; EG-NEXT: BFE_INT T66.Z, T41.W, 0.0, literal.x, 4361; EG-NEXT: BFE_INT T65.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212 4362; EG-NEXT: LSHR * T0.W, T48.Z, literal.x, 4363; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4364; EG-NEXT: BFE_INT T66.X, T41.Z, 0.0, literal.x, 4365; EG-NEXT: BFE_INT T65.Y, PS, 0.0, literal.x, 4366; EG-NEXT: LSHR T1.Z, T41.X, literal.x, 4367; EG-NEXT: BFE_INT T47.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4368; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4369; EG-NEXT: 16(2.242078e-44), 224(3.138909e-43) 4370; EG-NEXT: LSHR T41.X, PS, literal.x, 4371; EG-NEXT: BFE_INT 
T47.Y, PV.Z, 0.0, literal.y, 4372; EG-NEXT: LSHR T0.Z, T41.Z, literal.y, 4373; EG-NEXT: BFE_INT T66.W, T0.Y, 0.0, literal.y, 4374; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 4375; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4376; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 4377; EG-NEXT: LSHR T48.X, PS, literal.x, 4378; EG-NEXT: BFE_INT * T66.Y, PV.Z, 0.0, literal.y, 4379; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4380 %load = load <64 x i16>, <64 x i16> addrspace(4)* %in 4381 %ext = sext <64 x i16> %load to <64 x i32> 4382 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 4383 ret void 4384} 4385 4386define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { 4387; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i64: 4388; GCN-NOHSA-SI: ; %bb.0: 4389; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4390; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 4391; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 4392; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 4393; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 4394; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4395; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 4396; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 4397; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 4398; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 4399; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 4400; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 4401; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4402; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4403; GCN-NOHSA-SI-NEXT: s_endpgm 4404; 4405; GCN-HSA-LABEL: constant_zextload_i16_to_i64: 4406; GCN-HSA: ; %bb.0: 4407; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4408; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4409; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 4410; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 4411; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3] 4412; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 4413; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 4414; GCN-HSA-NEXT: v_mov_b32_e32 v3, 0 4415; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 4416; GCN-HSA-NEXT: 
flat_store_dwordx2 v[0:1], v[2:3] 4417; GCN-HSA-NEXT: s_endpgm 4418; 4419; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i64: 4420; GCN-NOHSA-VI: ; %bb.0: 4421; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 4422; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4423; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4424; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 4425; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 4426; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4427; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 4428; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 4429; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 4430; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 4431; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 4432; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 4433; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4434; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 4435; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 4436; GCN-NOHSA-VI-NEXT: s_endpgm 4437; 4438; EG-LABEL: constant_zextload_i16_to_i64: 4439; EG: ; %bb.0: 4440; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4441; EG-NEXT: TEX 0 @6 4442; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 4443; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4444; EG-NEXT: CF_END 4445; EG-NEXT: PAD 4446; EG-NEXT: Fetch clause starting at 6: 4447; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 4448; EG-NEXT: ALU clause starting at 8: 4449; EG-NEXT: MOV * T0.X, KC0[2].Z, 4450; EG-NEXT: ALU clause starting at 9: 4451; EG-NEXT: MOV * T0.Y, 0.0, 4452; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 4453; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4454 %a = load i16, i16 addrspace(4)* %in 4455 %ext = zext i16 %a to i64 4456 store i64 %ext, i64 addrspace(1)* %out 4457 ret void 4458} 4459 4460; FIXME: Need to optimize this sequence to avoid extra bfe: 4461; t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64 4462; t31: i64 = any_extend t28 4463; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16 4464; TODO: These could be expanded earlier using ASHR 15
; Scalar sign-extension case: the kernel loads one i16 through the addrspace(4)
; pointer %in, sign-extends it to i64, and stores the result through the
; addrspace(1) pointer %out. The assertion groups after the signature are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script rather than editing them by hand.
4465define amdgpu_kernel 
void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { 4466; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i64: 4467; GCN-NOHSA-SI: ; %bb.0: 4468; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4469; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 4470; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 4471; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 4472; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 4473; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4474; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 4475; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 4476; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 4477; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 4478; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 4479; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4480; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4481; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4482; GCN-NOHSA-SI-NEXT: s_endpgm 4483; 4484; GCN-HSA-LABEL: constant_sextload_i16_to_i64: 4485; GCN-HSA: ; %bb.0: 4486; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4487; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4488; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 4489; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 4490; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3] 4491; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 4492; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 4493; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 4494; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 4495; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4496; GCN-HSA-NEXT: s_endpgm 4497; 4498; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i64: 4499; GCN-NOHSA-VI: ; %bb.0: 4500; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 4501; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4502; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4503; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4504; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 4505; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 4506; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 4507; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 4508; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 4509; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 4510; 
GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 4511; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4512; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 4513; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4514; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 4515; GCN-NOHSA-VI-NEXT: s_endpgm 4516; 4517; EG-LABEL: constant_sextload_i16_to_i64: 4518; EG: ; %bb.0: 4519; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4520; EG-NEXT: TEX 0 @6 4521; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 4522; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4523; EG-NEXT: CF_END 4524; EG-NEXT: PAD 4525; EG-NEXT: Fetch clause starting at 6: 4526; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 4527; EG-NEXT: ALU clause starting at 8: 4528; EG-NEXT: MOV * T0.X, KC0[2].Z, 4529; EG-NEXT: ALU clause starting at 9: 4530; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 4531; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 4532; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 4533; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 4534; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 4535 %a = load i16, i16 addrspace(4)* %in 4536 %ext = sext i16 %a to i64 4537 store i64 %ext, i64 addrspace(1)* %out 4538 ret void 4539} 4540
; Single-element vector zero-extension case: the kernel loads a <1 x i16>
; through the addrspace(4) pointer %in, zero-extends it to <1 x i64>, and
; stores the result through the addrspace(1) pointer %out. The assertion
; groups after the signature are autogenerated; do not edit them by hand.
4541define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 { 4542; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i64: 4543; GCN-NOHSA-SI: ; %bb.0: 4544; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4545; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 4546; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 4547; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 4548; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 4549; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4550; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 4551; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 4552; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 4553; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 4554; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 4555; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 4556; GCN-NOHSA-SI-NEXT: s_waitcnt 
vmcnt(0) 4557; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4558; GCN-NOHSA-SI-NEXT: s_endpgm 4559; 4560; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i64: 4561; GCN-HSA: ; %bb.0: 4562; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4563; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4564; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 4565; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 4566; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3] 4567; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 4568; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 4569; GCN-HSA-NEXT: v_mov_b32_e32 v3, 0 4570; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 4571; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4572; GCN-HSA-NEXT: s_endpgm 4573; 4574; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i64: 4575; GCN-NOHSA-VI: ; %bb.0: 4576; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 4577; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4578; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4579; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 4580; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 4581; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4582; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 4583; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 4584; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 4585; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 4586; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 4587; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 4588; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4589; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 4590; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 4591; GCN-NOHSA-VI-NEXT: s_endpgm 4592; 4593; EG-LABEL: constant_zextload_v1i16_to_v1i64: 4594; EG: ; %bb.0: 4595; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4596; EG-NEXT: TEX 0 @6 4597; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 4598; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4599; EG-NEXT: CF_END 4600; EG-NEXT: PAD 4601; EG-NEXT: Fetch clause starting at 6: 4602; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 4603; EG-NEXT: ALU clause starting at 8: 4604; EG-NEXT: MOV * T0.X, KC0[2].Z, 4605; EG-NEXT: ALU 
clause starting at 9: 4606; EG-NEXT: MOV * T0.Y, 0.0, 4607; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 4608; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4609 %load = load <1 x i16>, <1 x i16> addrspace(4)* %in 4610 %ext = zext <1 x i16> %load to <1 x i64> 4611 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out 4612 ret void 4613} 4614
; Single-element vector sign-extension case: the kernel loads a <1 x i16>
; through the addrspace(4) pointer %in, sign-extends it to <1 x i64>, and
; stores the result through the addrspace(1) pointer %out. The assertion
; groups after the signature are autogenerated; do not edit them by hand.
4615define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 { 4616; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i64: 4617; GCN-NOHSA-SI: ; %bb.0: 4618; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4619; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 4620; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 4621; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 4622; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 4623; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4624; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 4625; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 4626; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 4627; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 4628; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 4629; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4630; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4631; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4632; GCN-NOHSA-SI-NEXT: s_endpgm 4633; 4634; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i64: 4635; GCN-HSA: ; %bb.0: 4636; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4637; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4638; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 4639; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 4640; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3] 4641; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 4642; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 4643; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 4644; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 4645; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4646; GCN-HSA-NEXT: s_endpgm 4647; 4648; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i64: 4649; GCN-NOHSA-VI: ; %bb.0: 4650; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 4651; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4652; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4653; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4654; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 4655; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 4656; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 4657; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 4658; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 4659; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 4660; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 4661; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4662; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 4663; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4664; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 4665; GCN-NOHSA-VI-NEXT: s_endpgm 4666; 4667; EG-LABEL: constant_sextload_v1i16_to_v1i64: 4668; EG: ; %bb.0: 4669; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4670; EG-NEXT: TEX 0 @6 4671; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 4672; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4673; EG-NEXT: CF_END 4674; EG-NEXT: PAD 4675; EG-NEXT: Fetch clause starting at 6: 4676; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 4677; EG-NEXT: ALU clause starting at 8: 4678; EG-NEXT: MOV * T0.X, KC0[2].Z, 4679; EG-NEXT: ALU clause starting at 9: 4680; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 4681; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 4682; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 4683; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 4684; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 4685 %load = load <1 x i16>, <1 x i16> addrspace(4)* %in 4686 %ext = sext <1 x i16> %load to <1 x i64> 4687 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out 4688 ret void 4689} 4690
; Two-element vector zero-extension case: the kernel loads a <2 x i16>
; through the addrspace(4) pointer %in, zero-extends each element to i64,
; and stores the <2 x i64> result through the addrspace(1) pointer %out.
; The assertion groups after the signature are autogenerated; do not edit
; them by hand.
4691define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 4692; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i64: 4693; GCN-NOHSA-SI: ; %bb.0: 4694; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4695; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4696; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 
4697; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4698; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 4699; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4700; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 4701; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s2, 0xffff 4702; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4703; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 4704; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 4705; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 4706; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4707; GCN-NOHSA-SI-NEXT: s_endpgm 4708; 4709; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i64: 4710; GCN-HSA: ; %bb.0: 4711; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4712; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 4713; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 4714; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4715; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4716; GCN-HSA-NEXT: s_load_dword s0, s[2:3], 0x0 4717; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4718; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4719; GCN-HSA-NEXT: s_lshr_b32 s1, s0, 16 4720; GCN-HSA-NEXT: s_and_b32 s0, s0, 0xffff 4721; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 4722; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 4723; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4724; GCN-HSA-NEXT: s_endpgm 4725; 4726; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i64: 4727; GCN-NOHSA-VI: ; %bb.0: 4728; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 4729; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 4730; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4731; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4732; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 4733; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4734; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 4735; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[6:7], 0x0 4736; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 4737; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4738; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s4, 0xffff 4739; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 4740; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s5 4741; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 4742; GCN-NOHSA-VI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4743; GCN-NOHSA-VI-NEXT: s_endpgm 4744; 4745; EG-LABEL: constant_zextload_v2i16_to_v2i64: 4746; EG: ; %bb.0: 4747; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4748; EG-NEXT: TEX 0 @6 4749; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[] 4750; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 4751; EG-NEXT: CF_END 4752; EG-NEXT: PAD 4753; EG-NEXT: Fetch clause starting at 6: 4754; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 4755; EG-NEXT: ALU clause starting at 8: 4756; EG-NEXT: MOV * T4.X, KC0[2].Z, 4757; EG-NEXT: ALU clause starting at 9: 4758; EG-NEXT: LSHR * T4.Z, T4.X, literal.x, 4759; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4760; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 4761; EG-NEXT: MOV T4.Y, 0.0, 4762; EG-NEXT: MOV T4.W, 0.0, 4763; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 4764; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 4765 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 4766 %ext = zext <2 x i16> %load to <2 x i64> 4767 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 4768 ret void 4769} 4770
; Two-element vector sign-extension case: the kernel loads a <2 x i16>
; through the addrspace(4) pointer %in, sign-extends each element to i64,
; and stores the <2 x i64> result through the addrspace(1) pointer %out.
; The assertion groups after the signature are autogenerated; do not edit
; them by hand.
4771define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 4772; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i64: 4773; GCN-NOHSA-SI: ; %bb.0: 4774; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4775; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4776; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 4777; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4778; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4779; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 4780; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[2:3], 0x100000 4781; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 4782; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4783; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 4784; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 4785; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 4786; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 4787; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 
4788; GCN-NOHSA-SI-NEXT: s_endpgm 4789; 4790; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i64: 4791; GCN-HSA: ; %bb.0: 4792; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4793; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4794; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4795; GCN-HSA-NEXT: s_load_dword s0, s[2:3], 0x0 4796; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4797; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4798; GCN-HSA-NEXT: s_lshr_b32 s2, s0, 16 4799; GCN-HSA-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000 4800; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 4801; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 4802; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 4803; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 4804; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 4805; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4806; GCN-HSA-NEXT: s_endpgm 4807; 4808; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i64: 4809; GCN-NOHSA-VI: ; %bb.0: 4810; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 4811; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4812; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4813; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4814; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 4815; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[6:7], 0x0 4816; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 4817; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4818; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[4:5], 0x100000 4819; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 4820; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 4821; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 4822; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 4823; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 4824; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 4825; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4826; GCN-NOHSA-VI-NEXT: s_endpgm 4827; 4828; EG-LABEL: constant_sextload_v2i16_to_v2i64: 4829; EG: ; %bb.0: 4830; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4831; EG-NEXT: TEX 0 @6 4832; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 4833; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 4834; EG-NEXT: CF_END 4835; 
EG-NEXT: PAD 4836; EG-NEXT: Fetch clause starting at 6: 4837; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 4838; EG-NEXT: ALU clause starting at 8: 4839; EG-NEXT: MOV * T4.X, KC0[2].Z, 4840; EG-NEXT: ALU clause starting at 9: 4841; EG-NEXT: ASHR * T4.W, T4.X, literal.x, 4842; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 4843; EG-NEXT: ASHR * T4.Z, T4.X, literal.x, 4844; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4845; EG-NEXT: BFE_INT T4.X, T4.X, 0.0, literal.x, 4846; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 4847; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 4848; EG-NEXT: ASHR * T4.Y, PV.X, literal.x, 4849; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 4850 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 4851 %ext = sext <2 x i16> %load to <2 x i64> 4852 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 4853 ret void 4854} 4855
; Four-element vector zero-extension case: the kernel loads a <4 x i16>
; through the addrspace(4) pointer %in, zero-extends each element to i64,
; and stores the <4 x i64> result through the addrspace(1) pointer %out.
; The assertion groups after the signature are autogenerated; do not edit
; them by hand.
4856define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 { 4857; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i64: 4858; GCN-NOHSA-SI: ; %bb.0: 4859; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4860; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 4861; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4862; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 4863; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 4864; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 4865; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, 0xffff 4866; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 4867; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 4868; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 4869; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4870; GCN-NOHSA-SI-NEXT: s_lshr_b32 s0, s3, 16 4871; GCN-NOHSA-SI-NEXT: s_lshr_b32 s1, s2, 16 4872; GCN-NOHSA-SI-NEXT: s_and_b32 s3, s3, s8 4873; GCN-NOHSA-SI-NEXT: s_and_b32 s2, s2, s8 4874; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s3 4875; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s0 4876; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 4877; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4878; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v0, s2 4879; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s1 4880; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4881; GCN-NOHSA-SI-NEXT: s_endpgm 4882; 4883; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i64: 4884; GCN-HSA: ; %bb.0: 4885; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4886; GCN-HSA-NEXT: s_mov_b32 s6, 0xffff 4887; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 4888; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 4889; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4890; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 4891; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4892; GCN-HSA-NEXT: s_lshr_b32 s4, s3, 16 4893; GCN-HSA-NEXT: s_lshr_b32 s5, s2, 16 4894; GCN-HSA-NEXT: s_and_b32 s7, s2, s6 4895; GCN-HSA-NEXT: s_and_b32 s2, s3, s6 4896; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4897; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 4898; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4899; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4900; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 4901; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4902; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4903; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4904; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7 4905; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 4906; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4907; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4908; GCN-HSA-NEXT: s_endpgm 4909; 4910; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i64: 4911; GCN-NOHSA-VI: ; %bb.0: 4912; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 4913; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff 4914; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 4915; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4916; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4917; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4918; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 4919; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 4920; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4921; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 4922; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4923; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s5, s8 4924; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16 4925; 
GCN-NOHSA-VI-NEXT: s_and_b32 s6, s4, s8 4926; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 4927; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s7 4928; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 4929; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4930; GCN-NOHSA-VI-NEXT: s_nop 0 4931; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 4932; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 4933; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4934; GCN-NOHSA-VI-NEXT: s_endpgm 4935; 4936; EG-LABEL: constant_zextload_v4i16_to_v4i64: 4937; EG: ; %bb.0: 4938; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4939; EG-NEXT: TEX 0 @6 4940; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[] 4941; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0 4942; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1 4943; EG-NEXT: CF_END 4944; EG-NEXT: Fetch clause starting at 6: 4945; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 4946; EG-NEXT: ALU clause starting at 8: 4947; EG-NEXT: MOV * T5.X, KC0[2].Z, 4948; EG-NEXT: ALU clause starting at 9: 4949; EG-NEXT: MOV T2.X, T5.X, 4950; EG-NEXT: MOV * T3.X, T5.Y, 4951; EG-NEXT: MOV T0.Y, PV.X, 4952; EG-NEXT: MOV * T0.Z, PS, 4953; EG-NEXT: LSHR * T5.Z, PV.Z, literal.x, 4954; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4955; EG-NEXT: AND_INT T5.X, T0.Z, literal.x, 4956; EG-NEXT: MOV T5.Y, 0.0, 4957; EG-NEXT: LSHR T6.Z, T0.Y, literal.y, 4958; EG-NEXT: AND_INT * T6.X, T0.Y, literal.x, 4959; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 4960; EG-NEXT: MOV T6.Y, 0.0, 4961; EG-NEXT: MOV T5.W, 0.0, 4962; EG-NEXT: MOV * T6.W, 0.0, 4963; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 4964; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4965; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4966; EG-NEXT: LSHR * T8.X, PV.W, literal.x, 4967; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4968 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 4969 %ext = zext <4 x i16> %load to <4 x i64> 4970 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out 4971 ret void 4972} 4973
; Four-element vector sign-extension case: the kernel loads a <4 x i16>
; through the addrspace(4) pointer %in, sign-extends each element to i64,
; and stores the <4 x i64> result through the addrspace(1) pointer %out.
; The assertion groups after the signature are autogenerated; do not edit
; them by hand.
4974define 
amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 { 4975; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i64: 4976; GCN-NOHSA-SI: ; %bb.0: 4977; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4978; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4979; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 4980; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4981; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4982; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4983; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s5 4984; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s4, 16 4985; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[4:5], 0x100000 4986; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 4987; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 4988; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 4989; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 4990; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 4991; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 4992; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 4993; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4994; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4995; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 4996; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 4997; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8 4998; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 4999; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5000; GCN-NOHSA-SI-NEXT: s_endpgm 5001; 5002; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i64: 5003; GCN-HSA: ; %bb.0: 5004; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5005; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5006; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 5007; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5008; GCN-HSA-NEXT: s_mov_b32 s4, s3 5009; GCN-HSA-NEXT: s_lshr_b32 s6, s2, 16 5010; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 5011; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[2:3], 0x100000 5012; GCN-HSA-NEXT: s_ashr_i64 s[2:3], s[2:3], 48 5013; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 
5014; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 5015; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5016; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 5017; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5018; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5019; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 5020; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 5021; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5022; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5023; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5024; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 5025; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 5026; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 5027; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 5028; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5029; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5030; GCN-HSA-NEXT: s_endpgm 5031; 5032; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i64: 5033; GCN-NOHSA-VI: ; %bb.0: 5034; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5035; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5036; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5037; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5038; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5039; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5040; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5041; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5042; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s5 5043; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s5, 16 5044; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 5045; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5046; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[4:5], 0x100000 5047; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5048; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 5049; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 5050; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 5051; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 5052; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11 5053; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5054; GCN-NOHSA-VI-NEXT: s_nop 0 5055; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 5056; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 5057; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v2, s4 5058; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 5059; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5060; GCN-NOHSA-VI-NEXT: s_endpgm 5061; 5062; EG-LABEL: constant_sextload_v4i16_to_v4i64: 5063; EG: ; %bb.0: 5064; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5065; EG-NEXT: TEX 0 @6 5066; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[] 5067; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0 5068; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 5069; EG-NEXT: CF_END 5070; EG-NEXT: Fetch clause starting at 6: 5071; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 5072; EG-NEXT: ALU clause starting at 8: 5073; EG-NEXT: MOV * T5.X, KC0[2].Z, 5074; EG-NEXT: ALU clause starting at 9: 5075; EG-NEXT: MOV T2.X, T5.X, 5076; EG-NEXT: MOV * T3.X, T5.Y, 5077; EG-NEXT: MOV T0.Y, PS, 5078; EG-NEXT: MOV * T0.Z, PV.X, 5079; EG-NEXT: ASHR * T5.W, PV.Z, literal.x, 5080; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5081; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 5082; EG-NEXT: ASHR T5.Z, T0.Z, literal.y, 5083; EG-NEXT: ASHR * T7.W, T0.Y, literal.z, 5084; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5085; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5086; EG-NEXT: BFE_INT T5.X, T0.Z, 0.0, literal.x, 5087; EG-NEXT: ASHR * T7.Z, T0.Y, literal.x, 5088; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5089; EG-NEXT: BFE_INT T7.X, T0.Y, 0.0, literal.x, 5090; EG-NEXT: ASHR T5.Y, PV.X, literal.y, 5091; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 5092; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5093; EG-NEXT: LSHR T8.X, PV.W, literal.x, 5094; EG-NEXT: ASHR * T7.Y, PV.X, literal.y, 5095; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5096 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 5097 %ext = sext <4 x i16> %load to <4 x i64> 5098 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out 5099 ret void 5100} 5101 5102define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 { 5103; GCN-NOHSA-SI-LABEL: 
constant_zextload_v8i16_to_v8i64: 5104; GCN-NOHSA-SI: ; %bb.0: 5105; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5106; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5107; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5108; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0 5109; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5110; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5111; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff 5112; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5113; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5114; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5115; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5116; GCN-NOHSA-SI-NEXT: s_lshr_b32 s0, s9, 16 5117; GCN-NOHSA-SI-NEXT: s_lshr_b32 s1, s11, 16 5118; GCN-NOHSA-SI-NEXT: s_lshr_b32 s3, s10, 16 5119; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s8, 16 5120; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, s2 5121; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, s2 5122; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s2 5123; GCN-NOHSA-SI-NEXT: s_and_b32 s2, s9, s2 5124; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11 5125; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s1 5126; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:48 5127; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5128; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s2 5129; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s0 5130; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 5131; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5132; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 5133; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s3 5134; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:32 5135; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5136; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 5137; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 5138; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5139; GCN-NOHSA-SI-NEXT: s_endpgm 5140; 5141; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i64: 5142; GCN-HSA: ; %bb.0: 5143; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5144; GCN-HSA-NEXT: s_mov_b32 s8, 0xffff 5145; 
GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5146; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5147; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5148; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5149; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5150; GCN-HSA-NEXT: s_lshr_b32 s9, s5, 16 5151; GCN-HSA-NEXT: s_lshr_b32 s2, s7, 16 5152; GCN-HSA-NEXT: s_lshr_b32 s10, s6, 16 5153; GCN-HSA-NEXT: s_lshr_b32 s11, s4, 16 5154; GCN-HSA-NEXT: s_and_b32 s3, s7, s8 5155; GCN-HSA-NEXT: s_and_b32 s4, s4, s8 5156; GCN-HSA-NEXT: s_and_b32 s6, s6, s8 5157; GCN-HSA-NEXT: s_and_b32 s5, s5, s8 5158; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 5159; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 5160; GCN-HSA-NEXT: v_mov_b32_e32 v0, s3 5161; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5162; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5163; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5164; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5165; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5166; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5167; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5168; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5169; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 5170; GCN-HSA-NEXT: v_mov_b32_e32 v0, s5 5171; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 5172; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5173; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5174; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5175; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 5176; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 5177; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5178; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5179; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5180; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 5181; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 5182; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5183; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5184; GCN-HSA-NEXT: s_endpgm 5185; 5186; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i64: 5187; GCN-NOHSA-VI: ; %bb.0: 5188; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5189; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff 5190; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5191; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5192; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5193; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5194; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5195; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5196; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 5197; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5198; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5199; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s4, s8 5200; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s5, s8 5201; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s6, s8 5202; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s7, s8 5203; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s7, 16 5204; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 5205; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 5206; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 5207; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5208; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16 5209; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s11 5210; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 5211; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5212; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5213; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 5214; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 5215; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5216; GCN-NOHSA-VI-NEXT: s_nop 0 5217; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s9 5218; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5219; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5220; GCN-NOHSA-VI-NEXT: s_endpgm 5221; 5222; EG-LABEL: constant_zextload_v8i16_to_v8i64: 5223; EG: ; %bb.0: 5224; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 5225; EG-NEXT: TEX 0 @8 5226; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[] 5227; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0 5228; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0 5229; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0 5230; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1 5231; EG-NEXT: CF_END 5232; EG-NEXT: Fetch clause starting at 8: 5233; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 5234; 
EG-NEXT: ALU clause starting at 10: 5235; EG-NEXT: MOV * T7.X, KC0[2].Z, 5236; EG-NEXT: ALU clause starting at 11: 5237; EG-NEXT: LSHR * T8.Z, T7.W, literal.x, 5238; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5239; EG-NEXT: AND_INT T8.X, T7.W, literal.x, 5240; EG-NEXT: MOV T8.Y, 0.0, 5241; EG-NEXT: LSHR T9.Z, T7.Z, literal.y, 5242; EG-NEXT: AND_INT * T9.X, T7.Z, literal.x, 5243; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5244; EG-NEXT: MOV T9.Y, 0.0, 5245; EG-NEXT: LSHR * T10.Z, T7.Y, literal.x, 5246; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5247; EG-NEXT: AND_INT T10.X, T7.Y, literal.x, 5248; EG-NEXT: MOV T10.Y, 0.0, 5249; EG-NEXT: LSHR T7.Z, T7.X, literal.y, 5250; EG-NEXT: AND_INT * T7.X, T7.X, literal.x, 5251; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5252; EG-NEXT: MOV T7.Y, 0.0, 5253; EG-NEXT: MOV T8.W, 0.0, 5254; EG-NEXT: MOV * T9.W, 0.0, 5255; EG-NEXT: MOV T10.W, 0.0, 5256; EG-NEXT: MOV * T7.W, 0.0, 5257; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x, 5258; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5259; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5260; EG-NEXT: LSHR T12.X, PV.W, literal.x, 5261; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5262; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5263; EG-NEXT: LSHR T13.X, PV.W, literal.x, 5264; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5265; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5266; EG-NEXT: LSHR * T14.X, PV.W, literal.x, 5267; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5268 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 5269 %ext = zext <8 x i16> %load to <8 x i64> 5270 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out 5271 ret void 5272} 5273; Test: load <8 x i16> via the addrspace(4) pointer %in, sext to <8 x i64>, store via the addrspace(1) pointer %out. The per-prefix check lines inside the body are autogenerated (utils/update_llc_test_checks.py), so regenerate them instead of editing by hand. 5274define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 { 5275; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i64: 5276; GCN-NOHSA-SI: ; %bb.0: 5277; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5278; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5279; GCN-NOHSA-SI-NEXT: 
s_load_dwordx4 s[4:7], s[2:3], 0x0 5280; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5281; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5282; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5283; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s7 5284; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s5 5285; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s6, 16 5286; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s4, 16 5287; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[4:5], 0x100000 5288; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000 5289; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 5290; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 5291; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5292; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 5293; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 5294; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5295; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 5296; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9 5297; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 5298; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 5299; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5300; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5301; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 5302; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 5303; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 5304; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 5305; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5306; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5307; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 5308; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 5309; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s16 5310; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s17 5311; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 5312; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13 5313; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5314; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s14 5315; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s15 5316; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 5317; 
GCN-NOHSA-SI-NEXT: s_endpgm 5318; 5319; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i64: 5320; GCN-HSA: ; %bb.0: 5321; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5322; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5323; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5324; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5325; GCN-HSA-NEXT: s_mov_b32 s2, s7 5326; GCN-HSA-NEXT: s_mov_b32 s8, s5 5327; GCN-HSA-NEXT: s_lshr_b32 s10, s6, 16 5328; GCN-HSA-NEXT: s_lshr_b32 s12, s4, 16 5329; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 5330; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[4:5], 0x100000 5331; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000 5332; GCN-HSA-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 5333; GCN-HSA-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 5334; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5335; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5336; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 5337; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5338; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 5339; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5340; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5341; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5342; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5343; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5344; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 5345; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 5346; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5347; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5348; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5349; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5350; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 5351; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 5352; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 5353; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 5354; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 5355; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5356; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5357; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5358; GCN-HSA-NEXT: v_mov_b32_e32 v0, s16 5359; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 5360; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 5361; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 5362; GCN-HSA-NEXT: v_mov_b32_e32 v4, 
s2 5363; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5364; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5365; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 5366; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 5367; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12 5368; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13 5369; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5370; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5371; GCN-HSA-NEXT: s_endpgm 5372; 5373; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i64: 5374; GCN-NOHSA-VI: ; %bb.0: 5375; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5376; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5377; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5378; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5379; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5380; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5381; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 5382; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5383; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[6:7], 0x100000 5384; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 5385; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000 5386; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7 5387; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000 5388; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16 5389; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 5390; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s5 5391; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s5, 16 5392; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 5393; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 5394; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 5395; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 5396; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5397; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[4:5], 0x100000 5398; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5399; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5400; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5401; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 5402; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 5403; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16 5404; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 5405; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5406; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 5407; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 5408; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11 5409; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12 5410; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13 5411; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5412; GCN-NOHSA-VI-NEXT: s_nop 0 5413; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 5414; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 5415; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5416; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 5417; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5418; GCN-NOHSA-VI-NEXT: s_endpgm 5419; 5420; EG-LABEL: constant_sextload_v8i16_to_v8i64: 5421; EG: ; %bb.0: 5422; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 5423; EG-NEXT: TEX 0 @8 5424; EG-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[] 5425; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0 5426; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0 5427; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0 5428; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1 5429; EG-NEXT: CF_END 5430; EG-NEXT: Fetch clause starting at 8: 5431; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 5432; EG-NEXT: ALU clause starting at 10: 5433; EG-NEXT: MOV * T7.X, KC0[2].Z, 5434; EG-NEXT: ALU clause starting at 11: 5435; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 5436; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5437; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5438; EG-NEXT: LSHR T9.X, PV.W, literal.x, 5439; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 5440; EG-NEXT: ASHR * T10.W, T7.X, literal.z, 5441; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5442; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5443; EG-NEXT: LSHR T11.X, PV.W, literal.x, 5444; EG-NEXT: ASHR T10.Z, T7.X, literal.y, 5445; EG-NEXT: ASHR * T12.W, T7.Y, literal.z, 5446; EG-NEXT: 2(2.802597e-45), 
16(2.242078e-44) 5447; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5448; EG-NEXT: BFE_INT T10.X, T7.X, 0.0, literal.x, 5449; EG-NEXT: ASHR T12.Z, T7.Y, literal.x, 5450; EG-NEXT: ASHR * T13.W, T7.Z, literal.y, 5451; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5452; EG-NEXT: BFE_INT T12.X, T7.Y, 0.0, literal.x, 5453; EG-NEXT: ASHR T10.Y, PV.X, literal.y, 5454; EG-NEXT: ASHR T13.Z, T7.Z, literal.x, 5455; EG-NEXT: ASHR * T14.W, T7.W, literal.y, 5456; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5457; EG-NEXT: BFE_INT T13.X, T7.Z, 0.0, literal.x, 5458; EG-NEXT: ASHR T12.Y, PV.X, literal.y, 5459; EG-NEXT: ASHR * T14.Z, T7.W, literal.x, 5460; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5461; EG-NEXT: BFE_INT T14.X, T7.W, 0.0, literal.x, 5462; EG-NEXT: ASHR T13.Y, PV.X, literal.y, 5463; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 5464; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5465; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 5466; EG-NEXT: LSHR T7.X, PV.W, literal.x, 5467; EG-NEXT: ASHR * T14.Y, PV.X, literal.y, 5468; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5469 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 5470 %ext = sext <8 x i16> %load to <8 x i64> 5471 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out 5472 ret void 5473} 5474; Test: load <16 x i16> via the addrspace(4) pointer %in, zext to <16 x i64>, store via the addrspace(1) pointer %out. The per-prefix check lines inside the body are autogenerated (utils/update_llc_test_checks.py), so regenerate them instead of editing by hand. 5475define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 5476; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i64: 5477; GCN-NOHSA-SI: ; %bb.0: 5478; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x9 5479; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5480; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5481; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[14:15], 0x0 5482; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5483; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5484; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, 0xffff 5485; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5486; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s12 5487; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s13 5488; GCN-NOHSA-SI-NEXT: s_waitcnt 
lgkmcnt(0) 5489; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s5, 16 5490; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s7, 16 5491; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s11, 16 5492; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s9, 16 5493; GCN-NOHSA-SI-NEXT: s_lshr_b32 s17, s8, 16 5494; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s10, 16 5495; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s6, 16 5496; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s4, 16 5497; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s14 5498; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s14 5499; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, s14 5500; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, s14 5501; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s14 5502; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s14 5503; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, s14 5504; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s14 5505; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s9 5506; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s16 5507; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 5508; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5509; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11 5510; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 5511; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 5512; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5513; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7 5514; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 5515; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5516; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5517; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 5518; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 5519; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5520; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5521; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 5522; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17 5523; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 5524; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5525; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 5526; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s18 5527; GCN-NOHSA-SI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 5528; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5529; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 5530; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 5531; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5532; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5533; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 5534; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 5535; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5536; GCN-NOHSA-SI-NEXT: s_endpgm 5537; 5538; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i64: 5539; GCN-HSA: ; %bb.0: 5540; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5541; GCN-HSA-NEXT: s_mov_b32 s12, 0xffff 5542; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5543; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5544; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5545; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 5546; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5547; GCN-HSA-NEXT: s_lshr_b32 s13, s5, 16 5548; GCN-HSA-NEXT: s_lshr_b32 s14, s7, 16 5549; GCN-HSA-NEXT: s_lshr_b32 s15, s11, 16 5550; GCN-HSA-NEXT: s_lshr_b32 s2, s9, 16 5551; GCN-HSA-NEXT: s_lshr_b32 s16, s8, 16 5552; GCN-HSA-NEXT: s_lshr_b32 s17, s10, 16 5553; GCN-HSA-NEXT: s_lshr_b32 s18, s6, 16 5554; GCN-HSA-NEXT: s_lshr_b32 s19, s4, 16 5555; GCN-HSA-NEXT: s_and_b32 s3, s9, s12 5556; GCN-HSA-NEXT: s_and_b32 s4, s4, s12 5557; GCN-HSA-NEXT: s_and_b32 s6, s6, s12 5558; GCN-HSA-NEXT: s_and_b32 s10, s10, s12 5559; GCN-HSA-NEXT: s_and_b32 s8, s8, s12 5560; GCN-HSA-NEXT: s_and_b32 s5, s5, s12 5561; GCN-HSA-NEXT: s_and_b32 s7, s7, s12 5562; GCN-HSA-NEXT: s_and_b32 s11, s11, s12 5563; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 5564; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 5565; GCN-HSA-NEXT: v_mov_b32_e32 v0, s3 5566; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5567; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5568; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5569; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 5570; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5571; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5572; 
GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5573; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5574; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 5575; GCN-HSA-NEXT: v_mov_b32_e32 v0, s11 5576; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 5577; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5578; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5579; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5580; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5581; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5582; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7 5583; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 5584; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5585; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5586; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5587; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5588; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 5589; GCN-HSA-NEXT: v_mov_b32_e32 v0, s5 5590; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13 5591; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5592; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5593; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5594; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5595; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 5596; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 5597; GCN-HSA-NEXT: v_mov_b32_e32 v2, s16 5598; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5599; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5600; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5601; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5602; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 5603; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 5604; GCN-HSA-NEXT: v_mov_b32_e32 v2, s17 5605; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5606; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5607; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5608; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 5609; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 5610; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5611; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5612; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5613; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 5614; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19 5615; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5616; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5617; GCN-HSA-NEXT: s_endpgm 5618; 5619; 
GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i64: 5620; GCN-NOHSA-VI: ; %bb.0: 5621; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5622; GCN-NOHSA-VI-NEXT: s_mov_b32 s12, 0xffff 5623; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5624; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5625; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5626; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5627; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5628; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5629; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[6:7], 0x0 5630; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5631; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5632; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s4, s12 5633; GCN-NOHSA-VI-NEXT: s_and_b32 s14, s5, s12 5634; GCN-NOHSA-VI-NEXT: s_and_b32 s15, s6, s12 5635; GCN-NOHSA-VI-NEXT: s_and_b32 s16, s7, s12 5636; GCN-NOHSA-VI-NEXT: s_and_b32 s17, s8, s12 5637; GCN-NOHSA-VI-NEXT: s_and_b32 s18, s9, s12 5638; GCN-NOHSA-VI-NEXT: s_and_b32 s19, s10, s12 5639; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s11, s12 5640; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s11, 16 5641; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s10, 16 5642; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 5643; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 5644; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 5645; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s9, 16 5646; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s19 5647; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 5648; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 5649; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s8, 16 5650; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 5651; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 5652; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 5653; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s7, 16 5654; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s17 5655; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8 5656; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 5657; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 5658; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v0, s16 5659; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 5660; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5661; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16 5662; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s15 5663; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 5664; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5665; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5666; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 5667; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 5668; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5669; GCN-NOHSA-VI-NEXT: s_nop 0 5670; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s13 5671; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5672; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5673; GCN-NOHSA-VI-NEXT: s_endpgm 5674; 5675; EG-LABEL: constant_zextload_v16i16_to_v16i64: 5676; EG: ; %bb.0: 5677; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 5678; EG-NEXT: TEX 1 @12 5679; EG-NEXT: ALU 62, @17, KC0[CB0:0-32], KC1[] 5680; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0 5681; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0 5682; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0 5683; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0 5684; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0 5685; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0 5686; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0 5687; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1 5688; EG-NEXT: CF_END 5689; EG-NEXT: Fetch clause starting at 12: 5690; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 5691; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 5692; EG-NEXT: ALU clause starting at 16: 5693; EG-NEXT: MOV * T11.X, KC0[2].Z, 5694; EG-NEXT: ALU clause starting at 17: 5695; EG-NEXT: LSHR * T13.Z, T12.W, literal.x, 5696; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5697; EG-NEXT: AND_INT T13.X, T12.W, literal.x, 5698; EG-NEXT: MOV T13.Y, 0.0, 5699; EG-NEXT: LSHR 
T14.Z, T12.Z, literal.y, 5700; EG-NEXT: AND_INT * T14.X, T12.Z, literal.x, 5701; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5702; EG-NEXT: MOV T14.Y, 0.0, 5703; EG-NEXT: LSHR * T15.Z, T12.Y, literal.x, 5704; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5705; EG-NEXT: AND_INT T15.X, T12.Y, literal.x, 5706; EG-NEXT: MOV T15.Y, 0.0, 5707; EG-NEXT: LSHR T12.Z, T12.X, literal.y, 5708; EG-NEXT: AND_INT * T12.X, T12.X, literal.x, 5709; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5710; EG-NEXT: MOV T12.Y, 0.0, 5711; EG-NEXT: LSHR * T16.Z, T11.W, literal.x, 5712; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5713; EG-NEXT: AND_INT T16.X, T11.W, literal.x, 5714; EG-NEXT: MOV T16.Y, 0.0, 5715; EG-NEXT: LSHR T17.Z, T11.Z, literal.y, 5716; EG-NEXT: AND_INT * T17.X, T11.Z, literal.x, 5717; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5718; EG-NEXT: MOV T17.Y, 0.0, 5719; EG-NEXT: LSHR * T18.Z, T11.Y, literal.x, 5720; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5721; EG-NEXT: AND_INT T18.X, T11.Y, literal.x, 5722; EG-NEXT: MOV T18.Y, 0.0, 5723; EG-NEXT: LSHR T11.Z, T11.X, literal.y, 5724; EG-NEXT: AND_INT * T11.X, T11.X, literal.x, 5725; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5726; EG-NEXT: MOV T11.Y, 0.0, 5727; EG-NEXT: MOV T13.W, 0.0, 5728; EG-NEXT: MOV * T14.W, 0.0, 5729; EG-NEXT: MOV T15.W, 0.0, 5730; EG-NEXT: MOV * T12.W, 0.0, 5731; EG-NEXT: MOV T16.W, 0.0, 5732; EG-NEXT: MOV * T17.W, 0.0, 5733; EG-NEXT: MOV T18.W, 0.0, 5734; EG-NEXT: MOV * T11.W, 0.0, 5735; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 5736; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5737; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5738; EG-NEXT: LSHR T20.X, PV.W, literal.x, 5739; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5740; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5741; EG-NEXT: LSHR T21.X, PV.W, literal.x, 5742; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5743; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5744; EG-NEXT: LSHR T22.X, PV.W, literal.x, 5745; EG-NEXT: ADD_INT * T0.W, 
KC0[2].Y, literal.y, 5746; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 5747; EG-NEXT: LSHR T23.X, PV.W, literal.x, 5748; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5749; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 5750; EG-NEXT: LSHR T24.X, PV.W, literal.x, 5751; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5752; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 5753; EG-NEXT: LSHR T25.X, PV.W, literal.x, 5754; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5755; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 5756; EG-NEXT: LSHR * T26.X, PV.W, literal.x, 5757; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5758 %load = load <16 x i16>, <16 x i16> addrspace(4)* %in 5759 %ext = zext <16 x i16> %load to <16 x i64> 5760 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out 5761 ret void 5762} 5763 5764define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 5765; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i64: 5766; GCN-NOHSA-SI: ; %bb.0: 5767; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5768; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5769; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 5770; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5771; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5772; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5773; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, s11 5774; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, s9 5775; GCN-NOHSA-SI-NEXT: s_mov_b32 s16, s7 5776; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, s5 5777; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s10, 16 5778; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s8, 16 5779; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s6, 16 5780; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s4, 16 5781; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x100000 5782; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[6:7], 0x100000 5783; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[8:9], 0x100000 5784; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[10:11], 0x100000 5785; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 5786; 
GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 5787; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[8:9], s[8:9], 48 5788; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[10:11], s[10:11], 48 5789; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000 5790; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 5791; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 5792; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5793; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000 5794; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 5795; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x100000 5796; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 5797; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 5798; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13 5799; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10 5800; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11 5801; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 5802; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5803; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 5804; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15 5805; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8 5806; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 5807; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 5808; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5809; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16 5810; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s17 5811; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 5812; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 5813; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5814; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5815; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 5816; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 5817; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 5818; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 5819; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5820; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5821; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36 5822; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v1, s37 5823; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s34 5824; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s35 5825; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s30 5826; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s31 5827; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s28 5828; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s29 5829; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 5830; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21 5831; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 5832; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s22 5833; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s23 5834; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 5835; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s24 5836; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s25 5837; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 5838; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s26 5839; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s27 5840; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 5841; GCN-NOHSA-SI-NEXT: s_endpgm 5842; 5843; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i64: 5844; GCN-HSA: ; %bb.0: 5845; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5846; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5847; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 5848; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5849; GCN-HSA-NEXT: s_mov_b32 s2, s11 5850; GCN-HSA-NEXT: s_mov_b32 s12, s9 5851; GCN-HSA-NEXT: s_mov_b32 s14, s7 5852; GCN-HSA-NEXT: s_mov_b32 s16, s5 5853; GCN-HSA-NEXT: s_lshr_b32 s18, s10, 16 5854; GCN-HSA-NEXT: s_lshr_b32 s20, s8, 16 5855; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 5856; GCN-HSA-NEXT: s_bfe_i64 s[34:35], s[10:11], 0x100000 5857; GCN-HSA-NEXT: s_ashr_i64 s[10:11], s[10:11], 48 5858; GCN-HSA-NEXT: s_lshr_b32 s22, s6, 16 5859; GCN-HSA-NEXT: s_lshr_b32 s24, s4, 16 5860; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5861; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[4:5], 0x100000 5862; GCN-HSA-NEXT: s_bfe_i64 s[28:29], s[6:7], 0x100000 5863; GCN-HSA-NEXT: 
s_bfe_i64 s[30:31], s[8:9], 0x100000 5864; GCN-HSA-NEXT: s_ashr_i64 s[8:9], s[8:9], 48 5865; GCN-HSA-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 5866; GCN-HSA-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 5867; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5868; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5869; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 5870; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 5871; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[24:25], 0x100000 5872; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[22:23], 0x100000 5873; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 5874; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000 5875; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 5876; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 5877; GCN-HSA-NEXT: s_add_u32 s22, s0, 0x70 5878; GCN-HSA-NEXT: s_addc_u32 s23, s1, 0 5879; GCN-HSA-NEXT: v_mov_b32_e32 v6, s8 5880; GCN-HSA-NEXT: s_add_u32 s8, s0, 0x50 5881; GCN-HSA-NEXT: v_mov_b32_e32 v8, s22 5882; GCN-HSA-NEXT: v_mov_b32_e32 v7, s9 5883; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 5884; GCN-HSA-NEXT: v_mov_b32_e32 v11, s9 5885; GCN-HSA-NEXT: v_mov_b32_e32 v9, s23 5886; GCN-HSA-NEXT: v_mov_b32_e32 v4, s12 5887; GCN-HSA-NEXT: v_mov_b32_e32 v5, s13 5888; GCN-HSA-NEXT: v_mov_b32_e32 v10, s8 5889; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 5890; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 5891; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 5892; GCN-HSA-NEXT: s_add_u32 s6, s0, 48 5893; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 5894; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 5895; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 5896; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 5897; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 5898; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 5899; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5900; GCN-HSA-NEXT: s_nop 0 5901; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 5902; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 5903; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 5904; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 5905; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 5906; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 5907; GCN-HSA-NEXT: 
s_add_u32 s4, s0, 0x60 5908; GCN-HSA-NEXT: v_mov_b32_e32 v0, s16 5909; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 5910; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5911; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 5912; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 5913; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 5914; GCN-HSA-NEXT: s_add_u32 s4, s0, 64 5915; GCN-HSA-NEXT: v_mov_b32_e32 v0, s34 5916; GCN-HSA-NEXT: v_mov_b32_e32 v1, s35 5917; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 5918; GCN-HSA-NEXT: v_mov_b32_e32 v3, s19 5919; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5920; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 5921; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 5922; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 5923; GCN-HSA-NEXT: s_add_u32 s4, s0, 32 5924; GCN-HSA-NEXT: v_mov_b32_e32 v0, s30 5925; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31 5926; GCN-HSA-NEXT: v_mov_b32_e32 v2, s20 5927; GCN-HSA-NEXT: v_mov_b32_e32 v3, s21 5928; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5929; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 5930; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 5931; GCN-HSA-NEXT: v_mov_b32_e32 v0, s28 5932; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 5933; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 5934; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 5935; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 5936; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5937; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5938; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26 5939; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 5940; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 5941; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 5942; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5943; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5944; GCN-HSA-NEXT: s_endpgm 5945; 5946; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i64: 5947; GCN-NOHSA-VI: ; %bb.0: 5948; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5949; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5950; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5951; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5952; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5953; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5954; 
GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[6:7], 0x0 5955; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5956; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[10:11], 0x100000 5957; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s10, 16 5958; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[10:11], 0x100000 5959; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s11 5960; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[10:11], 0x100000 5961; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s11, 16 5962; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[8:9], 0x100000 5963; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s8, 16 5964; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5965; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[8:9], 0x100000 5966; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s9 5967; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000 5968; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 5969; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[8:9], 0x100000 5970; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s9, 16 5971; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36 5972; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37 5973; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 5974; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11 5975; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 5976; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[6:7], 0x100000 5977; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7 5978; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 5979; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 5980; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 5981; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s34 5982; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 5983; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 5984; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[6:7], 0x100000 5985; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16 5986; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 5987; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 5988; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8 5989; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s9 5990; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 
offset:80 5991; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 5992; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 5993; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 5994; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26 5995; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s27 5996; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 5997; GCN-NOHSA-VI-NEXT: s_mov_b32 s14, s5 5998; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s5, 16 5999; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 6000; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 6001; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 6002; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 6003; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6004; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[4:5], 0x100000 6005; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 6006; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 6007; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 6008; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 6009; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 6010; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s20 6011; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s21 6012; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6013; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 6014; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 6015; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 6016; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16 6017; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 6018; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6019; GCN-NOHSA-VI-NEXT: s_nop 0 6020; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 6021; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13 6022; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 6023; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 6024; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6025; GCN-NOHSA-VI-NEXT: s_endpgm 6026; 6027; EG-LABEL: constant_sextload_v16i16_to_v16i64: 6028; EG: ; %bb.0: 6029; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 6030; EG-NEXT: TEX 1 @12 
6031; EG-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[] 6032; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0 6033; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0 6034; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0 6035; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0 6036; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0 6037; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0 6038; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0 6039; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1 6040; EG-NEXT: CF_END 6041; EG-NEXT: Fetch clause starting at 12: 6042; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 6043; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 6044; EG-NEXT: ALU clause starting at 16: 6045; EG-NEXT: MOV * T11.X, KC0[2].Z, 6046; EG-NEXT: ALU clause starting at 17: 6047; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 6048; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6049; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6050; EG-NEXT: LSHR T14.X, PV.W, literal.x, 6051; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6052; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6053; EG-NEXT: LSHR T15.X, PV.W, literal.x, 6054; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6055; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6056; EG-NEXT: LSHR T16.X, PV.W, literal.x, 6057; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6058; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6059; EG-NEXT: LSHR T17.X, PV.W, literal.x, 6060; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6061; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6062; EG-NEXT: LSHR T18.X, PV.W, literal.x, 6063; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 6064; EG-NEXT: ASHR * T19.W, T11.X, literal.z, 6065; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6066; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6067; EG-NEXT: LSHR T20.X, PV.W, literal.x, 6068; EG-NEXT: ASHR T19.Z, T11.X, literal.y, 6069; EG-NEXT: ASHR * T21.W, T11.Y, literal.z, 6070; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6071; 
EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6072; EG-NEXT: BFE_INT T19.X, T11.X, 0.0, literal.x, 6073; EG-NEXT: ASHR T21.Z, T11.Y, literal.x, 6074; EG-NEXT: ASHR * T22.W, T11.Z, literal.y, 6075; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6076; EG-NEXT: BFE_INT T21.X, T11.Y, 0.0, literal.x, 6077; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 6078; EG-NEXT: ASHR T22.Z, T11.Z, literal.x, 6079; EG-NEXT: ASHR * T23.W, T11.W, literal.y, 6080; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6081; EG-NEXT: BFE_INT T22.X, T11.Z, 0.0, literal.x, 6082; EG-NEXT: ASHR T21.Y, PV.X, literal.y, 6083; EG-NEXT: ASHR T23.Z, T11.W, literal.x, 6084; EG-NEXT: ASHR * T24.W, T12.X, literal.y, 6085; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6086; EG-NEXT: BFE_INT T23.X, T11.W, 0.0, literal.x, 6087; EG-NEXT: ASHR T22.Y, PV.X, literal.y, 6088; EG-NEXT: ASHR T24.Z, T12.X, literal.x, 6089; EG-NEXT: ASHR * T11.W, T12.Y, literal.y, 6090; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6091; EG-NEXT: BFE_INT T24.X, T12.X, 0.0, literal.x, 6092; EG-NEXT: ASHR T23.Y, PV.X, literal.y, 6093; EG-NEXT: ASHR T11.Z, T12.Y, literal.x, 6094; EG-NEXT: ASHR * T25.W, T12.Z, literal.y, 6095; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6096; EG-NEXT: BFE_INT T11.X, T12.Y, 0.0, literal.x, 6097; EG-NEXT: ASHR T24.Y, PV.X, literal.y, 6098; EG-NEXT: ASHR T25.Z, T12.Z, literal.x, 6099; EG-NEXT: ASHR * T26.W, T12.W, literal.y, 6100; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6101; EG-NEXT: BFE_INT T25.X, T12.Z, 0.0, literal.x, 6102; EG-NEXT: ASHR T11.Y, PV.X, literal.y, 6103; EG-NEXT: ASHR * T26.Z, T12.W, literal.x, 6104; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6105; EG-NEXT: BFE_INT T26.X, T12.W, 0.0, literal.x, 6106; EG-NEXT: ASHR T25.Y, PV.X, literal.y, 6107; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 6108; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6109; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 6110; EG-NEXT: LSHR T12.X, PV.W, literal.x, 6111; EG-NEXT: ASHR * T26.Y, PV.X, literal.y, 6112; EG-NEXT: 2(2.802597e-45), 
31(4.344025e-44) 6113 %load = load <16 x i16>, <16 x i16> addrspace(4)* %in 6114 %ext = sext <16 x i16> %load to <16 x i64> 6115 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out 6116 ret void 6117} 6118
; Test constant_zextload_v32i16_to_v32i64 - loads a <32 x i16> vector through
; the addrspace(4) pointer %in, zero-extends every element to i64, and stores
; the resulting <32 x i64> vector through the addrspace(1) pointer %out.
; The check lines inside the function are autogenerated by
; utils/update_llc_test_checks.py (see the first line of this file), so
; regenerate them with that script instead of editing them by hand.
6119define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 { 6120; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i64: 6121; GCN-NOHSA-SI: ; %bb.0: 6122; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 6123; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6124; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0 6125; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff 6126; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6127; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s5, 16 6128; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s7, 16 6129; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s9, 16 6130; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s11, 16 6131; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s13, 16 6132; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s15, 16 6133; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s17, 16 6134; GCN-NOHSA-SI-NEXT: s_lshr_b32 s27, s19, 16 6135; GCN-NOHSA-SI-NEXT: s_and_b32 s28, s4, s2 6136; GCN-NOHSA-SI-NEXT: s_and_b32 s29, s6, s2 6137; GCN-NOHSA-SI-NEXT: s_and_b32 s30, s8, s2 6138; GCN-NOHSA-SI-NEXT: s_and_b32 s31, s10, s2 6139; GCN-NOHSA-SI-NEXT: s_and_b32 s33, s12, s2 6140; GCN-NOHSA-SI-NEXT: s_and_b32 s34, s14, s2 6141; GCN-NOHSA-SI-NEXT: s_and_b32 s35, s16, s2 6142; GCN-NOHSA-SI-NEXT: s_and_b32 s36, s18, s2 6143; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s2 6144; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s2 6145; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, s2 6146; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s2 6147; GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, s2 6148; GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, s2 6149; GCN-NOHSA-SI-NEXT: s_and_b32 s17, s17, s2 6150; GCN-NOHSA-SI-NEXT: s_and_b32 s19, s19, s2 6151; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s18, 16 6152; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s16, 16 6153; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s14, 16 6154; 
GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s12, 16 6155; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s10, 16 6156; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s8, 16 6157; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s6, 16 6158; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s4, 16 6159; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6160; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 6161; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6162; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 6163; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s19 6164; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s27 6165; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 6166; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6167; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s17 6168; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s26 6169; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 6170; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6171; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s15 6172; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s25 6173; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 6174; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6175; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s13 6176; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24 6177; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 6178; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6179; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11 6180; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s23 6181; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 6182; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6183; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s9 6184; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s22 6185; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 6186; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6187; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7 6188; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s21 6189; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6190; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6191; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 6192; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 6193; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6194; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6195; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36 6196; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s18 6197; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 6198; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6199; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s35 6200; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s16 6201; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 6202; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6203; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s34 6204; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s14 6205; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 6206; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6207; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s33 6208; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 6209; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 6210; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6211; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s31 6212; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10 6213; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 6214; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6215; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s30 6216; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8 6217; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 6218; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6219; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s29 6220; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 6221; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6222; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6223; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s28 6224; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 6225; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6226; GCN-NOHSA-SI-NEXT: s_endpgm 6227; 6228; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i64: 6229; GCN-HSA: ; %bb.0: 6230; GCN-HSA-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x0 6231; GCN-HSA-NEXT: s_mov_b32 s20, 0xffff 6232; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 6233; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 6234; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6235; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0 6236; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6237; GCN-HSA-NEXT: s_and_b32 s21, s4, s20 6238; GCN-HSA-NEXT: s_and_b32 s22, s6, s20 6239; GCN-HSA-NEXT: s_and_b32 s23, s8, s20 6240; GCN-HSA-NEXT: s_and_b32 s24, s10, s20 6241; GCN-HSA-NEXT: s_and_b32 s25, s12, s20 6242; GCN-HSA-NEXT: s_and_b32 s26, s14, s20 6243; GCN-HSA-NEXT: s_and_b32 s27, s16, s20 6244; GCN-HSA-NEXT: s_and_b32 s28, s18, s20 6245; GCN-HSA-NEXT: s_and_b32 s29, s5, s20 6246; GCN-HSA-NEXT: s_and_b32 s30, s7, s20 6247; GCN-HSA-NEXT: s_and_b32 s31, s9, s20 6248; GCN-HSA-NEXT: s_and_b32 s33, s11, s20 6249; GCN-HSA-NEXT: s_and_b32 s34, s13, s20 6250; GCN-HSA-NEXT: s_and_b32 s35, s15, s20 6251; GCN-HSA-NEXT: s_and_b32 s36, s17, s20 6252; GCN-HSA-NEXT: s_and_b32 s20, s19, s20 6253; GCN-HSA-NEXT: s_lshr_b32 s19, s19, 16 6254; GCN-HSA-NEXT: s_lshr_b32 s5, s5, 16 6255; GCN-HSA-NEXT: s_lshr_b32 s7, s7, 16 6256; GCN-HSA-NEXT: s_lshr_b32 s9, s9, 16 6257; GCN-HSA-NEXT: s_lshr_b32 s11, s11, 16 6258; GCN-HSA-NEXT: s_lshr_b32 s13, s13, 16 6259; GCN-HSA-NEXT: s_lshr_b32 s15, s15, 16 6260; GCN-HSA-NEXT: s_lshr_b32 s17, s17, 16 6261; GCN-HSA-NEXT: s_lshr_b32 s18, s18, 16 6262; GCN-HSA-NEXT: s_lshr_b32 s16, s16, 16 6263; GCN-HSA-NEXT: s_lshr_b32 s14, s14, 16 6264; GCN-HSA-NEXT: s_lshr_b32 s12, s12, 16 6265; GCN-HSA-NEXT: s_lshr_b32 s10, s10, 16 6266; GCN-HSA-NEXT: s_lshr_b32 s8, s8, 16 6267; GCN-HSA-NEXT: s_lshr_b32 s6, s6, 16 6268; GCN-HSA-NEXT: s_lshr_b32 s4, s4, 16 6269; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 6270; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6271; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6272; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6273; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0 6274; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6275; GCN-HSA-NEXT: v_mov_b32_e32 v7, s3 6276; GCN-HSA-NEXT: 
v_mov_b32_e32 v6, s2 6277; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 6278; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6279; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 6280; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 6281; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 6282; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6283; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 6284; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 6285; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 6286; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20 6287; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19 6288; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6289; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6290; GCN-HSA-NEXT: v_mov_b32_e32 v0, s36 6291; GCN-HSA-NEXT: v_mov_b32_e32 v2, s17 6292; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 6293; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6294; GCN-HSA-NEXT: v_mov_b32_e32 v0, s35 6295; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 6296; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 6297; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6298; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 6299; GCN-HSA-NEXT: v_mov_b32_e32 v0, s34 6300; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13 6301; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 6302; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6303; GCN-HSA-NEXT: v_mov_b32_e32 v0, s33 6304; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 6305; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6306; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6307; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6308; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6309; GCN-HSA-NEXT: v_mov_b32_e32 v0, s31 6310; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 6311; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6312; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6313; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6314; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6315; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 6316; GCN-HSA-NEXT: v_mov_b32_e32 v0, s30 6317; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 6318; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6319; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6320; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6321; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6322; 
GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 6323; GCN-HSA-NEXT: v_mov_b32_e32 v0, s29 6324; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 6325; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6326; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6327; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6328; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6329; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 6330; GCN-HSA-NEXT: v_mov_b32_e32 v0, s28 6331; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 6332; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6333; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6334; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6335; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6336; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 6337; GCN-HSA-NEXT: v_mov_b32_e32 v0, s27 6338; GCN-HSA-NEXT: v_mov_b32_e32 v2, s16 6339; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6340; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6341; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6342; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6343; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 6344; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26 6345; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 6346; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6347; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6348; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6349; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6350; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 6351; GCN-HSA-NEXT: v_mov_b32_e32 v0, s25 6352; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12 6353; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6354; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6355; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6356; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6357; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 6358; GCN-HSA-NEXT: v_mov_b32_e32 v0, s24 6359; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 6360; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6361; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6362; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6363; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6364; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 6365; GCN-HSA-NEXT: v_mov_b32_e32 v0, s23 6366; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8 6367; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6368; GCN-HSA-NEXT: 
flat_store_dwordx4 v[4:5], v[0:3] 6369; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6370; GCN-HSA-NEXT: v_mov_b32_e32 v0, s22 6371; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 6372; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6373; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6374; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6375; GCN-HSA-NEXT: v_mov_b32_e32 v0, s21 6376; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 6377; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6378; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6379; GCN-HSA-NEXT: s_endpgm 6380; 6381; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i64: 6382; GCN-NOHSA-VI: ; %bb.0: 6383; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 6384; GCN-NOHSA-VI-NEXT: s_mov_b32 s20, 0xffff 6385; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 6386; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 6387; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 6388; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6389; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 6390; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 6391; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[4:19], s[6:7], 0x0 6392; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 6393; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6394; GCN-NOHSA-VI-NEXT: s_and_b32 s21, s4, s20 6395; GCN-NOHSA-VI-NEXT: s_and_b32 s22, s5, s20 6396; GCN-NOHSA-VI-NEXT: s_and_b32 s23, s6, s20 6397; GCN-NOHSA-VI-NEXT: s_and_b32 s24, s7, s20 6398; GCN-NOHSA-VI-NEXT: s_and_b32 s25, s8, s20 6399; GCN-NOHSA-VI-NEXT: s_and_b32 s26, s9, s20 6400; GCN-NOHSA-VI-NEXT: s_and_b32 s27, s10, s20 6401; GCN-NOHSA-VI-NEXT: s_and_b32 s28, s11, s20 6402; GCN-NOHSA-VI-NEXT: s_and_b32 s29, s12, s20 6403; GCN-NOHSA-VI-NEXT: s_and_b32 s30, s13, s20 6404; GCN-NOHSA-VI-NEXT: s_and_b32 s31, s14, s20 6405; GCN-NOHSA-VI-NEXT: s_and_b32 s33, s15, s20 6406; GCN-NOHSA-VI-NEXT: s_and_b32 s34, s16, s20 6407; GCN-NOHSA-VI-NEXT: s_and_b32 s35, s17, s20 6408; GCN-NOHSA-VI-NEXT: s_and_b32 s36, s18, s20 6409; GCN-NOHSA-VI-NEXT: s_and_b32 s20, s19, s20 6410; GCN-NOHSA-VI-NEXT: s_lshr_b32 s19, s19, 16 6411; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s18, 16 
6412; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 6413; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 6414; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 6415; GCN-NOHSA-VI-NEXT: s_lshr_b32 s17, s17, 16 6416; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36 6417; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s18 6418; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 6419; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s16, 16 6420; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s35 6421; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17 6422; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 6423; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s15, 16 6424; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s34 6425; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16 6426; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 6427; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s14, 16 6428; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s33 6429; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 6430; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 6431; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s13, 16 6432; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s31 6433; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14 6434; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 6435; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s12, 16 6436; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 6437; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 6438; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 6439; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s11, 16 6440; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s29 6441; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12 6442; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 6443; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s10, 16 6444; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 6445; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 6446; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 6447; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, 
s9, 16 6448; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s27 6449; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 6450; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 6451; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s8, 16 6452; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 6453; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 6454; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 6455; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s7, 16 6456; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s25 6457; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8 6458; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 6459; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 6460; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 6461; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 6462; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6463; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16 6464; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s23 6465; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 6466; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6467; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 6468; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 6469; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 6470; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6471; GCN-NOHSA-VI-NEXT: s_nop 0 6472; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s21 6473; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 6474; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6475; GCN-NOHSA-VI-NEXT: s_endpgm 6476; 6477; EG-LABEL: constant_zextload_v32i16_to_v32i64: 6478; EG: ; %bb.0: 6479; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 6480; EG-NEXT: TEX 2 @22 6481; EG-NEXT: ALU 33, @31, KC0[], KC1[] 6482; EG-NEXT: TEX 0 @28 6483; EG-NEXT: ALU 92, @65, KC0[CB0:0-32], KC1[] 6484; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0 6485; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0 6486; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0 6487; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW 
T20.XYZW, T47.X, 0 6488; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0 6489; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0 6490; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0 6491; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0 6492; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0 6493; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0 6494; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0 6495; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0 6496; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0 6497; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0 6498; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0 6499; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1 6500; EG-NEXT: CF_END 6501; EG-NEXT: Fetch clause starting at 22: 6502; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 6503; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 16, #1 6504; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 6505; EG-NEXT: Fetch clause starting at 28: 6506; EG-NEXT: VTX_READ_128 T29.XYZW, T19.X, 0, #1 6507; EG-NEXT: ALU clause starting at 30: 6508; EG-NEXT: MOV * T19.X, KC0[2].Z, 6509; EG-NEXT: ALU clause starting at 31: 6510; EG-NEXT: LSHR * T23.Z, T20.W, literal.x, 6511; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6512; EG-NEXT: AND_INT T23.X, T20.W, literal.x, 6513; EG-NEXT: MOV T23.Y, 0.0, 6514; EG-NEXT: LSHR T24.Z, T20.Z, literal.y, 6515; EG-NEXT: AND_INT * T24.X, T20.Z, literal.x, 6516; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6517; EG-NEXT: MOV T24.Y, 0.0, 6518; EG-NEXT: LSHR * T25.Z, T20.Y, literal.x, 6519; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6520; EG-NEXT: AND_INT T25.X, T20.Y, literal.x, 6521; EG-NEXT: MOV T25.Y, 0.0, 6522; EG-NEXT: LSHR T20.Z, T20.X, literal.y, 6523; EG-NEXT: AND_INT * T20.X, T20.X, literal.x, 6524; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6525; EG-NEXT: MOV T20.Y, 0.0, 6526; EG-NEXT: LSHR * T26.Z, T22.W, literal.x, 6527; EG-NEXT: 16(2.242078e-44), 
0(0.000000e+00) 6528; EG-NEXT: AND_INT T26.X, T22.W, literal.x, 6529; EG-NEXT: MOV T26.Y, 0.0, 6530; EG-NEXT: LSHR T27.Z, T22.Z, literal.y, 6531; EG-NEXT: AND_INT * T27.X, T22.Z, literal.x, 6532; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6533; EG-NEXT: MOV T27.Y, 0.0, 6534; EG-NEXT: LSHR * T28.Z, T22.Y, literal.x, 6535; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6536; EG-NEXT: AND_INT T28.X, T22.Y, literal.x, 6537; EG-NEXT: MOV T28.Y, 0.0, 6538; EG-NEXT: LSHR T22.Z, T22.X, literal.y, 6539; EG-NEXT: AND_INT * T22.X, T22.X, literal.x, 6540; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6541; EG-NEXT: MOV T22.Y, 0.0, 6542; EG-NEXT: LSHR * T19.Z, T21.W, literal.x, 6543; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6544; EG-NEXT: ALU clause starting at 65: 6545; EG-NEXT: AND_INT T19.X, T21.W, literal.x, 6546; EG-NEXT: MOV T19.Y, 0.0, 6547; EG-NEXT: LSHR T30.Z, T21.Z, literal.y, 6548; EG-NEXT: AND_INT * T30.X, T21.Z, literal.x, 6549; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6550; EG-NEXT: MOV T30.Y, 0.0, 6551; EG-NEXT: LSHR * T31.Z, T21.Y, literal.x, 6552; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6553; EG-NEXT: AND_INT T31.X, T21.Y, literal.x, 6554; EG-NEXT: MOV T31.Y, 0.0, 6555; EG-NEXT: LSHR T21.Z, T21.X, literal.y, 6556; EG-NEXT: AND_INT * T21.X, T21.X, literal.x, 6557; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6558; EG-NEXT: MOV T21.Y, 0.0, 6559; EG-NEXT: LSHR * T32.Z, T29.W, literal.x, 6560; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6561; EG-NEXT: AND_INT T32.X, T29.W, literal.x, 6562; EG-NEXT: MOV T32.Y, 0.0, 6563; EG-NEXT: LSHR T33.Z, T29.Z, literal.y, 6564; EG-NEXT: AND_INT * T33.X, T29.Z, literal.x, 6565; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6566; EG-NEXT: MOV T33.Y, 0.0, 6567; EG-NEXT: LSHR * T34.Z, T29.Y, literal.x, 6568; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6569; EG-NEXT: AND_INT T34.X, T29.Y, literal.x, 6570; EG-NEXT: MOV T34.Y, 0.0, 6571; EG-NEXT: LSHR T29.Z, T29.X, literal.y, 6572; EG-NEXT: AND_INT * T29.X, T29.X, 
literal.x, 6573; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6574; EG-NEXT: MOV T29.Y, 0.0, 6575; EG-NEXT: MOV T23.W, 0.0, 6576; EG-NEXT: MOV * T24.W, 0.0, 6577; EG-NEXT: MOV T25.W, 0.0, 6578; EG-NEXT: MOV * T20.W, 0.0, 6579; EG-NEXT: MOV T26.W, 0.0, 6580; EG-NEXT: MOV * T27.W, 0.0, 6581; EG-NEXT: MOV T28.W, 0.0, 6582; EG-NEXT: MOV * T22.W, 0.0, 6583; EG-NEXT: MOV T19.W, 0.0, 6584; EG-NEXT: MOV * T30.W, 0.0, 6585; EG-NEXT: MOV T31.W, 0.0, 6586; EG-NEXT: MOV * T21.W, 0.0, 6587; EG-NEXT: MOV T32.W, 0.0, 6588; EG-NEXT: MOV * T33.W, 0.0, 6589; EG-NEXT: MOV T34.W, 0.0, 6590; EG-NEXT: MOV * T29.W, 0.0, 6591; EG-NEXT: LSHR T35.X, KC0[2].Y, literal.x, 6592; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6593; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6594; EG-NEXT: LSHR T36.X, PV.W, literal.x, 6595; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6596; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6597; EG-NEXT: LSHR T37.X, PV.W, literal.x, 6598; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6599; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6600; EG-NEXT: LSHR T38.X, PV.W, literal.x, 6601; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6602; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6603; EG-NEXT: LSHR T39.X, PV.W, literal.x, 6604; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6605; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6606; EG-NEXT: LSHR T40.X, PV.W, literal.x, 6607; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6608; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6609; EG-NEXT: LSHR T41.X, PV.W, literal.x, 6610; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6611; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 6612; EG-NEXT: LSHR T42.X, PV.W, literal.x, 6613; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6614; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 6615; EG-NEXT: LSHR T43.X, PV.W, literal.x, 6616; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6617; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 6618; EG-NEXT: LSHR T44.X, PV.W, literal.x, 6619; EG-NEXT: ADD_INT * T0.W, 
KC0[2].Y, literal.y, 6620; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 6621; EG-NEXT: LSHR T45.X, PV.W, literal.x, 6622; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6623; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 6624; EG-NEXT: LSHR T46.X, PV.W, literal.x, 6625; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6626; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 6627; EG-NEXT: LSHR T47.X, PV.W, literal.x, 6628; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6629; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 6630; EG-NEXT: LSHR T48.X, PV.W, literal.x, 6631; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6632; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 6633; EG-NEXT: LSHR T49.X, PV.W, literal.x, 6634; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6635; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 6636; EG-NEXT: LSHR * T50.X, PV.W, literal.x, 6637; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6638 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 6639 %ext = zext <32 x i16> %load to <32 x i64> 6640 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out 6641 ret void 6642} 6643 6644define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 { 6645; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i64: 6646; GCN-NOHSA-SI: ; %bb.0: 6647; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 6648; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6649; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0 6650; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6651; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, s23 6652; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s21 6653; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s19 6654; GCN-NOHSA-SI-NEXT: s_mov_b32 s24, s17 6655; GCN-NOHSA-SI-NEXT: s_mov_b32 s26, s15 6656; GCN-NOHSA-SI-NEXT: s_mov_b32 s28, s13 6657; GCN-NOHSA-SI-NEXT: s_mov_b32 s30, s11 6658; GCN-NOHSA-SI-NEXT: s_mov_b32 s34, s9 6659; GCN-NOHSA-SI-NEXT: s_lshr_b32 s36, s22, 16 6660; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s20, 16 6661; GCN-NOHSA-SI-NEXT: 
s_lshr_b32 s40, s18, 16 6662; GCN-NOHSA-SI-NEXT: s_lshr_b32 s42, s16, 16 6663; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[44:45], s[4:5], 0x100000 6664; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 6665; GCN-NOHSA-SI-NEXT: s_lshr_b32 s46, s14, 16 6666; GCN-NOHSA-SI-NEXT: s_lshr_b32 s48, s12, 16 6667; GCN-NOHSA-SI-NEXT: s_lshr_b32 s50, s10, 16 6668; GCN-NOHSA-SI-NEXT: s_lshr_b32 s52, s8, 16 6669; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[8:9], 0x100000 6670; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[54:55], s[10:11], 0x100000 6671; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[56:57], s[12:13], 0x100000 6672; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[58:59], s[14:15], 0x100000 6673; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[60:61], s[16:17], 0x100000 6674; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[62:63], s[18:19], 0x100000 6675; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[64:65], s[20:21], 0x100000 6676; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[66:67], s[22:23], 0x100000 6677; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[8:9], s[8:9], 48 6678; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[10:11], s[10:11], 48 6679; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[14:15], s[14:15], 48 6680; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[16:17], s[16:17], 48 6681; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[18:19], s[18:19], 48 6682; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[20:21], s[20:21], 48 6683; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[22:23], s[22:23], 48 6684; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[12:13], s[12:13], 48 6685; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s2 6686; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s3 6687; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s22 6688; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s23 6689; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s44 6690; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s45 6691; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s20 6692; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s21 6693; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6694; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6695; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[28:29], 0x100000 6696; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[26:27], 0x100000 6697; GCN-NOHSA-SI-NEXT: s_bfe_i64 
s[24:25], s[24:25], 0x100000 6698; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 6699; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[34:35], 0x100000 6700; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[30:31], 0x100000 6701; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s6 6702; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s7 6703; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s18 6704; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s19 6705; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s24 6706; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s25 6707; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s16 6708; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s17 6709; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s22 6710; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s23 6711; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s14 6712; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s15 6713; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s20 6714; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s21 6715; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s12 6716; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s13 6717; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 6718; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[52:53], 0x100000 6719; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[50:51], 0x100000 6720; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[48:49], 0x100000 6721; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[46:47], 0x100000 6722; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[42:43], 0x100000 6723; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[40:41], 0x100000 6724; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[38:39], 0x100000 6725; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[36:37], 0x100000 6726; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208 6727; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176 6728; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144 6729; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112 6730; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80 6731; 
GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5) 6732; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s28 6733; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29 6734; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10 6735; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11 6736; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6737; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6738; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s26 6739; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s27 6740; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8 6741; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 6742; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6743; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6744; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s66 6745; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s67 6746; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s64 6747; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s65 6748; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s62 6749; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s63 6750; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s60 6751; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s61 6752; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s58 6753; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s59 6754; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s56 6755; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s57 6756; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v24, s54 6757; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, s55 6758; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24 6759; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25 6760; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 6761; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6762; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 6763; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5 6764; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s22 6765; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s23 6766; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192 6767; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s20 6768; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s21 6769; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160 6770; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s18 6771; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s19 6772; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128 6773; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s16 6774; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s17 6775; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96 6776; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s14 6777; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s15 6778; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64 6779; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v26, s12 6780; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, s13 6781; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32 6782; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 6783; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 6784; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6785; GCN-NOHSA-SI-NEXT: s_endpgm 6786; 6787; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i64: 6788; GCN-HSA: ; %bb.0: 6789; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 6790; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6791; GCN-HSA-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x0 6792; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6793; GCN-HSA-NEXT: s_mov_b32 s8, s51 6794; GCN-HSA-NEXT: s_mov_b32 s34, s49 6795; GCN-HSA-NEXT: s_mov_b32 s52, s47 6796; GCN-HSA-NEXT: s_mov_b32 s54, s45 6797; GCN-HSA-NEXT: s_mov_b32 s56, s43 6798; GCN-HSA-NEXT: s_mov_b32 s58, s41 6799; GCN-HSA-NEXT: s_mov_b32 s60, s39 6800; GCN-HSA-NEXT: s_mov_b32 s62, s37 6801; GCN-HSA-NEXT: s_lshr_b32 s30, s46, 16 6802; GCN-HSA-NEXT: s_lshr_b32 s24, s44, 16 6803; GCN-HSA-NEXT: s_lshr_b32 s20, s42, 16 6804; GCN-HSA-NEXT: s_lshr_b32 s16, s40, 16 6805; GCN-HSA-NEXT: s_lshr_b32 s12, s38, 16 6806; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 6807; GCN-HSA-NEXT: s_lshr_b32 s64, s50, 16 6808; GCN-HSA-NEXT: s_lshr_b32 s66, s48, 16 6809; GCN-HSA-NEXT: s_lshr_b32 s68, s36, 16 6810; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[36:37], 0x100000 6811; GCN-HSA-NEXT: s_ashr_i64 s[28:29], 
s[36:37], 48 6812; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[38:39], 0x100000 6813; GCN-HSA-NEXT: s_ashr_i64 s[36:37], s[38:39], 48 6814; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[40:41], 0x100000 6815; GCN-HSA-NEXT: s_ashr_i64 s[38:39], s[40:41], 48 6816; GCN-HSA-NEXT: s_ashr_i64 s[40:41], s[42:43], 48 6817; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[42:43], 0x100000 6818; GCN-HSA-NEXT: s_ashr_i64 s[42:43], s[44:45], 48 6819; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[44:45], 0x100000 6820; GCN-HSA-NEXT: s_ashr_i64 s[44:45], s[46:47], 48 6821; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[46:47], 0x100000 6822; GCN-HSA-NEXT: s_ashr_i64 s[46:47], s[48:49], 48 6823; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[48:49], 0x100000 6824; GCN-HSA-NEXT: s_ashr_i64 s[48:49], s[50:51], 48 6825; GCN-HSA-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x100000 6826; GCN-HSA-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x100000 6827; GCN-HSA-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x100000 6828; GCN-HSA-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x100000 6829; GCN-HSA-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x100000 6830; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[50:51], 0x100000 6831; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 6832; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 6833; GCN-HSA-NEXT: v_mov_b32_e32 v2, s48 6834; GCN-HSA-NEXT: v_mov_b32_e32 v3, s49 6835; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[68:69], 0x100000 6836; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 6837; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 6838; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 6839; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 6840; GCN-HSA-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x100000 6841; GCN-HSA-NEXT: s_bfe_i64 s[48:49], s[66:67], 0x100000 6842; GCN-HSA-NEXT: s_bfe_i64 s[50:51], s[64:65], 0x100000 6843; GCN-HSA-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x100000 6844; GCN-HSA-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x100000 6845; GCN-HSA-NEXT: s_add_u32 s64, s0, 0xf0 6846; GCN-HSA-NEXT: s_addc_u32 s65, s1, 0 6847; GCN-HSA-NEXT: v_mov_b32_e32 v4, s34 6848; GCN-HSA-NEXT: 
s_add_u32 s34, s0, 0xd0 6849; GCN-HSA-NEXT: v_mov_b32_e32 v5, s35 6850; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0 6851; GCN-HSA-NEXT: v_mov_b32_e32 v24, s34 6852; GCN-HSA-NEXT: v_mov_b32_e32 v25, s35 6853; GCN-HSA-NEXT: s_add_u32 s34, s0, 0xb0 6854; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0 6855; GCN-HSA-NEXT: v_mov_b32_e32 v26, s34 6856; GCN-HSA-NEXT: v_mov_b32_e32 v27, s35 6857; GCN-HSA-NEXT: s_add_u32 s34, s0, 0x90 6858; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0 6859; GCN-HSA-NEXT: v_mov_b32_e32 v28, s34 6860; GCN-HSA-NEXT: v_mov_b32_e32 v29, s35 6861; GCN-HSA-NEXT: s_add_u32 s34, s0, 0x70 6862; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0 6863; GCN-HSA-NEXT: v_mov_b32_e32 v30, s34 6864; GCN-HSA-NEXT: v_mov_b32_e32 v31, s35 6865; GCN-HSA-NEXT: s_add_u32 s34, s0, 0x50 6866; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0 6867; GCN-HSA-NEXT: v_mov_b32_e32 v32, s34 6868; GCN-HSA-NEXT: v_mov_b32_e32 v33, s35 6869; GCN-HSA-NEXT: s_add_u32 s34, s0, 48 6870; GCN-HSA-NEXT: v_mov_b32_e32 v6, s46 6871; GCN-HSA-NEXT: v_mov_b32_e32 v7, s47 6872; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 6873; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0 6874; GCN-HSA-NEXT: v_mov_b32_e32 v24, s34 6875; GCN-HSA-NEXT: v_mov_b32_e32 v25, s35 6876; GCN-HSA-NEXT: s_add_u32 s34, s0, 16 6877; GCN-HSA-NEXT: s_addc_u32 s35, s1, 0 6878; GCN-HSA-NEXT: v_mov_b32_e32 v8, s52 6879; GCN-HSA-NEXT: v_mov_b32_e32 v9, s53 6880; GCN-HSA-NEXT: v_mov_b32_e32 v10, s44 6881; GCN-HSA-NEXT: v_mov_b32_e32 v11, s45 6882; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 6883; GCN-HSA-NEXT: v_mov_b32_e32 v22, s64 6884; GCN-HSA-NEXT: v_mov_b32_e32 v8, s26 6885; GCN-HSA-NEXT: s_add_u32 s26, s0, 0xe0 6886; GCN-HSA-NEXT: v_mov_b32_e32 v9, s27 6887; GCN-HSA-NEXT: v_mov_b32_e32 v12, s54 6888; GCN-HSA-NEXT: v_mov_b32_e32 v13, s55 6889; GCN-HSA-NEXT: v_mov_b32_e32 v14, s42 6890; GCN-HSA-NEXT: v_mov_b32_e32 v15, s43 6891; GCN-HSA-NEXT: s_addc_u32 s27, s1, 0 6892; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15] 6893; GCN-HSA-NEXT: v_mov_b32_e32 v23, s65 6894; 
GCN-HSA-NEXT: v_mov_b32_e32 v12, s22 6895; GCN-HSA-NEXT: v_mov_b32_e32 v16, s56 6896; GCN-HSA-NEXT: v_mov_b32_e32 v17, s57 6897; GCN-HSA-NEXT: v_mov_b32_e32 v18, s40 6898; GCN-HSA-NEXT: v_mov_b32_e32 v19, s41 6899; GCN-HSA-NEXT: s_add_u32 s22, s0, 0xc0 6900; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3] 6901; GCN-HSA-NEXT: v_mov_b32_e32 v34, s34 6902; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 6903; GCN-HSA-NEXT: v_mov_b32_e32 v13, s23 6904; GCN-HSA-NEXT: v_mov_b32_e32 v16, s26 6905; GCN-HSA-NEXT: s_addc_u32 s23, s1, 0 6906; GCN-HSA-NEXT: v_mov_b32_e32 v18, s22 6907; GCN-HSA-NEXT: v_mov_b32_e32 v20, s58 6908; GCN-HSA-NEXT: v_mov_b32_e32 v21, s59 6909; GCN-HSA-NEXT: v_mov_b32_e32 v0, s60 6910; GCN-HSA-NEXT: v_mov_b32_e32 v22, s38 6911; GCN-HSA-NEXT: v_mov_b32_e32 v23, s39 6912; GCN-HSA-NEXT: v_mov_b32_e32 v1, s61 6913; GCN-HSA-NEXT: v_mov_b32_e32 v2, s36 6914; GCN-HSA-NEXT: v_mov_b32_e32 v3, s37 6915; GCN-HSA-NEXT: v_mov_b32_e32 v4, s62 6916; GCN-HSA-NEXT: v_mov_b32_e32 v5, s63 6917; GCN-HSA-NEXT: v_mov_b32_e32 v35, s35 6918; GCN-HSA-NEXT: v_mov_b32_e32 v6, s28 6919; GCN-HSA-NEXT: v_mov_b32_e32 v7, s29 6920; GCN-HSA-NEXT: v_mov_b32_e32 v10, s50 6921; GCN-HSA-NEXT: v_mov_b32_e32 v11, s51 6922; GCN-HSA-NEXT: v_mov_b32_e32 v17, s27 6923; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23] 6924; GCN-HSA-NEXT: v_mov_b32_e32 v14, s48 6925; GCN-HSA-NEXT: v_mov_b32_e32 v15, s49 6926; GCN-HSA-NEXT: v_mov_b32_e32 v19, s23 6927; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 6928; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7] 6929; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 6930; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 6931; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 6932; GCN-HSA-NEXT: s_add_u32 s18, s0, 0xa0 6933; GCN-HSA-NEXT: v_mov_b32_e32 v1, s19 6934; GCN-HSA-NEXT: s_addc_u32 s19, s1, 0 6935; GCN-HSA-NEXT: v_mov_b32_e32 v4, s18 6936; GCN-HSA-NEXT: v_mov_b32_e32 v2, s30 6937; GCN-HSA-NEXT: v_mov_b32_e32 v3, s31 6938; GCN-HSA-NEXT: 
v_mov_b32_e32 v5, s19 6939; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6940; GCN-HSA-NEXT: s_nop 0 6941; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 6942; GCN-HSA-NEXT: s_add_u32 s14, s0, 0x80 6943; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 6944; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0 6945; GCN-HSA-NEXT: v_mov_b32_e32 v4, s14 6946; GCN-HSA-NEXT: v_mov_b32_e32 v2, s24 6947; GCN-HSA-NEXT: v_mov_b32_e32 v3, s25 6948; GCN-HSA-NEXT: v_mov_b32_e32 v5, s15 6949; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6950; GCN-HSA-NEXT: s_nop 0 6951; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 6952; GCN-HSA-NEXT: s_add_u32 s10, s0, 0x60 6953; GCN-HSA-NEXT: v_mov_b32_e32 v1, s11 6954; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 6955; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 6956; GCN-HSA-NEXT: v_mov_b32_e32 v2, s20 6957; GCN-HSA-NEXT: v_mov_b32_e32 v3, s21 6958; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 6959; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6960; GCN-HSA-NEXT: s_nop 0 6961; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 6962; GCN-HSA-NEXT: s_add_u32 s6, s0, 64 6963; GCN-HSA-NEXT: v_mov_b32_e32 v1, s7 6964; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 6965; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 6966; GCN-HSA-NEXT: v_mov_b32_e32 v2, s16 6967; GCN-HSA-NEXT: v_mov_b32_e32 v3, s17 6968; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 6969; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6970; GCN-HSA-NEXT: s_nop 0 6971; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 6972; GCN-HSA-NEXT: s_add_u32 s4, s0, 32 6973; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 6974; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6975; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6976; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12 6977; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13 6978; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6979; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6980; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6981; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6982; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6983; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8 6984; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9 6985; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6986; 
GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6987; GCN-HSA-NEXT: s_endpgm 6988; 6989; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i64: 6990; GCN-NOHSA-VI: ; %bb.0: 6991; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 6992; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6993; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[12:27], s[10:11], 0x0 6994; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, 0xf000 6995; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, -1 6996; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6997; GCN-NOHSA-VI-NEXT: s_mov_b32 s66, s27 6998; GCN-NOHSA-VI-NEXT: s_lshr_b32 s68, s27, 16 6999; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x100000 7000; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x100000 7001; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[64:65], s[26:27], 0x100000 7002; GCN-NOHSA-VI-NEXT: s_lshr_b32 s26, s26, 16 7003; GCN-NOHSA-VI-NEXT: s_mov_b32 s60, s25 7004; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s25, 16 7005; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000 7006; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s66 7007; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s67 7008; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s68 7009; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s69 7010; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:240 7011; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[58:59], s[24:25], 0x100000 7012; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s24, 16 7013; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x100000 7014; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x100000 7015; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s64 7016; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s65 7017; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26 7018; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s27 7019; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:224 7020; GCN-NOHSA-VI-NEXT: s_mov_b32 s54, s23 7021; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s23, 16 7022; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 7023; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s60 7024; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 
s61 7025; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s62 7026; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s63 7027; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:208 7028; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[52:53], s[22:23], 0x100000 7029; GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s22, 16 7030; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x100000 7031; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x100000 7032; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s58 7033; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s59 7034; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s24 7035; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s25 7036; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:192 7037; GCN-NOHSA-VI-NEXT: s_mov_b32 s48, s21 7038; GCN-NOHSA-VI-NEXT: s_lshr_b32 s50, s21, 16 7039; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x100000 7040; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s54 7041; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s55 7042; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s56 7043; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s57 7044; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:176 7045; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[46:47], s[20:21], 0x100000 7046; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s20, 16 7047; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x100000 7048; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x100000 7049; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s52 7050; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s53 7051; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s22 7052; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s23 7053; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:160 7054; GCN-NOHSA-VI-NEXT: s_mov_b32 s42, s19 7055; GCN-NOHSA-VI-NEXT: s_lshr_b32 s44, s19, 16 7056; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 7057; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48 7058; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49 7059; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s50 7060; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s51 7061; GCN-NOHSA-VI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:144 7062; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[40:41], s[18:19], 0x100000 7063; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s18, 16 7064; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x100000 7065; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x100000 7066; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s46 7067; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s47 7068; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s20 7069; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s21 7070; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:128 7071; GCN-NOHSA-VI-NEXT: s_mov_b32 s36, s17 7072; GCN-NOHSA-VI-NEXT: s_lshr_b32 s38, s17, 16 7073; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000 7074; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s42 7075; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s43 7076; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s44 7077; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s45 7078; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112 7079; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[16:17], 0x100000 7080; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s16, 16 7081; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x100000 7082; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x100000 7083; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s40 7084; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s41 7085; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s18 7086; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s19 7087; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96 7088; GCN-NOHSA-VI-NEXT: s_mov_b32 s30, s15 7089; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s15, 16 7090; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 7091; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36 7092; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37 7093; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s38 7094; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s39 7095; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80 7096; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[14:15], 0x100000 7097; 
GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s14, 16 7098; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x100000 7099; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x100000 7100; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 7101; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 7102; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16 7103; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 7104; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64 7105; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s12, 16 7106; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s13, 16 7107; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[0:1], s[12:13], 0x100000 7108; GCN-NOHSA-VI-NEXT: s_mov_b32 s12, s13 7109; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 7110; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 7111; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 7112; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s34 7113; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 7114; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48 7115; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 7116; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x100000 7117; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 7118; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 7119; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14 7120; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15 7121; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32 7122; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 7123; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 7124; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13 7125; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s28 7126; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s29 7127; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 7128; GCN-NOHSA-VI-NEXT: s_nop 0 7129; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 7130; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 7131; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 7132; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 7133; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 7134; 
GCN-NOHSA-VI-NEXT: s_endpgm 7135; 7136; EG-LABEL: constant_sextload_v32i16_to_v32i64: 7137; EG: ; %bb.0: 7138; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 7139; EG-NEXT: TEX 0 @22 7140; EG-NEXT: ALU 55, @31, KC0[CB0:0-32], KC1[] 7141; EG-NEXT: TEX 2 @24 7142; EG-NEXT: ALU 74, @87, KC0[CB0:0-32], KC1[] 7143; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0 7144; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0 7145; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T34.X, 0 7146; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T33.X, 0 7147; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0 7148; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0 7149; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T30.X, 0 7150; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T29.X, 0 7151; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0 7152; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0 7153; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0 7154; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T25.X, 0 7155; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0 7156; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0 7157; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0 7158; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1 7159; EG-NEXT: CF_END 7160; EG-NEXT: Fetch clause starting at 22: 7161; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 7162; EG-NEXT: Fetch clause starting at 24: 7163; EG-NEXT: VTX_READ_128 T38.XYZW, T19.X, 48, #1 7164; EG-NEXT: VTX_READ_128 T39.XYZW, T19.X, 32, #1 7165; EG-NEXT: VTX_READ_128 T40.XYZW, T19.X, 16, #1 7166; EG-NEXT: ALU clause starting at 30: 7167; EG-NEXT: MOV * T19.X, KC0[2].Z, 7168; EG-NEXT: ALU clause starting at 31: 7169; EG-NEXT: LSHR T21.X, KC0[2].Y, literal.x, 7170; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7171; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7172; EG-NEXT: LSHR T22.X, PV.W, literal.x, 7173; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7174; EG-NEXT: 
2(2.802597e-45), 32(4.484155e-44) 7175; EG-NEXT: LSHR T23.X, PV.W, literal.x, 7176; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7177; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7178; EG-NEXT: LSHR T24.X, PV.W, literal.x, 7179; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7180; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7181; EG-NEXT: LSHR T25.X, PV.W, literal.x, 7182; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7183; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7184; EG-NEXT: LSHR T26.X, PV.W, literal.x, 7185; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7186; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7187; EG-NEXT: LSHR T27.X, PV.W, literal.x, 7188; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7189; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7190; EG-NEXT: LSHR T28.X, PV.W, literal.x, 7191; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7192; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7193; EG-NEXT: LSHR T29.X, PV.W, literal.x, 7194; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7195; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7196; EG-NEXT: LSHR T30.X, PV.W, literal.x, 7197; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7198; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7199; EG-NEXT: LSHR T31.X, PV.W, literal.x, 7200; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7201; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7202; EG-NEXT: LSHR T32.X, PV.W, literal.x, 7203; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7204; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 7205; EG-NEXT: LSHR T33.X, PV.W, literal.x, 7206; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7207; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 7208; EG-NEXT: LSHR T34.X, PV.W, literal.x, 7209; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 7210; EG-NEXT: ASHR * T35.W, T20.X, literal.z, 7211; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 7212; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7213; EG-NEXT: LSHR T36.X, PV.W, literal.x, 7214; EG-NEXT: ASHR T35.Z, T20.X, literal.y, 7215; EG-NEXT: ASHR * T37.W, 
T20.Y, literal.z, 7216; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7217; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7218; EG-NEXT: BFE_INT T35.X, T20.X, 0.0, literal.x, 7219; EG-NEXT: ASHR * T37.Z, T20.Y, literal.x, 7220; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7221; EG-NEXT: BFE_INT T37.X, T20.Y, 0.0, literal.x, 7222; EG-NEXT: ASHR T35.Y, PV.X, literal.y, 7223; EG-NEXT: ASHR * T19.W, T20.Z, literal.y, 7224; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7225; EG-NEXT: ALU clause starting at 87: 7226; EG-NEXT: ASHR T19.Z, T20.Z, literal.x, 7227; EG-NEXT: ASHR * T41.W, T20.W, literal.y, 7228; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7229; EG-NEXT: BFE_INT T19.X, T20.Z, 0.0, literal.x, 7230; EG-NEXT: ASHR T37.Y, T37.X, literal.y, 7231; EG-NEXT: ASHR T41.Z, T20.W, literal.x, 7232; EG-NEXT: ASHR * T42.W, T40.X, literal.y, BS:VEC_120/SCL_212 7233; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7234; EG-NEXT: BFE_INT T41.X, T20.W, 0.0, literal.x, 7235; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 7236; EG-NEXT: ASHR T42.Z, T40.X, literal.x, 7237; EG-NEXT: ASHR * T20.W, T40.Y, literal.y, 7238; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7239; EG-NEXT: BFE_INT T42.X, T40.X, 0.0, literal.x, 7240; EG-NEXT: ASHR T41.Y, PV.X, literal.y, 7241; EG-NEXT: ASHR T20.Z, T40.Y, literal.x, 7242; EG-NEXT: ASHR * T43.W, T40.Z, literal.y, 7243; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7244; EG-NEXT: BFE_INT T20.X, T40.Y, 0.0, literal.x, 7245; EG-NEXT: ASHR T42.Y, PV.X, literal.y, 7246; EG-NEXT: ASHR T43.Z, T40.Z, literal.x, 7247; EG-NEXT: ASHR * T44.W, T40.W, literal.y, 7248; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7249; EG-NEXT: BFE_INT T43.X, T40.Z, 0.0, literal.x, 7250; EG-NEXT: ASHR T20.Y, PV.X, literal.y, 7251; EG-NEXT: ASHR T44.Z, T40.W, literal.x, 7252; EG-NEXT: ASHR * T45.W, T39.X, literal.y, 7253; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7254; EG-NEXT: BFE_INT T44.X, T40.W, 0.0, literal.x, 7255; EG-NEXT: ASHR T43.Y, PV.X, literal.y, 7256; EG-NEXT: ASHR T45.Z, T39.X, literal.x, 
7257; EG-NEXT: ASHR * T40.W, T39.Y, literal.y, 7258; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7259; EG-NEXT: BFE_INT T45.X, T39.X, 0.0, literal.x, 7260; EG-NEXT: ASHR T44.Y, PV.X, literal.y, 7261; EG-NEXT: ASHR T40.Z, T39.Y, literal.x, 7262; EG-NEXT: ASHR * T46.W, T39.Z, literal.y, 7263; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7264; EG-NEXT: BFE_INT T40.X, T39.Y, 0.0, literal.x, 7265; EG-NEXT: ASHR T45.Y, PV.X, literal.y, 7266; EG-NEXT: ASHR T46.Z, T39.Z, literal.x, 7267; EG-NEXT: ASHR * T47.W, T39.W, literal.y, 7268; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7269; EG-NEXT: BFE_INT T46.X, T39.Z, 0.0, literal.x, 7270; EG-NEXT: ASHR T40.Y, PV.X, literal.y, 7271; EG-NEXT: ASHR T47.Z, T39.W, literal.x, 7272; EG-NEXT: ASHR * T48.W, T38.X, literal.y, 7273; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7274; EG-NEXT: BFE_INT T47.X, T39.W, 0.0, literal.x, 7275; EG-NEXT: ASHR T46.Y, PV.X, literal.y, 7276; EG-NEXT: ASHR T48.Z, T38.X, literal.x, 7277; EG-NEXT: ASHR * T39.W, T38.Y, literal.y, 7278; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7279; EG-NEXT: BFE_INT T48.X, T38.X, 0.0, literal.x, 7280; EG-NEXT: ASHR T47.Y, PV.X, literal.y, 7281; EG-NEXT: ASHR T39.Z, T38.Y, literal.x, 7282; EG-NEXT: ASHR * T49.W, T38.Z, literal.y, 7283; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7284; EG-NEXT: BFE_INT T39.X, T38.Y, 0.0, literal.x, 7285; EG-NEXT: ASHR T48.Y, PV.X, literal.y, 7286; EG-NEXT: ASHR T49.Z, T38.Z, literal.x, 7287; EG-NEXT: ASHR * T50.W, T38.W, literal.y, 7288; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7289; EG-NEXT: BFE_INT T49.X, T38.Z, 0.0, literal.x, 7290; EG-NEXT: ASHR T39.Y, PV.X, literal.y, 7291; EG-NEXT: ASHR * T50.Z, T38.W, literal.x, 7292; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7293; EG-NEXT: BFE_INT T50.X, T38.W, 0.0, literal.x, 7294; EG-NEXT: ASHR T49.Y, PV.X, literal.y, 7295; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 7296; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7297; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 7298; EG-NEXT: LSHR 
T38.X, PV.W, literal.x, 7299; EG-NEXT: ASHR * T50.Y, PV.X, literal.y, 7300; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7301 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 7302 %ext = sext <32 x i16> %load to <32 x i64> 7303 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out 7304 ret void 7305} 7306 7307; FIXME: The v64i16 -> v64i64 zext/sext load tests below are disabled because they trigger undefined-register errors in the machine verifier. 7308 7309; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 7310; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in 7311; %ext = zext <64 x i16> %load to <64 x i64> 7312; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out 7313; ret void 7314; } 7315 7316; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 7317; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in 7318; %ext = sext <64 x i16> %load to <64 x i64> 7319; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out 7320; ret void 7321; } 7322 7323attributes #0 = { nounwind } 7324